diff --git a/bench/bench_layer.cpp b/bench/bench_layer.cpp
index b4370456..a3bc2538 100644
--- a/bench/bench_layer.cpp
+++ b/bench/bench_layer.cpp
@@ -18,7 +18,7 @@
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 // SOFTWARE.
-
+
 // Created by fss on 23-4-27.
 #include <benchmark/benchmark.h>
 #include "../source/layer/details/adaptive_avgpooling.hpp"
@@ -238,7 +238,6 @@
 BENCHMARK(BM_SiLU)->Args({32, 160, 160})->Unit(benchmark::kMillisecond);
 BENCHMARK(BM_SiLU)->Args({64, 80, 80})->Unit(benchmark::kMillisecond);
 BENCHMARK(BM_SiLU)->Args({128, 40, 40})->Unit(benchmark::kMillisecond);
-
 
 static void BM_ReLU(benchmark::State& state) {
   using namespace kuiper_infer;
@@ -334,6 +333,7 @@ static void BM_Upsample(benchmark::State& state) {
   uint32_t rows = state.range(1);
   uint32_t cols = state.range(2);
+  UpSampleMode mode = UpSampleMode(state.range(3));
   std::shared_ptr<Tensor<float>> input =
       std::make_shared<Tensor<float>>(channels, rows, cols);
   input->Rand();
@@ -342,17 +342,22 @@ static void BM_Upsample(benchmark::State& state) {
   inputs.push_back(input);
   std::vector<std::shared_ptr<Tensor<float>>> outputs(1);
-  UpSampleLayer layer(2.f, 2.f);
+  UpSampleLayer layer(3.f, 3.f, mode);
   for (auto _ : state) {
     const auto status = layer.Forward(inputs, outputs);
   }
 }
-BENCHMARK(BM_Upsample)->Args({3, 320, 320})->Unit(benchmark::kMillisecond);
-BENCHMARK(BM_Upsample)->Args({32, 160, 160})->Unit(benchmark::kMillisecond);
-BENCHMARK(BM_Upsample)->Args({64, 80, 80})->Unit(benchmark::kMillisecond);
-BENCHMARK(BM_Upsample)->Args({128, 40, 40})->Unit(benchmark::kMillisecond);
+BENCHMARK(BM_Upsample)->Args({3, 320, 320, 0})->Unit(benchmark::kMillisecond);
+BENCHMARK(BM_Upsample)->Args({32, 160, 160, 0})->Unit(benchmark::kMillisecond);
+BENCHMARK(BM_Upsample)->Args({64, 80, 80, 0})->Unit(benchmark::kMillisecond);
+BENCHMARK(BM_Upsample)->Args({128, 40, 40, 0})->Unit(benchmark::kMillisecond);
+
+BENCHMARK(BM_Upsample)->Args({3, 320, 320, 1})->Unit(benchmark::kMillisecond);
+BENCHMARK(BM_Upsample)->Args({32, 160, 160, 1})->Unit(benchmark::kMillisecond);
+BENCHMARK(BM_Upsample)->Args({64, 80, 80, 1})->Unit(benchmark::kMillisecond);
+BENCHMARK(BM_Upsample)->Args({128, 40, 40, 1})->Unit(benchmark::kMillisecond);
 
 static void BM_AdaptivePooling(benchmark::State& state) {
   using namespace kuiper_infer;
diff --git a/include/status_code.hpp b/include/status_code.hpp
index a7119339..b243ab41 100644
--- a/include/status_code.hpp
+++ b/include/status_code.hpp
@@ -75,8 +75,9 @@ enum class ParseParameterAttrStatus {
   kParameterMissingScale = 14,
   kParameterMissingResizeMode = 15,
   kParameterMissingDilation = 16,
-  kParameterMissingPaddingMode = 16,
-  kParameterMissingOutputPadding = 17,
+  kParameterMissingPaddingMode = 17,
+  kParameterMissingOutputPadding = 18,
+  kParameterMissingAlignCorner = 19,
 
   kAttrMissingBias = 21,
   kAttrMissingWeight = 22,
diff --git a/source/layer/details/convolution.cpp b/source/layer/details/convolution.cpp
index a459a59d..7d96fbde 100644
--- a/source/layer/details/convolution.cpp
+++ b/source/layer/details/convolution.cpp
@@ -246,7 +246,7 @@ InferStatus ConvolutionLayer::Forward(
     CHECK(kernel_count % groups_ == 0);
     CHECK(input_c % groups_ == 0);
   }
-  uint32_t input_c_group = input_c / groups_;
+  const uint32_t input_c_group = input_c / groups_;
   CHECK(input_c_group == kernel_c) << "The number of channel for the kernel "
                                       "matrix and input tensor do not match";
diff --git a/source/layer/details/upsample.cpp b/source/layer/details/upsample.cpp
index 0786835c..546f14ef 100644
--- a/source/layer/details/upsample.cpp
+++ b/source/layer/details/upsample.cpp
@@ -25,7 +25,32 @@
 #include "layer/abstract/layer_factory.hpp"
 namespace kuiper_infer {
-UpSampleLayer::UpSampleLayer(float scale_h, float scale_w, UpSampleMode mode)
+static void CalcIndexAndLambda(int32_t input_size, int32_t output_size,
+                               float div_scale, int32_t output_idx,
+                               float& lambda0, float& lambda1,
+                               int32_t& input_index0, int32_t& input_index1) {
+  if (output_size == input_size) {
+    input_index0 = input_index1 = output_idx;
+    lambda0 = 1;
+    lambda1 = 0;
+  } else {
+    float real_input_idx =
+        div_scale * (static_cast<float>(output_idx) + 0.5f) - 0.5f;
+    if (real_input_idx < 0) {
+      real_input_idx = 0;
+    }
+
+    input_index0 = static_cast<int32_t>(real_input_idx);
+    int32_t offset = (input_index0 < input_size - 1) ? 1 : 0;
+    input_index1 = input_index0 + offset;
+
+    lambda1 = real_input_idx - static_cast<float>(input_index0);
+    lambda0 = 1.0f - lambda1;
+  }
+}
+
+UpSampleLayer::UpSampleLayer(uint32_t scale_h, uint32_t scale_w,
+                             UpSampleMode mode)
     : NonParamLayer("upsample"),
       scale_h_(scale_h),
       scale_w_(scale_w),
@@ -46,14 +71,15 @@ InferStatus UpSampleLayer::Forward(
     return InferStatus::kInferFailedInputOutSizeMatchError;
   }
 
-  auto test_scale_factor = [](uint32_t origin, float scale_factor) {
-    float result = origin * scale_factor;
-    if (std::abs(result - std::round(result)) > 1e-4f) {
+  auto test_scale_factor = [](uint32_t origin, uint32_t scale_factor) {
+    float result = static_cast<float>(origin * scale_factor);
+    if (std::abs(result - std::round(result)) > 1e-5f) {
       LOG(ERROR) << "The input scale_factor is wrong";
     }
   };
 
-  LOG_IF(FATAL, this->mode_ != UpSampleMode::kModeNearest)
+  LOG_IF(FATAL, this->mode_ != UpSampleMode::kModeNearest &&
+                    this->mode_ != UpSampleMode::kModeBilinear)
       << "Unsupported upsample mode: " << int(mode_);
 
   for (uint32_t i = 0; i < inputs.size(); ++i) {
@@ -69,25 +95,23 @@ InferStatus UpSampleLayer::Forward(
   }
 
   const uint32_t batch_size = inputs.size();
-  const uint32_t scale_w = uint32_t(scale_w_);
-  const uint32_t scale_h = uint32_t(scale_h_);
 
 #pragma omp parallel for num_threads(batch_size)
   for (uint32_t i = 0; i < batch_size; ++i) {
     const arma::fcube& input_data = inputs.at(i)->data();
     std::shared_ptr<Tensor<float>> output = outputs.at(i);
     if (output == nullptr || output->empty()) {
-      output = std::make_shared<Tensor<float>>(
-          input_data.n_slices, uint32_t(input_data.n_rows * scale_h),
-          uint32_t(input_data.n_cols * scale_w));
+      output = std::make_shared<Tensor<float>>(input_data.n_slices,
+                                               input_data.n_rows * scale_h_,
+                                               input_data.n_cols * scale_w_);
       outputs.at(i) = output;
     }
     auto& output_data = output->data();
-    CHECK(output_data.n_rows == input_data.n_rows * scale_h)
+    CHECK(output_data.n_rows == input_data.n_rows * scale_h_)
         << "The input and output tensor height of the upsample layer do not "
           "match "
        << i << "th";
-    CHECK(output_data.n_cols == input_data.n_cols * scale_w)
+    CHECK(output_data.n_cols == input_data.n_cols * scale_w_)
        << "The input and output tensor width of the upsample layer do not "
          "match "
        << i << "th";
@@ -97,36 +121,80 @@ InferStatus UpSampleLayer::Forward(
            "match "
        << i << "th";
 
+    const float div_scale_h = 1.f / static_cast<float>(scale_h_);
+    const float div_scale_w = 1.f / static_cast<float>(scale_w_);
     const uint32_t channels = input_data.n_slices;
-    for (uint32_t c = 0; c < channels; ++c) {
-      const arma::fmat& input_channel = input_data.slice(c);
-      arma::fmat& output_channel = output_data.slice(c);
-
-      const uint32_t input_w = input_channel.n_cols;
-      const uint32_t input_h = input_channel.n_rows;
-      const uint32_t output_w = output_channel.n_cols;
-      const uint32_t output_h = output_channel.n_rows;
-
-      for (uint32_t w = 0; w < input_w; ++w) {
-        const float* input_col_ptr = input_channel.colptr(w);
-        const uint32_t scaled_w = w * scale_w;
-        for (uint32_t sw = 0; sw < scale_w; ++sw) {
-          if (scaled_w + sw >= output_w) {
-            continue;
-          }
-          float* output_col_ptr = output_channel.colptr(scaled_w + sw);
-          for (uint32_t h = 0; h < input_h; ++h) {
-            const uint32_t scaled_h = h * scale_h;
-            float* output_ptr = output_col_ptr + scaled_h;
-            float input_value = *(input_col_ptr + h);
-            for (uint32_t sh = 0; sh < scale_h; ++sh) {
-              if (scaled_h + sh < output_h) {
-                *(output_ptr + sh) = input_value;
+    if (mode_ == UpSampleMode::kModeNearest) {
+#pragma omp parallel for
+      for (uint32_t c = 0; c < channels; ++c) {
+        const arma::fmat& input_channel = input_data.slice(c);
+        arma::fmat& output_channel = output_data.slice(c);
+
+        const uint32_t input_w = input_channel.n_cols;
+        const uint32_t input_h = input_channel.n_rows;
+        const uint32_t output_w = output_channel.n_cols;
+        const uint32_t output_h = output_channel.n_rows;
+        for (uint32_t w = 0; w < input_w; ++w) {
+          const float* input_col_ptr = input_channel.colptr(w);
+          const uint32_t scaled_w = w * scale_w_;
+          for (uint32_t sw = 0; sw < scale_w_; ++sw) {
+            if (scaled_w + sw >= output_w) {
+              continue;
+            }
+            float* output_col_ptr = output_channel.colptr(scaled_w + sw);
+            for (uint32_t h = 0; h < input_h; ++h) {
+              const uint32_t scaled_h = h * scale_h_;
+              float* output_ptr = output_col_ptr + scaled_h;
+              float input_value = *(input_col_ptr + h);
+              for (uint32_t sh = 0; sh < scale_h_; ++sh) {
+                if (scaled_h + sh < output_h) {
+                  *(output_ptr + sh) = input_value;
+                }
               }
             }
           }
         }
       }
+    } else {
+#pragma omp parallel for
+      for (uint32_t c = 0; c < channels; ++c) {
+        const arma::fmat& input_channel = input_data.slice(c);
+        arma::fmat& output_channel = output_data.slice(c);
+
+        const uint32_t input_w = input_channel.n_cols;
+        const uint32_t input_h = input_channel.n_rows;
+        const uint32_t output_w = output_channel.n_cols;
+        const uint32_t output_h = output_channel.n_rows;
+        for (uint32_t w = 0; w < output_w; ++w) {
+          float* output_ptr = output_channel.colptr(w);
+          float w0_lambda = 0.f;
+          float w1_lambda = 0.f;
+          int32_t input_w0 = 0;
+          int32_t input_w1 = 0;
+          CalcIndexAndLambda(static_cast<int32_t>(input_w),
+                             static_cast<int32_t>(output_w), div_scale_w,
+                             static_cast<int32_t>(w), w0_lambda, w1_lambda,
+                             input_w0, input_w1);
+          const float* input_ptr0 = input_channel.colptr(input_w0);
+          const float* input_ptr1 = input_channel.colptr(input_w1);
+          for (uint32_t h = 0; h < output_h; ++h) {
+            float h0_lambda = 0.f;
+            float h1_lambda = 0.f;
+            int32_t input_h0 = 0;
+            int32_t input_h1 = 0;
+            CalcIndexAndLambda(static_cast<int32_t>(input_h),
+                               static_cast<int32_t>(output_h), div_scale_h,
+                               static_cast<int32_t>(h), h0_lambda, h1_lambda,
+                               input_h0, input_h1);
+
+            *(output_ptr + h) =
+                h0_lambda * w0_lambda * (*(input_ptr0 + input_h0)) +
+                h0_lambda * w1_lambda * (*(input_ptr1 + input_h0)) +
+                h1_lambda * w0_lambda * (*(input_ptr0 + input_h1)) +
+                h1_lambda * w1_lambda * (*(input_ptr1 + input_h1));
+          }
+        }
+      }
+    }
   }
   return InferStatus::kInferSuccess;
@@ -143,7 +211,6 @@ ParseParameterAttrStatus UpSampleLayer::CreateInstance(
     return ParseParameterAttrStatus::kParameterMissingScale;
   }
 
-  const auto& scale_param = params.at("scale_factor");
   auto scales = std::dynamic_pointer_cast<RuntimeParameterFloatArray>(
       params.at("scale_factor"));
   if (scales == nullptr) {
@@ -157,17 +224,45 @@ ParseParameterAttrStatus UpSampleLayer::CreateInstance(
     return ParseParameterAttrStatus::kParameterMissingResizeMode;
   }
 
-  auto mode =
+  auto mode_param =
       std::dynamic_pointer_cast<RuntimeParameterString>(params.at("mode"));
-  CHECK(mode->value == "nearest")
-      << "The mode " << mode->value << " is not supported!";
+
+  UpSampleMode mode;
+  if (mode_param->value == "nearest") {
+    mode = UpSampleMode::kModeNearest;
+  } else if (mode_param->value == "bilinear") {
+    mode = UpSampleMode::kModeBilinear;
+  } else {
+    LOG(FATAL) << "The mode " << mode_param->value << " is not supported!";
+  }
+
+  if (params.find("align_corners") != params.end()) {
+    auto align_corner_param = std::dynamic_pointer_cast<RuntimeParameterBool>(
+        params.at("align_corners"));
+    if (!align_corner_param) {
+      return ParseParameterAttrStatus::kParameterMissingAlignCorner;
+    }
+    bool align_corner = align_corner_param->value;
+    CHECK_EQ(align_corner, false);
+  }
 
   float scale_h = scales->value.at(0);
   float scale_w = scales->value.at(1);
-  upsample_layer = std::make_shared<UpSampleLayer>(scale_h, scale_w);
+  // The scale factors must be greater than zero
+  CHECK_GT(scale_h, 0.f);
+  CHECK_GT(scale_w, 0.f);
+
+  // The scale factors must be integral values
+  CHECK_LE(scale_h - static_cast<uint32_t>(scale_h), 1e-5f);
+  CHECK_LE(scale_w - static_cast<uint32_t>(scale_w), 1e-5f);
+
+  upsample_layer = std::make_shared<UpSampleLayer>(
+      static_cast<uint32_t>(scale_h), static_cast<uint32_t>(scale_w), mode);
   return ParseParameterAttrStatus::kParameterAttrParseSuccess;
 }
 
 LayerRegistererWrapper kUpSamplerCreateInstance("nn.Upsample",
                                                 UpSampleLayer::CreateInstance);
+LayerRegistererWrapper kUpSamplerFCreateInstance("F.upsample",
+                                                 UpSampleLayer::CreateInstance);
 }  // namespace kuiper_infer
diff --git a/source/layer/details/upsample.hpp b/source/layer/details/upsample.hpp
index 750ef8c2..1f47acac 100644
--- a/source/layer/details/upsample.hpp
+++ b/source/layer/details/upsample.hpp
@@ -18,7 +18,7 @@
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 // SOFTWARE.
-
+
 // Created by fss on 22-12-25.
 #ifndef KUIPER_INFER_SOURCE_LAYER_DETAILS_UPSAMPLE_HPP_
@@ -27,12 +27,13 @@
 namespace kuiper_infer {
 enum class UpSampleMode {
-  kModeNearest = 0,  // currently only nearest-neighbour upsampling is supported
+  kModeNearest = 0,
+  kModeBilinear = 1,  // the upsample layer now supports these two modes
 };
 
 class UpSampleLayer : public NonParamLayer {
  public:
-  explicit UpSampleLayer(float scale_h, float scale_w,
+  explicit UpSampleLayer(uint32_t scale_h, uint32_t scale_w,
                          UpSampleMode mode = UpSampleMode::kModeNearest);
 
   InferStatus Forward(
@@ -44,8 +45,8 @@ class UpSampleLayer : public NonParamLayer {
       std::shared_ptr<Layer>& upsample_layer);
 
  private:
-  float scale_h_ = 1.f;
-  float scale_w_ = 1.f;
+  uint32_t scale_h_ = 1;
+  uint32_t scale_w_ = 1;
   UpSampleMode mode_ = UpSampleMode::kModeNearest;
 };
 }  // namespace kuiper_infer
diff --git a/test/test_data/test_load_data.cpp b/test/test_data/test_load_data.cpp
index 79c083ad..164d6e9c 100644
--- a/test/test_data/test_load_data.cpp
+++ b/test/test_data/test_load_data.cpp
@@ -18,15 +18,16 @@
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 // SOFTWARE.
-
+
 // Created by fss on 22-11-21.
-#include <gtest/gtest.h>
 #include <glog/logging.h>
+#include <gtest/gtest.h>
 #include "data/load_data.hpp"
 
 TEST(test_load, load_csv_data) {
   using namespace kuiper_infer;
-  const arma::fmat &data = CSVDataLoader::LoadData("./tmp/data_loader/data1.csv");
+  const arma::fmat& data =
+      CSVDataLoader::LoadData("./tmp/data_loader/data1.csv");
   ASSERT_NE(data.empty(), true);
   ASSERT_EQ(data.n_rows, 3);
   ASSERT_EQ(data.n_cols, 4);
@@ -41,7 +42,8 @@
 TEST(test_load, load_csv_arange) {
   using namespace kuiper_infer;
-  const arma::fmat &data = CSVDataLoader::LoadData("./tmp/data_loader/data2.csv");
+  const arma::fmat& data =
+      CSVDataLoader::LoadData("./tmp/data_loader/data2.csv");
   ASSERT_NE(data.empty(), true);
   ASSERT_EQ(data.n_rows, 3);
   ASSERT_EQ(data.n_cols, 4);
@@ -59,7 +61,8 @@
 TEST(test_load, load_csv_missing_data1) {
   using namespace kuiper_infer;
-  const arma::fmat &data = CSVDataLoader::LoadData("./tmp/data_loader/data4.csv");
+  const arma::fmat& data =
+      CSVDataLoader::LoadData("./tmp/data_loader/data4.csv");
   ASSERT_NE(data.empty(), true);
   ASSERT_EQ(data.n_rows, 3);
   ASSERT_EQ(data.n_cols, 11);
@@ -79,7 +82,8 @@
 TEST(test_load, load_csv_missing_data2) {
   using namespace kuiper_infer;
-  const arma::fmat &data = CSVDataLoader::LoadData("./tmp/data_loader/data3.csv");
+  const arma::fmat& data =
+      CSVDataLoader::LoadData("./tmp/data_loader/data3.csv");
   ASSERT_NE(data.empty(), true);
 
   ASSERT_EQ(data.n_rows, 3);
@@ -105,7 +109,8 @@
 TEST(test_load, split_char) {
   using namespace kuiper_infer;
-  const arma::fmat &data = CSVDataLoader::LoadData("./tmp/data_loader/data5.csv", '-');
+  const arma::fmat& data =
+      CSVDataLoader::LoadData("./tmp/data_loader/data5.csv", '-');
   ASSERT_NE(data.empty(), true);
 
   ASSERT_EQ(data.n_rows, 3);
@@ -122,7 +127,8 @@
 TEST(test_load, load_minus_data) {
   using namespace kuiper_infer;
-  const arma::fmat &data = CSVDataLoader::LoadData("./tmp/data_loader/data6.csv", ',');
+  const arma::fmat& data =
+      CSVDataLoader::LoadData("./tmp/data_loader/data6.csv", ',');
   ASSERT_NE(data.empty(), true);
 
   ASSERT_EQ(data.n_rows, 3);
@@ -144,7 +150,8 @@
 TEST(test_load, load_large_data) {
   using namespace kuiper_infer;
-  const arma::fmat &data = CSVDataLoader::LoadData("./tmp/data_loader/data7.csv", ',');
+  const arma::fmat& data =
+      CSVDataLoader::LoadData("./tmp/data_loader/data7.csv", ',');
   ASSERT_NE(data.empty(), true);
   ASSERT_EQ(data.n_rows, 1024);
   ASSERT_EQ(data.n_cols, 1024);
@@ -165,6 +172,7 @@
 TEST(test_load, load_empty_data) {
   using namespace kuiper_infer;
-  const arma::fmat &data = CSVDataLoader::LoadData("./tmp/data_loader/notexists.csv", ',');
+  const arma::fmat& data =
+      CSVDataLoader::LoadData("./tmp/data_loader/notexists.csv", ',');
   ASSERT_EQ(data.empty(), true);
 }
\ No newline at end of file
diff --git a/test/test_layer/test_upsample.cpp b/test/test_layer/test_upsample.cpp
index 72c89f25..860946d3 100644
--- a/test/test_layer/test_upsample.cpp
+++ b/test/test_layer/test_upsample.cpp
@@ -18,11 +18,13 @@
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 // SOFTWARE.
-
+
 // Created by fss on 22-12-25.
-#include <gtest/gtest.h>
 #include <glog/logging.h>
+#include <gtest/gtest.h>
 #include "../../source/layer/details/upsample.hpp"
+#include "data/load_data.hpp"
+#include "runtime/runtime_ir.hpp"
 
 TEST(test_layer, forward_upsample1) {
   using namespace kuiper_infer;
@@ -32,7 +34,8 @@ TEST(test_layer, forward_upsample1) {
   const uint32_t rows = 224;
   const uint32_t cols = 224;
-  std::shared_ptr<Tensor<float>> input = std::make_shared<Tensor<float>>(channels, rows, cols);
+  std::shared_ptr<Tensor<float>> input =
+      std::make_shared<Tensor<float>>(channels, rows, cols);
   input->Rand();
   std::vector<std::shared_ptr<Tensor<float>>> inputs;
@@ -43,16 +46,17 @@ TEST(test_layer, forward_upsample1) {
   ASSERT_EQ(status, InferStatus::kInferSuccess);
   for (int i = 0; i < outputs.size(); ++i) {
-    const auto &output = outputs.at(i);
+    const auto& output = outputs.at(i);
     for (int c = 0; c < channels; ++c) {
-      const auto &output_channel = output->slice(i);
-      const auto &input_channel = input->slice(i);
+      const auto& output_channel = output->slice(i);
+      const auto& input_channel = input->slice(i);
       ASSERT_EQ(output_channel.n_rows / input_channel.n_rows, 2);
       ASSERT_EQ(output_channel.n_cols / input_channel.n_cols, 2);
       for (int r = 0; r < output_channel.n_rows; ++r) {
         for (int c_ = 0; c_ < output_channel.n_cols; ++c_) {
-          ASSERT_EQ(input_channel.at(r / 2, c_ / 2), output_channel.at(r, c_)) << r << " " << c_;
+          ASSERT_EQ(input_channel.at(r / 2, c_ / 2), output_channel.at(r, c_))
+              << r << " " << c_;
         }
       }
     }
@@ -67,7 +71,8 @@ TEST(test_layer, forward_upsample2) {
   const uint32_t rows = 224;
   const uint32_t cols = 224;
-  std::shared_ptr<Tensor<float>> input = std::make_shared<Tensor<float>>(channels, rows, cols);
+  std::shared_ptr<Tensor<float>> input =
+      std::make_shared<Tensor<float>>(channels, rows, cols);
   input->Rand();
   std::vector<std::shared_ptr<Tensor<float>>> inputs;
@@ -78,16 +83,17 @@ TEST(test_layer, forward_upsample2) {
   ASSERT_EQ(status, InferStatus::kInferSuccess);
   for (int i = 0; i < outputs.size(); ++i) {
-    const auto &output = outputs.at(i);
+    const auto& output = outputs.at(i);
     for (int c = 0; c < channels; ++c) {
-      const auto &output_channel = output->slice(i);
-      const auto &input_channel = input->slice(i);
+      const auto& output_channel = output->slice(i);
+      const auto& input_channel = input->slice(i);
       ASSERT_EQ(output_channel.n_rows / input_channel.n_rows, 2);
       ASSERT_EQ(output_channel.n_cols / input_channel.n_cols, 3);
       for (int r = 0; r < output_channel.n_rows; ++r) {
         for (int c_ = 0; c_ < output_channel.n_cols; ++c_) {
-          ASSERT_EQ(input_channel.at(r / 2, c_ / 3), output_channel.at(r, c_)) << r << " " << c_;
+          ASSERT_EQ(input_channel.at(r / 2, c_ / 3), output_channel.at(r, c_))
+              << r << " " << c_;
         }
       }
     }
@@ -101,7 +107,8 @@ TEST(test_layer, forward_upsample3) {
   const uint32_t rows = 224;
   const uint32_t cols = 224;
-  std::shared_ptr<Tensor<float>> input = std::make_shared<Tensor<float>>(channels, rows, cols);
+  std::shared_ptr<Tensor<float>> input =
+      std::make_shared<Tensor<float>>(channels, rows, cols);
   input->Rand();
   std::vector<std::shared_ptr<Tensor<float>>> inputs;
@@ -112,16 +119,17 @@ TEST(test_layer, forward_upsample3) {
   ASSERT_EQ(status, InferStatus::kInferSuccess);
   for (int i = 0; i < outputs.size(); ++i) {
-    const auto &output = outputs.at(i);
+    const auto& output = outputs.at(i);
     for (int c = 0; c < channels; ++c) {
-      const auto &output_channel = output->slice(i);
-      const auto &input_channel = input->slice(i);
+      const auto& output_channel = output->slice(i);
+      const auto& input_channel = input->slice(i);
       ASSERT_EQ(output_channel.n_rows / input_channel.n_rows, 3);
       ASSERT_EQ(output_channel.n_cols / input_channel.n_cols, 2);
       for (int r = 0; r < output_channel.n_rows; ++r) {
         for (int c_ = 0; c_ < output_channel.n_cols; ++c_) {
-          ASSERT_EQ(input_channel.at(r / 3, c_ / 2), output_channel.at(r, c_)) << r << " " << c_;
+          ASSERT_EQ(input_channel.at(r / 3, c_ / 2), output_channel.at(r, c_))
+              << r << " " << c_;
         }
       }
     }
@@ -135,7 +143,8 @@ TEST(test_layer, forward_upsample4) {
   const uint32_t rows = 224;
   const uint32_t cols = 224;
-  std::shared_ptr<Tensor<float>> input = std::make_shared<Tensor<float>>(channels, rows, cols);
+  std::shared_ptr<Tensor<float>> input =
+      std::make_shared<Tensor<float>>(channels, rows, cols);
   input->Rand();
   std::vector<std::shared_ptr<Tensor<float>>> inputs;
@@ -146,16 +155,17 @@ TEST(test_layer, forward_upsample4) {
   ASSERT_EQ(status, InferStatus::kInferSuccess);
   for (int i = 0; i < outputs.size(); ++i) {
-    const auto &output = outputs.at(i);
+    const auto& output = outputs.at(i);
     for (int c = 0; c < channels; ++c) {
-      const auto &output_channel = output->slice(i);
-      const auto &input_channel = input->slice(i);
+      const auto& output_channel = output->slice(i);
+      const auto& input_channel = input->slice(i);
       ASSERT_EQ(output_channel.n_rows / input_channel.n_rows, 3);
       ASSERT_EQ(output_channel.n_cols / input_channel.n_cols, 3);
       for (int r = 0; r < output_channel.n_rows; ++r) {
         for (int c_ = 0; c_ < output_channel.n_cols; ++c_) {
-          ASSERT_EQ(input_channel.at(r / 3, c_ / 3), output_channel.at(r, c_)) << r << " " << c_;
+          ASSERT_EQ(input_channel.at(r / 3, c_ / 3), output_channel.at(r, c_))
+              << r << " " << c_;
         }
       }
     }
@@ -169,7 +179,8 @@ TEST(test_layer, forward_upsample5) {
   const uint32_t rows = 224;
   const uint32_t cols = 224;
-  std::shared_ptr<Tensor<float>> input = std::make_shared<Tensor<float>>(channels, rows, cols);
+  std::shared_ptr<Tensor<float>> input =
+      std::make_shared<Tensor<float>>(channels, rows, cols);
   input->Rand();
   std::vector<std::shared_ptr<Tensor<float>>> inputs;
@@ -180,19 +191,92 @@ TEST(test_layer, forward_upsample5) {
   ASSERT_EQ(status, InferStatus::kInferSuccess);
   for (int i = 0; i < outputs.size(); ++i) {
-    const auto &output = outputs.at(i);
+    const auto& output = outputs.at(i);
     for (int c = 0; c < channels; ++c) {
-      const auto &output_channel = output->slice(i);
-      const auto &input_channel = input->slice(i);
+      const auto& output_channel = output->slice(i);
+      const auto& input_channel = input->slice(i);
       ASSERT_EQ(output_channel.n_rows / input_channel.n_rows, 4);
       ASSERT_EQ(output_channel.n_cols / input_channel.n_cols, 4);
       for (int r = 0; r < output_channel.n_rows; ++r) {
         for (int c_ = 0; c_ < output_channel.n_cols; ++c_) {
-          ASSERT_EQ(input_channel.at(r / 4, c_ / 4), output_channel.at(r, c_)) << r << " " << c_;
+          ASSERT_EQ(input_channel.at(r / 4, c_ / 4), output_channel.at(r, c_))
+              << r << " " << c_;
         }
       }
     }
   }
 }
+
+TEST(test_layer, forward_upsample_bilinear_noalign1) {
+  using namespace kuiper_infer;
+  RuntimeGraph graph("tmp/up/up_layer_mod.pnnx.param",
+                     "tmp/up/up_layer_mod.pnnx.bin");
+
+  graph.Build();
+  const uint32_t batch_size = 1;
+  std::vector<std::shared_ptr<Tensor<float>>> inputs;
+
+  const uint32_t size = 3 * 16 * 31;
+  std::vector<float> input_values;
+  for (uint32_t i = 0; i < size; ++i) {
+    input_values.push_back(float(i));
+  }
+  sftensor input_tensor = std::make_shared<ftensor>(3, 16, 31);
+  input_tensor->Fill(input_values, true);
+  inputs.push_back(input_tensor);
+  graph.set_inputs("pnnx_input_0", inputs);
+
+  graph.Forward();
+  std::vector<std::shared_ptr<Tensor<float>>> outputs =
+      graph.get_outputs("pnnx_output_0");
+  ASSERT_EQ(outputs.size(), 1);
+
+  sftensor output = outputs.front();
+  arma::fmat real =
+      CSVDataLoader::LoadData("tmp/up/test_upsample_bilinear.csv");
+  auto output_values = output->values(true);
+  for (uint32_t i = 0; i < output->size(); ++i) {
+    float output1 = real.at(i);
+    float output2 = output_values.at(i);
+    ASSERT_LE(std::abs(output1 - output2), 5e-4f)
+        << i << " output1: " << output1 << " output2: " << output2;
+  }
+}
+
+
+TEST(test_layer, forward_upsample_bilinear_noalign2) {
+  using namespace kuiper_infer;
+  RuntimeGraph graph("tmp/up/up_layer_mod1.pnnx.param",
+                     "tmp/up/up_layer_mod1.pnnx.bin");
+
+  graph.Build();
+  const uint32_t batch_size = 1;
+  std::vector<std::shared_ptr<Tensor<float>>> inputs;
+
+  const uint32_t size = 7 * 16 * 31;
+  std::vector<float> input_values;
+  for (uint32_t i = 0; i < size; ++i) {
+    input_values.push_back(float(i));
+  }
+  sftensor input_tensor = std::make_shared<ftensor>(7, 16, 31);
+  input_tensor->Fill(input_values, true);
+  inputs.push_back(input_tensor);
+  graph.set_inputs("pnnx_input_0", inputs);
+
+  graph.Forward();
+  std::vector<std::shared_ptr<Tensor<float>>> outputs =
+      graph.get_outputs("pnnx_output_0");
+  ASSERT_EQ(outputs.size(), 1);
+
+  sftensor output = outputs.front();
+  arma::fmat real =
+      CSVDataLoader::LoadData("tmp/up/test_upsample_bilinear11.csv");
+  auto output_values = output->values(true);
+  for (uint32_t i = 0; i < output->size(); ++i) {
+    float output1 = real.at(i);
+    float output2 = output_values.at(i);
+    ASSERT_LE(std::abs(output1 - output2), 5e-4f)
+        << i << " output1: " << output1 << " output2: " << output2;
+  }
+}
diff --git a/tmp b/tmp
index 84f3ae85..f67f1833 160000
--- a/tmp
+++ b/tmp
@@ -1 +1 @@
-Subproject commit 84f3ae857901f05743014fc72f3931239e0e8625
+Subproject commit f67f1833da6a7e2a513f01b0f003ef9e9a2a8d29
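Reviewer note (not part of the patch): the new bilinear branch follows the half-pixel convention PyTorch uses when align_corners is false, i.e. the source coordinate for output index o at scale s is (o + 0.5) / s - 0.5, clamped at the borders, and the two neighbouring rows/columns are blended with weights (1 - frac) and frac. The standalone sketch below mirrors the patch's CalcIndexAndLambda logic on a plain row-major buffer so the arithmetic can be checked in isolation; the 2x2 -> 4x4 example and all names in it are illustrative only and not part of kuiper_infer. Its printed output can be compared against torch.nn.functional.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False).

// Standalone sketch of half-pixel (align_corners = false) bilinear upsampling,
// mirroring the CalcIndexAndLambda logic from the patch. Illustrative only.
#include <cstdint>
#include <cstdio>
#include <vector>

static void CalcIndexAndLambda(int32_t in_size, int32_t out_size, float div_scale,
                               int32_t out_idx, float& lambda0, float& lambda1,
                               int32_t& idx0, int32_t& idx1) {
  if (out_size == in_size) {
    idx0 = idx1 = out_idx;
    lambda0 = 1.f;
    lambda1 = 0.f;
  } else {
    // Half-pixel source coordinate, clamped to 0 at the top/left border.
    float real = div_scale * (static_cast<float>(out_idx) + 0.5f) - 0.5f;
    if (real < 0) real = 0;
    idx0 = static_cast<int32_t>(real);
    idx1 = idx0 + ((idx0 < in_size - 1) ? 1 : 0);  // clamp at the bottom/right border
    lambda1 = real - static_cast<float>(idx0);     // weight of the second neighbour (idx1)
    lambda0 = 1.0f - lambda1;                      // weight of the first neighbour (idx0)
  }
}

int main() {
  // Upsample a 2x2 image to 4x4 (scale 2), stored row-major.
  const int32_t in_h = 2, in_w = 2, scale = 2;
  const int32_t out_h = in_h * scale, out_w = in_w * scale;
  const float input[in_h * in_w] = {1.f, 2.f, 3.f, 4.f};
  std::vector<float> output(out_h * out_w);

  const float div_scale = 1.f / static_cast<float>(scale);
  for (int32_t h = 0; h < out_h; ++h) {
    float h0, h1;
    int32_t ih0, ih1;
    CalcIndexAndLambda(in_h, out_h, div_scale, h, h0, h1, ih0, ih1);
    for (int32_t w = 0; w < out_w; ++w) {
      float w0, w1;
      int32_t iw0, iw1;
      CalcIndexAndLambda(in_w, out_w, div_scale, w, w0, w1, iw0, iw1);
      // Same four-term blend as the patch: weights pair (h, w) neighbours.
      output[h * out_w + w] = h0 * w0 * input[ih0 * in_w + iw0] +
                              h0 * w1 * input[ih0 * in_w + iw1] +
                              h1 * w0 * input[ih1 * in_w + iw0] +
                              h1 * w1 * input[ih1 * in_w + iw1];
    }
  }

  for (int32_t h = 0; h < out_h; ++h) {
    for (int32_t w = 0; w < out_w; ++w) printf("%.4f ", output[h * out_w + w]);
    printf("\n");
  }
  return 0;
}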