From cf332b5e42ae947b4b2daabb4e6fe0b3a1231d5b Mon Sep 17 00:00:00 2001 From: zjhellofss Date: Wed, 2 Aug 2023 22:05:06 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0deconv=E7=AE=97=E5=AD=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bench/bench_conv.cpp | 54 +++- include/layer/abstract/param_layer.hpp | 16 +- include/status_code.hpp | 1 + source/layer/details/convolution.cpp | 325 +++++++++++++++++++++---- source/layer/details/convolution.hpp | 49 +++- source/layer/details/yolo_detect.cpp | 3 +- test/test_layer/test_conv.cpp | 90 +++++-- test/test_layer/test_deconv.cpp | 57 +++++ tmp | 2 +- 9 files changed, 498 insertions(+), 99 deletions(-) create mode 100644 test/test_layer/test_deconv.cpp diff --git a/bench/bench_conv.cpp b/bench/bench_conv.cpp index 8f63bcd4..f899d590 100644 --- a/bench/bench_conv.cpp +++ b/bench/bench_conv.cpp @@ -18,12 +18,13 @@ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. - + // Created by fushenshen on 2023/3/15. #include #include "../source/layer/details/convolution.hpp" #include "../source/layer/details/winograd.hpp" +#include "runtime/runtime_ir.hpp" static void BM_Convolutionk3x3s1x1(benchmark::State& state) { using namespace kuiper_infer; @@ -46,8 +47,8 @@ static void BM_Convolutionk3x3s1x1(benchmark::State& state) { std::vector outputs(1); std::vector inputs; inputs.push_back(input); - ConvolutionLayer conv_layer(kernel_count, channels, 3, 3, 0, 0, 1, 1, 1, - false); + ConvolutionLayer conv_layer(ConvType::OpConv, kernel_count, channels, 3, 3, 0, + 0, 1, 1, 1, false); conv_layer.set_weights(weights); for (auto _ : state) { conv_layer.Forward(inputs, outputs); @@ -73,3 +74,50 @@ BENCHMARK(BM_Convolutionk3x3s1x1) BENCHMARK(BM_Convolutionk3x3s1x1) ->Args({512, 256, 20, 20}) ->Unit(benchmark::kMillisecond); + +static void BM_DeConvolutionk3x3s1x1(benchmark::State& state) { + using namespace kuiper_infer; + + uint32_t kernel_count = state.range(0); + uint32_t channels = state.range(1); + uint32_t rows = state.range(2); + uint32_t cols = state.range(3); + + sftensor input = std::make_shared(channels, rows, cols); + input->Fill(1.f); + + std::vector weight_values(kernel_count * channels * 3 * 3); + for (uint32_t k = 0; k < kernel_count * channels * 3 * 3; ++k) { + weight_values.push_back(float(k % 31)); + } + + std::vector outputs(1); + std::vector inputs; + inputs.push_back(input); + ConvolutionLayer conv_layer(ConvType::OpDeconv, kernel_count, channels, 3, 3, + 0, 0, 1, 1, 1, false); + conv_layer.set_weights(weight_values); + for (auto _ : state) { + conv_layer.Forward(inputs, outputs); + } +} + +BENCHMARK(BM_DeConvolutionk3x3s1x1) + ->Args({32, 3, 320, 320}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK(BM_DeConvolutionk3x3s1x1) + ->Args({64, 32, 160, 160}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK(BM_DeConvolutionk3x3s1x1) + ->Args({128, 64, 80, 80}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK(BM_DeConvolutionk3x3s1x1) + ->Args({256, 128, 40, 40}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK(BM_DeConvolutionk3x3s1x1) + ->Args({512, 256, 20, 20}) + ->Unit(benchmark::kMillisecond); \ No newline at end of file diff --git a/include/layer/abstract/param_layer.hpp b/include/layer/abstract/param_layer.hpp index ae036b79..1cb4c9d2 100644 --- a/include/layer/abstract/param_layer.hpp +++ b/include/layer/abstract/param_layer.hpp @@ -18,7 +18,7 @@ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. - + // Created by fss on 22-11-13. #ifndef KUIPER_INFER_SOURCE_LAYER_PARAM_LAYER_HPP_ @@ -28,7 +28,7 @@ namespace kuiper_infer { class ParamLayer : public Layer { public: - explicit ParamLayer(const std::string &layer_name); + explicit ParamLayer(const std::string& layer_name); /** * 初始化权重空间 @@ -54,39 +54,39 @@ class ParamLayer : public Layer { * 返回权重参数 * @return 权重参数 */ - const std::vector>> &weights() const override; + const std::vector>>& weights() const override; /** * 返回偏移参数 * @return 偏移参数 */ - const std::vector>> &bias() const override; + const std::vector>>& bias() const override; /** * 设置权重参数 * @param weights 权重参数 */ - void set_weights(const std::vector &weights) override; + void set_weights(const std::vector& weights) override; /** * 设置偏移量参数 * @param bias 偏移量参数 */ - void set_bias(const std::vector &bias) override; + void set_bias(const std::vector& bias) override; /** * 设置权重参数 * @param weights 权重参数 */ void set_weights( - const std::vector>> &weights) override; + const std::vector>>& weights) override; /** * 设置偏移量参数 * @param bias 偏移量参数 */ void set_bias( - const std::vector>> &bias) override; + const std::vector>>& bias) override; protected: std::vector>> weights_; diff --git a/include/status_code.hpp b/include/status_code.hpp index cf150d88..a7119339 100644 --- a/include/status_code.hpp +++ b/include/status_code.hpp @@ -76,6 +76,7 @@ enum class ParseParameterAttrStatus { kParameterMissingResizeMode = 15, kParameterMissingDilation = 16, kParameterMissingPaddingMode = 16, + kParameterMissingOutputPadding = 17, kAttrMissingBias = 21, kAttrMissingWeight = 22, diff --git a/source/layer/details/convolution.cpp b/source/layer/details/convolution.cpp index 711ef4c1..c10a4fc0 100644 --- a/source/layer/details/convolution.cpp +++ b/source/layer/details/convolution.cpp @@ -23,24 +23,28 @@ #include "convolution.hpp" #include -#include "data/tensor_util.hpp" #include "layer/abstract/layer_factory.hpp" #include "runtime/runtime_ir.hpp" #include "tick.hpp" namespace kuiper_infer { -ConvolutionLayer::ConvolutionLayer(uint32_t output_channel, uint32_t in_channel, - uint32_t kernel_h, uint32_t kernel_w, - uint32_t padding_h, uint32_t padding_w, - uint32_t stride_h, uint32_t stride_w, - uint32_t groups, bool use_bias) - : ParamLayer("Convolution"), +ConvolutionLayer::ConvolutionLayer(ConvType conv_type, uint32_t output_channel, + uint32_t in_channel, uint32_t kernel_h, + uint32_t kernel_w, uint32_t padding_h, + uint32_t padding_w, uint32_t stride_h, + uint32_t stride_w, uint32_t groups, + bool use_bias, uint32_t output_padding_h, + uint32_t output_padding_w) + : ParamLayer("convolution"), + conv_type_(conv_type), use_bias_(use_bias), groups_(groups), padding_h_(padding_h), padding_w_(padding_w), stride_h_(stride_h), - stride_w_(stride_w) { + stride_w_(stride_w), + output_padding_h_(output_padding_h), + output_padding_w_(output_padding_w) { if (groups != 1) { in_channel /= groups; } @@ -48,6 +52,54 @@ ConvolutionLayer::ConvolutionLayer(uint32_t output_channel, uint32_t in_channel, if (use_bias_) { this->InitBiasParam(output_channel, 1, 1, 1); } + CHECK(conv_type_ == ConvType::OpConv || conv_type_ == ConvType::OpDeconv); +} + +void ConvolutionLayer::set_weights( + const std::vector>>& weights) { + if (conv_type_ == ConvType::OpConv) + ; + return ParamLayer::set_weights(weights); +} + +void ConvolutionLayer::set_weights(const std::vector& weights) { + if (conv_type_ == ConvType::OpConv) { + return ParamLayer::set_weights(weights); + } else { + const uint32_t kernel_count = this->weights_.size(); + CHECK(kernel_count > 0); + const uint32_t kernel_count_group = kernel_count / groups_; + const uint32_t kernel_channel = this->weights_.at(0)->channels(); + const uint32_t kernel_height = this->weights_.at(0)->rows(); + const uint32_t kernel_width = this->weights_.at(0)->cols(); + const uint32_t kernel_plane = + kernel_count_group * kernel_channel * kernel_width * kernel_height; + + uint32_t kernel_hw = kernel_height * kernel_width; + uint32_t kernel_chw = kernel_count_group * kernel_height * kernel_width; + + for (uint32_t g = 0; g < groups_; ++g) { + std::vector sub_weights(kernel_plane); + std::copy(weights.data() + g * kernel_plane, + weights.data() + (g + 1) * kernel_plane, sub_weights.begin()); + for (uint32_t kg = 0; kg < kernel_count_group; ++kg) { + const uint32_t channel_offset = kg * kernel_hw; + const uint32_t kernel_idx = g * kernel_count_group + kg; + for (uint32_t ic = 0; ic < kernel_channel; ++ic) { + const uint32_t kernel_offset = ic * kernel_chw; + arma::fmat& kernel_channel_mat = + this->weights_.at(kernel_idx)->slice(ic); + for (uint32_t kw = 0; kw < kernel_width; ++kw) { + float* kernel_ptr = kernel_channel_mat.colptr(kw); + for (uint32_t kh = 0; kh < kernel_height; ++kh) { + *(kernel_ptr + kh) = sub_weights.at( + kernel_offset + channel_offset + kh * kernel_width + kw); + } + } + } + } + } + } } InferStatus ConvolutionLayer::Forward( @@ -126,26 +178,32 @@ InferStatus ConvolutionLayer::Forward( const uint32_t input_padded_w = input->cols() + 2 * padding_w_; CHECK(input_padded_h >= kernel_h && input_padded_w >= kernel_w); - const uint32_t output_h = (input_padded_h - kernel_h) / stride_h_ + 1; - const uint32_t output_w = (input_padded_w - kernel_w) / stride_w_ + 1; + uint32_t output_h = 0; + uint32_t output_w = 0; + uint32_t col_len = 0; + uint32_t input_h = input->rows(); + uint32_t input_w = input->cols(); + CHECK(input_h > 0 && input_w > 0); + + if (conv_type_ == ConvType::OpConv) { + CHECK(input_padded_h >= kernel_h && input_padded_w >= kernel_w); + output_h = (input_padded_h - kernel_h) / stride_h_ + 1; + output_w = (input_padded_w - kernel_w) / stride_w_ + 1; + col_len = output_h * output_w; + } else { + CHECK(conv_type_ == ConvType::OpDeconv); + output_h = (input_h - 1) * stride_h_ + kernel_h + output_padding_h_; + output_w = (input_w - 1) * stride_w_ + kernel_w + output_padding_w_; + CHECK(output_h > 2 * padding_h_ && output_w > 2 * padding_w_); + output_h -= 2 * padding_h_; + output_w -= 2 * padding_w_; + } + CHECK(output_h > 0 && output_w > 0) << "The size of the output tensor should be greater than zero " << i << " th"; - if (groups_ != 1) { - CHECK(kernel_count % groups_ == 0); - CHECK(input_c % groups_ == 0); - } - - uint32_t col_len = output_h * output_w; - uint32_t input_c_group = input_c / groups_; - CHECK(input_c_group == kernel_c) << "The number of channel for the kernel " - "matrix and input tensor do not match"; - for (uint32_t g = 0; g < groups_; ++g) { - const auto& input_matrix = - Im2Col(input, kernel_w, kernel_h, input->cols(), input->rows(), - input_c_group, g, row_len, col_len); std::shared_ptr> output_tensor = outputs.at(i); if (output_tensor == nullptr || output_tensor->empty()) { output_tensor = @@ -160,23 +218,136 @@ InferStatus ConvolutionLayer::Forward( "incorrectly sized tensor " << i << "th"; - const uint32_t kernel_group_start = kernel_count_group * g; + if (groups_ != 1) { + CHECK(kernel_count % groups_ == 0); + CHECK(input_c % groups_ == 0); + } + uint32_t input_c_group = input_c / groups_; + CHECK(input_c_group == kernel_c) + << "The number of channel for the kernel " + "matrix and input tensor do not match"; + + arma::fmat input_matrix; + if (conv_type_ == ConvType::OpConv) { + input_matrix = ConvIm2Col(input, kernel_h, kernel_w, input_h, input_w, + input_c_group, g, row_len, col_len); +#pragma omp parallel for schedule(dynamic) + for (uint32_t k = 0; k < kernel_count_group; ++k) { + ConvGemmBias(input_matrix, output_tensor, g, k, kernel_count_group, + output_h, output_w); + } + } else { + CHECK(conv_type_ == ConvType::OpDeconv); #pragma omp parallel for schedule(dynamic) - for (uint32_t k = 0; k < kernel_count_group; ++k) { - ConvGemmBias(input_matrix, output_tensor, g, k, kernel_count_group, - kernel_matrix_arr_.at(kernel_group_start + k), output_w, - output_h); + for (uint32_t k = 0; k < kernel_count_group; ++k) { + const arma::fmat& gemm_result = DeconvGemm( + input, input_h, input_w, input_c_group, g, k, kernel_count_group); + DeconvCol2Im(gemm_result, output_tensor, input_h, input_w, g, k, + kernel_count_group, kernel_h, kernel_w, output_h, + output_w); + } } } } return InferStatus::kInferSuccess; } -arma::fmat ConvolutionLayer::Im2Col(sftensor input, uint32_t kernel_w, - uint32_t kernel_h, uint32_t input_w, - uint32_t input_h, uint32_t input_c_group, - uint32_t group, uint32_t row_len, - uint32_t col_len) const { +void ConvolutionLayer::DeconvCol2Im(const arma::fmat& gemm_result, + sftensor output_tensor, uint32_t input_h, + uint32_t input_w, uint32_t group, + uint32_t kernel_index, + uint32_t kernel_count_group, + uint32_t kernel_h, uint32_t kernel_w, + uint32_t output_h, uint32_t output_w) { + CHECK(this->conv_type_ == ConvType::OpDeconv); + CHECK(!gemm_result.empty()); + CHECK(input_h > 0 && input_w > 0); + CHECK(output_tensor != nullptr && !output_tensor->empty()); + + uint32_t size_h = (input_h - 1) * stride_h_ + kernel_h + output_padding_h_; + uint32_t size_w = (input_w - 1) * stride_w_ + kernel_w + output_padding_w_; + CHECK(size_h > kernel_h && size_w > kernel_w); + arma::fmat output_padding(output_h + 2 * padding_h_, + output_w + 2 * padding_w_); + + uint32_t slide_count_w = (size_w - kernel_w) / stride_w_ + 1; + uint32_t slide_count_h = (size_h - kernel_h) / stride_h_ + 1; +#pragma omp parallel for collapse(2) + for (uint32_t x = 0; x < slide_count_w; ++x) { + for (uint32_t y = 0; y < slide_count_h; ++y) { + const uint32_t offset_x = x * stride_w_; + const uint32_t offset_y = y * stride_h_; + arma::fmat gemm_column((float*)gemm_result.colptr(x * slide_count_h + y), + gemm_result.n_rows, 1, false, true); + + gemm_column.reshape(kernel_h, kernel_w); + uint32_t gemm_rows = gemm_column.n_rows; + uint32_t gemm_cols = gemm_column.n_cols; + for (uint32_t col = 0; col < gemm_cols; ++col) { + float* gemm_ptr = gemm_column.colptr(col); + float* output_ptr = output_padding.colptr(offset_x + col); + memcpy(output_ptr + offset_y, gemm_ptr, sizeof(float) * gemm_rows); + } + } + } + + kernel_index = kernel_index + group * kernel_count_group; + arma::fmat output(output_tensor->matrix_raw_ptr(kernel_index), output_h, + output_w, false, true); + output = + output_padding.submat(padding_h_, padding_w_, output_h + padding_h_ - 1, + output_w + padding_w_ - 1); + + if (!this->bias_.empty() && this->use_bias_) { + std::shared_ptr> bias; + bias = this->bias_.at(kernel_index); + if (bias != nullptr && !bias->empty()) { + float bias_value = bias->index(0); + output += bias_value; + } else { + LOG(FATAL) << "Bias tensor is empty or nullptr"; + } + } +} + +arma::fmat ConvolutionLayer::DeconvGemm(sftensor input, uint32_t input_h, + uint32_t input_w, + uint32_t input_c_group, uint32_t group, + uint32_t kernel_index, + uint32_t kernel_count_group) { + CHECK(conv_type_ == ConvType::OpDeconv); + CHECK(input != nullptr && !input->empty()); + + kernel_index = kernel_index + group * kernel_count_group; + sftensor group_kernel = this->weights_.at(kernel_index); + CHECK(group_kernel != nullptr && !group_kernel->empty()); + + uint32_t input_hw = input_h * input_w; + uint32_t kernel_hw = group_kernel->rows() * group_kernel->cols(); + arma::fmat gemm_result(kernel_hw, input_hw); + +#pragma omp parallel for schedule(dynamic) + for (uint32_t c = 0; c < input_c_group; ++c) { + arma::fmat input_channel = input->slice(group * input_c_group + c); + input_channel.reshape(1, input_channel.size()); + arma::fmat kernel_channel = group_kernel->slice(group * input_c_group + c); + kernel_channel.reshape(kernel_hw, 1); + + const arma::fmat& gemm_output = kernel_channel * input_channel; +#pragma omp critical + gemm_result += gemm_output; + } + return gemm_result; +} + +arma::fmat ConvolutionLayer::ConvIm2Col(sftensor input, uint32_t kernel_h, + uint32_t kernel_w, uint32_t input_h, + uint32_t input_w, + uint32_t input_c_group, uint32_t group, + uint32_t row_len, + uint32_t col_len) const { + CHECK(conv_type_ == ConvType::OpConv); + CHECK(input && !input->empty()); arma::fmat input_matrix(input_c_group * row_len, col_len); const uint32_t input_padded_h = input_h + 2 * padding_h_; const uint32_t input_padded_w = input_w + 2 * padding_w_; @@ -213,11 +384,20 @@ arma::fmat ConvolutionLayer::Im2Col(sftensor input, uint32_t kernel_w, return input_matrix; } -void ConvolutionLayer::ConvGemmBias( - const arma::fmat& input_matrix, sftensor output_tensor, uint32_t group, - uint32_t kernel_index, uint32_t kernel_count_group, - const arma::frowvec& kernel, uint32_t output_w, uint32_t output_h) const { +void ConvolutionLayer::ConvGemmBias(const arma::fmat& input_matrix, + sftensor output_tensor, uint32_t group, + uint32_t kernel_index, + uint32_t kernel_count_group, + uint32_t output_h, + uint32_t output_w) const { + CHECK(conv_type_ == ConvType::OpConv); + + CHECK(!input_matrix.empty()); + CHECK(output_tensor && !output_tensor->empty()); + kernel_index = kernel_index + group * kernel_count_group; + const arma::frowvec& kernel = this->kernel_matrix_arr_.at(kernel_index); + arma::fmat output(output_tensor->matrix_raw_ptr(kernel_index), output_h, output_w, false, true); output = kernel * input_matrix; @@ -235,6 +415,9 @@ void ConvolutionLayer::ConvGemmBias( } void ConvolutionLayer::InitIm2ColWeight() { + if (this->conv_type_ != ConvType::OpConv) { + return; + } const uint32_t kernel_count = this->weights_.size(); CHECK(kernel_count > 0) << "kernel count must greater than zero"; const uint32_t kernel_h = this->weights_.at(0)->rows(); @@ -355,22 +538,24 @@ ParseParameterAttrStatus ConvolutionLayer::GetInstance( return ParseParameterAttrStatus::kParameterMissingKernel; } - if (params.find("padding_mode") != params.end()) { - auto padding_mode = std::dynamic_pointer_cast( - params.at("padding_mode")); - if (padding_mode == nullptr) { - LOG(ERROR) << "Can not find the padding parameter"; - return ParseParameterAttrStatus::kParameterMissingPaddingMode; - } else { - const std::string& padding_mode_str = padding_mode->value; - if (padding_mode_str != "zeros") { - LOG(ERROR) << "Padding mode unsupported: " << padding_mode_str; + if (op->type == "nn.Conv2d") { + if (params.find("padding_mode") != params.end()) { + auto padding_mode = std::dynamic_pointer_cast( + params.at("padding_mode")); + if (padding_mode == nullptr) { + LOG(ERROR) << "Can not find the padding parameter"; return ParseParameterAttrStatus::kParameterMissingPaddingMode; + } else { + const std::string& padding_mode_str = padding_mode->value; + if (padding_mode_str != "zeros") { + LOG(ERROR) << "Padding mode unsupported: " << padding_mode_str; + return ParseParameterAttrStatus::kParameterMissingPaddingMode; + } } + } else { + LOG(ERROR) << "Can not find the padding parameter"; + return ParseParameterAttrStatus::kParameterMissingPaddingMode; } - } else { - LOG(ERROR) << "Can not find the padding parameter"; - return ParseParameterAttrStatus::kParameterMissingPaddingMode; } auto groups = @@ -399,11 +584,42 @@ ParseParameterAttrStatus ConvolutionLayer::GetInstance( return ParseParameterAttrStatus::kParameterMissingKernel; } + uint32_t output_padding_h = 0; + uint32_t output_padding_w = 0; + if (op->type == "nn.ConvTranspose2d") { + if (params.find("output_padding") != params.end()) { + auto output_padding_arr = + std::dynamic_pointer_cast( + params.at("output_padding")); + if (!output_padding_arr) { + return ParseParameterAttrStatus::kParameterMissingOutputPadding; + } else { + if (output_padding_arr->value.size() != 2) { + return ParseParameterAttrStatus::kParameterMissingOutputPadding; + } + output_padding_h = output_padding_arr->value.at(0); + output_padding_w = output_padding_arr->value.at(1); + } + } else { + return ParseParameterAttrStatus::kParameterMissingOutputPadding; + } + } + + ConvType conv_type = ConvType::OpConv; + if (op->type == "nn.Conv2d") { + conv_type = ConvType::OpConv; + } else if (op->type == "nn.ConvTranspose2d") { + conv_type = ConvType::OpDeconv; + } else { + LOG(FATAL) << "Unknown convolution type: " << op->type; + } + // kernel的方向是倒置的 conv_layer = std::make_shared( - out_channel->value, in_channel->value, kernels.at(0), kernels.at(1), - paddings.at(0), paddings.at(1), strides.at(0), strides.at(1), - groups->value, use_bias->value); + conv_type, out_channel->value, in_channel->value, kernels.at(0), + kernels.at(1), paddings.at(0), paddings.at(1), strides.at(0), + strides.at(1), groups->value, use_bias->value, output_padding_h, + output_padding_w); // load weights const std::map>& attrs = @@ -448,4 +664,7 @@ ParseParameterAttrStatus ConvolutionLayer::GetInstance( LayerRegistererWrapper kConvGetInstance("nn.Conv2d", ConvolutionLayer::GetInstance); + +LayerRegistererWrapper kDeConvGetInstance("nn.ConvTranspose2d", + ConvolutionLayer::GetInstance); } // namespace kuiper_infer diff --git a/source/layer/details/convolution.hpp b/source/layer/details/convolution.hpp index f43a40f9..f6b6f1de 100644 --- a/source/layer/details/convolution.hpp +++ b/source/layer/details/convolution.hpp @@ -18,21 +18,28 @@ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. - + // Created by fss on 22-11-13. #ifndef KUIPER_INFER_SOURCE_LAYER_CONVOLUTION_HPP_ #define KUIPER_INFER_SOURCE_LAYER_CONVOLUTION_HPP_ #include "layer/abstract/param_layer.hpp" +enum class ConvType { + OpConvUnknown = -1, + OpConv = 0, // 普通卷积 + OpDeconv = 1, // 转置卷积 +}; namespace kuiper_infer { class ConvolutionLayer : public ParamLayer { public: - explicit ConvolutionLayer(uint32_t output_channel, uint32_t in_channel, - uint32_t kernel_h, uint32_t kernel_w, - uint32_t padding_h, uint32_t padding_w, - uint32_t stride_h, uint32_t stride_w, - uint32_t groups, bool use_bias = true); + explicit ConvolutionLayer(ConvType conv_type, uint32_t output_channel, + uint32_t in_channel, uint32_t kernel_h, + uint32_t kernel_w, uint32_t padding_h, + uint32_t padding_w, uint32_t stride_h, + uint32_t stride_w, uint32_t groups, + bool use_bias = true, uint32_t output_padding_h = 0, + uint32_t output_padding_w = 0); static ParseParameterAttrStatus GetInstance( const std::shared_ptr& op, @@ -42,6 +49,11 @@ class ConvolutionLayer : public ParamLayer { const std::vector>>& inputs, std::vector>>& outputs) override; + void set_weights( + const std::vector>>& weights) override; + + void set_weights(const std::vector& weights) override; + /** * 初始化kernel的im2col排布 */ @@ -50,20 +62,35 @@ class ConvolutionLayer : public ParamLayer { private: void ConvGemmBias(const arma::fmat& input_matrix, sftensor output_tensor, uint32_t group, uint32_t kernel_index, - uint32_t kernel_count_group, const arma::frowvec& kernel, - uint32_t output_w, uint32_t output_h) const; + uint32_t kernel_count_group, uint32_t output_h, + uint32_t output_w) const; - arma::fmat Im2Col(sftensor input, uint32_t kernel_w, uint32_t kernel_h, - uint32_t input_w, uint32_t input_h, uint32_t input_c_group, - uint32_t group, uint32_t row_len, uint32_t col_len) const; + arma::fmat DeconvGemm(sftensor input, uint32_t input_h, uint32_t input_w, + uint32_t input_c_group, uint32_t group, + uint32_t kernel_index, uint32_t kernel_count_group); + + void DeconvCol2Im(const arma::fmat& gemm_result, sftensor output_tensor, + uint32_t input_h, uint32_t input_w, uint32_t group, + uint32_t kernel_index, uint32_t kernel_count_group, + uint32_t kernel_h, uint32_t kernel_w, + uint32_t output_h, uint32_t output_w); + + arma::fmat ConvIm2Col(sftensor input, uint32_t kernel_h, uint32_t kernel_w, + uint32_t input_h, uint32_t input_w, + uint32_t input_c_group, uint32_t group, + uint32_t row_len, uint32_t col_len) const; private: + ConvType conv_type_ = ConvType::OpConvUnknown; bool use_bias_ = false; uint32_t groups_ = 1; uint32_t padding_h_ = 0; uint32_t padding_w_ = 0; uint32_t stride_h_ = 1; uint32_t stride_w_ = 1; + + uint32_t output_padding_h_ = 0; + uint32_t output_padding_w_ = 0; std::vector kernel_matrix_arr_; }; diff --git a/source/layer/details/yolo_detect.cpp b/source/layer/details/yolo_detect.cpp index 50357b5d..61ae2e16 100644 --- a/source/layer/details/yolo_detect.cpp +++ b/source/layer/details/yolo_detect.cpp @@ -289,7 +289,8 @@ ParseParameterAttrStatus YoloDetectLayer::GetInstance( const int kernel_w = out_shapes.at(3); conv_layers.at(i) = std::make_shared( - out_channels, in_channels, kernel_h, kernel_w, 0, 0, 1, 1, 1); + ConvType::OpConv, out_channels, in_channels, kernel_h, kernel_w, 0, 0, + 1, 1, 1); const std::vector& weights = conv_attr->get(); conv_layers.at(i)->set_weights(weights); diff --git a/test/test_layer/test_conv.cpp b/test/test_layer/test_conv.cpp index e0f4c680..addf6329 100644 --- a/test/test_layer/test_conv.cpp +++ b/test/test_layer/test_conv.cpp @@ -18,14 +18,16 @@ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. - + // Created by fss on 23-2-6. #include #include #include "../../source/layer/details/convolution.hpp" #include "../../source/layer/details/winograd.hpp" +#include "data/load_data.hpp" #include "data/tensor.hpp" #include "data/tensor_util.hpp" +#include "runtime/runtime_ir.hpp" #include "tick.hpp" using namespace kuiper_infer; @@ -170,7 +172,8 @@ TEST(test_layer, convolution3x3_winograd3) { std::vector outputs(1); std::vector inputs; inputs.push_back(input); - ConvolutionLayer conv_layer(kernel_count, 131, 3, 3, 0, 0, 1, 1, 1, false); + ConvolutionLayer conv_layer(ConvType::OpConv, kernel_count, 131, 3, 3, 0, 0, + 1, 1, 1, false); conv_layer.set_weights(weights); conv_layer.Forward(inputs, outputs); ASSERT_EQ(outputs.size(), 1); @@ -195,7 +198,8 @@ TEST(test_layer, convolution3x3_winograd4) { std::vector outputs(1); std::vector inputs; inputs.push_back(input); - ConvolutionLayer conv_layer(kernel_count, 13, 3, 3, 0, 0, 1, 1, 1, false); + ConvolutionLayer conv_layer(ConvType::OpConv, kernel_count, 13, 3, 3, 0, 0, 1, + 1, 1, false); conv_layer.set_weights(weights); conv_layer.Forward(inputs, outputs); ASSERT_EQ(outputs.size(), 1); @@ -220,7 +224,8 @@ TEST(test_layer, convolution3x3_winograd5) { std::vector outputs(1); std::vector inputs; inputs.push_back(input); - ConvolutionLayer conv_layer(kernel_count, 256, 3, 3, 0, 0, 1, 1, 1, false); + ConvolutionLayer conv_layer(ConvType::OpConv, kernel_count, 256, 3, 3, 0, 0, + 1, 1, 1, false); conv_layer.set_weights(weights); conv_layer.Forward(inputs, outputs); ASSERT_EQ(outputs.size(), 1); @@ -250,8 +255,9 @@ TEST(test_layer, convolution3x3x32_stride1x1_padding0) { weights.push_back(kernel); } Convolution(inputs, outputs1, stride_h, stride_w, weights); - ConvolutionLayer conv_layer(kernel_count, in_channel, kernel_h, kernel_w, 0, - 0, stride_h, stride_w, 1, false); + ConvolutionLayer conv_layer(ConvType::OpConv, kernel_count, in_channel, + kernel_h, kernel_w, 0, 0, stride_h, stride_w, 1, + false); conv_layer.set_weights(weights); conv_layer.Forward(inputs, outputs2); ASSERT_EQ(outputs1.size(), outputs2.size()); @@ -289,8 +295,9 @@ TEST(test_layer, convolution3x3x32_stride1x1_padding2) { sftensor bias = std::make_shared(1, 1, 1); } Convolution(inputs, outputs1, stride_h, stride_w, weights); - ConvolutionLayer conv_layer(kernel_count, in_channel, kernel_h, kernel_w, 0, - 0, stride_h, stride_w, 1, false); + ConvolutionLayer conv_layer(ConvType::OpConv, kernel_count, in_channel, + kernel_h, kernel_w, 0, 0, stride_h, stride_w, 1, + false); conv_layer.set_weights(weights); conv_layer.Forward(inputs, outputs2); ASSERT_EQ(outputs1.size(), outputs2.size()); @@ -328,8 +335,9 @@ TEST(test_layer, convolution3x3x32_stride2x2_padding2) { sftensor bias = std::make_shared(1, 1, 1); } Convolution(inputs, outputs1, stride_h, stride_w, weights); - ConvolutionLayer conv_layer(kernel_count, in_channel, kernel_h, kernel_w, 0, - 0, stride_h, stride_w, 1, false); + ConvolutionLayer conv_layer(ConvType::OpConv, kernel_count, in_channel, + kernel_h, kernel_w, 0, 0, stride_h, stride_w, 1, + false); conv_layer.set_weights(weights); conv_layer.Forward(inputs, outputs2); ASSERT_EQ(outputs1.size(), outputs2.size()); @@ -366,8 +374,9 @@ TEST(test_layer, convolution3x3x32_stride5x5_padding2) { weights.push_back(kernel); } Convolution(inputs, outputs1, stride_h, stride_w, weights); - ConvolutionLayer conv_layer(kernel_count, in_channel, kernel_h, kernel_w, 0, - 0, stride_h, stride_w, 1, false); + ConvolutionLayer conv_layer(ConvType::OpConv, kernel_count, in_channel, + kernel_h, kernel_w, 0, 0, stride_h, stride_w, 1, + false); conv_layer.set_weights(weights); conv_layer.Forward(inputs, outputs2); ASSERT_EQ(outputs1.size(), outputs2.size()); @@ -404,8 +413,9 @@ TEST(test_layer, convolution5x5x32_stride5x5_padding2) { weights.push_back(kernel); } Convolution(inputs, outputs1, stride_h, stride_w, weights); - ConvolutionLayer conv_layer(kernel_count, in_channel, kernel_h, kernel_w, 0, - 0, stride_h, stride_w, 1, false); + ConvolutionLayer conv_layer(ConvType::OpConv, kernel_count, in_channel, + kernel_h, kernel_w, 0, 0, stride_h, stride_w, 1, + false); conv_layer.set_weights(weights); conv_layer.Forward(inputs, outputs2); ASSERT_EQ(outputs1.size(), outputs2.size()); @@ -447,8 +457,9 @@ TEST(test_layer, convolution5x5x32_stride7x7_padding2) { weights.push_back(kernel); } Convolution(inputs, outputs1, stride_h, stride_w, weights); - ConvolutionLayer conv_layer(kernel_count, in_channel, kernel_h, kernel_w, 0, - 0, stride_h, stride_w, 1, false); + ConvolutionLayer conv_layer(ConvType::OpConv, kernel_count, in_channel, + kernel_h, kernel_w, 0, 0, stride_h, stride_w, 1, + false); conv_layer.set_weights(weights); conv_layer.Forward(inputs, outputs2); ASSERT_EQ(outputs1.size(), outputs2.size()); @@ -490,8 +501,9 @@ TEST(test_layer, convolution13x13x32_stride7x7_padding2) { weights.push_back(kernel); } Convolution(inputs, outputs1, stride_h, stride_w, weights); - ConvolutionLayer conv_layer(kernel_count, in_channel, kernel_h, kernel_w, 0, - 0, stride_h, stride_w, 1, true); + ConvolutionLayer conv_layer(ConvType::OpConv, kernel_count, in_channel, + kernel_h, kernel_w, 0, 0, stride_h, stride_w, 1, + true); conv_layer.set_weights(weights); conv_layer.Forward(inputs, outputs2); ASSERT_EQ(outputs1.size(), outputs2.size()); @@ -503,7 +515,7 @@ TEST(test_layer, convolution13x13x32_stride7x7_padding2) { ASSERT_LE(std::abs(outputs1.at(i)->index(j) - outputs2.at(i)->index(j)), 1e-2); #else - ASSERT_LE(std::abs(outputs1.at(i)->index(j) - outputs2.at(i)->index(j)), + ASSERT_LE(std::abs(outputs1.at(i)->index(j) - outputs2.at(i)->index(j)), 1e-3); #endif } @@ -534,8 +546,9 @@ TEST(test_layer, convolution13x13x31_stride19x19_padding2) { sftensor bias = std::make_shared(1, 1, 1); } Convolution(inputs, outputs1, stride_h, stride_w, weights); - ConvolutionLayer conv_layer(kernel_count, in_channel, kernel_h, kernel_w, 0, - 0, stride_h, stride_w, 1, false); + ConvolutionLayer conv_layer(ConvType::OpConv, kernel_count, in_channel, + kernel_h, kernel_w, 0, 0, stride_h, stride_w, 1, + false); conv_layer.set_weights(weights); conv_layer.Forward(inputs, outputs2); ASSERT_EQ(outputs1.size(), outputs2.size()); @@ -547,4 +560,37 @@ TEST(test_layer, convolution13x13x31_stride19x19_padding2) { 1e-3); } } -} \ No newline at end of file +} + +TEST(test_layer, conv3x3_fromtorch) { + using namespace kuiper_infer; + RuntimeGraph graph("tmp/resnet/conv1.pnnx.param", + "tmp/resnet/conv1.pnnx.bin"); + + graph.Build(); + const uint32_t batch_size = 1; + std::vector>> inputs; + + for (int i = 0; i < batch_size; ++i) { + std::shared_ptr> input = + std::make_shared>(18, 5, 5); + std::vector values; + for (int j = 0; j < 450; ++j) { + values.push_back((float)j); + } + input->Fill(values, true); + input->Show(); + inputs.push_back(input); + } + + graph.set_inputs("pnnx_input_0", inputs); + graph.Forward(false); + std::vector outputs = graph.get_outputs("pnnx_output_0"); + const std::vector outputs_values = outputs.front()->values(true); + arma::fmat real_data = CSVDataLoader::LoadData("tmp/resnet/test13.csv"); + for (int i = 0; i < outputs_values.size(); ++i) { + ASSERT_LE(std::abs(real_data.at(i) - outputs_values.at(i)), 5e-5f) + << i << " real: " << real_data.at(i) + << " predict: " << outputs_values.at(i); + } +} diff --git a/test/test_layer/test_deconv.cpp b/test/test_layer/test_deconv.cpp new file mode 100644 index 00000000..b441adda --- /dev/null +++ b/test/test_layer/test_deconv.cpp @@ -0,0 +1,57 @@ +// MIT License +// Copyright (c) 2022 - 傅莘莘 +// Source URL: https://github.com/zjhellofss/KuiperInfer +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// Created by fss on 23-2-6. +#include +#include +#include "data/load_data.hpp" +#include "runtime/runtime_ir.hpp" +#include "tick.hpp" + +TEST(test_layer, deconv_test_1) { + using namespace kuiper_infer; + RuntimeGraph graph("tmp/unet/demo_deconv.pnnx.param", + "tmp/unet/demo_deconv.pnnx.bin"); + + graph.Build(); + const uint32_t batch_size = 1; + std::vector>> inputs; + + for (int i = 0; i < batch_size; ++i) { + std::shared_ptr> input = + std::make_shared>(13, 13, 31); + input->Ones(); + inputs.push_back(input); + } + + graph.set_inputs("pnnx_input_0", inputs); + graph.Forward(false); + + std::vector outputs = graph.get_outputs("pnnx_output_0"); + arma::fmat real_data = CSVDataLoader::LoadData("tmp/unet/test.csv"); + const auto& outputs_values = outputs.front()->values(true); + + for (int i = 0; i < outputs_values.size(); ++i) { + ASSERT_LE(std::abs(real_data.at(i) - outputs_values.at(i)), 2e-6f) + << i << " real: " << real_data.at(i) + << " predict: " << outputs_values.at(i); + } +} diff --git a/tmp b/tmp index 5620ec57..90b551da 160000 --- a/tmp +++ b/tmp @@ -1 +1 @@ -Subproject commit 5620ec57c4033a87f5a44cf7e02f18712ee4c2a2 +Subproject commit 90b551da0c374d1183d74248283128040366e2f1