Skip to content

Commit

Permalink
增加bilinear上采样
Browse files Browse the repository at this point in the history
  • Loading branch information
zjhellofss committed Aug 17, 2023
1 parent 7218e64 commit 22fcc33
Show file tree
Hide file tree
Showing 8 changed files with 288 additions and 94 deletions.
19 changes: 12 additions & 7 deletions bench/bench_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

// Created by fss on 23-4-27.
#include <benchmark/benchmark.h>
#include "../source/layer/details/adaptive_avgpooling.hpp"
Expand Down Expand Up @@ -238,7 +238,6 @@ BENCHMARK(BM_SiLU)->Args({32, 160, 160})->Unit(benchmark::kMillisecond);
BENCHMARK(BM_SiLU)->Args({64, 80, 80})->Unit(benchmark::kMillisecond);
BENCHMARK(BM_SiLU)->Args({128, 40, 40})->Unit(benchmark::kMillisecond);


static void BM_ReLU(benchmark::State& state) {
using namespace kuiper_infer;

Expand Down Expand Up @@ -334,6 +333,7 @@ static void BM_Upsample(benchmark::State& state) {
uint32_t rows = state.range(1);
uint32_t cols = state.range(2);

UpSampleMode mode = UpSampleMode(state.range(3));
std::shared_ptr<Tensor<float>> input =
std::make_shared<Tensor<float>>(channels, rows, cols);
input->Rand();
Expand All @@ -342,17 +342,22 @@ static void BM_Upsample(benchmark::State& state) {
inputs.push_back(input);

std::vector<std::shared_ptr<Tensor<float>>> outputs(1);
UpSampleLayer layer(2.f, 2.f);
UpSampleLayer layer(3.f, 3.f, mode);

for (auto _ : state) {
const auto status = layer.Forward(inputs, outputs);
}
}

BENCHMARK(BM_Upsample)->Args({3, 320, 320})->Unit(benchmark::kMillisecond);
BENCHMARK(BM_Upsample)->Args({32, 160, 160})->Unit(benchmark::kMillisecond);
BENCHMARK(BM_Upsample)->Args({64, 80, 80})->Unit(benchmark::kMillisecond);
BENCHMARK(BM_Upsample)->Args({128, 40, 40})->Unit(benchmark::kMillisecond);
BENCHMARK(BM_Upsample)->Args({3, 320, 320, 0})->Unit(benchmark::kMillisecond);
BENCHMARK(BM_Upsample)->Args({32, 160, 160, 0})->Unit(benchmark::kMillisecond);
BENCHMARK(BM_Upsample)->Args({64, 80, 80, 0})->Unit(benchmark::kMillisecond);
BENCHMARK(BM_Upsample)->Args({128, 40, 40, 0})->Unit(benchmark::kMillisecond);

BENCHMARK(BM_Upsample)->Args({3, 320, 320, 1})->Unit(benchmark::kMillisecond);
BENCHMARK(BM_Upsample)->Args({32, 160, 160, 1})->Unit(benchmark::kMillisecond);
BENCHMARK(BM_Upsample)->Args({64, 80, 80, 1})->Unit(benchmark::kMillisecond);
BENCHMARK(BM_Upsample)->Args({128, 40, 40, 1})->Unit(benchmark::kMillisecond);

static void BM_AdaptivePooling(benchmark::State& state) {
using namespace kuiper_infer;
Expand Down
5 changes: 3 additions & 2 deletions include/status_code.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,9 @@ enum class ParseParameterAttrStatus {
kParameterMissingScale = 14,
kParameterMissingResizeMode = 15,
kParameterMissingDilation = 16,
kParameterMissingPaddingMode = 16,
kParameterMissingOutputPadding = 17,
kParameterMissingPaddingMode = 17,
kParameterMissingOutputPadding = 18,
kParameterMissingAlignCorner = 19,

kAttrMissingBias = 21,
kAttrMissingWeight = 22,
Expand Down
2 changes: 1 addition & 1 deletion source/layer/details/convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ InferStatus ConvolutionLayer::Forward(
CHECK(kernel_count % groups_ == 0);
CHECK(input_c % groups_ == 0);
}
uint32_t input_c_group = input_c / groups_;
const uint32_t input_c_group = input_c / groups_;
CHECK(input_c_group == kernel_c)
<< "The number of channel for the kernel "
"matrix and input tensor do not match";
Expand Down
177 changes: 136 additions & 41 deletions source/layer/details/upsample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,32 @@
#include "layer/abstract/layer_factory.hpp"
namespace kuiper_infer {

UpSampleLayer::UpSampleLayer(float scale_h, float scale_w, UpSampleMode mode)
// Maps one output coordinate back onto the two neighboring input coordinates
// used for bilinear interpolation, together with their blending weights.
//
// Uses the half-pixel-center convention (align_corners == false):
//   src = div_scale * (dst + 0.5) - 0.5, where div_scale = 1 / scale_factor.
// On return the interpolated value along this axis is
//   lambda0 * in[input_index0] + lambda1 * in[input_index1].
//
// input_size / output_size : extent of this axis before / after resizing
// div_scale               : reciprocal of the scale factor for this axis
// output_idx              : output coordinate being produced
// lambda0, lambda1        : weights for input_index0 / input_index1 (sum to 1)
// input_index0, input_index1 : left and right source indices, both clamped
//                              to [0, input_size - 1]
static void CalcIndexAndLambda(int32_t input_size, int32_t output_size,
                               float div_scale, int32_t output_idx,
                               float& lambda0, float& lambda1,
                               int32_t& input_index0, int32_t& input_index1) {
  if (output_size == input_size) {
    // Identity resize: each output pixel maps exactly onto one input pixel.
    input_index0 = input_index1 = output_idx;
    lambda0 = 1;
    lambda1 = 0;
  } else {
    float real_input_idx =
        div_scale * (static_cast<float>(output_idx) + 0.5f) - 0.5f;
    // Half-pixel mapping can go slightly negative for the first output
    // pixels; clamp so we never sample before the first input element.
    if (real_input_idx < 0) {
      real_input_idx = 0;
    }

    input_index0 = static_cast<int32_t>(real_input_idx);
    // Right neighbor is clamped at the last valid index so the final output
    // pixels replicate the border instead of reading out of bounds.
    int32_t offset = (input_index0 < input_size - 1) ? 1 : 0;
    input_index1 = input_index0 + offset;

    lambda1 = real_input_idx - static_cast<float>(input_index0);
    lambda0 = 1.0f - lambda1;
  }
}

UpSampleLayer::UpSampleLayer(uint32_t scale_h, uint32_t scale_w,
UpSampleMode mode)
: NonParamLayer("upsample"),
scale_h_(scale_h),
scale_w_(scale_w),
Expand All @@ -46,14 +71,15 @@ InferStatus UpSampleLayer::Forward(
return InferStatus::kInferFailedInputOutSizeMatchError;
}

auto test_scale_factor = [](uint32_t origin, float scale_factor) {
float result = origin * scale_factor;
if (std::abs(result - std::round(result)) > 1e-4f) {
auto test_scale_factor = [](uint32_t origin, uint32_t scale_factor) {
float result = static_cast<float>(origin * scale_factor);
if (std::abs(result - std::round(result)) > 1e-5f) {
LOG(ERROR) << "The input scale_factor is wrong";
}
};

LOG_IF(FATAL, this->mode_ != UpSampleMode::kModeNearest)
LOG_IF(FATAL, this->mode_ != UpSampleMode::kModeNearest &&
this->mode_ != UpSampleMode::kModeBilinear)
<< "Unsupported upsample mode: " << int(mode_);

for (uint32_t i = 0; i < inputs.size(); ++i) {
Expand All @@ -69,25 +95,23 @@ InferStatus UpSampleLayer::Forward(
}

const uint32_t batch_size = inputs.size();
const uint32_t scale_w = uint32_t(scale_w_);
const uint32_t scale_h = uint32_t(scale_h_);

#pragma omp parallel for num_threads(batch_size)
for (uint32_t i = 0; i < batch_size; ++i) {
const arma::fcube& input_data = inputs.at(i)->data();
std::shared_ptr<Tensor<float>> output = outputs.at(i);
if (output == nullptr || output->empty()) {
output = std::make_shared<Tensor<float>>(
input_data.n_slices, uint32_t(input_data.n_rows * scale_h),
uint32_t(input_data.n_cols * scale_w));
output = std::make_shared<Tensor<float>>(input_data.n_slices,
input_data.n_rows * scale_h_,
input_data.n_cols * scale_w_);
outputs.at(i) = output;
}
auto& output_data = output->data();
CHECK(output_data.n_rows == input_data.n_rows * scale_h)
CHECK(output_data.n_rows == input_data.n_rows * scale_h_)
<< "The input and output tensor height of the upsample layer do not "
"match "
<< i << "th";
CHECK(output_data.n_cols == input_data.n_cols * scale_w)
CHECK(output_data.n_cols == input_data.n_cols * scale_w_)
<< "The input and output tensor width of the upsample layer do not "
"match "
<< i << "th";
Expand All @@ -97,36 +121,80 @@ InferStatus UpSampleLayer::Forward(
"match "
<< i << "th";

const float div_scale_h = 1.f / static_cast<float>(scale_h_);
const float div_scale_w = 1.f / static_cast<float>(scale_w_);
const uint32_t channels = input_data.n_slices;
for (uint32_t c = 0; c < channels; ++c) {
const arma::fmat& input_channel = input_data.slice(c);
arma::fmat& output_channel = output_data.slice(c);

const uint32_t input_w = input_channel.n_cols;
const uint32_t input_h = input_channel.n_rows;
const uint32_t output_w = output_channel.n_cols;
const uint32_t output_h = output_channel.n_rows;

for (uint32_t w = 0; w < input_w; ++w) {
const float* input_col_ptr = input_channel.colptr(w);
const uint32_t scaled_w = w * scale_w;
for (uint32_t sw = 0; sw < scale_w; ++sw) {
if (scaled_w + sw >= output_w) {
continue;
}
float* output_col_ptr = output_channel.colptr(scaled_w + sw);
for (uint32_t h = 0; h < input_h; ++h) {
const uint32_t scaled_h = h * scale_h;
float* output_ptr = output_col_ptr + scaled_h;
float input_value = *(input_col_ptr + h);
for (uint32_t sh = 0; sh < scale_h; ++sh) {
if (scaled_h + sh < output_h) {
*(output_ptr + sh) = input_value;
if (mode_ == UpSampleMode::kModeNearest) {
#pragma omp parallel for
for (uint32_t c = 0; c < channels; ++c) {
const arma::fmat& input_channel = input_data.slice(c);
arma::fmat& output_channel = output_data.slice(c);

const uint32_t input_w = input_channel.n_cols;
const uint32_t input_h = input_channel.n_rows;
const uint32_t output_w = output_channel.n_cols;
const uint32_t output_h = output_channel.n_rows;
for (uint32_t w = 0; w < input_w; ++w) {
const float* input_col_ptr = input_channel.colptr(w);
const uint32_t scaled_w = w * scale_w_;
for (uint32_t sw = 0; sw < scale_w_; ++sw) {
if (scaled_w + sw >= output_w) {
continue;
}
float* output_col_ptr = output_channel.colptr(scaled_w + sw);
for (uint32_t h = 0; h < input_h; ++h) {
const uint32_t scaled_h = h * scale_h_;
float* output_ptr = output_col_ptr + scaled_h;
float input_value = *(input_col_ptr + h);
for (uint32_t sh = 0; sh < scale_h_; ++sh) {
if (scaled_h + sh < output_h) {
*(output_ptr + sh) = input_value;
}
}
}
}
}
}
} else {
#pragma omp parallel for
for (uint32_t c = 0; c < channels; ++c) {
const arma::fmat& input_channel = input_data.slice(c);
arma::fmat& output_channel = output_data.slice(c);

const uint32_t input_w = input_channel.n_cols;
const uint32_t input_h = input_channel.n_rows;
const uint32_t output_w = output_channel.n_cols;
const uint32_t output_h = output_channel.n_rows;
for (uint32_t w = 0; w < output_w; ++w) {
float* output_ptr = output_channel.colptr(w);
float w0_lambda = 0.f;
float w1_lambda = 0.f;
int32_t input_w0 = 0;
int32_t input_w1 = 0;
CalcIndexAndLambda(static_cast<int32_t>(input_w),
static_cast<int32_t>(output_w), div_scale_w,
static_cast<int32_t>(w), w0_lambda, w1_lambda,
input_w0, input_w1);
const float* input_ptr0 = input_channel.colptr(input_w0);
const float* input_ptr1 = input_channel.colptr(input_w1);
for (uint32_t h = 0; h < output_h; ++h) {
float h0_lambda = 0.f;
float h1_lambda = 0.f;
int32_t input_h0 = 0;
int32_t input_h1 = 0;
CalcIndexAndLambda(static_cast<int32_t>(input_h),
static_cast<int32_t>(output_h), div_scale_h,
static_cast<int32_t>(h), h0_lambda, h1_lambda,
input_h0, input_h1);

*(output_ptr + h) =
h0_lambda * w0_lambda * (*(input_ptr0 + input_h0)) +
h0_lambda * w1_lambda * (*(input_ptr1 + input_h0)) +
h1_lambda * w0_lambda * (*(input_ptr0 + input_h1)) +
h1_lambda * w1_lambda * (*(input_ptr1 + input_h1));
}
}
}
}
}
return InferStatus::kInferSuccess;
Expand All @@ -143,7 +211,6 @@ ParseParameterAttrStatus UpSampleLayer::CreateInstance(
return ParseParameterAttrStatus::kParameterMissingScale;
}

const auto& scale_param = params.at("scale_factor");
auto scales = std::dynamic_pointer_cast<RuntimeParameterFloatArray>(
params.at("scale_factor"));
if (scales == nullptr) {
Expand All @@ -157,17 +224,45 @@ ParseParameterAttrStatus UpSampleLayer::CreateInstance(
return ParseParameterAttrStatus::kParameterMissingResizeMode;
}

auto mode =
auto mode_param =
std::dynamic_pointer_cast<RuntimeParameterString>(params.at("mode"));
CHECK(mode->value == "nearest")
<< "The mode " << mode->value << " is not supported!";

UpSampleMode mode;
if (mode_param->value == "nearest") {
mode = UpSampleMode::kModeNearest;
} else if (mode_param->value == "bilinear") {
mode = UpSampleMode::kModeBilinear;
} else {
LOG(FATAL) << "The mode " << mode_param->value << " is not supported!";
}

if (params.find("align_corners") != params.end()) {
auto align_corner_param = std::dynamic_pointer_cast<RuntimeParameterBool>(
params.at("align_corners"));
if (!align_corner_param) {
return ParseParameterAttrStatus::kParameterMissingAlignCorner;
}
bool align_corner = align_corner_param->value;
CHECK_EQ(align_corner, false);
}

float scale_h = scales->value.at(0);
float scale_w = scales->value.at(1);
upsample_layer = std::make_shared<UpSampleLayer>(scale_h, scale_w);
// scale放大的倍数大于0
CHECK_GT(scale_h, 0.f);
CHECK_GT(scale_w, 0.f);

// scale放大的倍数必须是整数
CHECK_LE(scale_h - static_cast<uint32_t>(scale_h), 1e-5f);
CHECK_LE(scale_w - static_cast<uint32_t>(scale_w), 1e-5f);

upsample_layer = std::make_shared<UpSampleLayer>(
static_cast<uint32_t>(scale_h), static_cast<uint32_t>(scale_w), mode);
return ParseParameterAttrStatus::kParameterAttrParseSuccess;
}

LayerRegistererWrapper kUpSamplerCreateInstance("nn.Upsample",
UpSampleLayer::CreateInstance);
LayerRegistererWrapper kUpSamplerFCreateInstance("F.upsample",
UpSampleLayer::CreateInstance);
} // namespace kuiper_infer
11 changes: 6 additions & 5 deletions source/layer/details/upsample.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

// Created by fss on 22-12-25.

#ifndef KUIPER_INFER_SOURCE_LAYER_DETAILS_UPSAMPLE_HPP_
Expand All @@ -27,12 +27,13 @@

namespace kuiper_infer {
enum class UpSampleMode {
kModeNearest = 0, // 目前上采样层只支持邻近采样
kModeNearest = 0,
kModeBilinear = 1, // 目前上采样支持这两种
};

class UpSampleLayer : public NonParamLayer {
public:
explicit UpSampleLayer(float scale_h, float scale_w,
explicit UpSampleLayer(uint32_t scale_h, uint32_t scale_w,
UpSampleMode mode = UpSampleMode::kModeNearest);

InferStatus Forward(
Expand All @@ -44,8 +45,8 @@ class UpSampleLayer : public NonParamLayer {
std::shared_ptr<Layer>& upsample_layer);

private:
float scale_h_ = 1.f;
float scale_w_ = 1.f;
uint32_t scale_h_ = 1;
uint32_t scale_w_ = 1;
UpSampleMode mode_ = UpSampleMode::kModeNearest;
};
} // namespace kuiper_infer
Expand Down
Loading

0 comments on commit 22fcc33

Please sign in to comment.