ONE - On-device Neural Engine
Conv2DLayer.cc
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "Conv2DLayer.h"

#include "OperationUtils.h"
#include "../Tensor.h"
#include "../KernelGenerator.h"
#include "../Validator.h"

#include <cker/operation/Conv.h>
// Assumed: provides nnfw::cker::PortableAsymmetricQuantizeFloats used by the hybrid path below.
#include <cker/PortableTensorUtils.h>
#include <ir/Padding.h>

namespace onert::backend::cpu
{

void Validator::visit(const ir::operation::Conv2D &) { _supported = true; }

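// Creates the CPU ConvolutionLayer kernel for a Conv2D node. When the input or
// kernel shape is dynamic, explicit padding cannot be computed at generation
// time, so the raw padding parameters are forwarded and resolved later in
// ConvolutionLayer::run().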
void KernelGenerator::visit(const ir::operation::Conv2D &node)
{
  using ir::operation::Conv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  const auto stride = node.param().stride;
  const auto activation = node.param().activation;
  const auto &param_padding = node.param().padding;
  const auto dilation = node.param().dilation;

  const bool is_cacheable_weights = ker_tensor->is_constant();

  auto fn = std::make_unique<ops::ConvolutionLayer>();

  if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
  {
    fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
                  param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
                  stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
                  activation, ofm_tensor, is_cacheable_weights);

    _return_fn = std::move(fn);
    return;
  }
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);

  const auto padding =
    ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
                         dilation.width_factor, dilation.height_factor);

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
                padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
                dilation.width_factor, dilation.height_factor, activation, ofm_tensor,
                is_cacheable_weights);

  _return_fn = std::move(fn);
}

} // namespace onert::backend::cpu

namespace onert::backend::cpu::ops
{
ConvolutionLayer::ConvolutionLayer()
  : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
    _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
    _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
    _dilationHeightFactor(1), _activation(ir::Activation::NONE),
    _conv_kernel(new nnfw::cker::Conv()), _prepare(false), _is_cachable_weights(false),
    _is_hybrid(false)
{
  // DO NOTHING
}

ConvolutionLayer::~ConvolutionLayer() = default;

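// Float32 convolution: fills cker::ConvParams from the configured padding,
// strides, dilation and activation range, then invokes the cker Conv kernel.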
void ConvolutionLayer::convFloat32()
{
  float output_activation_min = 0, output_activation_max = 0;
  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);

  nnfw::cker::ConvParams op_params;
  op_params.padding_type = getPaddingType(_paddingType);
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = _dilationWidthFactor;
  op_params.dilation_height_factor = _dilationHeightFactor;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  nnfw::cker::Conv &kernel = *_conv_kernel;
  kernel(op_params, getShape(_input), getBuffer<float>(_input), getShape(_kernel),
         getBuffer<float>(_kernel), getShape(_bias), getBuffer<float>(_bias), getShape(_output),
         getBuffer<float>(_output));
}

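// Per-tensor quantized uint8 convolution: a single output multiplier/shift is
// derived from the input, kernel and output scales before calling the kernel.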
void ConvolutionLayer::convQ8uPerTensor()
{
  int32_t output_activation_min = 0;
  int32_t output_activation_max = 0;
  CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
                                    &output_activation_max);

  double real_multiplier = 0.0;
  int32_t output_multiplier = 0;
  int32_t output_shift = 0;
  GetQuantizedConvolutionMultiplier(_input, _kernel, _bias, _output, &real_multiplier);
  QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);

  nnfw::cker::ConvParams op_params;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = _dilationWidthFactor;
  op_params.dilation_height_factor = _dilationHeightFactor;
  op_params.padding_type = getPaddingType(_paddingType);
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.input_offset = -_input->data_zero_point();
  op_params.weights_offset = -_kernel->data_zero_point();
  op_params.output_offset = _output->data_zero_point();
  op_params.output_multiplier = output_multiplier;
  op_params.output_shift = output_shift;
  op_params.quantized_activation_min = output_activation_min;
  op_params.quantized_activation_max = output_activation_max;
  op_params.is_replaced_weights = true;

  nnfw::cker::Conv &kernel = *_conv_kernel;
  kernel(op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
         getBuffer<uint8_t>(_kernel), getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output),
         getBuffer<uint8_t>(_output));
}

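// Per-channel quantized uint8 convolution: per-channel output multipliers and
// shifts are precomputed in prepare() and held by the cker Conv object; the
// per-channel kernel zero-points are passed directly in the kernel call.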
void ConvolutionLayer::convQ8uPerChannel()
{
  nnfw::cker::ConvParams op_params;
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = _dilationWidthFactor;
  op_params.dilation_height_factor = _dilationHeightFactor;
  op_params.input_offset = -_input->data_zero_point();
  op_params.output_offset = _output->data_zero_point();
  int32_t output_activation_min = 0;
  int32_t output_activation_max = 0;
  CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
                                    &output_activation_max);
  op_params.quantized_activation_min = output_activation_min;
  op_params.quantized_activation_max = output_activation_max;
  // NOTE: The following fields of ConvParams are not used:
  // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max}

  nnfw::cker::Conv &kernel = *_conv_kernel;
  kernel(op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
         getBuffer<uint8_t>(_kernel), _kernel->data_zero_points().data(), getShape(_bias),
         getBuffer<int32_t>(_bias), getShape(_output), getBuffer<uint8_t>(_output));
}

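// Quantized int8 convolution: offsets and the quantized activation range are
// set here; per-channel output multipliers/shifts come from prepare().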
void ConvolutionLayer::convQ8i()
{
  int32_t output_activation_min = 0;
  int32_t output_activation_max = 0;
  CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
                                    &output_activation_max);

  nnfw::cker::ConvParams op_params;
  op_params.input_offset = -_input->data_zero_point();
  op_params.output_offset = _output->data_zero_point();
  op_params.stride_height = _strideHeight;
  op_params.stride_width = _strideWidth;
  op_params.dilation_height_factor = _dilationHeightFactor;
  op_params.dilation_width_factor = _dilationWidthFactor;
  op_params.padding_values.height = _paddingTop;
  op_params.padding_values.width = _paddingLeft;
  op_params.quantized_activation_min = output_activation_min;
  op_params.quantized_activation_max = output_activation_max;

  nnfw::cker::Conv &kernel = *_conv_kernel;
  kernel(op_params, getShape(_input), reinterpret_cast<const int8_t *>(_input->buffer()),
         getShape(_kernel), reinterpret_cast<const int8_t *>(_kernel->buffer()), getShape(_bias),
         reinterpret_cast<const int32_t *>(_bias->buffer()), getShape(_output),
         reinterpret_cast<int8_t *>(_output->buffer()));
}

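// Hybrid path (float input, per-channel symmetric int8 weights): each input
// batch is quantized on the fly into the temp arena allocated in prepare(),
// then the per-channel hybrid convolution produces float output.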
void ConvolutionLayer::convQ8iHybridPerChannel()
{
  float output_activation_min = 0;
  float output_activation_max = 0;
  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);

  const int batch_size = getShape(_input).Dims(0);
  if (batch_size == 0)
    throw std::runtime_error{"Convolution input batch_size = 0"};
  auto input_shape = getShape(_input);
  const int input_size = input_shape.FlatSize() / batch_size;

  auto input_quantized_ptr = _hybrid_arena->input_quantized.data();
  auto input_scaling_factors_ptr = _hybrid_arena->input_scaling_factors.data();
  auto input_offsets_ptr = _hybrid_arena->input_offsets.data();
  for (int b = 0; b < batch_size; ++b)
  {
    const int offset = b * input_size;
    nnfw::cker::PortableAsymmetricQuantizeFloats(
      reinterpret_cast<const float *>(_input->buffer()) + offset, input_size,
      input_quantized_ptr + offset, &input_scaling_factors_ptr[b], &input_offsets_ptr[b]);
  }
  nnfw::cker::ConvParams op_params;
  op_params.padding_type = getPaddingType(_paddingType);
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = _dilationWidthFactor;
  op_params.dilation_height_factor = _dilationHeightFactor;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  const auto *filter_per_channel_scales = _kernel->data_scales().data();
  nnfw::cker::reference::HybridConvPerChannel(
    op_params, input_scaling_factors_ptr, getShape(_input), input_quantized_ptr, getShape(_kernel),
    reinterpret_cast<const int8_t *>(_kernel->buffer()), getShape(_bias),
    reinterpret_cast<const float *>(_bias->buffer()), getShape(_output),
    reinterpret_cast<float *>(_output->buffer()), filter_per_channel_scales, input_offsets_ptr);
}

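// Stores the operation parameters; _is_hybrid is set when a float input is
// combined with symmetric int8 weights so that run() takes the hybrid path.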
void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
                                 const IPortableTensor *bias, const ir::PaddingType paddingType,
                                 const uint32_t paddingLeft, const uint32_t paddingRight,
                                 const uint32_t paddingTop, const uint32_t paddingBottom,
                                 const uint32_t strideWidth, const uint32_t strideHeight,
                                 const uint32_t dilationWidthFactor,
                                 const uint32_t dilationHeightFactor,
                                 const ir::Activation activation, IPortableTensor *output,
                                 bool is_cachable_weights)
{
  _input = input;
  _kernel = kernel;
  _bias = bias;
  _paddingType = paddingType;
  _paddingLeft = paddingLeft;
  _paddingRight = paddingRight;
  _paddingTop = paddingTop;
  _paddingBottom = paddingBottom;
  _strideWidth = strideWidth;
  _strideHeight = strideHeight;
  _dilationWidthFactor = dilationWidthFactor;
  _dilationHeightFactor = dilationHeightFactor;
  _activation = activation;
  _output = output;
  _is_cachable_weights = is_cachable_weights;
  _is_hybrid = _input->data_type() == OperandType::FLOAT32 &&
               _kernel->data_type() == OperandType::QUANT_INT8_SYMM;
}

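// Resolves explicit padding for dynamic shapes, then dispatches to the
// implementation matching the input data type and quantization scheme.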
void ConvolutionLayer::run()
{
  prepare();
  if (_input->is_dynamic() || _kernel->is_dynamic())
  {
    const auto ifm_shape = _input->getShape().asFeature();
    const auto ofm_shape = _output->getShape().asFeature();
    // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
    const auto ker_shape = _kernel->getShape();
    const auto ker_height = ker_shape.dim(1);
    const auto ker_width = ker_shape.dim(2);

    ir::Stride stride;
    stride.vertical = _strideHeight;
    stride.horizontal = _strideWidth;

    ir::Padding param_padding;
    param_padding.type = _paddingType;
    param_padding.param.left = _paddingLeft;
    param_padding.param.right = _paddingRight;
    param_padding.param.top = _paddingTop;
    param_padding.param.bottom = _paddingBottom;

    const auto padding =
      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
                           _dilationWidthFactor, _dilationHeightFactor);

    _paddingLeft = padding.left;
    _paddingRight = padding.right;
    _paddingTop = padding.top;
    _paddingBottom = padding.bottom;
  }
  if (_is_hybrid)
  {
    convQ8iHybridPerChannel();
  }
  else if (_input->data_type() == OperandType::FLOAT32)
  {
    convFloat32();
  }
  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
  {
    const bool per_channel_quantized = _kernel->data_scales().size() > 1;
    if (per_channel_quantized)
      convQ8uPerChannel();
    else
      convQ8uPerTensor();
  }
  else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
  {
    convQ8i();
  }
  else
  {
    throw std::runtime_error{"Conv: unsupported data type"};
  }
}

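// One-time preparation: allocates the hybrid temp arena, or lets cker
// pre-transform constant float weights and precompute per-channel multipliers
// and shifts for the quantized paths.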
void ConvolutionLayer::prepare()
{
  if (_prepare)
    return;

  if (_is_hybrid)
  {
    // ensure weight is per-channel quantized.
    int32_t kernel_output_channel = getShape(_kernel).Dims(0);
    // zero_points comes from flatbuffer vector. Its size is within uint32_t range.
    size_t kernel_zerop_cnt = _kernel->data_scales().size();
    // promote to int64_t to compare int32_t and uint32_t
    if ((int64_t)kernel_output_channel != (int64_t)kernel_zerop_cnt)
      throw std::runtime_error{"Conv2D hybrid supports only per-channel quantized weight."};

    // allocate memory for activation quantization.
    // - quantized values (int8_t type and same shape of original input)
    // - quantization params (= scale/zeropoint for each input)
    auto input_shape = getShape(_input);
    const int batch_size = input_shape.Dims(0);
    const int input_size = input_shape.FlatSize() / batch_size;
    _hybrid_arena = std::make_unique<nnfw::cker::ConvHybridTempArena>(batch_size, input_size);
    _prepare = true;
    return;
  }

  nnfw::cker::Conv &kernel = *_conv_kernel;
  if (_input->data_type() == OperandType::FLOAT32 && _is_cachable_weights)
  {
    bool is_transposed = false;
    kernel.prepareF32(getShape(_kernel), getBuffer<float>(_kernel), getPaddingType(_paddingType),
                      is_transposed, _dilationWidthFactor, _dilationHeightFactor);

    // Decrease reference of _kernel(weights) only when _kernel is constant
    if (is_transposed)
    {
      auto kernel_tensor = dynamic_cast<const Tensor *>(_kernel);
      if (kernel_tensor)
        // TODO Remove const_cast
        const_cast<Tensor *>(kernel_tensor)->decrease_ref();
    }
  }
  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _is_cachable_weights &&
           !_input->is_dynamic() && !_output->is_dynamic())
  {
    const bool per_channel_quantized = _kernel->data_scales().size() > 1;
    if (per_channel_quantized)
    {
      GetQuantizedConvolutionMultipliersAndShifts(
        _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
        _kernel->data_scales().size(), getShape(_kernel).Dims(0),
        kernel.per_channel_output_multiplier(), kernel.per_channel_output_shift());
    }
    else
    {
      kernel.prepareQ8uPerTensor(getShape(_input), getShape(_kernel), getShape(_output),
                                 _strideWidth, _strideHeight, _dilationWidthFactor,
                                 _dilationHeightFactor);
    }
  }
  else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
  {
    if (_is_cachable_weights && _kernel->data_scales().size() > 1)
    {
      GetQuantizedConvolutionMultipliersAndShifts(
        _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
        _kernel->data_scales().size(), getShape(_kernel).Dims(0),
        kernel.per_channel_output_multiplier(), kernel.per_channel_output_shift());
    }
    else
    {
      throw std::runtime_error{"Conv2D: Int8 dynamic weight is not supported"};
    }
  }
  _prepare = true;
}

} // namespace onert::backend::cpu::ops