ONE - On-device Neural Engine
Loading...
Searching...
No Matches
DepthwiseConvolutionLayer.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
18
21
23{
24
26{
27 float output_activation_min = 0, output_activation_max = 0;
28 CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
29
31 op_params.stride_width = _strideWidth;
32 op_params.stride_height = _strideHeight;
37 op_params.depth_multiplier = _multiplier;
38 op_params.float_activation_min = output_activation_min;
39 op_params.float_activation_max = output_activation_max;
40
41 // TODO: Use the following call if TensorBuilder manages padded_filter_data
42 // and filter_buffers_data:
43 //
44 // void DepthwiseConvOp(
45 // const DepthwiseConvParams &params,
46 // const Shape &input_shape, const float *input_data,
47 // const Shape &filter_shape, const float *filter_data,
48 // const Shape &bias_shape, const float *bias_data,
49 // float *padded_filter_data, bool pad_filter,
50 // float *filter_buffers_data,
51 // const Shape &output_shape, float *output_data
52 // );
53 //
54 // See https://github.com/Samsung/ONE/pull/13669 for an example of using DepthwiseConvOp
55 nnfw::cker::DepthwiseConv<float, float>(
56 op_params, getShape(_input), getBuffer<float>(_input), getShape(_kernel),
57 getBuffer<float>(_kernel), getShape(_bias), getBuffer<float>(_bias), getShape(_output),
58 getBuffer<float>(_output), _external_context->ruy_context());
59}
60
62{
63 int32_t output_activation_min = 0;
64 int32_t output_activation_max = 0;
66 &output_activation_max);
67
68 double real_multiplier = 0.0;
69 int32_t output_multiplier = 0;
70 int32_t output_shift = 0;
72 QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
73
75 op_params.stride_width = _strideWidth;
76 op_params.stride_height = _strideHeight;
81 op_params.depth_multiplier = _multiplier;
82 op_params.input_offset = -_input->data_zero_point();
85 op_params.output_multiplier = output_multiplier;
86 op_params.output_shift = output_shift;
87 op_params.quantized_activation_min = output_activation_min;
88 op_params.quantized_activation_max = output_activation_max;
89
90 nnfw::cker::DepthwiseConv<uint8_t, int32_t>(
91 op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
92 getBuffer<uint8_t>(_kernel), getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output),
93 getBuffer<uint8_t>(_output), _external_context->ruy_context());
94}
95
97{
101 op_params.stride_width = _strideWidth;
102 op_params.stride_height = _strideHeight;
105 op_params.depth_multiplier = _multiplier;
106 op_params.input_offset = -_input->data_zero_point();
108 int32_t output_activation_min = 0;
109 int32_t output_activation_max = 0;
111 &output_activation_max);
112 op_params.quantized_activation_min = output_activation_min;
113 op_params.quantized_activation_max = output_activation_max;
114 // NOTE: The following fields of ConvParams are not used:
115 // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max}
116
118 op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
119 getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel), getBuffer<uint8_t>(_kernel),
120 _kernel->data_zero_points().data(), getShape(_bias), getBuffer<int32_t>(_bias),
121 getShape(_output), getBuffer<uint8_t>(_output));
122}
123
125{
126 if (!_prepared)
127 {
128 prepareQ8i();
129 _prepared = true;
130 }
131
132 int32_t output_activation_min = 0;
133 int32_t output_activation_max = 0;
135 &output_activation_max);
136
141 op_params.depth_multiplier = _multiplier;
142 op_params.stride_width = _strideWidth;
143 op_params.stride_height = _strideHeight;
146 op_params.input_offset = -_input->data_zero_point();
147 op_params.weights_offset = 0;
149 op_params.quantized_activation_min = output_activation_min;
150 op_params.quantized_activation_max = output_activation_max;
151
153 op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
154 getShape(_input), getBuffer<int8_t>(_input), getShape(_kernel), getBuffer<int8_t>(_kernel),
155 getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output), getBuffer<int8_t>(_output),
156 _external_context->ruy_context());
157}
158
160{
161 if (!_prepared)
162 {
163 prepareQ8iHybridPerChannel();
164 _prepared = true;
165 }
166
167 float output_activation_min = 0, output_activation_max = 0;
168 CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
169
170 auto input_shape = getShape(_input);
171 const int batch_size = input_shape.Dims(0);
172 const int input_size = input_shape.FlatSize() / batch_size;
173
174 auto scaling_factors_ptr = _input_scaling_factors.data();
175 auto input_offsets_ptr = _input_offsets.data();
176
177 for (int b = 0; b < batch_size; ++b)
178 {
179 const int offset = b * input_size;
180 nnfw::cker::PortableAsymmetricQuantizeFloats(getBuffer<float>(_input) + offset, input_size,
181 _input_quantized.data() + offset,
182 &scaling_factors_ptr[b], &input_offsets_ptr[b]);
183 }
184
188 op_params.depth_multiplier = _multiplier;
189 op_params.stride_width = _strideWidth;
190 op_params.stride_height = _strideHeight;
193 op_params.float_activation_min = output_activation_min;
194 op_params.float_activation_max = output_activation_max;
195
197 op_params, _input_scaling_factors.data(), getShape(_input), _input_quantized.data(),
198 getShape(_kernel), getBuffer<int8_t>(_kernel), getShape(_bias), getBuffer<float>(_bias),
199 getShape(_output), getBuffer<float>(_output), _kernel->data_scales().data(),
200 _input_offsets.data());
201}
202
203void DepthwiseConvolutionLayer::prepareQ8i()
204{
207 _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
208 _per_channel_output_shift);
209}
210
211void DepthwiseConvolutionLayer::prepareQ8uPerChannel()
212{
215 _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
216 _per_channel_output_shift);
217}
218
219void DepthwiseConvolutionLayer::prepareQ8iHybridPerChannel()
220{
221 // allocate memory for activation quantization.
222 // - quantized values (int8_t type and same shape of original input)
223 // - quantization params (= scale/zeropoint for each input)
224 auto input_shape = getShape(_input);
225 const int batch_size = input_shape.Dims(0);
226 const int input_size = input_shape.FlatSize() / batch_size;
227 _input_quantized.resize(input_size);
228 // TODO: Optimize the case of batch_size = 1
229 _input_scaling_factors.resize(batch_size);
230 _input_offsets.resize(batch_size);
231}
232
233void DepthwiseConvolutionLayer::ensureQ8iHybridPerChannel()
234{
235 // ensure weight is per-channel quantized.
236 int32_t kernel_input_channel = getShape(_kernel).Dims(3);
237 // zero_points comes from flatbuffer vector. Its size is within uint32_t range.
238 size_t kernel_zerop_cnt = _kernel->data_scales().size();
239 // promote to int64_t to compare int32_t and uint32_t
240 if ((int64_t)kernel_input_channel != (int64_t)kernel_zerop_cnt)
241 throw std::runtime_error{"DConv2D hybrid supports only per-channel quantized weight."};
242}
243
245 const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
246 const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
247 const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight,
248 const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight,
249 const ir::Activation activation, IPortableTensor *output,
250 const std::shared_ptr<ExternalContext> &external_context)
251{
252 _input = input;
253 _kernel = kernel;
254 _bias = bias;
255 _paddingLeft = paddingLeft;
256 _paddingRight = paddingRight;
257 _paddingTop = paddingTop;
258 _paddingBottom = paddingBottom;
259 _strideWidth = strideWidth;
260 _strideHeight = strideHeight;
261 _multiplier = multiplier;
262 _dilationWidth = dilationWidth;
263 _dilationHeight = dilationHeight;
264 _activation = activation;
265 _output = output;
266 _external_context = external_context;
267 _is_hybrid = _input->data_type() == OperandType::FLOAT32 &&
268 _kernel->data_type() == OperandType::QUANT_INT8_SYMM;
269
270 if (_is_hybrid)
271 {
272 ensureQ8iHybridPerChannel();
273 prepareQ8iHybridPerChannel();
274 _prepared = true;
275 }
276 else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
277 {
279 {
280 prepareQ8i();
281 _prepared = true;
282 }
283 }
284 else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
286 {
287 const bool per_channel_quantized = _kernel->data_scales().size() > 1;
288 if (per_channel_quantized)
289 {
290 prepareQ8uPerChannel();
291 _prepared = true;
292 }
293 }
294}
295
297{
298 if (_is_hybrid)
299 {
301 }
302 else if (_input->data_type() == OperandType::FLOAT32)
303 {
304 convFloat32();
305 }
306 else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
307 {
308 const bool per_channel_quantized = _kernel->data_scales().size() > 1;
309 if (per_channel_quantized)
311 else
313 }
314 else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
315 {
316 convQ8i();
317 }
318 else
319 {
320 throw std::runtime_error{"DepthwiseConv: unsupported data type"};
321 }
322}
323
324} // namespace onert::backend::cpu::ops
int32_t Dims(int i) const
Definition Shape.h:106
A tensor class that is portable for other backends.
const std::vector< float > & data_scales() const override final
float data_scale() const override final
int32_t data_zero_point() const override final
const std::vector< int32_t > & data_zero_points() const override
ir::DataType data_type() const override final
bool is_dynamic() const override final
Return true if the tensor needs dynamic allocation, meaning that during compile-time the output shape...
bool is_constant() const override final
Return true if the tensor is constant.
void configure(const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias, const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH, const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight, const ir::Activation activation, IPortableTensor *output, const std::shared_ptr< ExternalContext > &external_context)
__global uchar * offset(const Image *img, int x, int y)
Definition helpers.h:540
void DepthwiseConvPerChannel(const DepthwiseConvParams &params, const int32_t *output_multiplier, const int32_t *output_shift, const Shape &input_shape, const int8_t *input_data, const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, int8_t *output_data, ruy::Context *ruy_context)
void DepthwiseConvPerChannel(const DepthwiseConvParams &params, const int32_t *output_multiplier, const int32_t *output_shift, const Shape &input_shape, const uint8_t *input_data, const Shape &filter_shape, const uint8_t *filter_data, const int32_t *filter_zeropoint, const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
void DepthwiseConvHybridPerChannel(const DepthwiseConvParams &params, float *scaling_factors_ptr, const Shape &input_shape, const int8_t *input_data, const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape, const float *bias_data, const Shape &output_shape, float *output_data, const float *per_channel_scale, int32_t *input_offset)
void PortableAsymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values, float *scaling_factor, int32_t *offset)
nnfw::cker::Shape getShape(const IPortableTensor *tensor)
void GetQuantizedConvolutionMultipliersAndShifts(float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size, int num_channels, std::vector< int32_t > &per_channel_output_multiplier, std::vector< int > &per_channel_output_shift)
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output, int32_t *act_min, int32_t *act_max)
void GetQuantizedConvolutionMultiplier(const IPortableTensor *input, const IPortableTensor *filter, const IPortableTensor *bias, const IPortableTensor *output, double *multiplier)
void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
Definition Dims.h:26
PaddingValues padding_values
Definition Types.h:234