ONE - On-device Neural Engine
Conv2D.cpp
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ConvolutionCommon.h"
#include "kernels/Utils.h"

#include "PALConv2d.h"

namespace luci_interpreter
{

namespace
{

#ifndef DIS_FLOAT

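// Float path: reads tensor data and shapes from the runtime graph and delegates the
// convolution to the platform-specific luci_interpreter_pal::Conv implementation.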
void evalFloat(const circle::Tensor *input, const circle::Tensor *filter,
               const circle::Tensor *bias, const circle::Tensor *output,
               const circle::Conv2DOptions *options, BaseRuntimeGraph *runtime_graph)
{
  const auto params = createConv2DParams(input, filter, output, options);

  auto *input_data = runtime_graph->getDataByTensor(input);
  auto *output_data = runtime_graph->getDataByTensor(output);

  auto *filter_data = runtime_graph->getConstDataByTensor(filter);
  auto *bias_data = runtime_graph->getConstDataByTensor(bias);

  int32_t input_shape[kMaxSmallSize];
  kernels::getTensorDims(input, runtime_graph, input_shape);

  int32_t filter_shape[kMaxSmallSize];
  kernels::getTensorDims(filter, runtime_graph, filter_shape);

  int32_t output_shape[kMaxSmallSize];
  kernels::getTensorDims(output, runtime_graph, output_shape);

  luci_interpreter_pal::Conv(params, input_shape, kernels::getTensorData<float>(input_data),
                             filter_shape, kernels::getTensorData<float>(filter_data),
                             kernels::getTensorData<float>(bias_data), output_shape,
                             kernels::getTensorData<float>(output_data));
}

#endif // DIS_FLOAT

#ifndef DIS_QUANT

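// Per-tensor quantized path (U8 input/filter with S32 bias), delegated to the PAL Conv kernel.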
void evalQuantized(const circle::Tensor *input, const circle::Tensor *filter,
                   const circle::Tensor *bias, const circle::Tensor *output,
                   const circle::Conv2DOptions *options, BaseRuntimeGraph *runtime_graph)
{
  const auto params = createConv2DParams(input, filter, output, options);

  auto *input_data = runtime_graph->getDataByTensor(input);
  auto *output_data = runtime_graph->getDataByTensor(output);

  auto *filter_data = runtime_graph->getConstDataByTensor(filter);
  auto *bias_data = runtime_graph->getConstDataByTensor(bias);

  int32_t input_shape[kMaxSmallSize];
  kernels::getTensorDims(input, runtime_graph, input_shape);

  int32_t filter_shape[kMaxSmallSize];
  kernels::getTensorDims(filter, runtime_graph, filter_shape);

  int32_t output_shape[kMaxSmallSize];
  kernels::getTensorDims(output, runtime_graph, output_shape);

  luci_interpreter_pal::Conv(params, input_shape, kernels::getTensorData<uint8_t>(input_data),
                             filter_shape, kernels::getTensorData<uint8_t>(filter_data),
                             kernels::getTensorData<int32_t>(bias_data), output_shape,
                             kernels::getTensorData<uint8_t>(output_data));
}

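// Per-channel quantized path. S8 tensors are handled by the optimized PAL kernel
// (QuantizedConvPerChannel); U8 tensors fall back to the reference loop below.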
void evalQuantizedPerChannel(const circle::Tensor *input, const circle::Tensor *filter,
                             const circle::Tensor *bias, const circle::Tensor *output,
                             const circle::Conv2DOptions *options, BaseRuntimeGraph *runtime_graph,
                             DataType type)
{
  auto *raw_input_data = runtime_graph->getDataByTensor(input);
  auto *raw_output_data = runtime_graph->getDataByTensor(output);

  auto *raw_filter_data = runtime_graph->getConstDataByTensor(filter);
  auto *raw_bias_data = runtime_graph->getConstDataByTensor(bias);

  const auto params = createConv2DParams(input, filter, output, options);

  if (type == DataType::S8)
  {
    int32_t input_shape[kMaxSmallSize];
    kernels::getTensorDims(input, runtime_graph, input_shape);

    int32_t filter_shape[kMaxSmallSize];
    kernels::getTensorDims(filter, runtime_graph, filter_shape);

    int32_t output_shape[kMaxSmallSize];
    kernels::getTensorDims(output, runtime_graph, output_shape);

    luci_interpreter_pal::QuantizedConvPerChannel(
      params, input_shape, kernels::getTensorData<int8_t>(raw_input_data), filter_shape,
      kernels::getTensorData<int8_t>(raw_filter_data),
      kernels::getTensorData<int32_t>(raw_bias_data), output_shape,
      kernels::getTensorData<int8_t>(raw_output_data));

    return;
  }

  const int32_t batches = Tensor::dim(input, 0);
  const int32_t input_height = Tensor::dim(input, 1);
  const int32_t input_width = Tensor::dim(input, 2);
  const int32_t input_depth = Tensor::dim(input, 3);
  const int32_t output_depth = Tensor::dim(filter, 0);
  const int32_t filter_height = Tensor::dim(filter, 1);
  const int32_t filter_width = Tensor::dim(filter, 2);
  const int32_t output_height = Tensor::dim(output, 1);
  const int32_t output_width = Tensor::dim(output, 2);

  const int32_t padding_h = params.padding_values.height;
  const int32_t padding_w = params.padding_values.width;
  const int32_t stride_height = params.stride_height;
  const int32_t stride_width = params.stride_width;
  const int32_t dilation_height_factor = params.dilation_height_factor;
  const int32_t dilation_width_factor = params.dilation_width_factor;

  const int32_t activation_min = params.quantized_activation_min;
  const int32_t activation_max = params.quantized_activation_max;

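  // Per-channel rescale factors (input_scale * filter_scale[out_c] / output_scale),
  // quantized below into integer multiplier/shift pairs.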
  const std::vector<double> effective_output_scale = kernels::getQuantizedConvolutionMultiplers(
    Tensor::scale(input), Tensor::scales(filter), Tensor::scale(output));

  // Type U8

  const auto *input_data = kernels::getTensorData<uint8_t>(raw_input_data);
  assert(input_data != nullptr);
  const auto *filter_data = kernels::getTensorData<uint8_t>(raw_filter_data);
  assert(filter_data != nullptr);
  const auto *bias_data = kernels::getTensorData<int32_t>(raw_bias_data);
  assert(bias_data != nullptr);
  auto *output_data = kernels::getTensorData<uint8_t>(raw_output_data);
  assert(output_data != nullptr);

  const std::vector<kernels::ChannelQuantMultipliers> multipliers_raw =
    kernels::quantizeMultipliers(effective_output_scale);
  kernels::BroadcastableWrapper<kernels::ChannelQuantMultipliers> quant_multipliers(
    multipliers_raw);

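  // Reference U8 convolution: accumulate (input - input_zero_point) * (filter - filter_zero_point[out_c])
  // over the receptive field, add the bias, rescale with the per-channel multiplier/shift,
  // add the output zero point, and clamp to the fused-activation range.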
  for (int32_t batch = 0; batch < batches; ++batch)
  {
    for (int32_t out_y = 0; out_y < output_height; ++out_y)
    {
      for (int32_t out_x = 0; out_x < output_width; ++out_x)
      {
        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
        {
          const int32_t in_y_origin = out_y * stride_height - padding_h;
          const int32_t in_x_origin = out_x * stride_width - padding_w;
          int32_t acc = 0;
          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
          {
            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
            {
              const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
              const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
              if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
              {
                for (int32_t in_c = 0; in_c < input_depth; ++in_c)
                {
                  const uint8_t input_val =
                    input_data[kernels::calcOffset(input, batch, in_y, in_x, in_c)];
                  const uint8_t filter_val =
                    filter_data[kernels::calcOffset(filter, out_c, filter_y, filter_x, in_c)];
                  acc += static_cast<int32_t>(input_val - Tensor::zero_point(input)) *
                         static_cast<int32_t>(filter_val - Tensor::zero_points(filter)[out_c]);
                }
              }
            }
          }
          if (bias_data)
          {
            acc += bias_data[out_c];
          }

          int32_t scaled_acc = kernels::multiplyByQuantizedMultiplier(
            acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);

          scaled_acc += Tensor::zero_point(output);
          scaled_acc = std::max(scaled_acc, activation_min);
          scaled_acc = std::min(scaled_acc, activation_max);
          output_data[kernels::calcOffset(output, batch, out_y, out_x, out_c)] = scaled_acc;
        }
      }
    }
  }
}
#endif // DIS_QUANT

} // namespace

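// Configure step: validates input/filter/bias/output element types, tensor ranks and shapes,
// and the fused activation function before any execution takes place.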
void configure_kernel_CircleConv2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
{
  kernels::DownsamplingConv2DKernel kernel(cur_op, runtime_graph);

  const auto input = kernel.input();
  const auto filter = kernel.filter();
  const auto bias = kernel.bias();
  const auto output = kernel.output();

  auto filter_data = runtime_graph->getConstDataByTensor(filter);

  assert(filter_data != nullptr);

  const auto *options = cur_op->builtin_options_as_Conv2DOptions();

  if (Tensor::element_type(input) == DataType::FLOAT32 &&
      Tensor::element_type(filter) == DataType::FLOAT32)
  {
    LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::FLOAT32);
  }
#ifndef DIS_QUANT
  else if (Tensor::element_type(input) == DataType::U8 &&
           Tensor::element_type(filter) == DataType::U8)
  {
    LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::S32);
  }
  else if (Tensor::element_type(input) == DataType::S8 &&
           Tensor::element_type(filter) == DataType::S8)
  {
    LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::S32);
    LUCI_INTERPRETER_CHECK(Tensor::num_dims(filter) == 4);
    LUCI_INTERPRETER_CHECK(Tensor::scales(filter).size() ==
                           static_cast<size_t>(Tensor::dim(filter, 0)));
    for (auto zerop : Tensor::zero_points(filter))
    {
      LUCI_INTERPRETER_CHECK(zerop == 0);
    }
  }
  else if (Tensor::element_type(input) == DataType::S16 &&
           Tensor::element_type(filter) == DataType::S16)
  {
    LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::S64);
  }
#endif // DIS_QUANT
  else
  {
    assert(false && "Unsupported type.");
  }
  LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == Tensor::element_type(input));
  LUCI_INTERPRETER_CHECK(Tensor::num_dims(input) == 4 && Tensor::num_dims(filter) == 4);

  const int32_t output_depth = Tensor::dim(filter, 0);
  LUCI_INTERPRETER_CHECK(Tensor::dim(filter, 3) == Tensor::dim(input, 3));

  LUCI_INTERPRETER_CHECK(bias == nullptr ||
                         (Tensor::num_dims(bias) == 1 && Tensor::dim(bias, 0) == output_depth));

  switch (options->fused_activation_function())
  {
    case circle::ActivationFunctionType_NONE:
    case circle::ActivationFunctionType_RELU:
    case circle::ActivationFunctionType_RELU6:
    case circle::ActivationFunctionType_RELU_N1_TO_1:
      break;
    default:
      assert(false && "Unsupported fused activation");
  }
}

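// Execution entry point: dispatches to the float, per-tensor quantized, or per-channel
// quantized implementation based on the input element type and the number of filter scales.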
void execute_kernel_CircleConv2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
{
  kernels::DownsamplingConv2DKernel kernel(cur_op, runtime_graph);

  const auto input = kernel.input();
  const auto weights = kernel.filter();
  const auto bias = kernel.bias();
  const auto output = kernel.output();

  const auto *options = cur_op->builtin_options_as_Conv2DOptions();

  const auto type = Tensor::element_type(input);
  switch (type)
  {
#ifndef DIS_FLOAT
    case DataType::FLOAT32:
      if (Tensor::element_type(weights) == DataType::FLOAT32)
      {
        evalFloat(input, weights, bias, output, options, runtime_graph);
        break;
      }
#endif // DIS_FLOAT
#ifndef DIS_QUANT
    case DataType::U8:
    case DataType::S8:
      if (Tensor::scales(weights).size() == 1 and type == DataType::U8)
      {
        evalQuantized(input, weights, bias, output, options, runtime_graph);
      }
      else if (Tensor::scales(weights).size() > 1)
      {
        LUCI_INTERPRETER_CHECK(Tensor::num_dims(weights) == 4);
        LUCI_INTERPRETER_CHECK(Tensor::scales(weights).size() ==
                               static_cast<size_t>(Tensor::dim(weights, 0)));
        evalQuantizedPerChannel(input, weights, bias, output, options, runtime_graph, type);
      }
      else
      {
        assert(false && "Unsupported yet.");
      }
      break;
#endif // DIS_QUANT
    default:
      assert(false && "Unsupported type.");
  }
}

} // namespace luci_interpreter