ONE - On-device Neural Engine
Loading...
Searching...
No Matches
FullyConnected.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "Builders.h"
18#include "kernels/Utils.h"
19
20#include "PALFullyConnected.h"
21
22namespace luci_interpreter
23{
24
25namespace
26{
27void evalFloat(const circle::Tensor *input, const circle::Tensor *weights,
28 const circle::Tensor *bias, const circle::Tensor *output,
29 const circle::FullyConnectedOptions *options, BaseRuntimeGraph *runtime_graph)
30{
31 float activation_min{};
32 float activation_max{};
33 kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()),
34 &activation_min, &activation_max);
35
37 params.float_activation_min = activation_min;
38 params.float_activation_max = activation_max;
39
40 auto *input_data = runtime_graph->getDataByTensor(input);
41 auto *output_data = runtime_graph->getDataByTensor(output);
42
43 auto *weights_data = runtime_graph->getConstDataByTensor(weights);
44 auto *bias_data = runtime_graph->getConstDataByTensor(bias);
45
46 assert(input_data != nullptr);
47 assert(weights_data != nullptr);
48 assert(output_data != nullptr);
49
50 int32_t input_shape[kMaxSmallSize];
51 kernels::getTensorDims(input, runtime_graph, input_shape);
52
53 int32_t weight_shape[kMaxSmallSize];
54 kernels::getTensorDims(weights, runtime_graph, weight_shape);
55
56 int32_t output_shape[kMaxSmallSize];
57 kernels::getTensorDims(output, runtime_graph, output_shape);
58 // TODO remove code duplication, introduce func
59#ifndef DIS_DYN_SHAPES
60 // Dynamic shape case
61 if (output_shape[0] != input_shape[0] or output_shape[1] != weight_shape[1])
62 {
63 output_shape[0] = input_shape[0];
64 output_shape[1] = weight_shape[0];
65 uint32_t num_dims = Tensor::num_dims(output);
66 luci_interpreter::RuntimeShape dynamic_shape(num_dims);
67 int32_t data_size = 1;
68 for (int i = 0; i < num_dims; ++i)
69 {
70 dynamic_shape.setDim(i, output_shape[i]);
71 data_size *= output_shape[i];
72 }
73 data_size *= size(Tensor::element_type(output));
74
75 runtime_graph->addDynamicShapeTensor(output, std::move(dynamic_shape));
76
77 if (data_size == 0)
78 {
79 runtime_graph->resetTensorData(nullptr, output);
80 return;
81 }
82
83 auto new_output_data = new uint8_t[data_size];
84 output_data = new_output_data;
85 runtime_graph->resetTensorData(new_output_data, output);
86 }
87#endif // DIS_DYN_SHAPES
88
89 switch (Tensor::element_type(weights))
90 {
91 case DataType::FLOAT32:
92 {
93 luci_interpreter_pal::FullyConnected(
94 params, input_shape, kernels::getTensorData<float>(input_data), weight_shape,
95 kernels::getTensorData<float>(weights_data), kernels::getTensorData<float>(bias_data),
96 output_shape, kernels::getTensorData<float>(output_data), Tensor::num_dims(output),
97 Tensor::num_dims(weights));
98 break;
99 }
100 case DataType::S8:
101 {
102 // Hybrid mode
103 params.weights_scales =
104 reinterpret_cast<const float *>(weights->quantization()->scale()->data());
105 params.is_channel_wise_quant = weights->quantization()->scale()->size() > 1;
106 luci_interpreter_pal::FullyConnected(
107 params, input_shape, kernels::getTensorData<float>(input_data), weight_shape,
108 kernels::getTensorData<int8_t>(weights_data), kernels::getTensorData<float>(bias_data),
109 output_shape, kernels::getTensorData<float>(output_data), Tensor::num_dims(output),
110 Tensor::num_dims(weights));
111 break;
112 }
113 default:
114 assert(false && "Unsupported hybrid weight type");
115 }
116}
117
118#ifndef DIS_QUANT
119void evalQuantized(const circle::Tensor *input, const circle::Tensor *weights,
120 const circle::Tensor *bias, const circle::Tensor *output,
121 const circle::FullyConnectedOptions *options, BaseRuntimeGraph *runtime_graph,
122 DataType type)
123{
124 double real_multiplier = 0.0;
125 int output_shift;
126 int32_t output_activation_min;
127 int32_t output_activation_max;
128 int32_t output_multiplier;
130 Tensor::scale(input), Tensor::scale(weights), Tensor::scale(output));
131 kernels::quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
133 output, &output_activation_min,
134 &output_activation_max);
135
136 int32_t input_offset = -Tensor::zero_point(input);
137 int32_t filter_offset = 0;
138 if (type == DataType::U8)
139 filter_offset = -Tensor::zero_point(weights);
140 int32_t output_offset = Tensor::zero_point(output);
141
143 op_params.input_offset = input_offset;
144 op_params.weights_offset = filter_offset;
145 op_params.output_offset = output_offset;
146 op_params.output_multiplier = output_multiplier;
147 op_params.output_shift = output_shift;
148 op_params.quantized_activation_min = output_activation_min;
149 op_params.quantized_activation_max = output_activation_max;
150 op_params.lhs_cacheable = false;
151 op_params.rhs_cacheable = false;
152
153 auto *input_data = runtime_graph->getDataByTensor(input);
154 auto *output_data = runtime_graph->getDataByTensor(output);
155
156 auto *weights_data = runtime_graph->getConstDataByTensor(weights);
157 auto *bias_data = runtime_graph->getConstDataByTensor(bias);
158
159 assert(input_data != nullptr);
160 assert(weights_data != nullptr);
161 assert(output_data != nullptr);
162
163 int32_t input_shape[kMaxSmallSize];
164 kernels::getTensorDims(input, runtime_graph, input_shape);
165
166 int32_t weights_shape[kMaxSmallSize];
167 kernels::getTensorDims(weights, runtime_graph, weights_shape);
168
169 int32_t output_shape[kMaxSmallSize];
170 kernels::getTensorDims(output, runtime_graph, output_shape);
171 if (type == DataType::S8)
172 {
174 op_params, input_shape, kernels::getTensorData<int8_t>(input_data), weights_shape,
175 kernels::getTensorData<int8_t>(weights_data), kernels::getTensorData<int32_t>(bias_data),
176 output_shape, kernels::getTensorData<int8_t>(output_data), Tensor::num_dims(output),
177 Tensor::num_dims(weights));
178 }
179 else if (type == DataType::U8)
180 {
181 luci_interpreter_pal::FullyConnected<uint8_t>(
182 op_params, input_shape, kernels::getTensorData<uint8_t>(input_data), weights_shape,
183 kernels::getTensorData<uint8_t>(weights_data), kernels::getTensorData<int32_t>(bias_data),
184 output_shape, kernels::getTensorData<uint8_t>(output_data), Tensor::num_dims(output),
185 Tensor::num_dims(weights));
186 }
187 else if (type == DataType::S16)
188 {
189 luci_interpreter_pal::FullyConnected(
190 op_params, input_shape, kernels::getTensorData<int16_t>(input_data), weights_shape,
191 kernels::getTensorData<int8_t>(weights_data), kernels::getTensorData<int64_t>(bias_data),
192 output_shape, kernels::getTensorData<int16_t>(output_data), Tensor::num_dims(output),
193 Tensor::num_dims(weights));
194 }
195 else
196 {
197 assert(false && "Unsupported quantize type");
198 }
199}
200#endif
201
202} // namespace
203
204void configure_kernel_CircleFullyConnected(const circle::Operator *cur_op,
205 BaseRuntimeGraph *runtime_graph)
206{
207 const auto input_index = cur_op->inputs()->operator[](0);
208 const auto weight_index = cur_op->inputs()->operator[](1);
209 const auto bias_index = cur_op->inputs()->operator[](2);
210 const auto output_index = cur_op->outputs()->operator[](0);
211
212 assert(input_index != -1);
213 assert(weight_index != -1);
214 assert(output_index != -1);
215
216 const auto input = runtime_graph->getCircleTensorByIndex(input_index);
217 const auto weights = runtime_graph->getCircleTensorByIndex(weight_index);
218 const auto bias = runtime_graph->getCircleTensorByIndex(bias_index);
219 const auto output = runtime_graph->getCircleTensorByIndex(output_index);
220
221 assert(input != nullptr);
222 assert(weights != nullptr);
223 assert(output != nullptr);
224
225#ifndef DIS_FLOAT
226 if (Tensor::element_type(weights) == DataType::S8 and
227 Tensor::element_type(input) == DataType::FLOAT32)
228 {
229 // hybrid mode
230 LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == DataType::FLOAT32);
231 LUCI_INTERPRETER_CHECK(!bias || Tensor::element_type(bias) == DataType::FLOAT32)
232 }
233 else if (Tensor::element_type(weights) == DataType::FLOAT32)
234 {
235 LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == DataType::FLOAT32);
236 LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == DataType::FLOAT32);
237 LUCI_INTERPRETER_CHECK(!bias || Tensor::element_type(bias) == DataType::FLOAT32)
238 }
239#endif // DIS_FLOAT
240#ifndef DIS_QUANT
241 else if (Tensor::element_type(weights) == DataType::U8)
242 {
243 LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == DataType::U8);
244 LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == DataType::U8);
245 LUCI_INTERPRETER_CHECK(!bias || Tensor::element_type(bias) == DataType::S32)
246 }
247 else if (Tensor::element_type(weights) == DataType::S8)
248 {
249 LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == DataType::S8 ||
250 Tensor::element_type(input) == DataType::FLOAT32);
251 LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == DataType::S8 ||
252 Tensor::element_type(output) == DataType::FLOAT32);
253 LUCI_INTERPRETER_CHECK(!bias || Tensor::element_type(bias) == DataType::S32 ||
254 Tensor::element_type(bias) == DataType::S64 ||
255 Tensor::element_type(bias) == DataType::FLOAT32)
256 if (Tensor::element_type(input) == DataType::FLOAT32)
257 {
258 // Check it is channel wise quantization
259 LUCI_INTERPRETER_CHECK(weights->quantization() != nullptr);
260 LUCI_INTERPRETER_CHECK(weights->quantization()->scale() != nullptr);
261 }
262 }
263#endif // DIS_QUANT
264 else
265 {
266 assert(false && "Unsupported type.");
267 }
268
269 LUCI_INTERPRETER_CHECK(Tensor::num_dims(weights) == 2);
270 LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::num_elements(bias) == Tensor::dim(weights, 0));
271
272#ifdef DIS_DYN_SHAPES
273 int32_t input_num_elements = Tensor::num_elements(input);
274 LUCI_INTERPRETER_CHECK(input_num_elements % Tensor::dim(weights, 1) == 0);
275#endif // DIS_DYN_SHAPES
276
277 if (bias)
278 LUCI_INTERPRETER_CHECK(Tensor::num_elements(bias) == Tensor::dim(weights, 0));
279}
280
281// TODO think how remove unused param
282void execute_kernel_CircleFullyConnected(const circle::Operator *cur_op,
283 BaseRuntimeGraph *runtime_graph)
284{
285 const auto input_index = cur_op->inputs()->operator[](0);
286 const auto weight_index = cur_op->inputs()->operator[](1);
287 const auto bias_index = cur_op->inputs()->operator[](2);
288 const auto output_index = cur_op->outputs()->operator[](0);
289
290 assert(input_index != -1);
291 assert(weight_index != -1);
292 assert(output_index != -1);
293
294 const auto input = runtime_graph->getCircleTensorByIndex(input_index);
295 const auto weights = runtime_graph->getCircleTensorByIndex(weight_index);
296 const auto bias = runtime_graph->getCircleTensorByIndex(bias_index);
297 const auto output = runtime_graph->getCircleTensorByIndex(output_index);
298
299 assert(input != nullptr);
300 assert(weights != nullptr);
301 assert(output != nullptr);
302
303 const auto *options = cur_op->builtin_options_as_FullyConnectedOptions();
304 const auto input_type = Tensor::element_type(input);
305 switch (input_type)
306 {
307#ifndef DIS_QUANT
308 case DataType::U8:
309 case DataType::S8:
310 case DataType::S16:
311 evalQuantized(input, weights, bias, output, options, runtime_graph, input_type);
312 break;
313#endif // DIS_QUANT
314#ifndef DIS_FLOAT
315 case DataType::FLOAT32:
316 evalFloat(input, weights, bias, output, options, runtime_graph);
317 break;
318#endif // DIS_FLOAT
319 default:
320 assert(false && "Unsupported type.");
321 }
322}
323
324} // namespace luci_interpreter
const circle::Tensor * getCircleTensorByIndex(int32_t index)
#define LUCI_INTERPRETER_CHECK(cond)
Definition Utils.h:36
const luci_interpreter::RuntimeShape output_shape
list input_data
Definition infer.py:29
DataType
"scalar" value type
Definition DataType.h:27
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
Definition Utils.cpp:52
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
Definition Utils.cpp:119
double getQuantizedConvolutionMultipler(float input_scale, float filter_scale, float output_scale)
Definition Utils.h:137
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
Definition Utils.cpp:157
void getTensorDims(const circle::Tensor *tensor, BaseRuntimeGraph *runtime_graph, int32_t *dims)
Definition Utils.h:121
void FullyConnected< int8_t >(const tflite::FullyConnectedParams &params, const tflite::RuntimeShape &input_shape, const int8_t *input_data, const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, const tflite::RuntimeShape &output_shape, int8_t *output_data)
RuntimeGraph BaseRuntimeGraph
void execute_kernel_CircleFullyConnected(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
void configure_kernel_CircleFullyConnected(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
FusedActFunc luci_actfunc(const circle::ActivationFunctionType type)
int32_t size[5]
Definition Slice.cpp:35
const loco::Dimension & dim(uint32_t axis) const
Definition Tensor.h:44