ONE - On-device Neural Engine
Loading...
Searching...
No Matches
OMUtils.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "execute/OMUtils.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>
19
20using namespace onert_micro::execute;
21using namespace onert_micro;
22
23void onert_micro::execute::quantizeMultiplier(double double_multiplier,
24 int32_t *quantized_multiplier, int *shift)
25{
26 if (double_multiplier == 0.0)
27 {
28 *quantized_multiplier = 0;
29 *shift = 0;
30 return;
31 }
32
33 const double q = std::frexp(double_multiplier, shift);
34 auto q_fixed = static_cast<int64_t>(std::round(q * (int64_t(1) << 31)));
35
36 if (q_fixed == (int64_t(1) << 31))
37 {
38 q_fixed /= 2;
39 ++*shift;
40 }
41 assert(q_fixed <= std::numeric_limits<int32_t>::max());
42 // A shift amount smaller than -31 would cause all bits to be shifted out
43 // and thus all results would be zero. We implement that instead with
44 // q_fixed==0, so as to avoid hitting issues with right-shift
45 // operations with shift amounts greater than 31. Note that this happens
46 // roughly when abs(double_multiplier) < 2^-31 and the present handling means
47 // that we're effectively flushing tiny double_multiplier's to zero.
48 // We could conceivably handle values in the range (roughly) [32, 63]
49 // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
50 // the present handling is just doing 'flush denormals to zero'. We could
51 // reconsider and actually generate nonzero denormals if a need arises.
52 if (*shift < -31)
53 {
54 *shift = 0;
55 q_fixed = 0;
56 }
57 *quantized_multiplier = static_cast<int32_t>(q_fixed);
58}
59
61 int32_t *quantized_multiplier,
62 int *left_shift)
63{
64 assert(double_multiplier < 1.0);
65 assert(double_multiplier > 0.0);
66 int shift;
67 onert_micro::execute::quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
68 assert(shift <= 0);
69 *left_shift = shift;
70}
71
72namespace
73{
74OMStatus calculateActivationRangeQuantizedImpl(circle::ActivationFunctionType activation,
75 int32_t qmin, int32_t qmax, int32_t zero_point,
76 float scale, int32_t *activation_min,
77 int32_t *activation_max)
78{
79 assert(scale != 0.f);
80
81 auto quantize = [scale, zero_point](float x) {
82 return zero_point + static_cast<int32_t>(std::round(x / scale));
83 };
84
85 switch (activation)
86 {
87 case circle::ActivationFunctionType::ActivationFunctionType_NONE:
88 case circle::ActivationFunctionType::ActivationFunctionType_TANH:
89 *activation_min = qmin;
90 *activation_max = qmax;
91 break;
92 case circle::ActivationFunctionType::ActivationFunctionType_RELU:
93 *activation_min = std::max(qmin, quantize(0.0f));
94 *activation_max = qmax;
95 break;
96 case circle::ActivationFunctionType::ActivationFunctionType_RELU_N1_TO_1:
97 *activation_min = std::max(qmin, quantize(-1.0f));
98 *activation_max = std::min(qmax, quantize(1.0f));
99 break;
100 case circle::ActivationFunctionType::ActivationFunctionType_RELU6:
101 *activation_min = std::max(qmin, quantize(0.0f));
102 *activation_max = std::min(qmax, quantize(6.0f));
103 break;
104 default:
105 assert(false && "Unsupported activation.");
107 }
108 return Ok;
109}
110} // namespace
111
113 circle::ActivationFunctionType activation, int32_t output_zero_point, float output_scale,
114 circle::TensorType data_type, int32_t *activation_min, int32_t *activation_max)
115{
116 int32_t qmin;
117 int32_t qmax;
118 switch (data_type)
119 {
120 case circle::TensorType_UINT8:
121 qmin = 0;
122 qmax = std::numeric_limits<uint8_t>::max();
123 break;
124 case circle::TensorType_INT8:
125 qmin = std::numeric_limits<int8_t>::min();
126 qmax = std::numeric_limits<int8_t>::max();
127 break;
128 case circle::TensorType_INT16:
129 // For now, assume that signed int16 type implies signed symmetric quantization.
130 assert(output_zero_point == 0);
131 qmin = std::numeric_limits<int16_t>::min();
132 qmax = std::numeric_limits<int16_t>::max();
133 break;
134 default:
135 assert(false && "Unsupported type.");
136 return UnsupportedType;
137 }
138
139 return calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output_zero_point,
140 output_scale, activation_min, activation_max);
141}
142
143void onert_micro::execute::readQuantParams(const circle::Tensor *tensor, long &zero_point,
144 float &scale)
145{
146 // additional check
147 assert(tensor->quantization() != nullptr); // Fix caller
148 assert(tensor->quantization()->scale() != nullptr and
149 tensor->quantization()->scale()->size() == 1); // Fix caller
150 assert(tensor->quantization()->zero_point() != nullptr and
151 tensor->quantization()->zero_point()->size() == 1); // Fix caller
152
153 // read zero point
154 zero_point = tensor->quantization()->zero_point()->operator[](0);
155 // read scale
156 scale = tensor->quantization()->scale()->operator[](0);
157}
158
160 const circle::Tensor **input,
161 const circle::Tensor **output, uint8_t **input_data,
162 uint8_t **output_data)
163{
164 OMStatus status;
165
166 core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
167 core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
168 uint16_t op_index = execute_args.kernel_index;
169
170 {
171 OMRuntimeKernel runtime_kernel;
172 runtime_kernel.readKernel(op_index, runtime_context);
173
174 *input = runtime_kernel.inputs[0];
175 *output = runtime_kernel.outputs[0];
176
177 assert(*input != nullptr);
178 assert(*output != nullptr);
179
180 status = runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context);
181 if (status != Ok)
182 return status;
183
184 *input_data = runtime_kernel.inputs_data[0];
185 *output_data = runtime_kernel.outputs_data[0];
186 }
187
188 assert(*input_data != nullptr);
189 assert(*output_data != nullptr);
190
191 return status;
192}
193
195 const circle::Tensor *input1,
196 const circle::Tensor *input2,
197 const circle::Tensor *output,
198 circle::ActivationFunctionType act)
199{
200 long input1_zp;
201 long input2_zp;
202 long output_zp;
203
204 float input1_scale;
205 float input2_scale;
206 float output_scale;
207
208 // Read input1 quant params
209 readQuantParams(input1, input1_zp, input1_scale);
210 // Read input2 quant params
211 readQuantParams(input2, input2_zp, input2_scale);
212 // Read output quant params
213 readQuantParams(output, output_zp, output_scale);
214
215 params.input1_offset = -static_cast<int32_t>(input1_zp);
216 params.input2_offset = -static_cast<int32_t>(input2_zp);
217 params.output_offset = static_cast<int32_t>(output_zp);
218 params.left_shift = (output->type() == circle::TensorType_INT16) ? 15 : 20;
219 const double twice_max_input_scale =
220 2 * static_cast<double>(std::max(input1_scale, input2_scale));
221 const double real_input1_multiplier = static_cast<double>(input1_scale) / twice_max_input_scale;
222 const double real_input2_multiplier = static_cast<double>(input2_scale) / twice_max_input_scale;
223 const double real_output_multiplier =
224 twice_max_input_scale / ((1 << params.left_shift) * static_cast<double>(output_scale));
225
226 quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &params.input1_multiplier,
227 &params.input1_shift);
228
229 quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &params.input2_multiplier,
230 &params.input2_shift);
231
232 quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &params.output_multiplier,
233 &params.output_shift);
234
235 calculateActivationRangeQuantized(act, output_zp, output_scale, output->type(),
238}
239
241 const circle::Tensor **input1,
242 const circle::Tensor **input2,
243 const circle::Tensor **output,
244 OMRuntimeKernel *runtime_kernel)
245{
246 OMStatus status;
247
248 core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
249 core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
250 uint16_t op_index = execute_args.kernel_index;
251
252 status = runtime_kernel->readKernel(op_index, runtime_context);
253
254 *input1 = runtime_kernel->inputs[0];
255 *input2 = runtime_kernel->inputs[1];
256 *output = runtime_kernel->outputs[0];
257
258 assert(*input1 != nullptr);
259 assert(*input2 != nullptr);
260 assert(*output != nullptr);
261
262 status = runtime_kernel->getDataFromStorage(op_index, runtime_storage, runtime_context);
263 if (status != Ok)
264 return status;
265
266 return status;
267}
uint8_t * outputs_data[maxOutputSize]
OMStatus getDataFromStorage(uint16_t op_index, core::OMRuntimeStorage &storage, core::OMRuntimeContext &context)
OMStatus readKernel(uint16_t op_index, core::OMRuntimeContext &runtime_context)
const circle::Tensor * outputs[maxOutputSize]
const circle::Tensor * inputs[maxInputSize]
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
Definition OMUtils.cpp:23
OMStatus SISOHeader(const OMExecuteArgs &execute_args, const circle::Tensor **input, const circle::Tensor **output, uint8_t **input_data, uint8_t **output_data)
Definition OMUtils.cpp:159
void readQuantParams(const circle::Tensor *tensor, long &zero_point, float &scale)
Definition OMUtils.cpp:143
OMStatus calculateActivationRangeQuantized(circle::ActivationFunctionType activation, int32_t output_zero_point, float output_scale, circle::TensorType data_type, int32_t *activation_min, int32_t *activation_max)
Definition OMUtils.cpp:112
void calculateQuantParams(core::ArithmeticQuantParams &params, const circle::Tensor *input1, const circle::Tensor *input2, const circle::Tensor *output, circle::ActivationFunctionType act)
Definition OMUtils.cpp:194
void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier, int *left_shift)
Definition OMUtils.cpp:60
OMStatus TISOHeader(const OMExecuteArgs &execute_args, const circle::Tensor **input1, const circle::Tensor **input2, const circle::Tensor **output, OMRuntimeKernel *runtime_kernel)
Definition OMUtils.cpp:240
@ UnsupportedType
Definition OMStatus.h:26
@ UnsupportedActivation
Definition OMStatus.h:28
core::OMRuntimeContext & runtime_context
core::OMRuntimeStorage & runtime_storage