ONE - On-device Neural Engine
Utils.cpp
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "kernels/Utils.h"

#include <cassert>
#include <cmath>
#include <limits>

namespace luci_interpreter
{
namespace kernels
{

luci_interpreter::RuntimeShape getTensorRuntimeShape(const circle::Tensor *circle_tensor,
                                                     BaseRuntimeGraph *runtime_graph)
{
  luci_interpreter::RuntimeShape input_shape = getTensorShape(circle_tensor);

#ifndef DIS_DYN_SHAPES
  auto *dynamic_shape_vector = runtime_graph->getDynamicShapeTensor(circle_tensor);
  if (dynamic_shape_vector != nullptr)
  {
    input_shape.resize(dynamic_shape_vector->dimensionsCount());

    for (int n = 0; n < dynamic_shape_vector->dimensionsCount(); ++n)
    {
      input_shape.setDim(n, dynamic_shape_vector->dims(n));
    }
  }
#endif // DIS_DYN_SHAPES
  return input_shape;
}

template <typename T>
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
{
  switch (activation)
  {
    case Activation::NONE:
      *activation_min = std::numeric_limits<T>::lowest();
      *activation_max = std::numeric_limits<T>::max();
      break;
    case Activation::RELU:
      *activation_min = 0;
      *activation_max = std::numeric_limits<T>::max();
      break;
    case Activation::RELU_N1_TO_1:
      *activation_min = -1;
      *activation_max = 1;
      break;
    case Activation::RELU6:
      *activation_min = 0;
      *activation_max = 6;
      break;
    default:
      assert(false && "Unsupported activation.");
  }
}
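
// Illustrative sketch (hypothetical helper, for exposition only): how a kernel
// obtains the clamping bounds for a fused activation. RELU6 yields [0, 6];
// kernels then clamp each output element to [act_min, act_max].
static void exampleActivationRangeFloat()
{
  float act_min = 0.0f;
  float act_max = 0.0f;
  calculateActivationRange(Activation::RELU6, &act_min, &act_max);
  assert(act_min == 0.0f && act_max == 6.0f);
}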

void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
                                    int32_t n_col, int32_t *output)
{
  for (int i = 0; i < n_row; ++i)
  {
    int32_t row_sum = 0;
    for (int j = 0; j < n_col; ++j)
    {
      row_sum += *matrix++;
    }
    output[i] += row_sum * scalar;
  }
}
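
// Illustrative sketch (hypothetical helper, for exposition only): the loop
// above adds scalar * (sum of each matrix row) into the output buffer, e.g. to
// fold an input zero-point offset into a quantized matrix multiplication.
static void exampleMatrixScalarMultiplyAccumulate()
{
  const int8_t matrix[2 * 3] = {1, 2, 3, 4, 5, 6}; // two rows of three columns
  int32_t output[2] = {10, 20};                    // pre-existing accumulator values
  matrixScalarMultiplyAccumulate(matrix, /*scalar=*/2, /*n_row=*/2, /*n_col=*/3, output);
  // Row sums are 6 and 15, so the outputs become 10 + 2 * 6 and 20 + 2 * 15.
  assert(output[0] == 22 && output[1] == 50);
}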

bool areShapesEqual(const luci_interpreter::RuntimeShape &input_shape1,
                    const luci_interpreter::RuntimeShape &input_shape2)
{
  if (input_shape1.dimensionsCount() == input_shape2.dimensionsCount())
  {
    int N = input_shape1.dimensionsCount();
    for (int i = 0; i < N; ++i)
    {
      if (input_shape1.dims(i) != input_shape2.dims(i))
        return false;
    }
    return true;
  }
  return false;
}

template void calculateActivationRange(Activation activation, float *activation_min,
                                       float *activation_max);
template void calculateActivationRange(Activation activation, int32_t *activation_min,
                                       int32_t *activation_max);
template void calculateActivationRange(Activation activation, int64_t *activation_min,
                                       int64_t *activation_max);

#ifndef DIS_QUANT
bool checkedLog2(const float x, int *log2_result)
{
  const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
  const float x_log2_rounded = std::round(x_log2);
  const float x_log2_fracpart = x_log2 - x_log2_rounded;

  *log2_result = static_cast<int>(x_log2_rounded);
  return std::abs(x_log2_fracpart) < 1e-3f;
}
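
// Illustrative sketch (hypothetical helper, for exposition only): checkedLog2
// reports whether a value is (within tolerance) a power of two and, if so,
// returns its exponent.
static void exampleCheckedLog2()
{
  int exponent = 0;
  assert(checkedLog2(8.0f, &exponent) && exponent == 3);   // 8 = 2^3
  assert(checkedLog2(0.25f, &exponent) && exponent == -2); // 0.25 = 2^-2
  assert(!checkedLog2(3.0f, &exponent));                   // not a power of two
}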

int calculateInputRadius(int input_integer_bits, int input_left_shift, int total_signed_bits)
{
  const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
                                    (1LL << (total_signed_bits - input_integer_bits)) /
                                    (1LL << input_left_shift);
  // Tighten bound using floor. Suppose that we could use the exact value.
  // After scaling the difference, the result would be at the maximum. Thus we
  // must ensure that our value has lower magnitude.
  return static_cast<int>(std::floor(max_input_rescaled));
}
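
// Illustrative sketch (hypothetical helper, for exposition only): a worked
// instance of the bound above. With input_integer_bits = 4, input_left_shift = 0
// and total_signed_bits = 15, the largest rescaled input is
// (2^4 - 1) * 2^(15 - 4) / 2^0 = 15 * 2048 = 30720.
static void exampleInputRadius()
{
  assert(calculateInputRadius(/*input_integer_bits=*/4, /*input_left_shift=*/0,
                              /*total_signed_bits=*/15) == 30720);
}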

static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
                                                  int32_t zero_point, float scale,
                                                  int32_t *activation_min, int32_t *activation_max)
{
  auto quantize = [scale, zero_point](float x) {
    return zero_point + static_cast<int32_t>(std::round(x / scale));
  };

  switch (activation)
  {
    case Activation::NONE:
    case Activation::TANH:
      *activation_min = qmin;
      *activation_max = qmax;
      break;
    case Activation::RELU:
      *activation_min = std::max(qmin, quantize(0.0f));
      *activation_max = qmax;
      break;
    case Activation::RELU_N1_TO_1:
      *activation_min = std::max(qmin, quantize(-1.0f));
      *activation_max = std::min(qmax, quantize(1.0f));
      break;
    case Activation::RELU6:
      *activation_min = std::max(qmin, quantize(0.0f));
      *activation_max = std::min(qmax, quantize(6.0f));
      break;
    default:
      assert(false && "Unsupported activation.");
  }
}

static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
                                                  const circle::Tensor *output,
                                                  int32_t *activation_min, int32_t *activation_max)
{
  const float scale = Tensor::scale(output);
  const int32_t zero_point = Tensor::zero_point(output);

  calculateActivationRangeQuantizedImpl(activation, qmin, qmax, zero_point, scale,
                                        activation_min, activation_max);
}

void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
                                       float output_scale, DataType data_type,
                                       int32_t *activation_min, int32_t *activation_max)
{
  int32_t qmin{};
  int32_t qmax{};
  switch (data_type)
  {
    case DataType::U8:
      qmin = 0;
      qmax = std::numeric_limits<uint8_t>::max();
      break;
    case DataType::S8:
      qmin = -std::numeric_limits<int8_t>::max();
      qmax = std::numeric_limits<int8_t>::max();
      break;
    case DataType::S16:
      // For now, assume that signed int16 type implies signed symmetric quantization.
      assert(output_zero_point == 0);
      qmin = -std::numeric_limits<int16_t>::max();
      qmax = std::numeric_limits<int16_t>::max();
      break;
    default:
      assert(false && "Unsupported type.");
  }

  calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output_zero_point, output_scale,
                                        activation_min, activation_max);
}
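
// Illustrative sketch (hypothetical helper, for exposition only): for a uint8
// output with zero_point = 128 and scale = 0.5, RELU6 maps the real interval
// [0.0, 6.0] to the quantized interval [128, 140], clamped to the uint8 range
// [0, 255].
static void exampleQuantizedActivationRange()
{
  int32_t act_min = 0;
  int32_t act_max = 0;
  calculateActivationRangeQuantized(Activation::RELU6, /*output_zero_point=*/128,
                                    /*output_scale=*/0.5f, DataType::U8, &act_min, &act_max);
  assert(act_min == 128 && act_max == 140);
}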

void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
                                       int32_t *activation_min, int32_t *activation_max)
{
  assert(Tensor::zero_points(output).size() == 1);
  const float scale = Tensor::scale(output);
  const int32_t zero_point = Tensor::zero_point(output);
  calculateActivationRangeQuantized(activation, zero_point, scale, Tensor::element_type(output),
                                    activation_min, activation_max);
}

void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
{
  if (double_multiplier == 0.0)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }

  const double q = std::frexp(double_multiplier, shift);
  auto q_fixed = static_cast<int64_t>(std::round(q * (int64_t(1) << 31)));

  if (q_fixed == (int64_t(1) << 31))
  {
    q_fixed /= 2;
    ++*shift;
  }
  assert(q_fixed <= std::numeric_limits<int32_t>::max());
  // A shift amount smaller than -31 would cause all bits to be shifted out
  // and thus all results would be zero. We implement that instead with
  // q_fixed==0, so as to avoid hitting issues with right-shift
  // operations with shift amounts greater than 31. Note that this happens
  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
  // that we're effectively flushing tiny double_multiplier's to zero.
  // We could conceivably handle values in the range (roughly) [32, 63]
  // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
  // the present handling is just doing 'flush denormals to zero'. We could
  // reconsider and actually generate nonzero denormals if a need arises.
  if (*shift < -31)
  {
    *shift = 0;
    q_fixed = 0;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}
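
// Illustrative sketch (hypothetical helper, for exposition only): the
// decomposition produced above. frexp(0.75) yields 0.75 with exponent 0, so the
// Q0.31 value is round(0.75 * 2^31) = 1610612736; a multiplier of 1.5 gives the
// same significand with exponent 1.
static void exampleQuantizeMultiplier()
{
  int32_t quantized = 0;
  int shift = 0;
  quantizeMultiplier(0.75, &quantized, &shift);
  assert(quantized == 1610612736 && shift == 0);
  quantizeMultiplier(1.5, &quantized, &shift);
  assert(quantized == 1610612736 && shift == 1);
}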

void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
                                         int *left_shift)
{
  assert(double_multiplier < 1.0);
  assert(double_multiplier > 0.0);
  int shift;
  quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
  assert(shift <= 0);
  *left_shift = shift;
}
#endif

luci_interpreter::RuntimeShape calculateShapeForBroadcast(const circle::Tensor *input1,
                                                          const circle::Tensor *input2)
{
  const int num_input1_dims = Tensor::num_dims(input1);
  const int num_input2_dims = Tensor::num_dims(input2);
  const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
  luci_interpreter::RuntimeShape output_shape(num_out_dims);

  for (int i = 0; i < num_out_dims; ++i)
  {
    const int32_t input1_dim =
      i < num_input1_dims ? Tensor::dim(input1, num_input1_dims - i - 1) : 1;
    const int32_t input2_dim =
      i < num_input2_dims ? Tensor::dim(input2, num_input2_dims - i - 1) : 1;

    bool need_broadcast = input1_dim != input2_dim;
    bool can_broadcast = input1_dim == 1 || input2_dim == 1;
    LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);

    output_shape.setDim(num_out_dims - i - 1, std::max(input1_dim, input2_dim));
  }

  return output_shape;
}
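
// Illustrative note (hypothetical example, for exposition only): tracing the
// loop above for shapes [2, 1, 3] and [4, 3]. Dimensions are compared from the
// right, and a size of 1 (or a missing dimension) broadcasts against the other:
//   i = 0: 3 vs 3 -> 3
//   i = 1: 1 vs 4 -> 4
//   i = 2: 2 vs (missing, treated as 1) -> 2
// giving an output shape of [2, 4, 3]. Shapes such as [2, 3] and [4, 3] fail
// LUCI_INTERPRETER_CHECK, since 2 != 4 and neither equals 1.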

} // namespace kernels
} // namespace luci_interpreter