ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Utils.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef LUCI_INTERPRETER_KERNELS_UTILS_H
19#define LUCI_INTERPRETER_KERNELS_UTILS_H
20
21#include "luci_interpreter/core/Tensor.h"
22#include "Builders.h"
23#include "Params.h"
24#include <cassert>
25#include <cstdint>
26
27#include <cmath>
28
29namespace luci_interpreter
30{
31namespace kernels
32{
33
35
// Lightweight invariant check used by kernels' configure/execute paths.
// Wrapped in do { } while (false) so the macro expands to a single statement:
// it is safe directly before an 'else' (the bare 'if' form would capture it)
// and requires the usual trailing semicolon at the call site.
#define LUCI_INTERPRETER_CHECK(cond)                     \
  do                                                     \
  {                                                      \
    if (!(cond))                                         \
    {                                                    \
      assert(false && "LUCI_INTERPRETER_CHECK fails");   \
    }                                                    \
  } while (false)
41
// Computes the per-side (symmetric) padding needed so that a convolution with
// the given stride/dilation produces 'out_size' elements from 'in_size'.
// Negative results (input already larger than needed) clamp to zero.
inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
                              int32_t filter_size, int32_t out_size)
{
  // Filter extent once dilation gaps are accounted for.
  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  const int32_t needed = (out_size - 1) * stride + effective_filter_size - in_size;
  const int32_t padding = needed / 2;
  if (padding < 0)
    return 0;
  return padding;
}
49
// Like computePadding, but also reports via '*offset' whether the total
// padding is odd (the extra pixel conventionally goes to the trailing side).
// Returns the per-side padding (total / 2); total padding clamps at zero.
inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size,
                                        int32_t filter_size, int32_t out_size, int32_t *offset)
{
  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  int32_t total_padding = (out_size - 1) * stride + effective_filter_size - in_size;
  if (total_padding < 0)
    total_padding = 0;
  *offset = total_padding % 2; // 1 when padding cannot be split evenly
  return total_padding / 2;
}
59
60inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size,
61 int32_t stride, int32_t dilation_rate = 1)
62{
63 const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
64 switch (padding)
65 {
66 case Padding::SAME:
67 assert(stride != 0);
68 return (image_size + stride - 1) / stride;
69 case Padding::VALID:
70 assert(stride != 0);
71 return (image_size + stride - effective_filter_size) / stride;
72 default:
73 assert(false);
74 return 0;
75 }
76}
77
78inline int32_t calcOffset(const circle::Tensor *tensor, int32_t d0, int32_t d1, int32_t d2,
79 int32_t d3)
80{
81
82 return ((d0 * Tensor::dim(tensor, 1) + d1) * Tensor::dim(tensor, 2) + d2) *
83 Tensor::dim(tensor, 3) +
84 d3;
85}
86
87template <typename T>
88void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
89
91 const circle::Tensor *input2);
92
// Helper wrapper to hide broadcast logic: a single-element vector is presented
// as if it had any length (every index maps to element 0), while longer
// vectors are indexed normally.
//
// NOTE: only a reference to 'v' is stored — the wrapped vector must outlive
// the wrapper.
template <typename T> class BroadcastableWrapper
{
public:
  BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}

  // Const-qualified: reading an element never mutates the wrapper, and this
  // allows use through const references (the original overload was non-const).
  T operator[](int idx) const { return _v[idx * _stride]; }

private:
  const std::vector<T> &_v; // non-owning; see lifetime note above
  int _stride;              // 0 broadcasts element 0; 1 indexes normally
};
105
106inline luci_interpreter::RuntimeShape getTensorShape(const circle::Tensor *tensor)
107{
108 if (tensor == nullptr)
110
111 auto const tensor_shape = Tensor::tensor_shape(tensor);
112
113 luci_interpreter::RuntimeShape runtime_shape(tensor_shape.size());
114 for (int i = 0; i < tensor_shape.size(); ++i)
115 {
116 runtime_shape.setDim(i, tensor_shape[i]);
117 }
118 return runtime_shape;
119}
120
121inline void getTensorDims(const circle::Tensor *tensor, BaseRuntimeGraph *runtime_graph,
122 int32_t *dims)
123{
124 if (tensor == nullptr)
125 {
126 dims = nullptr;
127 return;
128 }
129
130#ifndef DIS_DYN_SHAPES
131 auto *dynamic_shape_vector = runtime_graph->getDynamicShapeTensor(tensor);
132 if (dynamic_shape_vector != nullptr)
133 {
134 for (int n = 0; n < dynamic_shape_vector->dimensionsCount(); ++n)
135 {
136 dims[n] = dynamic_shape_vector->dims(n);
137 }
138 }
139 else
140 {
141 auto const tensor_shape = Tensor::tensor_shape(tensor);
142 assert(tensor_shape.size() <= kMaxSmallSize);
143 for (int i = 0; i < tensor_shape.size(); ++i)
144 {
145 dims[i] = tensor_shape[i];
146 }
147 }
148#else
149 auto const tensor_shape = Tensor::tensor_shape(tensor);
150 assert(tensor_shape.size() <= kMaxSmallSize);
151 for (int i = 0; i < tensor_shape.size(); ++i)
152 {
153 dims[i] = tensor_shape[i];
154 }
155#endif // DIS_DYN_SHAPES
156}
157
// Reinterprets a raw byte buffer as typed, read-only tensor data.
// A null buffer maps to a null typed pointer.
template <typename T> const T *getTensorData(const uint8_t *tensor_data)
{
  if (tensor_data == nullptr)
    return nullptr;
  return reinterpret_cast<const T *>(tensor_data);
}
162
// Mutable counterpart: reinterprets a raw byte buffer as typed tensor data.
// A null buffer maps to a null typed pointer.
template <typename T> inline T *getTensorData(uint8_t *tensor_data)
{
  if (tensor_data == nullptr)
    return nullptr;
  return reinterpret_cast<T *>(tensor_data);
}
167
168luci_interpreter::RuntimeShape getTensorRuntimeShape(const circle::Tensor *circle_tensor,
169 BaseRuntimeGraph *runtime_graph);
170
// A list of tensors in a format that can be used by kernels like split and
// concatenation.
//
// Collects, for each tensor in the input list, a data pointer and a
// RuntimeShape, exposing them as parallel C-style arrays (data() / shapes())
// suitable for passing to reference kernels. 'is_const' selects whether the
// element/data pointers are const-qualified.
//
// NOTE(review): relies on Tensor*-taking overloads of getTensorData /
// getTensorShape declared elsewhere in the project (not the uint8_t*
// overloads above) — confirm against the enclosing headers.
template <typename T, bool is_const> class VectorOfTensors
{
public:
  using ElementT = typename std::conditional<is_const, const T, T>::type;
  using TensorT = typename std::conditional<is_const, const Tensor, Tensor>::type;

  // Build with the tensors in 'tensor_list'.
  explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list)
  {
    const int num_tensors = tensor_list.size();

    // Reserve up front: all_shape_ must never reallocate after the pointers
    // into it are taken below.
    all_data_.reserve(num_tensors);
    all_shape_.reserve(num_tensors);
    all_shape_ptr_.reserve(num_tensors);

    for (TensorT *tensor : tensor_list)
    {
      all_data_.push_back(getTensorData<T>(tensor));
      all_shape_.push_back(getTensorShape(tensor));
    }

    // Taking the pointer from inside a std::vector is only OK if the vector is
    // never modified, so we populate all_shape in the previous loop and then we
    // are free to grab iterators here.
    for (luci_interpreter::RuntimeShape &shape : all_shape_)
    {
      all_shape_ptr_.push_back(&shape);
    }
  }
  // Return a pointer to the data pointers of all tensors in the list. For
  // example:
  //   float* const* f = v.data();
  //   f[0][1] is the second element of the first tensor.
  ElementT *const *data() const { return all_data_.data(); }

  // Return a pointer the shape pointers of all tensors in the list. For
  // example:
  //   const RuntimeShape* const* d = v.dims();
  //   dims[1] are the dimensions of the second tensor in the list.
  const luci_interpreter::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); }

private:
  std::vector<ElementT *> all_data_;                        // per-tensor data pointers
  std::vector<luci_interpreter::RuntimeShape> all_shape_;   // per-tensor shapes (stable storage)
  std::vector<luci_interpreter::RuntimeShape *> all_shape_ptr_; // pointers into all_shape_
};
219
// Base case: an empty candidate list never matches.
template <typename T> constexpr bool one_of_types() { return false; }

// Checks if T is equal to one of {U, Other...} types: test the head of the
// candidate list, then recurse on the tail.
template <typename T, typename U, typename... Other> constexpr bool one_of_types()
{
  return std::is_same<T, U>::value ? true : one_of_types<T, Other...>();
}
227
228void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
229 int32_t n_col, int32_t *output);
230
231// Checks if input and output dimensions are equal
232bool areShapesEqual(const luci_interpreter::RuntimeShape &input_shape1,
233 const luci_interpreter::RuntimeShape &input_shape2);
234
235#ifndef DIS_QUANT
236bool checkedLog2(const float x, int *log2_result);
237
238int calculateInputRadius(int input_integer_bits, int input_left_shift, int total_signed_bits);
239
240void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
241 int32_t *activation_min, int32_t *activation_max);
242
243void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
244 float output_scale, DataType data_type,
245 int32_t *activation_min, int32_t *activation_max);
246
247// Decompose a double multiplier into a Q0.31 int32 representation of its
248// significand, and shift representation of its exponent.
249//
250// Handles an arbitrary positive multiplier. The 'shift' output-value is
251// basically the 'floating-point exponent' of the multiplier:
252// Negative for a right-shift (when the multiplier is <1), positive for a
253// left-shift (when the multiplier is >1)
254void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
255
256// Decompose a double multiplier into a Q0.31 int32 representation of its
257// significand, and shift representation of NEGATIVE its exponent ---
258// this is intended as a RIGHT-shift.
259//
260// Restricted to the case where the multiplier < 1 (and non-negative).
261void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
262 int *left_shift);
263
264inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
265 float output_scale)
266{
267 const double input_product_scale = static_cast<double>(input_scale * filter_scale);
268 LUCI_INTERPRETER_CHECK(input_product_scale >= 0);
269 return input_product_scale / static_cast<double>(output_scale);
270}
271
272// TODO rename getQuantizedConvolutionMultiplers to something more general
273// it is used for non conv operators too
274inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
275 const std::vector<float> &filter_scale,
276 float output_scale)
277{
278 std::vector<double> effective_output_scales;
279 size_t n = filter_scale.size();
280 effective_output_scales.reserve(n);
281 for (size_t i = 0; i < n; ++i)
282 {
283 effective_output_scales.push_back(
284 getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
285 }
286 return effective_output_scales;
287}
288
// Fixed-point representation of one channel's rescale factor, as produced by
// quantizeMultiplier: 'multiplier' is the Q0.31 significand, 'shift' the
// (signed) exponent.
struct ChannelQuantMultipliers
{
  int shift;          // power-of-two exponent; negative means right-shift
  int32_t multiplier; // Q0.31 fixed-point significand
  // NOTE(review): the extracted listing skipped one original line here
  // (likely 'ChannelQuantMultipliers() = default;') — verify against the
  // upstream header.
};
295
296inline std::vector<ChannelQuantMultipliers>
297quantizeMultipliers(const std::vector<double> &effective_scale)
298{
299 size_t n = effective_scale.size();
300 std::vector<ChannelQuantMultipliers> params(n);
301 for (size_t i = 0; i < n; ++i)
302 {
303 quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
304 }
305 return params;
306}
307
308// A list of quantized tensors in a format that can be used by kernels like
309// split and concatenation.
310template <bool is_const> class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t, is_const>
311{
312public:
313 using typename VectorOfTensors<uint8_t, is_const>::TensorT;
314
315 // Build with the tensors in 'tensor_list'.
316 explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
317 : VectorOfTensors<uint8_t, is_const>(tensor_list)
318 {
319 for (TensorT *tensor : tensor_list)
320 {
321 zero_point_.push_back(tensor->zero_point());
322 scale_.push_back(tensor->scale());
323 }
324 }
325
326 const float *scale() const { return scale_.data(); }
327 const int32_t *zero_point() const { return zero_point_.data(); }
328
329private:
330 std::vector<int32_t> zero_point_;
331 std::vector<float> scale_;
332};
333#endif // DIS_QUANT
334
335} // namespace kernels
336} // namespace luci_interpreter
337
338#endif // LUCI_INTERPRETER_KERNELS_UTILS_H
luci_interpreter::RuntimeShape * getDynamicShapeTensor(const circle::Tensor *tensor)
int32_t dims(int i) const
Definition Tensor.h:108
void setDim(int i, int32_t val)
Definition Tensor.h:114
BroadcastableWrapper(const std::vector< T > &v)
Definition Utils.h:97
VectorOfQuantizedTensors(const std::vector< TensorT * > &tensor_list)
Definition Utils.h:316
VectorOfTensors(const std::vector< TensorT * > &tensor_list)
Definition Utils.h:180
ElementT *const * data() const
Definition Utils.h:206
typename std::conditional< is_const, const Tensor, Tensor >::type TensorT
Definition Utils.h:224
typename std::conditional< is_const, const T, T >::type ElementT
Definition Utils.h:223
const luci_interpreter::RuntimeShape *const * shapes() const
Definition Utils.h:212
#define LUCI_INTERPRETER_CHECK(cond)
Definition Utils.h:36
__global uchar * offset(const Image *img, int x, int y)
Definition helpers.h:540
DataType
"scalar" value type
Definition DataType.h:27
bool checkedLog2(const float x, int *log2_result)
Definition Utils.cpp:113
int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size, int32_t out_size)
Definition Utils.h:41
int calculateInputRadius(int input_integer_bits, int input_left_shift, int total_signed_bits)
Definition Utils.cpp:123
int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
Definition Utils.h:75
Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape)
Definition Utils.cpp:204
bool areShapesEqual(const luci_interpreter::RuntimeShape &input_shape1, const luci_interpreter::RuntimeShape &input_shape2)
Definition Utils.cpp:89
const T * getTensorData(const Tensor *tensor)
Definition Utils.h:208
std::vector< ChannelQuantMultipliers > quantizeMultipliers(const std::vector< double > &effective_scale)
Definition Utils.h:170
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
Definition Utils.h:194
luci_interpreter::FusedActFunc Activation
Definition Utils.h:34
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
Definition Utils.cpp:52
void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier, int *left_shift)
Definition Utils.cpp:193
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
Definition Utils.cpp:119
constexpr bool one_of_types()
Definition Utils.h:88
double getQuantizedConvolutionMultipler(float input_scale, float filter_scale, float output_scale)
Definition Utils.h:137
int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size, int32_t out_size, int32_t *offset)
Definition Utils.h:49
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
Definition Utils.cpp:157
int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size, int32_t stride, int32_t dilation_rate=1)
Definition Utils.h:59
std::vector< double > getQuantizedConvolutionMultiplers(float input_scale, const std::vector< float > &filter_scale, float output_scale)
Definition Utils.h:147
void getTensorDims(const circle::Tensor *tensor, BaseRuntimeGraph *runtime_graph, int32_t *dims)
Definition Utils.h:121
void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row, int32_t n_col, int32_t *output)
Definition Utils.cpp:75
luci_interpreter::RuntimeShape getTensorRuntimeShape(const circle::Tensor *circle_tensor, BaseRuntimeGraph *runtime_graph)
Definition Utils.cpp:29
int32_t size[5]
Definition Slice.cpp:35
const loco::Dimension & dim(uint32_t axis) const
Definition Tensor.h:44