#ifndef LUCI_INTERPRETER_KERNELS_UTILS_H
#define LUCI_INTERPRETER_KERNELS_UTILS_H

#include "luci_interpreter/core/Tensor.h"

#include <cassert>
#include <cstdint>
#include <type_traits>
#include <vector>

namespace luci_interpreter
{
namespace kernels
{

#define LUCI_INTERPRETER_CHECK(cond)                   \
  if (!(cond))                                         \
  {                                                    \
    assert(false && "LUCI_INTERPRETER_CHECK fails");   \
  }
inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
                              int32_t filter_size, int32_t out_size)
{
  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
  return padding > 0 ? padding : 0;
}
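// Worked example (illustrative sketch, not part of the original header; the
// helper name is hypothetical): a 5-wide input, 3-wide filter, stride 1 and a
// 5-wide output (SAME padding) need ((5 - 1) * 1 + 3 - 5) / 2 = 1 pixel per side.
inline int32_t examplePaddingSameConv3x3()
{
  return computePadding(/*stride=*/1, /*dilation_rate=*/1, /*in_size=*/5,
                        /*filter_size=*/3, /*out_size=*/5); // == 1
}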
inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size,
                                        int32_t filter_size, int32_t out_size, int32_t *offset)
{
  int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  int32_t total_padding = ((out_size - 1) * stride + effective_filter_size - in_size);
  total_padding = total_padding > 0 ? total_padding : 0;
  *offset = total_padding % 2;
  return total_padding / 2;
}
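// Worked example (illustrative sketch, hypothetical helper): a 5-wide input with
// a 4-wide filter, stride 1 and a 5-wide output needs a total padding of
// (5 - 1) * 1 + 4 - 5 = 3, which cannot be split evenly between the two sides.
inline int32_t examplePaddingWithOffset(int32_t *offset)
{
  // Returns 1 and sets *offset to 1 (the extra pixel goes to the trailing side).
  return computePaddingWithOffset(/*stride=*/1, /*dilation_rate=*/1, /*in_size=*/5,
                                  /*filter_size=*/4, /*out_size=*/5, offset);
}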
inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size,
                                 int32_t stride, int32_t dilation_rate = 1)
{
  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  switch (padding)
  {
    case Padding::SAME:
      return (image_size + stride - 1) / stride;
    case Padding::VALID:
      return (image_size + stride - effective_filter_size) / stride;
    default:
      assert(false);
      return 0;
  }
}
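// Worked example (illustrative sketch, hypothetical helpers; assumes the
// Padding::SAME / Padding::VALID enumerators used above): a 10-wide image with a
// 3-wide filter and stride 2 gives ceil(10 / 2) = 5 for SAME and
// (10 + 2 - 3) / 2 = 4 for VALID.
inline int32_t exampleOutputSizeSame()
{
  return computeOutputSize(Padding::SAME, /*image_size=*/10, /*filter_size=*/3, /*stride=*/2); // 5
}

inline int32_t exampleOutputSizeValid()
{
  return computeOutputSize(Padding::VALID, /*image_size=*/10, /*filter_size=*/3, /*stride=*/2); // 4
}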
inline int32_t calcOffset(const circle::Tensor *tensor, int32_t d0, int32_t d1, int32_t d2,
                          int32_t d3)
{
  return ((d0 * Tensor::dim(tensor, 1) + d1) * Tensor::dim(tensor, 2) + d2) *
           Tensor::dim(tensor, 3) +
         d3;
}

luci_interpreter::RuntimeShape calculateShapeForBroadcast(const circle::Tensor *input1,
                                                          const circle::Tensor *input2);
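// Broadcast example (sketch): inputs with shapes [2, 1, 5] and [1, 3, 5] are
// broadcastable, since every dimension pair is either equal or 1, and the
// resulting shape is [2, 3, 5].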
// Wraps a vector so that a size-1 vector transparently broadcasts: the index is
// multiplied by a stride that is 0 for a single element and 1 otherwise.
template <typename T> class BroadcastableWrapper
{
public:
  BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}

  T operator[](int idx) { return _v[idx * _stride]; }

private:
  const std::vector<T> &_v;
  int _stride;
};
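// Usage sketch (hypothetical helper, assuming the stride-based operator[]
// reconstructed above): a single per-tensor value can stand in for a
// per-channel vector, since every index maps back to element 0.
inline float exampleBroadcastScalarScale()
{
  const std::vector<float> per_tensor_scale{0.5f};
  BroadcastableWrapper<float> scales(per_tensor_scale);
  return scales[3]; // still 0.5f
}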
inline luci_interpreter::RuntimeShape getTensorShape(const circle::Tensor *tensor)
{
  if (tensor == nullptr)
    return luci_interpreter::RuntimeShape();

  auto const tensor_shape = Tensor::tensor_shape(tensor);

  luci_interpreter::RuntimeShape runtime_shape(tensor_shape.size());
  for (int i = 0; i < tensor_shape.size(); ++i)
  {
    runtime_shape.setDim(i, tensor_shape[i]);
  }
  return runtime_shape;
}
inline void getTensorDims(const circle::Tensor *tensor, BaseRuntimeGraph *runtime_graph,
                          int32_t *dims)
{
  if (tensor == nullptr)
  {
    dims = nullptr;
    return;
  }

#ifndef DIS_DYN_SHAPES
  auto *dynamic_shape_vector = runtime_graph->getDynamicShapeTensor(tensor);
  if (dynamic_shape_vector != nullptr)
  {
    for (int n = 0; n < dynamic_shape_vector->dimensionsCount(); ++n)
    {
      dims[n] = dynamic_shape_vector->dims(n);
    }
  }
  else
  {
    auto const tensor_shape = Tensor::tensor_shape(tensor);
    assert(tensor_shape.size() <= kMaxSmallSize);
    for (int i = 0; i < tensor_shape.size(); ++i)
    {
      dims[i] = tensor_shape[i];
    }
  }
#else
  auto const tensor_shape = Tensor::tensor_shape(tensor);
  assert(tensor_shape.size() <= kMaxSmallSize);
  for (int i = 0; i < tensor_shape.size(); ++i)
  {
    dims[i] = tensor_shape[i];
  }
#endif // DIS_DYN_SHAPES
}
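// Usage sketch (hypothetical helper): kernels typically fill a small fixed-size
// dims buffer (at least kMaxSmallSize entries, as the asserts above require); a
// dynamic shape recorded in the runtime graph takes precedence over the static
// shape stored in the circle model.
inline void exampleFillInputDims(const circle::Tensor *input, BaseRuntimeGraph *runtime_graph,
                                 int32_t *dims_buffer)
{
  getTensorDims(input, runtime_graph, dims_buffer);
}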
template <typename T> const T *getTensorData(const uint8_t *tensor_data)
{
  return tensor_data != nullptr ? reinterpret_cast<const T *>(tensor_data) : nullptr;
}

template <typename T> T *getTensorData(uint8_t *tensor_data)
{
  return tensor_data != nullptr ? reinterpret_cast<T *>(tensor_data) : nullptr;
}
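// Usage sketch (hypothetical helper): reinterpret a raw byte buffer, e.g. one
// obtained from the runtime graph for a given tensor, as typed element data.
inline float exampleFirstFloat(const uint8_t *raw_tensor_data)
{
  const float *data = getTensorData<float>(raw_tensor_data);
  return data != nullptr ? data[0] : 0.0f;
}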
// A list of tensors in a format that can be used by kernels like split and
// concatenation.
template <typename T, bool is_const> class VectorOfTensors
{
public:
  using ElementT = typename std::conditional<is_const, const T, T>::type;
  using TensorT = typename std::conditional<is_const, const Tensor, Tensor>::type;

  // Build with the tensors in 'tensor_list'.
  explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list)
  {
    const int num_tensors = tensor_list.size();

    all_data_.reserve(num_tensors);
    all_shape_.reserve(num_tensors);
    all_shape_ptr_.reserve(num_tensors);

    for (TensorT *tensor : tensor_list)
    {
      all_data_.push_back(getTensorData<T>(tensor));
      all_shape_.push_back(getTensorShape(tensor));
    }

    // Taking pointers into all_shape_ is only safe because the vector is never
    // modified after this point.
    for (luci_interpreter::RuntimeShape &shape : all_shape_)
    {
      all_shape_ptr_.push_back(&shape);
    }
  }

  // Pointer to the data pointers of all tensors in the list.
  ElementT *const *data() const { return all_data_.data(); }

  // Pointer to the shape pointers of all tensors in the list.
  const luci_interpreter::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); }

private:
  std::vector<ElementT *> all_data_;
  std::vector<luci_interpreter::RuntimeShape> all_shape_;
  std::vector<luci_interpreter::RuntimeShape *> all_shape_ptr_;
};
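// Usage sketch: a concatenation-style kernel can hand the parallel arrays to a
// reference implementation (the callee name below is illustrative only):
//   VectorOfTensors<float, true> all_inputs(input_tensor_list);
//   reference_concatenation(params, all_inputs.shapes(), all_inputs.data(), ...);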
template <typename T> constexpr bool one_of_types() { return false; }

// Checks whether T is the same type as one of {U, Other...}.
template <typename T, typename U, typename... Other> constexpr bool one_of_types()
{
  return std::is_same<T, U>::value || one_of_types<T, Other...>();
}
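// Illustrative checks (not part of the original header): handy inside kernel
// static_asserts that restrict the element types an implementation supports.
static_assert(one_of_types<int32_t, float, int32_t>(), "int32_t is one of {float, int32_t}");
static_assert(!one_of_types<double, float, int32_t>(), "double is not one of {float, int32_t}");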
void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
                                    int32_t n_col, int32_t *output);

void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
                                       int32_t *activation_min, int32_t *activation_max);

void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
                                       float output_scale, DataType data_type,
                                       int32_t *activation_min, int32_t *activation_max);

// Decomposes a double multiplier into a fixed-point multiplier and a shift.
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
                                               float output_scale)
{
  const double input_product_scale = static_cast<double>(input_scale * filter_scale);

  return input_product_scale / static_cast<double>(output_scale);
}
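// Worked example (hypothetical helper): input scale 0.5, filter scale 0.25 and
// output scale 0.125 give an effective multiplier of (0.5 * 0.25) / 0.125 = 1.0.
inline double exampleConvolutionMultiplier()
{
  return getQuantizedConvolutionMultipler(0.5f, 0.25f, 0.125f); // == 1.0
}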
inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
                                                             const std::vector<float> &filter_scale,
                                                             float output_scale)
{
  std::vector<double> effective_output_scales;
  size_t n = filter_scale.size();
  effective_output_scales.reserve(n);
  for (size_t i = 0; i < n; ++i)
  {
    effective_output_scales.push_back(
      getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
  }
  return effective_output_scales;
}
struct ChannelQuantMultipliers
{
  int shift;
  int32_t multiplier;
  ChannelQuantMultipliers() = default;
};

inline std::vector<ChannelQuantMultipliers>
quantizeMultipliers(const std::vector<double> &effective_scale)
{
  size_t n = effective_scale.size();
  std::vector<ChannelQuantMultipliers> params(n);
  for (size_t i = 0; i < n; ++i)
  {
    quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
  }
  return params;
}
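// Pipeline sketch (hypothetical helper): per-channel effective scales are
// computed first, then each one is decomposed into a fixed-point multiplier and
// shift that quantized kernels can apply with integer arithmetic.
inline std::vector<ChannelQuantMultipliers>
examplePerChannelQuantParams(float input_scale, const std::vector<float> &filter_scales,
                             float output_scale)
{
  const auto effective_scales =
    getQuantizedConvolutionMultiplers(input_scale, filter_scales, output_scale);
  return quantizeMultipliers(effective_scales);
}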
// A list of quantized tensors in a format that can be used by kernels like
// split and concatenation.
template <bool is_const>
class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t, is_const>
{
public:
  using typename VectorOfTensors<uint8_t, is_const>::TensorT;

  // Build with the tensors in 'tensor_list'.
  explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
    : VectorOfTensors<uint8_t, is_const>(tensor_list)
  {
    for (TensorT *tensor : tensor_list)
    {
      zero_point_.push_back(tensor->zero_point());
      scale_.push_back(tensor->scale());
    }
  }

  const float *scale() const { return scale_.data(); }
  const int32_t *zero_point() const { return zero_point_.data(); }

private:
  std::vector<int32_t> zero_point_;
  std::vector<float> scale_;
};
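// Usage sketch: quantized concatenation-style kernels can pass the per-tensor
// scale() and zero_point() arrays alongside data() and shapes() from the base
// class to a quantized reference implementation (callee name illustrative only):
//   VectorOfQuantizedTensors<true> all_inputs(input_tensor_list);
//   reference_concatenation_uint8(params, all_inputs.shapes(), all_inputs.data(),
//                                 all_inputs.scale(), all_inputs.zero_point(), ...);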
} // namespace kernels
} // namespace luci_interpreter

#endif // LUCI_INTERPRETER_KERNELS_UTILS_H