ONE - On-device Neural Engine
luci_interpreter::kernels Namespace Reference

Namespaces

namespace  lstm
 
namespace  testing
 

Data Structures

class  Abs
 
class  Add
 
class  ArgMax
 
class  AveragePool2D
 
class  BatchMatMul
 
class  BatchToSpaceND
 
class  BroadcastableWrapper
 
class  BroadcastTo
 
class  Cast
 
struct  ChannelQuantMultipliers
 
class  Concatenation
 
class  Conv2D
 
class  Cos
 
class  CumSum
 
class  DepthToSpace
 
class  DepthwiseConv2D
 
class  Dequantize
 
class  Div
 
class  DownsamplingConv2DKernel
 
class  Elu
 
class  Equal
 
class  Exp
 
class  ExpandDims
 
class  Fill
 
class  Floor
 
class  FloorDiv
 
class  FloorMod
 
class  FullyConnected
 
class  Gather
 
class  Gelu
 
class  Greater
 
class  GreaterEqual
 
class  GRU
 
class  HardSwish
 
class  If
 
class  InstanceNorm
 
class  L2Normalize
 
class  L2Pool2D
 
class  LeakyRelu
 
class  Less
 
class  LessEqual
 
class  LocalResponseNormalization
 
class  Log
 
class  LogicalAnd
 
class  LogicalNot
 
class  LogicalOr
 
class  Logistic
 
class  LogSoftmax
 
class  Maximum
 
class  MaxPool2D
 
class  Mean
 
class  Minimum
 
class  MirrorPad
 
class  MISOKernel
 
class  Mul
 
class  Neg
 
class  NotEqual
 
class  OneHot
 
class  Pack
 
class  Pad
 
class  PadV2
 
class  Pow
 
class  PRelu
 
class  Quantize
 
class  ReduceMax
 
class  ReduceProd
 
class  Relu
 
class  Relu0To1
 
class  Relu6
 
class  Reshape
 
class  ResizeBilinear
 
class  ResizeNearestNeighbor
 
class  ReverseV2
 
class  RmsNorm
 
class  RoPE
 
class  Rsqrt
 
class  Select
 
class  SelectV2
 
class  ShapeKernel
 
class  Sin
 
class  SISOKernel
 
class  Slice
 
class  Softmax
 
class  SpaceToBatchND
 
class  SpaceToDepth
 
class  Split
 
class  SplitV
 
class  Sqrt
 
class  Square
 
class  SquaredDifference
 
class  Squeeze
 
class  StridedSlice
 
class  Sub
 
class  Sum
 
class  SVDF
 
class  Tanh
 
class  Tile
 
struct  TISOData
 
class  TISOKernel
 
class  Transpose
 
class  TransposeConv
 
class  UnidirectionalSequenceLSTM
 
class  Unpack
 
class  VectorOfQuantizedTensors
 
class  VectorOfTensors
 
class  While
 

Typedefs

using Activation = luci_interpreter::FusedActFunc
 

Functions

void TransposeRowsColumns (const Tensor *tensor_in, Tensor *tensor_out)
 
template<typename T , typename Op , int N = 5>
void BinaryOpBroadcastSlow (const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, const tflite::RuntimeShape &unextended_input2_shape, const T *input2_data, const tflite::RuntimeShape &unextended_output_shape, T *output_data, Op op)
 
template<typename T >
Shape calculateOutputShape (const Tensor *input, const Tensor *begin, const Tensor *size)
 
template<typename T >
void getBeginAndSizeVectors (int dimensions, const Tensor *begin, const Tensor *size, std::vector< int > *begins, std::vector< int > *sizes)
 
TfLiteFusedActivation getTfLiteActivation (Activation activation)
 
template<typename T >
void calculateActivationRange (Activation activation, T *activation_min, T *activation_max)
 
template void calculateActivationRange (Activation activation, float *activation_min, float *activation_max)
 
template void calculateActivationRange (Activation activation, int32_t *activation_min, int32_t *activation_max)
 
template void calculateActivationRange (Activation activation, int64_t *activation_min, int64_t *activation_max)
 
void calculateActivationRangeQuantized (Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
 
void quantizeMultiplier (double double_multiplier, int32_t *quantized_multiplier, int *shift)
 
void quantizeMultiplierSmallerThanOneExp (double double_multiplier, int32_t *quantized_multiplier, int *left_shift)
 
Shape calculateShapeForBroadcast (const Shape &input1_shape, const Shape &input2_shape)
 
int32_t computePadding (int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size, int32_t out_size)
 
int32_t computePaddingWithOffset (int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size, int32_t out_size, int32_t *offset)
 
int32_t computeOutputSize (Padding padding, int32_t image_size, int32_t filter_size, int32_t stride, int32_t dilation_rate=1)
 
int32_t calcOffset (const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
 
template<typename T >
constexpr bool one_of_types ()
 
template<typename T , typename U , typename... Other>
constexpr bool one_of_types ()
 
template<typename T >
void fillArithmeticActivationRange (tflite::ArithmeticParams &p, Activation act)
 
double getQuantizedConvolutionMultipler (float input_scale, float filter_scale, float output_scale)
 
std::vector< double > getQuantizedConvolutionMultiplers (float input_scale, const std::vector< float > &filter_scale, float output_scale)
 
std::vector< ChannelQuantMultipliers > quantizeMultipliers (const std::vector< double > &effective_scale)
 
tflite::RuntimeShape getTensorShape (const Tensor *tensor)
 
template<typename T >
const T * getTensorData (const Tensor *tensor)
 
template<typename T >
T * getTensorData (Tensor *tensor)
 
template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void evalTISOKernel (TISOFunc tiso_func, TISOBroadcastFunc tiso_broadcast_func, kernels::TISOKernel *kernel, kernels::TISOData *kernel_data, const Options *options, RuntimeShape &&input_shape_1, RuntimeShape &&input_shape_2, RuntimeShape &&output_shape)
 
template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void evalTISOInplaceKernel (TISOFunc tiso_func, TISOBroadcastFunc tiso_broadcast_func, kernels::TISOKernel *kernel, const Options *options, RuntimeShape &&input_shape_1, RuntimeShape &&input_shape_2, RuntimeShape &&output_shape)
 
void CheckBinaryOpDataTypesEqual (const kernels::TISOKernel &kernel)
 
template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void evalTISOQuantizedKernel (TISOFunc tiso_func, TISOBroadcastFunc tiso_broadcast_func, kernels::TISOKernel *kernel, kernels::TISOData *kernel_data, const Options *options)
 
template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void evalTISOInplaceQuantizedKernel (TISOFunc tiso_func, TISOBroadcastFunc tiso_broadcast_func, kernels::TISOKernel *kernel, const Options *options)
 
template<typename T >
void evalComparisonGeneric (const circle::Tensor *x, const circle::Tensor *y, const circle::Tensor *output, BaseRuntimeGraph *runtime_graph, bool F(T, T))
 
luci_interpreter::RuntimeShape getTensorRuntimeShape (const circle::Tensor *circle_tensor, BaseRuntimeGraph *runtime_graph)
 
void matrixScalarMultiplyAccumulate (const int8_t *matrix, int32_t scalar, int32_t n_row, int32_t n_col, int32_t *output)
 
bool areShapesEqual (const luci_interpreter::RuntimeShape &input_shape1, const luci_interpreter::RuntimeShape &input_shape2)
 
bool checkedLog2 (const float x, int *log2_result)
 
int calculateInputRadius (int input_integer_bits, int input_left_shift, int total_signed_bits)
 
void calculateActivationRangeQuantized (Activation activation, int32_t output_zero_point, float output_scale, DataType data_type, int32_t *activation_min, int32_t *activation_max)
 
void calculateActivationRangeQuantized (Activation activation, const circle::Tensor *output, int32_t *activation_min, int32_t *activation_max)
 
luci_interpreter::RuntimeShape calculateShapeForBroadcast (const circle::Tensor *input1, const circle::Tensor *input2)
 
int32_t calcOffset (const circle::Tensor *tensor, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
 
luci_interpreter::RuntimeShape getTensorShape (const circle::Tensor *tensor)
 
void getTensorDims (const circle::Tensor *tensor, BaseRuntimeGraph *runtime_graph, int32_t *dims)
 
template<typename T >
const T * getTensorData (const uint8_t *tensor_data)
 
template<typename T >
T * getTensorData (uint8_t *tensor_data)
 

Variables

const int max_dim = 4
 

Typedef Documentation

◆ Activation

using Activation = luci_interpreter::FusedActFunc

Function Documentation

◆ areShapesEqual()

bool luci_interpreter::kernels::areShapesEqual ( const luci_interpreter::RuntimeShape &  input_shape1,
const luci_interpreter::RuntimeShape &  input_shape2 
)

Definition at line 89 of file Utils.cpp.

91{
92 if (input_shape1.dimensionsCount() == input_shape2.dimensionsCount())
93 {
94 int N = input_shape1.dimensionsCount();
95 for (int i = 0; i < N; ++i)
96 {
97 if (input_shape1.dims(i) != input_shape2.dims(i))
98 return false;
99 }
100 return true;
101 }
102 return false;
103}
int32_t dimensionsCount() const
Definition Tensor.h:106
int32_t dims(int i) const
Definition Tensor.h:108

References luci_interpreter::RuntimeShape::dimensionsCount(), and luci_interpreter::RuntimeShape::dims().

Referenced by luci_interpreter::execute_kernel_CircleFloorDiv(), luci_interpreter::execute_kernel_CircleFloorMod(), luci_interpreter::execute_kernel_CircleMaximum(), and luci_interpreter::execute_kernel_CircleMinimum().

◆ BinaryOpBroadcastSlow()

template<typename T , typename Op , int N = 5>
void luci_interpreter::kernels::BinaryOpBroadcastSlow ( const tflite::RuntimeShape &  unextended_input1_shape,
const T *  input1_data,
const tflite::RuntimeShape &  unextended_input2_shape,
const T *  input2_data,
const tflite::RuntimeShape &  unextended_output_shape,
T *  output_data,
Op  op 
)

Definition at line 31 of file BinaryOpCommon.h.

37{
38 if (unextended_input1_shape == unextended_input2_shape)
39 {
40 const int flat_size = tflite::MatchingElementsSize(
41 unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
42 for (int i = 0; i < flat_size; ++i)
43 {
44 output_data[i] = op(input1_data[i], input2_data[i]);
45 }
46 }
47 else
48 {
49 assert(unextended_input1_shape.DimensionsCount() <= N);
50 assert(unextended_input2_shape.DimensionsCount() <= N);
51 assert(unextended_output_shape.DimensionsCount() <= N);
52
53 tflite::NdArrayDesc<N> desc1{};
54 tflite::NdArrayDesc<N> desc2{};
55 tflite::NdArrayDesc<N> output_desc{};
56 tflite::NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape,
57 &desc1, &desc2);
58 tflite::CopyDimsToDesc(tflite::RuntimeShape::ExtendedShape(N, unextended_output_shape),
59 &output_desc);
60
61 auto fn = [&](int indexes[N]) {
62 output_data[SubscriptToIndex(output_desc, indexes)] =
63 op(input1_data[SubscriptToIndex(desc1, indexes)],
64 input2_data[SubscriptToIndex(desc2, indexes)]);
65 };
66 tflite::NDOpsHelper<N>(output_desc, fn);
67 }
68}
int SubscriptToIndex(const NdArrayDesc< 4 > &desc, int i0, int i1, int i2, int i3)
Definition NDArray.h:54
NdArrayDesc< 4 > desc1
NdArrayDesc< 4 > desc2

References desc1, desc2, and SubscriptToIndex().

Referenced by luci_interpreter::kernels::LogicalOr::execute().
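
Conceptually, the helper applies op element-by-element: a flat loop when the two shapes already match, and an index-mapping walk (via the tflite NdArrayDesc machinery) when one input has to be broadcast. Below is a minimal standalone sketch of the broadcast idea for a single size-1 dimension; it does not use the tflite helpers and is only an illustration.

#include <cassert>
#include <functional>

int main()
{
  // A [2, 3] input combined with a [1, 3] input: the size-1 row dimension of the
  // second operand is reused for every row of the first, mirroring the slow path above.
  const int a[2][3] = {{1, 2, 3}, {4, 5, 6}};
  const int b[1][3] = {{10, 20, 30}};
  int out[2][3];

  auto op = std::plus<int>{};
  for (int r = 0; r < 2; ++r)
    for (int c = 0; c < 3; ++c)
      out[r][c] = op(a[r][c], b[0][c]);

  assert(out[0][0] == 11 && out[1][2] == 36);
  return 0;
}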

◆ calcOffset() [1/2]

int32_t luci_interpreter::kernels::calcOffset ( const circle::Tensor *  tensor,
int32_t  d0,
int32_t  d1,
int32_t  d2,
int32_t  d3 
)
inline

Definition at line 78 of file Utils.h.

80{
81
82 return ((d0 * Tensor::dim(tensor, 1) + d1) * Tensor::dim(tensor, 2) + d2) *
83 Tensor::dim(tensor, 3) +
84 d3;
85}
const loco::Dimension & dim(uint32_t axis) const
Definition Tensor.h:44

References circle_eval_diff::TensorShape::dim().

◆ calcOffset() [2/2]

int32_t luci_interpreter::kernels::calcOffset ( const Shape &  shape,
int32_t  d0,
int32_t  d1,
int32_t  d2,
int32_t  d3 
)
inline

Definition at line 75 of file Utils.h.

76{
77 return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3;
78}
int32_t dim(int i) const
Definition Tensor.h:41

References luci_interpreter::Shape::dim().
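
Both overloads compute the usual row-major flattening of a 4-D index. A standalone worked example of the same arithmetic (the dims array is a hypothetical stand-in for Shape::dim / Tensor::dim):

#include <cassert>
#include <cstdint>

int32_t rowMajorOffset(const int32_t dims[4], int32_t d0, int32_t d1, int32_t d2, int32_t d3)
{
  return ((d0 * dims[1] + d1) * dims[2] + d2) * dims[3] + d3;
}

int main()
{
  const int32_t dims[4] = {2, 3, 4, 5}; // e.g. a NHWC tensor with N=2, H=3, W=4, C=5
  // Element (1, 2, 3, 4) is flattened to ((1*3 + 2)*4 + 3)*5 + 4 = 119.
  assert(rowMajorOffset(dims, 1, 2, 3, 4) == 119);
  return 0;
}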

◆ calculateActivationRange() [1/4]

template void luci_interpreter::kernels::calculateActivationRange ( Activation  activation,
float *  activation_min,
float *  activation_max 
)

◆ calculateActivationRange() [2/4]

template void luci_interpreter::kernels::calculateActivationRange ( Activation  activation,
int32_t *  activation_min,
int32_t *  activation_max 
)

◆ calculateActivationRange() [3/4]

template void luci_interpreter::kernels::calculateActivationRange ( Activation  activation,
int64_t *  activation_min,
int64_t *  activation_max 
)

◆ calculateActivationRange() [4/4]

template<typename T >
void luci_interpreter::kernels::calculateActivationRange ( Activation  activation,
T *  activation_min,
T *  activation_max 
)

Definition at line 52 of file Utils.cpp.

53{
54 switch (activation)
55 {
56 case Activation::NONE:
57 *activation_min = std::numeric_limits<T>::lowest();
58 *activation_max = std::numeric_limits<T>::max();
59 break;
60 case Activation::RELU:
61 *activation_min = 0;
62 *activation_max = std::numeric_limits<T>::max();
63 break;
64 case Activation::RELU_N1_TO_1:
65 *activation_min = -1;
66 *activation_max = 1;
67 break;
68 case Activation::RELU6:
69 *activation_min = 0;
70 *activation_max = 6;
71 break;
72 default:
73 throw std::runtime_error("Unsupported activation.");
74 }
75}

Referenced by luci_interpreter::createConv2DParams(), luci_interpreter::createPoolParams(), luci_interpreter::kernels::L2Pool2D::execute(), and fillArithmeticActivationRange().
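
A short standalone example of how such an (activation_min, activation_max) pair is typically consumed: the kernel's raw outputs are clamped to the range. The values below assume the RELU6 case from the listing above.

#include <algorithm>
#include <cassert>

int main()
{
  // For RELU6 the listing above produces activation_min = 0 and activation_max = 6.
  const float activation_min = 0.0f;
  const float activation_max = 6.0f;

  const float raw_outputs[] = {-3.5f, 2.0f, 9.0f};
  float clamped[3];
  for (int i = 0; i < 3; ++i)
    clamped[i] = std::min(activation_max, std::max(activation_min, raw_outputs[i]));

  assert(clamped[0] == 0.0f && clamped[1] == 2.0f && clamped[2] == 6.0f);
  return 0;
}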

◆ calculateActivationRangeQuantized() [1/3]

void luci_interpreter::kernels::calculateActivationRangeQuantized ( Activation  activation,
const circle::Tensor *  output,
int32_t *  activation_min,
int32_t *  activation_max 
)

Definition at line 207 of file Utils.cpp.

209{
210 assert(Tensor::zero_points(output).size() == 1);
211 const float scale = Tensor::scale(output);
212 const int32_t zero_point = Tensor::zero_point(output);
213 calculateActivationRangeQuantized(activation, zero_point, scale, Tensor::element_type(output),
214 activation_min, activation_max);
215}
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
Definition Utils.cpp:119
int32_t size[5]
Definition Slice.cpp:35

References calculateActivationRangeQuantized(), and size.

◆ calculateActivationRangeQuantized() [2/3]

void luci_interpreter::kernels::calculateActivationRangeQuantized ( Activation  activation,
const Tensor *  output,
int32_t *  activation_min,
int32_t *  activation_max 
)

Definition at line 119 of file Utils.cpp.

121{
122 assert(output->zero_points().size() == 1);
123 int32_t qmin{};
124 int32_t qmax{};
125 switch (output->element_type())
126 {
127 case DataType::U4:
128 qmin = 0;
129 qmax = 15;
130 break;
131 case DataType::U8:
132 qmin = 0;
133 qmax = std::numeric_limits<uint8_t>::max();
134 break;
135 case DataType::S4:
136 qmin = -8;
137 qmax = 7;
138 break;
139 case DataType::S8:
140 qmin = -std::numeric_limits<int8_t>::max();
141 qmax = std::numeric_limits<int8_t>::max();
142 break;
143 case DataType::S16:
144 // For now, assume that signed int16 type implies signed symmetric quantization.
145 assert(output->zero_point() == 0);
146 qmin = -std::numeric_limits<int16_t>::max();
147 qmax = std::numeric_limits<int16_t>::max();
148 break;
149 default:
150 throw std::runtime_error("luci-intp (calculateActivationRangeQuantized) Unsupported type.");
151 }
152
153 calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, activation_min,
154 activation_max);
155}

Referenced by calculateActivationRangeQuantized(), luci_interpreter::createConv2DParams(), luci_interpreter::createPoolParams(), and evalTISOQuantizedKernel().

◆ calculateActivationRangeQuantized() [3/3]

void luci_interpreter::kernels::calculateActivationRangeQuantized ( Activation  activation,
int32_t  output_zero_point,
float  output_scale,
DataType  data_type,
int32_t *  activation_min,
int32_t *  activation_max 
)

Definition at line 177 of file Utils.cpp.

180{
181 int32_t qmin{};
182 int32_t qmax{};
183 switch (data_type)
184 {
185 case DataType::U8:
186 qmin = 0;
187 qmax = std::numeric_limits<uint8_t>::max();
188 break;
189 case DataType::S8:
190 qmin = -std::numeric_limits<int8_t>::max();
191 qmax = std::numeric_limits<int8_t>::max();
192 break;
193 case DataType::S16:
194 // For now, assume that signed int16 type implies signed symmetric quantization.
195 assert(output_zero_point == 0);
196 qmin = -std::numeric_limits<int16_t>::max();
197 qmax = std::numeric_limits<int16_t>::max();
198 break;
199 default:
200 assert(false && "Unsupported type.");
201 }
202
203 calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output_zero_point, output_scale,
204 activation_min, activation_max);
205}

◆ calculateInputRadius()

int luci_interpreter::kernels::calculateInputRadius ( int  input_integer_bits,
int  input_left_shift,
int  total_signed_bits 
)

Definition at line 123 of file Utils.cpp.

124{
125 const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
126 (1LL << (total_signed_bits - input_integer_bits)) /
127 (1LL << input_left_shift);
128 // Tighten bound using floor. Suppose that we could use the exact value.
129 // After scaling the difference, the result would be at the maximum. Thus we
130 // must ensure that our value has lower magnitude.
131 return static_cast<int>(std::floor(max_input_rescaled));
132}

◆ calculateOutputShape()

template<typename T >
Shape luci_interpreter::kernels::calculateOutputShape ( const Tensor *  input,
const Tensor *  begin,
const Tensor *  size 
)

Definition at line 37 of file Slice.cpp.

38{
39 Shape output_shape = Shape(input->shape().num_dims());
40 for (int idx = 0; idx < input->shape().num_dims(); idx++)
41 {
42 T size_value = getTensorData<T>(size)[idx];
43 if (size_value < 0)
44 {
45 if (size_value != -1)
46 {
47 throw std::runtime_error("Invalid size.");
48 }
49 size_value = input->shape().dim(idx) - getTensorData<T>(begin)[idx];
50 }
51 else
52 {
53 if (input->shape().dim(idx) < getTensorData<T>(begin)[idx] + size_value)
54 {
55 throw std::runtime_error("Invalid begin and size.");
56 }
57 }
58 output_shape.dim(idx) = static_cast<int>(size_value);
59 }
60 return output_shape;
61}
const luci_interpreter::RuntimeShape output_shape
int32_t begin[5]
Definition Slice.cpp:33

References begin, output_shape, and size.
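
A standalone sketch of the rule implemented above: a size entry of -1 means "up to the end of that dimension", and otherwise begin + size must fit inside the input dimension.

#include <cassert>
#include <stdexcept>
#include <vector>

std::vector<int> sliceOutputShape(const std::vector<int> &input_dims, const std::vector<int> &begin,
                                  const std::vector<int> &size)
{
  std::vector<int> out(input_dims.size());
  for (size_t i = 0; i < input_dims.size(); ++i)
  {
    int s = size[i];
    if (s < 0)
    {
      if (s != -1)
        throw std::runtime_error("Invalid size.");
      s = input_dims[i] - begin[i]; // -1 means "take everything from begin to the end"
    }
    else if (input_dims[i] < begin[i] + s)
    {
      throw std::runtime_error("Invalid begin and size.");
    }
    out[i] = s;
  }
  return out;
}

int main()
{
  // Input [4, 6], begin [1, 0], size [-1, 3]  ->  output [3, 3].
  assert(sliceOutputShape({4, 6}, {1, 0}, {-1, 3}) == (std::vector<int>{3, 3}));
  return 0;
}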

◆ calculateShapeForBroadcast() [1/2]

luci_interpreter::RuntimeShape luci_interpreter::kernels::calculateShapeForBroadcast ( const circle::Tensor *  input1,
const circle::Tensor *  input2 
)

Definition at line 265 of file Utils.cpp.

267{
268 const int num_input1_dims = Tensor::num_dims(input1);
269 const int num_input2_dims = Tensor::num_dims(input2);
270 const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
271 luci_interpreter::RuntimeShape output_shape(num_out_dims);
272
273 for (int i = 0; i < num_out_dims; ++i)
274 {
275 const int32_t input1_dim =
276 i < num_input1_dims ? Tensor::dim(input1, num_input1_dims - i - 1) : 1;
277 const int32_t input2_dim =
278 i < num_input2_dims ? Tensor::dim(input2, num_input2_dims - i - 1) : 1;
279
280 bool need_broadcast = input1_dim != input2_dim;
281 bool can_broadcast = input1_dim == 1 || input2_dim == 1;
282 LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
283
284 output_shape.setDim(num_out_dims - i - 1, std::max(input1_dim, input2_dim));
285 }
286
287 return output_shape;
288}
void setDim(int i, int32_t val)
Definition Tensor.h:114
#define LUCI_INTERPRETER_CHECK(cond)
Definition Utils.h:36

References circle_eval_diff::TensorShape::dim(), LUCI_INTERPRETER_CHECK, output_shape, and luci_interpreter::RuntimeShape::setDim().

◆ calculateShapeForBroadcast() [2/2]

Shape luci_interpreter::kernels::calculateShapeForBroadcast ( const Shape &  input1_shape,
const Shape &  input2_shape 
)

Definition at line 204 of file Utils.cpp.

205{
206 const int num_input1_dims = input1_shape.num_dims();
207 const int num_input2_dims = input2_shape.num_dims();
208 const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
209 Shape output_shape(num_out_dims);
210
211 for (int i = 0; i < num_out_dims; ++i)
212 {
213 const int32_t input1_dim = i < num_input1_dims ? input1_shape.dim(num_input1_dims - i - 1) : 1;
214 const int32_t input2_dim = i < num_input2_dims ? input2_shape.dim(num_input2_dims - i - 1) : 1;
215
216 bool need_broadcast = input1_dim != input2_dim;
217 bool can_broadcast = input1_dim == 1 || input2_dim == 1;
218 LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
219
220 output_shape.dim(num_out_dims - i - 1) = std::max(input1_dim, input2_dim);
221 }
222
223 return output_shape;
224}
int num_dims() const
Definition Tensor.h:39

References luci_interpreter::Shape::dim(), LUCI_INTERPRETER_CHECK, luci_interpreter::Shape::num_dims(), and output_shape.

Referenced by luci_interpreter::kernels::Add::configure(), luci_interpreter::kernels::Div::configure(), luci_interpreter::kernels::Equal::configure(), luci_interpreter::kernels::FloorDiv::configure(), luci_interpreter::kernels::FloorMod::configure(), luci_interpreter::kernels::Greater::configure(), luci_interpreter::kernels::GreaterEqual::configure(), luci_interpreter::kernels::Less::configure(), luci_interpreter::kernels::LessEqual::configure(), luci_interpreter::kernels::LogicalAnd::configure(), luci_interpreter::kernels::LogicalOr::configure(), luci_interpreter::kernels::Maximum::configure(), luci_interpreter::kernels::Minimum::configure(), luci_interpreter::kernels::Mul::configure(), luci_interpreter::kernels::NotEqual::configure(), luci_interpreter::kernels::Pow::configure(), luci_interpreter::kernels::PRelu::configure(), luci_interpreter::kernels::Select::configure(), luci_interpreter::kernels::SelectV2::configure(), luci_interpreter::kernels::SquaredDifference::configure(), and luci_interpreter::kernels::Sub::configure().
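
Both overloads implement right-aligned (NumPy/TFLite-style) broadcasting: dimensions are compared from the trailing axis, a dimension of size 1 stretches to match the other, and anything else must be equal. A standalone sketch of the same rule using std::vector shapes:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <stdexcept>
#include <vector>

std::vector<int32_t> broadcastShape(const std::vector<int32_t> &a, const std::vector<int32_t> &b)
{
  const int na = static_cast<int>(a.size());
  const int nb = static_cast<int>(b.size());
  const int nout = std::max(na, nb);
  std::vector<int32_t> out(nout);
  for (int i = 0; i < nout; ++i)
  {
    const int32_t da = i < na ? a[na - i - 1] : 1; // missing leading dims count as 1
    const int32_t db = i < nb ? b[nb - i - 1] : 1;
    if (da != db && da != 1 && db != 1)
      throw std::runtime_error("Shapes are not broadcastable.");
    out[nout - i - 1] = std::max(da, db);
  }
  return out;
}

int main()
{
  // [2, 1, 3] broadcast against [4, 1] yields [2, 4, 3].
  assert(broadcastShape({2, 1, 3}, {4, 1}) == (std::vector<int32_t>{2, 4, 3}));
  return 0;
}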

◆ CheckBinaryOpDataTypesEqual()

void luci_interpreter::kernels::CheckBinaryOpDataTypesEqual ( const kernels::TISOKernel &  kernel)
inline

Definition at line 116 of file BinaryOpCommon.h.

117{
118 LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
119 Tensor::element_type(kernel.input2()));
120 LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
121 Tensor::element_type(kernel.output()));
122}
const circle::Tensor * output() const
Definition TISOKernel.h:62
const circle::Tensor * input2() const
Definition TISOKernel.h:61
const circle::Tensor * input1() const
Definition TISOKernel.h:60

References luci_interpreter::kernels::TISOKernel::input1(), luci_interpreter::kernels::TISOKernel::input2(), LUCI_INTERPRETER_CHECK, and luci_interpreter::kernels::TISOKernel::output().

Referenced by luci_interpreter::configure_kernel_CircleMaximum(), and luci_interpreter::configure_kernel_CircleMinimum().

◆ checkedLog2()

bool luci_interpreter::kernels::checkedLog2 ( const float  x,
int *  log2_result 
)

Definition at line 113 of file Utils.cpp.

114{
115 const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
116 const float x_log2_rounded = std::round(x_log2);
117 const float x_log2_fracpart = x_log2 - x_log2_rounded;
118
119 *log2_result = static_cast<int>(x_log2_rounded);
120 return std::abs(x_log2_fracpart) < 1e-3f;
121}

◆ computeOutputSize()

int32_t luci_interpreter::kernels::computeOutputSize ( Padding  padding,
int32_t  image_size,
int32_t  filter_size,
int32_t  stride,
int32_t  dilation_rate = 1 
)
inline

Definition at line 59 of file Utils.h.

61{
62 const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
63 switch (padding)
64 {
65 case Padding::SAME:
66 return (image_size + stride - 1) / stride;
67 case Padding::VALID:
68 return (image_size + stride - effective_filter_size) / stride;
69 default:
70 assert(false);
71 return 0;
72 }
73}

Referenced by luci_interpreter::computeConvPadding(), luci_interpreter::kernels::AveragePool2D::configure(), luci_interpreter::kernels::Conv2D::configure(), luci_interpreter::kernels::DepthwiseConv2D::configure(), luci_interpreter::kernels::L2Pool2D::configure(), luci_interpreter::kernels::MaxPool2D::configure(), luci_interpreter::kernels::TransposeConv::configure(), and luci_interpreter::createPoolParams().

◆ computePadding()

int32_t luci_interpreter::kernels::computePadding ( int32_t  stride,
int32_t  dilation_rate,
int32_t  in_size,
int32_t  filter_size,
int32_t  out_size 
)
inline

Definition at line 41 of file Utils.h.

43{
44 const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
45 const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
46 return padding > 0 ? padding : 0;
47}

Referenced by luci_interpreter::computeConvPadding(), luci_interpreter::kernels::AveragePool2D::configure(), luci_interpreter::kernels::Conv2D::configure(), luci_interpreter::kernels::DepthwiseConv2D::configure(), luci_interpreter::kernels::L2Pool2D::configure(), luci_interpreter::kernels::MaxPool2D::configure(), luci_interpreter::kernels::TransposeConv::configure(), and luci_interpreter::createPoolParams().
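
A short worked example tying computeOutputSize and computePadding together for SAME padding. This is a standalone restatement of the two formulas above; the sizes are illustrative.

#include <cassert>
#include <cstdint>

int32_t outputSizeSame(int32_t image_size, int32_t stride)
{
  // SAME-padding branch of computeOutputSize: ceil(image_size / stride).
  return (image_size + stride - 1) / stride;
}

int32_t samePadding(int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size,
                    int32_t out_size)
{
  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
  return padding > 0 ? padding : 0;
}

int main()
{
  // 224x224 input, 3x3 filter, stride 1: output stays 224 and each side gets 1 pixel of padding.
  assert(outputSizeSame(224, 1) == 224);
  assert(samePadding(1, 1, 224, 3, 224) == 1);

  // Stride 2: output is ceil(224 / 2) = 112; total padding is 1, so the "before" side gets 0
  // and computePaddingWithOffset would report the leftover 1 through its offset argument.
  assert(outputSizeSame(224, 2) == 112);
  assert(samePadding(2, 1, 224, 3, 112) == 0);
  return 0;
}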

◆ computePaddingWithOffset()

int32_t luci_interpreter::kernels::computePaddingWithOffset ( int32_t  stride,
int32_t  dilation_rate,
int32_t  in_size,
int32_t  filter_size,
int32_t  out_size,
int32_t *  offset 
)
inline

Definition at line 49 of file Utils.h.

51{
52 int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
53 int32_t total_padding = ((out_size - 1) * stride + effective_filter_size - in_size);
54 total_padding = total_padding > 0 ? total_padding : 0;
55 *offset = total_padding % 2;
56 return total_padding / 2;
57}
__global uchar * offset(const Image *img, int x, int y)
Definition helpers.h:540

References offset().

◆ evalComparisonGeneric()

template<typename T >
void luci_interpreter::kernels::evalComparisonGeneric ( const circle::Tensor *  x,
const circle::Tensor *  y,
const circle::Tensor *  output,
BaseRuntimeGraph *  runtime_graph,
bool  F(T, T) 
)

Definition at line 31 of file ComparisonCommon.h.

34{
35 auto x_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(x));
36 if (x_data == nullptr)
37 x_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(x));
38
39 assert(x_data != nullptr);
40
41 auto y_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(y));
42 if (y_data == nullptr)
43 y_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(y));
44
45 assert(y_data != nullptr);
46
47 auto output_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(output));
48
49 luci_interpreter_pal::ComparisonParams op_params;
50 op_params.is_broadcast = Tensor::num_elements(x) != Tensor::num_elements(y);
51
52 if (op_params.is_broadcast)
53 {
54 luci_interpreter_pal::BroadcastComparison4DSlowNoScaling<T>(
55 op_params, kernels::getTensorShape(x), x_data, kernels::getTensorShape(y), y_data,
56 kernels::getTensorShape(output), output_data, F);
57 }
58 else
59 {
60 const int64_t flat_size = kernels::getTensorShape(x).flatSize();
61 luci_interpreter_pal::ComparisonNoScaling<T>(flat_size, x_data, y_data, output_data, F);
62 }
63}
uint8_t * getConstDataByTensor(const circle::Tensor *raw_tensor)
uint8_t * getDataByTensor(const circle::Tensor *raw_tensor)

References luci_interpreter::RuntimeGraph::getConstDataByTensor(), luci_interpreter::RuntimeGraph::getDataByTensor(), getTensorShape(), and luci_interpreter_pal::ComparisonParams::is_broadcast.

◆ evalTISOInplaceKernel()

template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void luci_interpreter::kernels::evalTISOInplaceKernel ( TISOFunc  tiso_func,
TISOBroadcastFunc  tiso_broadcast_func,
kernels::TISOKernel *  kernel,
const Options *  options,
RuntimeShape &&  input_shape_1,
RuntimeShape &&  input_shape_2,
RuntimeShape &&  output_shape 
)

Definition at line 89 of file BinaryOpCommon.h.

93{
94 uint8_t *inplace_data_ptr = nullptr;
95 circle::Tensor *input_inplace_tensor = nullptr;
96
97 kernels::TISOData kernel_data = kernel->readInplaceData(inplace_data_ptr, input_inplace_tensor);
98
99 evalTISOKernel<T, TISOFunc, TISOBroadcastFunc, Options>(
100 tiso_func, tiso_broadcast_func, kernel, &kernel_data, options, std::move(input_shape_1),
101 std::move(input_shape_2), std::move(output_shape));
102
103 BaseRuntimeGraph *runtime_graph = kernel->runtime_graph();
104
105 runtime_graph->makeInplaceOperation(input_inplace_tensor, kernel->output());
106 if (input_inplace_tensor == kernel->input1())
107 {
108 runtime_graph->makeInplaceOperation(kernel->input2(), nullptr);
109 }
110 else
111 {
112 runtime_graph->makeInplaceOperation(kernel->input1(), nullptr);
113 }
114}
void makeInplaceOperation(const circle::Tensor *src_tensor, const circle::Tensor *dst_tensor)
BaseRuntimeGraph * runtime_graph() const
Definition TISOKernel.h:64
TISOData readInplaceData(uint8_t *&inplace_data_ptr, circle::Tensor *&input_inplace_tensor)
Definition TISOKernel.h:84

References luci_interpreter::kernels::TISOKernel::input1(), luci_interpreter::kernels::TISOKernel::input2(), luci_interpreter::RuntimeGraph::makeInplaceOperation(), luci_interpreter::kernels::TISOKernel::output(), output_shape, luci_interpreter::kernels::TISOKernel::readInplaceData(), and luci_interpreter::kernels::TISOKernel::runtime_graph().

◆ evalTISOInplaceQuantizedKernel()

template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void luci_interpreter::kernels::evalTISOInplaceQuantizedKernel ( TISOFunc  tiso_func,
TISOBroadcastFunc  tiso_broadcast_func,
kernels::TISOKernel *  kernel,
const Options *  options 
)

Definition at line 195 of file BinaryOpCommon.h.

197{
198 uint8_t *inplace_data_ptr = nullptr;
199 circle::Tensor *input_inplace_tensor = nullptr;
200
201 kernels::TISOData kernel_data = kernel->readInplaceData(inplace_data_ptr, input_inplace_tensor);
202
203 evalTISOQuantizedKernel<T, TISOFunc, TISOBroadcastFunc, Options>(tiso_func, tiso_broadcast_func,
204 kernel, &kernel_data, options);
205
206 kernel->runtime_graph()->makeInplaceOperation(input_inplace_tensor, kernel->output());
207 if (input_inplace_tensor == kernel->input1())
208 {
209 kernel->runtime_graph()->makeInplaceOperation(kernel->input2(), nullptr);
210 }
211 else
212 {
213 kernel->runtime_graph()->makeInplaceOperation(kernel->input1(), nullptr);
214 }
215}

References luci_interpreter::kernels::TISOKernel::input1(), luci_interpreter::kernels::TISOKernel::input2(), luci_interpreter::RuntimeGraph::makeInplaceOperation(), luci_interpreter::kernels::TISOKernel::output(), luci_interpreter::kernels::TISOKernel::readInplaceData(), and luci_interpreter::kernels::TISOKernel::runtime_graph().

◆ evalTISOKernel()

template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void luci_interpreter::kernels::evalTISOKernel ( TISOFunc  tiso_func,
TISOBroadcastFunc  tiso_broadcast_func,
kernels::TISOKernel *  kernel,
kernels::TISOData *  kernel_data,
const Options *  options,
RuntimeShape &&  input_shape_1,
RuntimeShape &&  input_shape_2,
RuntimeShape &&  output_shape 
)

Definition at line 61 of file BinaryOpCommon.h.

65{
66 luci_interpreter_pal::ArithmeticParams params{};
67 fillArithmeticActivationRange<T>(params, luci_actfunc(options->fused_activation_function()));
68
69 const bool need_broadcast =
70 luci_interpreter_pal::ProcessBroadcastShapes(input_shape_1, input_shape_2, &params);
71
72 if (need_broadcast)
73 {
74 tiso_broadcast_func(params, input_shape_1, kernels::getTensorData<T>(kernel_data->input1_data),
75 input_shape_2, kernels::getTensorData<T>(kernel_data->input2_data),
76 output_shape, kernels::getTensorData<T>(kernel_data->output_data));
77 }
78 else
79 {
80 const int flat_size = input_shape_1.flatSize();
81 tiso_func(params, flat_size, kernels::getTensorData<T>(kernel_data->input1_data),
82 kernels::getTensorData<T>(kernel_data->input2_data),
83 kernels::getTensorData<T>(kernel_data->output_data));
84 }
85}
bool ProcessBroadcastShapes(const luci_interpreter::RuntimeShape &shape0, const luci_interpreter::RuntimeShape &shape1, luci_interpreter_pal::ArithmeticParams *params)
FusedActFunc luci_actfunc(const circle::ActivationFunctionType type)

References luci_interpreter::kernels::TISOData::input1_data, luci_interpreter::kernels::TISOData::input2_data, luci::luci_actfunc(), luci_interpreter::kernels::TISOData::output_data, output_shape, and luci_interpreter_pal::ProcessBroadcastShapes().

◆ evalTISOQuantizedKernel()

template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void luci_interpreter::kernels::evalTISOQuantizedKernel ( TISOFunc  tiso_func,
TISOBroadcastFunc  tiso_broadcast_func,
kernels::TISOKernel *  kernel,
kernels::TISOData *  kernel_data,
const Options *  options 
)

Definition at line 127 of file BinaryOpCommon.h.

130{
131 const auto *input1 = kernel->input1();
132 const auto *input2 = kernel->input2();
133 const auto *output = kernel->output();
134
135 const auto input1_scale = static_cast<double>(Tensor::scale(input1));
136 const auto input2_scale = static_cast<double>(Tensor::scale(input2));
137 const auto output_scale = static_cast<double>(Tensor::scale(output));
138
139 const int left_shift = 20;
140 const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
141 const double real_input1_multiplier = input1_scale / twice_max_input_scale;
142 const double real_input2_multiplier = input2_scale / twice_max_input_scale;
143 const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
144
145 int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
146 int input1_shift{}, input2_shift{}, output_shift{};
147 kernels::quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier,
148 &input1_shift);
149 kernels::quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier,
150 &input2_shift);
151 kernels::quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier,
152 &output_shift);
153
154 int32_t activation_min{};
155 int32_t activation_max{};
156 kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
157 output, &activation_min, &activation_max);
158
159 luci_interpreter_pal::ArithmeticParams params{};
160 params.left_shift = left_shift;
161 // The kernel expects inputs' zero points to be negated.
162 params.input1_offset = -Tensor::zero_point(input1); // Note the '-'.
163 params.input1_multiplier = input1_multiplier;
164 params.input1_shift = input1_shift;
165 params.input2_offset = -Tensor::zero_point(input2); // Note the '-'.
166 params.input2_multiplier = input2_multiplier;
167 params.input2_shift = input2_shift;
168 params.output_offset = Tensor::zero_point(output);
169 params.output_multiplier = output_multiplier;
170 params.output_shift = output_shift;
171 params.quantized_activation_min = activation_min;
172 params.quantized_activation_max = activation_max;
173
174 const bool need_broadcast = luci_interpreter_pal::ProcessBroadcastShapes(
175 kernels::getTensorShape(input1), kernels::getTensorShape(input2), &params);
176
177 if (need_broadcast)
178 {
179 tiso_broadcast_func(
180 params, kernels::getTensorShape(input1), kernels::getTensorData<T>(kernel_data->input1_data),
181 kernels::getTensorShape(input2), kernels::getTensorData<T>(kernel_data->input2_data),
182 kernels::getTensorShape(output), kernels::getTensorData<T>(kernel_data->output_data));
183 }
184 else
185 {
186 tiso_func(params, kernels::getTensorShape(input1),
187 kernels::getTensorData<uint8_t>(kernel_data->input1_data),
188 kernels::getTensorShape(input2), kernels::getTensorData<T>(kernel_data->input2_data),
189 kernels::getTensorShape(output), kernels::getTensorData<T>(kernel_data->output_data));
190 }
191}

References calculateActivationRangeQuantized(), getTensorShape(), luci_interpreter::kernels::TISOKernel::input1(), luci_interpreter::kernels::TISOData::input1_data, luci_interpreter::kernels::TISOKernel::input2(), luci_interpreter::kernels::TISOData::input2_data, luci_interpreter_pal::ArithmeticParams::left_shift, luci::luci_actfunc(), luci_interpreter::kernels::TISOKernel::output(), luci_interpreter::kernels::TISOData::output_data, luci_interpreter_pal::ProcessBroadcastShapes(), and quantizeMultiplierSmallerThanOneExp().

◆ fillArithmeticActivationRange()

template<typename T >
void luci_interpreter::kernels::fillArithmeticActivationRange ( tflite::ArithmeticParams &  p,
Activation  act 
)

Fills the activation min and max parameters depending on the given data type and activation.

T is a template parameter, so after optimization only the branch required for that type remains in the compiled code.

Template Parameters
    T           data type of the arithmetic operation's output tensor
Parameters
    params      tflite params to fill
    activation  luci_interpreter::Activation of the arithmetic operation

Definition at line 106 of file Utils.h.

107{
108 static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
109
110 if (std::is_same<T, float>::value)
111 calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
112 if (std::is_same<T, int32_t>::value)
113 calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
114 else
115 calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
116}
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
Definition Utils.cpp:52

References calculateActivationRange().

◆ getBeginAndSizeVectors()

template<typename T >
void luci_interpreter::kernels::getBeginAndSizeVectors ( int  dimensions,
const Tensor *  begin,
const Tensor *  size,
std::vector< int > *  begins,
std::vector< int > *  sizes 
)

Definition at line 64 of file Slice.cpp.

66{
67 for (int idx = dimensions - 1; idx >= 0; --idx)
68 {
69 begins->push_back(getTensorData<T>(begin)[idx]);
70 sizes->push_back(getTensorData<T>(size)[idx]);
71 }
72}

References begin, and size.

◆ getQuantizedConvolutionMultipler()

double luci_interpreter::kernels::getQuantizedConvolutionMultipler ( float  input_scale,
float  filter_scale,
float  output_scale 
)
inline

Definition at line 137 of file Utils.h.

139{
140 const double input_product_scale = static_cast<double>(input_scale * filter_scale);
141 LUCI_INTERPRETER_CHECK(input_product_scale >= 0);
142 return input_product_scale / static_cast<double>(output_scale);
143}

References LUCI_INTERPRETER_CHECK.

Referenced by getQuantizedConvolutionMultiplers().

◆ getQuantizedConvolutionMultiplers()

std::vector< double > luci_interpreter::kernels::getQuantizedConvolutionMultiplers ( float  input_scale,
const std::vector< float > &  filter_scale,
float  output_scale 
)
inline

Definition at line 147 of file Utils.h.

150{
151 std::vector<double> effective_output_scales;
152 size_t n = filter_scale.size();
153 effective_output_scales.reserve(n);
154 for (size_t i = 0; i < n; ++i)
155 {
156 effective_output_scales.push_back(
157 getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
158 }
159 return effective_output_scales;
160}
double getQuantizedConvolutionMultipler(float input_scale, float filter_scale, float output_scale)
Definition Utils.h:137

References getQuantizedConvolutionMultipler().

Referenced by luci_interpreter::kernels::PRelu::configure(), luci_interpreter::kernels::TransposeConv::configure(), and luci_interpreter::createConv2DParams().
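
A standalone worked example of the per-channel effective scale computed above, effective_scale[i] = input_scale * filter_scale[i] / output_scale; in the kernels these values are then converted to fixed-point pairs by quantizeMultipliers. The scale values below are made up for illustration.

#include <cassert>
#include <cmath>
#include <vector>

int main()
{
  // Hypothetical quantization parameters for a per-channel quantized convolution.
  const float input_scale = 0.5f;
  const std::vector<float> filter_scales = {0.02f, 0.04f};
  const float output_scale = 0.1f;

  std::vector<double> effective;
  for (float fs : filter_scales)
    effective.push_back(static_cast<double>(input_scale * fs) / static_cast<double>(output_scale));

  // 0.5 * 0.02 / 0.1 = 0.1 and 0.5 * 0.04 / 0.1 = 0.2 (up to float rounding).
  assert(std::abs(effective[0] - 0.1) < 1e-6);
  assert(std::abs(effective[1] - 0.2) < 1e-6);
  return 0;
}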

◆ getTensorData() [1/4]

template<typename T >
const T * luci_interpreter::kernels::getTensorData ( const Tensor *  tensor)

Definition at line 208 of file Utils.h.

209{
210 return tensor != nullptr ? tensor->data<T>() : nullptr;
211}

◆ getTensorData() [2/4]

template<typename T >
const T * luci_interpreter::kernels::getTensorData ( const uint8_t *  tensor_data)

Definition at line 158 of file Utils.h.

159{
160 return tensor_data != nullptr ? reinterpret_cast<const T *>(tensor_data) : nullptr;
161}

◆ getTensorData() [3/4]

template<typename T >
T * luci_interpreter::kernels::getTensorData ( Tensor *  tensor)

Definition at line 213 of file Utils.h.

214{
215 return tensor != nullptr ? tensor->data<T>() : nullptr;
216}

◆ getTensorData() [4/4]

template<typename T >
T * luci_interpreter::kernels::getTensorData ( uint8_t *  tensor_data)
inline

Definition at line 163 of file Utils.h.

164{
165 return tensor_data != nullptr ? reinterpret_cast<T *>(tensor_data) : nullptr;
166}

◆ getTensorDims()

void luci_interpreter::kernels::getTensorDims ( const circle::Tensor *  tensor,
BaseRuntimeGraph *  runtime_graph,
int32_t *  dims 
)
inline

Definition at line 121 of file Utils.h.

123{
124 if (tensor == nullptr)
125 {
126 dims = nullptr;
127 return;
128 }
129
130#ifndef DIS_DYN_SHAPES
131 auto *dynamic_shape_vector = runtime_graph->getDynamicShapeTensor(tensor);
132 if (dynamic_shape_vector != nullptr)
133 {
134 for (int n = 0; n < dynamic_shape_vector->dimensionsCount(); ++n)
135 {
136 dims[n] = dynamic_shape_vector->dims(n);
137 }
138 }
139 else
140 {
141 auto const tensor_shape = Tensor::tensor_shape(tensor);
142 assert(tensor_shape.size() <= kMaxSmallSize);
143 for (int i = 0; i < tensor_shape.size(); ++i)
144 {
145 dims[i] = tensor_shape[i];
146 }
147 }
148#else
149 auto const tensor_shape = Tensor::tensor_shape(tensor);
150 assert(tensor_shape.size() <= kMaxSmallSize);
151 for (int i = 0; i < tensor_shape.size(); ++i)
152 {
153 dims[i] = tensor_shape[i];
154 }
155#endif // DIS_DYN_SHAPES
156}
luci_interpreter::RuntimeShape * getDynamicShapeTensor(const circle::Tensor *tensor)

References luci_interpreter::RuntimeShape::dims(), and luci_interpreter::RuntimeGraph::getDynamicShapeTensor().

Referenced by luci_interpreter_pal::lstm_internal::calculateLstmGate().

◆ getTensorRuntimeShape()

luci_interpreter::RuntimeShape luci_interpreter::kernels::getTensorRuntimeShape ( const circle::Tensor *  circle_tensor,
BaseRuntimeGraph *  runtime_graph 
)

Definition at line 29 of file Utils.cpp.

31{
32 luci_interpreter::RuntimeShape input_shape = getTensorShape(circle_tensor);
33
34#ifndef DIS_DYN_SHAPES
35 auto *dynamic_shape_vector = runtime_graph->getDynamicShapeTensor(circle_tensor);
36 if (dynamic_shape_vector != nullptr)
37 {
38 input_shape.resize(dynamic_shape_vector->dimensionsCount());
39
40 for (int n = 0; n < dynamic_shape_vector->dimensionsCount(); ++n)
41 {
42 input_shape.setDim(n, dynamic_shape_vector->dims(n));
43 }
44 }
45#endif // DIS_DYN_SHAPES
46 return input_shape;
47}
void resize(int dimensions_count)
Definition Tensor.h:121
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
Definition Utils.h:194

References luci_interpreter::RuntimeGraph::getDynamicShapeTensor(), getTensorShape(), luci_interpreter::RuntimeShape::resize(), and luci_interpreter::RuntimeShape::setDim().

Referenced by luci_interpreter::evalInteger(), luci_interpreter::execute_kernel_CircleAbs(), luci_interpreter::execute_kernel_CircleAdd(), luci_interpreter::execute_kernel_CircleArgMax(), luci_interpreter::execute_kernel_CircleArgMin(), luci_interpreter::execute_kernel_CircleBatchToSpaceND(), luci_interpreter::execute_kernel_CircleBroadcastTo(), luci_interpreter::execute_kernel_CircleCast(), luci_interpreter::execute_kernel_CircleCeil(), luci_interpreter::execute_kernel_CircleCos(), luci_interpreter::execute_kernel_CircleDepthToSpace(), luci_interpreter::execute_kernel_CircleDequantize(), luci_interpreter::execute_kernel_CircleDiv(), luci_interpreter::execute_kernel_CircleElu(), luci_interpreter::execute_kernel_CircleExp(), luci_interpreter::execute_kernel_CircleFloorDiv(), luci_interpreter::execute_kernel_CircleFloorMod(), luci_interpreter::execute_kernel_CircleGatherND(), luci_interpreter::execute_kernel_CircleL2Normalize(), luci_interpreter::execute_kernel_CircleLeakyRelu(), luci_interpreter::execute_kernel_CircleLog(), luci_interpreter::execute_kernel_CircleLogistic(), luci_interpreter::execute_kernel_CircleLogSoftmax(), luci_interpreter::execute_kernel_CircleMaximum(), luci_interpreter::execute_kernel_CircleMinimum(), luci_interpreter::execute_kernel_CircleMul(), luci_interpreter::execute_kernel_CirclePRelu(), luci_interpreter::execute_kernel_CircleQuantize(), luci_interpreter::execute_kernel_CircleRelu(), luci_interpreter::execute_kernel_CircleRelu6(), luci_interpreter::execute_kernel_CircleResizeNearestNeighbor(), luci_interpreter::execute_kernel_CircleRound(), luci_interpreter::execute_kernel_CircleRsqrt(), luci_interpreter::execute_kernel_CircleSin(), luci_interpreter::execute_kernel_CircleSpaceToBatchND(), luci_interpreter::execute_kernel_CircleSpaceToDepth(), luci_interpreter::execute_kernel_CircleSqrt(), luci_interpreter::execute_kernel_CircleSquare(), luci_interpreter::execute_kernel_CircleSquaredDifference(), luci_interpreter::execute_kernel_CircleSub(), luci_interpreter::execute_kernel_CircleTanh(), and luci_interpreter::execute_kernel_CircleZerosLike().

◆ getTensorShape() [1/2]

luci_interpreter::RuntimeShape luci_interpreter::kernels::getTensorShape ( const circle::Tensor *  tensor)
inline

Definition at line 106 of file Utils.h.

107{
108 if (tensor == nullptr)
109 return luci_interpreter::RuntimeShape();
110
111 auto const tensor_shape = Tensor::tensor_shape(tensor);
112
113 luci_interpreter::RuntimeShape runtime_shape(tensor_shape.size());
114 for (int i = 0; i < tensor_shape.size(); ++i)
115 {
116 runtime_shape.setDim(i, tensor_shape[i]);
117 }
118 return runtime_shape;
119}

References luci_interpreter::RuntimeShape::setDim().

◆ getTensorShape() [2/2]

tflite::RuntimeShape luci_interpreter::kernels::getTensorShape ( const Tensor *  tensor)
inline

Definition at line 194 of file Utils.h.

195{
196 if (tensor == nullptr)
197 return tflite::RuntimeShape();
198
199 const Shape &shape = tensor->shape();
200 tflite::RuntimeShape runtime_shape(shape.num_dims());
201 for (int i = 0; i < shape.num_dims(); ++i)
202 {
203 runtime_shape.SetDim(i, shape.dim(i));
204 }
205 return runtime_shape;
206}

References luci_interpreter::Shape::dim(), and luci_interpreter::Shape::num_dims().

Referenced by luci_interpreter::kernels::AveragePool2D::configure(), luci_interpreter::kernels::BatchMatMul::configure(), luci_interpreter::kernels::Conv2D::configure(), luci_interpreter::kernels::DepthwiseConv2D::configure(), luci_interpreter::kernels::GRU::configure(), luci_interpreter::kernels::StridedSlice::configure(), luci_interpreter::configure_kernel_CircleFloor(), luci_interpreter::configure_kernel_CircleLogicalNot(), luci_interpreter::configure_kernel_CircleResizeBilinear(), luci_interpreter::configure_kernel_CircleSquare(), luci_interpreter::configure_kernel_CircleSqueeze(), evalComparisonGeneric(), evalTISOQuantizedKernel(), luci_interpreter::kernels::BatchMatMul::execute(), luci_interpreter::kernels::BatchToSpaceND::execute(), luci_interpreter::kernels::CumSum::execute(), luci_interpreter::kernels::DepthToSpace::execute(), luci_interpreter::kernels::Dequantize::execute(), luci_interpreter::kernels::Elu::execute(), luci_interpreter::kernels::Fill::execute(), luci_interpreter::kernels::HardSwish::execute(), luci_interpreter::kernels::L2Pool2D::execute(), luci_interpreter::kernels::LocalResponseNormalization::execute(), luci_interpreter::kernels::LogicalOr::execute(), luci_interpreter::kernels::Pad::execute(), luci_interpreter::kernels::PadV2::execute(), luci_interpreter::kernels::Quantize::execute(), luci_interpreter::kernels::ResizeBilinear::execute(), luci_interpreter::kernels::ResizeNearestNeighbor::execute(), luci_interpreter::kernels::ReverseV2::execute(), luci_interpreter::kernels::Slice::execute(), luci_interpreter::kernels::SpaceToBatchND::execute(), luci_interpreter::kernels::SpaceToDepth::execute(), luci_interpreter::kernels::StridedSlice::execute(), luci_interpreter::kernels::Transpose::execute(), luci_interpreter::execute_kernel_CircleAveragePool2D(), luci_interpreter::execute_kernel_CircleFloor(), luci_interpreter::execute_kernel_CircleL2Pool2D(), luci_interpreter::execute_kernel_CircleLogicalAnd(), luci_interpreter::execute_kernel_CircleLogicalNot(), luci_interpreter::execute_kernel_CircleLogicalOr(), luci_interpreter::execute_kernel_CircleMaxPool2D(), luci_interpreter::execute_kernel_CircleMean(), luci_interpreter::execute_kernel_CircleNeg(), luci_interpreter::execute_kernel_CirclePadCommon(), luci_interpreter::execute_kernel_CircleResizeBilinear(), luci_interpreter::execute_kernel_CircleSlice(), luci_interpreter::execute_kernel_CircleStridedSlice(), luci_interpreter::execute_kernel_CircleTranspose(), getTensorRuntimeShape(), TransposeRowsColumns(), and luci_interpreter::kernels::VectorOfTensors< T, is_const >::VectorOfTensors().

◆ getTfLiteActivation()

TfLiteFusedActivation luci_interpreter::kernels::getTfLiteActivation ( Activation  activation)

Definition at line 30 of file Utils.cpp.

31{
32 switch (activation)
33 {
34 case Activation::RELU:
35 return kTfLiteActRelu;
36 case Activation::RELU6:
37 return kTfLiteActRelu6;
38 case Activation::RELU_N1_TO_1:
39 return kTfLiteActReluN1To1;
40 case Activation::TANH:
41 return kTfLiteActTanh;
42 case Activation::SIGN_BIT:
43 return kTfLiteActSignBit;
44 case Activation::NONE:
45 return kTfLiteActNone;
46 default:
47 throw std::runtime_error("Unsupported activation type");
48 }
49}

References luci::NONE, luci::RELU, luci::RELU6, luci::RELU_N1_TO_1, luci::SIGN_BIT, and luci::TANH.

◆ matrixScalarMultiplyAccumulate()

void luci_interpreter::kernels::matrixScalarMultiplyAccumulate ( const int8_t *  matrix,
int32_t  scalar,
int32_t  n_row,
int32_t  n_col,
int32_t *  output 
)

Definition at line 75 of file Utils.cpp.

77{
78 for (int i = 0; i < n_row; ++i)
79 {
80 int32_t row_sum = 0;
81 for (int j = 0; j < n_col; ++j)
82 {
83 row_sum += *matrix++;
84 }
85 output[i] += row_sum * scalar;
86 }
87}

◆ one_of_types() [1/2]

template<typename T >
constexpr bool luci_interpreter::kernels::one_of_types ( )
constexpr

Definition at line 88 of file Utils.h.

88{ return false; }

Referenced by one_of_types().

◆ one_of_types() [2/2]

template<typename T , typename U , typename... Other>
constexpr bool luci_interpreter::kernels::one_of_types ( )
constexpr

Definition at line 91 of file Utils.h.

92{
93 return std::is_same<T, U>::value || one_of_types<T, Other...>();
94}
constexpr bool one_of_types()
Definition Utils.h:88

References one_of_types().
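
A standalone, compilable restatement of the trait with static_assert usage examples; this is the guard fillArithmeticActivationRange uses to restrict its template parameter to float, int32_t and int64_t.

#include <cstdint>
#include <type_traits>

template <typename T> constexpr bool one_of_types() { return false; }

template <typename T, typename U, typename... Other> constexpr bool one_of_types()
{
  // True iff T is the same type as U or as any of the remaining Other types.
  return std::is_same<T, U>::value || one_of_types<T, Other...>();
}

static_assert(one_of_types<float, float, int32_t, int64_t>(), "float is in the list");
static_assert(!one_of_types<double, float, int32_t, int64_t>(), "double is not in the list");

int main() { return 0; }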

◆ quantizeMultiplier()

void luci_interpreter::kernels::quantizeMultiplier ( double  double_multiplier,
int32_t *  quantized_multiplier,
int *  shift 
)

Definition at line 157 of file Utils.cpp.

158{
159 if (double_multiplier == 0.0)
160 {
161 *quantized_multiplier = 0;
162 *shift = 0;
163 return;
164 }
165
166 const double q = std::frexp(double_multiplier, shift);
167 auto q_fixed = static_cast<int64_t>(std::round(q * (INT64_C(1) << 31)));
168
169 if (q_fixed == (INT64_C(1) << 31))
170 {
171 q_fixed /= 2;
172 ++*shift;
173 }
174 assert(q_fixed <= std::numeric_limits<int32_t>::max());
175 // A shift amount smaller than -31 would cause all bits to be shifted out
176 // and thus all results would be zero. We implement that instead with
177 // q_fixed==0, so as to avoid hitting issues with right-shift
178 // operations with shift amounts greater than 31. Note that this happens
179 // roughly when abs(double_multiplier) < 2^-31 and the present handling means
180 // that we're effectively flushing tiny double_multiplier's to zero.
181 // We could conceivably handle values in the range (roughly) [32, 63]
182 // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
183 // the present handling is just doing 'flush denormals to zero'. We could
184 // reconsider and actually generate nonzero denormals if a need arises.
185 if (*shift < -31)
186 {
187 *shift = 0;
188 q_fixed = 0;
189 }
190 *quantized_multiplier = static_cast<int32_t>(q_fixed);
191}

Referenced by luci_interpreter::kernels::LeakyRelu::configure(), luci_interpreter::kernels::PRelu::configure(), luci_interpreter::kernels::Relu::configure(), luci_interpreter::kernels::Relu0To1::configure(), luci_interpreter::kernels::Relu6::configure(), luci_interpreter::createConv2DParams(), quantizeMultipliers(), and quantizeMultiplierSmallerThanOneExp().
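
A standalone worked example of the decomposition above: a real multiplier m is represented as m ~= (quantized_multiplier / 2^31) * 2^shift, so integer kernels can apply it with a 32-bit multiply plus a shift. The helper below restates the listing for illustration only.

#include <cassert>
#include <cmath>
#include <cstdint>

void quantizeMultiplierSketch(double m, int32_t *quantized_multiplier, int *shift)
{
  if (m == 0.0)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(m, shift); // m == q * 2^shift with q in [0.5, 1)
  auto q_fixed = static_cast<int64_t>(std::round(q * (INT64_C(1) << 31)));
  if (q_fixed == (INT64_C(1) << 31))
  {
    q_fixed /= 2;
    ++*shift;
  }
  if (*shift < -31) // flush tiny multipliers to zero, as in the listing above
  {
    *shift = 0;
    q_fixed = 0;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

int main()
{
  int32_t qm = 0;
  int shift = 0;
  quantizeMultiplierSketch(0.375, &qm, &shift);
  // 0.375 = 0.75 * 2^-1, so qm is about 0.75 * 2^31 and shift is -1.
  assert(shift == -1);
  const double reconstructed = std::ldexp(static_cast<double>(qm) / (INT64_C(1) << 31), shift);
  assert(std::abs(reconstructed - 0.375) < 1e-9);
  return 0;
}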

◆ quantizeMultipliers()

std::vector< ChannelQuantMultipliers > luci_interpreter::kernels::quantizeMultipliers ( const std::vector< double > &  effective_scale)
inline

Definition at line 170 of file Utils.h.

171{
172 size_t n = effective_scale.size();
173 std::vector<ChannelQuantMultipliers> params(n);
174 for (size_t i = 0; i < n; ++i)
175 {
176 quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
177 }
178 return params;
179}
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
Definition Utils.cpp:157

References quantizeMultiplier().

Referenced by luci_interpreter::kernels::PRelu::configure(), and luci_interpreter::kernels::TransposeConv::configure().

◆ quantizeMultiplierSmallerThanOneExp()

void luci_interpreter::kernels::quantizeMultiplierSmallerThanOneExp ( double  double_multiplier,
int32_t *  quantized_multiplier,
int *  left_shift 
)

Definition at line 193 of file Utils.cpp.

195{
196 assert(double_multiplier < 1.0);
197 assert(double_multiplier > 0.0);
198 int shift;
199 quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
200 assert(shift <= 0);
201 *left_shift = shift;
202}

References quantizeMultiplier().

Referenced by luci_interpreter::kernels::Equal::configure(), luci_interpreter::kernels::Greater::configure(), luci_interpreter::kernels::GreaterEqual::configure(), luci_interpreter::kernels::Less::configure(), luci_interpreter::kernels::LessEqual::configure(), luci_interpreter::kernels::NotEqual::configure(), and evalTISOQuantizedKernel().

◆ TransposeRowsColumns()

void luci_interpreter::kernels::TransposeRowsColumns ( const Tensor *  tensor_in,
Tensor *  tensor_out 
)

Definition at line 124 of file BatchMatMul.cpp.

125{
126 tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in));
127 tflite::RuntimeShape shape(getTensorShape(tensor_in));
128 tflite::TransposeParams params;
129 int rank = shape.DimensionsCount();
130 params.perm_count = rank;
131 for (int i = 0; i < rank - 2; ++i)
132 {
133 params.perm[i] = i;
134 }
135 // Transpose the last two dimensions.
136 params.perm[rank - 2] = rank - 1;
137 params.perm[rank - 1] = rank - 2;
138 transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2));
139 transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1));
140 switch (tensor_in->element_type())
141 {
142 case DataType::FLOAT32:
143 tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in),
144 transposed_shape, getTensorData<float>(tensor_out));
145 break;
146 default:
147 throw std::runtime_error("Only suppport fp32 BatchMatMul for now.");
148 }
149}
DataType element_type() const
Definition Tensor.h:105

References luci_interpreter::Tensor::element_type(), and getTensorShape().

Referenced by luci_interpreter::kernels::BatchMatMul::execute().

Variable Documentation

◆ max_dim

const int luci_interpreter::kernels::max_dim = 4