ONE - On-device Neural Engine
Loading...
Searching...
No Matches
onert_micro::execute Namespace Reference

Namespaces

namespace  pal
 
namespace  testing
 

Data Structures

class  KernelBuiltinExecuteRegistry
 
class  KernelCustomExecuteRegistry
 
struct  OMExecuteArgs
 
struct  OMKernelExecute
 
class  OMRuntimeKernel
 

Typedefs

using KernelExecuteFunc = OMStatus(const OMExecuteArgs &)
 

Functions

OMStatus execute_arg_common (const OMExecuteArgs &execute_args, const std::function< OMStatus(const core::OMRuntimeShape &input1_shape, const float *input1_data, const int *input2_data, const core::OMRuntimeShape &output_shape, int *output_data)> &f_float)
 
template<typename T >
void readDataKernel (OMRuntimeKernel *runtime_kernel, const T *&cast_input_data1, const T *&cast_input_data2, bool *&cast_output_data, core::OMRuntimeShape &input1_shape_ref, core::OMRuntimeShape &input2_shape_ref, core::OMRuntimeShape &output_shape_ref)
 
template<typename T >
void evalComparisonGeneric (OMRuntimeKernel *runtime_kernel, bool F(T, T))
 
template<typename T , typename AccType >
void evalQuantizedComparisonGeneric (OMRuntimeKernel *runtime_kernel, bool F(AccType, AccType))
 
OMStatus createConvParams (core::ConvQuant &params, const circle::Tensor *input, const circle::Tensor *filter, const circle::Tensor *output, circle::ActivationFunctionType act_type)
 
OMStatus execute_math_common (const OMExecuteArgs &execute_args, const std::function< OMStatus(const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)> &f_float)
 
OMStatus execute_pooling_common (const OMExecuteArgs &execute_args, const std::function< OMStatus(const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)> &f_float, const std::function< OMStatus(const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)> &f_int8)
 
OMStatus readKernelDataTISO (const OMExecuteArgs &execute_args, uint8_t *&input_data1, uint8_t *&input_data2, uint8_t *&output_data, core::OMRuntimeShape &input1_shape_ref, core::OMRuntimeShape &input2_shape_ref, core::OMRuntimeShape &output_shape_ref, circle::TensorType &tensor_type)
 
OMStatus execute_relu_common (const OMExecuteArgs &execute_args, bool is_relu_6)
 
OMStatus execute_reshape_common (const OMExecuteArgs &execute_args)
 
OMStatus execute_spaces_batches_nd_common (const OMExecuteArgs &execute_args, const std::function< OMStatus(const core::OMRuntimeShape &unextended_input1_shape, const float *input1_data, const core::OMRuntimeShape &unextended_input2_shape, const int32_t *block_shape_data, const core::OMRuntimeShape &unextended_input3_shape, const int32_t *crops_data, const core::OMRuntimeShape &unextended_output_shape, float *output_data)> &f)
 
void readQuantParams (const circle::Tensor *tensor, long &zero_point, float &scale)
 
template<typename T >
OMStatus calculateActivationRange (circle::ActivationFunctionType activation, T *activation_min, T *activation_max)
 
double getQuantizedConvolutionMultipler (float input_scale, float filter_scale, float output_scale)
 
void quantizeMultiplier (double double_multiplier, int32_t *quantized_multiplier, int *shift)
 
void quantizeMultiplierSmallerThanOneExp (double double_multiplier, int32_t *quantized_multiplier, int *left_shift)
 
std::vector< double > getQuantizedConvolutionMultiplers (float input_scale, const flatbuffers::Vector< float > *filter_scale, float output_scale)
 
OMStatus calculateActivationRangeQuantized (circle::ActivationFunctionType activation, int32_t output_zero_point, float output_scale, circle::TensorType data_type, int32_t *activation_min, int32_t *activation_max)
 
int computeOutSize (circle::Padding padding, int image_size, int filter_size, int stride, int dilation_rate=1)
 
int computePadding (int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size, int32_t out_size)
 
void computePaddingHeightWidth (int32_t stride_height, int32_t stride_width, int32_t dilation_rate_height, int32_t dilation_rate_width, int32_t in_height, int32_t in_width, int32_t filter_height, int32_t filter_width, circle::Padding padding, int32_t *padding_h, int32_t *padding_w)
 
void calculateQuantParams (core::ArithmeticQuantParams &params, const circle::Tensor *input1, const circle::Tensor *input2, const circle::Tensor *output, circle::ActivationFunctionType act)
 
OMStatus SISOHeader (const OMExecuteArgs &execute_args, const circle::Tensor **input, const circle::Tensor **output, uint8_t **input_data, uint8_t **output_data)
 
OMStatus TISOHeader (const OMExecuteArgs &execute_args, const circle::Tensor **input1, const circle::Tensor **input2, const circle::Tensor **output, OMRuntimeKernel *runtime_kernel)
 
int calculateInputRadius (int input_integer_bits, int input_left_shift, int total_signed_bits)
 

Variables

constexpr KernelBuiltinExecuteRegistry kernel_builtin_execute
 
constexpr KernelCustomExecuteRegistry kernel_custom_execute
 

Typedef Documentation

◆ KernelExecuteFunc

Definition at line 31 of file OMKernelExecutionBuilder.h.

Function Documentation

◆ calculateActivationRange()

template<typename T >
OMStatus onert_micro::execute::calculateActivationRange ( circle::ActivationFunctionType  activation,
T *  activation_min,
T *  activation_max 
)

Definition at line 36 of file OMUtils.h.

38{
39 switch (activation)
40 {
41 case circle::ActivationFunctionType::ActivationFunctionType_NONE:
42 *activation_min = std::numeric_limits<T>::lowest();
43 *activation_max = std::numeric_limits<T>::max();
44 break;
45 case circle::ActivationFunctionType::ActivationFunctionType_RELU:
46 *activation_min = 0;
47 *activation_max = std::numeric_limits<T>::max();
48 break;
49 case circle::ActivationFunctionType::ActivationFunctionType_RELU_N1_TO_1:
50 *activation_min = -1;
51 *activation_max = 1;
52 break;
53 case circle::ActivationFunctionType::ActivationFunctionType_RELU6:
54 *activation_min = 0;
55 *activation_max = 6;
56 break;
57 default:
58 assert(false && "Unsupported activation.");
59 return UnsupportedActivation;
60 }
61
62 return Ok;
63}
@ UnsupportedActivation
Definition OMStatus.h:28

References onert_micro::Ok, and onert_micro::UnsupportedActivation.

◆ calculateActivationRangeQuantized()

OMStatus onert_micro::execute::calculateActivationRangeQuantized ( circle::ActivationFunctionType  activation,
int32_t  output_zero_point,
float  output_scale,
circle::TensorType  data_type,
int32_t *  activation_min,
int32_t *  activation_max 
)

Definition at line 112 of file OMUtils.cpp.

115{
116 int32_t qmin;
117 int32_t qmax;
118 switch (data_type)
119 {
120 case circle::TensorType_UINT8:
121 qmin = 0;
122 qmax = std::numeric_limits<uint8_t>::max();
123 break;
124 case circle::TensorType_INT8:
125 qmin = std::numeric_limits<int8_t>::min();
126 qmax = std::numeric_limits<int8_t>::max();
127 break;
128 case circle::TensorType_INT16:
129 // For now, assume that signed int16 type implies signed symmetric quantization.
130 assert(output_zero_point == 0);
131 qmin = std::numeric_limits<int16_t>::min();
132 qmax = std::numeric_limits<int16_t>::max();
133 break;
134 default:
135 assert(false && "Unsupported type.");
136 return UnsupportedType;
137 }
138
139 return calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output_zero_point,
140 output_scale, activation_min, activation_max);
141}
@ UnsupportedType
Definition OMStatus.h:26

References onert_micro::UnsupportedType.

Referenced by calculateQuantParams(), and createConvParams().

◆ calculateInputRadius()

int onert_micro::execute::calculateInputRadius ( int  input_integer_bits,
int  input_left_shift,
int  total_signed_bits 
)
inline

Definition at line 170 of file OMUtils.h.

171{
172 const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
173 (1LL << (total_signed_bits - input_integer_bits)) /
174 (1LL << input_left_shift);
175 // Tighten bound using floor. Suppose that we could use the exact value.
176 // After scaling the difference, the result would be at the maximum. Thus we
177 // must ensure that our value has lower magnitude.
178 return static_cast<int>(std::floor(max_input_rescaled));
179}

◆ calculateQuantParams()

void onert_micro::execute::calculateQuantParams ( core::ArithmeticQuantParams params,
const circle::Tensor *  input1,
const circle::Tensor *  input2,
const circle::Tensor *  output,
circle::ActivationFunctionType  act 
)

Definition at line 194 of file OMUtils.cpp.

199{
200 long input1_zp;
201 long input2_zp;
202 long output_zp;
203
204 float input1_scale;
205 float input2_scale;
206 float output_scale;
207
208 // Read input1 quant params
209 readQuantParams(input1, input1_zp, input1_scale);
210 // Read input2 quant params
211 readQuantParams(input2, input2_zp, input2_scale);
212 // Read output quant params
213 readQuantParams(output, output_zp, output_scale);
214
215 params.input1_offset = -static_cast<int32_t>(input1_zp);
216 params.input2_offset = -static_cast<int32_t>(input2_zp);
217 params.output_offset = static_cast<int32_t>(output_zp);
218 params.left_shift = (output->type() == circle::TensorType_INT16) ? 15 : 20;
219 const double twice_max_input_scale =
220 2 * static_cast<double>(std::max(input1_scale, input2_scale));
221 const double real_input1_multiplier = static_cast<double>(input1_scale) / twice_max_input_scale;
222 const double real_input2_multiplier = static_cast<double>(input2_scale) / twice_max_input_scale;
223 const double real_output_multiplier =
224 twice_max_input_scale / ((1 << params.left_shift) * static_cast<double>(output_scale));
225
226 quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &params.input1_multiplier,
227 &params.input1_shift);
228
229 quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &params.input2_multiplier,
230 &params.input2_shift);
231
232 quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &params.output_multiplier,
233 &params.output_shift);
234
235 calculateActivationRangeQuantized(act, output_zp, output_scale, output->type(),
236 &params.quantized_activation_min,
237 &params.quantized_activation_max);
238}
void readQuantParams(const circle::Tensor *tensor, long &zero_point, float &scale)
Definition OMUtils.cpp:143
OMStatus calculateActivationRangeQuantized(circle::ActivationFunctionType activation, int32_t output_zero_point, float output_scale, circle::TensorType data_type, int32_t *activation_min, int32_t *activation_max)
Definition OMUtils.cpp:112
void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier, int *left_shift)
Definition OMUtils.cpp:60

References calculateActivationRangeQuantized(), onert_micro::core::ArithmeticQuantParams::input1_multiplier, onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input1_shift, onert_micro::core::ArithmeticQuantParams::input2_multiplier, onert_micro::core::ArithmeticQuantParams::input2_offset, onert_micro::core::ArithmeticQuantParams::input2_shift, onert_micro::core::ArithmeticQuantParams::left_shift, onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, onert_micro::core::ArithmeticQuantParams::quantized_activation_min, quantizeMultiplierSmallerThanOneExp(), and readQuantParams().

◆ computeOutSize()

int onert_micro::execute::computeOutSize ( circle::Padding  padding,
int  image_size,
int  filter_size,
int  stride,
int  dilation_rate = 1 
)
inline

Definition at line 114 of file OMUtils.h.

116{
117 int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
118
119 if (stride == 0)
120 return 0;
121
122 switch (padding)
123 {
124 case circle::Padding_SAME:
125 return (image_size + stride - 1) / stride;
126 case circle::Padding_VALID:
127 return (image_size + stride - effective_filter_size) / stride;
128 default:
129 return 0;
130 }
131}

Referenced by computePaddingHeightWidth().

◆ computePadding()

int onert_micro::execute::computePadding ( int32_t  stride,
int32_t  dilation_rate,
int32_t  in_size,
int32_t  filter_size,
int32_t  out_size 
)
inline

Definition at line 133 of file OMUtils.h.

135{
136 int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
137 int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
138 return padding > 0 ? padding : 0;
139}

Referenced by computePaddingHeightWidth().

◆ computePaddingHeightWidth()

void onert_micro::execute::computePaddingHeightWidth ( int32_t  stride_height,
int32_t  stride_width,
int32_t  dilation_rate_height,
int32_t  dilation_rate_width,
int32_t  in_height,
int32_t  in_width,
int32_t  filter_height,
int32_t  filter_width,
circle::Padding  padding,
int32_t *  padding_h,
int32_t *  padding_w 
)
inline

Definition at line 141 of file OMUtils.h.

146{
147
148 int out_width =
149 computeOutSize(padding, in_width, filter_width, stride_width, dilation_rate_width);
150 int out_height =
151 computeOutSize(padding, in_height, filter_height, stride_height, dilation_rate_height);
152
153 *padding_h =
154 computePadding(stride_height, dilation_rate_height, in_height, filter_height, out_height);
155
156 *padding_w = computePadding(stride_width, dilation_rate_width, in_width, filter_width, out_width);
157}
uint32_t computeOutSize(uint32_t imageSize, uint32_t filterSize, uint32_t stride, uint32_t paddingHead, uint32_t paddingTail)
Definition Spatial.h:23

References computeOutSize(), and computePadding().

◆ createConvParams()

OMStatus onert_micro::execute::createConvParams ( core::ConvQuant params,
const circle::Tensor *  input,
const circle::Tensor *  filter,
const circle::Tensor *  output,
circle::ActivationFunctionType  act_type 
)

Definition at line 23 of file ConvolutionCommon.cpp.

28{
29 assert(input->quantization() != nullptr); // Fix caller
30 assert(filter->quantization() != nullptr); // Fix caller
31 assert(output->quantization() != nullptr); // Fix caller
32
33 const auto *input_scales = input->quantization()->scale();
34 const auto *filter_scales = filter->quantization()->scale();
35 const auto *output_scales = output->quantization()->scale();
36
37 assert(input_scales != nullptr); // Fix caller
38 assert(filter_scales != nullptr); // Fix caller
39 assert(output_scales != nullptr); // Fix caller
40
41 assert(input_scales->size() != 0); // Fix caller
42 assert(filter_scales->size() != 0); // Fix caller
43 assert(output_scales->size() != 0); // Fix caller
44
45 const auto input_zero_points = input->quantization()->zero_point();
46 const auto filter_zero_points = filter->quantization()->zero_point();
47 const auto output_zero_points = output->quantization()->zero_point();
48
49 assert(input_zero_points != nullptr); // Fix caller
50 assert(filter_zero_points != nullptr); // Fix caller
51 assert(output_zero_points != nullptr); // Fix caller
52
53 assert(input_zero_points->size() != 0); // Fix caller
54 assert(filter_zero_points->size() != 0); // Fix caller
55 assert(output_zero_points->size() != 0); // Fix caller
56
57 const auto input_zp = input_zero_points->operator[](0);
58 const auto filter_zp = filter_zero_points->operator[](0);
59 const auto output_zp = output_zero_points->operator[](0);
60
61 const auto output_scale = output_scales->operator[](0);
62
63 int32_t activation_min{};
64 int32_t activation_max{};
65 OMStatus status = calculateActivationRangeQuantized(
66 act_type, static_cast<int32_t>(output_zp), output_scale, output->type(), &activation_min,
67 &activation_max);
68 assert(status == Ok);
69 if (status != Ok)
70 return status;
71
72 // The kernel expects input and filter zero points to be negated.
73 params.input_offset = -static_cast<int32_t>(input_zp); // Note the '-'.
74 params.weights_offset = -static_cast<int32_t>(filter_zp); // Note the '-'.
75 params.output_offset = static_cast<int32_t>(output_zp);
76 params.quantized_activation_min = activation_min;
77 params.quantized_activation_max = activation_max;
78
79 assert(filter_scales->size() > 1); // Support only channel-wise quantization
80 // Channel-wise quantization
81 const auto input_scale = input_scales->operator[](0);
82 const std::vector<double> effective_output_scale =
83 execute::getQuantizedConvolutionMultiplers(input_scale, filter_scales, output_scale);
84
85 size_t n = effective_output_scale.size();
86 params.per_channel_output_shift.resize(n);
87 params.per_channel_output_multiplier.resize(n);
88 for (size_t i = 0; i < n; ++i)
89 {
90 execute::quantizeMultiplier(effective_output_scale[i], &params.per_channel_output_multiplier[i],
91 &params.per_channel_output_shift[i]);
92 }
93
94 return Ok;
95}
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
Definition OMUtils.cpp:23
std::vector< double > getQuantizedConvolutionMultiplers(float input_scale, const flatbuffers::Vector< float > *filter_scale, float output_scale)
Definition OMUtils.h:95
std::vector< int > per_channel_output_shift
std::vector< int32_t > per_channel_output_multiplier

References calculateActivationRangeQuantized(), getQuantizedConvolutionMultiplers(), onert_micro::core::ConvQuant::input_offset, onert_micro::Ok, onert_micro::core::ConvQuant::output_offset, onert_micro::core::ConvQuant::per_channel_output_multiplier, onert_micro::core::ConvQuant::per_channel_output_shift, onert_micro::core::ConvQuant::quantized_activation_max, onert_micro::core::ConvQuant::quantized_activation_min, quantizeMultiplier(), and onert_micro::core::ConvQuant::weights_offset.

◆ evalComparisonGeneric()

template<typename T >
void onert_micro::execute::evalComparisonGeneric ( OMRuntimeKernel runtime_kernel,
bool  F(T, T) 
)

Definition at line 82 of file ComparisonCommon.h.

83{
84
85 const T *cast_input_data1 = nullptr;
86 const T *cast_input_data2 = nullptr;
87 bool *cast_output_data = nullptr;
88
89 core::OMRuntimeShape input1_shape;
90 core::OMRuntimeShape input2_shape;
91 core::OMRuntimeShape output_shape;
92
93 readDataKernel(runtime_kernel, cast_input_data1, cast_input_data2, cast_output_data, input1_shape,
94 input2_shape, output_shape);
95
96 core::ComparisonParams op_params;
97 op_params.is_broadcast = input1_shape.flatSize() != input2_shape.flatSize();
98
99 if (op_params.is_broadcast)
100 {
101 onert_micro::execute::pal::BroadcastComparison4DSlowNoScaling<T>(
102 op_params, input1_shape, cast_input_data1, input2_shape, cast_input_data2, output_shape,
103 cast_output_data, F);
104 }
105 else
106 {
107 const int64_t flat_size = input1_shape.flatSize();
108 onert_micro::execute::pal::ComparisonNoScaling<T>(flat_size, cast_input_data1, cast_input_data2,
109 cast_output_data, F);
110 }
111}
const luci_interpreter::RuntimeShape output_shape
void readDataKernel(OMRuntimeKernel *runtime_kernel, const T *&cast_input_data1, const T *&cast_input_data2, bool *&cast_output_data, core::OMRuntimeShape &input1_shape_ref, core::OMRuntimeShape &input2_shape_ref, core::OMRuntimeShape &output_shape_ref)

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::core::ComparisonParams::is_broadcast, output_shape, and readDataKernel().

◆ evalQuantizedComparisonGeneric()

template<typename T , typename AccType >
void onert_micro::execute::evalQuantizedComparisonGeneric ( OMRuntimeKernel runtime_kernel,
bool  F(AccType, AccType) 
)

Definition at line 114 of file ComparisonCommon.h.

115{
116 const circle::Tensor *input1 = nullptr;
117 const circle::Tensor *input2 = nullptr;
118 const circle::Tensor *output = nullptr;
119
120 input1 = runtime_kernel->inputs[input1TensorIdx];
121 input2 = runtime_kernel->inputs[input2TensorIdx];
122 output = runtime_kernel->outputs[outputTensorIdx];
123
124 assert(input1 != nullptr);
125 assert(input2 != nullptr);
126 assert(output != nullptr);
127
128 const T *cast_input_data1 = nullptr;
129 const T *cast_input_data2 = nullptr;
130 bool *cast_output_data = nullptr;
131
132 core::OMRuntimeShape input1_shape;
133 core::OMRuntimeShape input2_shape;
134 core::OMRuntimeShape output_shape;
135
136 readDataKernel(runtime_kernel, cast_input_data1, cast_input_data2, cast_output_data, input1_shape,
137 input2_shape, output_shape);
138
139 assert(input1->quantization() != nullptr);
140 assert(input1->quantization()->scale() != nullptr);
141 assert(input1->quantization()->scale()->size() == 1);
142 assert(input1->quantization()->zero_point() != nullptr);
143 assert(input1->quantization()->zero_point()->size() == 1);
144
145 auto input1_scale = *input1->quantization()->scale()->begin();
146 auto input2_scale = *input2->quantization()->scale()->begin();
147
148 auto input1_zero_point = *input1->quantization()->zero_point()->begin();
149 auto input2_zero_point = *input2->quantization()->zero_point()->begin();
150
151 int32_t x_multiplier;
152 int x_shift;
153
154 int32_t y_multiplier;
155 int y_shift;
156
157 onert_micro::execute::quantizeMultiplierSmallerThanOneExp(input1_scale, &x_multiplier, &x_shift);
158 onert_micro::execute::quantizeMultiplierSmallerThanOneExp(input2_scale, &y_multiplier, &y_shift);
159
160 core::ComparisonParams op_params;
161 op_params.left_shift = 8;
162 op_params.input1_offset = -input1_zero_point; // Note the '-'
163 op_params.input1_shift = x_shift;
164 op_params.input1_multiplier = x_multiplier;
165 op_params.input2_offset = -input2_zero_point; // Note the '-'
166 op_params.input2_shift = y_shift;
167 op_params.input2_multiplier = y_multiplier;
168 op_params.is_broadcast = input1_shape.flatSize() != input2_shape.flatSize();
169 ;
170
171 if (op_params.is_broadcast)
172 {
173 onert_micro::execute::pal::BroadcastComparison4DSlowWithScaling<T>(
174 op_params, input1_shape, cast_input_data1, input2_shape, cast_input_data2, output_shape,
175 cast_output_data, F);
176 }
177 else
178 {
179 const int64_t flat_size = input1_shape.flatSize();
180 onert_micro::execute::pal::ComparisonWithScaling<T>(op_params, flat_size, cast_input_data1,
181 cast_input_data2, cast_output_data, F);
182 }
183}
const circle::Tensor * outputs[maxOutputSize]
const circle::Tensor * inputs[maxInputSize]

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::core::ComparisonParams::input1_multiplier, onert_micro::core::ComparisonParams::input1_offset, onert_micro::core::ComparisonParams::input1_shift, onert_micro::core::ComparisonParams::input2_multiplier, onert_micro::core::ComparisonParams::input2_offset, onert_micro::core::ComparisonParams::input2_shift, onert_micro::execute::OMRuntimeKernel::inputs, onert_micro::core::ComparisonParams::is_broadcast, onert_micro::core::ComparisonParams::left_shift, output_shape, onert_micro::execute::OMRuntimeKernel::outputs, quantizeMultiplierSmallerThanOneExp(), and readDataKernel().

◆ execute_arg_common()

OMStatus onert_micro::execute::execute_arg_common ( const OMExecuteArgs execute_args,
const std::function< OMStatus(const core::OMRuntimeShape &input1_shape, const float *input1_data, const int *input2_data, const core::OMRuntimeShape &output_shape, int *output_data)> &  f_float 
)

Definition at line 32 of file ArgCommon.cpp.

37{
38 core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
39 core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
40 uint16_t op_index = execute_args.kernel_index;
41 const circle::Tensor *output;
42 const circle::Tensor *input1;
43 const circle::Tensor *input2;
44
45 uint8_t *output_data;
46 uint8_t *input_data;
47 uint8_t *axis_data;
48
49 // Read kernel
50 execute::OMRuntimeKernel runtime_kernel;
51 runtime_kernel.readKernel(op_index, runtime_context);
52
53 output = runtime_kernel.outputs[outputTensorIdx];
54 assert(output != nullptr);
55
56 input1 = runtime_kernel.inputs[input1TensorIdx];
57 assert(input1 != nullptr);
58
59 input2 = runtime_kernel.inputs[input2TensorIdx];
60 assert(input2 != nullptr);
61
62 runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context);
63
64 output_data = runtime_kernel.outputs_data[outputTensorIdx];
65 assert(output_data != nullptr);
66
67 input_data = runtime_kernel.inputs_data[input1TensorIdx];
68 assert(input_data != nullptr);
69
70 axis_data = runtime_kernel.inputs_data[input2TensorIdx];
71 assert(axis_data != nullptr);
72
73 OMStatus status;
74 const core::OMRuntimeShape input1_shape(input1);
75 const core::OMRuntimeShape output_shape(output);
76 switch (input1->type())
77 {
78#ifndef DIS_FLOAT
79 case circle::TensorType_FLOAT32:
80 {
81 status = f_float(input1_shape, reinterpret_cast<const float *>(input_data),
82 reinterpret_cast<const int *>(axis_data), output_shape,
83 reinterpret_cast<int *>(output_data));
84 }
85 break;
86#endif // DIS_FLOAT
87 default:
88 {
89 status = UnsupportedType;
90 assert(false && "Unsupported type.");
91 }
92 }
93 return status;
94}
uint8_t * outputs_data[maxOutputSize]
OMStatus getDataFromStorage(uint16_t op_index, core::OMRuntimeStorage &storage, core::OMRuntimeContext &context)
OMStatus readKernel(uint16_t op_index, core::OMRuntimeContext &runtime_context)
core::OMRuntimeContext & runtime_context
core::OMRuntimeStorage & runtime_storage

References onert_micro::execute::OMRuntimeKernel::getDataFromStorage(), onert_micro::execute::OMRuntimeKernel::inputs, onert_micro::execute::OMRuntimeKernel::inputs_data, onert_micro::execute::OMExecuteArgs::kernel_index, output_shape, onert_micro::execute::OMRuntimeKernel::outputs, onert_micro::execute::OMRuntimeKernel::outputs_data, onert_micro::execute::OMRuntimeKernel::readKernel(), onert_micro::execute::OMExecuteArgs::runtime_context, onert_micro::execute::OMExecuteArgs::runtime_storage, and onert_micro::UnsupportedType.

◆ execute_math_common()

OMStatus onert_micro::execute::execute_math_common ( const OMExecuteArgs execute_args,
const std::function< OMStatus(const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)> &  f_float 
)

◆ execute_pooling_common()

OMStatus onert_micro::execute::execute_pooling_common ( const OMExecuteArgs execute_args,
const std::function< OMStatus(const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)> &  f_float,
const std::function< OMStatus(const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)> &  f_int8 
)

◆ execute_relu_common()

OMStatus onert_micro::execute::execute_relu_common ( const OMExecuteArgs execute_args,
bool  is_relu_6 
)

Definition at line 32 of file ReluCommon.cpp.

34{
35 core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
36 core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
37 uint16_t op_index = execute_args.kernel_index;
38
39 const circle::Tensor *input = nullptr;
40 const circle::Tensor *output = nullptr;
41
42 uint8_t *input_data = nullptr;
43 uint8_t *output_data = nullptr;
44
45 OMStatus status = Ok;
46
47 OMRuntimeKernel runtime_kernel;
48 runtime_kernel.readKernel(op_index, runtime_context);
49
50 input = runtime_kernel.inputs[inputTensorIdx];
51 output = runtime_kernel.outputs[outputTensorIdx];
52
53 assert(input != nullptr);
54 assert(output != nullptr);
55
56 status = runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context);
57 if (status != Ok)
58 return status;
59
60 input_data = runtime_kernel.inputs_data[inputTensorIdx];
61 output_data = runtime_kernel.outputs_data[outputTensorIdx];
62
63 assert(input_data != nullptr);
64 assert(output_data != nullptr);
65
66 float alpha = 0.f;
67 auto options = runtime_kernel.first_operator->builtin_options_as_LeakyReluOptions();
68 if (options != nullptr)
69 alpha = options->alpha();
70
71 switch (input->type())
72 {
73#ifndef DIS_FLOAT
74 case circle::TensorType_FLOAT32:
75 {
76 core::OMRuntimeShape input_shape(input);
77 core::OMRuntimeShape output_shape(output);
78
79 const auto *input_data_float = core::utils::castInputData<float>(input_data);
80 auto *output_data_float = core::utils::castOutputData<float>(output_data);
81
82 assert(output_data_float);
83 const int flat_size = input_shape.flatSize();
84
85 status = pal::ReLUCommon(flat_size, input_data_float, output_data_float, alpha, is_relu_6);
86 }
87 break;
88#endif // DIS_FLOAT
89#ifndef DIS_QUANT
90 case circle::TensorType_INT8:
91 {
92 core::OMRuntimeShape input_shape(input);
93 core::OMRuntimeShape output_shape(output);
94
95 const auto *input_data_int8 = core::utils::castInputData<int8_t>(input_data);
96 auto *output_data_int8 = core::utils::castOutputData<int8_t>(output_data);
97
98 assert(output_data_int8);
99 const int flat_size = input_shape.flatSize();
100
101 status = pal::ReLUCommon(flat_size, input_data_int8, output_data_int8, alpha, is_relu_6);
102 }
103 break;
104#endif // DIS_QUANT
105
106 default:
107 {
108 status = UnsupportedType;
109 assert(false && "Unsupported type.");
110 break;
111 }
112 }
113
114 return status;
115}
const circle::Operator * first_operator
OMStatus ReLUCommon(const int flat_size, const Type *input_data, Type *output_data, const float alpha, const bool is_relu_6)

References onert_micro::execute::OMRuntimeKernel::first_operator, onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::execute::OMRuntimeKernel::getDataFromStorage(), onert_micro::execute::OMRuntimeKernel::inputs, onert_micro::execute::OMRuntimeKernel::inputs_data, onert_micro::execute::OMExecuteArgs::kernel_index, onert_micro::Ok, output_shape, onert_micro::execute::OMRuntimeKernel::outputs, onert_micro::execute::OMRuntimeKernel::outputs_data, onert_micro::execute::OMRuntimeKernel::readKernel(), onert_micro::execute::pal::ReLUCommon(), onert_micro::execute::OMExecuteArgs::runtime_context, onert_micro::execute::OMExecuteArgs::runtime_storage, and onert_micro::UnsupportedType.

◆ execute_reshape_common()

OMStatus onert_micro::execute::execute_reshape_common ( const OMExecuteArgs execute_args)

Definition at line 31 of file ReshapeCommon.cpp.

32{
33 core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
34 core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
35 uint16_t op_index = execute_args.kernel_index;
36
37 OMRuntimeKernel runtime_kernel;
38 runtime_kernel.readKernel(op_index, runtime_context);
39
40 const circle::Tensor *input = runtime_kernel.inputs[inputTensorIdx];
41 const circle::Tensor *output = runtime_kernel.outputs[outputTensorIdx];
42
43 assert(input != nullptr);
44 assert(output != nullptr);
45
46 OMStatus status = Ok;
47
48 status = runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context);
49 if (status != Ok)
50 return status;
51
52 uint8_t *input_data = runtime_kernel.inputs_data[inputTensorIdx];
53 uint8_t *output_data = runtime_kernel.outputs_data[outputTensorIdx];
54
55 assert(input_data != nullptr);
56 assert(output_data != nullptr);
57
58 // Check is it inplace kernel
59 if (input_data == output_data)
60 return Ok;
61
62 const core::OMRuntimeShape shape(input);
63
64 const size_t element_size =
65 static_cast<uint32_t>(getOMDataTypeSize(core::onertMicroDatatype(input->type())));
66 const int32_t num_elements = shape.flatSize();
67 std::memcpy(output_data, input_data, num_elements * element_size);
68
69 return status;
70}
OMDataType onertMicroDatatype(const circle::TensorType type)

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::execute::OMRuntimeKernel::getDataFromStorage(), onert_micro::execute::OMRuntimeKernel::inputs, onert_micro::execute::OMRuntimeKernel::inputs_data, onert_micro::execute::OMExecuteArgs::kernel_index, onert_micro::Ok, onert_micro::core::onertMicroDatatype(), onert_micro::execute::OMRuntimeKernel::outputs, onert_micro::execute::OMRuntimeKernel::outputs_data, onert_micro::execute::OMRuntimeKernel::readKernel(), onert_micro::execute::OMExecuteArgs::runtime_context, and onert_micro::execute::OMExecuteArgs::runtime_storage.

◆ execute_spaces_batches_nd_common()

OMStatus onert_micro::execute::execute_spaces_batches_nd_common ( const OMExecuteArgs execute_args,
const std::function< OMStatus(const core::OMRuntimeShape &unextended_input1_shape, const float *input1_data, const core::OMRuntimeShape &unextended_input2_shape, const int32_t *block_shape_data, const core::OMRuntimeShape &unextended_input3_shape, const int32_t *crops_data, const core::OMRuntimeShape &unextended_output_shape, float *output_data)> &  f 
)

◆ getQuantizedConvolutionMultipler()

/// Computes the effective output multiplier of a quantized convolution:
/// (input_scale * filter_scale) / output_scale, widened to double.
/// Preconditions (asserted): the scale product is non-negative and
/// output_scale is non-zero.
inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
                                               float output_scale)
{
  // Multiply in float first (matches the quantization pipeline), then widen.
  const auto scale_product = static_cast<double>(input_scale * filter_scale);

  assert(scale_product >= 0);
  assert(output_scale != 0.f);

  return scale_product / static_cast<double>(output_scale);
}

Referenced by getQuantizedConvolutionMultiplers().

◆ getQuantizedConvolutionMultiplers()

std::vector< double > onert_micro::execute::getQuantizedConvolutionMultiplers ( float  input_scale,
const flatbuffers::Vector< float > *  filter_scale,
float  output_scale 
)
inline

Definition at line 95 of file OMUtils.h.

97{
98 std::vector<double> effective_output_scales;
99 size_t n = filter_scale->size();
100 effective_output_scales.reserve(n);
101 for (size_t i = 0; i < n; ++i)
102 {
103 effective_output_scales.push_back(
104 getQuantizedConvolutionMultipler(input_scale, filter_scale->operator[](i), output_scale));
105 }
106 return effective_output_scales;
107}
uoffset_t size() const

References getQuantizedConvolutionMultipler(), and flatbuffers::Vector< T >::size().

Referenced by createConvParams().

◆ quantizeMultiplier()

/// Converts a positive real multiplier into a Q31 fixed-point value plus a
/// power-of-two exponent, so that double_multiplier ~= quantized * 2^(shift-31).
/// A multiplier of exactly 0.0 maps to (0, 0).
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
{
  // Zero has no normalized representation; encode it directly.
  if (double_multiplier == 0.0)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }

  // Split into a significand in [0.5, 1) and a binary exponent.
  const double significand = std::frexp(double_multiplier, shift);
  auto fixed_point = static_cast<int64_t>(std::round(significand * (int64_t(1) << 31)));

  // Rounding may bump the significand up to exactly 2^31; renormalize it
  // back into range and compensate in the exponent.
  if (fixed_point == (int64_t(1) << 31))
  {
    fixed_point /= 2;
    ++*shift;
  }
  assert(fixed_point <= std::numeric_limits<int32_t>::max());

  // A shift amount smaller than -31 would cause all bits to be shifted out
  // and thus all results would be zero. We implement that instead with
  // q_fixed==0, so as to avoid hitting issues with right-shift
  // operations with shift amounts greater than 31. Note that this happens
  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
  // that we're effectively flushing tiny double_multiplier's to zero.
  // We could conceivably handle values in the range (roughly) [32, 63]
  // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
  // the present handling is just doing 'flush denormals to zero'. We could
  // reconsider and actually generate nonzero denormals if a need arises.
  if (*shift < -31)
  {
    *shift = 0;
    fixed_point = 0;
  }

  *quantized_multiplier = static_cast<int32_t>(fixed_point);
}

Referenced by createConvParams(), and quantizeMultiplierSmallerThanOneExp().

◆ quantizeMultiplierSmallerThanOneExp()

/// Quantizes a real multiplier known to lie strictly in (0, 1), producing a
/// Q31 fixed-point value and a non-positive left shift (i.e. a right shift).
void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
                                         int *left_shift)
{
  // Only multipliers strictly inside (0, 1) are handled here.
  assert(double_multiplier < 1.0);
  assert(double_multiplier > 0.0);

  int exponent;
  onert_micro::execute::quantizeMultiplier(double_multiplier, quantized_multiplier, &exponent);
  assert(exponent <= 0); // a multiplier below 1.0 can only shift right
  *left_shift = exponent;
}

References quantizeMultiplier().

Referenced by calculateQuantParams(), and evalQuantizedComparisonGeneric().

◆ readDataKernel()

template<typename T >
void onert_micro::execute::readDataKernel ( OMRuntimeKernel runtime_kernel,
const T *&  cast_input_data1,
const T *&  cast_input_data2,
bool *&  cast_output_data,
core::OMRuntimeShape input1_shape_ref,
core::OMRuntimeShape input2_shape_ref,
core::OMRuntimeShape output_shape_ref 
)

Definition at line 44 of file ComparisonCommon.h.

48{
49 const circle::Tensor *input1 = nullptr;
50 const circle::Tensor *input2 = nullptr;
51 const circle::Tensor *output = nullptr;
52
53 uint8_t *input_data1 = nullptr;
54 uint8_t *input_data2 = nullptr;
55 uint8_t *output_data = nullptr;
56
57 input1 = runtime_kernel->inputs[input1TensorIdx];
58 input2 = runtime_kernel->inputs[input2TensorIdx];
59 output = runtime_kernel->outputs[outputTensorIdx];
60
61 assert(input1 != nullptr);
62 assert(input2 != nullptr);
63 assert(output != nullptr);
64
65 input_data1 = runtime_kernel->inputs_data[input1TensorIdx];
66 input_data2 = runtime_kernel->inputs_data[input2TensorIdx];
67 output_data = runtime_kernel->outputs_data[outputTensorIdx];
68
69 assert(input_data1 != nullptr);
70 assert(input_data2 != nullptr);
71 assert(output_data != nullptr);
72
73 cast_input_data1 = core::utils::castInputData<T>(input_data1);
74 cast_input_data2 = core::utils::castInputData<T>(input_data2);
75 cast_output_data = core::utils::castOutputData<bool>(output_data);
76
77 input1_shape_ref = std::move(core::OMRuntimeShape(input1));
78 input2_shape_ref = std::move(core::OMRuntimeShape(input2));
79 output_shape_ref = std::move(core::OMRuntimeShape(output));
80}

References onert_micro::execute::OMRuntimeKernel::inputs, onert_micro::execute::OMRuntimeKernel::inputs_data, onert_micro::execute::OMRuntimeKernel::outputs, and onert_micro::execute::OMRuntimeKernel::outputs_data.

Referenced by evalComparisonGeneric(), and evalQuantizedComparisonGeneric().

◆ readKernelDataTISO()

/**
 * @brief Reads tensors, raw data pointers, shapes and element type for a
 *        two-input/single-output (TISO) kernel.
 *
 * @return Ok on success, or the failing status from getDataFromStorage.
 */
OMStatus readKernelDataTISO(const OMExecuteArgs &execute_args, uint8_t *&input_data1,
                            uint8_t *&input_data2, uint8_t *&output_data,
                            core::OMRuntimeShape &input1_shape_ref,
                            core::OMRuntimeShape &input2_shape_ref,
                            core::OMRuntimeShape &output_shape_ref,
                            circle::TensorType &tensor_type)
{
  core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
  core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
  const uint16_t op_index = execute_args.kernel_index;

  OMRuntimeKernel runtime_kernel;
  runtime_kernel.readKernel(op_index, runtime_context);

  OMStatus status = runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context);
  if (status != Ok)
    return status;

  // Initialize directly from the kernel instead of null-init-then-assign.
  const circle::Tensor *input1 = runtime_kernel.inputs[TensorIndexTISO::input1TensorIdx];
  const circle::Tensor *input2 = runtime_kernel.inputs[TensorIndexTISO::input2TensorIdx];
  const circle::Tensor *output = runtime_kernel.outputs[TensorIndexTISO::outputTensorIdx];

  assert(input1 != nullptr);
  assert(input2 != nullptr);
  assert(output != nullptr);

  input_data1 = runtime_kernel.inputs_data[TensorIndexTISO::input1TensorIdx];
  input_data2 = runtime_kernel.inputs_data[TensorIndexTISO::input2TensorIdx];
  output_data = runtime_kernel.outputs_data[TensorIndexTISO::outputTensorIdx];

  // Assign directly from the temporaries: std::move on a prvalue is redundant
  // (clang-tidy performance-move-const-arg) — move-assignment happens anyway.
  input1_shape_ref = core::OMRuntimeShape(input1);
  input2_shape_ref = core::OMRuntimeShape(input2);
  output_shape_ref = core::OMRuntimeShape(output);

  tensor_type = input1->type();

  return status;
}
constexpr uint32_t input1TensorIdx
constexpr uint32_t outputTensorIdx
constexpr uint32_t input2TensorIdx

References onert_micro::execute::OMRuntimeKernel::getDataFromStorage(), TensorIndexTISO::input1TensorIdx, TensorIndexTISO::input2TensorIdx, onert_micro::execute::OMRuntimeKernel::inputs, onert_micro::execute::OMRuntimeKernel::inputs_data, onert_micro::execute::OMExecuteArgs::kernel_index, onert_micro::Ok, onert_micro::execute::OMRuntimeKernel::outputs, onert_micro::execute::OMRuntimeKernel::outputs_data, TensorIndexTISO::outputTensorIdx, onert_micro::execute::OMRuntimeKernel::readKernel(), onert_micro::execute::OMExecuteArgs::runtime_context, and onert_micro::execute::OMExecuteArgs::runtime_storage.

◆ readQuantParams()

/// Extracts the single (scale, zero_point) pair from a tensor's quantization
/// parameters. Callers must guarantee exactly one pair is present (asserted).
void readQuantParams(const circle::Tensor *tensor, long &zero_point, float &scale)
{
  const auto *quant = tensor->quantization();

  // additional check
  assert(quant != nullptr);                                                    // Fix caller
  assert(quant->scale() != nullptr and quant->scale()->size() == 1);           // Fix caller
  assert(quant->zero_point() != nullptr and quant->zero_point()->size() == 1); // Fix caller

  // read zero point
  zero_point = (*quant->zero_point())[0];
  // read scale
  scale = (*quant->scale())[0];
}

Referenced by calculateQuantParams().

◆ SISOHeader()

/**
 * @brief Common prologue for single-input/single-output kernels: resolves the
 *        input/output tensors and their raw data pointers.
 *
 * @return Ok on success, or the failing status from readKernel /
 *         getDataFromStorage.
 */
OMStatus SISOHeader(const OMExecuteArgs &execute_args, const circle::Tensor **input,
                    const circle::Tensor **output, uint8_t **input_data, uint8_t **output_data)
{
  core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
  core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
  const uint16_t op_index = execute_args.kernel_index;

  OMRuntimeKernel runtime_kernel;
  // Bug fix: the readKernel status was previously discarded; a failed read
  // would fall through to reading unset tensor slots. Propagate it instead,
  // matching how getDataFromStorage is handled below.
  OMStatus status = runtime_kernel.readKernel(op_index, runtime_context);
  if (status != Ok)
    return status;

  *input = runtime_kernel.inputs[0];
  *output = runtime_kernel.outputs[0];

  assert(*input != nullptr);
  assert(*output != nullptr);

  status = runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context);
  if (status != Ok)
    return status;

  *input_data = runtime_kernel.inputs_data[0];
  *output_data = runtime_kernel.outputs_data[0];

  assert(*input_data != nullptr);
  assert(*output_data != nullptr);

  return status;
}

References onert_micro::execute::OMRuntimeKernel::getDataFromStorage(), onert_micro::execute::OMRuntimeKernel::inputs, onert_micro::execute::OMRuntimeKernel::inputs_data, onert_micro::execute::OMExecuteArgs::kernel_index, onert_micro::Ok, onert_micro::execute::OMRuntimeKernel::outputs, onert_micro::execute::OMRuntimeKernel::outputs_data, onert_micro::execute::OMRuntimeKernel::readKernel(), onert_micro::execute::OMExecuteArgs::runtime_context, and onert_micro::execute::OMExecuteArgs::runtime_storage.

◆ TISOHeader()

/**
 * @brief Common prologue for two-input/single-output kernels: reads the kernel
 *        into @p runtime_kernel and resolves the tensor pointers.
 *
 * @return Ok on success, or the failing status from readKernel /
 *         getDataFromStorage.
 */
OMStatus TISOHeader(const OMExecuteArgs &execute_args, const circle::Tensor **input1,
                    const circle::Tensor **input2, const circle::Tensor **output,
                    OMRuntimeKernel *runtime_kernel)
{
  core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
  core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
  const uint16_t op_index = execute_args.kernel_index;

  // Bug fix: the readKernel status was previously assigned and then
  // overwritten without ever being checked; a failed read would fall through
  // to dereferencing unset tensor slots. Return early instead.
  OMStatus status = runtime_kernel->readKernel(op_index, runtime_context);
  if (status != Ok)
    return status;

  *input1 = runtime_kernel->inputs[0];
  *input2 = runtime_kernel->inputs[1];
  *output = runtime_kernel->outputs[0];

  assert(*input1 != nullptr);
  assert(*input2 != nullptr);
  assert(*output != nullptr);

  // The final status (Ok or the storage failure) is the function result.
  return runtime_kernel->getDataFromStorage(op_index, runtime_storage, runtime_context);
}

References onert_micro::execute::OMRuntimeKernel::getDataFromStorage(), onert_micro::execute::OMRuntimeKernel::inputs, onert_micro::execute::OMExecuteArgs::kernel_index, onert_micro::Ok, onert_micro::execute::OMRuntimeKernel::outputs, onert_micro::execute::OMRuntimeKernel::readKernel(), onert_micro::execute::OMExecuteArgs::runtime_context, and onert_micro::execute::OMExecuteArgs::runtime_storage.

Variable Documentation

◆ kernel_builtin_execute

constexpr KernelBuiltinExecuteRegistry onert_micro::execute::kernel_builtin_execute
constexpr

◆ kernel_custom_execute

constexpr KernelCustomExecuteRegistry onert_micro::execute::kernel_custom_execute
constexpr