// quantizeMultiplier (fragment — several original lines are elided from this
// excerpt; comments describe only what is visible here).
// Converts a double multiplier into a 31-bit fixed-point mantissa
// (*quantized_multiplier) plus a base-2 exponent (*shift), the standard
// representation used by integer-only quantized kernels.
24 int32_t *quantized_multiplier,
int *shift)
// A zero multiplier maps to a zero mantissa (the shift assignment for this
// early-out branch is not visible in this excerpt).
26 if (double_multiplier == 0.0)
28 *quantized_multiplier = 0;
// std::frexp decomposes the value as q * 2^(*shift) with |q| in [0.5, 1).
33 const double q = std::frexp(double_multiplier, shift);
// Round the normalized mantissa into Q31 fixed point: q * 2^31.
34 auto q_fixed =
static_cast<int64_t
>(std::round(q * (int64_t(1) << 31)));
// Rounding can push q_fixed to exactly 2^31 (one past INT32_MAX) when q is
// very close to 1. NOTE(review): the handler body is elided here; presumably
// it halves q_fixed and increments *shift — confirm against the full source.
36 if (q_fixed == (int64_t(1) << 31))
// After overflow handling the mantissa must fit in int32_t.
41 assert(q_fixed <= std::numeric_limits<int32_t>::max());
// Store the final 31-bit fixed-point mantissa.
57 *quantized_multiplier =
static_cast<int32_t
>(q_fixed);
// quantizeMultiplierSmallerThanOneExp (fragment — body largely elided).
// Only the output parameter and the preconditions are visible: the input
// multiplier must lie strictly in (0, 1).
61 int32_t *quantized_multiplier,
// Preconditions: a multiplier in (0, 1) guarantees a non-positive exponent
// from the underlying frexp-based decomposition — TODO confirm against the
// elided body (presumably it delegates to quantizeMultiplier).
64 assert(double_multiplier < 1.0);
65 assert(double_multiplier > 0.0);
// calculateActivationRangeQuantizedImpl (fragment — the enclosing switch
// statement, break statements, and return are elided from this excerpt).
// Narrows the representable quantized range [qmin, qmax] to the range implied
// by the fused activation function, writing the clamped bounds into
// *activation_min / *activation_max.
74OMStatus calculateActivationRangeQuantizedImpl(circle::ActivationFunctionType activation,
75 int32_t qmin, int32_t qmax, int32_t zero_point,
76 float scale, int32_t *activation_min,
77 int32_t *activation_max)
// Maps a real value x into the quantized domain: zero_point + round(x/scale).
81 auto quantize = [scale, zero_point](
float x) {
82 return zero_point +
static_cast<int32_t
>(std::round(x / scale));
// NONE and TANH impose no clamping: use the full representable range.
87 case circle::ActivationFunctionType::ActivationFunctionType_NONE:
88 case circle::ActivationFunctionType::ActivationFunctionType_TANH:
89 *activation_min = qmin;
90 *activation_max = qmax;
// RELU: clamp the lower bound at quantized 0; upper bound unchanged.
92 case circle::ActivationFunctionType::ActivationFunctionType_RELU:
93 *activation_min = std::max(qmin, quantize(0.0f));
94 *activation_max = qmax;
// RELU_N1_TO_1: clamp to the quantized image of [-1, 1].
96 case circle::ActivationFunctionType::ActivationFunctionType_RELU_N1_TO_1:
97 *activation_min = std::max(qmin, quantize(-1.0f));
98 *activation_max = std::min(qmax, quantize(1.0f));
// RELU6: clamp to the quantized image of [0, 6].
100 case circle::ActivationFunctionType::ActivationFunctionType_RELU6:
101 *activation_min = std::max(qmin, quantize(0.0f));
102 *activation_max = std::min(qmax, quantize(6.0f));
// Any other activation kind is a programming error at this layer.
105 assert(
false &&
"Unsupported activation.");
// calculateActivationRangeQuantized (fragment — the switch header, qmin
// assignment for UINT8, break statements, and declarations of qmin/qmax are
// elided from this excerpt).
// Selects the representable [qmin, qmax] window for the output tensor type,
// then delegates to calculateActivationRangeQuantizedImpl to apply the fused
// activation clamp.
113 circle::ActivationFunctionType activation, int32_t output_zero_point,
float output_scale,
114 circle::TensorType data_type, int32_t *activation_min, int32_t *activation_max)
// UINT8: range is [0, 255]; the qmin = 0 line is not visible in this excerpt.
120 case circle::TensorType_UINT8:
122 qmax = std::numeric_limits<uint8_t>::max();
// INT8: range is [-128, 127].
124 case circle::TensorType_INT8:
125 qmin = std::numeric_limits<int8_t>::min();
126 qmax = std::numeric_limits<int8_t>::max();
// INT16: symmetric quantization is required, so the zero point must be 0.
128 case circle::TensorType_INT16:
130 assert(output_zero_point == 0);
131 qmin = std::numeric_limits<int16_t>::min();
132 qmax = std::numeric_limits<int16_t>::max();
// Any other tensor type is unsupported for quantized activation ranges.
135 assert(
false &&
"Unsupported type.");
// Forward the type-derived window plus quantization params to the impl.
139 return calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output_zero_point,
140 output_scale, activation_min, activation_max);
// readQuantParams (fragment — the function signature is elided; per the
// appended declaration index it reads a tensor's zero_point and scale into
// out-parameters).
// Preconditions: the tensor carries quantization metadata with exactly one
// scale and exactly one zero point (i.e. per-tensor, not per-channel,
// quantization).
147 assert(tensor->quantization() !=
nullptr);
148 assert(tensor->quantization()->scale() !=
nullptr and
149 tensor->quantization()->scale()->size() == 1);
150 assert(tensor->quantization()->zero_point() !=
nullptr and
151 tensor->quantization()->zero_point()->size() == 1);
// Extract the single per-tensor zero point and scale (flatbuffers vector
// element access via operator[]).
154 zero_point = tensor->quantization()->zero_point()->operator[](0);
156 scale = tensor->quantization()->scale()->operator[](0);
// SISOHeader (fragment — the function opening, status/error-propagation lines
// and the *input_data / *output_data assignments are elided from this
// excerpt).
// Common prologue for single-input/single-output kernels: reads the kernel at
// op_index, exposes its input/output tensors and their raw data pointers via
// the out-parameters.
160 const circle::Tensor **input,
161 const circle::Tensor **output, uint8_t **input_data,
162 uint8_t **output_data)
// Resolve the op's tensors from the runtime context.
172 runtime_kernel.
readKernel(op_index, runtime_context);
174 *input = runtime_kernel.
inputs[0];
175 *output = runtime_kernel.
outputs[0];
// Both tensors must exist for a SISO kernel.
177 assert(*input !=
nullptr);
178 assert(*output !=
nullptr);
// Fetch the backing buffers from runtime storage; the data-pointer
// assignments between here and the asserts are not visible in this excerpt.
180 status = runtime_kernel.
getDataFromStorage(op_index, runtime_storage, runtime_context);
188 assert(*input_data !=
nullptr);
189 assert(*output_data !=
nullptr);
// calculateQuantParams (fragment — the scale/zero-point extraction before
// line 218 and the multiplier-quantization / activation-range code after
// line 224 are elided from this excerpt).
// Prepares ArithmeticQuantParams for a quantized binary elementwise op
// (add/sub-style): rescales both inputs to a shared scale and derives the
// output rescaling multiplier.
195 const circle::Tensor *input1,
196 const circle::Tensor *input2,
197 const circle::Tensor *output,
198 circle::ActivationFunctionType act)
// INT16 paths get a 15-bit headroom shift, 8-bit paths 20 bits; this bounds
// intermediate magnitudes in the fixed-point arithmetic.
218 params.
left_shift = (output->type() == circle::TensorType_INT16) ? 15 : 20;
// Common denominator: twice the larger of the two input scales, so each
// real_input multiplier below is <= 0.5.
219 const double twice_max_input_scale =
220 2 *
static_cast<double>(std::max(input1_scale, input2_scale));
221 const double real_input1_multiplier =
static_cast<double>(input1_scale) / twice_max_input_scale;
222 const double real_input2_multiplier =
static_cast<double>(input2_scale) / twice_max_input_scale;
// Output multiplier folds the headroom shift back out and converts from the
// shared input scale to the output scale.
223 const double real_output_multiplier =
224 twice_max_input_scale / ((1 << params.
left_shift) *
static_cast<double>(output_scale));
// TISOHeader (fragment — the function opening and status/error-propagation
// lines are elided from this excerpt).
// Common prologue for two-input/single-output kernels: reads the kernel at
// op_index via the caller-provided runtime_kernel and exposes its two input
// tensors and output tensor through the out-parameters.
241 const circle::Tensor **input1,
242 const circle::Tensor **input2,
243 const circle::Tensor **output,
// Resolve the op's tensors from the runtime context.
252 status = runtime_kernel->
readKernel(op_index, runtime_context);
254 *input1 = runtime_kernel->
inputs[0];
255 *input2 = runtime_kernel->
inputs[1];
256 *output = runtime_kernel->
outputs[0];
// All three tensors must exist for a TISO kernel.
258 assert(*input1 !=
nullptr);
259 assert(*input2 !=
nullptr);
260 assert(*output !=
nullptr);
// Fetch the backing buffers from runtime storage (error handling of the
// returned status is not visible in this excerpt).
262 status = runtime_kernel->
getDataFromStorage(op_index, runtime_storage, runtime_context);
uint8_t * outputs_data[maxOutputSize]
OMStatus getDataFromStorage(uint16_t op_index, core::OMRuntimeStorage &storage, core::OMRuntimeContext &context)
uint8_t * inputs_data[maxInputSize]
OMStatus readKernel(uint16_t op_index, core::OMRuntimeContext &runtime_context)
const circle::Tensor * outputs[maxOutputSize]
const circle::Tensor * inputs[maxInputSize]
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
OMStatus SISOHeader(const OMExecuteArgs &execute_args, const circle::Tensor **input, const circle::Tensor **output, uint8_t **input_data, uint8_t **output_data)
void readQuantParams(const circle::Tensor *tensor, long &zero_point, float &scale)
OMStatus calculateActivationRangeQuantized(circle::ActivationFunctionType activation, int32_t output_zero_point, float output_scale, circle::TensorType data_type, int32_t *activation_min, int32_t *activation_max)
void calculateQuantParams(core::ArithmeticQuantParams &params, const circle::Tensor *input1, const circle::Tensor *input2, const circle::Tensor *output, circle::ActivationFunctionType act)
void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier, int *left_shift)
OMStatus TISOHeader(const OMExecuteArgs &execute_args, const circle::Tensor **input1, const circle::Tensor **input2, const circle::Tensor **output, OMRuntimeKernel *runtime_kernel)
int32_t quantized_activation_min
int32_t quantized_activation_max
int32_t input1_multiplier
int32_t input2_multiplier
int32_t output_multiplier
core::OMRuntimeContext & runtime_context
core::OMRuntimeStorage & runtime_storage