19#include "kernels/Utils.h"
21#include "kernels/BinaryOpCommon.h"
// Quantized (S8/S16) elementwise Add: derives fixed-point multipliers and
// shifts from the input/output quantization scales, then dispatches to the
// integer PAL Add kernel.
// NOTE(review): this chunk is an extraction fragment — many original source
// lines are missing, so the statements below are not contiguous.
32void evalQuantized(
const circle::Tensor *input1,
const circle::Tensor *input2,
33 const circle::Tensor *output,
const circle::AddOptions *options,
// NOTE(review): precedence smell — `&&` binds tighter than `or`, so this
// parses as `type == S16 or (type == S8 && "Wrong Type")`. It still checks
// the intended condition only because the string literal is truthy; clearer
// as `(type == DataType::S16 or type == DataType::S8) && "Wrong Type"`.
36 assert(type == DataType::S16 or type == DataType::S8 &&
"Wrong Type");
44 const bool need_broadcast =
// Broadcasting is not implemented for the quantized path — reject it early.
47 assert(need_broadcast ==
false &&
"Broadcast for INT8 and INT16 not supported now");
// Input offsets are the negated zero-points; the output offset is added
// back after rescaling (TFLite quantized-add convention).
49 params.input1_offset = -Tensor::zero_point(input1);
50 params.input2_offset = -Tensor::zero_point(input2);
51 params.output_offset = Tensor::zero_point(output);
// Left shift provides fixed-point headroom before the sum: 15 bits for
// S16, 20 bits for S8 (matches the TFLite reference add kernel).
52 params.left_shift = (
type == DataType::S16) ? 15 : 20;
54 const auto input1_scale = Tensor::scale(input1);
55 const auto input2_scale = Tensor::scale(input2);
56 const auto output_scale = Tensor::scale(output);
// Normalize both input scales by twice the larger one so each real
// multiplier is <= 0.5 and representable by a smaller-than-one-exp
// quantized multiplier.
58 const double twice_max_input_scale =
59 2 *
static_cast<double>(std::max(input1_scale, input2_scale));
60 const double real_input1_multiplier =
static_cast<double>(input1_scale / twice_max_input_scale);
61 const double real_input2_multiplier =
static_cast<double>(input2_scale / twice_max_input_scale);
62 const double real_output_multiplier =
63 twice_max_input_scale / ((1 << params.left_shift) *
static_cast<double>(output_scale));
// Convert each real multiplier into a (quantized multiplier, shift) pair.
// NOTE(review): `¶ms` below is `&params` garbled by the extraction
// (the `&para` prefix was decoded as the pilcrow entity) — restore `&`
// when reconciling against the original file.
66 ¶ms.input1_shift);
68 ¶ms.input2_shift);
70 ¶ms.output_shift);
// Clamp bounds derived from the fused activation and output quantization.
73 output, ¶ms.quantized_activation_min,
74 ¶ms.quantized_activation_max);
// Dispatch on element type; raw tensor buffers come from the runtime graph.
75 if (type == DataType::S8)
79 kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(input1)),
80 kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(input2)),
81 kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(output)));
87 kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(input1)),
88 kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(input2)),
89 kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(output)));
// Configure-time validation fragment: checks element-type agreement between
// the two inputs and, for S16, enforces symmetric per-tensor quantization.
// NOTE(review): extraction fragment — the LUCI_INTERPRETER_CHECK(...) call
// heads and other surrounding lines are missing from this view.
101 Tensor::element_type(kernel.
input2()));
103 Tensor::element_type(kernel.
input2()));
// S16 add requires symmetric quantization (zero-point == 0).
106 if (Tensor::element_type(kernel.
input1()) == DataType::S16)
// Per-tensor (not per-channel) quantization: exactly one zero-point entry.
109 Tensor::zero_points(kernel.
input2()).size() == 1);
111 Tensor::zero_point(kernel.
input2()) == 0 &&
112 Tensor::zero_point(kernel.
output()) == 0);
// Execute-time dispatch fragment for CircleAdd: reads the fused-activation
// options from the flatbuffer op, handles dynamic output shapes (unless
// built with DIS_DYN_SHAPES), then dispatches by element type to the
// FLOAT32 / S64 / S32 PAL kernels or to the quantized evalQuantized path.
// NOTE(review): extraction fragment — switch/case scaffolding and several
// statements are missing from this view.
121 const auto *options = cur_op->builtin_options_as_AddOptions();
131#ifndef DIS_DYN_SHAPES
// Compute the flat byte size of the (possibly dynamic) output tensor:
// product of the dims times the element size.
151 int32_t data_size = 1;
152 for (
int i = 0; i < num_dims; ++i)
157 data_size *=
size(Tensor::element_type(kernel.
output()));
// NOTE(review): raw new[] for the dynamically-shaped output buffer —
// ownership is presumably handed to the runtime graph (resetTensorData);
// confirm it is released there, otherwise this leaks.
166 auto new_output_data =
new uint8_t[data_size];
171 const auto type = Tensor::element_type(kernel.
input1());
175 case DataType::FLOAT32:
177 auto tiso_func = luci_interpreter_pal::Add<float>;
178 auto broadcast_tiso_func = luci_interpreter_pal::BroadcastAdd4DSlow<float>;
// In-place variant reuses an input buffer for the output; otherwise the
// TISO kernel-data struct carries the separate buffers.
181 kernels::evalTISOInplaceKernel<float>(tiso_func, broadcast_tiso_func, &kernel, options,
182 std::move(input_shape1), std::move(input_shape2),
188 kernels::evalTISOKernel<float>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
189 options, std::move(input_shape1), std::move(input_shape2),
// S64 path: same structure as FLOAT32, instantiated for int64_t.
197 auto tiso_func = luci_interpreter_pal::Add<int64_t>;
198 auto broadcast_tiso_func = luci_interpreter_pal::BroadcastAdd4DSlow<int64_t>;
201 kernels::evalTISOInplaceKernel<int64_t>(tiso_func, broadcast_tiso_func, &kernel, options,
202 std::move(input_shape1), std::move(input_shape2),
208 kernels::evalTISOKernel<int64_t>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
209 options, std::move(input_shape1), std::move(input_shape2),
// S32 path: same structure, instantiated for int32_t.
216 auto tiso_func = luci_interpreter_pal::Add<int32_t>;
217 auto broadcast_tiso_func = luci_interpreter_pal::BroadcastAdd4DSlow<int32_t>;
220 kernels::evalTISOInplaceKernel<int32_t>(tiso_func, broadcast_tiso_func, &kernel, options,
221 std::move(input_shape1), std::move(input_shape2),
227 kernels::evalTISOKernel<int32_t>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
228 options, std::move(input_shape1), std::move(input_shape2),
// Quantized S8/S16 path delegates to evalQuantized above.
237 evalQuantized(kernel.
input1(), kernel.
input2(), kernel.
output(), options, runtime_graph,
// Default case: any other element type is unsupported.
243 assert(
false &&
"Unsupported type.");
// NOTE(review): the declarations below appear to be a symbol index appended
// by the extraction tool (bare signatures with no bodies or terminating
// semicolons), not part of the original translation unit. They are kept
// verbatim for reference and will not compile as-is; drop this section when
// reconciling against the real source file.
void addDynamicShapeTensor(const circle::Tensor *tensor, luci_interpreter::RuntimeShape &&shapes)
void resetTensorData(uint8_t *new_data, const circle::Tensor *tensor)
bool is_inplace_op(const circle::Operator *op)
int32_t dimensionsCount() const
int32_t dims(int i) const
void setDim(int i, int32_t val)
const circle::Tensor * output() const
const circle::Tensor * input2() const
const circle::Tensor * input1() const
#define LUCI_INTERPRETER_CHECK(cond)
const luci_interpreter::RuntimeShape output_shape
DataType
"scalar" value type
void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier, int *left_shift)
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
luci_interpreter::RuntimeShape getTensorRuntimeShape(const circle::Tensor *circle_tensor, BaseRuntimeGraph *runtime_graph)
void Add(const ArithmeticParams ¶ms, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
bool ProcessBroadcastShapes(const luci_interpreter::RuntimeShape &shape0, const luci_interpreter::RuntimeShape &shape1, luci_interpreter_pal::ArithmeticParams *params)
RuntimeGraph BaseRuntimeGraph
void execute_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
void configure_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
FusedActFunc luci_actfunc(const circle::ActivationFunctionType type)