20#include "kernels/BinaryOpCommon.h"
21#include "kernels/Utils.h"
23#include <tensorflow/lite/kernels/internal/reference/add.h>
24#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
// Fragment of a check on quantization parameters. The enclosing function header is not
// visible in this extract (presumably Add::configure — TODO confirm).
42 if (
// The branch is taken when the first input's element type is DataType::S16.
input1()->element_type() == DataType::S16)
// Tail of a condition comparing the output tensor's zero point to 0; the start of this
// statement is not visible here.
47 output()->zero_point() == 0);
// Fragment of the dispatch on the first input's element type. The enclosing function header
// is not visible in this extract (presumably Add::execute — TODO confirm).
55 switch (
input1()->element_type())
57 case DataType::FLOAT32:
// The integer path is a single template instantiated for int64_t and int32_t; the case
// labels selecting each instantiation are not visible in this extract.
61 evalInteger<int64_t>();
64 evalInteger<int32_t>();
// Raises std::runtime_error with a fixed message (presumably the default branch for element
// types without a matching case — TODO confirm).
73 throw std::runtime_error(
"luci-intp Add Unsupported type.");
// Floating-point evaluation path of Add.
// Only part of the body is visible in this extract: it value-initializes a
// tflite::ArithmeticParams, records the result of ProcessBroadcastShapes in need_broadcast,
// and references the BroadcastAdd4DSlow reference kernel. The arguments of those calls and
// any non-broadcast path are not visible here.
77void Add::evalFloat()
const
// Value-initialized, so every field starts zeroed before any visible assignment.
79 tflite::ArithmeticParams
params{};
// need_broadcast presumably selects between the broadcasting kernel and a plain one —
// TODO confirm against the full file.
82 const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
87 tflite::reference_ops::BroadcastAdd4DSlow(
// Integer evaluation path of Add, templated on the element type T.
// The visible call sites instantiate it with int64_t and int32_t.
// Only part of the body is visible in this extract: it value-initializes a
// tflite::ArithmeticParams, records the result of ProcessBroadcastShapes in need_broadcast,
// and references the BroadcastAdd4DSlow reference kernel. Call arguments and any
// non-broadcast path are not visible here.
99template <
typename T>
void Add::evalInteger()
const
// Value-initialized, so every field starts zeroed before any visible assignment.
101 tflite::ArithmeticParams
params{};
// need_broadcast presumably selects between the broadcasting kernel and a plain one —
// TODO confirm against the full file.
104 const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
109 tflite::reference_ops::BroadcastAdd4DSlow(
// Quantized evaluation path of Add.
// Visible steps: read the three tensor scales, derive real-valued rescaling factors for both
// inputs and the output, copy the fixed-point multipliers/shifts and activation bounds into a
// tflite::ArithmeticParams, then reference ProcessBroadcastShapes and BroadcastAdd4DSlow.
// Several lines of the original body are missing from this extract; see the hedged notes below.
121void Add::evalQuantized()
const
// Scales are widened to double before the multiplier arithmetic below.
123 const auto input1_scale =
static_cast<double>(
input1()->
scale());
124 const auto input2_scale =
static_cast<double>(
input2()->
scale());
125 const auto output_scale =
static_cast<double>(
output()->
scale());
// left_shift is handed to the kernel through params.left_shift and is divided back out in
// real_output_multiplier via (1 << left_shift).
127 const int left_shift = 20;
// Both input multipliers are normalized by twice the larger input scale, so for positive
// scales each real input multiplier is at most 0.5.
128 const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
129 const double real_input1_multiplier = input1_scale / twice_max_input_scale;
130 const double real_input2_multiplier = input2_scale / twice_max_input_scale;
// Undoes the input normalization and the left shift, and converts to the output scale.
131 const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
// Zero-initialized here; presumably filled by quantizeMultiplierSmallerThanOneExp calls on
// lines missing from this extract — TODO confirm.
133 int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
134 int input1_shift{}, input2_shift{}, output_shift{};
// Zero-initialized here; presumably filled by calculateActivationRangeQuantized on a line
// missing from this extract — TODO confirm.
139 int32_t activation_min{};
140 int32_t activation_max{};
// Pack everything computed above into the kernel parameter struct. Assignments between the
// visible ones (e.g. any offsets) are not visible in this extract.
143 tflite::ArithmeticParams
params{};
144 params.left_shift = left_shift;
147 params.input1_multiplier = input1_multiplier;
148 params.input1_shift = input1_shift;
150 params.input2_multiplier = input2_multiplier;
151 params.input2_shift = input2_shift;
153 params.output_multiplier = output_multiplier;
154 params.output_shift = output_shift;
155 params.quantized_activation_min = activation_min;
156 params.quantized_activation_max = activation_max;
// need_broadcast presumably selects between the broadcasting kernel and a plain one —
// TODO confirm against the full file.
158 const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
163 tflite::reference_ops::BroadcastAdd4DSlow(
// 16-bit quantized evaluation path of Add.
// Visible steps: read the three tensor scales, derive real-valued rescaling factors, and
// build a per-element lambda that adds two int16_t values in fixed point.
// The end of the lambda and the code that applies it are not visible in this extract.
175void Add::evalQuantizedS16()
const
// Scales are widened to double before the multiplier arithmetic below.
177 const auto input1_scale =
static_cast<double>(
input1()->
scale());
178 const auto input2_scale =
static_cast<double>(
input2()->
scale());
179 const auto output_scale =
static_cast<double>(
output()->
scale());
// constexpr, so the lambda below can use it without listing it in its capture list.
181 constexpr int left_shift = 12;
// Both input multipliers are normalized by twice the larger input scale, so for positive
// scales each real input multiplier is at most 0.5.
182 const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
183 const double real_input1_multiplier = input1_scale / twice_max_input_scale;
184 const double real_input2_multiplier = input2_scale / twice_max_input_scale;
// Undoes the input normalization and the left shift, and converts to the output scale.
185 const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
// Zero-initialized here; presumably filled by quantizeMultiplierSmallerThanOneExp calls on
// lines missing from this extract — TODO confirm.
187 int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
188 int input1_shift{}, input2_shift{}, output_shift{};
// Zero-initialized here; presumably filled by calculateActivationRangeQuantized on a line
// missing from this extract — TODO confirm.
193 int32_t activation_min{};
194 int32_t activation_max{};
// Element-wise add. All quantization parameters are captured by value.
197 auto fn = [input1_multiplier, input1_shift,
198 input2_multiplier, input2_shift,
199 output_multiplier, output_shift,
200 activation_min, activation_max](int16_t input1_val, int16_t input2_val) {
// Widen each operand to 32 bits and shift left by left_shift.
// NOTE(review): the operands are signed and may be negative; left-shifting a negative signed
// value is undefined behavior before C++20 — confirm the language standard in use or
// consider multiplying by (1 << left_shift) instead.
201 const int32_t shifted_input1_val =
static_cast<int32_t
>(input1_val) << left_shift;
202 const int32_t shifted_input2_val =
static_cast<int32_t
>(input2_val) << left_shift;
// Rescale each shifted operand with its own multiplier/shift pair.
203 const int32_t scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
204 shifted_input1_val, input1_multiplier, input1_shift);
205 const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
206 shifted_input2_val, input2_multiplier, input2_shift);
// Sum the rescaled operands, then rescale the sum with the output multiplier/shift.
207 const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
208 const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
209 raw_sum, output_multiplier, output_shift);
// Clamp to [activation_min, activation_max] before narrowing back to int16_t.
210 const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output));
211 return static_cast<int16_t
>(clamped_output);
const AddParams & params() const
void resize(const Shape &new_shape)
int32_t zero_point() const
const Tensor * input2() const
Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params)
void configure() override
const Tensor * input1() const
void execute() const override
#define LUCI_INTERPRETER_CHECK(cond)
Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape)
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier, int *left_shift)
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
void BinaryOpBroadcastSlow(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, const tflite::RuntimeShape &unextended_input2_shape, const T *input2_data, const tflite::RuntimeShape &unextended_output_shape, T *output_data, Op op)