20#include "kernels/BinaryOpCommon.h"
21#include "kernels/Utils.h"
23#include <tensorflow/lite/kernels/internal/reference/add.h>
24#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
42 if (
input1()->element_type() == DataType::S16)
47 output()->zero_point() == 0);
55 switch (
input1()->element_type())
57 case DataType::FLOAT32:
73 throw std::runtime_error(
"luci-intp Add Unsupported type.");
77void Add::evalFloat()
const
79 tflite::ArithmeticParams
params{};
82 const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
87 tflite::reference_ops::BroadcastAdd4DSlow(
99template <
typename T>
void Add::evalInteger()
const
101 tflite::ArithmeticParams
params{};
104 const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
109 tflite::reference_ops::BroadcastAdd4DSlow(
121void Add::evalQuantized()
const
125 const auto output_scale =
static_cast<double>(
output()->
scale());
127 const int left_shift = 20;
133 int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
134 int input1_shift{}, input2_shift{}, output_shift{};
139 int32_t activation_min{};
140 int32_t activation_max{};
143 tflite::ArithmeticParams
params{};
144 params.left_shift = left_shift;
147 params.input1_multiplier = input1_multiplier;
148 params.input1_shift = input1_shift;
150 params.input2_multiplier = input2_multiplier;
151 params.input2_shift = input2_shift;
153 params.output_multiplier = output_multiplier;
154 params.output_shift = output_shift;
155 params.quantized_activation_min = activation_min;
156 params.quantized_activation_max = activation_max;
158 const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
163 tflite::reference_ops::BroadcastAdd4DSlow(
175void Add::evalQuantizedS16()
const
179 const auto output_scale =
static_cast<double>(
output()->
scale());
181 constexpr int left_shift = 12;
187 int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
188 int input1_shift{}, input2_shift{}, output_shift{};
193 int32_t activation_min{};
194 int32_t activation_max{};
197 auto fn = [input1_multiplier, input1_shift,
198 input2_multiplier, input2_shift,
199 output_multiplier, output_shift,
203 const int32_t
scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
205 const int32_t
scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
208 const int32_t
raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
209 raw_sum, output_multiplier, output_shift);
const AddParams & params() const
void resize(const Shape &new_shape)
int32_t zero_point() const
const Tensor * input2() const
Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params)
void configure() override
const Tensor * input1() const
void execute() const override
#define LUCI_INTERPRETER_CHECK(cond)
Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape)
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier, int *left_shift)
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
void BinaryOpBroadcastSlow(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, const tflite::RuntimeShape &unextended_input2_shape, const T *input2_data, const tflite::RuntimeShape &unextended_output_shape, T *output_data, Op op)
T must_cast(loco::Node *node)