20#include "kernels/Utils.h"
22#include <tensorflow/lite/kernels/internal/reference/reduce.h>
// Copies the reduction axes into a tflite::MeanParams: axis_count receives num_axes and
// each axes_data[i] is narrowed to int16 and stored in params->axis[i].
// NOTE(review): this listing has lost its brace lines and the body of the second loop
// (source lines 39-44 are absent), so what is written to the unused slots cannot be
// stated from here - confirm against the full file.
31static void resolveAxes(
const int32_t *axes_data,
int num_axes, tflite::MeanParams *params)
33 params->axis_count = num_axes;
34 for (
int i = 0; i < num_axes; ++i)
36 params->axis[i] =
static_cast<int16>(axes_data[i]);
// Second pass covers the remaining slots, indices num_axes..3.
38 for (
int i = num_axes; i < 4; ++i)
// NOTE(review): loop body not present in this listing.
/**
 * @brief Counts how many distinct axes are reduced.
 *
 * A negative entry is interpreted relative to the input rank (axis + input_num_dims).
 * An entry that, after this normalization, repeats any earlier entry is not counted.
 *
 * @param axes_data       array of num_axes axis indices (may contain negatives/duplicates)
 * @param num_axes        number of entries in axes_data
 * @param input_num_dims  rank of the input the axes refer to
 * @return num_axes minus the number of entries that duplicate an earlier one
 *
 * NOTE(review): the listing this block was restored from had lost its brace lines and the
 * body of the duplicate branch; the decrement-and-stop below is reconstructed from the
 * surrounding code - confirm against the upstream file.
 */
static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
{
  int reduction_count = num_axes;
  for (int i = 0; i < num_axes; ++i)
  {
    int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
    assert(current >= 0 && current < input_num_dims);
    for (int j = 0; j < i; j++)
    {
      int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
      if (current == previous)
      {
        // Stop at the first match so an axis repeated several times is discounted
        // exactly once per repeated entry, not once per matching predecessor.
        --reduction_count;
        break;
      }
    }
  }
  return reduction_count;
}
// Derives the output Shape of the reduction from the input shape and the axes list.
// NOTE(review): this listing is incomplete - the last parameter(s) of the signature, all
// brace lines and several statements are missing (the embedded source line numbers jump
// 66->69, 70->78, 83->102 and 110->119). Comments below describe only what is visible.
66static Shape getOutputShape(
const Shape &input_shape,
const int32_t *axes_data,
int num_axes,
69 int input_num_dims = input_shape.num_dims();
// Rank-0 input is special-cased; the branch body is not visible here.
70 if (input_num_dims == 0)
// First pass: for every input dimension, look for an axis entry that names it, either
// directly or as a negative index (axis + input_num_dims).
// NOTE(review): presumably the keep-dims path - the guarding condition is not shown.
78 for (
int idx = 0; idx < input_num_dims; ++idx)
81 for (
int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
83 if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
// Second pass: number of distinct reduced axes, as counted by getAxisReductionCount.
102 int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
104 int num_skip_axes = 0;
105 for (
int idx = 0; idx < input_num_dims; ++idx)
107 bool is_axis =
false;
// Same axis-matching test as in the first pass.
108 for (
int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
110 if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
// A kept dimension is written at its input index shifted left by num_skip_axes.
// NOTE(review): where num_skip_axes is incremented is not visible in this listing.
119 output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
// NOTE(review): the enclosing function header is not part of this listing; these statements
// presumably belong to Mean::configure() - confirm. Brace lines and several statements
// (including the definitions of input_shape and num_axes) are missing.
// S16 input gets dedicated handling; the branch body is not visible here.
137 if (
input()->element_type() == DataType::S16)
143 int input_num_dims = input_shape.
num_dims();
145 const auto *axes_data = getTensorData<int32_t>(
axes());
// resolveAxes fills axis slots only up to index 3, hence the limit of 4 axes.
147 assert(num_axes <= 4);
152 tflite::MeanParams
params{};
153 resolveAxes(axes_data, num_axes, &
params);
// The remainder of the expression assigned to _need_temporaries is not shown.
154 _need_temporaries = !(
157 if (_need_temporaries)
// Scratch tensors are sized from the input rank, the axis count and the output shape.
163 temp_index->resize(
Shape(input_num_dims));
164 resolved_axes->resize(
Shape(num_axes));
165 temp_sum->resize(
output()->shape());
// NOTE(review): presumably the branch taken when temporaries are not needed - the
// `else` itself is not visible. The three scratch tensors are marked non-allocatable.
173 temp_index->set_allocatable(
false);
174 resolved_axes->set_allocatable(
false);
175 temp_sum->set_allocatable(
false);
// NOTE(review): the enclosing function header is not part of this listing; this is
// presumably the body of Mean::execute() - confirm. Only the FLOAT32 case label is
// visible; the other cases and the calls they make are missing here.
181 switch (
input()->element_type())
183 case DataType::FLOAT32:
// Reached for element types with no matching case (presumably the default label).
193 throw std::runtime_error(
"luci-intp Mean Unsupported type.");
// Float evaluation path of the Mean kernel.
// NOTE(review): this listing is incomplete - brace lines, the definitions of input_shape,
// num_axes, temp_index, resolved_axes and temp_sum, and the name of the function that
// receives the argument list at the end are all missing.
197void Mean::evalFloat()
const
200 int input_num_dims = input_shape.
num_dims();
201 const auto *axes_data = getTensorData<int32_t>(
axes());
// Pack the axes into tflite::MeanParams via resolveAxes.
204 tflite::MeanParams
params{};
205 resolveAxes(axes_data, num_axes, &
params);
// Tail of a call whose callee is not shown: it is handed the input rank, the float
// output buffer and the three scratch buffers (temp_sum viewed as float).
222 input()->shape().num_dims(), getTensorData<float>(
output()),
225 getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
226 getTensorData<float>(temp_sum));
// uint8 quantized evaluation path of the Mean kernel.
// NOTE(review): this listing is incomplete - brace lines, branch conditions, the
// definitions of input_shape, num_axes and the scratch tensors, and the start of each
// call are missing. At least two calls are partially visible; the conditions selecting
// between them cannot be determined from here.
230void Mean::evalQuantized()
const
233 int input_num_dims = input_shape.num_dims();
234 const auto *axes_data = getTensorData<int32_t>(
axes());
// Pack the axes into tflite::MeanParams via resolveAxes.
237 tflite::MeanParams
params{};
238 resolveAxes(axes_data, num_axes, &
params);
// Fragments of calls writing uint8 output; one of them also receives the output
// zero point. The scratch buffers are all viewed as int here.
251 getTensorData<uint8_t>(
output()),
output()->zero_point(),
257 input()->shape().num_dims(), getTensorData<uint8_t>(
output()),
260 getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
261 getTensorData<int>(temp_sum));
// TFLite reference QuantizedMeanOrSum, passed _params.keep_dims and the scratch buffers;
// its remaining arguments are not visible in this listing.
265 tflite::reference_ops::QuantizedMeanOrSum<>(
270 _params.
keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
271 getTensorData<int>(temp_sum),
// int16 quantized evaluation path of the Mean kernel.
// NOTE(review): this listing is incomplete - brace lines, the start of the `if`
// condition, the accumulator declaration and accumulation statement, the output store,
// and the definitions of input_shape, output_shift and scaled_acc are missing.
276void Mean::evalQuantizedS16()
const
284 const auto *axes_data = getTensorData<int32_t>(
axes());
// Clamp range is symmetric: [-INT16_MAX, INT16_MAX], i.e. INT16_MIN is excluded.
287 constexpr int32_t output_min = -std::numeric_limits<int16_t>::max();
288 constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
// Tail of a condition: the two axes must be 1 and 2, in either order.
292 ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1)))
// Input dimensions 0..3 are read as batches, height, width and depth.
294 const int32_t batches = input_shape.dim(0);
295 const int32_t input_height = input_shape.dim(1);
296 const int32_t input_width = input_shape.dim(2);
297 const int32_t depth = input_shape.dim(3);
// The expression defining real_multiplier is not visible here.
304 const double real_multiplier =
307 int32_t output_multiplier{};
// Number of elements averaged per output value.
311 const int32_t num_elements_in_axes = input_height * input_width;
313 for (int32_t batch = 0; batch < batches; ++batch)
315 for (int32_t c = 0; c < depth; ++c)
318 for (int32_t in_y = 0; in_y < input_height; ++in_y)
320 for (int32_t in_x = 0; in_x < input_width; ++in_x)
// Rescale the accumulated sum with the fixed-point multiplier/shift pair.
326 tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
// Divide by the element count, adding half the divisor with the sign of the value first
// so the integer division rounds to nearest instead of truncating.
328 scaled_acc = scaled_acc > 0
329 ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
330 : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;
// Saturate to the output range defined above.
332 scaled_acc = std::max(scaled_acc, output_min);
333 scaled_acc = std::min(scaled_acc, output_max);
// Reached when the configuration check above fails (presumably the else branch).
341 throw std::runtime_error(
"Unsupported configuration.");
const std::vector< Tensor * > & getOutputTensors() const
const ReducerParams _params
const ReducerParams & params() const
int32_t num_elements() const
void resize(const Shape &new_shape)
const Shape & shape() const
Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
void execute() const override
const Tensor * axes() const
const Tensor * input() const
void configure() override
#define LUCI_INTERPRETER_CHECK(cond)
const luci_interpreter::RuntimeShape output_shape
int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)