18#ifndef __NNFW_CKER_REDUCEMEAN_H__
19#define __NNFW_CKER_REDUCEMEAN_H__
33 return static_cast<float>(
static_cast<int>(value - 0.5f));
37 return static_cast<float>(
static_cast<int>(value + 0.5f));
40template <
typename Out,
typename In>
43 return data1 +
static_cast<Out
>(data2) / normalizer;
46template <
typename In>
int sum_reducer(
const int data1,
const In data2)
48 return data1 +
static_cast<int>(data2);
51template <
typename In,
typename Out>
53 const int num_axis,
int *input_iter,
54 Out reducer(
const Out current,
const In in,
int normalizer),
57 const auto input_dims = input_shape.
DimsData();
61 for (
int idx = 0; idx < input_num_dims; ++idx)
66 for (
int idx = 0; idx < num_axis; ++idx)
68 normalizer *= input_dims[axis[idx]];
73 size_t input_offset =
ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0,
nullptr);
74 size_t output_offset =
76 output_data[output_offset] =
77 reducer(output_data[output_offset], input_data[input_offset], normalizer);
78 }
while (
NextIndex(input_num_dims, input_dims, input_iter));
84 const int num_axis,
int *input_iter,
85 int reducer(
const int current,
const In in),
int *temp_sum)
87 const auto input_dims = input_shape.
DimsData();
89 size_t normalizer = 1;
91 for (
int idx = 0; idx < input_num_dims; ++idx)
96 for (
int idx = 0; idx < num_axis; ++idx)
98 normalizer *= input_dims[axis[idx]];
103 size_t input_offset =
ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0,
nullptr);
104 size_t output_offset =
106 temp_sum[output_offset] = reducer(temp_sum[output_offset], input_data[input_offset]);
107 }
while (
NextIndex(input_num_dims, input_dims, input_iter));
116 template <
typename T>
118 const std::vector<int> &axes, T *output_data, T init_value)
125 const auto input_dims = input_shape.
DimsData();
127 int resolved_axis_size = 1;
128 const auto num_axes = axes.size();
130 for (
size_t idx = 0; idx < num_axes; idx++)
132 int current = axes[idx] < 0 ? (axes[idx] + num_dims) : axes[idx];
133 assert(current >= 0 && current < num_dims);
134 resolved_axis_size *= input_dims[current];
137 prepare(num_dims, resolved_axis_size);
140 int num_resolved_axis = 0;
146 return num_resolved_axis;
151 template <
typename In,
typename Out>
153 Out *output_data,
const std::vector<int> &axes,
bool, Out init_value,
154 Out reducer(
const Out current,
const Out in,
int normalizer))
156 int num_resolved_axis;
158 if (num_resolved_axis == -1)
162 return ReduceMeanImpl<In, Out>(input_data, input_shape,
resolved_axis_data(), num_resolved_axis,
166 template <
typename In,
typename Out>
167 inline bool ReduceOp(
const Shape &input_shape,
const In *input_data,
float input_scale,
169 float output_scale, int32_t output_offset,
const std::vector<int> &axes,
170 bool, Out init_value,
int reducer(
const int current,
const In in))
172 size_t num_outputs = 1;
175 for (
size_t idx = 0; idx < static_cast<size_t>(
output_shape.DimensionsCount()); idx++)
177 num_outputs *= output_dims[idx];
179 _temp_sum.resize(num_outputs, 0);
180 int num_resolved_axis;
182 if (num_resolved_axis == -1)
188 ReduceSumQuantImpl<In>(input_data, input_shape,
resolved_axis_data(), num_resolved_axis,
192 float scale = input_scale / output_scale;
193 float bias = -input_offset * scale;
194 for (
size_t idx = 0; idx < num_outputs; idx++)
196 float float_mean =
static_cast<float>(_temp_sum[idx]) / normalizer;
197 float result = std::min(
round_nearest(float_mean * scale + bias + output_offset),
198 static_cast<float>(std::numeric_limits<Out>::max()));
199 result = std::max(result,
static_cast<float>(std::numeric_limits<Out>::min()));
200 output_data[idx] =
static_cast<Out
>(result);
207 std::vector<int> _temp_sum;
210template <
typename In,
typename Out>
212 Out *output_data,
const std::vector<int> &axes)
220template <
typename In,
typename Out>
223 float output_scale, int32_t output_offset,
const std::vector<int> &axes)
228 output_data, output_scale, output_offset, axes,
true, (Out)0,
232template <
typename In,
typename Out>
242 const int input_height = input_shape.
Dims(1);
243 const int input_width = input_shape.
Dims(2);
245 for (
int out_b = 0; out_b < output_batch; ++out_b)
247 for (
int out_d = 0; out_d < output_depth; ++out_d)
250 for (
int in_h = 0; in_h < input_height; ++in_h)
252 for (
int in_w = 0; in_w < input_width; ++in_w)
254 value += input_data[
Offset(input_shape, out_b, in_h, in_w, out_d)];
257 output_data[
Offset(
output_shape, out_b, 0, 0, out_d)] = value / (input_width * input_height);
int32_t * resolved_axis_data(void)
void prepare(size_t temp_index_size, size_t resolved_axis_size)
int32_t * temp_index_data(void)
int PrepareforReduce(const Shape &input_shape, const Shape &output_shape, const std::vector< int > &axes, T *output_data, T init_value)
bool ReduceOp(const Shape &input_shape, const In *input_data, float input_scale, int32_t input_offset, const Shape &output_shape, Out *output_data, float output_scale, int32_t output_offset, const std::vector< int > &axes, bool, Out init_value, int reducer(const int current, const In in))
bool ReduceOp(const Shape &input_shape, const In *input_data, const Shape &output_shape, Out *output_data, const std::vector< int > &axes, bool, Out init_value, Out reducer(const Out current, const Out in, int normalizer))
int32_t DimensionsCount() const
int32_t Dims(int i) const
const luci_interpreter::RuntimeShape output_shape
int sum_reducer(const int data1, const In data2)
int Offset(const Shape &shape, int i0, int i1, int i2, int i3)
void MeanQ8Asymm(const Shape &input_shape, const In *input_data, float input_scale, int32_t input_offset, const Shape &output_shape, Out *output_data, float output_scale, int32_t output_offset, const std::vector< int > &axes)
bool NextIndex(const int num_dims, const int *dims, int *current)
size_t ReducedOutputOffset(const int num_dims, const int *dims, const int *index, const int num_axis, const int *axis)
Out mean_reducer(const Out data1, const In data2, int normalizer)
size_t ReduceSumQuantImpl(const In *input_data, const Shape &input_shape, const int *axis, const int num_axis, int *input_iter, int reducer(const int current, const In in), int *temp_sum)
bool ReduceMeanImpl(const In *input_data, const Shape &input_shape, const int *axis, const int num_axis, int *input_iter, Out reducer(const Out current, const In in, int normalizer), Out *output_data)
bool ResolveAxis(const int num_dims, const std::vector< int > &axes, int *out_axis, int *out_num_axis)
bool InitTensorDataForReduce(const Shape &shape, const T init_value, T *data)
float round_nearest(float value)
void MeanAxis1And2(const Shape &input_shape, const In *input_data, const Shape &output_shape, Out *output_data)
void Mean(const Shape &input_shape, const In *input_data, const Shape &output_shape, Out *output_data, const std::vector< int > &axes)