ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Mean.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#include "kernels/Mean.h"
19
20#include "kernels/Utils.h"
21
22#include <tensorflow/lite/kernels/internal/reference/reduce.h>
23
24#include <stdexcept>
25
26namespace luci_interpreter
27{
28namespace kernels
29{
30
31static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
32{
33 params->axis_count = num_axes;
34 for (int i = 0; i < num_axes; ++i)
35 {
36 params->axis[i] = static_cast<int16>(axes_data[i]);
37 }
38 for (int i = num_axes; i < 4; ++i)
39 {
40 params->axis[i] = 1;
41 }
42}
43
44// Returns the number of axes that will be reduced. Removes duplicates.
// Returns the number of distinct axes that will be reduced. Negative axes
// are normalized (axis + input_num_dims) before duplicate detection, so
// e.g. {1, -3} over a 4-D input counts as a single axis.
static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
{
  const auto normalize = [input_num_dims](int32_t axis) {
    return axis >= 0 ? static_cast<int>(axis) : static_cast<int>(axis) + input_num_dims;
  };

  int distinct = 0;
  for (int i = 0; i < num_axes; ++i)
  {
    const int current = normalize(axes_data[i]);
    assert(current >= 0 && current < input_num_dims);
    bool seen_before = false;
    for (int j = 0; j < i && !seen_before; ++j)
      seen_before = (normalize(axes_data[j]) == current);
    if (!seen_before)
      ++distinct;
  }
  return distinct;
}
65
66static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
67 bool keep_dims)
68{
69 int input_num_dims = input_shape.num_dims();
70 if (input_num_dims == 0)
71 {
72 return Shape(0);
73 }
74
75 if (keep_dims)
76 {
77 Shape output_shape(input_num_dims);
78 for (int idx = 0; idx < input_num_dims; ++idx)
79 {
80 bool is_axis = false;
81 for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
82 {
83 if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
84 {
85 is_axis = true;
86 break;
87 }
88 }
89 if (is_axis)
90 {
91 output_shape.dim(idx) = 1;
92 }
93 else
94 {
95 output_shape.dim(idx) = input_shape.dim(idx);
96 }
97 }
98 return output_shape;
99 }
100 else
101 {
102 int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
103 Shape output_shape(input_num_dims - num_reduce_axes);
104 int num_skip_axes = 0;
105 for (int idx = 0; idx < input_num_dims; ++idx)
106 {
107 bool is_axis = false;
108 for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
109 {
110 if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
111 {
112 ++num_skip_axes;
113 is_axis = true;
114 break;
115 }
116 }
117 if (!is_axis)
118 {
119 output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
120 }
121 }
122 return output_shape;
123 }
124}
125
// Mean kernel. Inputs: the data tensor and the reduction axes (S32).
// Outputs: the result tensor plus three scratch tensors used only by the
// generic reducer path (index workspace, resolved axes, accumulation sum).
Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
           Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
  : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
                                    params)
{
}
132
134{
135 LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
136 LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
137 if (input()->element_type() == DataType::S16)
138 {
139 LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
140 }
141
142 const Shape &input_shape = input()->shape();
143 int input_num_dims = input_shape.num_dims();
144
145 const auto *axes_data = getTensorData<int32_t>(axes());
146 int num_axes = axes()->shape().num_elements();
147 assert(num_axes <= 4);
148
149 Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
151
152 tflite::MeanParams params{};
153 resolveAxes(axes_data, num_axes, &params);
154 _need_temporaries = !(
155 _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
156 ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
157 if (_need_temporaries)
158 {
159 auto temp_index = getOutputTensors()[1];
160 auto resolved_axes = getOutputTensors()[2];
161 auto temp_sum = getOutputTensors()[3];
162
163 temp_index->resize(Shape(input_num_dims));
164 resolved_axes->resize(Shape(num_axes));
165 temp_sum->resize(output()->shape());
166 }
167 else
168 {
169 auto temp_index = getOutputTensors()[1];
170 auto resolved_axes = getOutputTensors()[2];
171 auto temp_sum = getOutputTensors()[3];
172
173 temp_index->set_allocatable(false);
174 resolved_axes->set_allocatable(false);
175 temp_sum->set_allocatable(false);
176 }
177}
178
// Dispatches to the element-type-specific evaluation routine.
void Mean::execute() const
{
  switch (input()->element_type())
  {
    case DataType::FLOAT32:
      evalFloat();
      break;
    case DataType::U8:
      evalQuantized();
      break;
    case DataType::S16:
      evalQuantizedS16();
      break;
    default:
      throw std::runtime_error("luci-intp Mean Unsupported type.");
  }
}
196
197void Mean::evalFloat() const
198{
199 const Shape &input_shape = input()->shape();
200 int input_num_dims = input_shape.num_dims();
201 const auto *axes_data = getTensorData<int32_t>(axes());
202 int num_axes = axes()->shape().num_elements();
203
204 tflite::MeanParams params{};
205 resolveAxes(axes_data, num_axes, &params);
206
207 auto temp_index = getOutputTensors()[1];
208 auto resolved_axes = getOutputTensors()[2];
209 auto temp_sum = getOutputTensors()[3];
210
211 // Defer to specialized implementation for 4D Mean across axes 1 & 2.
212 if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
213 ((params.axis[0] == 1 && params.axis[1] == 2) ||
214 (params.axis[0] == 2 && params.axis[1] == 1)))
215 {
216 tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<float>(input()),
217 getTensorShape(output()), getTensorData<float>(output()));
218 }
219 else
220 {
221 tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
222 input()->shape().num_dims(), getTensorData<float>(output()),
223 getTensorShape(output()).DimsData(), output()->shape().num_dims(),
224 axes_data, num_axes, _params.keep_dims,
225 getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
226 getTensorData<float>(temp_sum));
227 }
228}
229
230void Mean::evalQuantized() const
231{
232 const Shape &input_shape = input()->shape();
233 int input_num_dims = input_shape.num_dims();
234 const auto *axes_data = getTensorData<int32_t>(axes());
235 int num_axes = axes()->shape().num_elements();
236
237 tflite::MeanParams params{};
238 resolveAxes(axes_data, num_axes, &params);
239
240 auto temp_index = getOutputTensors()[1];
241 auto resolved_axes = getOutputTensors()[2];
242 auto temp_sum = getOutputTensors()[3];
243
244 // Defer to specialized implementation for 4D Mean across axes 1 & 2.
245 if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
246 ((params.axis[0] == 1 && params.axis[1] == 2) ||
247 (params.axis[0] == 2 && params.axis[1] == 1)))
248 {
249 tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
250 input()->zero_point(), input()->scale(), getTensorShape(output()),
251 getTensorData<uint8_t>(output()), output()->zero_point(),
252 output()->scale());
253 }
254 else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
255 {
256 tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
257 input()->shape().num_dims(), getTensorData<uint8_t>(output()),
258 getTensorShape(output()).DimsData(), output()->shape().num_dims(),
259 axes_data, num_axes, _params.keep_dims,
260 getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
261 getTensorData<int>(temp_sum));
262 }
263 else
264 {
265 tflite::reference_ops::QuantizedMeanOrSum<>(
266 getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
267 getTensorShape(input()).DimsData(), input()->shape().num_dims(),
268 getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
269 getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
270 _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
271 getTensorData<int>(temp_sum),
272 /*compute_sum=*/false);
273 }
274}
275
// S16 Mean. Only the 4D keep_dims reduction across axes 1 & 2 (height and
// width) is supported. configure() has already checked that both zero
// points are 0, so requantization is a pure scale ratio.
void Mean::evalQuantizedS16() const
{
  const auto *input_data = getTensorData<int16_t>(input());
  auto *output_data = getTensorData<int16_t>(output());

  const Shape &input_shape = input()->shape();
  const Shape &output_shape = output()->shape();

  const auto *axes_data = getTensorData<int32_t>(axes());
  const int num_axes = axes()->shape().num_elements();

  // Symmetric clamp range [-32767, 32767]; -32768 is deliberately excluded.
  constexpr int32_t output_min = -std::numeric_limits<int16_t>::max();
  constexpr int32_t output_max = std::numeric_limits<int16_t>::max();

  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
  if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 &&
      ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1)))
  {
    const int32_t batches = input_shape.dim(0);
    const int32_t input_height = input_shape.dim(1);
    const int32_t input_width = input_shape.dim(2);
    const int32_t depth = input_shape.dim(3);
    assert(output_shape.num_dims() == 4);
    assert(output_shape.dim(0) == batches);
    assert(output_shape.dim(1) == 1);
    assert(output_shape.dim(2) == 1);
    assert(output_shape.dim(3) == depth);

    // Ratio of input to output scale, converted to a fixed-point
    // multiplier/shift pair for integer-only rescaling.
    const double real_multiplier =
      static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());

    int32_t output_multiplier{};
    int output_shift{};
    quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);

    const int32_t num_elements_in_axes = input_height * input_width;

    for (int32_t batch = 0; batch < batches; ++batch)
    {
      for (int32_t c = 0; c < depth; ++c)
      {
        // Sum the H*W window in 32-bit, rescale first, then average —
        // rescaling before the division preserves precision.
        int32_t acc = 0;
        for (int32_t in_y = 0; in_y < input_height; ++in_y)
        {
          for (int32_t in_x = 0; in_x < input_width; ++in_x)
          {
            acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)];
          }
        }
        int32_t scaled_acc =
          tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
        // Divide by the number of elements rounding to the nearest integer.
        scaled_acc = scaled_acc > 0
                       ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
                       : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;

        scaled_acc = std::max(scaled_acc, output_min);
        scaled_acc = std::min(scaled_acc, output_max);

        output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc;
      }
    }
  }
  else
  {
    throw std::runtime_error("Unsupported configuration.");
  }
}
344
345} // namespace kernels
346} // namespace luci_interpreter
std::int16_t int16
Definition Macro.h:53
const std::vector< Tensor * > & getOutputTensors() const
Definition Kernel.h:40
const ReducerParams & params() const
Definition Kernel.h:67
int32_t num_elements() const
Definition Tensor.h:53
int num_dims() const
Definition Tensor.h:39
void resize(const Shape &new_shape)
Definition Tensor.cpp:56
const Shape & shape() const
Definition Tensor.h:107
float scale() const
Definition Tensor.h:109
Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
Definition Mean.cpp:126
void execute() const override
Definition Mean.cpp:179
const Tensor * axes() const
Definition Mean.h:37
Tensor * output() const
Definition Mean.h:38
const Tensor * input() const
Definition Mean.h:36
void configure() override
Definition Mean.cpp:133
#define LUCI_INTERPRETER_CHECK(cond)
Definition Utils.h:36
const luci_interpreter::RuntimeShape output_shape
list input_data
Definition infer.py:29
int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
Definition Utils.h:75
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
Definition Utils.h:194
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
Definition Utils.cpp:157
Definition Shape.h:28