ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Mean.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#include "kernels/Mean.h"
19
20#include "kernels/Utils.h"
21
22#include <tensorflow/lite/kernels/internal/reference/reduce.h>
23#include <tensorflow/lite/kernels/internal/optimized/reduce.h>
24
25#include <stdexcept>
26
27namespace luci_interpreter
28{
29namespace kernels
30{
31
32static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
33{
34 params->axis_count = num_axes;
35 for (int i = 0; i < num_axes; ++i)
36 {
37 params->axis[i] = static_cast<int16>(axes_data[i]);
38 }
39 for (int i = num_axes; i < 4; ++i)
40 {
41 params->axis[i] = 1;
42 }
43}
44
// Returns the number of distinct axes that will be reduced.
//
// axes_data      - array of num_axes axis indices; a negative entry is offset by input_num_dims.
// num_axes       - number of entries in axes_data.
// input_num_dims - rank of the input; each normalized axis must lie in [0, input_num_dims).
//
// An entry that normalizes to the same index as an earlier entry is counted only once.
static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
{
  // Maps a possibly negative axis onto its non-negative index.
  const auto normalize = [input_num_dims](int32_t axis) -> int {
    return axis >= 0 ? axis : axis + input_num_dims;
  };

  // Start from the raw entry count and take one off for every repeated entry.
  int reduction_count = num_axes;
  for (int i = 0; i < num_axes; ++i)
  {
    const int current = normalize(axes_data[i]);
    assert(current >= 0 && current < input_num_dims);
    for (int j = 0; j < i; ++j)
    {
      const int previous = normalize(axes_data[j]);
      // This checks for duplicate axis
      if (current == previous)
      {
        --reduction_count;
        break; // a repeated entry is discounted exactly once
      }
    }
  }
  return reduction_count;
}
66
67static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
68 bool keep_dims)
69{
71 if (input_num_dims == 0)
72 {
73 return Shape(0);
74 }
75
76 if (keep_dims)
77 {
79 for (int idx = 0; idx < input_num_dims; ++idx)
80 {
81 bool is_axis = false;
82 for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
83 {
85 {
86 is_axis = true;
87 break;
88 }
89 }
90 if (is_axis)
91 {
92 output_shape.dim(idx) = 1;
93 }
94 else
95 {
96 output_shape.dim(idx) = input_shape.dim(idx);
97 }
98 }
99 return output_shape;
100 }
101 else
102 {
103 int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
105 int num_skip_axes = 0;
106 for (int idx = 0; idx < input_num_dims; ++idx)
107 {
108 bool is_axis = false;
109 for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
110 {
112 {
114 is_axis = true;
115 break;
116 }
117 }
118 if (!is_axis)
119 {
121 }
122 }
123 return output_shape;
124 }
125}
126
// Constructor of the Mean kernel; its body is empty.
// NOTE(review): listing lines 128-129 are missing here. The cross-reference at the end of this
// page gives the full parameter list as
//   (const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
//    Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params).
// The trailing `params)` below presumably closes a base-class initializer that forwards these
// arguments -- restore it from the upstream file before building.
127Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
130 params)
131{
132}
133
135{
136 LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
137 LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
138 if (input()->element_type() == DataType::S16)
139 {
140 LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
141 }
142
143 const Shape &input_shape = input()->shape();
145
146 const auto *axes_data = getTensorData<int32_t>(axes());
147 int num_axes = axes()->shape().num_elements();
148 assert(num_axes <= 4);
149
152
153 tflite::MeanParams params{};
154 resolveAxes(axes_data, num_axes, &params);
155
156 auto temp_index = getOutputTensors()[1];
158 auto temp_sum = getOutputTensors()[3];
159
161 resolved_axes->resize(Shape(num_axes));
162 temp_sum->resize(output()->shape());
163}
164
165void Mean::execute() const
166{
167 switch (input()->element_type())
168 {
169 case DataType::FLOAT32:
170 evalFloat();
171 break;
172 case DataType::U8:
173 evalQuantized();
174 break;
175 case DataType::S16:
176 evalQuantizedS16();
177 break;
178 default:
179 throw std::runtime_error("luci-intp Mean Unsupported type.");
180 }
181}
182
183void Mean::evalFloat() const
184{
185 const auto *axes_data = getTensorData<int32_t>(axes());
186 int num_axes = axes()->shape().num_elements();
187
188 tflite::MeanParams params{};
189 resolveAxes(axes_data, num_axes, &params);
190
191 auto temp_index = getOutputTensors()[1];
193 auto temp_sum = getOutputTensors()[3];
194
195 tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
196 input()->shape().num_dims(), getTensorData<float>(output()),
197 getTensorShape(output()).DimsData(), output()->shape().num_dims(),
201}
202
203void Mean::evalQuantized() const
204{
205 const auto *axes_data = getTensorData<int32_t>(axes());
206 int num_axes = axes()->shape().num_elements();
207
208 tflite::MeanParams params{};
209 resolveAxes(axes_data, num_axes, &params);
210
211 auto temp_index = getOutputTensors()[1];
213 auto temp_sum = getOutputTensors()[3];
214
215 tflite::optimized_ops::QuantizedMeanOrSum<uint8_t, int>(
216 getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
217 getTensorShape(input()).DimsData(), input()->shape().num_dims(),
218 getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
219 getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
222 /*compute_sum=*/false);
223}
224
225void Mean::evalQuantizedS16() const
226{
227 const auto *input_data = getTensorData<int16_t>(input());
229
230 const Shape &input_shape = input()->shape();
231 const Shape &output_shape = output()->shape();
232
233 const auto *axes_data = getTensorData<int32_t>(axes());
234 const int num_axes = axes()->shape().num_elements();
235
236 constexpr int32_t output_min = -std::numeric_limits<int16_t>::max();
237 constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
238
239 // Defer to specialized implementation for 4D Mean across axes 1 & 2.
240 if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 &&
241 ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1)))
242 {
243 const int32_t batches = input_shape.dim(0);
244 const int32_t input_height = input_shape.dim(1);
245 const int32_t input_width = input_shape.dim(2);
246 const int32_t depth = input_shape.dim(3);
247 assert(output_shape.num_dims() == 4);
248 assert(output_shape.dim(0) == batches);
249 assert(output_shape.dim(1) == 1);
250 assert(output_shape.dim(2) == 1);
251 assert(output_shape.dim(3) == depth);
252
253 const double real_multiplier =
254 static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());
255
256 int32_t output_multiplier{};
257 int output_shift{};
258 quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
259
261
262 for (int32_t batch = 0; batch < batches; ++batch)
263 {
264 for (int32_t c = 0; c < depth; ++c)
265 {
266 int32_t acc = 0;
267 for (int32_t in_y = 0; in_y < input_height; ++in_y)
268 {
269 for (int32_t in_x = 0; in_x < input_width; ++in_x)
270 {
272 }
273 }
274 int32_t scaled_acc =
275 tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
276 // Divide by the number of elements rounding to the nearest integer.
280
281 scaled_acc = std::max(scaled_acc, output_min);
282 scaled_acc = std::min(scaled_acc, output_max);
283
285 }
286 }
287 }
288 else
289 {
290 throw std::runtime_error("Unsupported configuration.");
291 }
292}
293
294} // namespace kernels
295} // namespace luci_interpreter
const std::vector< Tensor * > & getOutputTensors() const
Definition Kernel.h:40
const ReducerParams & params() const
Definition Kernel.h:67
int32_t num_elements() const
Definition Tensor.h:53
int num_dims() const
Definition Tensor.h:39
void resize(const Shape &new_shape)
Definition Tensor.cpp:56
const Shape & shape() const
Definition Tensor.h:107
float scale() const
Definition Tensor.h:109
Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
Definition Mean.cpp:127
void execute() const override
Definition Mean.cpp:165
const Tensor * axes() const
Definition Mean.h:37
Tensor * output() const
Definition Mean.h:38
const Tensor * input() const
Definition Mean.h:36
void configure() override
Definition Mean.cpp:134
std::int16_t int16
Definition Macro.h:53
#define LUCI_INTERPRETER_CHECK(cond)
Definition Utils.h:36
const luci_interpreter::RuntimeShape output_shape
list input_data
Definition infer.py:29
int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
Definition Utils.h:75
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
Definition Utils.h:194
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
Definition Utils.cpp:157
T must_cast(loco::Node *node)
Definition Shape.h:28