ONE - On-device Neural Engine
PALReduceCommon.h
/*
 * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ONERT_MICRO_PAL_REDUCE_COMMON_H
#define ONERT_MICRO_PAL_REDUCE_COMMON_H

#include "PALUtils.h"
#include "core/OMReduceDataContext.h"
#include "core/OMRuntimeShape.h"
#include "core/OMTypeTraits.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <set>
#include <type_traits>
#include <unordered_map>
#include <utility>

namespace core = onert_micro::core;

using core::OMReduceDataContext;
using core::OMRuntimeShape;
using core::type_traits::IsQuantized;

namespace onert_micro::execute::pal
{

// clang-format off

// ------------------------------------------------------------------------------------------------

// Reduction functor: folds one value into the running sum.
template <class T>
struct SumFn
{
  void operator()(T &total, const T value)
  {
    total += value;
  }
};

// ------------------------------------------------------------------------------------------------

// Reduction functor: folds one value into the running product.
template <typename T>
struct ProdFn
{
  void operator()(T &total, const T value)
  {
    total *= value;
  }
};

// ------------------------------------------------------------------------------------------------

// Reduction functor: keeps the running maximum.
template <typename T>
struct MaxFn
{
  void operator()(T &total, const T value)
  {
    total = std::max(total, value);
  }
};

// ------------------------------------------------------------------------------------------------

template <typename T, template <typename> class ReduceFn>
class Reducer
{
  // Quantized inputs are accumulated in float; everything else in T itself.
  using ValueType = std::conditional_t<IsQuantized<T>, float, T>;

private:
  // Views over the context's input, output, and axes tensors; their exact
  // types are whatever OMReduceDataContext hands out, hence the decltype.
  decltype(std::declval<core::OMReduceDataContext<T> &>().Input()) _input;
  decltype(std::declval<core::OMReduceDataContext<T> &>().Output()) _output;
  decltype(std::declval<core::OMReduceDataContext<T> &>().Axis()) _axes;

  T _init_value;
  ReduceFn<ValueType> _reducer;

  std::unordered_map<size_t, size_t> _curr_index = {};
  std::unordered_map<size_t, uint32_t> _resolved_axes = {};
  std::unordered_map<size_t, ValueType> _accumulator = {};

public:
  explicit Reducer(core::OMReduceDataContext<T> &ctx, T init_value)
    : _input(ctx.Input())
    , _output(ctx.Output())
    , _axes(ctx.Axis())
    , _init_value(init_value)
  {}

public:
  bool Mean()
  {
    // Try the dedicated rank-4 height/width path first, then the generic one.
    if (SpecialCaseMeanImpl())
      return true;

    return ReduceImpl(true);
  }

  bool Reduce()
  {
    return ReduceImpl();
  }

private:
  bool ReduceImpl(bool mean = false);
  bool SpecialCaseMeanImpl();

  bool ResolveAxis();
  T ResolvedAxisLength();

  size_t ReducedOutputOffset(int num_axes, const uint32_t *axes);
  bool NextIndex();
};
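
/*
  Usage sketch (hypothetical driver code, not part of this header): how a
  kernel might run a sum reduction once the runtime has built an
  OMReduceDataContext. The float type and the context construction are
  assumed for illustration only.

    core::OMReduceDataContext<float> ctx = ...; // provided by the runtime
    Reducer<float, SumFn> reducer(ctx, 0.0f);   // 0 is the identity for sum
    bool ok = reducer.Reduce();                 // or reducer.Mean() for Mean

  For a Max reduction the init value should be the lowest representable
  value, e.g. std::numeric_limits<float>::lowest().
*/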

// ------------------------------------------------------------------------------------------------

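/*
  Normalizes the requested axes into _resolved_axes, keyed by insertion
  order (index -> axis) and with duplicates skipped; e.g. axes {1, 2, 1}
  resolve to {0 -> 1, 1 -> 2}. Fails when more than two distinct axes are
  requested.
*/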
template <typename T, template <typename> class ReduceFn>
bool Reducer<T, ReduceFn>::ResolveAxis()
{
  size_t num_resolved_axes = 0;
  _resolved_axes.clear();

  // A scalar input has no axes to resolve; reducing it is trivially valid.
  if (_input.IsScalar())
    return true;

  for (size_t i = 0; i < _axes.ElementsCount(); ++i)
  {
    const uint32_t current = _axes.Data().At(i);

    // _resolved_axes maps insertion order -> axis, so duplicates must be
    // searched among the mapped values, not the keys.
    const bool duplicate =
      std::any_of(_resolved_axes.begin(), _resolved_axes.end(),
                  [current](const auto &entry) { return entry.second == current; });
    if (duplicate)
      continue;

    // This implementation reduces over at most two distinct axes.
    if (_resolved_axes.size() > 1)
      return false;

    _resolved_axes[num_resolved_axes++] = current;
  }

  return true;
}

template <typename T, template <typename> class ReduceFn>
bool Reducer<T, ReduceFn>::SpecialCaseMeanImpl()
{
  /*
    Special case: mean over width and height (axes 1 and 2) of a rank-4
    tensor, reduced simultaneously.
  */
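  /*
    Example (assumed NHWC layout): an input of shape [1, 8, 8, 3] with
    axes {1, 2} averages each 8 x 8 plane, producing output [1, 1, 1, 3].
  */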
  if (_input.DimsCount() != 4)
    return false;

  if (_axes.ElementsCount() != 2)
    return false;

  // Read the axes only after validating their count, so a shorter axes
  // tensor is never read out of bounds.
  const uint32_t *axes_data = _axes.Data().Get();
  std::set<uint32_t> axes_values = {axes_data[0], axes_data[1]};

  if (axes_values.count(1) != 1 || axes_values.count(2) != 1)
    return false;

  auto input_shape = OMRuntimeShape::extendedShape(4, _input.Shape());
  auto output_shape = OMRuntimeShape::extendedShape(4, _output.Shape());

  const int output_batch = output_shape.dims(0);
  const int output_depth = output_shape.dims(3);

  const int input_height = input_shape.dims(1);
  const int input_width = input_shape.dims(2);

  for (int out_b = 0; out_b < output_batch; ++out_b)
  {
    for (int out_d = 0; out_d < output_depth; ++out_d)
    {
      float value = 0;

      for (int in_h = 0; in_h < input_height; ++in_h)
      {
        for (int in_w = 0; in_w < input_width; ++in_w)
        {
          size_t idx = offset(input_shape.dimsData(), out_b, in_h, in_w, out_d);
          value += static_cast<float>(_input.Data().At(idx));
        }
      }

      float result = value / (input_width * input_height);
      size_t idx = offset(output_shape.dimsData(), out_b, 0, 0, out_d);
      _output.Data().SetAt(idx, result);
    }
  }

  return true;
}

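/*
  Generic path: seeds each accumulator with the init value, walks the full
  input index space via NextIndex(), folds every element into the
  accumulator chosen by ReducedOutputOffset(), and for Mean divides each
  accumulated value by the reduced element count.
*/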
template <typename T, template <typename> class ReduceFn>
bool Reducer<T, ReduceFn>::ReduceImpl(bool mean)
{
  _accumulator.clear();
  _curr_index.clear();

  auto *axes_data = _axes.Data().Get();
  auto num_outputs = _output.ElementsCount();

  const auto &input_data = _input.Data();

  if (_input.HasZeroSizeDims())
  {
    return false;
  }

  // Seed every output position with the reduction's identity value.
  for (size_t i = 0; i < num_outputs; ++i)
  {
    _accumulator[i] = _init_value;
  }

  if (!ResolveAxis())
  {
    return false;
  }

  do
  {
    // With no axes the offset walks the full input index; with the resolved
    // axes it collapses the reduced dimensions onto the output index.
    size_t input_offset = ReducedOutputOffset(0, nullptr);
    size_t output_offset = ReducedOutputOffset(_resolved_axes.size(), axes_data);

    _reducer(_accumulator[output_offset], input_data.ValueAt(input_offset));

  } while (NextIndex());

  // For Mean, divide by the element count of the reduced axes; a length of 0
  // signals a zero-sized axis or an overflow in ResolvedAxisLength().
  T axis_length = 1;
  if (mean)
  {
    axis_length = ResolvedAxisLength();
    if (axis_length == 0)
      return false;
  }

  for (size_t i = 0; i < num_outputs; ++i)
  {
    auto value = _accumulator.at(i);

    if (mean)
    {
      value /= axis_length;
    }

    _output.Data().SetValueAt(i, value);
  }

  return true;
}

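/*
  Product of the dimension lengths of the resolved axes, i.e. the number of
  input elements folded into each output; e.g. axes {1, 2} on an input of
  shape [2, 8, 8, 3] give 8 * 8 = 64. Returns 0 on a zero-sized axis or on
  multiplication overflow, which the caller treats as an error.
*/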
template <typename T, template <typename> class ReduceFn>
T Reducer<T, ReduceFn>::ResolvedAxisLength()
{
  size_t axis_length = 1;
  constexpr static auto kMax = std::numeric_limits<size_t>::max();

  for (auto i = 0u; i < _resolved_axes.size(); ++i)
  {
    auto &axis = _resolved_axes.at(i);
    auto current = static_cast<size_t>(_input.Dims()[axis]);

    // A zero-length axis makes the mean undefined; report it as 0.
    if (current == 0)
      return 0;

    // Overflow prevention; 0 doubles as the error sentinel for the caller.
    if (current > (kMax / axis_length))
      return 0;

    axis_length *= current;
  }

  return static_cast<T>(axis_length);
}

/*
  Computes the flattened offset of the current index, skipping the given
  axes. When reducing, the flattened offset does not change when the input
  index moves along a reduced axis.
  For example, reducing a 3D tensor to 2D by eliminating axis 0 maps
  index (0, 1, 2) and index (1, 1, 2) to the same flattened offset.
*/
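/*
  Worked example (hypothetical shape): with an input of shape [2, 3, 4] and
  axis 0 reduced, only dims 1 and 2 contribute, so index (b, 1, 2) flattens
  to 1 * 4 + 2 = 6 for either value of b.
*/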
template <typename T, template <typename> class ReduceFn>
size_t Reducer<T, ReduceFn>::ReducedOutputOffset(int num_axes, const uint32_t *axes_data)
{
  size_t offset = 0;

  for (auto dim_idx = 0u; dim_idx < _input.DimsCount(); ++dim_idx)
  {
    bool skip_axis = false;

    if (axes_data != nullptr)
    {
      skip_axis = std::any_of(axes_data, axes_data + num_axes,
                              [&dim_idx](auto axis) { return axis == dim_idx; });
    }

    if (!skip_axis)
    {
      offset *= _input.DimLength(dim_idx);
      offset += _curr_index[dim_idx];
    }
  }

  return offset;
}

/*
  Advances _curr_index through a row-major walk of the input, odometer
  style: the innermost dimension increments first and carries leftwards.
  Returns false once every index has been visited.
*/
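// Example: for shape [2, 2] the walk is (0,0) -> (0,1) -> (1,0) -> (1,1),
// after which NextIndex() returns false.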
template <typename T, template <typename> class ReduceFn>
bool Reducer<T, ReduceFn>::NextIndex()
{
  if (_input.DimsCount() == 0)
  {
    return false;
  }

  for (int idx = _input.DimsCount() - 1; idx >= 0; --idx)
  {
    auto current_val = _curr_index[idx] + 1;

    if (_input.DimLength(idx) != current_val)
    {
      _curr_index[idx] = current_val;
      return true;
    }

    _curr_index[idx] = 0;
  }

  return false;
}

// ------------------------------------------------------------------------------------------------

} // namespace onert_micro::execute::pal

#endif // ONERT_MICRO_PAL_REDUCE_COMMON_H