ONE - On-device Neural Engine
Loading...
Searching...
No Matches
BinaryArithmeticOps.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__
19#define __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__
20
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"

#include <cmath>
#include <cstdint>
#include <functional>
26
27namespace nnfw
28{
29namespace cker
30{
31namespace reference
32{
33
34// Maximum dimension supported by the broadcast operation.
35constexpr int kMaxBroadcastDim = 6;
36
37template <typename T>
38inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
39 const T *input1_data, const Shape &input2_shape,
40 const T *input2_data, const Shape &output_shape, T *output_data,
41 const std::function<T(const T &, const T &)> &fn)
42{
43 const int32_t flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
44 for (int i = 0; i < flat_size; ++i)
45 {
46 output_data[i] = ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
49 }
50}
51
52template <>
53inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
54 const float *input1_data, const Shape &input2_shape,
55 const float *input2_data, const Shape &output_shape,
56 float *output_data,
57 const std::function<float(const float &, const float &)> &fn)
58{
59 const int size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
60 for (int i = 0; i < size; i++)
61 {
62 output_data[i] = ActivationFunctionWithMinMax(
63 fn(input1_data[i], input2_data[i]), params.float_activation_min, params.float_activation_max);
64 }
65}
66
67template <>
68inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &, const Shape &input1_shape,
69 const bool *input1_data, const Shape &input2_shape,
70 const bool *input2_data, const Shape &output_shape,
71 bool *output_data,
72 const std::function<bool(const bool &, const bool &)> &fn)
73{
74 const int size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
75 for (int i = 0; i < size; i++)
76 {
77 output_data[i] = fn(input1_data[i], input2_data[i]);
78 }
79}
80
81template <>
82inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
83 const int64_t *input1_data, const Shape &input2_shape,
84 const int64_t *input2_data, const Shape &output_shape,
85 int64_t *output_data,
86 const std::function<int64_t(const int64_t &, const int64_t &)> &fn)
87{
88 const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
89 for (int i = 0; i < flat_size; ++i)
90 {
91 output_data[i] = ActivationFunctionWithMinMax(
92 fn(input1_data[i], input2_data[i]), params.int64_activation_min, params.int64_activation_max);
93 }
94}
95
96template <typename T>
97inline typename std::enable_if_t<is_quant8<T>::value> BroadcastBinaryArithmeticOpSlow(
98 const BinaryArithmeticOpParam &params, const Shape &input1_shape, const T *input1_data,
99 const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data,
100 const std::function<T(const BinaryArithmeticOpParam &params, const T &, const T &)> &fn)
101{
104 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
105 const Shape extended_output_shape = Shape::ExtendedShape(kMaxBroadcastDim, output_shape);
106
107 // Comment from tensorflow lite:
108 //
109 // In Tensorflow, the dimensions are canonically named (batch_number, row,
110 // col, channel), with extents (batches, height, width, depth), with the
111 // trailing dimension changing most rapidly (channels has the smallest stride,
112 // typically 1 element).
113 //
114 // In generated C code, we store arrays with the dimensions reversed. The
115 // first dimension has smallest stride.
116 //
117 // We name our variables by their Tensorflow convention, but generate C code
118 // nesting loops such that the innermost loop has the smallest stride for the
119 // best cache behavior.
120 for (int d0 = 0; d0 < extended_output_shape.Dims(0); ++d0)
121 {
122 for (int d1 = 0; d1 < extended_output_shape.Dims(1); ++d1)
123 {
124 for (int d2 = 0; d2 < extended_output_shape.Dims(2); ++d2)
125 {
126 for (int d3 = 0; d3 < extended_output_shape.Dims(3); ++d3)
127 {
128 for (int d4 = 0; d4 < extended_output_shape.Dims(4); ++d4)
129 {
130 for (int d5 = 0; d5 < extended_output_shape.Dims(5); ++d5)
131 {
132 output_data[Offset(extended_output_shape, d0, d1, d2, d3, d4, d5)] =
133 ActivationFunctionWithMinMax<T>(
134 fn(params, input1_data[SubscriptToIndex(desc1, d0, d1, d2, d3, d4, d5)],
135 input2_data[SubscriptToIndex(desc2, d0, d1, d2, d3, d4, d5)]),
137 }
138 }
139 }
140 }
141 }
142 }
143}
144template <typename T>
146 const Shape &input1_shape, const T *input1_data,
147 const Shape &input2_shape, const T *input2_data,
148 const Shape &output_shape, T *output_data,
149 const std::function<T(const T &, const T &)> &fn)
150{
153 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
154 const Shape extended_output_shape = Shape::ExtendedShape(kMaxBroadcastDim, output_shape);
155
156 // Comment from tensorflow lite:
157 //
158 // In Tensorflow, the dimensions are canonically named (batch_number, row,
159 // col, channel), with extents (batches, height, width, depth), with the
160 // trailing dimension changing most rapidly (channels has the smallest stride,
161 // typically 1 element).
162 //
163 // In generated C code, we store arrays with the dimensions reversed. The
164 // first dimension has smallest stride.
165 //
166 // We name our variables by their Tensorflow convention, but generate C code
167 // nesting loops such that the innermost loop has the smallest stride for the
168 // best cache behavior.
169 for (int d0 = 0; d0 < extended_output_shape.Dims(0); ++d0)
170 {
171 for (int d1 = 0; d1 < extended_output_shape.Dims(1); ++d1)
172 {
173 for (int d2 = 0; d2 < extended_output_shape.Dims(2); ++d2)
174 {
175 for (int d3 = 0; d3 < extended_output_shape.Dims(3); ++d3)
176 {
177 for (int d4 = 0; d4 < extended_output_shape.Dims(4); ++d4)
178 {
179 for (int d5 = 0; d5 < extended_output_shape.Dims(5); ++d5)
180 {
181 output_data[Offset(extended_output_shape, d0, d1, d2, d3, d4, d5)] =
182 ActivationFunctionWithMinMax<T>(
183 fn(input1_data[SubscriptToIndex(desc1, d0, d1, d2, d3, d4, d5)],
184 input2_data[SubscriptToIndex(desc2, d0, d1, d2, d3, d4, d5)]),
186 }
187 }
188 }
189 }
190 }
191 }
192}
193
194template <>
196 const BinaryArithmeticOpParam &params, const Shape &input1_shape, const float *input1_data,
197 const Shape &input2_shape, const float *input2_data, const Shape &output_shape,
198 float *output_data, const std::function<float(const float &, const float &)> &fn)
199{
202 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
203 const Shape extended_output_shape = Shape::ExtendedShape(kMaxBroadcastDim, output_shape);
204
205 for (int d0 = 0; d0 < extended_output_shape.Dims(0); ++d0)
206 {
207 for (int d1 = 0; d1 < extended_output_shape.Dims(1); ++d1)
208 {
209 for (int d2 = 0; d2 < extended_output_shape.Dims(2); ++d2)
210 {
211 for (int d3 = 0; d3 < extended_output_shape.Dims(3); ++d3)
212 {
213 for (int d4 = 0; d4 < extended_output_shape.Dims(4); ++d4)
214 {
215 for (int d5 = 0; d5 < extended_output_shape.Dims(5); ++d5)
216 {
217 output_data[Offset(extended_output_shape, d0, d1, d2, d3, d4, d5)] =
219 fn(input1_data[SubscriptToIndex(desc1, d0, d1, d2, d3, d4, d5)],
220 input2_data[SubscriptToIndex(desc2, d0, d1, d2, d3, d4, d5)]),
222 }
223 }
224 }
225 }
226 }
227 }
228}
229
230template <>
232 const BinaryArithmeticOpParam &, const Shape &input1_shape, const bool *input1_data,
233 const Shape &input2_shape, const bool *input2_data, const Shape &output_shape, bool *output_data,
234 const std::function<bool(const bool &, const bool &)> &fn)
235{
238 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
239 const Shape extended_output_shape = Shape::ExtendedShape(kMaxBroadcastDim, output_shape);
240
241 for (int d0 = 0; d0 < extended_output_shape.Dims(0); ++d0)
242 {
243 for (int d1 = 0; d1 < extended_output_shape.Dims(1); ++d1)
244 {
245 for (int d2 = 0; d2 < extended_output_shape.Dims(2); ++d2)
246 {
247 for (int d3 = 0; d3 < extended_output_shape.Dims(3); ++d3)
248 {
249 for (int d4 = 0; d4 < extended_output_shape.Dims(4); ++d4)
250 {
251 for (int d5 = 0; d5 < extended_output_shape.Dims(5); ++d5)
252 {
253 output_data[Offset(extended_output_shape, d0, d1, d2, d3, d4, d5)] =
254 fn(input1_data[SubscriptToIndex(desc1, d0, d1, d2, d3, d4, d5)],
255 input2_data[SubscriptToIndex(desc2, d0, d1, d2, d3, d4, d5)]);
256 }
257 }
258 }
259 }
260 }
261 }
262}
263
264template <>
266 const BinaryArithmeticOpParam &params, const Shape &input1_shape, const int64_t *input1_data,
267 const Shape &input2_shape, const int64_t *input2_data, const Shape &output_shape,
268 int64_t *output_data, const std::function<int64_t(const int64_t &, const int64_t &)> &fn)
269{
272 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
273 const Shape extended_output_shape = Shape::ExtendedShape(kMaxBroadcastDim, output_shape);
274
275 for (int d0 = 0; d0 < extended_output_shape.Dims(0); ++d0)
276 {
277 for (int d1 = 0; d1 < extended_output_shape.Dims(1); ++d1)
278 {
279 for (int d2 = 0; d2 < extended_output_shape.Dims(2); ++d2)
280 {
281 for (int d3 = 0; d3 < extended_output_shape.Dims(3); ++d3)
282 {
283 for (int d4 = 0; d4 < extended_output_shape.Dims(4); ++d4)
284 {
285 for (int d5 = 0; d5 < extended_output_shape.Dims(5); ++d5)
286 {
287 output_data[Offset(extended_output_shape, d0, d1, d2, d3, d4, d5)] =
289 fn(input1_data[SubscriptToIndex(desc1, d0, d1, d2, d3, d4, d5)],
290 input2_data[SubscriptToIndex(desc2, d0, d1, d2, d3, d4, d5)]),
292 }
293 }
294 }
295 }
296 }
297 }
298}
299
300} // namespace reference
301} // namespace cker
302} // namespace nnfw
303
304#endif // __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__
int32_t Dims(int i) const
Definition Shape.h:106
NdArrayDesc< 4 > desc1
const luci_interpreter::RuntimeShape output_shape
NdArrayDesc< 4 > desc2
std::enable_if_t< is_quant8< T >::value > BroadcastBinaryArithmeticOpSlow(const BinaryArithmeticOpParam &params, const Shape &input1_shape, const T *input1_data, const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data, const std::function< T(const BinaryArithmeticOpParam &params, const T &, const T &)> &fn)
void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape, const T *input1_data, const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data, const std::function< T(const T &, const T &)> &fn)
int Offset(const Shape &shape, int i0, int i1, int i2, int i3)
Definition Shape.h:325
void NdArrayDescsForElementwiseBroadcast(const Shape &input0_shape, const Shape &input1_shape, NdArrayDesc< N > *desc0_out, NdArrayDesc< N > *desc1_out)
Definition Utils.h:314
T ActivationFunctionWithMinMax(T x, T output_activation_min, T output_activation_max)
Definition Utils.h:43
int MatchingElementsSize(const Shape &shape, const Shape &check_shape_0, const Shape &check_shape_1)
Definition Shape.h:448
int SubscriptToIndex(const NdArrayDesc< 4 > &desc, int i0, int i1, int i2, int i3)
Definition Utils.h:255
Definition topk_v2.h:30
int32_t size[5]
Definition Slice.cpp:35