ONE - On-device Neural Engine
Loading...
Searching...
No Matches
BinaryArithmeticOps.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__
19#define __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__
20
21#include "cker/Shape.h"
22#include "cker/Types.h"
23#include "cker/Utils.h"
24
25#include <cmath>
26
27namespace nnfw
28{
29namespace cker
30{
31namespace reference
32{
33
// Generic element-wise binary operation for element type T.
//
// The element count comes from MatchingElementsSize(input1_shape, input2_shape,
// output_shape). Both inputs are read with the same flat index i, so this
// overload does no broadcasting. For each i the caller-supplied `fn` is applied
// to (input1_data[i], input2_data[i]) and the result is passed through
// ActivationFunctionWithMinMax before being written to output_data[i].
//
// NOTE(review): listing lines 44-45 (the remaining arguments of the
// ActivationFunctionWithMinMax call) are missing from this extract, so the
// bounds used here cannot be confirmed from this view. Presumably they are
// fields of `params`; verify against the original header.
34template <typename T>
35inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
36 const T *input1_data, const Shape &input2_shape,
37 const T *input2_data, const Shape &output_shape, T *output_data,
38 const std::function<T(const T &, const T &)> &fn)
39{
40 const int32_t flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
41 for (int i = 0; i < flat_size; ++i)
42 {
43 output_data[i] = ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
// (listing lines 44-45 are absent here: the call above is truncated in this extract)
46 }
47}
48
49template <>
50inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
51 const float *input1_data, const Shape &input2_shape,
52 const float *input2_data, const Shape &output_shape,
53 float *output_data,
54 const std::function<float(const float &, const float &)> &fn)
55{
56 const int size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
57 for (int i = 0; i < size; i++)
58 {
59 output_data[i] = ActivationFunctionWithMinMax(
60 fn(input1_data[i], input2_data[i]), params.float_activation_min, params.float_activation_max);
61 }
62}
63
64template <>
65inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &, const Shape &input1_shape,
66 const bool *input1_data, const Shape &input2_shape,
67 const bool *input2_data, const Shape &output_shape,
68 bool *output_data,
69 const std::function<bool(const bool &, const bool &)> &fn)
70{
71 const int size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
72 for (int i = 0; i < size; i++)
73 {
74 output_data[i] = fn(input1_data[i], input2_data[i]);
75 }
76}
77
78template <>
79inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
80 const int64_t *input1_data, const Shape &input2_shape,
81 const int64_t *input2_data, const Shape &output_shape,
82 int64_t *output_data,
83 const std::function<int64_t(const int64_t &, const int64_t &)> &fn)
84{
85 const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
86 for (int i = 0; i < flat_size; ++i)
87 {
88 output_data[i] = ActivationFunctionWithMinMax(
89 fn(input1_data[i], input2_data[i]), params.int64_activation_min, params.int64_activation_max);
90 }
91}
92
// Broadcasting binary operation, enabled only when is_quant8<T>::value is true.
//
// Unlike the other overloads in this file, `fn` here receives `params` as its
// first argument in addition to the two operands.
//
// The output is walked as four nested loops over the 4-D shape returned by
// Shape::ExtendedShape(4, output_shape). For each (b, y, x, c):
//   - the output position is Offset(extended_output_shape, b, y, x, c);
//   - each input position is SubscriptToIndex(descN, b, y, x, c), where
//     desc1/desc2 are filled by NdArrayDescsForElementwiseBroadcast from the
//     two input shapes;
//   - the value of fn(params, in1, in2) is passed through
//     ActivationFunctionWithMinMax<T> before it is stored.
//
// NOTE(review): listing lines 99-100 and 128 are missing from this extract.
// Lines 99-100 presumably declare desc1 and desc2 (the page residue at the end
// of this listing mentions `NdArrayDesc< 4 > desc1` / `desc2`). Line 128
// presumably holds the min/max arguments of the ActivationFunctionWithMinMax
// call. Confirm both against the original header.
93template <typename T>
94inline typename std::enable_if_t<is_quant8<T>::value> BroadcastBinaryArithmeticOpSlow(
95 const BinaryArithmeticOpParam &params, const Shape &input1_shape, const T *input1_data,
96 const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data,
97 const std::function<T(const BinaryArithmeticOpParam &params, const T &, const T &)> &fn)
98{
// (listing lines 99-100 are absent here)
101 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
102 const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
103
104 // Comment from tensorflow lite:
105 //
106 // In Tensorflow, the dimensions are canonically named (batch_number, row,
107 // col, channel), with extents (batches, height, width, depth), with the
108 // trailing dimension changing most rapidly (channels has the smallest stride,
109 // typically 1 element).
110 //
111 // In generated C code, we store arrays with the dimensions reversed. The
112 // first dimension has smallest stride.
113 //
114 // We name our variables by their Tensorflow convention, but generate C code
115 // nesting loops such that the innermost loop has the smallest stride for the
116 // best cache behavior.
117 for (int b = 0; b < extended_output_shape.Dims(0); ++b)
118 {
119 for (int y = 0; y < extended_output_shape.Dims(1); ++y)
120 {
121 for (int x = 0; x < extended_output_shape.Dims(2); ++x)
122 {
123 for (int c = 0; c < extended_output_shape.Dims(3); ++c)
124 {
125 output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax<T>(
126 fn(params, input1_data[SubscriptToIndex(desc1, b, y, x, c)],
127 input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
// (listing line 128 is absent here: the call above is truncated in this extract)
129 }
130 }
131 }
132 }
133}
// Broadcasting binary operation for a generic element type T, where `fn` takes
// only the two operands.
//
// NOTE(review): the declarator line (listing line 135) is missing from this
// extract, so the function name and return type are not visible. Judging by
// the parameter list and the body, this is presumably the non-quantized
// BroadcastBinaryArithmeticOpSlow overload; confirm against the original
// header. Listing lines 141-142 (presumably the desc1/desc2 declarations) and
// 170 (presumably the min/max arguments of ActivationFunctionWithMinMax) are
// missing as well.
//
// Visible behavior: four nested loops over the 4-D shape returned by
// Shape::ExtendedShape(4, output_shape). The output position is
// Offset(extended_output_shape, b, y, x, c), each input position is
// SubscriptToIndex(descN, b, y, x, c), and fn(in1, in2) is passed through
// ActivationFunctionWithMinMax<T> before it is stored.
134template <typename T>
136 const Shape &input1_shape, const T *input1_data,
137 const Shape &input2_shape, const T *input2_data,
138 const Shape &output_shape, T *output_data,
139 const std::function<T(const T &, const T &)> &fn)
140{
// (listing lines 141-142 are absent here)
143 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
144 const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
145
146 // Comment from tensorflow lite:
147 //
148 // In Tensorflow, the dimensions are canonically named (batch_number, row,
149 // col, channel), with extents (batches, height, width, depth), with the
150 // trailing dimension changing most rapidly (channels has the smallest stride,
151 // typically 1 element).
152 //
153 // In generated C code, we store arrays with the dimensions reversed. The
154 // first dimension has smallest stride.
155 //
156 // We name our variables by their Tensorflow convention, but generate C code
157 // nesting loops such that the innermost loop has the smallest stride for the
158 // best cache behavior.
159 for (int b = 0; b < extended_output_shape.Dims(0); ++b)
160 {
161 for (int y = 0; y < extended_output_shape.Dims(1); ++y)
162 {
163 for (int x = 0; x < extended_output_shape.Dims(2); ++x)
164 {
165 for (int c = 0; c < extended_output_shape.Dims(3); ++c)
166 {
167 output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax<T>(
168 fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
169 input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
// (listing line 170 is absent here: the call above is truncated in this extract)
171 }
172 }
173 }
174 }
175}
176
// Explicit specialization for float of a broadcasting binary operation.
//
// NOTE(review): the declarator line (listing line 178) is missing from this
// extract; this is presumably the float specialization of
// BroadcastBinaryArithmeticOpSlow. Confirm against the original header.
// Listing lines 183-184 (presumably the desc1/desc2 declarations) and 199
// (presumably the float min/max arguments of ActivationFunctionWithMinMax) are
// missing as well.
//
// Visible behavior: four nested loops over the 4-D shape returned by
// Shape::ExtendedShape(4, output_shape). For each (b, y, x, c),
// fn(input1[SubscriptToIndex(desc1, ...)], input2[SubscriptToIndex(desc2, ...)])
// is passed through ActivationFunctionWithMinMax and stored at
// Offset(extended_output_shape, b, y, x, c).
177template <>
179 const BinaryArithmeticOpParam &params, const Shape &input1_shape, const float *input1_data,
180 const Shape &input2_shape, const float *input2_data, const Shape &output_shape,
181 float *output_data, const std::function<float(const float &, const float &)> &fn)
182{
// (listing lines 183-184 are absent here)
185 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
186 const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
187
188 for (int b = 0; b < extended_output_shape.Dims(0); ++b)
189 {
190 for (int y = 0; y < extended_output_shape.Dims(1); ++y)
191 {
192 for (int x = 0; x < extended_output_shape.Dims(2); ++x)
193 {
194 for (int c = 0; c < extended_output_shape.Dims(3); ++c)
195 {
196 output_data[Offset(extended_output_shape, b, y, x, c)] =
197 ActivationFunctionWithMinMax(fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
198 input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
// (listing line 199 is absent here: the call above is truncated in this extract)
200 }
201 }
202 }
203 }
204}
205
// Explicit specialization for bool of a broadcasting binary operation.
//
// NOTE(review): the declarator line (listing line 207) is missing from this
// extract; this is presumably the bool specialization of
// BroadcastBinaryArithmeticOpSlow. Confirm against the original header.
// Listing lines 212-213 (presumably the desc1/desc2 declarations) are missing
// as well.
//
// Visible behavior: the BinaryArithmeticOpParam argument is unnamed and unused.
// Four nested loops run over the 4-D shape returned by
// Shape::ExtendedShape(4, output_shape). For each (b, y, x, c), the raw result
// of fn(input1[SubscriptToIndex(desc1, ...)], input2[SubscriptToIndex(desc2, ...)])
// is stored at Offset(extended_output_shape, b, y, x, c), with no
// ActivationFunctionWithMinMax step.
206template <>
208 const BinaryArithmeticOpParam &, const Shape &input1_shape, const bool *input1_data,
209 const Shape &input2_shape, const bool *input2_data, const Shape &output_shape, bool *output_data,
210 const std::function<bool(const bool &, const bool &)> &fn)
211{
// (listing lines 212-213 are absent here)
214 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
215 const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
216
217 for (int b = 0; b < extended_output_shape.Dims(0); ++b)
218 {
219 for (int y = 0; y < extended_output_shape.Dims(1); ++y)
220 {
221 for (int x = 0; x < extended_output_shape.Dims(2); ++x)
222 {
223 for (int c = 0; c < extended_output_shape.Dims(3); ++c)
224 {
225 output_data[Offset(extended_output_shape, b, y, x, c)] =
226 fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
227 input2_data[SubscriptToIndex(desc2, b, y, x, c)]);
228 }
229 }
230 }
231 }
232}
233
// Explicit specialization for int64_t of a broadcasting binary operation.
//
// NOTE(review): the declarator line (listing line 235) is missing from this
// extract; this is presumably the int64_t specialization of
// BroadcastBinaryArithmeticOpSlow. Confirm against the original header.
// Listing lines 240-241 (presumably the desc1/desc2 declarations) and 256
// (presumably the int64 min/max arguments of ActivationFunctionWithMinMax) are
// missing as well.
//
// Visible behavior: four nested loops over the 4-D shape returned by
// Shape::ExtendedShape(4, output_shape). For each (b, y, x, c),
// fn(input1[SubscriptToIndex(desc1, ...)], input2[SubscriptToIndex(desc2, ...)])
// is passed through ActivationFunctionWithMinMax and stored at
// Offset(extended_output_shape, b, y, x, c).
234template <>
236 const BinaryArithmeticOpParam &params, const Shape &input1_shape, const int64_t *input1_data,
237 const Shape &input2_shape, const int64_t *input2_data, const Shape &output_shape,
238 int64_t *output_data, const std::function<int64_t(const int64_t &, const int64_t &)> &fn)
239{
// (listing lines 240-241 are absent here)
242 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
243 const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
244
245 for (int b = 0; b < extended_output_shape.Dims(0); ++b)
246 {
247 for (int y = 0; y < extended_output_shape.Dims(1); ++y)
248 {
249 for (int x = 0; x < extended_output_shape.Dims(2); ++x)
250 {
251 for (int c = 0; c < extended_output_shape.Dims(3); ++c)
252 {
253 output_data[Offset(extended_output_shape, b, y, x, c)] =
254 ActivationFunctionWithMinMax(fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
255 input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
// (listing line 256 is absent here: the call above is truncated in this extract)
257 }
258 }
259 }
260 }
261}
262
263} // namespace reference
264} // namespace cker
265} // namespace nnfw
266
267#endif // __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__
int32_t Dims(int i) const
Definition Shape.h:92
NdArrayDesc< 4 > desc1
const luci_interpreter::RuntimeShape output_shape
NdArrayDesc< 4 > desc2
std::enable_if_t< is_quant8< T >::value > BroadcastBinaryArithmeticOpSlow(const BinaryArithmeticOpParam &params, const Shape &input1_shape, const T *input1_data, const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data, const std::function< T(const BinaryArithmeticOpParam &params, const T &, const T &)> &fn)
void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape, const T *input1_data, const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data, const std::function< T(const T &, const T &)> &fn)
int Offset(const Shape &shape, int i0, int i1, int i2, int i3)
Definition Shape.h:237
void NdArrayDescsForElementwiseBroadcast(const Shape &input0_shape, const Shape &input1_shape, NdArrayDesc< N > *desc0_out, NdArrayDesc< N > *desc1_out)
Definition Utils.h:290
T ActivationFunctionWithMinMax(T x, T output_activation_min, T output_activation_max)
Definition Utils.h:43
int MatchingElementsSize(const Shape &shape, const Shape &check_shape_0, const Shape &check_shape_1)
Definition Shape.h:333
int SubscriptToIndex(const NdArrayDesc< 4 > &desc, int i0, int i1, int i2, int i3)
Definition Utils.h:255
Definition topk_v2.h:30
int32_t size[5]
Definition Slice.cpp:35