ONE - On-device Neural Engine
Loading...
Searching...
No Matches
ProcessBroadcastShapes.h
Go to the documentation of this file.
/*
 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18#ifndef ONERT_MICRO_EXECUTE_PAL_PROCESS_BROADCAST_SHAPES_H
19#define ONERT_MICRO_EXECUTE_PAL_PROCESS_BROADCAST_SHAPES_H
20
#include "core/OMKernelData.h"
#include "core/OMRuntimeShape.h"

#include <algorithm>
#include <type_traits>
23
24namespace onert_micro
25{
26namespace execute
27{
28namespace pal
29{
// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
// BROADCASTING.
//
// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
// rectangular array of numbers.
//
// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
// However, as Dims<N> is to be deprecated, this class exists as an adaptor
// to enable simple unoptimized implementations of element-wise broadcasting
// operations.
template <int N> struct NdArrayDesc
{
  // The "extent" of each dimension. Indices along dimension d must be in the
  // half-open interval [0, extents[d]).
  int extents[N];

  // The number of *elements* (not bytes) between consecutive indices of each
  // dimension. A stride of 0 marks a broadcast dimension (see
  // NdArrayDescsForElementwiseBroadcast below).
  int strides[N];
};
50
51// Copies dims to desc, calculating strides.
52template <int N>
53inline void copyDimsToDesc(const core::OMRuntimeShape &input_shape, NdArrayDesc<N> *desc_out)
54{
55 int desc_stride = 1;
56 for (int i = N - 1; i >= 0; --i)
57 {
58 desc_out->extents[i] = input_shape.dims(i);
59 desc_out->strides[i] = desc_stride;
60 desc_stride *= input_shape.dims(i);
61 }
62}
63
64template <int N, int DIM, typename Calc>
65typename std::enable_if<DIM == N - 1, void>::type NDOpsHelperImpl(const NdArrayDesc<N> &output,
66 const Calc &calc, int indexes[N])
67{
68 for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM])
69 {
70 calc(indexes);
71 }
72}
73
74template <int N, int DIM, typename Calc>
75typename std::enable_if<DIM != N - 1, void>::type NDOpsHelperImpl(const NdArrayDesc<N> &output,
76 const Calc &calc, int indexes[N])
77{
78 for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM])
79 {
80 NDOpsHelperImpl<N, DIM + 1, Calc>(output, calc, indexes);
81 }
82}
83
84// Execute the calc function in the innermost iteration based on the shape of
85// the output. The calc function should take a single argument of type int[N].
86template <int N, typename Calc>
87inline void NDOpsHelper(const NdArrayDesc<N> &output, const Calc &calc)
88{
89 int indexes[N] = {0};
90 NDOpsHelperImpl<N, 0, Calc>(output, calc, indexes);
91}
92
93template <int N>
95 const core::OMRuntimeShape &input1_shape,
96 NdArrayDesc<N> *desc0_out,
97 NdArrayDesc<N> *desc1_out)
98{
99
100 auto extended_input0_shape = core::OMRuntimeShape::extendedShape(N, input0_shape);
101 auto extended_input1_shape = core::OMRuntimeShape::extendedShape(N, input1_shape);
102
103 // Copy dims to desc, calculating strides.
104 copyDimsToDesc<N>(extended_input0_shape, desc0_out);
105 copyDimsToDesc<N>(extended_input1_shape, desc1_out);
106
107 // Walk over each dimension. If the extents are equal do nothing.
108 // Otherwise, set the desc with extent 1 to have extent equal to the other and
109 // stride 0.
110 for (int i = 0; i < N; ++i)
111 {
112 const int extent0 = extended_input0_shape.dims(i);
113 const int extent1 = extended_input1_shape.dims(i);
114 if (extent0 != extent1)
115 {
116 if (extent0 == 1)
117 {
118 desc0_out->strides[i] = 0;
119 desc0_out->extents[i] = extent1;
120 }
121 else
122 {
123 desc1_out->strides[i] = 0;
124 desc1_out->extents[i] = extent0;
125 }
126 }
127 }
128}
129
130inline int subscriptToIndex(const NdArrayDesc<4> &desc, int i0, int i1, int i2, int i3)
131{
132 return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] + i3 * desc.strides[3];
133}
134
135inline int subscriptToIndex(const NdArrayDesc<5> &desc, int indexes[5])
136{
137 return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
138 indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] + indexes[4] * desc.strides[4];
139}
140
141// Consolidates dimensions in broadcast inputs, checks for five-fold pattern.
142//
143// For example, if sequence of dimensions of one input is
144// ..., 1, 3, 1, 7, 9, 5,... and the other is ..., 2, 3, 1, 7, 1, 1, ...
145// we can consolidate these as
146// ..., 1, 3*7, 9*5, ... and 2, 3*7, 1.
147//
148// The category is updated in the less-frequent case of shapes that are
149// not suited to a fivefold-loop broadcast.
150//
151// Falls back to generic pattern when it does not know how to process properly.
152//
153// Returns true iff there is some sort of broadcast, which includes five-fold
154// patterns and falling back to generic broadcast.
156 const core::OMRuntimeShape &shape1,
158{
159 const int dims_count = std::max(shape0.dimensionsCount(), shape1.dimensionsCount());
160
162
163 auto extended_shape0 = core::OMRuntimeShape::extendedShape(dims_count, shape0);
164 auto extended_shape1 = core::OMRuntimeShape::extendedShape(dims_count, shape1);
165
166 // Check for "exact" match, implicitly accepting any scalar shapes.
167 if (extended_shape0 == extended_shape1)
168 {
170 return false;
171 }
172
173 if (shape0.flatSize() == 1)
174 {
176 return true;
177 }
178 else if (shape1.flatSize() == 1)
179 {
181 return true;
182 }
183
184 for (int i = dims_count - 1; i >= 0; --i)
185 {
186 if (extended_shape0.dims(i) == extended_shape1.dims(i))
187 {
188 continue;
189 }
190 else if (extended_shape0.dims(i) == 1)
191 {
193 return true;
194 }
195 else if (extended_shape1.dims(i) == 1)
196 {
198 return true;
199 }
200 else
201 {
202 // This case is erroneous: there is a dimension that does not match and
203 // is not a broadcast from one shape to the other.
205 return true;
206 }
207 }
208
209 return false;
210}
211
212} // namespace pal
213} // namespace execute
214} // namespace onert_micro
215
216#endif // ONERT_MICRO_EXECUTE_PAL_PROCESS_BROADCAST_SHAPES_H
static OMRuntimeShape extendedShape(int new_shape_size, const OMRuntimeShape &shape)
std::enable_if< DIM==N-1, void >::type NDOpsHelperImpl(const NdArrayDesc< N > &output, const Calc &calc, int indexes[N])
bool processBroadcastShapes(const core::OMRuntimeShape &shape0, const core::OMRuntimeShape &shape1, core::BinaryArithmeticBroadcastParams *params)
void NdArrayDescsForElementwiseBroadcast(const core::OMRuntimeShape &input0_shape, const core::OMRuntimeShape &input1_shape, NdArrayDesc< N > *desc0_out, NdArrayDesc< N > *desc1_out)
void copyDimsToDesc(const core::OMRuntimeShape &input_shape, NdArrayDesc< N > *desc_out)
void NDOpsHelper(const NdArrayDesc< N > &output, const Calc &calc)
int subscriptToIndex(const NdArrayDesc< 4 > &desc, int i0, int i1, int i2, int i3)