ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Gather.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "OMStatus.h"
18
19#include "core/OMUtils.h"
20#include "core/OMKernelData.h"
21
23#include "execute/OMUtils.h"
25
26using namespace onert_micro;
27using namespace onert_micro::core;
28using namespace onert_micro::execute;
29
namespace
{

// Position of the gathered-from tensor in the operator's input list.
constexpr uint32_t inputTensorIdx = 0;
// Position of the indices ("positions") tensor in the operator's input list.
constexpr uint32_t positionsTensorIdx = 1;

// Position of the result tensor in the operator's output list.
constexpr uint32_t outputTensorIdx = 0;

/**
 * @brief Copy slices of `inner_size` elements from `input_data` into `output_data`,
 *        selecting them along the gather axis with the indices in `coords_data`.
 *
 * The input is addressed as [batch_size, outer_size, axis_size, inner_size], the
 * indices as [batch_size, coord_size] and the output as
 * [batch_size, outer_size, coord_size, inner_size], all flat and contiguous.
 *
 * @tparam InputT   element type of input and output (copied with memcpy)
 * @tparam CoordsT  integer type of the indices
 *
 * @param input_data   flat source buffer
 * @param coords_data  flat index buffer; entry [batch * coord_size + coord] selects the
 *                     slice along the gather axis and must lie in [0, axis_size)
 * @param output_data  flat destination buffer
 * @param axis_size    extent of the gather axis in the input
 * @param batch_size   number of batches
 * @param outer_size   number of slabs per batch located before the gather axis
 * @param inner_size   number of contiguous elements copied per index
 * @param coord_size   number of indices per batch
 */
template <typename InputT, typename CoordsT = int32_t>
void gather(const InputT *input_data, const CoordsT *coords_data, InputT *output_data,
            int32_t axis_size, int32_t batch_size, int32_t outer_size, int32_t inner_size,
            int32_t coord_size)
{
  // Number of bytes moved per gathered slice; invariant across all iterations.
  const auto slice_bytes = sizeof(InputT) * inner_size;

  for (int batch = 0; batch < batch_size; ++batch)
  {
    // Every batch uses its own run of `coord_size` indices.
    const CoordsT *batch_coords = coords_data + batch * coord_size;

    for (int outer = 0; outer < outer_size; ++outer)
    {
      // Flat number of the (batch, outer) slab, shared by source and destination addressing.
      const int slab = batch * outer_size + outer;

      for (int coord = 0; coord < coord_size; ++coord)
      {
        const CoordsT index = batch_coords[coord];
        // An index outside the gather axis would read past the current slab of the input.
        assert(index >= 0 && index < axis_size);

        std::memcpy(output_data + (slab * coord_size + coord) * inner_size,
                    input_data + (slab * axis_size + index) * inner_size, slice_bytes);
      }
    }
  }
}

} // namespace
63
64// NOTE: doesn't currently support dynamic shapes
65namespace onert_micro
66{
67namespace execute
68{
69
71{
72 core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
73 core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
74 uint16_t op_index = execute_args.kernel_index;
75
76 const circle::Tensor *input;
77 const circle::Tensor *position;
78 const circle::Tensor *output;
79
80 uint8_t *input_data;
81 uint8_t *position_data;
82 uint8_t *output_data;
83
84 const circle::GatherOptions *options;
85 // Read kernel
86 {
87 execute::OMRuntimeKernel runtime_kernel;
88 OMStatus status = runtime_kernel.readKernel(op_index, runtime_context);
89 if (status != Ok)
90 return status;
91
92 input = runtime_kernel.inputs[inputTensorIdx];
93 position = runtime_kernel.inputs[positionsTensorIdx];
94 output = runtime_kernel.outputs[outputTensorIdx];
95 assert(input != nullptr);
96 assert(position != nullptr);
97 assert(output != nullptr);
98
99 status = runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context);
100 if (status != Ok)
101 return status;
102
103 input_data = runtime_kernel.inputs_data[inputTensorIdx];
104 position_data = runtime_kernel.inputs_data[positionsTensorIdx];
105 output_data = runtime_kernel.outputs_data[outputTensorIdx];
106 assert(input_data != nullptr);
107 assert(position_data != nullptr);
108 assert(output_data != nullptr);
109
110 options = runtime_kernel.first_operator->builtin_options_as_GatherOptions();
111 }
112
113 OMStatus status = Ok;
114
115 OMRuntimeShape position_shape(position);
116 OMRuntimeShape input_shape(input);
117
118 const int input_dims_size = input_shape.dimensionsCount();
119 int axis = options->axis();
120 if (axis < 0)
121 {
122 axis += input_dims_size;
123 }
124
125 int batch_dims = options->batch_dims();
126 // batch_dims should be in range: [-rank(coords), rank(coords)].
127 // Negative batch_dims is added with rank of coords.
128 const int coords_dims_size = position_shape.dimensionsCount();
129 if (batch_dims < 0)
130 {
131 batch_dims += coords_dims_size;
132 }
133
134 const int axis_size = input_shape.dims(axis);
135
136 int batch_size = 1;
137 for (int i = 0; i < batch_dims; ++i)
138 {
139 batch_size *= input_shape.dims(i);
140 }
141 int outer_size = 1;
142 for (int i = batch_dims; i < axis; ++i)
143 {
144 outer_size *= input_shape.dims(i);
145 }
146 int inner_size = 1;
147 for (int i = axis + 1; i < input_dims_size; ++i)
148 {
149 inner_size *= input_shape.dims(i);
150 }
151 int coord_size = 1;
152 for (int i = batch_dims; i < coords_dims_size; ++i)
153 {
154 coord_size *= position_shape.dims(i);
155 }
156
157 switch (input->type())
158 {
159#ifndef DIS_FLOAT
160 case circle::TensorType_FLOAT32:
161 {
162 gather<float, int32_t>(utils::castInputData<float>(input_data),
163 utils::castInputData<int32_t>(position_data),
164 utils::castOutputData<float>(output_data), axis_size, batch_size,
165 outer_size, inner_size, coord_size);
166 }
167 break;
168#endif // DIS_FLOAT
169#ifndef DIS_QUANT
170 case circle::TensorType_INT8:
171 {
172 gather<int8_t, int32_t>(utils::castInputData<int8_t>(input_data),
173 utils::castInputData<int32_t>(position_data),
174 utils::castOutputData<int8_t>(output_data), axis_size, batch_size,
175 outer_size, inner_size, coord_size);
176 }
177 break;
178#endif // DIS_QUANT
179 case circle::TensorType_INT32:
180 {
181 gather<int32_t, int32_t>(utils::castInputData<int32_t>(input_data),
182 utils::castInputData<int32_t>(position_data),
183 utils::castOutputData<int32_t>(output_data), axis_size, batch_size,
184 outer_size, inner_size, coord_size);
185 }
186 break;
187 default:
188 {
189 status = UnsupportedActivation;
190 assert(false && "Unsupported type.");
191 }
192 }
193
194 return status;
195}
196
197} // namespace execute
198} // namespace onert_micro
size_t dimensionsCount() const noexcept
uint8_t * outputs_data[maxOutputSize]
const circle::Operator * first_operator
OMStatus getDataFromStorage(uint16_t op_index, core::OMRuntimeStorage &storage, core::OMRuntimeContext &context)
OMStatus readKernel(uint16_t op_index, core::OMRuntimeContext &runtime_context)
const circle::Tensor * outputs[maxOutputSize]
const circle::Tensor * inputs[maxInputSize]
constexpr uint32_t outputTensorIdx
OMStatus execute_kernel_CircleGather(const OMExecuteArgs &execute_args)
Definition Gather.cpp:70
@ UnsupportedActivation
Definition OMStatus.h:28
core::OMRuntimeContext & runtime_context
core::OMRuntimeStorage & runtime_storage