ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Add.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "execute/OMUtils.h"
#include "execute/OMKernelExecutionBuilder.h"
#include "OMStatus.h"
#include "execute/OMRuntimeKernel.h"
#include "core/OMUtils.h"

#include "core/OMRuntimeShape.h"
#include "PALAdd.h"
25
26using namespace onert_micro;
27using namespace onert_micro::execute;
28
namespace
{

// Operand positions of the Add operator within the kernel's
// input/output tensor lists (two inputs, one output).
constexpr uint32_t input1TensorIdx = 0;
constexpr uint32_t input2TensorIdx = 1;
constexpr uint32_t outputTensorIdx = 0;

} // namespace
37
// NOTE: dynamic shapes are supported unless DIS_DYN_SHAPES is defined at build time
// TODO: reduce code duplication with Mul, Sub
40namespace onert_micro
41{
42namespace execute
43{
44
46{
47 core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
48 core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
49 uint16_t op_index = execute_args.kernel_index;
50
51 const circle::Tensor *input1;
52 const circle::Tensor *input2;
53 const circle::Tensor *output;
54
55 uint8_t *input1_data;
56 uint8_t *input2_data;
57 uint8_t *output_data;
58
59 uint16_t input1_index = 0;
60 uint16_t input2_index = 0;
61
62 const circle::AddOptions *options;
63 // Read kernel
64 {
65 execute::OMRuntimeKernel runtime_kernel;
66 runtime_kernel.readKernel(op_index, runtime_context);
67
68 input1 = runtime_kernel.inputs[input1TensorIdx];
69 input2 = runtime_kernel.inputs[input2TensorIdx];
70 output = runtime_kernel.outputs[outputTensorIdx];
71 assert(input1 != nullptr);
72 assert(input2 != nullptr);
73 assert(output != nullptr);
74
75 runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context);
76
77 input1_data = runtime_kernel.inputs_data[input1TensorIdx];
78 input2_data = runtime_kernel.inputs_data[input2TensorIdx];
79 output_data = runtime_kernel.outputs_data[outputTensorIdx];
80 assert(input1_data != nullptr);
81 assert(input2_data != nullptr);
82 assert(output_data != nullptr);
83
84 options = runtime_kernel.first_operator->builtin_options_as_AddOptions();
85
86 input1_index = runtime_kernel.inputs_index[input1TensorIdx];
87 input2_index = runtime_kernel.inputs_index[input2TensorIdx];
88 }
89
90 OMStatus status;
91
92 core::OMRuntimeShape input1_shape(input1);
93 core::OMRuntimeShape input2_shape(input2);
95
96#ifndef DIS_DYN_SHAPES
97 // Check dynamic shapes
98 {
99 auto input_1_dynamic_shape = runtime_storage.getDynamicRuntimeShape(input1_index);
100 if (input_1_dynamic_shape.flatSize() != 0)
101 input1_shape = input_1_dynamic_shape;
102
103 auto input_2_dynamic_shape = runtime_storage.getDynamicRuntimeShape(input2_index);
104 if (input_2_dynamic_shape.flatSize() != 0)
105 input2_shape = input_2_dynamic_shape;
106 }
107#endif // DIS_DYN_SHAPES
108
109 // Check broadcast property
111 const bool need_broadcast = pal::processBroadcastShapes(input1_shape, input2_shape, &params);
112 switch (input1->type())
113 {
114#ifndef DIS_FLOAT
115 case circle::TensorType_FLOAT32:
116 {
117 execute::calculateActivationRange(options->fused_activation_function(),
118 &params.float_activation_min, &params.float_activation_max);
119 if (need_broadcast)
120 {
122 params, input1_shape, core::utils::castInputData<float>(input1_data), input2_shape,
123 core::utils::castInputData<float>(input2_data), output_shape,
124 core::utils::castOutputData<float>(output_data));
125 }
126 else
127 {
128 status =
129 pal::Add(params, output_shape.flatSize(), core::utils::castInputData<float>(input1_data),
130 core::utils::castInputData<float>(input2_data),
131 core::utils::castOutputData<float>(output_data));
132 }
133 }
134 break;
135#endif // DIS_FLOAT
136 case circle::TensorType_INT64:
137 {
138 execute::calculateActivationRange(options->fused_activation_function(),
139 &params.int64_activation_min, &params.int64_activation_max);
140
141 if (need_broadcast)
142 {
144 params, input1_shape, core::utils::castInputData<int64_t>(input1_data), input2_shape,
145 core::utils::castInputData<int64_t>(input2_data), output_shape,
146 core::utils::castOutputData<int64_t>(output_data));
147 }
148 else
149 {
150 status = pal::Add(params, input1_shape.flatSize(),
151 core::utils::castInputData<int64_t>(input1_data),
152 core::utils::castInputData<int64_t>(input2_data),
153 core::utils::castOutputData<int64_t>(output_data));
154 }
155 }
156 break;
157 case circle::TensorType_INT32:
158 {
159 execute::calculateActivationRange(options->fused_activation_function(),
160 &params.int32_activation_min, &params.int32_activation_max);
161
162 if (need_broadcast)
163 {
165 params, input1_shape, core::utils::castInputData<int32_t>(input1_data), input2_shape,
166 core::utils::castInputData<int32_t>(input2_data), output_shape,
167 core::utils::castOutputData<int32_t>(output_data));
168 }
169 else
170 {
171 status = pal::Add(params, input1_shape.flatSize(),
172 core::utils::castInputData<int32_t>(input1_data),
173 core::utils::castInputData<int32_t>(input2_data),
174 core::utils::castOutputData<int32_t>(output_data));
175 }
176 }
177 break;
178#ifndef DIS_QUANT
179 case circle::TensorType_INT8:
180 {
181 core::ArithmeticQuantParams add_params{};
182
183 calculateQuantParams(add_params, input1, input2, output,
184 options->fused_activation_function());
185
186 if (need_broadcast)
187 {
189 add_params, input1_shape, core::utils::castInputData<int8_t>(input1_data), input2_shape,
190 core::utils::castInputData<int8_t>(input2_data), output_shape,
191 core::utils::castOutputData<int8_t>(output_data));
192 }
193 else
194 {
195 status = pal::Add(add_params, input1_shape.flatSize(),
196 core::utils::castInputData<int8_t>(input1_data),
197 core::utils::castInputData<int8_t>(input2_data),
198 core::utils::castOutputData<int8_t>(output_data));
199 }
200 }
201 break;
202#endif // DIF_QUANT
203 default:
204 {
205 status = UnsupportedType;
206 assert(false && "Unsupported type.");
207 }
208 }
209
210 return status;
211}
212
213} // namespace execute
214} // namespace onert_micro
OMRuntimeShape getDynamicRuntimeShape(uint16_t tensor_index)
uint8_t * outputs_data[maxOutputSize]
const circle::Operator * first_operator
OMStatus getDataFromStorage(uint16_t op_index, core::OMRuntimeStorage &storage, core::OMRuntimeContext &context)
OMStatus readKernel(uint16_t op_index, core::OMRuntimeContext &runtime_context)
const circle::Tensor * outputs[maxOutputSize]
const circle::Tensor * inputs[maxInputSize]
const luci_interpreter::RuntimeShape output_shape
constexpr uint32_t input1TensorIdx
constexpr uint32_t outputTensorIdx
constexpr uint32_t input2TensorIdx
OMStatus BroadcastAdd4DSlow(const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
bool processBroadcastShapes(const core::OMRuntimeShape &shape0, const core::OMRuntimeShape &shape1, core::BinaryArithmeticBroadcastParams *params)
OMStatus Add(const core::ArithmeticQuantParams &params, const uint32_t flat_size, const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data)
Definition PALAdd.h:33
void calculateQuantParams(core::ArithmeticQuantParams &params, const circle::Tensor *input1, const circle::Tensor *input2, const circle::Tensor *output, circle::ActivationFunctionType act)
Definition OMUtils.cpp:194
OMStatus calculateActivationRange(circle::ActivationFunctionType activation, T *activation_min, T *activation_max)
Definition OMUtils.h:36
OMStatus execute_kernel_CircleAdd(const OMExecuteArgs &execute_args)
Definition Add.cpp:45
@ UnsupportedType
Definition OMStatus.h:26
core::OMRuntimeContext & runtime_context
core::OMRuntimeStorage & runtime_storage