ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Sub.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "OMStatus.h"
18
19#include "core/OMUtils.h"
20#include "core/OMRuntimeShape.h"
21
22#include "execute/OMUtils.h"
25
26#include "PALSub.h"
27
28using namespace onert_micro;
29using namespace onert_micro::execute;
30
namespace
{

// Operand counts for the Sub kernel: two input tensors, one output tensor.
constexpr uint32_t numInput = 2;
constexpr uint32_t numOutput = 1;

// Positions of each operand inside the kernel's inputs/outputs arrays.
constexpr uint32_t input1TensorIdx = 0;
constexpr uint32_t input2TensorIdx = 1;
constexpr uint32_t outputTensorIdx = 0;

} // namespace
42
43// NOTE: doesnt currently support dynamic shapes
44// TODO: reduce code duplication with Add, Mul
45OMStatus onert_micro::execute::execute_kernel_CircleSub(const OMExecuteArgs &execute_args)
46{
47 core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
48 core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
49 uint16_t op_index = execute_args.kernel_index;
50
51 const circle::Tensor *input1;
52 const circle::Tensor *input2;
53 const circle::Tensor *output;
54
55 uint8_t *input1_data;
56 uint8_t *input2_data;
57 uint8_t *output_data;
58
59 const circle::SubOptions *options;
60 // Read kernel
61 {
62 execute::OMRuntimeKernel runtime_kernel;
63 runtime_kernel.readKernel(op_index, runtime_context);
64
65 input1 = runtime_kernel.inputs[input1TensorIdx];
66 input2 = runtime_kernel.inputs[input2TensorIdx];
67 output = runtime_kernel.outputs[outputTensorIdx];
68 assert(input1 != nullptr);
69 assert(input2 != nullptr);
70 assert(output != nullptr);
71
72 runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context);
73
74 input1_data = runtime_kernel.inputs_data[input1TensorIdx];
75 input2_data = runtime_kernel.inputs_data[input2TensorIdx];
77 assert(input1_data != nullptr);
78 assert(input2_data != nullptr);
79 assert(output_data != nullptr);
80
81 options = runtime_kernel.first_operator->builtin_options_as_SubOptions();
82 }
83
84 OMStatus status;
85
86 core::OMRuntimeShape input1_shape(input1);
87 core::OMRuntimeShape input2_shape(input2);
89
91 const bool need_broadcast = pal::processBroadcastShapes(input1_shape, input2_shape, &params);
92
93 switch (input1->type())
94 {
95#ifndef DIS_FLOAT
96 case circle::TensorType_FLOAT32:
97 {
98 status = execute::calculateActivationRange(options->fused_activation_function(),
99 &params.float_activation_min,
100 &params.float_activation_max);
101
102 if (need_broadcast)
103 {
105 params, input1_shape, core::utils::castInputData<float>(input1_data), input2_shape,
106 core::utils::castInputData<float>(input2_data), output_shape,
107 core::utils::castOutputData<float>(output_data));
108 }
109 else
110 {
111 status =
112 pal::Sub(params, input1_shape.flatSize(), core::utils::castInputData<float>(input1_data),
113 core::utils::castInputData<float>(input2_data),
114 core::utils::castOutputData<float>(output_data));
115 }
116 }
117 break;
118 case circle::TensorType_INT64:
119 {
120 status = execute::calculateActivationRange(options->fused_activation_function(),
121 &params.int64_activation_min,
122 &params.int64_activation_max);
123
124 if (need_broadcast)
125 {
127 params, input1_shape, core::utils::castInputData<int64_t>(input1_data), input2_shape,
128 core::utils::castInputData<int64_t>(input2_data), output_shape,
129 core::utils::castOutputData<int64_t>(output_data));
130 }
131 else
132 {
133 status = pal::Sub(params, input1_shape.flatSize(),
134 core::utils::castInputData<int64_t>(input1_data),
135 core::utils::castInputData<int64_t>(input2_data),
136 core::utils::castOutputData<int64_t>(output_data));
137 }
138 }
139 break;
140 case circle::TensorType_INT32:
141 {
142 status = execute::calculateActivationRange(options->fused_activation_function(),
143 &params.int32_activation_min,
144 &params.int32_activation_max);
145
146 if (need_broadcast)
147 {
149 params, input1_shape, core::utils::castInputData<int32_t>(input1_data), input2_shape,
150 core::utils::castInputData<int32_t>(input2_data), output_shape,
151 core::utils::castOutputData<int32_t>(output_data));
152 }
153 else
154 {
155 status = pal::Sub(params, input1_shape.flatSize(),
156 core::utils::castInputData<int32_t>(input1_data),
157 core::utils::castInputData<int32_t>(input2_data),
158 core::utils::castOutputData<int32_t>(output_data));
159 }
160 }
161 break;
162#endif // DIS_FLOAT
163#ifndef DIS_QUANT
164 case circle::TensorType_INT8:
165 {
166 core::ArithmeticQuantParams sub_params{};
167
168 calculateQuantParams(sub_params, input1, input2, output,
169 options->fused_activation_function());
170
171 if (need_broadcast)
172 {
174 sub_params, input1_shape, core::utils::castInputData<int8_t>(input1_data), input2_shape,
175 core::utils::castInputData<int8_t>(input2_data), output_shape,
176 core::utils::castOutputData<int8_t>(output_data));
177 }
178 else
179 {
180 status = pal::Sub(sub_params, input1_shape.flatSize(),
181 core::utils::castInputData<int8_t>(input1_data),
182 core::utils::castInputData<int8_t>(input2_data),
183 core::utils::castOutputData<int8_t>(output_data));
184 }
185 }
186 break;
187#endif // DIF_QUANT
188 default:
189 {
190 status = UnsupportedType;
191 assert(false && "Unsupported type.");
192 }
193 }
194
195 return status;
196}
uint8_t * outputs_data[maxOutputSize]
const circle::Operator * first_operator
OMStatus getDataFromStorage(uint16_t op_index, core::OMRuntimeStorage &storage, core::OMRuntimeContext &context)
OMStatus readKernel(uint16_t op_index, core::OMRuntimeContext &runtime_context)
const circle::Tensor * outputs[maxOutputSize]
const circle::Tensor * inputs[maxInputSize]
const luci_interpreter::RuntimeShape output_shape
constexpr uint32_t input1TensorIdx
constexpr uint32_t outputTensorIdx
constexpr uint32_t input2TensorIdx
OMStatus Sub(const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
bool processBroadcastShapes(const core::OMRuntimeShape &shape0, const core::OMRuntimeShape &shape1, core::BinaryArithmeticBroadcastParams *params)
OMStatus BroadcastSub4DSlow(const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
void calculateQuantParams(core::ArithmeticQuantParams &params, const circle::Tensor *input1, const circle::Tensor *input2, const circle::Tensor *output, circle::ActivationFunctionType act)
Definition OMUtils.cpp:194
OMStatus calculateActivationRange(circle::ActivationFunctionType activation, T *activation_min, T *activation_max)
Definition OMUtils.h:36
@ UnsupportedType
Definition OMStatus.h:26
core::OMRuntimeContext & runtime_context
core::OMRuntimeStorage & runtime_storage