ONE - On-device Neural Engine
Mul.cpp
/*
 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "OMStatus.h"

#include "core/OMUtils.h"
#include "core/OMRuntimeShape.h"

#include "execute/OMUtils.h"
#include "execute/OMKernelExecutionBuilder.h"
#include "execute/OMRuntimeKernel.h"
#include "PALMul.h"

using namespace onert_micro;
using namespace onert_micro::execute;

namespace
{

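// Operand positions of Mul inside the runtime kernel's inputs/outputs arrays.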
constexpr uint32_t input1TensorIdx = 0;
constexpr uint32_t input2TensorIdx = 1;
constexpr uint32_t outputTensorIdx = 0;

// TODO: Remove code duplicated with Sub and Add
void calculateQuantParamsForMul(core::ArithmeticQuantParams &params, const circle::Tensor *input1,
                                const circle::Tensor *input2, const circle::Tensor *output,
                                circle::ActivationFunctionType act)
{
  long input1_zp;
  long input2_zp;
  long output_zp;

  float input1_scale;
  float input2_scale;
  float output_scale;

  // Read input1 quant params
  readQuantParams(input1, input1_zp, input1_scale);
  // Read input2 quant params
  readQuantParams(input2, input2_zp, input2_scale);
  // Read output quant params
  readQuantParams(output, output_zp, output_scale);

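  // Real values are r = scale * (q - zero_point), so the int32 product
  // (q1 - z1) * (q2 - z2) must be rescaled by (s1 * s2) / s_out before the
  // output zero point is added back.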
  params.input1_offset = static_cast<int32_t>(input1_zp);
  params.input2_offset = static_cast<int32_t>(input2_zp);
  params.output_offset = static_cast<int32_t>(output_zp);
  params.left_shift = (output->type() == circle::TensorType_INT16) ? 15 : 20;

  double real_multiplier = static_cast<double>(input1_scale) * static_cast<double>(input2_scale) /
                           static_cast<double>(output_scale);
  quantizeMultiplier(real_multiplier, &params.output_multiplier, &params.output_shift);

  calculateActivationRangeQuantized(act, output_zp, output_scale, output->type(),
                                    &params.quantized_activation_min,
                                    &params.quantized_activation_max);
}
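
// Example (assuming the standard TFLite-style encoding in quantizeMultiplier):
// s1 = 0.5, s2 = 0.25, s_out = 0.125 gives real_multiplier = 1.0, stored as
// output_multiplier = 2^30 (0.5 in Q31) with output_shift = 1.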

} // namespace

// NOTE: does not currently support dynamic shapes
// TODO: reduce code duplication with Add and Sub
namespace onert_micro
{
namespace execute
{

OMStatus execute_kernel_CircleMul(const OMExecuteArgs &execute_args)
{
  core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
  core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
  uint16_t op_index = execute_args.kernel_index;

  const circle::Tensor *input1;
  const circle::Tensor *input2;
  const circle::Tensor *output;

  uint8_t *input1_data;
  uint8_t *input2_data;
  uint8_t *output_data;

  const circle::MulOptions *options;
  // Read kernel
  {
    execute::OMRuntimeKernel runtime_kernel;
    runtime_kernel.readKernel(op_index, runtime_context);

    input1 = runtime_kernel.inputs[input1TensorIdx];
    input2 = runtime_kernel.inputs[input2TensorIdx];
    output = runtime_kernel.outputs[outputTensorIdx];
    assert(input1 != nullptr);
    assert(input2 != nullptr);
    assert(output != nullptr);

    runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context);

    input1_data = runtime_kernel.inputs_data[input1TensorIdx];
    input2_data = runtime_kernel.inputs_data[input2TensorIdx];
    output_data = runtime_kernel.outputs_data[outputTensorIdx];
    assert(input1_data != nullptr);
    assert(input2_data != nullptr);
    assert(output_data != nullptr);

    options = runtime_kernel.first_operator->builtin_options_as_MulOptions();
  }
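
  // All tensors and their backing buffers are resolved; the rest of the
  // function dispatches on the input type.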

  OMStatus status;

  core::OMRuntimeShape input1_shape(input1);
  core::OMRuntimeShape input2_shape(input2);
  core::OMRuntimeShape output_shape(output);

  core::BinaryArithmeticBroadcastParams params{};
  const bool need_broadcast = pal::processBroadcastShapes(input1_shape, input2_shape, &params);

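  // When the input shapes differ but are broadcast-compatible,
  // processBroadcastShapes returns true and fills params with the broadcast
  // description; the slow broadcast kernels handle that case, a flat
  // elementwise loop handles the rest.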
  switch (input1->type())
  {
#ifndef DIS_FLOAT
    case circle::TensorType_FLOAT32:
    {
      status = execute::calculateActivationRange(options->fused_activation_function(),
                                                 &params.float_activation_min,
                                                 &params.float_activation_max);

      if (need_broadcast)
      {
        status = pal::BroadcastMul4DSlow(
          params, input1_shape, core::utils::castInputData<float>(input1_data), input2_shape,
          core::utils::castInputData<float>(input2_data), output_shape,
          core::utils::castOutputData<float>(output_data));
      }
      else
      {
        status =
          pal::Mul(params, input1_shape.flatSize(), core::utils::castInputData<float>(input1_data),
                   core::utils::castInputData<float>(input2_data),
                   core::utils::castOutputData<float>(output_data));
      }
    }
    break;
    case circle::TensorType_INT64:
    {
      status = execute::calculateActivationRange(options->fused_activation_function(),
                                                 &params.int64_activation_min,
                                                 &params.int64_activation_max);

      if (need_broadcast)
      {
        status = pal::BroadcastMul4DSlow(
          params, input1_shape, core::utils::castInputData<int64_t>(input1_data), input2_shape,
          core::utils::castInputData<int64_t>(input2_data), output_shape,
          core::utils::castOutputData<int64_t>(output_data));
      }
      else
      {
        status = pal::Mul(params, input1_shape.flatSize(),
                          core::utils::castInputData<int64_t>(input1_data),
                          core::utils::castInputData<int64_t>(input2_data),
                          core::utils::castOutputData<int64_t>(output_data));
      }
    }
    break;
    case circle::TensorType_INT32:
    {
      status = execute::calculateActivationRange(options->fused_activation_function(),
                                                 &params.int32_activation_min,
                                                 &params.int32_activation_max);

      if (need_broadcast)
      {
        status = pal::BroadcastMul4DSlow(
          params, input1_shape, core::utils::castInputData<int32_t>(input1_data), input2_shape,
          core::utils::castInputData<int32_t>(input2_data), output_shape,
          core::utils::castOutputData<int32_t>(output_data));
      }
      else
      {
        status = pal::Mul(params, input1_shape.flatSize(),
                          core::utils::castInputData<int32_t>(input1_data),
                          core::utils::castInputData<int32_t>(input2_data),
                          core::utils::castOutputData<int32_t>(output_data));
      }
    }
    break;
#endif // DIS_FLOAT
#ifndef DIS_QUANT
    case circle::TensorType_INT8:
    {
      core::ArithmeticQuantParams add_params{};

      calculateQuantParamsForMul(add_params, input1, input2, output,
                                 options->fused_activation_function());

      if (need_broadcast)
      {
        status = pal::BroadcastMul6DSlow(
          add_params, input1_shape, core::utils::castInputData<int8_t>(input1_data), input2_shape,
          core::utils::castInputData<int8_t>(input2_data), output_shape,
          core::utils::castOutputData<int8_t>(output_data));
      }
      else
      {
        assert(input1_shape.flatSize() == input2_shape.flatSize());
        assert(input1_shape.flatSize() == output_shape.flatSize());
        status = pal::Mul(add_params, input1_shape.flatSize(),
                          core::utils::castInputData<int8_t>(input1_data),
                          core::utils::castInputData<int8_t>(input2_data),
                          core::utils::castOutputData<int8_t>(output_data));
      }
    }
    break;
#endif // DIS_QUANT
    default:
    {
      status = UnsupportedType;
      assert(false && "Unsupported type.");
    }
  }

  return status;
}

} // namespace execute
} // namespace onert_micro
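
For reference, a minimal sketch of the flat (non-broadcast) float kernel this
file dispatches to. It is not part of Mul.cpp, and it assumes a float overload
of pal::Mul exists in PALMul.h alongside the declared int8 one, and that
OMStatus defines an Ok value:

#include <algorithm>

OMStatus Mul(const core::BinaryArithmeticBroadcastParams &params, const uint32_t flat_size,
             const float *input1_data, const float *input2_data, float *output_data)
{
  // Elementwise product over the flattened buffers, clamped to the fused
  // activation range that the caller computed with calculateActivationRange.
  for (uint32_t i = 0; i < flat_size; ++i)
  {
    const float value = input1_data[i] * input2_data[i];
    output_data[i] =
      std::max(params.float_activation_min, std::min(params.float_activation_max, value));
  }
  return Ok;
}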