ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Mul.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "OMStatus.h"
18
19#include "core/OMUtils.h"
20#include "core/OMRuntimeShape.h"
21
22#include "execute/OMUtils.h"
25#include "PALMul.h"
26
27using namespace onert_micro;
28using namespace onert_micro::execute;
29
30namespace
31{
32
33constexpr uint32_t input1TensorIdx = 0;
34constexpr uint32_t input2TensorIdx = 1;
35constexpr uint32_t outputTensorIdx = 0;
36
37// TODO: Remove duplicated code with Sub,Add
38void calculateQuantParamsForMul(core::ArithmeticQuantParams &params, const circle::Tensor *input1,
39 const circle::Tensor *input2, const circle::Tensor *output,
40 circle::ActivationFunctionType act)
41{
42 long input1_zp;
43 long input2_zp;
44 long output_zp;
45
46 float input1_scale;
47 float input2_scale;
48 float output_scale;
49
50 // Read input1 quant params
51 readQuantParams(input1, input1_zp, input1_scale);
52 // Read input2 quant params
53 readQuantParams(input2, input2_zp, input2_scale);
54 // Read output quant params
55 readQuantParams(output, output_zp, output_scale);
56
57 params.input1_offset = static_cast<int32_t>(input1_zp);
58 params.input2_offset = static_cast<int32_t>(input2_zp);
59 params.output_offset = static_cast<int32_t>(output_zp);
60 params.left_shift = (output->type() == circle::TensorType_INT16) ? 15 : 20;
61
62 double real_multiplier = static_cast<double>(input1_scale) * static_cast<double>(input2_scale) /
63 static_cast<double>(output_scale);
64 quantizeMultiplier(real_multiplier, &params.output_multiplier, &params.output_shift);
65
66 calculateActivationRangeQuantized(act, output_zp, output_scale, output->type(),
69}
70
71} // namespace
72
73// NOTE: doesnt currently support dynamic shapes
74// TODO: reduce code duplication with Add, Sub
75OMStatus onert_micro::execute::execute_kernel_CircleMul(const OMExecuteArgs &execute_args)
76{
77 core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
78 core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
79 uint16_t op_index = execute_args.kernel_index;
80
81 const circle::Tensor *input1;
82 const circle::Tensor *input2;
83 const circle::Tensor *output;
84
85 uint8_t *input1_data;
86 uint8_t *input2_data;
87 uint8_t *output_data;
88
89 const circle::MulOptions *options;
90 // Read kernel
91 {
92 execute::OMRuntimeKernel runtime_kernel;
93 runtime_kernel.readKernel(op_index, runtime_context);
94
95 input1 = runtime_kernel.inputs[input1TensorIdx];
96 input2 = runtime_kernel.inputs[input2TensorIdx];
97 output = runtime_kernel.outputs[outputTensorIdx];
98 assert(input1 != nullptr);
99 assert(input2 != nullptr);
100 assert(output != nullptr);
101
102 runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context);
103
104 input1_data = runtime_kernel.inputs_data[input1TensorIdx];
105 input2_data = runtime_kernel.inputs_data[input2TensorIdx];
106 output_data = runtime_kernel.outputs_data[outputTensorIdx];
107 assert(input1_data != nullptr);
108 assert(input2_data != nullptr);
109 assert(output_data != nullptr);
110
111 options = runtime_kernel.first_operator->builtin_options_as_MulOptions();
112 }
113
114 OMStatus status;
115
116 core::OMRuntimeShape input1_shape(input1);
117 core::OMRuntimeShape input2_shape(input2);
119
121 const bool need_broadcast = pal::processBroadcastShapes(input1_shape, input2_shape, &params);
122
123 switch (input1->type())
124 {
125#ifndef DIS_FLOAT
126 case circle::TensorType_FLOAT32:
127 {
128 status = execute::calculateActivationRange(options->fused_activation_function(),
129 &params.float_activation_min,
130 &params.float_activation_max);
131
132 if (need_broadcast)
133 {
135 params, input1_shape, core::utils::castInputData<float>(input1_data), input2_shape,
136 core::utils::castInputData<float>(input2_data), output_shape,
137 core::utils::castOutputData<float>(output_data));
138 }
139 else
140 {
141 status =
142 pal::Mul(params, input1_shape.flatSize(), core::utils::castInputData<float>(input1_data),
143 core::utils::castInputData<float>(input2_data),
144 core::utils::castOutputData<float>(output_data));
145 }
146 }
147 break;
148 case circle::TensorType_INT64:
149 {
150 status = execute::calculateActivationRange(options->fused_activation_function(),
151 &params.int64_activation_min,
152 &params.int64_activation_max);
153
154 if (need_broadcast)
155 {
157 params, input1_shape, core::utils::castInputData<int64_t>(input1_data), input2_shape,
158 core::utils::castInputData<int64_t>(input2_data), output_shape,
159 core::utils::castOutputData<int64_t>(output_data));
160 }
161 else
162 {
163 status = pal::Mul(params, input1_shape.flatSize(),
164 core::utils::castInputData<int64_t>(input1_data),
165 core::utils::castInputData<int64_t>(input2_data),
166 core::utils::castOutputData<int64_t>(output_data));
167 }
168 }
169 break;
170 case circle::TensorType_INT32:
171 {
172 status = execute::calculateActivationRange(options->fused_activation_function(),
173 &params.int32_activation_min,
174 &params.int32_activation_max);
175
176 if (need_broadcast)
177 {
179 params, input1_shape, core::utils::castInputData<int32_t>(input1_data), input2_shape,
180 core::utils::castInputData<int32_t>(input2_data), output_shape,
181 core::utils::castOutputData<int32_t>(output_data));
182 }
183 else
184 {
185 status = pal::Mul(params, input1_shape.flatSize(),
186 core::utils::castInputData<int32_t>(input1_data),
187 core::utils::castInputData<int32_t>(input2_data),
188 core::utils::castOutputData<int32_t>(output_data));
189 }
190 }
191 break;
192#endif // DIS_FLOAT
193#ifndef DIS_QUANT
194 case circle::TensorType_INT8:
195 {
196 core::ArithmeticQuantParams add_params{};
197
198 calculateQuantParamsForMul(add_params, input1, input2, output,
199 options->fused_activation_function());
200
201 if (need_broadcast)
202 {
204 add_params, input1_shape, core::utils::castInputData<int8_t>(input1_data), input2_shape,
205 core::utils::castInputData<int8_t>(input2_data), output_shape,
206 core::utils::castOutputData<int8_t>(output_data));
207 }
208 else
209 {
210 assert(input1_shape.flatSize() == input2_shape.flatSize());
211 assert(input1_shape.flatSize() == output_shape.flatSize());
212 status = pal::Mul(add_params, input1_shape.flatSize(),
213 core::utils::castInputData<int8_t>(input1_data),
214 core::utils::castInputData<int8_t>(input2_data),
215 core::utils::castOutputData<int8_t>(output_data));
216 }
217 }
218 break;
219#endif // DIF_QUANT
220 default:
221 {
222 status = UnsupportedType;
223 assert(false && "Unsupported type.");
224 }
225 }
226
227 return status;
228}
uint8_t * outputs_data[maxOutputSize]
const circle::Operator * first_operator
OMStatus getDataFromStorage(uint16_t op_index, core::OMRuntimeStorage &storage, core::OMRuntimeContext &context)
OMStatus readKernel(uint16_t op_index, core::OMRuntimeContext &runtime_context)
const circle::Tensor * outputs[maxOutputSize]
const circle::Tensor * inputs[maxInputSize]
const luci_interpreter::RuntimeShape output_shape
constexpr uint32_t input1TensorIdx
constexpr uint32_t outputTensorIdx
constexpr uint32_t input2TensorIdx
OMStatus BroadcastMul6DSlow(const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
bool processBroadcastShapes(const core::OMRuntimeShape &shape0, const core::OMRuntimeShape &shape1, core::BinaryArithmeticBroadcastParams *params)
OMStatus Mul(const core::ArithmeticQuantParams &params, const uint32_t flat_size, const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data)
Definition PALMul.h:33
OMStatus BroadcastMul4DSlow(const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
Definition OMUtils.cpp:23
void readQuantParams(const circle::Tensor *tensor, long &zero_point, float &scale)
Definition OMUtils.cpp:143
OMStatus calculateActivationRangeQuantized(circle::ActivationFunctionType activation, int32_t output_zero_point, float output_scale, circle::TensorType data_type, int32_t *activation_min, int32_t *activation_max)
Definition OMUtils.cpp:112
OMStatus calculateActivationRange(circle::ActivationFunctionType activation, T *activation_min, T *activation_max)
Definition OMUtils.h:36
@ UnsupportedType
Definition OMStatus.h:26
core::OMRuntimeContext & runtime_context
core::OMRuntimeStorage & runtime_storage