ONE - On-device Neural Engine
FullyConnected.cpp
/*
 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "OMStatus.h"

#include "core/OMUtils.h"
#include "core/OMKernelData.h"

#include "execute/OMKernelExecutionBuilder.h"
#include "execute/OMUtils.h"
#include "execute/OMRuntimeKernel.h"

#include "PALFullyConnected.h"

using namespace onert_micro;
using namespace onert_micro::core;
using namespace onert_micro::execute;

namespace
{

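// Tensor positions of the Circle FullyConnected operator: up to three inputs
// (input, weights, optional bias) and a single output.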
constexpr uint32_t inputTensorIdx = 0;
constexpr uint32_t weightTensorIdx = 1;
constexpr uint32_t biasTensorIdx = 2;

constexpr uint32_t outputTensorIdx = 0;

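// Computes the quantized kernel parameters from the tensors' quantization
// metadata: a fixed-point (multiplier, shift) pair that rescales the int32
// accumulator to the output scale, the operand zero-point offsets, and the
// clamped activation range for the fused activation.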
#ifndef DIS_QUANT
void calculateOpDataFullyConnected(const circle::Tensor *input, const circle::Tensor *weights,
                                   const circle::Tensor *output,
                                   circle::ActivationFunctionType activation,
                                   core::FullyConnectedParams &params)
{
  double real_multiplier = 0.0;
  int output_shift;
  int32_t output_activation_min;
  int32_t output_activation_max;
  int32_t output_multiplier;

  assert(input->quantization() != nullptr);                   // Fix caller
  assert(input->quantization()->scale()->size() == 1);        // Fix caller
  assert(input->quantization()->zero_point()->size() == 1);   // Fix caller

  assert(weights->quantization() != nullptr);                 // Fix caller
  assert(weights->quantization()->scale()->size() == 1);      // Fix caller
  assert(weights->quantization()->zero_point()->size() == 1); // Fix caller

  assert(output->quantization() != nullptr);                  // Fix caller
  assert(output->quantization()->scale()->size() == 1);       // Fix caller
  assert(output->quantization()->zero_point()->size() == 1);  // Fix caller

  const float input_scale = *input->quantization()->scale()->begin();
  const float weight_scale = *weights->quantization()->scale()->begin();
  const float output_scale = *output->quantization()->scale()->begin();

  const long input_zero_point = *input->quantization()->zero_point()->begin();
  const long weights_zero_point = *weights->quantization()->zero_point()->begin();
  const long output_zero_point = *output->quantization()->zero_point()->begin();

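  // Fold the per-tensor scales into one real multiplier
  // (input_scale * weight_scale / output_scale), then decompose it into a
  // fixed-point multiplier and a power-of-two shift so the kernel needs no
  // floating point when rescaling accumulators.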
  real_multiplier =
    execute::getQuantizedConvolutionMultipler(input_scale, weight_scale, output_scale);
  execute::quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
  execute::calculateActivationRangeQuantized(activation, output_zero_point, output_scale,
                                             output->type(), &output_activation_min,
                                             &output_activation_max);

  params.output_shift = output_shift;
  params.output_multiplier = output_multiplier;
  params.input_offset = -input_zero_point;
  params.weights_offset = -weights_zero_point;
  params.output_offset = output_zero_point;
  params.quantized_activation_max = output_activation_max;
  params.quantized_activation_min = output_activation_min;
}
#endif

} // namespace

namespace onert_micro
{
namespace execute
{

// NOTE: doesn't currently support dynamic shapes
OMStatus execute_kernel_CircleFullyConnected(const OMExecuteArgs &execute_args)
{
  core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
  core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
  uint16_t op_index = execute_args.kernel_index;

  const circle::Tensor *input;
  const circle::Tensor *weight;
  const circle::Tensor *output;

  uint8_t *input_data;
  uint8_t *weight_data;
  uint8_t *bias_data;
  uint8_t *output_data;

  const circle::FullyConnectedOptions *options;
  // Read kernel
  {
    execute::OMRuntimeKernel runtime_kernel;
    runtime_kernel.readKernel(op_index, runtime_context);

    input = runtime_kernel.inputs[inputTensorIdx];
    weight = runtime_kernel.inputs[weightTensorIdx];
    output = runtime_kernel.outputs[outputTensorIdx];
    assert(input != nullptr);
    assert(weight != nullptr);
    // Bias can be nullptr
    assert(output != nullptr);

    runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context);

    input_data = runtime_kernel.inputs_data[inputTensorIdx];
    weight_data = runtime_kernel.inputs_data[weightTensorIdx];
    bias_data = runtime_kernel.inputs_data[biasTensorIdx];
    output_data = runtime_kernel.outputs_data[outputTensorIdx];
    assert(input_data != nullptr);
    assert(weight_data != nullptr);
    // Bias can be nullptr
    assert(output_data != nullptr);

    options = runtime_kernel.first_operator->builtin_options_as_FullyConnectedOptions();
  }

  OMStatus status;

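  // Dispatch on the input tensor type: float32 (with float or hybrid int8
  // weights), fully-quantized int8, or int16 (16x8); anything else fails.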
  switch (input->type())
  {
#ifndef DIS_FLOAT
    case circle::TensorType_FLOAT32:
    {
      FullyConnectedParams params{};
      status = calculateActivationRange(options->fused_activation_function(),
                                        &params.float_activation_min,
                                        &params.float_activation_max);
      if (status != Ok)
        return status;

      switch (weight->type())
      {
        case circle::TensorType_FLOAT32:
        {
          status = pal::FullyConnected(
            params, core::utils::castInputData<float>(input_data), OMRuntimeShape(weight),
            core::utils::castInputData<float>(weight_data),
            core::utils::castInputData<float>(bias_data), OMRuntimeShape(output),
            core::utils::castOutputData<float>(output_data));
        }
        break;
        case circle::TensorType_INT8:
        {
          // weight quantized INT8 mode
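          // Hybrid execution: float activations with int8 weights, dequantized
          // on the fly using per-tensor or per-channel weight scales.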
          params.weights_scales =
            reinterpret_cast<const float *>(weight->quantization()->scale()->data());
          params.is_channel_wise_quant = weight->quantization()->scale()->size() > 1;

          status = pal::FullyConnected(
            params, core::utils::castInputData<float>(input_data), OMRuntimeShape(weight),
            core::utils::castInputData<int8_t>(weight_data),
            core::utils::castInputData<float>(bias_data), OMRuntimeShape(output),
            core::utils::castOutputData<float>(output_data));
        }
        break;
        default:
          status = UnsupportedType;
          assert(false && "Unsupported hybrid weight type");
      }
    }
    break;
#endif // DIS_FLOAT
#ifndef DIS_QUANT
    case circle::TensorType_INT8:
    {
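      // Fully-integer path: int8 activations and weights with int32 bias;
      // results are rescaled via the precomputed multiplier and shift.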
      FullyConnectedParams op_params{};

      calculateOpDataFullyConnected(input, weight, output, options->fused_activation_function(),
                                    op_params);

      status =
        pal::FullyConnected(op_params, core::utils::castInputData<int8_t>(input_data),
                            OMRuntimeShape(weight), core::utils::castInputData<int8_t>(weight_data),
                            core::utils::castInputData<int32_t>(bias_data), OMRuntimeShape(output),
                            core::utils::castOutputData<int8_t>(output_data));
    }
    break;
    case circle::TensorType_INT16:
    {
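      // 16x8 path: int16 activations with int8 weights.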
      FullyConnectedParams op_params{};

      calculateOpDataFullyConnected(input, weight, output, options->fused_activation_function(),
                                    op_params);

      status =
        pal::FullyConnected(op_params, core::utils::castInputData<int16_t>(input_data),
                            OMRuntimeShape(weight), core::utils::castInputData<int8_t>(weight_data),
                            core::utils::castInputData<int64_t>(bias_data), OMRuntimeShape(output),
                            core::utils::castOutputData<int16_t>(output_data));
    }
    break;
#endif // DIS_QUANT
    default:
    {
      status = UnsupportedType;
      assert(false && "Unsupported type.");
    }
  }

  return status;
}

} // namespace execute
} // namespace onert_micro
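
The integer paths above hinge on quantizeMultiplier: the real rescale factor input_scale * weight_scale / output_scale is stored as a signed Q0.31 fixed-point multiplier plus a binary exponent, so accumulators can be rescaled with integer arithmetic at run time. Below is a minimal stand-alone sketch of that decomposition, assuming the TFLite-style convention; quantizeMultiplierSketch is a hypothetical name, and the authoritative implementation is execute::quantizeMultiplier in OMUtils.cpp.

#include <cmath>
#include <cstdint>

// Sketch: decompose real_multiplier into q31 * 2^shift, where q31 is a signed
// Q0.31 fixed-point value (in [2^30, 2^31) for positive multipliers).
void quantizeMultiplierSketch(double real_multiplier, int32_t *quantized_multiplier, int *shift)
{
  if (real_multiplier == 0.0)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  // real_multiplier = q * 2^shift with q in [0.5, 1)
  const double q = std::frexp(real_multiplier, shift);
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) // rounding can push q up to exactly 1.0; renormalize
  {
    q_fixed /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

With this convention, multiplying an int32 accumulator by quantized_multiplier (as a Q0.31 value) and then applying the power-of-two shift reproduces multiplication by real_multiplier to within rounding error, which is exactly how the precomputed output_multiplier/output_shift pair is consumed by the quantized kernels.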