26#include "PALFullyConnected.h"
// Operand ordering of a Circle FullyConnected operator's inputs:
// inputs[0] = activations, inputs[1] = weights, inputs[2] = optional bias.
// (An outputTensorIdx constant is declared elsewhere in this file and used
// below to index runtime_kernel.outputs.)
// Fix: the original lines had the file's line numbers ("35"/"36"/"37") fused
// onto the declarations by extraction, which made them invalid C++.
constexpr uint32_t inputTensorIdx = 0;
constexpr uint32_t weightTensorIdx = 1;
constexpr uint32_t biasTensorIdx = 2;
// Derives the quantized-arithmetic parameters for a FullyConnected op from the
// per-tensor quantization info of its input, weights and output tensors: it
// reads each tensor's single scale/zero-point and (per the call tail visible
// at the bottom of this block) computes the fused-activation clamp range via
// calculateActivationRangeQuantized.
//
// NOTE(review): this span is extraction-damaged and kept byte-identical rather
// than reformatted. The parameter list is cut off after `activation` (original
// lines 45-46 — presumably an out-parameter such as a FullyConnectedParams
// reference — are missing), and original lines 72-76 are also missing: the
// statements that use `real_multiplier` and `output_multiplier` (presumably
// getQuantizedConvolutionMultipler / quantizeMultiplier, which are declared in
// this file) plus the function epilogue are not visible here. Confirm against
// the complete file before compiling.
42void calculateOpDataFullyConnected(
const circle::Tensor *input,
const circle::Tensor *weights,
43 const circle::Tensor *output,
44 circle::ActivationFunctionType activation,
// Accumulators for the quantized multiplier/activation-range computation.
47 double real_multiplier = 0.0;
49 int32_t output_activation_min;
50 int32_t output_activation_max;
51 int32_t output_multiplier;
// Only per-tensor quantization is supported here: every tensor must carry
// quantization info with exactly one scale and one zero point.
53 assert(
input->quantization() !=
nullptr);
54 assert(
input->quantization()->scale()->size() == 1);
55 assert(
input->quantization()->zero_point()->size() == 1);
57 assert(weights->quantization() !=
nullptr);
58 assert(weights->quantization()->scale()->size() == 1);
59 assert(weights->quantization()->zero_point()->size() == 1);
61 assert(
output->quantization() !=
nullptr);
62 assert(
output->quantization()->scale()->size() == 1);
63 assert(
output->quantization()->zero_point()->size() == 1);
// Single per-tensor scale for each operand (first and only vector element).
65 const float input_scale = *
input->quantization()->scale()->begin();
66 const float weight_scale = *weights->quantization()->scale()->begin();
67 const float output_scale = *
output->quantization()->scale()->begin();
// Single per-tensor zero point for each operand.
69 const long input_zero_point = *
input->quantization()->zero_point()->begin();
70 const long weights_zero_point = *weights->quantization()->zero_point()->begin();
71 const long output_zero_point = *
output->quantization()->zero_point()->begin();
// Tail of the (partially missing) calculateActivationRangeQuantized call that
// fills output_activation_min/max for the requested fused activation.
77 output->type(), &output_activation_min,
78 &output_activation_max);
// Body of execute_kernel_CircleFullyConnected (signature listed in this file's
// appendix): resolves the kernel's tensors and data pointers from the runtime
// kernel, then dispatches a FullyConnected implementation on the input tensor
// type — FLOAT32 (with a nested switch on weight type for float and
// hybrid-int8 weights), INT8, and INT16 — asserting on unsupported types.
//
// NOTE(review): this span is extraction-damaged and kept byte-identical rather
// than reformatted. The function signature, the declarations of `input_data`
// and `bias_data` (original lines 107-108), status checks, `params` setup,
// the switch/case braces and `break`/`return` statements all fall in gaps of
// the original numbering and are not visible here. Confirm against the
// complete file before compiling.
104 const circle::Tensor *input;
105 const circle::Tensor *weight;
106 const circle::Tensor *output;
109 uint8_t *weight_data;
111 uint8_t *output_data;
113 const circle::FullyConnectedOptions *options;
// Populate runtime_kernel's tensor/operator views for this op index.
117 runtime_kernel.
readKernel(op_index, runtime_context);
119 input = runtime_kernel.
inputs[inputTensorIdx];
120 weight = runtime_kernel.
inputs[weightTensorIdx];
121 output = runtime_kernel.
outputs[outputTensorIdx];
// Tensors must exist; bias is the one operand allowed to be absent (note: no
// assert on inputs[biasTensorIdx] here, matching the missing 124/135 lines).
122 assert(input !=
nullptr);
123 assert(weight !=
nullptr);
125 assert(output !=
nullptr);
// Raw data pointers for each operand (bias_data may legitimately be null —
// there is deliberately no assert on it below).
129 input_data = runtime_kernel.
inputs_data[inputTensorIdx];
130 weight_data = runtime_kernel.
inputs_data[weightTensorIdx];
131 bias_data = runtime_kernel.
inputs_data[biasTensorIdx];
132 output_data = runtime_kernel.
outputs_data[outputTensorIdx];
133 assert(input_data !=
nullptr);
134 assert(weight_data !=
nullptr);
136 assert(output_data !=
nullptr);
// Fused activation / format options from the operator's flatbuffer table.
138 options = runtime_kernel.
first_operator->builtin_options_as_FullyConnectedOptions();
// Dispatch on the input tensor's element type.
143 switch (input->type())
146 case circle::TensorType_FLOAT32:
// Float input: secondary dispatch on weight type (pure float vs hybrid int8).
154 switch (weight->type())
156 case circle::TensorType_FLOAT32:
// Pure float path: float input/weights/bias/output.
160 params, core::utils::castInputData<float>(input_data),
OMRuntimeShape(weight),
161 core::utils::castInputData<float>(weight_data),
162 core::utils::castInputData<float>(bias_data),
OMRuntimeShape(output),
163 core::utils::castOutputData<float>(output_data));
166 case circle::TensorType_INT8:
// Hybrid path: int8 weights with float activations; the weights' per-channel
// scales are read directly out of the flatbuffer scale vector.
170 reinterpret_cast<const float *
>(weight->quantization()->scale()->data());
174 params, core::utils::castInputData<float>(input_data),
OMRuntimeShape(weight),
175 core::utils::castInputData<int8_t>(weight_data),
176 core::utils::castInputData<float>(bias_data),
OMRuntimeShape(output),
177 core::utils::castOutputData<float>(output_data));
181 assert(
false &&
"Unsupported hybrid weight type");
187 case circle::TensorType_INT8:
// Fully quantized int8 path: quantization params computed from tensor
// scales/zero-points, int8 weights, int32 bias, int8 output.
191 calculateOpDataFullyConnected(input, weight, output, options->fused_activation_function(),
196 OMRuntimeShape(weight), core::utils::castInputData<int8_t>(weight_data),
197 core::utils::castInputData<int32_t>(bias_data),
OMRuntimeShape(output),
198 core::utils::castOutputData<int8_t>(output_data));
201 case circle::TensorType_INT16:
// Quantized int16 path: int16 activations with int8 weights and int32 bias
// (the PAL declaration in the appendix shows int64 bias for int16 — TODO
// confirm the bias cast here against that overload).
205 calculateOpDataFullyConnected(input, weight, output, options->fused_activation_function(),
210 OMRuntimeShape(weight), core::utils::castInputData<int8_t>(weight_data),
211 core::utils::castInputData<int32_t>(bias_data),
OMRuntimeShape(output),
212 core::utils::castOutputData<int16_t>(output_data));
// Default case: any other input type is unsupported on this target.
219 assert(
false &&
"Unsupported type.");
uint8_t * outputs_data[maxOutputSize]
const circle::Operator * first_operator
OMStatus getDataFromStorage(uint16_t op_index, core::OMRuntimeStorage &storage, core::OMRuntimeContext &context)
uint8_t * inputs_data[maxInputSize]
OMStatus readKernel(uint16_t op_index, core::OMRuntimeContext &runtime_context)
const circle::Tensor * outputs[maxOutputSize]
const circle::Tensor * inputs[maxInputSize]
constexpr uint32_t outputTensorIdx
OMStatus FullyConnected(const core::FullyConnectedParams ¶ms, const int16_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int64_t *bias_data, const core::OMRuntimeShape &output_shape, int16_t *output_data)
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
double getQuantizedConvolutionMultipler(float input_scale, float filter_scale, float output_scale)
OMStatus calculateActivationRangeQuantized(circle::ActivationFunctionType activation, int32_t output_zero_point, float output_scale, circle::TensorType data_type, int32_t *activation_min, int32_t *activation_max)
OMStatus execute_kernel_CircleFullyConnected(const OMExecuteArgs &execute_args)
OMStatus calculateActivationRange(circle::ActivationFunctionType activation, T *activation_min, T *activation_max)
bool is_channel_wise_quant
float float_activation_min
int32_t output_multiplier
int32_t quantized_activation_max
int32_t quantized_activation_min
float float_activation_max
const float * weights_scales
core::OMRuntimeContext & runtime_context
core::OMRuntimeStorage & runtime_storage