18#include "kernels/Utils.h"
20#include "PALFullyConnected.h"
// Evaluates FullyConnected with float activations.
// Supports two weight layouts (dispatched on the weight tensor's element type below):
//   - FLOAT32 weights: plain float GEMV/GEMM, and
//   - S8 weights with float input/output: hybrid quantized path using the
//     weight tensor's quantization scales.
// NOTE(review): this extract has gaps (the embedded original numbering jumps),
// so the FullyConnectedParams declaration, the calculateActivationRange call,
// getTensorDims calls, dynamic-shape handling and most braces are not visible
// here; comments below describe only what the visible lines establish.
27void evalFloat(
const circle::Tensor *input,
const circle::Tensor *weights,
28 const circle::Tensor *bias,
const circle::Tensor *output,
29 const circle::FullyConnectedOptions *options,
BaseRuntimeGraph *runtime_graph)
// Fused-activation clamp bounds for the float output.
31 float activation_min{};
32 float activation_max{};
// Trailing arguments of an activation-range computation whose leading line is
// missing from this extract (presumably kernels::calculateActivationRange —
// TODO confirm against the full file).
34 &activation_min, &activation_max);
38 params.float_activation_max = activation_max;
// Activation buffers come from the runtime graph; weights/bias are constant data.
40 auto *
input_data = runtime_graph->getDataByTensor(input);
41 auto *
output_data = runtime_graph->getDataByTensor(output);
43 auto *weights_data = runtime_graph->getConstDataByTensor(weights);
44 auto *bias_data = runtime_graph->getConstDataByTensor(bias);
// bias_data is deliberately NOT asserted — bias is optional for FullyConnected.
46 assert(input_data !=
nullptr);
47 assert(weights_data !=
nullptr);
48 assert(output_data !=
nullptr);
// Fixed-capacity shape scratch arrays; filled by code not visible in this
// extract (presumably kernels::getTensorDims — TODO confirm).
50 int32_t input_shape[kMaxSmallSize];
53 int32_t weight_shape[kMaxSmallSize];
// Dynamic-output handling: compute the output byte size from its dims...
65 uint32_t num_dims = Tensor::num_dims(output);
67 int32_t data_size = 1;
68 for (
int i = 0; i < num_dims; ++i)
// ...then scale by the element size of the output type.
73 data_size *=
size(Tensor::element_type(output));
// Register the freshly computed dynamic shape with the graph
// (dynamic_shape itself is built on lines missing from this extract).
75 runtime_graph->addDynamicShapeTensor(output, std::move(dynamic_shape));
// Drop the stale output buffer before allocating one of the new size.
79 runtime_graph->resetTensorData(
nullptr, output);
// NOTE(review): raw new[] with ownership handed to the runtime graph via
// resetTensorData — assumed the graph frees it; verify against BaseRuntimeGraph.
83 auto new_output_data =
new uint8_t[data_size];
85 runtime_graph->resetTensorData(new_output_data, output);
// Dispatch on the weight type: pure float vs. hybrid (S8 weights).
89 switch (Tensor::element_type(weights))
91 case DataType::FLOAT32:
93 luci_interpreter_pal::FullyConnected(
94 params, input_shape, kernels::getTensorData<float>(input_data), weight_shape,
95 kernels::getTensorData<float>(weights_data), kernels::getTensorData<float>(bias_data),
96 output_shape, kernels::getTensorData<float>(output_data), Tensor::num_dims(output),
97 Tensor::num_dims(weights));
// Hybrid path: float activations, int8 weights. Per-channel if the weight
// tensor carries more than one quantization scale.
103 params.weights_scales =
104 reinterpret_cast<const float *
>(weights->quantization()->scale()->data());
105 params.is_channel_wise_quant = weights->quantization()->scale()->size() > 1;
106 luci_interpreter_pal::FullyConnected(
107 params, input_shape, kernels::getTensorData<float>(input_data), weight_shape,
108 kernels::getTensorData<int8_t>(weights_data), kernels::getTensorData<float>(bias_data),
109 output_shape, kernels::getTensorData<float>(output_data), Tensor::num_dims(output),
110 Tensor::num_dims(weights));
// Any other weight type is a programming error (configure should have rejected it).
114 assert(
false &&
"Unsupported hybrid weight type");
// Evaluates FullyConnected on fully quantized tensors.
// Dispatches on `type` (the input's element type, passed by the caller):
//   S8  (per-tensor symmetric weights, int32 bias),
//   U8  (asymmetric weights — nonzero filter offset, int32 bias),
//   S16 (int8 weights, int64 bias).
// NOTE(review): this extract has gaps — the FullyConnectedParams/op_params
// declaration, the quantizeMultiplier call producing output_multiplier and
// output_shift, getTensorDims calls and braces are not visible here.
119void evalQuantized(
const circle::Tensor *input,
const circle::Tensor *weights,
120 const circle::Tensor *bias,
const circle::Tensor *output,
121 const circle::FullyConnectedOptions *options,
BaseRuntimeGraph *runtime_graph,
// Effective rescale factor: input_scale * filter_scale / output_scale
// (computed via getQuantizedConvolutionMultipler on a line missing here —
// TODO confirm), later converted to fixed-point multiplier + shift.
124 double real_multiplier = 0.0;
126 int32_t output_activation_min;
127 int32_t output_activation_max;
128 int32_t output_multiplier;
130 Tensor::scale(input), Tensor::scale(weights), Tensor::scale(output));
// Clamp bounds in the quantized domain (computed from the fused activation
// and the output tensor's quantization; leading call line missing here).
133 output, &output_activation_min,
134 &output_activation_max);
// Offsets are the negated zero points on the input side; U8 is the only
// visible case with an asymmetric (nonzero-offset) weight tensor.
136 int32_t input_offset = -Tensor::zero_point(input);
137 int32_t filter_offset = 0;
138 if (type == DataType::U8)
139 filter_offset = -Tensor::zero_point(weights);
140 int32_t output_offset = Tensor::zero_point(output);
144 op_params.weights_offset = filter_offset;
145 op_params.output_offset = output_offset;
146 op_params.output_multiplier = output_multiplier;
147 op_params.output_shift = output_shift;
148 op_params.quantized_activation_min = output_activation_min;
149 op_params.quantized_activation_max = output_activation_max;
// Caching hints for the PAL backend are explicitly disabled here.
150 op_params.lhs_cacheable =
false;
151 op_params.rhs_cacheable =
false;
// Activation buffers from the runtime graph; weights/bias are constant data.
153 auto *
input_data = runtime_graph->getDataByTensor(input);
154 auto *
output_data = runtime_graph->getDataByTensor(output);
156 auto *weights_data = runtime_graph->getConstDataByTensor(weights);
157 auto *bias_data = runtime_graph->getConstDataByTensor(bias);
// bias_data is not asserted — bias is optional.
159 assert(input_data !=
nullptr);
160 assert(weights_data !=
nullptr);
161 assert(output_data !=
nullptr);
// Fixed-capacity shape scratch arrays (filled by code missing from this extract).
163 int32_t input_shape[kMaxSmallSize];
166 int32_t weights_shape[kMaxSmallSize];
// Type dispatch. Note the S16 path pairs int16 activations with int8 weights
// and an int64 bias, matching the TFLite 16x8 quantization scheme.
171 if (type == DataType::S8)
174 op_params, input_shape, kernels::getTensorData<int8_t>(input_data), weights_shape,
175 kernels::getTensorData<int8_t>(weights_data), kernels::getTensorData<int32_t>(bias_data),
176 output_shape, kernels::getTensorData<int8_t>(output_data), Tensor::num_dims(output),
177 Tensor::num_dims(weights));
179 else if (type == DataType::U8)
181 luci_interpreter_pal::FullyConnected<uint8_t>(
182 op_params, input_shape, kernels::getTensorData<uint8_t>(input_data), weights_shape,
183 kernels::getTensorData<uint8_t>(weights_data), kernels::getTensorData<int32_t>(bias_data),
184 output_shape, kernels::getTensorData<uint8_t>(output_data), Tensor::num_dims(output),
185 Tensor::num_dims(weights));
187 else if (type == DataType::S16)
189 luci_interpreter_pal::FullyConnected(
190 op_params, input_shape, kernels::getTensorData<int16_t>(input_data), weights_shape,
191 kernels::getTensorData<int8_t>(weights_data), kernels::getTensorData<int64_t>(bias_data),
192 output_shape, kernels::getTensorData<int16_t>(output_data), Tensor::num_dims(output),
193 Tensor::num_dims(weights));
// Unreachable when configure validated the types; guards against new enum values.
197 assert(
false &&
"Unsupported quantize type");
// Body fragment of configure_kernel_CircleFullyConnected (its signature line
// is not present in this extract — see the trailing symbol index, which lists
// configure_kernel_CircleFullyConnected(const circle::Operator *,
// BaseRuntimeGraph *)). Validates operand indices and type combinations
// before execution. NOTE(review): many validation lines (the
// LUCI_INTERPRETER_CHECK calls implied by the index) are missing from this
// extract; comments describe only the visible lines.
// Operand layout for FullyConnected: inputs = {input, weights, bias(optional)},
// outputs = {output}.
207 const auto input_index = cur_op->inputs()->operator[](0);
208 const auto weight_index = cur_op->inputs()->operator[](1);
209 const auto bias_index = cur_op->inputs()->operator[](2);
210 const auto output_index = cur_op->outputs()->operator[](0);
// bias_index is NOT asserted: -1 marks an absent optional bias.
212 assert(input_index != -1);
213 assert(weight_index != -1);
214 assert(output_index != -1);
// Resolved tensors (lookup lines missing here) must all exist.
221 assert(input !=
nullptr);
222 assert(weights !=
nullptr);
223 assert(output !=
nullptr);
// Type-combination checks:
// S8 weights + FLOAT32 input -> hybrid quantized path.
226 if (Tensor::element_type(weights) == DataType::S8 and
227 Tensor::element_type(input) == DataType::FLOAT32)
// FLOAT32 weights -> plain float path.
233 else if (Tensor::element_type(weights) == DataType::FLOAT32)
// U8 weights -> fully quantized uint8 path.
241 else if (Tensor::element_type(weights) == DataType::U8)
// S8 weights (with non-float input) -> fully quantized int8/int16 path.
247 else if (Tensor::element_type(weights) == DataType::S8)
// Fragments of the per-path checks (their leading LUCI_INTERPRETER_CHECK /
// condition lines are missing from this extract).
250 Tensor::element_type(input) == DataType::FLOAT32);
252 Tensor::element_type(output) == DataType::FLOAT32);
254 Tensor::element_type(bias) == DataType::S64 ||
255 Tensor::element_type(bias) == DataType::FLOAT32)
256 if (Tensor::element_type(input) == DataType::FLOAT32)
// Any other weight type is rejected at configure time.
266 assert(
false &&
"Unsupported type.");
// Presumably used for a shape-consistency check against the weight matrix
// (the check itself is missing from this extract — TODO confirm).
273 int32_t input_num_elements = Tensor::num_elements(input);
// Body fragment of execute_kernel_CircleFullyConnected (its signature line is
// not present in this extract — the trailing symbol index lists
// execute_kernel_CircleFullyConnected(const circle::Operator *,
// BaseRuntimeGraph *)). Resolves operands, then dispatches to evalQuantized
// or evalFloat on the input's element type.
// Same operand layout as configure: {input, weights, bias?} -> {output}.
285 const auto input_index = cur_op->inputs()->operator[](0);
286 const auto weight_index = cur_op->inputs()->operator[](1);
287 const auto bias_index = cur_op->inputs()->operator[](2);
288 const auto output_index = cur_op->outputs()->operator[](0);
// bias_index is not asserted: -1 marks an absent optional bias.
290 assert(input_index != -1);
291 assert(weight_index != -1);
292 assert(output_index != -1);
// Resolved tensors (lookup lines missing from this extract) must exist.
299 assert(input !=
nullptr);
300 assert(weights !=
nullptr);
301 assert(output !=
nullptr);
// Operator options (fused activation, weight format) from the flatbuffer.
303 const auto *options = cur_op->builtin_options_as_FullyConnectedOptions();
304 const auto input_type = Tensor::element_type(input);
// Quantized input types route to evalQuantized (the enclosing switch/case
// lines are missing from this extract)...
311 evalQuantized(input, weights, bias, output, options, runtime_graph, input_type);
// ...float input routes to evalFloat (covers both float and hybrid weights).
315 case DataType::FLOAT32:
316 evalFloat(input, weights, bias, output, options, runtime_graph);
// Unreachable when configure validated the types.
320 assert(
false &&
"Unsupported type.");
const circle::Tensor * getCircleTensorByIndex(int32_t index)
#define LUCI_INTERPRETER_CHECK(cond)
const luci_interpreter::RuntimeShape output_shape
DataType
"scalar" value type
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
double getQuantizedConvolutionMultipler(float input_scale, float filter_scale, float output_scale)
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
void getTensorDims(const circle::Tensor *tensor, BaseRuntimeGraph *runtime_graph, int32_t *dims)
void FullyConnected< int8_t >(const tflite::FullyConnectedParams ¶ms, const tflite::RuntimeShape &input_shape, const int8_t *input_data, const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, const tflite::RuntimeShape &output_shape, int8_t *output_data)
RuntimeGraph BaseRuntimeGraph
void execute_kernel_CircleFullyConnected(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
void configure_kernel_CircleFullyConnected(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
FusedActFunc luci_actfunc(const circle::ActivationFunctionType type)
const loco::Dimension & dim(uint32_t axis) const
float float_activation_min