19#include "kernels/Utils.h"
21#include "PALFullyConnected.h"
39 if (
weights()->element_type() == DataType::U8)
45 else if (
weights()->element_type() == DataType::FLOAT32)
51 else if (
weights()->element_type() == DataType::S8)
57 else if (
weights()->element_type() == DataType::S4)
64 else if (
weights()->element_type() == DataType::U4)
73 throw std::runtime_error(
"luci-intp FullyConnected(1) Unsupported type.");
81 bias()->shape().num_elements() == weights_shape.
dim(0));
84 const int32_t batch_size = input_shape.
num_elements() / weights_shape.
dim(1);
85 const int32_t num_units = weights_shape.
dim(0);
87 if (
params().keep_num_dims ==
false)
94 for (
int i = 0; i < input_shape.
num_dims(); ++i)
103 const bool is_hybrid =
119 throw std::runtime_error(
"luci-intp FullyConnected(3) Unsupported type.");
124 switch (
input()->element_type())
132 case DataType::FLOAT32:
136 throw std::runtime_error(
"luci-intp FullyConnected(2) Unsupported type.");
141void FullyConnected::evalFloat()
const
143 float activation_min{};
144 float activation_max{};
147 tflite::FullyConnectedParams
params{};
148 params.float_activation_min = activation_min;
149 params.float_activation_max = activation_max;
150 params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
152 tflite::reference_ops::FullyConnected(
158void FullyConnected::evalQuantized()
const
160 double real_multiplier = 0.0;
162 int32_t output_activation_min;
163 int32_t output_activation_max;
164 int32_t output_multiplier;
169 &output_activation_max);
175 tflite::FullyConnectedParams op_params{};
176 op_params.input_offset = input_offset;
177 op_params.weights_offset = filter_offset;
178 op_params.output_offset = output_offset;
179 op_params.output_multiplier = output_multiplier;
180 op_params.output_shift = output_shift;
181 op_params.quantized_activation_min = output_activation_min;
182 op_params.quantized_activation_max = output_activation_max;
183 op_params.lhs_cacheable =
false;
184 op_params.rhs_cacheable =
false;
185 tflite::reference_ops::FullyConnected(
191void FullyConnected::evalQuantizedS8()
const
193 double real_multiplier = 0.0;
195 int32_t output_activation_min;
196 int32_t output_activation_max;
197 int32_t output_multiplier;
202 &output_activation_max);
208 tflite::FullyConnectedParams op_params{};
209 op_params.input_offset = input_offset;
210 op_params.weights_offset = filter_offset;
211 op_params.output_offset = output_offset;
212 op_params.output_multiplier = output_multiplier;
213 op_params.output_shift = output_shift;
214 op_params.quantized_activation_min = output_activation_min;
215 op_params.quantized_activation_max = output_activation_max;
216 op_params.lhs_cacheable =
false;
217 op_params.rhs_cacheable =
false;
224void FullyConnected::evalHybridWI4AF32()
const
226 float activation_min{};
227 float activation_max{};
230 tflite::FullyConnectedParams
params{};
231 params.float_activation_min = activation_min;
232 params.float_activation_max = activation_max;
233 params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
235 const int8_t *weights_int4 = getTensorData<int8_t>(
weights());
236 float *weights_float = getTensorData<float>(
scratch());
243 if (weights_scales.size() == 1)
247 for (int32_t i = 0; i < weights_shape.num_elements(); ++i)
249 weights_float[i] =
scale *
static_cast<float>(weights_int4[i]);
255 const int32_t quant_dim_size = weights_shape.dim(weights_quantized_dimension);
257 size_t outer_dims_size = 1;
258 size_t inner_dims_size = 1;
259 for (
int i = 0; i < weights_quantized_dimension; ++i)
260 outer_dims_size *= weights_shape.dim(i);
261 for (
int i = weights_quantized_dimension + 1; i < weights_shape.num_dims(); ++i)
262 inner_dims_size *= weights_shape.dim(i);
264 for (
size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it)
265 for (int32_t channel = 0; channel < quant_dim_size; ++channel)
267 float scale = weights_scales[channel];
268 size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
269 for (
size_t inner_it = 0; inner_it < inner_dims_size; ++inner_it)
272 static_cast<size_t>(weights_shape.num_elements()));
273 weights_float[
offset + inner_it] =
274 scale *
static_cast<float>(weights_int4[
offset + inner_it]);
279 tflite::reference_ops::FullyConnected(
285void FullyConnected::evalHybridWU4AF32()
const
287 float activation_min{};
288 float activation_max{};
291 tflite::FullyConnectedParams
params{};
292 params.float_activation_min = activation_min;
293 params.float_activation_max = activation_max;
294 params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
296 const auto *weights_uint4 = getTensorData<uint8_t>(
weights());
297 auto *weights_float = getTensorData<float>(
scratch());
303 if (weights_scales.size() == 1)
309 for (int32_t i = 0; i < weights_shape.num_elements(); ++i)
312 scale *
static_cast<float>(
static_cast<int32_t
>(weights_uint4[i]) - zero_point);
318 const int32_t quant_dim_size = weights_shape.dim(weights_quantized_dimension);
320 size_t outer_dims_size = 1;
321 size_t inner_dims_size = 1;
322 for (
int i = 0; i < weights_quantized_dimension; ++i)
323 outer_dims_size *= weights_shape.dim(i);
324 for (
int i = weights_quantized_dimension + 1; i < weights_shape.num_dims(); ++i)
325 inner_dims_size *= weights_shape.dim(i);
327 for (
size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it)
328 for (int32_t channel = 0; channel < quant_dim_size; ++channel)
330 int32_t zero_point = weights_zero_points[channel];
332 float scale = weights_scales[channel];
333 size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
334 for (
size_t inner_it = 0; inner_it < inner_dims_size; ++inner_it)
336 weights_float[
offset + inner_it] =
338 static_cast<float>(
static_cast<int32_t
>(weights_uint4[
offset + inner_it]) - zero_point);
343 tflite::reference_ops::FullyConnected(
const FullyConnectedParams _params
const FullyConnectedParams & params() const
int32_t num_elements() const
void resize(const Shape &new_shape)
const Shape & shape() const
const std::vector< float > & scales() const
const std::vector< int32_t > & zero_points() const
DataType element_type() const
int32_t quantized_dimension() const
int32_t zero_point() const
void configure() override
const Tensor * weights() const
FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias, Tensor *output, const FullyConnectedParams ¶ms)
void execute() const override
const Tensor * input() const
const Tensor * bias() const
#define LUCI_INTERPRETER_CHECK(cond)
__global uchar * offset(const Image *img, int x, int y)
const luci_interpreter::RuntimeShape output_shape
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
double getQuantizedConvolutionMultipler(float input_scale, float filter_scale, float output_scale)
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
void FullyConnected< int8_t >(const tflite::FullyConnectedParams ¶ms, const tflite::RuntimeShape &input_shape, const int8_t *input_data, const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, const tflite::RuntimeShape &output_shape, int8_t *output_data)