20#include "kernels/Utils.h"
53 if (
input()->element_type() == DataType::FLOAT32 &&
filter()->element_type() == DataType::FLOAT32)
57 else if (
input()->element_type() == DataType::U8 &&
filter()->element_type() == DataType::U8)
61 else if (
input()->element_type() == DataType::S8 &&
filter()->element_type() == DataType::S8)
66 static_cast<size_t>(
filter()->shape().dim(0)));
72 else if (
input()->element_type() == DataType::S16 &&
filter()->element_type() == DataType::S16)
78 throw std::runtime_error(
"luci-intp Conv2D(1) Unsupported type.");
86 const int32_t batches = input_shape.
dim(0);
87 const int32_t input_height = input_shape.
dim(1);
88 const int32_t input_width = input_shape.
dim(2);
89 const int32_t output_depth = filter_shape.
dim(0);
90 const int32_t filter_height = filter_shape.
dim(1);
91 const int32_t filter_width = filter_shape.
dim(2);
95 bias()->shape().dim(0) == output_depth));
97 const int32_t output_height =
100 const int32_t output_width =
105 input_height, filter_height, output_height);
107 filter_width, output_width);
109 output()->
resize({batches, output_height, output_width, output_depth});
112 tflite::ConvParams
params{};
113 params.padding_values.height = _padding_height;
114 params.padding_values.width = _padding_width;
120 luci_interpreter_pal::SetupScratchpadTensor(scratchpad,
input()->element_type(),
params,
126 case Activation::NONE:
127 case Activation::RELU:
128 case Activation::RELU6:
129 case Activation::RELU_N1_TO_1:
132 throw std::runtime_error(
"Unsupported fused activation");
138 switch (
input()->element_type())
140 case DataType::FLOAT32:
141 if (
filter()->element_type() == DataType::FLOAT32)
146 throw std::runtime_error(
"luci-intp Conv2D(2) Unsupported type.");
156 static_cast<size_t>(
filter()->shape().dim(0)));
157 evalQuantizedPerChannel();
161 evalQuantizedS8PerChannel();
167 throw std::runtime_error(
"luci-intp Conv2D(3) Unsupported type.");
// Conv2D::evalFloat -- float evaluation path of the Conv2D kernel.
// NOTE(review): this listing is lossy. The embedded source line numbers jump,
// so the braces, the code that fills the activation bounds and the final
// convolution call are not visible here; consult the full file before editing.
171void Conv2D::evalFloat()
const
// Output clamp bounds, value-initialised to 0. Presumably overwritten by an
// omitted call before they are copied into params below -- TODO confirm.
173 float activation_min{};
174 float activation_max{};
// Parameter struct handed to the TFLite-style convolution routine; starts
// value-initialised and is then filled field by field.
177 tflite::ConvParams
params{};
// Padding comes from members of this kernel (where they are computed is not
// shown in this block).
178 params.padding_values.height = _padding_height;
179 params.padding_values.width = _padding_width;
184 params.float_activation_min = activation_min;
185 params.float_activation_max = activation_max;
// The scratchpad pointer stays null unless the scratchpad tensor reports that
// it is allocatable; only then is its float buffer fetched.
188 float *scratchpad_data =
nullptr;
189 if (scratchpad->is_allocatable())
190 scratchpad_data = scratchpad->data<
float>();
// Conv2D::evalQuantized -- quantized evaluation path that uses one scale per
// tensor (input, filter and output each contribute a single scale() value).
// NOTE(review): this listing is lossy. The declaration of output_shift, the
// code that derives output_multiplier and the activation bounds, and the final
// convolution call are on omitted lines.
199void Conv2D::evalQuantized()
const
// Scales are widened to double before being combined.
201 const auto input_scale =
static_cast<double>(
input()->
scale());
202 const auto filter_scale =
static_cast<double>(
filter()->
scale());
203 const auto output_scale =
static_cast<double>(
output()->
scale());
// Real-valued rescale factor applied to the accumulator:
// input_scale * filter_scale / output_scale.
205 const double real_multiplier = input_scale * filter_scale / output_scale;
// Fixed-point form of real_multiplier. Presumably produced together with
// output_shift by quantizeMultiplier() on an omitted line -- TODO confirm.
206 int32_t output_multiplier{};
// Quantized clamp bounds, value-initialised to 0. Presumably set by an omitted
// call before use -- TODO confirm.
210 int32_t activation_min{};
211 int32_t activation_max{};
214 tflite::ConvParams
params{};
215 params.padding_values.height = _padding_height;
216 params.padding_values.width = _padding_width;
// Requantization and clamp settings forwarded to the convolution routine.
225 params.output_multiplier = output_multiplier;
226 params.output_shift = output_shift;
227 params.quantized_activation_min = activation_min;
228 params.quantized_activation_max = activation_max;
// Conv2D::evalQuantizedPerChannel -- hand-written convolution loop for uint8
// data with a separate requantization multiplier per output channel.
// NOTE(review): this listing is lossy. Braces, the accumulator declaration,
// the input/output pointers and shapes, the stride/output-size locals and the
// right-hand sides of several initialisers are on omitted lines.
238void Conv2D::evalQuantizedPerChannel()
const
// Filter is read as uint8_t and bias as int32_t.
241 const auto *filter_data = getTensorData<uint8_t>(
filter());
242 const auto *bias_data = getTensorData<int32_t>(
bias());
// Input dimensions are taken in the order batch, height, width, depth.
249 const int32_t batches = input_shape.dim(0);
250 const int32_t input_height = input_shape.dim(1);
251 const int32_t input_width = input_shape.dim(2);
252 const int32_t input_depth = input_shape.dim(3);
// Filter dimension 0 is the number of output channels, followed by the kernel
// height and width.
253 const int32_t output_depth = filter_shape.dim(0);
254 const int32_t filter_height = filter_shape.dim(1);
255 const int32_t filter_width = filter_shape.dim(2);
// Clamp bounds, value-initialised to 0. Presumably filled by an omitted call
// before the loops run -- TODO confirm.
264 int32_t activation_min{};
265 int32_t activation_max{};
// Per-channel rescale factors (initialiser omitted in this listing).
268 const std::vector<double> effective_output_scale =
// Fixed-point multiplier/shift pairs (initialiser omitted in this listing).
271 const std::vector<ChannelQuantMultipliers> multipliers_raw =
// Wrapper that is indexed by output channel below. Presumably it lets a single
// entry serve every channel -- TODO confirm against BroadcastableWrapper.
273 BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(multipliers_raw);
// One iteration of the four outer loops per output element.
275 for (int32_t batch = 0; batch < batches; ++batch)
277 for (int32_t out_y = 0; out_y < output_height; ++out_y)
279 for (int32_t out_x = 0; out_x < output_width; ++out_x)
281 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
// Top-left input coordinate of the receptive field; negative when the window
// starts inside the padding.
283 const int32_t in_y_origin = out_y * stride_height - _padding_height;
284 const int32_t in_x_origin = out_x * stride_width - _padding_width;
286 for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
288 for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
// Dilation spreads the filter taps apart in input space.
290 const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
291 const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
// Only taps that land inside the input are accumulated. Presumably this is how
// the padded border contributes nothing -- braces are not visible here.
292 if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
294 for (int32_t in_c = 0; in_c < input_depth; ++in_c)
// Input element for this tap (indexing expression omitted in this listing).
296 const uint8_t input_val =
// Filter element addressed as (out_c, filter_y, filter_x, in_c).
298 const uint8_t filter_val =
299 filter_data[
calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
// Bias is indexed by output channel. Whether this line is guarded by a
// null-bias check is not visible in this listing.
308 acc += bias_data[out_c];
// Requantize the accumulator with this channel's multiplier and shift.
311 int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
312 acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
// Clamp into [activation_min, activation_max].
315 scaled_acc = std::max(scaled_acc, activation_min);
316 scaled_acc = std::min(scaled_acc, activation_max);
// Conv2D::evalQuantizedS8PerChannel -- int8 per-channel path that prepares
// parameters and per-channel multiplier/shift arrays, then delegates to
// luci_interpreter_pal::ConvPerChannel.
// NOTE(review): this listing is lossy. Braces, several params fields, the
// initialisers of the scale/multiplier vectors and the arguments of the final
// call are on omitted lines.
324void Conv2D::evalQuantizedS8PerChannel()
const
// Clamp bounds, value-initialised to 0. Presumably filled by an omitted call
// before being copied into params -- TODO confirm.
326 int32_t activation_min{};
327 int32_t activation_max{};
330 tflite::ConvParams
params{};
331 params.padding_values.height = _padding_height;
332 params.padding_values.width = _padding_width;
// The weights offset is fixed at 0 on this path.
339 params.weights_offset = 0;
341 params.quantized_activation_min = activation_min;
342 params.quantized_activation_max = activation_max;
// Per-channel rescale factors (initialiser omitted in this listing).
344 const std::vector<double> effective_output_scales =
// Fixed-point multiplier/shift pairs (initialiser omitted in this listing).
347 std::vector<ChannelQuantMultipliers> quant_multipliers =
// Split the pair vector into two parallel int32_t vectors, one of shifts and
// one of multipliers, preserving element order.
350 std::vector<int32_t> shifts;
351 std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
352 [](ChannelQuantMultipliers cm) { return cm.shift; });
353 std::vector<int32_t> multipliers;
354 std::transform(quant_multipliers.begin(), quant_multipliers.end(),
355 std::back_inserter(multipliers),
356 [](ChannelQuantMultipliers cm) { return cm.multiplier; });
// The scratchpad pointer stays null unless the scratchpad tensor reports that
// it is allocatable; only then is its int8_t buffer fetched.
359 int8_t *scratchpad_data =
nullptr;
360 if (scratchpad->is_allocatable())
361 scratchpad_data = scratchpad->data<int8_t>();
// Platform-abstraction-layer convolution (argument list omitted in this listing).
363 luci_interpreter_pal::ConvPerChannel(
// Conv2D::evalQuantizedS16 -- hand-written convolution loop for int16 data
// with an int64 bias and a requantization multiplier per output channel.
// NOTE(review): this listing is lossy. Braces, the accumulator declaration,
// the input/output pointers and shapes, the stride/output-size locals and the
// right-hand sides of several initialisers are on omitted lines.
370void Conv2D::evalQuantizedS16()
const
// Filter is read as int16_t and bias as int64_t.
373 const auto *filter_data = getTensorData<int16_t>(
filter());
374 const auto *bias_data = getTensorData<int64_t>(
bias());
// Input dimensions are taken in the order batch, height, width, depth.
381 const int32_t batches = input_shape.dim(0);
382 const int32_t input_height = input_shape.dim(1);
383 const int32_t input_width = input_shape.dim(2);
384 const int32_t input_depth = input_shape.dim(3);
// Filter dimension 0 is the number of output channels, followed by the kernel
// height and width.
385 const int32_t output_depth = filter_shape.dim(0);
386 const int32_t filter_height = filter_shape.dim(1);
387 const int32_t filter_width = filter_shape.dim(2);
// Clamp bounds, value-initialised to 0. Presumably filled by an omitted call
// before the loops run -- TODO confirm.
396 int32_t activation_min{};
397 int32_t activation_max{};
// Per-channel rescale factors (initialiser omitted in this listing).
400 const std::vector<double> effective_output_scale =
// Fixed-point multiplier/shift pairs (initialiser omitted in this listing).
403 const std::vector<ChannelQuantMultipliers> multipliers_raw =
// Wrapper that is indexed by output channel below. Presumably it lets a single
// entry serve every channel -- TODO confirm against BroadcastableWrapper.
405 BroadcastableWrapper<ChannelQuantMultipliers> multipliers(multipliers_raw);
// One iteration of the four outer loops per output element.
407 for (int32_t batch = 0; batch < batches; ++batch)
409 for (int32_t out_y = 0; out_y < output_height; ++out_y)
411 for (int32_t out_x = 0; out_x < output_width; ++out_x)
413 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
// Top-left input coordinate of the receptive field; negative when the window
// starts inside the padding.
415 const int32_t in_y_origin = out_y * stride_height - _padding_height;
416 const int32_t in_x_origin = out_x * stride_width - _padding_width;
418 for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
420 for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
// Dilation spreads the filter taps apart in input space.
422 const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
423 const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
// Only taps that land inside the input are accumulated. Presumably this is how
// the padded border contributes nothing -- braces are not visible here.
424 if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
426 for (int32_t in_c = 0; in_c < input_depth; ++in_c)
// Input element for this tap (indexing expression omitted in this listing).
428 const int16_t input_val =
// Filter element addressed as (out_c, filter_y, filter_x, in_c).
430 const int16_t filter_val =
431 filter_data[
calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
// Both operands are widened to int64_t before multiplying, so the product of
// two int16 values is formed in 64-bit arithmetic.
432 acc +=
static_cast<int64_t
>(input_val) *
static_cast<int64_t
>(filter_val);
// Bias is indexed by output channel. Whether this line is guarded by a
// null-bias check is not visible in this listing.
439 acc += bias_data[out_c];
// Requantize the accumulator with this channel's multiplier and shift.
442 int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
443 acc, multipliers[out_c].multiplier, multipliers[out_c].shift);
// Clamp into [activation_min, activation_max].
445 scaled_acc = std::max(scaled_acc, activation_min);
446 scaled_acc = std::min(scaled_acc, activation_max);
const std::vector< Tensor * > & getOutputTensors() const
const Conv2DParams _params
const Conv2DParams & params() const
void resize(const Shape &new_shape)
const Shape & shape() const
const std::vector< int32_t > & zero_points() const
int32_t zero_point() const
const Tensor * input() const
Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, Tensor *scratchpad, const Conv2DParams &params)
void configure() override
const Tensor * bias() const
void execute() const override
const Tensor * filter() const
#define LUCI_INTERPRETER_CHECK(cond)
const luci_interpreter::RuntimeShape output_shape
int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size, int32_t out_size)
int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
std::vector< ChannelQuantMultipliers > quantizeMultipliers(const std::vector< double > &effective_scale)
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size, int32_t stride, int32_t dilation_rate=1)
std::vector< double > getQuantizedConvolutionMultiplers(float input_scale, const std::vector< float > &filter_scale, float output_scale)
int32_t dilation_width_factor
int32_t dilation_height_factor