19#include "kernels/Utils.h"
21#include "PALDepthwiseConv2d.h"
53 if (
input()->element_type() == DataType::FLOAT32 &&
filter()->element_type() == DataType::FLOAT32)
57 else if (
input()->element_type() == DataType::U8 &&
filter()->element_type() == DataType::U8)
61 else if (
input()->element_type() == DataType::S8 &&
filter()->element_type() == DataType::S8)
72 else if (
input()->element_type() == DataType::S16 &&
filter()->element_type() == DataType::S16)
78 throw std::runtime_error(
"luci-intp DepthwiseConv2D(1) Unsupported type.");
86 const int32_t batches = input_shape.
dim(0);
87 const int32_t input_height = input_shape.
dim(1);
88 const int32_t input_width = input_shape.
dim(2);
91 const int32_t filter_height = filter_shape.
dim(1);
92 const int32_t filter_width = filter_shape.
dim(2);
93 const int32_t channels_out = filter_shape.
dim(3);
96 bias()->shape().dim(0) == channels_out));
98 const int32_t output_height =
101 const int32_t output_width =
106 input_height, filter_height, output_height);
108 filter_width, output_width);
110 output()->
resize({batches, output_height, output_width, channels_out});
112 tflite::DepthwiseParams
params{};
118 luci_interpreter_pal::SetupScratchpadTensor(scratchpad,
params,
input()->element_type(),
125 switch (
input()->element_type())
127 case DataType::FLOAT32:
128 if (
filter()->element_type() == DataType::FLOAT32)
133 throw std::runtime_error(
"luci-intp DepthwiseConv2D(2) Unsupported type.");
143 static_cast<size_t>(
filter()->shape().dim(3)));
144 evalQuantizedPerChannel();
148 evalQuantizedS8PerChannel();
154 throw std::runtime_error(
"luci-intp DepthwiseConv2D(3) Unsupported type.");
158void DepthwiseConv2D::evalFloat()
const
160 float activation_min{};
161 float activation_max{};
164 tflite::DepthwiseParams
params{};
165 params.padding_values.height = _padding_height;
166 params.padding_values.width = _padding_width;
172 params.float_activation_min = activation_min;
173 params.float_activation_max = activation_max;
175 tflite::reference_ops::DepthwiseConv(
181void DepthwiseConv2D::evalQuantizedPerChannel()
const
184 const auto *filter_data = getTensorData<uint8_t>(
filter());
185 const auto *bias_data = getTensorData<int32_t>(
bias());
192 const int32_t batches = input_shape.dim(0);
193 const int32_t input_height = input_shape.dim(1);
194 const int32_t input_width = input_shape.dim(2);
195 const int32_t input_depth = input_shape.dim(3);
196 const int32_t filter_height = filter_shape.dim(1);
197 const int32_t filter_width = filter_shape.dim(2);
207 int32_t activation_min{};
208 int32_t activation_max{};
211 const std::vector<double> effective_output_scales =
214 std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
216 BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
218 for (
int batch = 0; batch < batches; ++batch)
220 for (
int out_y = 0; out_y < output_height; ++out_y)
222 for (
int out_x = 0; out_x < output_width; ++out_x)
224 for (
int in_channel = 0; in_channel < input_depth; ++in_channel)
226 for (
int m = 0;
m < depth_multiplier; ++
m)
228 const int output_channel =
m + in_channel * depth_multiplier;
229 const int in_x_origin = (out_x * stride_width) - _padding_width;
230 const int in_y_origin = (out_y * stride_height) - _padding_height;
232 for (
int filter_y = 0; filter_y < filter_height; ++filter_y)
234 for (
int filter_x = 0; filter_x < filter_width; ++filter_x)
236 const int in_x = in_x_origin + dilation_width_factor * filter_x;
237 const int in_y = in_y_origin + dilation_height_factor * filter_y;
239 const bool is_point_inside_image =
240 (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
241 if (is_point_inside_image)
246 filter_data[
calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
254 acc += bias_data[output_channel];
256 int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
257 int output_shift = quant_multipliers[output_channel].shift;
259 tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
261 scaled_acc = std::max(scaled_acc, activation_min);
262 scaled_acc = std::min(scaled_acc, activation_max);
264 static_cast<uint8_t
>(scaled_acc);
272void DepthwiseConv2D::evalQuantized()
const
274 const auto input_scale =
static_cast<double>(
input()->
scale());
275 const auto filter_scale =
static_cast<double>(
filter()->
scale());
276 const auto output_scale =
static_cast<double>(
output()->
scale());
278 const double real_multiplier = input_scale * filter_scale / output_scale;
279 int32_t output_multiplier{};
283 int32_t activation_min{};
284 int32_t activation_max{};
287 tflite::DepthwiseParams
params{};
288 params.padding_values.height = _padding_height;
289 params.padding_values.width = _padding_width;
299 params.output_multiplier = output_multiplier;
300 params.output_shift = output_shift;
301 params.quantized_activation_min = activation_min;
302 params.quantized_activation_max = activation_max;
304 tflite::reference_ops::DepthwiseConv(
310void DepthwiseConv2D::evalQuantizedS8PerChannel()
const
312 int32_t activation_min{};
313 int32_t activation_max{};
316 tflite::DepthwiseParams
params{};
318 params.padding_type = tflite::PaddingType::kSame;
319 params.padding_values.height = _padding_height;
320 params.padding_values.width = _padding_width;
328 params.weights_offset = 0;
330 params.output_multiplier = 1;
332 params.quantized_activation_min = activation_min;
333 params.quantized_activation_max = activation_max;
335 const std::vector<double> effective_output_scales =
338 std::vector<ChannelQuantMultipliers> quant_multipliers =
341 std::vector<int32_t> shifts;
342 std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
343 [](ChannelQuantMultipliers cm) { return cm.shift; });
344 std::vector<int32_t> multipliers;
345 std::transform(quant_multipliers.begin(), quant_multipliers.end(),
346 std::back_inserter(multipliers),
347 [](ChannelQuantMultipliers cm) { return cm.multiplier; });
350 int8_t *scratchpad_data =
nullptr;
351 if (scratchpad->is_allocatable())
352 scratchpad_data = scratchpad->data<int8_t>();
361void DepthwiseConv2D::evalQuantizedS16()
const
364 const auto *filter_data = getTensorData<int16_t>(
filter());
365 const auto *bias_data = getTensorData<int64_t>(
bias());
372 const int32_t batches = input_shape.dim(0);
373 const int32_t input_height = input_shape.dim(1);
374 const int32_t input_width = input_shape.dim(2);
375 const int32_t input_depth = input_shape.dim(3);
376 const int32_t filter_height = filter_shape.dim(1);
377 const int32_t filter_width = filter_shape.dim(2);
387 const std::vector<double> effective_output_scales =
390 std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
393 BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
395 int32_t activation_min{};
396 int32_t activation_max{};
399 for (int32_t batch = 0; batch < batches; ++batch)
401 for (int32_t out_y = 0; out_y < output_height; ++out_y)
403 for (int32_t out_x = 0; out_x < output_width; ++out_x)
405 for (int32_t in_c = 0; in_c < input_depth; ++in_c)
407 for (int32_t
m = 0;
m < depth_multiplier; ++
m)
409 const int32_t out_c =
m + in_c * depth_multiplier;
410 const int32_t in_y_origin = out_y * stride_height - _padding_height;
411 const int32_t in_x_origin = out_x * stride_width - _padding_width;
413 for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
415 for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
417 const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
418 const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
419 if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
421 const int16_t input_val =
423 const int16_t filter_val =
424 filter_data[
calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
425 acc +=
static_cast<int64_t
>(input_val) *
static_cast<int64_t
>(filter_val);
429 if (bias_data !=
nullptr)
431 acc += bias_data[out_c];
434 int32_t output_multiplier = quant_multipliers[out_c].multiplier;
435 int output_shift = quant_multipliers[out_c].shift;
437 tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
439 scaled_acc = std::max(scaled_acc, activation_min);
440 scaled_acc = std::min(scaled_acc, activation_max);
const std::vector< Tensor * > & getOutputTensors() const
const DepthwiseConv2DParams _params
const DepthwiseConv2DParams & params() const
void resize(const Shape &new_shape)
const Shape & shape() const
const std::vector< int32_t > & zero_points() const
int32_t zero_point() const
DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, Tensor *scratchpad, const DepthwiseConv2DParams &params)
const Tensor * input() const
const Tensor * filter() const
const Tensor * bias() const
void configure() override
void execute() const override
#define LUCI_INTERPRETER_CHECK(cond)
const luci_interpreter::RuntimeShape output_shape
int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size, int32_t out_size)
int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
std::vector< ChannelQuantMultipliers > quantizeMultipliers(const std::vector< double > &effective_scale)
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size, int32_t stride, int32_t dilation_rate=1)
std::vector< double > getQuantizedConvolutionMultiplers(float input_scale, const std::vector< float > &filter_scale, float output_scale)
void DepthwiseConvPerChannel< int8_t >(const tflite::DepthwiseParams &params, const int32_t *output_multiplier, const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data, const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, const tflite::RuntimeShape &output_shape, int8_t *output_data, const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
int32_t dilation_height_factor
int32_t dilation_width_factor