19#include "kernels/Utils.h"
21#include "PALDepthwiseConv2d.h"
53 if (
input()->element_type() == DataType::FLOAT32 &&
filter()->element_type() == DataType::FLOAT32)
57 else if (
input()->element_type() == DataType::U8 &&
filter()->element_type() == DataType::U8)
61 else if (
input()->element_type() == DataType::S8 &&
filter()->element_type() == DataType::S8)
72 else if (
input()->element_type() == DataType::S16 &&
filter()->element_type() == DataType::S16)
78 throw std::runtime_error(
"luci-intp DepthwiseConv2D(1) Unsupported type.");
98 const int32_t output_height =
101 const int32_t output_width =
108 filter_width, output_width);
112 tflite::DepthwiseParams
params{};
125 switch (
input()->element_type())
127 case DataType::FLOAT32:
128 if (
filter()->element_type() == DataType::FLOAT32)
133 throw std::runtime_error(
"luci-intp DepthwiseConv2D(2) Unsupported type.");
143 static_cast<size_t>(
filter()->shape().dim(3)));
144 evalQuantizedPerChannel();
148 evalQuantizedS8PerChannel();
154 throw std::runtime_error(
"luci-intp DepthwiseConv2D(3) Unsupported type.");
158void DepthwiseConv2D::evalFloat()
const
160 float activation_min{};
161 float activation_max{};
164 tflite::DepthwiseParams
params{};
165 params.padding_values.height = _padding_height;
166 params.padding_values.width = _padding_width;
172 params.float_activation_min = activation_min;
173 params.float_activation_max = activation_max;
175 tflite::reference_ops::DepthwiseConv(
181void DepthwiseConv2D::evalQuantizedPerChannel()
const
207 int32_t activation_min{};
208 int32_t activation_max{};
218 for (
int batch = 0; batch <
batches; ++batch)
226 for (
int m = 0;
m < depth_multiplier; ++
m)
259 tflite::MultiplyByQuantizedMultiplier(
acc, output_multiplier, output_shift);
272void DepthwiseConv2D::evalQuantized()
const
274 const auto input_scale =
static_cast<double>(
input()->
scale());
276 const auto output_scale =
static_cast<double>(
output()->
scale());
279 int32_t output_multiplier{};
283 int32_t activation_min{};
284 int32_t activation_max{};
287 tflite::DepthwiseParams
params{};
288 params.padding_values.height = _padding_height;
289 params.padding_values.width = _padding_width;
299 params.output_multiplier = output_multiplier;
300 params.output_shift = output_shift;
301 params.quantized_activation_min = activation_min;
302 params.quantized_activation_max = activation_max;
304 tflite::reference_ops::DepthwiseConv(
310void DepthwiseConv2D::evalQuantizedS8PerChannel()
const
312 int32_t activation_min{};
313 int32_t activation_max{};
316 tflite::DepthwiseParams
params{};
318 params.padding_type = tflite::PaddingType::kSame;
319 params.padding_values.height = _padding_height;
320 params.padding_values.width = _padding_width;
328 params.weights_offset = 0;
330 params.output_multiplier = 1;
332 params.quantized_activation_min = activation_min;
333 params.quantized_activation_max = activation_max;
341 std::vector<int32_t>
shifts;
343 [](ChannelQuantMultipliers
cm) { return cm.shift; });
347 [](ChannelQuantMultipliers
cm) { return cm.multiplier; });
361void DepthwiseConv2D::evalQuantizedS16()
const
395 int32_t activation_min{};
396 int32_t activation_max{};
399 for (int32_t batch = 0; batch <
batches; ++batch)
407 for (int32_t
m = 0;
m < depth_multiplier; ++
m)
409 const int32_t
out_c =
m +
in_c * depth_multiplier;
437 tflite::MultiplyByQuantizedMultiplier(
acc, output_multiplier, output_shift);
const std::vector< Tensor * > & getOutputTensors() const
const DepthwiseConv2DParams _params
const DepthwiseConv2DParams & params() const
void resize(const Shape &new_shape)
const Shape & shape() const
const std::vector< int32_t > & zero_points() const
int32_t zero_point() const
DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, Tensor *scratchpad, const DepthwiseConv2DParams ¶ms)
const Tensor * input() const
const Tensor * filter() const
const Tensor * bias() const
void configure() override
void execute() const override
#define LUCI_INTERPRETER_CHECK(cond)
const luci_interpreter::RuntimeShape output_shape
int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size, int32_t out_size)
int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
std::vector< ChannelQuantMultipliers > quantizeMultipliers(const std::vector< double > &effective_scale)
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size, int32_t stride, int32_t dilation_rate=1)
std::vector< double > getQuantizedConvolutionMultiplers(float input_scale, const std::vector< float > &filter_scale, float output_scale)
void DepthwiseConvPerChannel< int8_t >(const tflite::DepthwiseParams ¶ms, const int32_t *output_multiplier, const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data, const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, const tflite::RuntimeShape &output_shape, int8_t *output_data, const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
T must_cast(loco::Node *node)
int32_t dilation_height_factor
int32_t dilation_width_factor