25void DepthwiseConvolutionLayer::prepareF32()
32 const int64_t k_packet_size = nnfw::cker::eigen_support::kPacketSize<float>();
36 const int batch = out_shape.Dims(0);
37 const int out_depth = out_shape.Dims(3);
38 const int filter_rows = filter_shape.Dims(1);
39 const int filter_cols = filter_shape.Dims(2);
41 const int filter_spatial_size = filter_rows * filter_cols;
42 const int padded_filter_inner_dim_size =
43 ((out_depth + k_packet_size - 1) / k_packet_size) * k_packet_size;
49 padded_filter_info.shape({batch, filter_spatial_size, padded_filter_inner_dim_size});
50 _padded_filter = std::make_unique<Tensor>(padded_filter_info,
nullptr);
57 filter_buffers_info.shape({thread_count, filter_spatial_size, padded_filter_inner_dim_size});
58 _filter_buffers = std::make_unique<Tensor>(filter_buffers_info,
nullptr);
64 float output_activation_min = 0, output_activation_max = 0;
90 nnfw::cker::DepthwiseConv<float, float>(
93 getBuffer<float>(
_output), _external_context->ruy_context());
99 int32_t output_activation_min = 0;
100 int32_t output_activation_max = 0;
102 &output_activation_max);
104 double real_multiplier = 0.0;
105 int32_t output_multiplier = 0;
106 int32_t output_shift = 0;
126 nnfw::cker::DepthwiseConv<uint8_t, int32_t>(
129 getBuffer<uint8_t>(
_output), _external_context->ruy_context());
144 int32_t output_activation_min = 0;
145 int32_t output_activation_max = 0;
147 &output_activation_max);
154 op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
168 int32_t output_activation_min = 0;
169 int32_t output_activation_max = 0;
171 &output_activation_max);
189 op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
192 _external_context->ruy_context());
199 prepareQ8iHybridPerChannel();
203 float output_activation_min = 0, output_activation_max = 0;
207 const int batch_size = input_shape.Dims(0);
208 const int input_size = input_shape.FlatSize() / batch_size;
210 auto scaling_factors_ptr = _input_scaling_factors.data();
211 auto input_offsets_ptr = _input_offsets.data();
213 for (
int b = 0; b < batch_size; ++b)
215 const int offset = b * input_size;
217 _input_quantized.data() +
offset,
218 &scaling_factors_ptr[b], &input_offsets_ptr[b]);
233 op_params, _input_scaling_factors.data(),
getShape(
_input), _input_quantized.data(),
236 _input_offsets.data());
239void DepthwiseConvolutionLayer::prepareQ8i()
244 _per_channel_output_shift);
247void DepthwiseConvolutionLayer::prepareQ8uPerChannel()
252 _per_channel_output_shift);
255void DepthwiseConvolutionLayer::prepareQ8iHybridPerChannel()
261 const int batch_size = input_shape.Dims(0);
262 const int input_size = input_shape.FlatSize() / batch_size;
263 _input_quantized.resize(input_size);
265 _input_scaling_factors.resize(batch_size);
266 _input_offsets.resize(batch_size);
269void DepthwiseConvolutionLayer::ensureQ8iHybridPerChannel()
276 if ((int64_t)kernel_input_channel != (int64_t)kernel_zerop_cnt)
277 throw std::runtime_error{
"DConv2D hybrid supports only per-channel quantized weight."};
282 const uint32_t paddingLeft,
const uint32_t paddingRight,
const uint32_t paddingTop,
283 const uint32_t paddingBottom,
const uint32_t strideWidth,
const uint32_t strideHeight,
284 const uint32_t multiplier,
const uint32_t dilationWidth,
const uint32_t dilationHeight,
286 const std::shared_ptr<ExternalContext> &external_context)
302 _external_context = external_context;
308 ensureQ8iHybridPerChannel();
309 prepareQ8iHybridPerChannel();
328 if (per_channel_quantized)
330 prepareQ8uPerChannel();
349 if (per_channel_quantized)
360 throw std::runtime_error{
"DepthwiseConv: unsupported data type"};
int32_t Dims(int i) const
A tensor class that is portable for other backends.
const std::vector< float > & data_scales() const override final
float data_scale() const override final
const ir::OperandInfo & get_info() const
int32_t data_zero_point() const override final
const std::vector< int32_t > & data_zero_points() const override
ir::DataType data_type() const override final
bool is_dynamic() const override final
Return true if the tensor needs dynamic allocation, meaning that during compile-time the output shape...
bool is_constant() const override final
Return true if the tensor is constant.
std::unique_ptr< Tensor > _padded_filter
const IPortableTensor * _input
void configure(const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias, const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH, const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight, const ir::Activation activation, IPortableTensor *output, const std::shared_ptr< ExternalContext > &external_context)
const IPortableTensor * _bias
ir::Activation _activation
std::unique_ptr< Tensor > _filter_buffers
const IPortableTensor * _kernel
void convQ8iHybridPerChannel()
IPortableTensor * _output
__global uchar * offset(const Image *img, int x, int y)
void DepthwiseConvPerChannel(const DepthwiseConvParams &params, const int32_t *output_multiplier, const int32_t *output_shift, const Shape &input_shape, const int8_t *input_data, const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, int8_t *output_data, ruy::Context *ruy_context)
void DepthwiseConvPerChannel(const DepthwiseConvParams &params, const int32_t *output_multiplier, const int32_t *output_shift, const Shape &input_shape, const uint8_t *input_data, const Shape &filter_shape, const uint8_t *filter_data, const int32_t *filter_zeropoint, const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
void DepthwiseConvHybridPerChannel(const DepthwiseConvParams &params, float *scaling_factors_ptr, const Shape &input_shape, const int8_t *input_data, const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape, const float *bias_data, const Shape &output_shape, float *output_data, const float *per_channel_scale, int32_t *input_offset)
void DepthwiseConvOp(const DepthwiseConvParams &params, const Shape &input_shape, const float *input_data, const Shape &filter_shape, const float *filter_data, const Shape &bias_shape, const float *bias_data, float *padded_filter_data, bool pad_filter, float *filter_buffers_data, const Shape &output_shape, float *output_data)
void PortableAsymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values, float *scaling_factor, int32_t *offset)
nnfw::cker::Shape getShape(const IPortableTensor *tensor)
void GetQuantizedConvolutionMultipliersAndShifts(float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size, int num_channels, std::vector< int32_t > &per_channel_output_multiplier, std::vector< int > &per_channel_output_shift)
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output, int32_t *act_min, int32_t *act_max)
void GetQuantizedConvolutionMultiplier(const IPortableTensor *input, const IPortableTensor *filter, const IPortableTensor *bias, const IPortableTensor *output, double *multiplier)
void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
float float_activation_min
int16_t dilation_height_factor
int32_t output_multiplier
int16_t dilation_width_factor
int32_t quantized_activation_max
float float_activation_max
int32_t quantized_activation_min
PaddingValues padding_values