21#include "../KernelGenerator.h"
22#include "../Validator.h"
31void Validator::visit(
const ir::operation::Conv2D &) {
_supported =
true; }
// Kernel generator for Conv2D: builds an ops::ConvolutionLayer from the node's
// operands and parameters.
// NOTE(review): this listing is a partial extraction — original-file line
// numbers ("33", "35", ...) are fused into the code text and several interior
// lines (opening braces, the `fn->configure(...)` call openings, the
// `calculatePadding` call opening) are missing. Code bytes are kept exactly as
// extracted; only comments were added. Reconcile against the upstream file
// before compiling.
33void KernelGenerator::visit(
const ir::operation::Conv2D &node)
35 using ir::operation::Conv2D;
// Operand indices: single output (OFM) plus the three Conv2D inputs
// (input feature map, kernel, bias).
37 const auto ofm_index{node.getOutputs().at(0)};
38 const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
39 const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
40 const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
// Resolve each operand index to its backend tensor.
42 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
43 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
44 auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
45 auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
// Operation parameters from the IR node.
47 const auto stride = node.param().stride;
48 const auto activation = node.param().activation;
// NOTE(review): `¶m_padding` is presumably a mis-encoded
// `&param_padding` (an HTML `&para;` entity artifact from extraction) —
// confirm against the upstream source.
49 const auto ¶m_padding = node.param().padding;
50 const auto dilation = node.param().dilation;
// Constant weights can be cached/prepared once by the kernel.
52 const bool is_cacheable_weights =
ker_tensor->is_constant();
54 auto fn = std::make_unique<ops::ConvolutionLayer>();
// Dynamic-shape path: padding cannot be resolved at compile time, so the raw
// explicit padding params are forwarded to configure() as-is.
// (The `fn->configure(...)` call opening and the branch braces are missing
// from this extraction; lines below are its argument tail.)
56 if (_ctx.
at(ifm_index).info().isDynamic() || _ctx.
at(ker_index).info().isDynamic())
59 param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
60 stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
61 activation, ofm_tensor, is_cacheable_weights);
// Static-shape path: derive feature shapes and kernel spatial dims so explicit
// padding can be computed up front.
66 const auto ifm_shape = _ctx.
at(ifm_index).shape().asFeature();
67 const auto ofm_shape = _ctx.
at(ofm_index).shape().asFeature();
// Kernel layout indices 1 and 2 are height and width respectively
// (presumably OHWI layout — confirm upstream).
69 const auto &ker_shape = _ctx.
at(ker_index).shape();
70 const auto ker_height = ker_shape.dim(1);
71 const auto ker_width = ker_shape.dim(2);
// Tail of the padding computation (the `calculatePadding(...)` opening is
// missing from this extraction; see the calculatePadding index entry below).
75 dilation.width_factor, dilation.height_factor);
// Tail of the static-path `fn->configure(...)` call using the resolved
// explicit padding values.
78 padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
79 dilation.width_factor, dilation.height_factor, activation, ofm_tensor,
80 is_cacheable_weights);
// Member-initializer list of the ConvolutionLayer default constructor.
// (The constructor's signature line and body are missing from this extraction;
// code bytes are kept exactly as extracted, only comments added.)
// Defaults: null tensor pointers, explicit zero padding, zero strides,
// dilation factors of 1, no activation, and an eagerly allocated cker::Conv
// kernel (held by the `std::unique_ptr<nnfw::cker::Conv> _conv_kernel` member
// per the index below, so the raw `new` is owned).
90 : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
91 _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
92 _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
93 _dilationHeightFactor(1), _activation(ir::Activation::
NONE),
94 _conv_kernel(new
nnfw::cker::
Conv()), _prepare(false), _is_cachable_weights(false),
// Float32 convolution path.
// Only the opening of the function survives this extraction: it declares the
// activation clamp range (presumably filled by CalculateActivationRange — see
// the index entry below — TODO confirm upstream).
102void ConvolutionLayer::convFloat32()
104 float output_activation_min = 0, output_activation_max = 0;
// Per-tensor quantized uint8 convolution path (partial extraction; interior
// lines missing — code bytes kept as-is, comments added).
124void ConvolutionLayer::convQ8uPerTensor()
// Quantized activation clamp range; line 129 is the argument tail of a call
// that fills it (presumably CalculateActivationRangeQuantized — see index
// entry below; the call's opening line is missing here).
126 int32_t output_activation_min = 0;
127 int32_t output_activation_max = 0;
129 &output_activation_max);
// Requantization parameters: a real multiplier later converted to a
// fixed-point multiplier/shift pair (presumably via
// GetQuantizedConvolutionMultiplier + QuantizeMultiplier — confirm upstream).
131 double real_multiplier = 0.0;
132 int32_t output_multiplier = 0;
133 int32_t output_shift = 0;
// Per-channel quantized uint8 convolution path (partial extraction; interior
// lines missing — code bytes kept as-is, comments added).
160void ConvolutionLayer::convQ8uPerChannel()
// Quantized activation clamp range; line 174 is the argument tail of the
// call that fills it (opening line missing from this extraction).
171 int32_t output_activation_min = 0;
172 int32_t output_activation_max = 0;
174 &output_activation_max);
// Int8 quantized convolution path (partial extraction; interior lines
// missing — code bytes kept as-is, comments added).
186void ConvolutionLayer::convQ8i()
// Quantized activation clamp range; line 191 is the argument tail of the
// call that fills it (opening line missing from this extraction).
188 int32_t output_activation_min = 0;
189 int32_t output_activation_max = 0;
191 &output_activation_max);
// Hybrid int8 convolution with per-channel weight scales: the float input is
// quantized on the fly per batch row, then convolved against int8 weights.
// (Partial extraction; interior lines missing — code bytes kept as-is,
// comments added.)
212void ConvolutionLayer::convQ8iHybridPerChannel()
214 float output_activation_min = 0;
215 float output_activation_max = 0;
// Guard: a zero batch would divide by zero in the input_size computation
// below, so it is rejected explicitly.
220 throw std::runtime_error{
"Convolution input batch_size = 0"};
222 const int input_size = input_shape.FlatSize() / batch_size;
// Scratch buffers from the pre-allocated hybrid arena (see _hybrid_arena,
// a nnfw::cker::ConvHybridTempArena, in the index below): quantized input,
// one scaling factor and one offset per batch row.
224 auto input_quantized_ptr =
_hybrid_arena->input_quantized.data();
225 auto input_scaling_factors_ptr =
_hybrid_arena->input_scaling_factors.data();
226 auto input_offsets_ptr =
_hybrid_arena->input_offsets.data();
// Quantize each batch row of the float input into the arena buffers
// (line 232 is the argument tail of the quantize call — presumably
// PortableAsymmetricQuantizeFloats per the index below; its opening line is
// missing from this extraction).
227 for (
int b = 0;
b < batch_size; ++
b)
229 const int offset =
b * input_size;
232 input_quantized_ptr +
offset, &input_scaling_factors_ptr[b], &input_offsets_ptr[b]);
// Argument tail of the convolution call writing float results into the
// output tensor's raw buffer (presumably HybridConvPerChannel per the index
// below — its opening line is missing from this extraction).
250 reinterpret_cast<float *
>(
_output->
buffer()), filter_per_channel_scales, input_offsets_ptr);
// Tail of the ConvolutionLayer::configure parameter list (the opening line
// with the tensor/padding-type parameters is missing from this extraction;
// the full signature is listed in the index below). The body — also missing
// here — presumably stores these values into the corresponding members
// (_paddingLeft, _strideWidth, ... — confirm upstream).
255 const uint32_t paddingLeft,
const uint32_t paddingRight,
256 const uint32_t paddingTop,
const uint32_t paddingBottom,
257 const uint32_t strideWidth,
const uint32_t strideHeight,
258 const uint32_t dilationWidthFactor,
259 const uint32_t dilationHeightFactor,
261 bool is_cachable_weights)
// Scattered fragments of ConvolutionLayer::prepare()/run() (original lines
// 291-411; most interior lines are missing from this extraction, so exact
// attribution of each fragment to prepare() vs run() is uncertain — confirm
// upstream. Code bytes kept as-is; only comments added.)
// Kernel spatial dims, same index-1/index-2 convention as in the generator.
291 const auto ker_height = ker_shape.dim(1);
292 const auto ker_width = ker_shape.dim(2);
// Dispatch to the hybrid per-channel int8 path.
316 convQ8iHybridPerChannel();
// Per-channel vs per-tensor quantization branch.
325 if (per_channel_quantized)
// Fallback for data types with no convolution implementation.
336 throw std::runtime_error{
"Conv: unsupported data type"};
// Hybrid weights must carry one zero point per output channel; anything else
// is rejected.
352 if ((int64_t)kernel_output_channel != (int64_t)kernel_zerop_cnt)
353 throw std::runtime_error{
"Conv2D hybrid supports only per-channel quantized weight."};
// Size and allocate the hybrid scratch arena from the input shape.
359 const int batch_size = input_shape.Dims(0);
360 const int input_size = input_shape.FlatSize() / batch_size;
361 _hybrid_arena = std::make_unique<nnfw::cker::ConvHybridTempArena>(batch_size, input_size);
369 bool is_transposed =
false;
// Release the reference on the (constant) kernel tensor once its weights
// have been consumed; const_cast is needed because the member is held as
// const IPortableTensor*.
379 const_cast<Tensor *
>(kernel_tensor)->decrease_ref();
386 if (per_channel_quantized)
// Dynamic int8 weights are not supported by this backend.
411 throw std::runtime_error{
"Conv2D: Int8 dynamic weight is not supported"};
void Conv(const float *input_data, const Dims< 4 > &input_dims, const float *filter_data, const Dims< 4 > &filter_dims, const float *bias_data, const Dims< 4 > &bias_dims, int stride_width, int stride_height, int pad_width, int pad_height, float *output_data, const Dims< 4 > &output_dims, float *im2col_data, const Dims< 4 > &im2col_dims)
std::vector< int > & per_channel_output_shift()
void prepareQ8uPerTensor(const Shape &input_shape, const Shape &kernel_shape, const Shape &output_shape, uint32_t stride_width, uint32_t stride_height, uint32_t dilation_width_factor, uint32_t dilation_height_factor)
void prepareF32(const Shape &filter_shape, const float *filter_data, PaddingType padding_type, bool &is_replaced_weights, uint32_t dilationWidthFactor, uint32_t dilationHeightFactor)
std::vector< int32_t > & per_channel_output_multiplier()
int32_t Dims(int i) const
A tensor class that is portable for other backends.
const std::vector< float > & data_scales() const override final
float data_scale() const override final
int32_t data_zero_point() const override final
const std::vector< int32_t > & data_zero_points() const override
ir::DataType data_type() const override final
ir::Shape getShape() const override final
Get ir::Shape of tensor.
bool is_dynamic() const override final
Return true if the tensor needs dynamic allocation, meaning that during compile-time the output shape...
virtual uint8_t * buffer() const =0
std::unique_ptr< exec::IFunction > _return_fn
std::unique_ptr< nnfw::cker::Conv > _conv_kernel
void configure(const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias, ir::PaddingType _paddingType, const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight, const uint32_t dilationWidthFactor, const uint32_t dilationHeightFactor, const ir::Activation activation, IPortableTensor *output, bool is_cachable_weights)
bool _is_cachable_weights
ir::Activation _activation
const IPortableTensor * _bias
const IPortableTensor * _kernel
const IPortableTensor * _input
uint32_t _dilationWidthFactor
IPortableTensor * _output
ir::PaddingType _paddingType
std::unique_ptr< nnfw::cker::ConvHybridTempArena > _hybrid_arena
uint32_t _dilationHeightFactor
const Object & at(const Index &index) const
Get the object that is associated with the given index.
__global uchar * offset(const Image *img, int x, int y)
void HybridConvPerChannel(const ConvParams &params, float *scaling_factors_ptr, const Shape &input_shape, const int8_t *input_data, const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape, const float *bias_data, const Shape &output_shape, float *output_data, const float *per_channel_scale, const int32_t *input_offset)
void PortableAsymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values, float *scaling_factor, int32_t *offset)
nnfw::cker::Shape getShape(const IPortableTensor *tensor)
void GetQuantizedConvolutionMultipliersAndShifts(float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size, int num_channels, std::vector< int32_t > &per_channel_output_multiplier, std::vector< int > &per_channel_output_shift)
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
nnfw::cker::PaddingType getPaddingType(ir::PaddingType ir_padding_type)
void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output, int32_t *act_min, int32_t *act_max)
void GetQuantizedConvolutionMultiplier(const IPortableTensor *input, const IPortableTensor *filter, const IPortableTensor *bias, const IPortableTensor *output, double *multiplier)
const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape, const FeatureShape &ofm_shape, const Stride &stride, uint32_t kw, uint32_t kh, uint32_t dwf=1, uint32_t dhf=1)
void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
PaddingValues padding_values
float float_activation_max
int32_t output_multiplier
int16_t dilation_width_factor
float float_activation_min
int32_t quantized_activation_max
int16_t dilation_height_factor
int32_t quantized_activation_min