18#ifndef __NNFW_CKER_CONV_H__
19#define __NNFW_CKER_CONV_H__
44 for (
int i = 0; i < rows; ++i)
46 for (
int j = 0; j < cols; ++j)
48 const float in_value =
input_data[i * cols + j];
58 Conv() : _modified_filter_data(), _im2col_shape(4), _need_im2col(false), _prepared(false) {}
61 bool &is_replaced_weights, uint32_t dilationWidthFactor,
62 uint32_t dilationHeightFactor)
66 if (usableMultiThreaded(padding_type, dilationWidthFactor, dilationHeightFactor))
68 transposeFilter(filter_shape, filter_data, is_replaced_weights);
76 uint32_t dilation_width_factor, uint32_t dilation_height_factor)
80 IsRequiredIm2col(input_shape, kernel_shape,
output_shape, stride_width, stride_height,
81 dilation_width_factor, dilation_height_factor);
87 const Shape &filter_shape,
const float *filter_data,
const Shape &bias_shape,
93 bool transposed_in_execution =
false;
99 transposeFilter(filter_shape, filter_data, transposed_in_execution);
101 multithreaded::Conv(params, input_shape, input_data, filter_shape, &_modified_filter_data[0],
107 reference::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
113 const Shape &filter_shape,
const uint8_t *filter_data,
const Shape &bias_shape,
124 int im2col_size = _need_im2col ? _im2col_shape.
FlatSize() : 1;
125 std::vector<uint8_t> im2col_data(im2col_size);
126 optimized::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
127 bias_data,
output_shape, output_data, _im2col_shape, im2col_data.data());
131 const Shape &filter_shape,
const uint8_t *filter_data,
132 const int32_t *filter_zero_point,
const Shape &bias_shape,
135 reference::Conv<uint8_t, true>(params, _per_channel_output_multiplier.data(),
136 _per_channel_output_shift.data(), input_shape, input_data,
137 filter_shape, filter_data, filter_zero_point, bias_shape,
142 const Shape &filter_shape,
const int8_t *filter_data,
const Shape &bias_shape,
145 reference::Conv<int8_t, false>(params, _per_channel_output_multiplier.data(),
146 _per_channel_output_shift.data(), input_shape, input_data,
147 filter_shape, filter_data,
nullptr ,
154 bool usableMultiThreaded(
PaddingType padding_type, uint32_t dilation_width_factor,
155 int32_t dilation_height_factor)
157 return padding_type !=
PaddingType::kNone && std::thread::hardware_concurrency() > 1 &&
158 dilation_width_factor == 1 && dilation_height_factor == 1;
161 void transposeFilter(
const Shape &filter_shape,
const float *filter_data,
162 bool &is_replaced_weights)
164 const auto output_depth = filter_shape.Dims(0);
165 const Shape hwcn_filter_shape{filter_shape.FlatSize() / output_depth, output_depth};
166 _modified_filter_data.resize(hwcn_filter_shape.FlatSize());
167 TransposeFloatTensor(filter_data, hwcn_filter_shape, &_modified_filter_data[0]);
168 is_replaced_weights =
true;
171 void IsRequiredIm2col(
const Shape &input_shape,
const Shape &kernel_shape,
173 uint32_t dilation_width_factor, uint32_t dilation_height_factor)
175 const bool need_dilated_im2col = dilation_width_factor != 1 || dilation_height_factor != 1;
176 const bool need_non_dilated_im2col = stride_width != 1 || stride_height != 1 ||
177 kernel_shape.Dims(1) != 1 || kernel_shape.Dims(2) != 1;
179 _need_im2col = need_dilated_im2col || need_non_dilated_im2col;
186 _im2col_shape.
SetDim(3, input_shape.Dims(3) * kernel_shape.Dims(1) * kernel_shape.Dims(2));
191 std::vector<float> _modified_filter_data;
196 std::vector<int32_t> _per_channel_output_multiplier;
197 std::vector<int> _per_channel_output_shift;
std::vector< int > & per_channel_output_shift()
void operator()(const ConvParams &params, const Shape &input_shape, const uint8_t *input_data, const Shape &filter_shape, const uint8_t *filter_data, const int32_t *filter_zero_point, const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
void operator()(const ConvParams &params, const Shape &input_shape, const uint8_t *input_data, const Shape &filter_shape, const uint8_t *filter_data, const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
void prepareQ8uPerTensor(const Shape &input_shape, const Shape &kernel_shape, const Shape &output_shape, uint32_t stride_width, uint32_t stride_height, uint32_t dilation_width_factor, uint32_t dilation_height_factor)
void prepareF32(const Shape &filter_shape, const float *filter_data, PaddingType padding_type, bool &is_replaced_weights, uint32_t dilationWidthFactor, uint32_t dilationHeightFactor)
std::vector< int32_t > & per_channel_output_multiplier()
void operator()(const ConvParams &params, const Shape &input_shape, const float *input_data, const Shape &filter_shape, const float *filter_data, const Shape &bias_shape, const float *bias_data, const Shape &output_shape, float *output_data)
void operator()(const ConvParams &params, const Shape &input_shape, const int8_t *input_data, const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, int8_t *output_data)
void SetDim(int i, int32_t val)
const luci_interpreter::RuntimeShape output_shape
void Conv(const ConvParams &params, const Shape &input_shape, const float *input_data, const Shape &filter_shape, const float *filter_data, const Shape &bias_shape, const float *bias_data, const Shape &output_shape, float *output_data)
void Conv(const ConvParams &params, const Shape &input_shape, const uint8_t *input_data, const Shape &filter_shape, const uint8_t *filter_data, const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data, const Shape &im2col_shape, uint8_t *im2col_data)
void Conv(const ConvParams &params, const Shape &input_shape, const float *input_data, const Shape &filter_shape, const float *filter_data, const Shape &bias_shape, const float *bias_data, const Shape &output_shape, float *output_data)
std::vector< int8_t > input_quantized
std::vector< int32_t > input_offsets
std::vector< float > input_scaling_factors
ConvHybridTempArena(int batch_size, int input_size)
int16_t dilation_width_factor
int16_t dilation_height_factor