// Reference (naive) implementation of quantized DepthwiseConv2D:
// uint8 asymmetric-quantized input and kernel, int32 fused bias, uint8 output,
// NHWC data layout (established by the rank-4 asserts below).
//
// NOTE(review): this listing was extracted from a rendered source page — the
// original line numbers (104-216) are fused into the code text, and original
// lines 113, 119, 135, 137-141, 159-162 and 206 are missing from this view.
// Lines 137-141 presumably declared in_shape/kernel_shape/out_shape, strides,
// pads and the tensor accessors, and called QuantizeMultiplier(real_multiplier,
// &output_multiplier, &output_shift); line 206 presumably requantized via
// acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
// (output_multiplier/output_shift are computed but never used in the visible
// code, and both helpers are declared in this file). TODO confirm against the
// original source before relying on this documentation.
104{
// A fused bias is mandatory: the int32 bias is added straight into the int32
// accumulator before requantization, so there is no bias-free code path.
 105 if (!biasv)
 106 {
 107 throw std::runtime_error{"Unsupported quantized DepthwiseConv2D without fused bias"};
 108 }
 109
 110 const auto &input_type = inputv.
getType();
 111 const auto &kernel_type = kernelv.
getType();
 112 const auto &bias_type = biasv->
getType();
// bias_type is only consulted inside the asserts below; silence the unused
// warning in release (NDEBUG) builds.
 114 (void)bias_type;
 115
// Supported quantization scheme: uint8 input/kernel, int32 bias.
 116 assert(input_type.isQuantized());
 117 assert(kernel_type.isQuantized());
 118 assert(bias_type.isQuantized());
 120 assert(input_type.getElementType() == DataType::UINT8);
 121 assert(kernel_type.getElementType() == DataType::UINT8);
 122 assert(bias_type.getElementType() == DataType::INT32);
 123
// Zero-point handling follows the usual asymmetric-quantization convention:
// input/kernel offsets are the NEGATED zero points (added to each raw value
// to recover its centered representation), while the output zero point is
// added back after accumulation.
 124 int32_t input_offset = -input_type.getQuantization().getZeroPoint();
 125 int32_t kernel_offset = -kernel_type.getQuantization().getZeroPoint();
 126 int32_t output_offset =
output_type.getQuantization().getZeroPoint();
 127
 128 double input_scale = input_type.getQuantization().getScale();
 129 double kernel_scale = kernel_type.getQuantization().getScale();
 130 double output_scale =
output_type.getQuantization().getScale();
 131
// Combined rescale factor taking the int32 accumulator back to the output's
// quantized domain; quantized into (output_multiplier, output_shift) by the
// QuantizeMultiplier call that is not visible in this extract.
 132 double real_multiplier = input_scale * kernel_scale / output_scale;
 133 int32_t output_multiplier = 0;
 134 int output_shift = 0;
 136
 142
// Shape contract (NHWC): kernel dim 2 must match the input channel count and
// kernel dim 3 is the depth multiplier. NOTE(review): the loops below always
// use oc == ic and index the kernel with a fixed 0 in its last dimension, so
// only depth multiplier == 1 appears to be handled — confirm that callers
// never pass kernel_shape.dim(3) > 1.
 143 assert(in_shape.rank() == 4);
 144 assert(kernel_shape.rank() == 4);
 145 assert(kernel_shape.dim(2) == in_shape.dim(3));
 146 assert(in_shape.dim(3) * kernel_shape.dim(3) == out_shape.dim(3));
 147 assert(strides.size() == 2);
 148 assert(pads.size() == 2);
 149
// Strides and (leading) paddings are given as {height, width} pairs.
 150 int32_t stride_height = strides[0];
 151 int32_t stride_width = strides[1];
 152
 153 int32_t pad_height = pads[0];
 154 int32_t pad_width = pads[1];
 155
 156 int32_t input_height = in_shape.dim(1);
 157 int32_t input_width = in_shape.dim(2);
 158
 163
// Saturation bounds of the uint8 output domain.
 164 int32_t output_min = std::numeric_limits<uint8_t>::min();
 165 int32_t output_max = std::numeric_limits<uint8_t>::max();
 166
 167 int batches = out_shape.dim(0);
 168 int output_height = out_shape.dim(1);
 169 int output_width = out_shape.dim(2);
 170 int input_depth = in_shape.dim(3);
 171
// Kernel layout implied by the asserts above: {height, width, channels, mult}.
 172 int filter_height = kernel_shape.dim(0);
 173 int filter_width = kernel_shape.dim(1);
 174
// Direct convolution: one int32 accumulation per output element.
 175 for (
int b = 0;
b < batches; ++
b)
 176 {
 177 for (int out_y = 0; out_y < output_height; ++out_y)
 178 {
 179 for (int out_x = 0; out_x < output_width; ++out_x)
 180 {
 181 for (int ic = 0; ic < input_depth; ++ic)
 182 {
// Depthwise: each input channel maps to exactly one output channel.
 183 const int oc = ic;
// Top-left corner of the receptive field in input coordinates (may be
// negative or past the edge; out-of-range taps are skipped below, i.e.
// implicit zero padding in the offset-corrected domain).
 184 const int in_x_origin = (out_x * stride_width) - pad_width;
 185 const int in_y_origin = (out_y * stride_height) - pad_height;
 186 int32_t acc = 0;
 187 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
 188 {
 189 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
 190 {
 191 const int in_x = in_x_origin + filter_x;
 192 const int in_y = in_y_origin + filter_y;
 193
 194
 195 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
 196 {
 197 Index in_index{
b, in_y, in_x, ic};
// Last kernel index fixed to 0: depth multiplier 1 (see NOTE above).
 198 Index ker_index{filter_y, filter_x, oc, 0};
 199 int32_t input_val = input_accessor.at(in_index);
 200 int32_t kernel_val = kernel_accessor.at(ker_index);
// Multiply the zero-point-corrected values; products and the running sum
// stay in int32.
 201 acc += (kernel_val + kernel_offset) * (input_val + input_offset);
 202 }
 203 }
 204 }
 205 acc += bias_accessor.at(Index{oc});
// NOTE(review): original line 206 (missing here) presumably rescaled acc with
// MultiplyByQuantizedMultiplier before the output offset is applied.
 207 acc += output_offset;
// Saturate to the uint8 range, then narrow.
 208 acc = std::max(acc, output_min);
 209 acc = std::min(acc, output_max);
 210 Index out_index{
b, out_y, out_x, oc};
 211 res_accessor.at(out_index) = static_cast<uint8_t>(acc);
 212 }
 213 }
 214 }
 215 }
 216}
const TensorType & getType() const
Gets the type of this output.
Output * getOutput(std::size_t index)
const Shape & getOutputShape(std::size_t index) const
const TensorType & getType() const
const Shape & getShape() const
const std::vector< std::int32_t > & getStrides() const
const std::vector< std::int32_t > & getPaddingBefore() const
int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)