assert(in_shape.rank() == 4);
assert(kernel_shape.rank() == 4);
assert(kernel_shape.dim(2) == in_shape.dim(3));
assert(in_shape.dim(3) * kernel_shape.dim(3) == out_shape.dim(3));
assert(strides.size() == 2);
assert(pads.size() == 2);

int32_t channel_multiplier = kernel_shape.dim(3);
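// Elided lines presumably create the zero-initialized output tensor, the
// iteration ranges `out_range` / `kernel_range`, and the scratch index
// `in_index` used below (inferred from the loop body; they are not shown in
// this excerpt). The loop then visits every output element and every kernel
// element: input and output are NHWC, the kernel is laid out as
// [filter_h, filter_w, in_channels, channel_multiplier], and output channel
// `ic * channel_multiplier + m` is accumulated from input channel `ic`.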
for (const auto &out_index : out_range)
{
  Index out_index_k = out_index;
  for (const auto &kernel_index : kernel_range)
  {
    in_index.at(0) = out_index.at(0);
    for (int i = 0; i < 2; ++i)
      in_index.at(1 + i) = out_index.at(1 + i) * strides[i] + kernel_index.at(i) - pads[i];
    in_index.at(3) = kernel_index.at(2);

    out_index_k.at(3) = kernel_index.at(2) * channel_multiplier + kernel_index.at(3);
    res_accessor.at(out_index_k) += input.at(in_index) * kernel.at(kernel_index);
  }
}
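// The lines below come from the quantized (uint8) variant of the same kernel;
// the intervening source is elided in this excerpt. The quantized path
// requires a fused bias, hence the guard that follows.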
throw std::runtime_error{"Unsupported quantized DepthwiseConv2D without fused bias"};
const auto &input_type = inputv.getType();
const auto &kernel_type = kernelv.getType();
const auto &bias_type = biasv->getType();

assert(input_type.isQuantized());
assert(kernel_type.isQuantized());
assert(bias_type.isQuantized());
assert(output_type.isQuantized());
assert(input_type.getElementType() == DataType::UINT8);
assert(kernel_type.getElementType() == DataType::UINT8);
assert(bias_type.getElementType() == DataType::INT32);
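// The offsets below follow the usual affine-quantization convention: the input
// and kernel zero points are negated so that `value + offset` in the inner
// loop yields the term proportional to the real value, while the output zero
// point is added back after the accumulator has been rescaled.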
int32_t input_offset = -input_type.getQuantization().getZeroPoint();
int32_t kernel_offset = -kernel_type.getQuantization().getZeroPoint();
int32_t output_offset = output_type.getQuantization().getZeroPoint();

double input_scale = input_type.getQuantization().getScale();
double kernel_scale = kernel_type.getQuantization().getScale();
double output_scale = output_type.getQuantization().getScale();

double real_multiplier = input_scale * kernel_scale / output_scale;
int32_t output_multiplier = 0;
int output_shift = 0;
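// `output_multiplier` and `output_shift` are only declared here; the elided
// code presumably fills them from `real_multiplier`. As a minimal sketch (an
// assumption, not this listing's own helper), the common gemmlowp/TFLite-style
// decomposition into a Q31 fixed-point multiplier and a power-of-two shift
// looks like this (needs <cmath> and <cstdint>):
//
//   void quantizeMultiplierSketch(double multiplier,
//                                 int32_t *quantized_multiplier, int *shift)
//   {
//     if (multiplier == 0.0)
//     {
//       *quantized_multiplier = 0;
//       *shift = 0;
//       return;
//     }
//     // multiplier == q * 2^shift with q in [0.5, 1)
//     const double q = std::frexp(multiplier, shift);
//     auto q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
//     if (q_fixed == (1LL << 31)) // rounding pushed q up to 1.0
//     {
//       q_fixed /= 2;
//       ++*shift;
//     }
//     *quantized_multiplier = static_cast<int32_t>(q_fixed);
//   }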
assert(in_shape.rank() == 4);
assert(kernel_shape.rank() == 4);
assert(kernel_shape.dim(2) == in_shape.dim(3));
assert(in_shape.dim(3) * kernel_shape.dim(3) == out_shape.dim(3));
assert(strides.size() == 2);
assert(pads.size() == 2);
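// Strides and pads are {height, width} pairs; shapes are NHWC for the input
// and output and [filter_h, filter_w, in_channels, channel_multiplier] for the
// kernel, matching the assertions above.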
int32_t stride_height = strides[0];
int32_t stride_width = strides[1];

int32_t pad_height = pads[0];
int32_t pad_width = pads[1];

int32_t input_height = in_shape.dim(1);
int32_t input_width = in_shape.dim(2);

int32_t output_min = std::numeric_limits<uint8_t>::min();
int32_t output_max = std::numeric_limits<uint8_t>::max();

int batches = out_shape.dim(0);
int output_height = out_shape.dim(1);
int output_width = out_shape.dim(2);
int input_depth = in_shape.dim(3);

int filter_height = kernel_shape.dim(0);
int filter_width = kernel_shape.dim(1);
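// The main quantized loop: for every output element it accumulates
// (input + input_offset) * (kernel + kernel_offset) over the filter window in
// 32-bit integers, adds the INT32 bias, adds the output zero point, clamps to
// the uint8 range, and stores the result. Rescaling of the accumulator by
// `output_multiplier` / `output_shift` sits in elided lines (an assumption
// based on the variables declared above).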
for (int b = 0; b < batches; ++b)
{
  for (int out_y = 0; out_y < output_height; ++out_y)
  {
    for (int out_x = 0; out_x < output_width; ++out_x)
    {
      for (int ic = 0; ic < input_depth; ++ic)
      {
        // ... (elided: output channel index `oc` and accumulator `acc` are set up here)
        const int in_x_origin = (out_x * stride_width) - pad_width;
        const int in_y_origin = (out_y * stride_height) - pad_height;

        for (int filter_y = 0; filter_y < filter_height; ++filter_y)
        {
          for (int filter_x = 0; filter_x < filter_width; ++filter_x)
          {
            const int in_x = in_x_origin + filter_x;
            const int in_y = in_y_origin + filter_y;

            // Out-of-bounds taps contribute nothing, i.e. implicit zero padding.
            if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
            {
              Index in_index{b, in_y, in_x, ic};
              Index ker_index{filter_y, filter_x, oc, 0};
              int32_t input_val = input_accessor.at(in_index);
              int32_t kernel_val = kernel_accessor.at(ker_index);
              acc += (kernel_val + kernel_offset) * (input_val + input_offset);
            }
          }
        }

        acc += bias_accessor.at(Index{oc});
        // ... (elided: `acc` is presumably rescaled using `output_multiplier` and `output_shift`)
        acc += output_offset;
        acc = std::max(acc, output_min);
        acc = std::min(acc, output_max);
        Index out_index{b, out_y, out_x, oc};
        res_accessor.at(out_index) = static_cast<uint8_t>(acc);
      }
    }
  }
}