34 const Shape &filter_shape,
const float *filter_data,
35 [[maybe_unused]]
const Shape &bias_shape,
const float *bias_data,
51 const int input_depth =
MatchingDim(input_shape, 3, filter_shape, 3);
55 assert(bias_shape.FlatSize() == output_depth);
57 const int input_height = input_shape.
Dims(1);
58 const int input_width = input_shape.
Dims(2);
59 const int filter_height = filter_shape.
Dims(1);
60 const int filter_width = filter_shape.
Dims(2);
63 for (
int batch = 0; batch < batches; ++batch)
65 for (
int out_y = 0; out_y < output_height; ++out_y)
67 for (
int out_x = 0; out_x < output_width; ++out_x)
69 for (
int out_channel = 0; out_channel < output_depth; ++out_channel)
71 const int in_x_origin = (out_x * stride_width) - pad_width;
72 const int in_y_origin = (out_y * stride_height) - pad_height;
74 for (
int filter_y = 0; filter_y < filter_height; ++filter_y)
76 for (
int filter_x = 0; filter_x < filter_width; ++filter_x)
78 const int in_x = in_x_origin + dilation_width_factor * filter_x;
79 const int in_y = in_y_origin + dilation_height_factor * filter_y;
82 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
84 const int in_offset =
Offset(input_shape, batch, in_y, in_x, 0);
85 const int filter_offset =
Offset(filter_shape, out_channel, filter_y, filter_x, 0);
86 for (
int in_channel = 0; in_channel < input_depth; ++in_channel)
88 float input_value = input_data[in_offset + in_channel];
89 float filter_value = filter_data[filter_offset + in_channel];
90 total += (input_value * filter_value);
95 float bias_value = 0.0f;
98 bias_value = bias_data[out_channel];
102 output_activation_max);
110 const Shape &filter_shape,
const uint8_t *filter_data,
111 [[maybe_unused]]
const Shape &bias_shape,
const int32_t *bias_data,
127 assert(output_activation_min <= output_activation_max);
133 const int input_depth =
MatchingDim(input_shape, 3, filter_shape, 3);
137 assert(bias_shape.FlatSize() == output_depth);
139 const int input_height = input_shape.
Dims(1);
140 const int input_width = input_shape.
Dims(2);
141 const int filter_height = filter_shape.
Dims(1);
142 const int filter_width = filter_shape.
Dims(2);
145 for (
int batch = 0; batch < batches; ++batch)
147 for (
int out_y = 0; out_y < output_height; ++out_y)
149 for (
int out_x = 0; out_x < output_width; ++out_x)
151 for (
int out_channel = 0; out_channel < output_depth; ++out_channel)
153 const int in_x_origin = (out_x * stride_width) - pad_width;
154 const int in_y_origin = (out_y * stride_height) - pad_height;
156 for (
int filter_y = 0; filter_y < filter_height; ++filter_y)
158 for (
int filter_x = 0; filter_x < filter_width; ++filter_x)
160 const int in_x = in_x_origin + dilation_width_factor * filter_x;
161 const int in_y = in_y_origin + dilation_height_factor * filter_y;
164 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
166 const int in_base =
Offset(input_shape, batch, in_y, in_x, 0);
167 const int filter_base =
Offset(filter_shape, out_channel, filter_y, filter_x, 0);
168 for (
int in_channel = 0; in_channel < input_depth; in_channel++)
170 int32_t input_val = input_data[in_channel + in_base];
171 int32_t filter_val = filter_data[in_channel + filter_base];
172 acc += (filter_val + filter_offset) * (input_val + input_offset);
179 acc += bias_data[out_channel];
182 acc += output_offset;
183 acc = std::max(acc, output_activation_min);
184 acc = std::min(acc, output_activation_max);
186 static_cast<uint8_t
>(acc);
195Conv(
const ConvParams ¶ms,
const int32_t *output_multiplier,
const int32_t *output_shift,
196 const Shape &input_shape,
const T *input_data,
const Shape &filter_shape,
const T *filter_data,
197 [[maybe_unused]]
const int32_t *filter_zeropoint, [[maybe_unused]]
const Shape &bias_shape,
216 assert(output_activation_min < output_activation_max);
221 const int input_depth =
MatchingDim(input_shape, 3, filter_shape, 3);
225 assert(bias_shape.FlatSize() == output_depth);
229 const int input_height = input_shape.
Dims(1);
230 const int input_width = input_shape.
Dims(2);
231 const int filter_height = filter_shape.
Dims(1);
232 const int filter_width = filter_shape.
Dims(2);
235 for (
int batch = 0; batch < batches; ++batch)
237 for (
int out_y = 0; out_y < output_height; ++out_y)
239 const int in_y_origin = (out_y * stride_height) - pad_height;
240 for (
int out_x = 0; out_x < output_width; ++out_x)
242 const int in_x_origin = (out_x * stride_width) - pad_width;
243 for (
int out_channel = 0; out_channel < output_depth; ++out_channel)
246 for (
int filter_y = 0; filter_y < filter_height; ++filter_y)
248 const int in_y = in_y_origin + dilation_height_factor * filter_y;
249 for (
int filter_x = 0; filter_x < filter_width; ++filter_x)
251 const int in_x = in_x_origin + dilation_width_factor * filter_x;
254 const bool is_point_inside_image =
255 (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
257 if (!is_point_inside_image)
262 for (
int in_channel = 0; in_channel < input_depth; ++in_channel)
264 const T input_val = input_data[
Offset(input_shape, batch, in_y, in_x, in_channel)];
266 filter_data[
Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)];
269 const int32_t filter_offset = -filter_zeropoint[out_channel];
270 acc += (filter_val + filter_offset) * (input_val + input_offset);
290 acc += filter_val * (input_val + input_offset);
298 acc += bias_data[out_channel];
301 output_shift[out_channel]);
302 acc += output_offset;
303 acc = std::max(acc, output_activation_min);
304 acc = std::min(acc, output_activation_max);
305 output_data[
Offset(
output_shape, batch, out_y, out_x, out_channel)] =
static_cast<T
>(acc);
315 const Shape &input_shape,
const int8_t *input_data,
316 const Shape &filter_shape,
const int8_t *filter_data,
317 [[maybe_unused]]
const Shape &bias_shape,
const float *bias_data,
319 const float *per_channel_scale,
const int32_t *input_offset)
334 const int input_depth = input_shape.
Dims(3);
338 assert(bias_shape.FlatSize() == output_depth);
340 const int input_height = input_shape.
Dims(1);
341 const int input_width = input_shape.
Dims(2);
342 const int filter_height = filter_shape.
Dims(1);
343 const int filter_width = filter_shape.
Dims(2);
344 const int filter_input_depth = filter_shape.
Dims(3);
345 const int groups = input_depth / filter_input_depth;
346 assert(input_depth % filter_input_depth == 0);
347 const int filters_per_group = output_depth / groups;
350 for (
int batch = 0; batch < batches; ++batch)
352 for (
int out_y = 0; out_y < output_height; ++out_y)
354 for (
int out_x = 0; out_x < output_width; ++out_x)
356 for (
int out_channel = 0; out_channel < output_depth; ++out_channel)
358 auto group = out_channel / filters_per_group;
359 const int in_x_origin = (out_x * stride_width) - pad_width;
360 const int in_y_origin = (out_y * stride_height) - pad_height;
362 for (
int filter_y = 0; filter_y < filter_height; ++filter_y)
364 for (
int filter_x = 0; filter_x < filter_width; ++filter_x)
366 for (
int in_channel = 0; in_channel < filter_input_depth; ++in_channel)
368 const int in_x = in_x_origin + dilation_width_factor * filter_x;
369 const int in_y = in_y_origin + dilation_height_factor * filter_y;
372 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
374 int32_t input_val = input_data[
Offset(input_shape, batch, in_y, in_x,
375 in_channel + group * filter_input_depth)];
377 filter_data[
Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)];
378 acc += filter_val * (input_val - input_offset[batch]);
383 float acc_float = acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch];
386 acc_float += bias_data[out_channel];
void HybridConvPerChannel(const ConvParams ¶ms, float *scaling_factors_ptr, const Shape &input_shape, const int8_t *input_data, const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape, const float *bias_data, const Shape &output_shape, float *output_data, const float *per_channel_scale, const int32_t *input_offset)