45{
46
47 TensorVariant transposed_kernel = transposeTensor<3, 0, 1, 2>(kernel);
48
49 const auto *
input_data =
reinterpret_cast<const T *
>(
input.atOffset(0));
50 const auto *kernel_data = reinterpret_cast<const T *>(transposed_kernel.atOffset(0));
52
55 const Shape &kernel_shape = transposed_kernel.getShape();
56
57 const std::vector<int32_t> &strides = attributes.strides;
58 const std::vector<int32_t> &padding_before = attributes.padding_before;
59 assert(attributes.data_format == DataFormat::NHWC);
60
64 const int32_t kernel_height = kernel_shape.dim(1);
65 const int32_t kernel_width = kernel_shape.dim(2);
66 const int32_t input_height = input_shape.dim(1);
67 const int32_t input_width = input_shape.dim(2);
68
69 const int32_t num_in_channels = input_shape.dim(3);
71
72 assert(kernel_shape.dim(0) == num_in_channels);
73 assert(kernel_shape.dim(3) == num_out_channels);
74
75 erase<T>(output);
76
77 for (int32_t batch = 0; batch < batch_size; ++batch)
78 {
79 for (int32_t in_y = 0; in_y < input_height; ++in_y)
80 {
81 for (int32_t in_x = 0; in_x < input_width; ++in_x)
82 {
83 for (int32_t in_c = 0; in_c < num_in_channels; ++in_c)
84 {
86 const int32_t out_y_origin = in_y * strides[0] - padding_before[0];
87 const int32_t out_x_origin = in_x * strides[1] - padding_before[1];
88
89 for (int32_t kernel_y = 0; kernel_y < kernel_height; ++kernel_y)
90 {
91 for (int32_t kernel_x = 0; kernel_x < kernel_width; ++kernel_x)
92 {
93 const int32_t out_y = out_y_origin + kernel_y;
94 const int32_t out_x = out_x_origin + kernel_x;
95
96 if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
97 {
98 for (int32_t out_c = 0; out_c < num_out_channels; ++out_c)
99 {
100 const int32_t kernel_offset =
101 calcOffset(kernel_shape, in_c, kernel_y, kernel_x, out_c);
102 const int32_t output_offset =
104 const T kernel_val = kernel_data[kernel_offset];
105 output_data[output_offset] += input_val * kernel_val;
106 }
107 }
108 }
109 }
110 }
111 }
112 }
113 }
114}
const luci_interpreter::RuntimeShape output_shape
int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)