unfold the vector with NHWC shape; the unfolded data replaces the vector's contents, so the call behaves as if it were in-place.
32{
33 assert(input_n > 0 && input_h > 0 && input_w > 0 && input_c > 0);
34 assert(stride_h > 0 && stride_w > 0);
35 assert(kernel_h > 0 && kernel_w > 0 && kernel_ic > 0);
36
37 if (input_c != kernel_ic)
38 throw std::runtime_error("RecordHessian: Input channels do not match kernel channels.");
39 uint32_t out_height = (input_h - dilation_h * (kernel_h - 1) - 1) / stride_h + 1;
40 uint32_t out_width = (input_w - dilation_w * (kernel_w - 1) - 1) / stride_w + 1;
41 uint32_t patch_size = kernel_h * kernel_w * kernel_ic;
42 std::vector<float> unfolded_buf(input_n * out_height * out_width * patch_size, 0.0f);
43
44 uint32_t index = 0;
45 uint32_t in_y, in_x;
46 for (uint32_t n = 0; n < input_n; ++n)
47 {
48 for (uint32_t y = 0; y < out_height; ++y)
49 {
50 for (uint32_t x = 0; x < out_width; ++x)
51 {
52 for (uint32_t in_c = 0; in_c < input_c; ++in_c)
53 {
54 for (uint32_t ky = 0; ky < kernel_h; ++ky)
55 {
56 for (uint32_t kx = 0; kx < kernel_w; ++kx)
57 {
58 in_y = y * stride_h + ky * dilation_h;
59 in_x = x * stride_w + kx * dilation_w;
60 if (in_y < input_h && in_x < input_w)
61 {
62 unfolded_buf[index] = buf[((n * input_h + in_y) * input_w + in_x) * input_c + in_c];
63 }
64 index++;
65 }
66 }
67 }
68 }
69 }
70 }
71
72 buf.swap(unfolded_buf);
73}