ONE - On-device Neural Engine
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Pad.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef __NNFW_CKER_PAD_H__
18#define __NNFW_CKER_PAD_H__
19
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"

#include <algorithm> // std::fill_n
#include <cassert>   // assert
#include <cstdint>   // int32_t
#include <cstring>   // std::memcpy
#include <iostream>  // NOTE(review): appears unused here — kept in case another TU relies on it
#include <stdexcept> // std::runtime_error
#include <utility>   // std::pair
#include <vector>    // std::vector
25namespace nnfw
26{
27namespace cker
28{
29template <typename T>
30inline void Pad(const int32_t *padding_data, int32_t pad_rank, const Shape &input_shape,
31 const T *input_data, const Shape &output_shape, T *output_data,
32 const T *constant_value_data)
33{
34 // Note, this is pad with mode=`CONSTANT`: it doesn't support `REFLECT` and `SYMMETRIC`
35 // TODO: come up with more subtle solution that uses subtensors like arm compute
36 // TODO: Check if it works for all layouts
37
38 using PaddingInfo = std::pair<int32_t, int32_t>;
40 using PaddingList = std::vector<PaddingInfo>;
41
42 const T constant_value = constant_value_data ? *constant_value_data : 0;
43 assert(output_shape.DimensionsCount() == input_shape.DimensionsCount());
44
45 PaddingList padding_list(pad_rank);
46 for (int32_t n = 0; n < pad_rank; ++n)
47 {
48 const int32_t *from = padding_data + (n * 2);
49 padding_list[n] = {from[0], from[1]};
50 }
51 for (int32_t i = 0; i < pad_rank; ++i)
52 {
53 assert(output_shape.Dims(i) ==
54 input_shape.Dims(i) + padding_list[i].first + padding_list[i].second);
55 }
56 /* Use pad_rank since given input/output shapes are expanded to 4d before calling all cker
57 functions:
58 1. to prevent access violation in padding_list;
59 2. handling as 4d is slower than as 2d/3d.
60 */
61 switch (pad_rank)
62 {
63 case 0:
64 case 1:
65 {
66 const int32_t in_row_len = input_shape.Dims(0);
67 [[maybe_unused]] auto [pad_before, pad_after] = padding_list[0];
68 std::fill_n(output_data, pad_before, constant_value);
69 std::memcpy(output_data + pad_before, input_data, in_row_len * sizeof(T));
70 std::fill_n(output_data + pad_before + in_row_len, pad_after, constant_value);
71 break;
72 }
73 case 2: // HW
74 {
75 const int32_t in_row_len = input_shape.Dims(1);
76 const int32_t out_row_size = output_shape.Dims(1);
77
78 auto [pad_top, pad_bottom] = padding_list[0];
79 auto [pad_left, pad_right] = padding_list[1];
80
81 // Prepend padding rows
82 std::fill_n(output_data, pad_top * out_row_size, constant_value);
83
84 const auto r_h_inp_lim = input_shape.Dims(0) + pad_top;
85 for (auto i = pad_top, j = 0; i < r_h_inp_lim; ++i, ++j)
86 {
87 auto out_offset = i * out_row_size;
88 const auto in_offset = j * in_row_len;
89
90 // Prepend padding values
91 std::fill_n(output_data + out_offset, pad_left, constant_value);
92 out_offset += pad_left;
93
94 // Copy a row of input data
95 memcpy(output_data + out_offset, input_data + in_offset, in_row_len * sizeof(T));
96 out_offset += in_row_len;
97
98 // Append padding values
99 std::fill_n(output_data + out_offset, pad_right, constant_value);
100 }
101
102 // Append padding rows
103 std::fill_n(output_data + r_h_inp_lim * out_row_size, pad_bottom * out_row_size,
104 constant_value);
105 break;
106 }
107 case 3: // HWC
108 {
109 const int32_t in_row_len = input_shape.Dims(2);
110 const int32_t out_row_size = output_shape.Dims(2);
111 const auto plain_size = out_row_size * output_shape.Dims(1);
112
113 auto [pad_batches_before, pad_batches_after] = padding_list[0];
114 auto [pad_parallelepipes_before, pad_parallelepipes_after] = padding_list[1];
115 auto [pad_plains_before, pad_plains_after] = padding_list[2];
116
117 // Prepend padding plains
118 std::fill_n(output_data, pad_batches_before * plain_size, constant_value);
119
120 const auto r_h_inp_lim = input_shape.Dims(0) + pad_batches_before;
121 for (auto i = pad_batches_before, i_inp = 0; i < r_h_inp_lim; ++i, ++i_inp)
122 {
123 const auto out_w_offset = (i * output_shape.Dims(1)) * output_shape.Dims(2);
124
125 // Prepend padding rows
126 std::fill_n(output_data + out_w_offset, pad_parallelepipes_before * out_row_size,
127 constant_value);
128
129 const auto r_w_inp_lim = input_shape.Dims(1) + pad_parallelepipes_before;
130 for (auto j = pad_parallelepipes_before, j_inp = 0; j < r_w_inp_lim; ++j, ++j_inp)
131 {
132 auto out_offset = (i * output_shape.Dims(1) + j) * output_shape.Dims(2);
133 const auto in_offset = (i_inp * input_shape.Dims(1) + j_inp) * input_shape.Dims(2);
134
135 // Prepend padding values
136 std::fill_n(output_data + out_offset, pad_plains_before, constant_value);
137 out_offset += pad_plains_before;
138
139 // Copy a row of input data
140 memcpy(output_data + out_offset, input_data + in_offset, in_row_len * sizeof(T));
141 out_offset += in_row_len;
142
143 // Append padding values
144 std::fill_n(output_data + out_offset, pad_plains_after, constant_value);
145 }
146
147 // Append padding rows
148 std::fill_n(output_data + out_w_offset + r_w_inp_lim * out_row_size,
149 pad_parallelepipes_after * out_row_size, constant_value);
150 }
151
152 // Append padding plains
153 std::fill_n(output_data + r_h_inp_lim * plain_size, pad_batches_after * plain_size,
154 constant_value);
155 break;
156 }
157 case 4:
158 {
159 auto get_offset = [](const Shape &shape, int32_t n, int32_t h, int32_t w) -> int32_t {
160 return ((n * shape.Dims(1) + h) * shape.Dims(2) + w) * shape.Dims(3);
161 };
162 const int32_t in_row_len = input_shape.Dims(3);
163 const int32_t out_row_size = output_shape.Dims(3);
164 const auto plain_size = out_row_size * output_shape.Dims(2);
165 const auto parallelepiped_size = plain_size * output_shape.Dims(1);
166
167 auto [pad_batches_before, pad_batches_after] = padding_list[0];
168 auto [pad_parallelepipes_before, pad_parallelepipes_after] = padding_list[1];
169 auto [pad_plains_before, pad_plains_after] = padding_list[2];
170 auto [pad_rows_before, pad_rows_after] = padding_list[3];
171
172 // Prepend padding parallelepipeds
173 std::fill_n(output_data, pad_batches_before * parallelepiped_size, constant_value);
174
175 const auto r_b_inp_lim = input_shape.Dims(0) + pad_batches_before;
176 for (auto i = pad_batches_before, i_inp = 0; i < r_b_inp_lim; ++i, ++i_inp)
177 {
178 const auto out_h_offset = get_offset(output_shape, i, 0, 0);
179 // Prepend padding plains
180 std::fill_n(output_data + out_h_offset, pad_parallelepipes_before * plain_size,
181 constant_value);
182
183 const auto r_h_inp_lim = input_shape.Dims(1) + pad_parallelepipes_before;
184 for (auto j = pad_parallelepipes_before, j_inp = 0; j < r_h_inp_lim; ++j, ++j_inp)
185 {
186 const auto out_w_offset = get_offset(output_shape, i, j, 0);
187
188 // Prepend padding rows
189 std::fill_n(output_data + out_w_offset, pad_plains_before * out_row_size, constant_value);
190
191 const auto r_w_inp_lim = input_shape.Dims(2) + pad_plains_before;
192 for (auto k = pad_plains_before, k_inp = 0; k < r_w_inp_lim; ++k, ++k_inp)
193 {
194 auto out_c_offset = get_offset(output_shape, i, j, k);
195 const auto in_offset = get_offset(input_shape, i_inp, j_inp, k_inp);
196
197 // Prepend padding values
198 std::fill_n(output_data + out_c_offset, pad_rows_before, constant_value);
199 out_c_offset += pad_rows_before;
200
201 // Copy a row of input data
202 memcpy(output_data + out_c_offset, input_data + in_offset, in_row_len * sizeof(T));
203 out_c_offset += in_row_len;
204
205 // Append padding values
206 std::fill_n(output_data + out_c_offset, pad_rows_after, constant_value);
207 }
208
209 // Append padding rows
210 std::fill_n(output_data + out_w_offset + r_w_inp_lim * out_row_size,
211 pad_plains_after * out_row_size, constant_value);
212 }
213
214 // Append padding plains
215 std::fill_n(output_data + out_h_offset + r_h_inp_lim * plain_size,
216 pad_parallelepipes_after * plain_size, constant_value);
217 }
218
219 // Append padding parallelepipeds
220 std::fill_n(output_data + r_b_inp_lim * parallelepiped_size,
221 pad_batches_after * parallelepiped_size, constant_value);
222 break;
223 break;
224 }
225 default:
226 throw std::runtime_error("Padding for rank > 4 NYI");
227 break;
228 }
229}
230} // namespace cker
231} // namespace nnfw
232
233#endif // __NNFW_CKER_PAD_H__
int32_t DimensionsCount() const
Definition Shape.h:107
int32_t Dims(int i) const
Definition Shape.h:110
const luci_interpreter::RuntimeShape output_shape
void Pad(const int32_t *padding_data, int32_t pad_rank, const Shape &input_shape, const T *input_data, const Shape &output_shape, T *output_data, const T *constant_value_data)
Definition Pad.h:30
Definition topk_v2.h:30