ONE - On-device Neural Engine
Loading...
Searching...
No Matches
TransposeConv.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
21#include <nonius/nonius.h++>
22
23#include <arm_compute/core/Types.h>
24#include <arm_compute/runtime/CL/CLScheduler.h>
25#include <arm_compute/runtime/CL/CLFunctions.h>
27
28#include <cstdint>
29#include <cassert>
30#include <stdexcept>
31
32#include "acl_common/Utils.h"
33
34using namespace arm_compute;
36
37//
38// Helpers
39//
40namespace
41{
42
// Tensor memory layouts exercised by the benchmarks in this file.
// The enumerator is used as the template argument of make_info<>() and of
// Configuration::src_info/dst_info/ker_info to pick the matching descriptor builder.
enum Layout
{
  NCHW = 0, // batch, channel, height, width
  NHWC = 1  // batch, height, width, channel
};
48
49struct Initializer
50{
51 Initializer() { CLScheduler::get().default_init(); }
52};
53
54Initializer initializer;
55
56TensorInfo make_info(uint32_t N)
57{
58 TensorShape shape{N};
59 return TensorInfo{shape, 1, DataType::F32};
60}
61
62template <enum Layout> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W);
63
64template <> TensorInfo make_info<NCHW>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
65{
66 TensorShape shape{W, H, C, N};
67 TensorInfo info{shape, 1, DataType::F32};
68 info.set_data_layout(DataLayout::NCHW);
69 return info;
70}
71
72template <> TensorInfo make_info<NHWC>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
73{
74 TensorShape shape{C, W, H, N};
75 TensorInfo info{shape, 1, DataType::F32};
76 info.set_data_layout(DataLayout::NHWC);
77 return info;
78}
79
80inline void check(const Status &status)
81{
82 if (!status)
83 {
84 std::cerr << status.error_description() << std::endl;
85 throw std::runtime_error{"ERROR"};
86 }
87}
88
/**
 * @brief Tell whether @p n is odd.
 *
 * @param n Value to test
 * @return true when @p n is not divisible by two, false otherwise
 */
inline constexpr bool is_odd(uint32_t n) noexcept { return (n % 2U) != 0U; }
90
91} // namespace
92
93//
94// Benchmark Parameters
95//
96NONIUS_PARAM(BATCH, 1);
97
98NONIUS_PARAM(IFM_C, 3);
99NONIUS_PARAM(IFM_H, 244);
100NONIUS_PARAM(IFM_W, 244);
101
102NONIUS_PARAM(OFM_C, 3);
103NONIUS_PARAM(OFM_H, 244);
104NONIUS_PARAM(OFM_W, 244);
105
106NONIUS_PARAM(KER_H, 3);
107NONIUS_PARAM(KER_W, 3);
108
109NONIUS_PARAM(STRIDE_H, 1);
110NONIUS_PARAM(STRIDE_W, 1);
111
112NONIUS_PARAM(PADDING, std::string{"SAME"})
113
114//
115// Configuration Helpers
116//
117namespace
118{
119
120struct Configuration
121{
122 uint32_t ifm_N;
123 uint32_t ifm_C;
124 uint32_t ifm_H;
125 uint32_t ifm_W;
126
127 uint32_t ofm_N;
128 uint32_t ofm_C;
129 uint32_t ofm_H;
130 uint32_t ofm_W;
131
132 uint32_t ker_N;
133 uint32_t ker_C;
134 uint32_t ker_H;
135 uint32_t ker_W;
136
137 uint32_t vertical_stride;
138 uint32_t horizontal_stride;
139
140 PadStrideInfo deconv_info;
141
142 uint32_t inner_border_right;
143 uint32_t inner_border_top;
144
145 Configuration(nonius::chronometer meter)
146 {
147 ifm_N = meter.param<BATCH>();
148 ifm_C = meter.param<IFM_C>();
149 ifm_H = meter.param<IFM_H>();
150 ifm_W = meter.param<IFM_W>();
151
152 ofm_N = meter.param<BATCH>();
153 ofm_C = meter.param<OFM_C>();
154 ofm_H = meter.param<OFM_H>();
155 ofm_W = meter.param<OFM_W>();
156
157 ker_N = meter.param<OFM_C>();
158 ker_C = meter.param<IFM_C>();
159 ker_H = meter.param<KER_H>();
160 ker_W = meter.param<KER_W>();
161
162 vertical_stride = meter.param<STRIDE_H>();
163 horizontal_stride = meter.param<STRIDE_W>();
164
165 // NOTE The padding calculation formula of TransposeConv is opposite to Conv.
166 // So the location of ifm and ofm is changed.
167 auto padding_info = calculatePadding(meter.param<PADDING>(), ofm_H, ofm_W, ifm_H, ifm_W,
168 vertical_stride, horizontal_stride, ker_H, ker_W);
169
170 inner_border_right = padding_info.right - padding_info.left;
171 inner_border_top = padding_info.bottom - padding_info.top;
172
173 padding_info.left = padding_info.right;
174 padding_info.top = padding_info.bottom;
175
176 deconv_info = asPadStrideInfo(padding_info, vertical_stride, horizontal_stride);
177 }
178
179 template <Layout L> TensorInfo src_info() const
180 {
181 return make_info<L>(ifm_N, ifm_C, ifm_H, ifm_W);
182 }
183 template <Layout L> TensorInfo dst_info() const
184 {
185 return make_info<L>(ofm_N, ofm_C, ofm_H, ofm_W);
186 }
187 template <Layout L> TensorInfo ker_info() const
188 {
189 return make_info<L>(ker_N, ker_C, ker_H, ker_W);
190 }
191 TensorInfo bias_info(void) const { return make_info(ker_N); }
192};
193
194} // namespace
195
196//
197// Benchmark Implementations
198//
199namespace
200{
201
202inline nonius::benchmark_registry &local_benchmark_registry()
203{
204 static nonius::benchmark_registry registry;
205 return registry;
206}
207
208} // namespace
209
// Registers a benchmark called `name` in local_benchmark_registry(). The remaining macro
// arguments are forwarded to the nonius::benchmark_registrar constructor; the registrar
// object gets a unique name and lives in an unnamed namespace.
#define NONIUS_LOCAL_BENCHMARK(name, ...)                                  \
  namespace                                                                \
  {                                                                        \
  static ::nonius::benchmark_registrar NONIUS_DETAIL_UNIQUE_NAME(          \
    benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__);   \
  }
216
217NONIUS_LOCAL_BENCHMARK("CLDeconvolutionLayer_NCHW", [](nonius::chronometer meter) {
218 CLDeconvolutionLayer deconv;
219
220 // Configure
221 Configuration p{meter};
222
223 CLTensor src_tensor{};
224 CLTensor dst_tensor{};
225 CLTensor ker_tensor{};
226
227 src_tensor.allocator()->init(p.src_info<NCHW>());
228 dst_tensor.allocator()->init(p.dst_info<NCHW>());
229 ker_tensor.allocator()->init(p.ker_info<NCHW>());
230
231 try
232 {
233 check(deconv.validate(src_tensor.info(), ker_tensor.info(), nullptr, dst_tensor.info(),
234 p.deconv_info, p.inner_border_right, p.inner_border_top));
235 }
236 catch (...)
237 {
238 meter.measure([&](int) {
239 // DO NOTHING
240 volatile int x = 0;
241 return x;
242 });
243 return;
244 }
245
246 deconv.configure(&src_tensor, &ker_tensor, nullptr, &dst_tensor, p.deconv_info,
247 p.inner_border_right, p.inner_border_top);
248
249 src_tensor.allocator()->allocate();
250 ker_tensor.allocator()->allocate();
251 dst_tensor.allocator()->allocate();
252
253 // Run!
254 meter.measure([&](int) {
255 deconv.run();
256 CLScheduler::get().sync();
257 });
258})
259
260NONIUS_LOCAL_BENCHMARK("CLDeconvolutionLayer_NHWC", [](nonius::chronometer meter) {
261 CLDeconvolutionLayer deconv;
262
263 // Configure
264 Configuration p{meter};
265
266 CLTensor src_tensor{};
267 CLTensor dst_tensor{};
268 CLTensor ker_tensor{};
269
270 src_tensor.allocator()->init(p.src_info<NHWC>());
271 dst_tensor.allocator()->init(p.dst_info<NHWC>());
272 ker_tensor.allocator()->init(p.ker_info<NHWC>());
273
274 try
275 {
276 check(deconv.validate(src_tensor.info(), ker_tensor.info(), nullptr, dst_tensor.info(),
277 p.deconv_info, p.inner_border_right, p.inner_border_top));
278 }
279 catch (...)
280 {
281 meter.measure([&](int) {
282 // DO NOTHING
283 volatile int x = 0;
284 return x;
285 });
286 return;
287 }
288
289 deconv.configure(&src_tensor, &ker_tensor, nullptr, &dst_tensor, p.deconv_info,
290 p.inner_border_right, p.inner_border_top);
291
292 src_tensor.allocator()->allocate();
293 ker_tensor.allocator()->allocate();
294 dst_tensor.allocator()->allocate();
295
296 // Run!
297 meter.measure([&](int) {
298 deconv.run();
299 CLScheduler::get().sync();
300 });
301})
302
303NONIUS_LOCAL_BENCHMARK("CLDeconvolutionLayerEx_NCHW", [](nonius::chronometer meter) {
304 CLDeconvolutionLayerEx deconv;
305
306 // Configure
307 Configuration p{meter};
308
309 CLTensor src_tensor{};
310 CLTensor dst_tensor{};
311 CLTensor ker_tensor{};
312
313 src_tensor.allocator()->init(p.src_info<NCHW>());
314 dst_tensor.allocator()->init(p.dst_info<NCHW>());
315 ker_tensor.allocator()->init(p.ker_info<NCHW>());
316
317 try
318 {
319 check(deconv.validate(src_tensor.info(), ker_tensor.info(), nullptr, dst_tensor.info(),
320 p.deconv_info, p.inner_border_right, p.inner_border_top));
321 }
322 catch (...)
323 {
324 meter.measure([&](int) {
325 // DO NOTHING
326 volatile int x = 0;
327 return x;
328 });
329 return;
330 }
331
332 deconv.configure(&src_tensor, &ker_tensor, nullptr, &dst_tensor, p.deconv_info,
333 p.inner_border_right, p.inner_border_top);
334
335 src_tensor.allocator()->allocate();
336 ker_tensor.allocator()->allocate();
337 dst_tensor.allocator()->allocate();
338
339 // Run!
340 meter.measure([&](int) {
341 deconv.run();
342 CLScheduler::get().sync();
343 });
344})
345
346NONIUS_LOCAL_BENCHMARK("CLDeconvolutionLayerEx_NHWC", [](nonius::chronometer meter) {
347 CLDeconvolutionLayerEx deconv;
348
349 // Configure
350 Configuration p{meter};
351
352 CLTensor src_tensor{};
353 CLTensor dst_tensor{};
354 CLTensor ker_tensor{};
355
356 src_tensor.allocator()->init(p.src_info<NHWC>());
357 dst_tensor.allocator()->init(p.dst_info<NHWC>());
358 ker_tensor.allocator()->init(p.ker_info<NHWC>());
359
360 try
361 {
362 check(deconv.validate(src_tensor.info(), ker_tensor.info(), nullptr, dst_tensor.info(),
363 p.deconv_info, p.inner_border_right, p.inner_border_top));
364 }
365 catch (...)
366 {
367 meter.measure([&](int) {
368 // DO NOTHING
369 volatile int x = 0;
370 return x;
371 });
372 return;
373 }
374
375 deconv.configure(&src_tensor, &ker_tensor, nullptr, &dst_tensor, p.deconv_info,
376 p.inner_border_right, p.inner_border_top);
377
378 src_tensor.allocator()->allocate();
379 ker_tensor.allocator()->allocate();
380 dst_tensor.allocator()->allocate();
381
382 // Run!
383 meter.measure([&](int) {
384 deconv.run();
385 CLScheduler::get().sync();
386 });
387})
388
389extern "C" nonius::benchmark_registry &benchmark_functions(void)
390{
391 return local_benchmark_registry();
392}
volatile const char info[]
::nncc::core::ADT::tensor::Shape TensorShape
Definition TensorShape.h:25
C
Definition infer.py:52
PadStrideInfo asPadStrideInfo(const PaddingInfo &padding, uint32_t vertical_stride, uint32_t horizontal_stride)
Definition Utils.h:70
PaddingInfo calculatePadding(const std::string &padding_name, const uint32_t ifm_H, const uint32_t ifm_W, const uint32_t ofm_H, const uint32_t ofm_W, const uint32_t vertical_stride, const uint32_t horizontal_stride, const uint32_t ker_H, const uint32_t ker_W)
Definition Utils.h:39
nonius::chronometer meter
NONIUS_PARAM(BATCH, 1)
CLTensor dst_tensor
CLTensor src_tensor
nonius::benchmark_registry & benchmark_functions(void)
Configuration p
CLTensor ker_tensor
nonius::chronometer meter
#define NONIUS_LOCAL_BENCHMARK(name,...)