ONE - On-device Neural Engine
Convolution.cpp
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <nonius/nonius.h++>

#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/CL/CLScheduler.h>
#include <arm_compute/runtime/CL/CLFunctions.h>

#include <cstdint>
#include <cassert>
#include <iostream> // for std::cerr in check() below
#include <stdexcept>

using namespace arm_compute;

//
// Helpers
//
namespace
{

enum Layout
{
  NCHW,
  NHWC
};

struct Initializer
{
  Initializer() { CLScheduler::get().default_init(); }
};

Initializer initializer;

TensorInfo make_info(uint32_t N)
{
  TensorShape shape{N};
  return TensorInfo{shape, 1, DataType::F32};
}

template <enum Layout> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W);

template <> TensorInfo make_info<NCHW>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
{
  TensorShape shape{W, H, C, N};
  TensorInfo info{shape, 1, DataType::F32};
  info.set_data_layout(DataLayout::NCHW);
  return info;
}

template <> TensorInfo make_info<NHWC>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
{
  TensorShape shape{C, W, H, N};
  TensorInfo info{shape, 1, DataType::F32};
  info.set_data_layout(DataLayout::NHWC);
  return info;
}

inline void check(const Status &status)
{
  if (!status)
  {
    std::cerr << status.error_description() << std::endl;
    throw std::runtime_error{"ERROR"};
  }
}

inline bool is_odd(uint32_t n) { return (n % 2) != 0; }

} // namespace
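
// For reference: the helpers above pass dimensions to TensorShape innermost-first,
// which is the ordering arm_compute expects for each data layout. With the default
// parameter values used below, for example:
//
//   make_info<NCHW>(1, 3, 244, 244) -> TensorShape{244 /*W*/, 244 /*H*/, 3 /*C*/, 1 /*N*/}
//   make_info<NHWC>(1, 3, 244, 244) -> TensorShape{3 /*C*/, 244 /*W*/, 244 /*H*/, 1 /*N*/}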

//
// Benchmark Parameters
//
NONIUS_PARAM(BATCH, 1);

NONIUS_PARAM(IFM_C, 3);
NONIUS_PARAM(IFM_H, 244);
NONIUS_PARAM(IFM_W, 244);

NONIUS_PARAM(OFM_C, 3);
NONIUS_PARAM(OFM_H, 244);
NONIUS_PARAM(OFM_W, 244);

NONIUS_PARAM(KER_H, 3);
NONIUS_PARAM(KER_W, 3);

NONIUS_PARAM(STRIDE_H, 1);
NONIUS_PARAM(STRIDE_W, 1);

NONIUS_PARAM(PADDING, std::string{"SAME"})
NONIUS_PARAM(FUSED_ACT, std::string{"RELU"})

//
// Configuration Helpers
//
namespace
{

struct Configuration
{
  uint32_t ifm_N;
  uint32_t ifm_C;
  uint32_t ifm_H;
  uint32_t ifm_W;

  uint32_t ofm_N;
  uint32_t ofm_C;
  uint32_t ofm_H;
  uint32_t ofm_W;

  uint32_t ker_N;
  uint32_t ker_C;
  uint32_t ker_H;
  uint32_t ker_W;

  uint32_t vertical_stride;
  uint32_t horizontal_stride;

  std::string padding;
  std::string fused_act;

  uint32_t top_padding;
  uint32_t bottom_padding;
  uint32_t left_padding;
  uint32_t right_padding;

  Configuration(nonius::chronometer meter)
  {
    ifm_N = meter.param<BATCH>();
    ifm_C = meter.param<IFM_C>();
    ifm_H = meter.param<IFM_H>();
    ifm_W = meter.param<IFM_W>();

    ofm_N = meter.param<BATCH>();
    ofm_C = meter.param<OFM_C>();
    ofm_H = meter.param<OFM_H>();
    ofm_W = meter.param<OFM_W>();

    ker_N = meter.param<OFM_C>();
    ker_C = meter.param<IFM_C>();
    ker_H = meter.param<KER_H>();
    ker_W = meter.param<KER_W>();

    vertical_stride = meter.param<STRIDE_H>();
    horizontal_stride = meter.param<STRIDE_W>();

    padding = meter.param<PADDING>();
    fused_act = meter.param<FUSED_ACT>();

    assert((ifm_H - ker_H) % vertical_stride == 0);
    assert((ifm_W - ker_W) % horizontal_stride == 0);

    // Output size of the convolution before any padding is applied
    uint32_t const effective_ofm_H = (ifm_H - ker_H) / vertical_stride + 1;
    uint32_t const effective_ofm_W = (ifm_W - ker_W) / horizontal_stride + 1;

    assert(ofm_H >= effective_ofm_H);
    assert(ofm_W >= effective_ofm_W);

    uint32_t const pad_H = ofm_H - effective_ofm_H;
    uint32_t const pad_W = ofm_W - effective_ofm_W;

    top_padding = pad_H / 2;
    bottom_padding = pad_H / 2;
    left_padding = pad_W / 2;
    right_padding = pad_W / 2;

    if (is_odd(pad_H))
      top_padding += 1;
    if (is_odd(pad_W))
      left_padding += 1;
  }

  template <Layout L> TensorInfo src_info() const
  {
    return make_info<L>(ifm_N, ifm_C, ifm_H, ifm_W);
  }
  template <Layout L> TensorInfo dst_info() const
  {
    return make_info<L>(ofm_N, ofm_C, ofm_H, ofm_W);
  }
  template <Layout L> TensorInfo ker_info() const
  {
    return make_info<L>(ker_N, ker_C, ker_H, ker_W);
  }
  TensorInfo bias_info(void) const { return make_info(ker_N); }

  PadStrideInfo pad_stride_info(void) const
  {
    return PadStrideInfo{horizontal_stride,
                         vertical_stride,
                         left_padding,
                         right_padding,
                         top_padding,
                         bottom_padding,
                         DimensionRoundingType::FLOOR};
  }
};

} // namespace
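
// With the default parameters above (244x244 input, 3x3 kernel, stride 1, and a
// 244x244 output), the constructor computes, for example:
//
//   effective_ofm_H = (244 - 3) / 1 + 1 = 242
//   pad_H           = 244 - 242        = 2   ->  top_padding = bottom_padding = 1
//
// and likewise for the width, i.e. just enough padding for the output to keep the
// input's spatial size, as a "SAME"-style configuration requires.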

//
// Benchmark Implementations
//
namespace
{

inline nonius::benchmark_registry &local_benchmark_registry()
{
  static nonius::benchmark_registry registry;
  return registry;
}

} // namespace

#define NONIUS_LOCAL_BENCHMARK(name, ...)                                                        \
  namespace                                                                                      \
  {                                                                                              \
    static ::nonius::benchmark_registrar                                                         \
        NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name,         \
                                                       __VA_ARGS__);                             \
  }
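
// Benchmarks defined below register themselves into local_benchmark_registry()
// rather than nonius' global registry; the registry is then exposed through the
// extern "C" benchmark_functions() entry point at the bottom of this file,
// presumably so that an external driver can load this object and enumerate its
// benchmarks.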

NONIUS_LOCAL_BENCHMARK("CLDirectConvolutionLayer_NCHW", [](nonius::chronometer meter) {
  CLDirectConvolutionLayer conv;

  // Configure
  Configuration p{meter};

  CLTensor src_tensor{};
  CLTensor dst_tensor{};
  CLTensor ker_tensor{};
  CLTensor bias_tensor{};

  src_tensor.allocator()->init(p.src_info<NCHW>());
  dst_tensor.allocator()->init(p.dst_info<NCHW>());
  ker_tensor.allocator()->init(p.ker_info<NCHW>());
  bias_tensor.allocator()->init(p.bias_info());

  try
  {
    check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
                        p.pad_stride_info()));
  }
  catch (...)
  {
    meter.measure([&](int) {
      // DO NOTHING
      volatile int x = 0;
      return x;
    });
    return;
  }

  conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());

  src_tensor.allocator()->allocate();
  ker_tensor.allocator()->allocate();
  bias_tensor.allocator()->allocate();
  dst_tensor.allocator()->allocate();

  // Run!
  meter.measure([&](int) {
    conv.run();
    CLScheduler::get().sync();
  });
})
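
// The remaining benchmarks follow the same configure / validate / allocate / run
// pattern, varying only the convolution function (direct, GEMM-based, Winograd)
// and the data layout (NCHW vs. NHWC).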

NONIUS_LOCAL_BENCHMARK("CLDirectConvolutionLayer_NHWC", [](nonius::chronometer meter) {
  CLDirectConvolutionLayer conv;

  // Configure
  Configuration p{meter};

  CLTensor src_tensor{};
  CLTensor dst_tensor{};
  CLTensor ker_tensor{};
  CLTensor bias_tensor{};

  src_tensor.allocator()->init(p.src_info<NHWC>());
  dst_tensor.allocator()->init(p.dst_info<NHWC>());
  ker_tensor.allocator()->init(p.ker_info<NHWC>());
  bias_tensor.allocator()->init(p.bias_info());

  try
  {
    check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
                        p.pad_stride_info()));
  }
  catch (...)
  {
    meter.measure([&](int) {
      // DO NOTHING
      volatile int x = 0;
      return x;
    });
    return;
  }

  conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());

  src_tensor.allocator()->allocate();
  ker_tensor.allocator()->allocate();
  bias_tensor.allocator()->allocate();
  dst_tensor.allocator()->allocate();

  // Run!
  meter.measure([&](int) {
    conv.run();
    CLScheduler::get().sync();
  });
})

NONIUS_LOCAL_BENCHMARK("CLGEMMConvolutionLayer_NCHW", [](nonius::chronometer meter) {
  CLGEMMConvolutionLayer conv;

  // Configure
  Configuration p{meter};

  CLTensor src_tensor{};
  CLTensor dst_tensor{};
  CLTensor ker_tensor{};
  CLTensor bias_tensor{};

  src_tensor.allocator()->init(p.src_info<NCHW>());
  dst_tensor.allocator()->init(p.dst_info<NCHW>());
  ker_tensor.allocator()->init(p.ker_info<NCHW>());
  bias_tensor.allocator()->init(p.bias_info());

  try
  {
    check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
                        p.pad_stride_info()));
  }
  catch (...)
  {
    meter.measure([&](int) {
      // DO NOTHING
      volatile int x = 0;
      return x;
    });
    return;
  }

  conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());

  src_tensor.allocator()->allocate();
  ker_tensor.allocator()->allocate();
  bias_tensor.allocator()->allocate();
  dst_tensor.allocator()->allocate();

  // Run
  meter.measure([&](int) {
    conv.run();
    CLScheduler::get().sync();
  });
})

NONIUS_LOCAL_BENCHMARK("CLGEMMConvolutionLayer_NHWC", [](nonius::chronometer meter) {
  CLGEMMConvolutionLayer conv;

  // Configure
  Configuration p{meter};

  CLTensor src_tensor{};
  CLTensor dst_tensor{};
  CLTensor ker_tensor{};
  CLTensor bias_tensor{};

  src_tensor.allocator()->init(p.src_info<NHWC>());
  dst_tensor.allocator()->init(p.dst_info<NHWC>());
  ker_tensor.allocator()->init(p.ker_info<NHWC>());
  bias_tensor.allocator()->init(p.bias_info());

  try
  {
    check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
                        p.pad_stride_info()));
  }
  catch (...)
  {
    meter.measure([&](int) {
      // DO NOTHING
      volatile int x = 0;
      return x;
    });
    return;
  }

  conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());

  src_tensor.allocator()->allocate();
  ker_tensor.allocator()->allocate();
  bias_tensor.allocator()->allocate();
  dst_tensor.allocator()->allocate();

  // Run
  meter.measure([&](int) {
    conv.run();
    CLScheduler::get().sync();
  });
})

NONIUS_LOCAL_BENCHMARK("CLWinogradConvolutionLayer_NCHW", [](nonius::chronometer meter) {
  CLWinogradConvolutionLayer conv;

  // Configure
  Configuration p{meter};

  CLTensor src_tensor{};
  CLTensor dst_tensor{};
  CLTensor ker_tensor{};
  CLTensor bias_tensor{};

  src_tensor.allocator()->init(p.src_info<NCHW>());
  dst_tensor.allocator()->init(p.dst_info<NCHW>());
  ker_tensor.allocator()->init(p.ker_info<NCHW>());
  bias_tensor.allocator()->init(p.bias_info());

  try
  {
    check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
                        p.pad_stride_info()));
  }
  catch (...)
  {
    meter.measure([&](int) {
      // DO NOTHING
      volatile int x = 0;
      return x;
    });
    return;
  }

  conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());

  src_tensor.allocator()->allocate();
  ker_tensor.allocator()->allocate();
  bias_tensor.allocator()->allocate();
  dst_tensor.allocator()->allocate();

  // Run
  meter.measure([&](int) {
    conv.run();
    CLScheduler::get().sync();
  });
})

NONIUS_LOCAL_BENCHMARK("CLWinogradConvolutionLayer_NHWC", [](nonius::chronometer meter) {
  CLWinogradConvolutionLayer conv;

  // Configure
  Configuration p{meter};

  CLTensor src_tensor{};
  CLTensor dst_tensor{};
  CLTensor ker_tensor{};
  CLTensor bias_tensor{};

  src_tensor.allocator()->init(p.src_info<NHWC>());
  dst_tensor.allocator()->init(p.dst_info<NHWC>());
  ker_tensor.allocator()->init(p.ker_info<NHWC>());
  bias_tensor.allocator()->init(p.bias_info());

  try
  {
    check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
                        p.pad_stride_info()));
  }
  catch (...)
  {
    meter.measure([&](int) {
      // DO NOTHING
      volatile int x = 0;
      return x;
    });
    return;
  }

  conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());

  src_tensor.allocator()->allocate();
  ker_tensor.allocator()->allocate();
  bias_tensor.allocator()->allocate();
  dst_tensor.allocator()->allocate();

  // Run
  meter.measure([&](int) {
    conv.run();
    CLScheduler::get().sync();
  });
})

extern "C" nonius::benchmark_registry &benchmark_functions(void)
{
  return local_benchmark_registry();
}
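
//
// Usage sketch (illustrative only, not part of the original benchmark): assuming
// this translation unit is built into a shared object, a driver could obtain the
// registry through the extern "C" symbol above. The helper name and library path
// are hypothetical; the actual driver in the project may differ.
//
#if 0
#include <dlfcn.h>

nonius::benchmark_registry *load_benchmarks(const char *so_path)
{
  using entry_t = nonius::benchmark_registry &(*)(void);

  // Open the benchmark library, e.g. "./libConvolutionBenchmark.so" (illustrative path)
  void *handle = dlopen(so_path, RTLD_LAZY);
  if (handle == nullptr)
    return nullptr;

  // Look up the exported C symbol and call it to obtain the registry
  auto entry = reinterpret_cast<entry_t>(dlsym(handle, "benchmark_functions"));
  if (entry == nullptr)
    return nullptr;

  return &entry();
}
#endif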