ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Convolution.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
21#include <nonius/nonius.h++>
22
23#include <arm_compute/core/Types.h>
24#include <arm_compute/runtime/NEON/NEScheduler.h>
25#include <arm_compute/runtime/NEON/NEFunctions.h>
26
27#include <cstdint>
28#include <cassert>
29#include <stdexcept>
30
31using namespace arm_compute;
32
33//
34// Helpers
35//
36namespace
37{
38
// Selects the dimension ordering used by the 4-D make_info<> helpers.
// Kept as an unscoped enum so that the enumerators can be used directly
// as template arguments (make_info<NCHW>, src_info<NHWC>, ...).
enum Layout
{
  NCHW, // batch, channel, height, width
  NHWC  // batch, height, width, channel
};
44
45TensorInfo make_info(uint32_t N)
46{
47 TensorShape shape{N};
48 return TensorInfo{shape, 1, DataType::F32};
49}
50
51template <enum Layout> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W);
52
53template <> TensorInfo make_info<NCHW>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
54{
55 TensorShape shape{W, H, C, N};
56 TensorInfo info{shape, 1, DataType::F32};
57 info.set_data_layout(DataLayout::NCHW);
58 return info;
59}
60
61template <> TensorInfo make_info<NHWC>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
62{
63 TensorShape shape{C, W, H, N};
64 TensorInfo info{shape, 1, DataType::F32};
65 info.set_data_layout(DataLayout::NHWC);
66 return info;
67}
68
69inline void check(const Status &status)
70{
71 if (!status)
72 {
73 std::cerr << status.error_description() << std::endl;
74 throw std::runtime_error{"ERROR"};
75 }
76}
77
// Return true when n is not divisible by two.
inline constexpr bool is_odd(uint32_t n) noexcept { return (n % 2) != 0; }
79
80} // namespace
81
82//
83// Benchmark Parameters
84//
85NONIUS_PARAM(BATCH, 1);
86
87NONIUS_PARAM(IFM_C, 3);
88NONIUS_PARAM(IFM_H, 244);
89NONIUS_PARAM(IFM_W, 244);
90
91NONIUS_PARAM(OFM_C, 3);
92NONIUS_PARAM(OFM_H, 244);
93NONIUS_PARAM(OFM_W, 244);
94
95NONIUS_PARAM(KER_H, 3);
96NONIUS_PARAM(KER_W, 3);
97
98NONIUS_PARAM(STRIDE_H, 1);
99NONIUS_PARAM(STRIDE_W, 1);
100
101NONIUS_PARAM(PADDING, std::string{"SAME"})
102NONIUS_PARAM(FUSED_ACT, std::string{"RELU"})
103
104//
105// Configuration Helpers
106//
107namespace
108{
109
110struct Configuration
111{
112 uint32_t ifm_N;
113 uint32_t ifm_C;
114 uint32_t ifm_H;
115 uint32_t ifm_W;
116
117 uint32_t ofm_N;
118 uint32_t ofm_C;
119 uint32_t ofm_H;
120 uint32_t ofm_W;
121
122 uint32_t ker_N;
123 uint32_t ker_C;
124 uint32_t ker_H;
125 uint32_t ker_W;
126
127 uint32_t vertical_stride;
128 uint32_t horizontal_stride;
129
130 std::string padding;
131 std::string fused_act;
132
133 uint32_t top_padding;
134 uint32_t bottom_padding;
135 uint32_t left_padding;
136 uint32_t right_padding;
137
138 Configuration(nonius::chronometer meter)
139 {
140 ifm_N = meter.param<BATCH>();
141 ifm_C = meter.param<IFM_C>();
142 ifm_H = meter.param<IFM_H>();
143 ifm_W = meter.param<IFM_W>();
144
145 ofm_N = meter.param<BATCH>();
146 ofm_C = meter.param<OFM_C>();
147 ofm_H = meter.param<OFM_H>();
148 ofm_W = meter.param<OFM_W>();
149
150 ker_N = meter.param<OFM_C>();
151 ker_C = meter.param<IFM_C>();
152 ker_H = meter.param<KER_H>();
153 ker_W = meter.param<KER_W>();
154
155 vertical_stride = meter.param<STRIDE_H>();
156 horizontal_stride = meter.param<STRIDE_W>();
157
158 padding = meter.param<PADDING>();
159 fused_act = meter.param<FUSED_ACT>();
160
161 assert((ifm_H - ker_H) % vertical_stride == 0);
162 assert((ifm_W - ker_H) % horizontal_stride == 0);
163
164 uint32_t const effective_ofm_H = (ifm_H - ker_H) / vertical_stride + 1;
165 uint32_t const effective_ofm_W = (ifm_W - ker_H) / horizontal_stride + 1;
166
167 assert(ofm_H >= effective_ofm_H);
168 assert(ofm_W >= effective_ofm_W);
169
170 uint32_t const pad_H = ofm_H - effective_ofm_H;
171 uint32_t const pad_W = ofm_W - effective_ofm_W;
172
173 top_padding = pad_H / 2;
174 bottom_padding = pad_H / 2;
175 left_padding = pad_W / 2;
176 right_padding = pad_W / 2;
177
178 if (is_odd(pad_H))
179 top_padding += 1;
180 if (is_odd(pad_W))
181 left_padding += 1;
182 }
183
184 template <Layout L> TensorInfo src_info() const
185 {
186 return make_info<L>(ifm_N, ifm_C, ifm_H, ifm_W);
187 }
188 template <Layout L> TensorInfo dst_info() const
189 {
190 return make_info<L>(ofm_N, ofm_C, ofm_H, ofm_W);
191 }
192 template <Layout L> TensorInfo ker_info() const
193 {
194 return make_info<L>(ker_N, ker_C, ker_H, ker_W);
195 }
196 TensorInfo bias_info(void) const { return make_info(ker_N); }
197
198 PadStrideInfo pad_stride_info(void) const
199 {
200 return PadStrideInfo{horizontal_stride,
201 vertical_stride,
202 left_padding,
203 right_padding,
204 top_padding,
205 bottom_padding,
206 DimensionRoundingType::FLOOR};
207 }
208};
209
210} // namespace
211
212//
213// Benchmark Implementations
214//
215namespace
216{
217
218inline nonius::benchmark_registry &local_benchmark_registry()
219{
220 static nonius::benchmark_registry registry;
221 return registry;
222}
223
224} // namespace
225
// Register a benchmark called `name` in local_benchmark_registry().
// The remaining macro arguments are forwarded unchanged to the
// nonius::benchmark_registrar constructor.
#define NONIUS_LOCAL_BENCHMARK(name, ...)                                 \
  namespace                                                               \
  {                                                                       \
  static ::nonius::benchmark_registrar NONIUS_DETAIL_UNIQUE_NAME(         \
    benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__);  \
  }
232
233NONIUS_LOCAL_BENCHMARK("NEDirectConvolutionLayer_NCHW", [](nonius::chronometer meter) {
234 NEDirectConvolutionLayer conv;
235
236 // Configure
237 Configuration p{meter};
238
243
244 src_tensor.allocator()->init(p.src_info<NCHW>());
245 dst_tensor.allocator()->init(p.dst_info<NCHW>());
246 ker_tensor.allocator()->init(p.ker_info<NCHW>());
247 bias_tensor.allocator()->init(p.bias_info());
248
249 try
250 {
251 check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
252 p.pad_stride_info()));
253 }
254 catch (...)
255 {
256 meter.measure([&](int) {
257 // DO NOTHING
258 volatile int x = 0;
259 return x;
260 });
261 return;
262 }
263
264 conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
265
266 src_tensor.allocator()->allocate();
267 ker_tensor.allocator()->allocate();
268 bias_tensor.allocator()->allocate();
269 dst_tensor.allocator()->allocate();
270
271 // Run!
272 meter.measure([&](int) { conv.run(); });
273})
274
275NONIUS_LOCAL_BENCHMARK("NEDirectConvolutionLayer_NHWC", [](nonius::chronometer meter) {
276 NEDirectConvolutionLayer conv;
277
278 // Configure
279 Configuration p{meter};
280
285
286 src_tensor.allocator()->init(p.src_info<NHWC>());
287 dst_tensor.allocator()->init(p.dst_info<NHWC>());
288 ker_tensor.allocator()->init(p.ker_info<NHWC>());
289 bias_tensor.allocator()->init(p.bias_info());
290
291 try
292 {
293 check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
294 p.pad_stride_info()));
295 }
296 catch (...)
297 {
298 meter.measure([&](int) {
299 // DO NOTHING
300 volatile int x = 0;
301 return x;
302 });
303 return;
304 }
305
306 conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
307
308 src_tensor.allocator()->allocate();
309 ker_tensor.allocator()->allocate();
310 bias_tensor.allocator()->allocate();
311 dst_tensor.allocator()->allocate();
312
313 // Run!
314 meter.measure([&](int) { conv.run(); });
315})
316
317NONIUS_LOCAL_BENCHMARK("NEGEMMConvolutionLayer_NCHW", [](nonius::chronometer meter) {
318 NEGEMMConvolutionLayer conv;
319
320 // Configure
321 Configuration p{meter};
322
327
328 src_tensor.allocator()->init(p.src_info<NCHW>());
329 dst_tensor.allocator()->init(p.dst_info<NCHW>());
330 ker_tensor.allocator()->init(p.ker_info<NCHW>());
331 bias_tensor.allocator()->init(p.bias_info());
332
333 try
334 {
335 check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
336 p.pad_stride_info()));
337 }
338 catch (...)
339 {
340 meter.measure([&](int) {
341 // DO NOTHING
342 volatile int x = 0;
343 return x;
344 });
345 return;
346 }
347
348 conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
349
350 src_tensor.allocator()->allocate();
351 ker_tensor.allocator()->allocate();
352 bias_tensor.allocator()->allocate();
353 dst_tensor.allocator()->allocate();
354
355 // Run
356 meter.measure([&](int) { conv.run(); });
357})
358
359NONIUS_LOCAL_BENCHMARK("NEGEMMConvolutionLayer_NHWC", [](nonius::chronometer meter) {
360 NEGEMMConvolutionLayer conv;
361
362 // Configure
363 Configuration p{meter};
364
369
370 src_tensor.allocator()->init(p.src_info<NHWC>());
371 dst_tensor.allocator()->init(p.dst_info<NHWC>());
372 ker_tensor.allocator()->init(p.ker_info<NHWC>());
373 bias_tensor.allocator()->init(p.bias_info());
374
375 try
376 {
377 check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
378 p.pad_stride_info()));
379 }
380 catch (...)
381 {
382 meter.measure([&](int) {
383 // DO NOTHING
384 volatile int x = 0;
385 return x;
386 });
387 return;
388 }
389
390 conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
391
392 src_tensor.allocator()->allocate();
393 ker_tensor.allocator()->allocate();
394 bias_tensor.allocator()->allocate();
395 dst_tensor.allocator()->allocate();
396
397 // Run
398 meter.measure([&](int) { conv.run(); });
399})
400
401NONIUS_LOCAL_BENCHMARK("NEWinogradConvolutionLayer_NCHW", [](nonius::chronometer meter) {
402 NEWinogradConvolutionLayer conv;
403
404 // Configure
405 Configuration p{meter};
406
411
412 src_tensor.allocator()->init(p.src_info<NCHW>());
413 dst_tensor.allocator()->init(p.dst_info<NCHW>());
414 ker_tensor.allocator()->init(p.ker_info<NCHW>());
415 bias_tensor.allocator()->init(p.bias_info());
416
417 try
418 {
419 check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
420 p.pad_stride_info()));
421 }
422 catch (...)
423 {
424 meter.measure([&](int) {
425 // DO NOTHING
426 volatile int x = 0;
427 return x;
428 });
429 return;
430 }
431
432 conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
433
434 src_tensor.allocator()->allocate();
435 ker_tensor.allocator()->allocate();
436 bias_tensor.allocator()->allocate();
437 dst_tensor.allocator()->allocate();
438
439 // Run
440 meter.measure([&](int) { conv.run(); });
441})
442
443NONIUS_LOCAL_BENCHMARK("NEWinogradConvolutionLayer_NHWC", [](nonius::chronometer meter) {
444 NEWinogradConvolutionLayer conv;
445
446 // Configure
447 Configuration p{meter};
448
453
454 src_tensor.allocator()->init(p.src_info<NHWC>());
455 dst_tensor.allocator()->init(p.dst_info<NHWC>());
456 ker_tensor.allocator()->init(p.ker_info<NHWC>());
457 bias_tensor.allocator()->init(p.bias_info());
458
459 try
460 {
461 check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
462 p.pad_stride_info()));
463 }
464 catch (...)
465 {
466 meter.measure([&](int) {
467 // DO NOTHING
468 volatile int x = 0;
469 return x;
470 });
471 return;
472 }
473
474 conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
475
476 src_tensor.allocator()->allocate();
477 ker_tensor.allocator()->allocate();
478 bias_tensor.allocator()->allocate();
479 dst_tensor.allocator()->allocate();
480
481 // Run
482 meter.measure([&](int) { conv.run(); });
483})
484
485extern "C" nonius::benchmark_registry &benchmark_functions(void)
486{
487 return local_benchmark_registry();
488}
volatile const char info[]
::nncc::core::ADT::tensor::Shape TensorShape
Definition TensorShape.h:25
C
Definition infer.py:52
void conv(const nncc::core::ADT::feature::Shape &out_shape, nncc::core::ADT::feature::Accessor< OutputDType > &out_data, const nncc::core::ADT::feature::Shape &in_shape, const nncc::core::ADT::feature::Reader< InputDType > &in_data, const nncc::core::ADT::kernel::Shape &ker_shape, const nncc::core::ADT::kernel::Reader< KernelDType > &ker_data, const PadInfo &pad_info, const StrideInfo &stride_info)
Definition Conv2D.h:34
nonius::chronometer meter
NONIUS_PARAM(BATCH, 1)
Tensor src_tensor
Tensor ker_tensor
nonius::benchmark_registry & benchmark_functions(void)
Configuration p
Tensor dst_tensor
nonius::chronometer meter
#define NONIUS_LOCAL_BENCHMARK(name,...)
Tensor bias_tensor