#include <nonius/nonius.h++>
#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/CL/CLScheduler.h>
#include <arm_compute/runtime/CL/CLFunctions.h>
#include <cstdint>
#include <cassert>
#include <stdexcept>

Macros
#define	NONIUS_LOCAL_BENCHMARK(name, ...)

Functions
	NONIUS_PARAM (BATCH, 1)

	NONIUS_PARAM (IFM_C, 3)

	NONIUS_PARAM (IFM_H, 244)

	NONIUS_PARAM (IFM_W, 244)

	NONIUS_PARAM (OFM_C, 3)

	NONIUS_PARAM (OFM_H, 244)

	NONIUS_PARAM (OFM_W, 244)

	NONIUS_PARAM (KER_H, 3)

	NONIUS_PARAM (KER_W, 3)

	NONIUS_PARAM (STRIDE_H, 1)

	NONIUS_PARAM (STRIDE_W, 1)

	NONIUS_LOCAL_BENCHMARK ("CLDirectConvolutionLayer_NCHW", [](nonius::chronometer meter) { CLDirectConvolutionLayer conv;Configuration p{meter};CLTensor src_tensor{};CLTensor dst_tensor{};CLTensor ker_tensor{};CLTensor bias_tensor{};src_tensor.allocator() ->init(p.src_info< NCHW >());dst_tensor.allocator() ->init(p.dst_info< NCHW >());ker_tensor.allocator() ->init(p.ker_info< NCHW >());bias_tensor.allocator() ->init(p.bias_info());try { check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info()));} catch(...) { meter.measure([&](int) { volatile int x=0;return x;});return;} conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());src_tensor.allocator() ->allocate();ker_tensor.allocator() ->allocate();bias_tensor.allocator() ->allocate();dst_tensor.allocator() ->allocate();meter.measure([&](int) { conv.run();CLScheduler::get().sync();});}) NONIUS_LOCAL_BENCHMARK("CLDirectConvolutionLayer_NHWC"

src_tensor	allocator () -> init(p.src_info< NHWC >())

	catch (...)

meter	measure ([&](int) { conv.run();CLScheduler::get().sync();})

	NONIUS_LOCAL_BENCHMARK ("CLGEMMConvolutionLayer_NCHW", [](nonius::chronometer meter) { CLGEMMConvolutionLayer conv;Configuration p{meter};CLTensor src_tensor{};CLTensor dst_tensor{};CLTensor ker_tensor{};CLTensor bias_tensor{};src_tensor.allocator() ->init(p.src_info< NCHW >());dst_tensor.allocator() ->init(p.dst_info< NCHW >());ker_tensor.allocator() ->init(p.ker_info< NCHW >());bias_tensor.allocator() ->init(p.bias_info());try { check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info()));} catch(...) { meter.measure([&](int) { volatile int x=0;return x;});return;} conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());src_tensor.allocator() ->allocate();ker_tensor.allocator() ->allocate();bias_tensor.allocator() ->allocate();dst_tensor.allocator() ->allocate();meter.measure([&](int) { conv.run();CLScheduler::get().sync();});}) NONIUS_LOCAL_BENCHMARK("CLGEMMConvolutionLayer_NHWC"

	NONIUS_LOCAL_BENCHMARK ("CLWinogradConvolutionLayer_NCHW", [](nonius::chronometer meter) { CLWinogradConvolutionLayer conv;Configuration p{meter};CLTensor src_tensor{};CLTensor dst_tensor{};CLTensor ker_tensor{};CLTensor bias_tensor{};src_tensor.allocator() ->init(p.src_info< NCHW >());dst_tensor.allocator() ->init(p.dst_info< NCHW >());ker_tensor.allocator() ->init(p.ker_info< NCHW >());bias_tensor.allocator() ->init(p.bias_info());try { check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info()));} catch(...) { meter.measure([&](int) { volatile int x=0;return x;});return;} conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());src_tensor.allocator() ->allocate();ker_tensor.allocator() ->allocate();bias_tensor.allocator() ->allocate();dst_tensor.allocator() ->allocate();meter.measure([&](int) { conv.run();CLScheduler::get().sync();});}) NONIUS_LOCAL_BENCHMARK("CLWinogradConvolutionLayer_NHWC"

nonius::benchmark_registry &	benchmark_functions (void)

Variables
nonius::chronometer	meter

Configuration	p {meter}

CLTensor	src_tensor {}

CLTensor	dst_tensor {}

CLTensor	ker_tensor {}

CLTensor	bias_tensor {}

	try

Macro Definition Documentation

◆ NONIUS_LOCAL_BENCHMARK

#define NONIUS_LOCAL_BENCHMARK	(	name,
		...
	)

Value:

  namespace                                                                                        \
  {                                                                                                \
  static ::nonius::benchmark_registrar                                                             \
    NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \
  }

Definition at line 233 of file Convolution.cpp.

  namespace                                                                                        \
  {                                                                                                \
  static ::nonius::benchmark_registrar                                                             \
    NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \
  }

Function Documentation

◆ allocator()

src_tensor allocator ( ) -> init(p.src_info< NHWC >())

Referenced by flatbuffers::Allocate(), onert_micro::import::OMExecutionPlanCreator::createBackwardExecutionPlan(), onert_micro::import::OMExecutionPlanCreator::createExecutionPlan(), onert_micro::import::OMExecutionPlanCreator::createForwardExecutionPlan(), flatbuffers::Deallocate(), flatbuffers::ReallocateDownward(), onert_micro::train::OMBackpropExecute::runBackward(), and onert_micro::execute::OMKernelExecute::runForward().

◆ benchmark_functions()

nonius::benchmark_registry & benchmark_functions ( void )

Definition at line 510 of file Convolution.cpp.

{
  return local_benchmark_registry();
}

◆ catch()

catch ( ... )

Definition at line 306 of file Convolution.cpp.

  {
    meter.measure([&](int) {
      // DO NOTHING
      volatile int x = 0;
      return x;
    });
    return;
  }

References meter.

◆ measure()

meter measure ( [&] (int) { conv.run();CLScheduler::get().sync();} )

Referenced by main().

◆ NONIUS_LOCAL_BENCHMARK() [1/3]

NONIUS_LOCAL_BENCHMARK	(	"CLDirectConvolutionLayer_NCHW"	,
		[] (nonius::chronometer meter) { CLDirectConvolutionLayer conv;Configuration p{meter};CLTensor src_tensor{};CLTensor dst_tensor{};CLTensor ker_tensor{};CLTensor bias_tensor{};src_tensor.allocator() ->init(p.src_info< NCHW >());dst_tensor.allocator() ->init(p.dst_info< NCHW >());ker_tensor.allocator() ->init(p.ker_info< NCHW >());bias_tensor.allocator() ->init(p.bias_info());try { check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info()));} catch(...) { meter.measure([&](int) { volatile int x=0;return x;});return;} conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());src_tensor.allocator() ->allocate();ker_tensor.allocator() ->allocate();bias_tensor.allocator() ->allocate();dst_tensor.allocator() ->allocate();meter.measure([&](int) { conv.run();CLScheduler::get().sync();});}
	)

References bias_tensor, dst_tensor, ker_tensor, meter, p, and src_tensor.

◆ NONIUS_LOCAL_BENCHMARK() [2/3]

NONIUS_LOCAL_BENCHMARK	(	"CLGEMMConvolutionLayer_NCHW"	,
		[] (nonius::chronometer meter) { CLGEMMConvolutionLayer conv;Configuration p{meter};CLTensor src_tensor{};CLTensor dst_tensor{};CLTensor ker_tensor{};CLTensor bias_tensor{};src_tensor.allocator() ->init(p.src_info< NCHW >());dst_tensor.allocator() ->init(p.dst_info< NCHW >());ker_tensor.allocator() ->init(p.ker_info< NCHW >());bias_tensor.allocator() ->init(p.bias_info());try { check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info()));} catch(...) { meter.measure([&](int) { volatile int x=0;return x;});return;} conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());src_tensor.allocator() ->allocate();ker_tensor.allocator() ->allocate();bias_tensor.allocator() ->allocate();dst_tensor.allocator() ->allocate();meter.measure([&](int) { conv.run();CLScheduler::get().sync();});}
	)

References bias_tensor, dst_tensor, ker_tensor, meter, p, and src_tensor.

◆ NONIUS_LOCAL_BENCHMARK() [3/3]

NONIUS_LOCAL_BENCHMARK	(	"CLWinogradConvolutionLayer_NCHW"	,
		[] (nonius::chronometer meter) { CLWinogradConvolutionLayer conv;Configuration p{meter};CLTensor src_tensor{};CLTensor dst_tensor{};CLTensor ker_tensor{};CLTensor bias_tensor{};src_tensor.allocator() ->init(p.src_info< NCHW >());dst_tensor.allocator() ->init(p.dst_info< NCHW >());ker_tensor.allocator() ->init(p.ker_info< NCHW >());bias_tensor.allocator() ->init(p.bias_info());try { check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(), p.pad_stride_info()));} catch(...) { meter.measure([&](int) { volatile int x=0;return x;});return;} conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());src_tensor.allocator() ->allocate();ker_tensor.allocator() ->allocate();bias_tensor.allocator() ->allocate();dst_tensor.allocator() ->allocate();meter.measure([&](int) { conv.run();CLScheduler::get().sync();});}
	)

References bias_tensor, dst_tensor, ker_tensor, meter, p, and src_tensor.

◆ NONIUS_PARAM() [1/11]

NONIUS_PARAM	(	BATCH	,
		1
	)

◆ NONIUS_PARAM() [2/11]

NONIUS_PARAM	(	IFM_C	,
		3
	)

◆ NONIUS_PARAM() [3/11]

NONIUS_PARAM	(	IFM_H	,
		244
	)

◆ NONIUS_PARAM() [4/11]

NONIUS_PARAM	(	IFM_W	,
		244
	)

◆ NONIUS_PARAM() [5/11]

NONIUS_PARAM	(	KER_H	,
		3
	)

◆ NONIUS_PARAM() [6/11]

NONIUS_PARAM	(	KER_W	,
		3
	)

◆ NONIUS_PARAM() [7/11]

NONIUS_PARAM	(	OFM_C	,
		3
	)

◆ NONIUS_PARAM() [8/11]

NONIUS_PARAM	(	OFM_H	,
		244
	)

◆ NONIUS_PARAM() [9/11]

NONIUS_PARAM	(	OFM_W	,
		244
	)

◆ NONIUS_PARAM() [10/11]

NONIUS_PARAM	(	STRIDE_H	,
		1
	)

◆ NONIUS_PARAM() [11/11]

NONIUS_PARAM	(	STRIDE_W	,
		1
	)

Variable Documentation

◆ bias_tensor

CLTensor bias_tensor {}

Definition at line 294 of file Convolution.cpp.

294  CLTensor bias_tensor{};

Referenced by mir_caffe2::Caffe2OpCreator::convertSpatialBN(), onert::backend::acl_common::kernelGenFullyConnected(), NONIUS_LOCAL_BENCHMARK(), NONIUS_LOCAL_BENCHMARK(), NONIUS_LOCAL_BENCHMARK(), onert::backend::cpu::KernelGenerator::visit(), onert::backend::cpu::KernelGenerator::visit(), onert::backend::cpu::KernelGenerator::visit(), onert::backend::train::KernelGenerator::visit(), onert::backend::train::KernelGenerator::visit(), and onert::backend::train::KernelGenerator::visit().

◆ dst_tensor

CLTensor dst_tensor {}

Definition at line 292 of file Convolution.cpp.

292  CLTensor dst_tensor{};

Referenced by luci_interpreter::RuntimeGraph::makeInplaceOperation(), NONIUS_LOCAL_BENCHMARK(), NONIUS_LOCAL_BENCHMARK(), NONIUS_LOCAL_BENCHMARK(), onert::backend::builtin::kernel::PermuteLayer::optimize(), onert::exec::IPermuteFunction::permute(), and onert::backend::builtin::kernel::PermuteLayer::run().

◆ ker_tensor

CLTensor ker_tensor {}

Definition at line 293 of file Convolution.cpp.

293  CLTensor ker_tensor{};

Referenced by NONIUS_LOCAL_BENCHMARK(), NONIUS_LOCAL_BENCHMARK(), NONIUS_LOCAL_BENCHMARK(), onert::backend::cpu::KernelGenerator::visit(), onert::backend::cpu::KernelGenerator::visit(), onert::backend::train::KernelGenerator::visit(), and onert::backend::train::KernelGenerator::visit().

◆ meter

nonius::chronometer meter

Initial value:

{

CLDirectConvolutionLayer conv

Definition at line 285 of file Convolution.cpp.

Referenced by catch(), NONIUS_LOCAL_BENCHMARK(), NONIUS_LOCAL_BENCHMARK(), and NONIUS_LOCAL_BENCHMARK().

◆ p

Configuration p {meter}

Definition at line 289 of file Convolution.cpp.

289  Configuration p{meter};

Referenced by nnkit::support::onnx::Allocator::Alloc(), nnkit::support::onnx::Allocator::Allocator(), loco::AnnotatedItem< Annotation >::annot(), luci_interpreter::BuddyMemoryManager::BuddyMemoryManager(), fme_apply::check_patterns_valid(), onert_micro::import::OMExecutionPlanCreator::createBackwardExecutionPlan(), onert_micro::import::OMExecutionPlanCreator::createForwardExecutionPlan(), flatbuffers::DefaultAllocator::dealloc(), flatbuffers::Deallocate(), flatbuffers::grpc::SliceAllocator::deallocate(), flatbuffers::DefaultAllocator::deallocate(), fme_apply::FMEqualizer::equalize(), moco::PlaneInference::fill(), luci_interpreter::kernels::fillArithmeticActivationRange(), training_configure_tool::findAllTensorsRanksCombinations(), training_configure_tool::findBestSparseBackpropagationTensorsRanks(), flatbuffers::FloatToString(), nnkit::support::onnx::Allocator::Free(), pepper::KVStoreInterface< KVStoreTrait::Queryable >::get(), onert::compiler::BackendManager::getAll(), onert_micro::import::OMDynamicShapesHandler::importDynamicShapesFromTrainConfig(), moco::PlaneInference::infer(), main(), nnfw::cker::optimized::MulElementwise(), nnfw::cker::optimized::MulElementwise(), nnfw::cker::optimized::MulSimpleBroadcast(), NONIUS_LOCAL_BENCHMARK(), NONIUS_LOCAL_BENCHMARK(), NONIUS_LOCAL_BENCHMARK(), moco::tf::eltwise::binary::InputConnector< NodeTy >::operator()(), moco::PlaneInference::operator()(), moco::Padding2DInference::operator()(), bino::UniformTransform< Callable >::operator()(), nnfw::cker::depthwise_conv_op::functor::DepthwiseInputCopyOp< T >::operator()(), flatbuffers::unique_ptr< T >::operator=(), loco::PermutingEncoder< Domain::DepthwiseFilter >::perm(), loco::PermutingDecoder< Domain::DepthwiseFilter >::perm(), loco::PermutingEncoder< Domain::Feature >::perm(), loco::PermutingDecoder< Domain::Feature >::perm(), loco::PermutingEncoder< Domain::Filter >::perm(), loco::PermutingDecoder< Domain::Filter >::perm(), loco::PermutingEncoder< Domain::Matrix >::perm(), 
loco::PermutingDecoder< Domain::Matrix >::perm(), fme_apply::read(), flatbuffers::IndirectHelper< T >::Read(), flatbuffers::IndirectHelper< Offset< T > >::Read(), flatbuffers::IndirectHelper< const T * >::Read(), read_layer_params(), tflite::reference_ops::ReverseSequence(), logo::SimplifyDomainConversionPass::run(), luci::QuantizeOnnxDequantizeLinearPass::run(), luci::QuantizeOnnxQDQPass::run(), onert_micro::train::OMBackpropExecute::runBackward(), nnfw::cker::StridedSlicePadIndices(), nnfw::cker::Transpose2D(), onert::exec::ExecTime::updateOperationExecTime(), coco::updaters(), nnc::ArtifactGeneratorCppCode::visit(), nnc::ArtifactGeneratorCppDecl::visit(), fipe::wrap(), and fme_detect::write().

◆ src_tensor

CLTensor src_tensor {}

Definition at line 291 of file Convolution.cpp.

291  CLTensor src_tensor{};

Referenced by NONIUS_LOCAL_BENCHMARK(), NONIUS_LOCAL_BENCHMARK(), NONIUS_LOCAL_BENCHMARK(), onert::backend::builtin::kernel::PermuteLayer::optimize(), onert::exec::IPermuteFunction::permute(), and onert::backend::builtin::kernel::PermuteLayer::run().

◆ try

try

Initial value:

{
    check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
                        p.pad_stride_info()))

Definition at line 301 of file Convolution.cpp.

Macros

Functions

Variables

Macro Definition Documentation

◆ NONIUS_LOCAL_BENCHMARK

Function Documentation

◆ allocator()

◆ benchmark_functions()

◆ catch()

◆ measure()

◆ NONIUS_LOCAL_BENCHMARK() [1/3]

◆ NONIUS_LOCAL_BENCHMARK() [2/3]

◆ NONIUS_LOCAL_BENCHMARK() [3/3]

◆ NONIUS_PARAM() [1/11]

◆ NONIUS_PARAM() [2/11]

◆ NONIUS_PARAM() [3/11]

◆ NONIUS_PARAM() [4/11]

◆ NONIUS_PARAM() [5/11]

◆ NONIUS_PARAM() [6/11]

◆ NONIUS_PARAM() [7/11]

◆ NONIUS_PARAM() [8/11]

◆ NONIUS_PARAM() [9/11]

◆ NONIUS_PARAM() [10/11]

◆ NONIUS_PARAM() [11/11]

Variable Documentation

◆ bias_tensor

◆ dst_tensor

◆ ker_tensor

◆ meter

◆ p

◆ src_tensor

◆ try