ONE - On-device Neural Engine
nnc::AclCppOpGenerator Class Reference

Implements a visitor over the model IR that builds the DOM description, which the ACL soft backend code generators then translate into C++ source/header files.

#include <AclCppOpGenerator.h>


Public Member Functions

 AclCppOpGenerator (const std::string &name, std::ostream &par_out)
 
const ArtifactModule & generate (mir::Graph *g)
 The main interface function of the class. Converts the model IR to the DOM.
 
void visit (mir::ops::AddOp &op) override
 Implementations of the MIR visitors.
 
void visit (mir::ops::AvgPool2DOp &op) override
 
void visit (mir::ops::CappedReluOp &op) override
 
void visit (mir::ops::ConcatOp &op) override
 
void visit (mir::ops::ConstantOp &op) override
 
void visit (mir::ops::Conv2DOp &op) override
 
void visit (mir::ops::DeConv2DOp &op) override
 
void visit (mir::ops::DepthwiseConv2DOp &op) override
 
void visit (mir::ops::DivOp &op) override
 
void visit (mir::ops::EluOp &op) override
 
void visit (mir::ops::FullyConnectedOp &op) override
 
void visit (mir::ops::GatherOp &op) override
 
void visit (mir::ops::InputOp &op) override
 
void visit (mir::ops::LeakyReluOp &op) override
 
void visit (mir::ops::MaxOp &op) override
 
void visit (mir::ops::MaxPool2DOp &op) override
 
void visit (mir::ops::MulOp &op) override
 
void visit (mir::ops::OutputOp &op) override
 
void visit (mir::ops::PadOp &op) override
 
void visit (mir::ops::ReluOp &op) override
 
void visit (mir::ops::ReshapeOp &op) override
 
void visit (mir::ops::ResizeOp &op) override
 
void visit (mir::ops::SigmoidOp &op) override
 
void visit (mir::ops::SliceOp &op) override
 
void visit (mir::ops::SoftmaxOp &op) override
 
void visit (mir::ops::SqrtOp &op) override
 
void visit (mir::ops::SqueezeOp &op) override
 
void visit (mir::ops::SubOp &op) override
 
void visit (mir::ops::TanhOp &op) override
 
void visit (mir::ops::TransposeOp &op) override
 
template<typename Op >
shared_ptr< ArtifactVariable > genPadStrideInfo (const Op &op, const string &prefix, ArtifactBlock *block)
 
template<typename Op >
void genConvolution (Op &op, const string &acl_func_name, const string &suffix)
 
template<typename T >
std::shared_ptr< ArtifactId > genVectorInitializedVar (ArtifactBlock *block, const string &type, const string &name, const vector< T > &init)
 
- Public Member Functions inherited from mir::IVisitor
virtual ~IVisitor ()=default
 

Protected Member Functions

void visit_fallback (mir::Operation &op) override
 

Detailed Description

Implements a visitor over the model IR that builds the DOM description, which the ACL soft backend code generators then translate into C++ source/header files.

Definition at line 37 of file AclCppOpGenerator.h.
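
For orientation, a minimal usage sketch follows. Only the constructor and generate() calls are taken from this page; the graph construction, the file name "nnmodel.par" and the helper function emitAclArtifact() are assumptions made for illustration, and the final DOM-to-C++ emission step is not shown.

#include <fstream>
#include "AclCppOpGenerator.h"

// Sketch: drive the generator over an already built model IR graph.
void emitAclArtifact(mir::Graph *graph)
{
  // Stream that receives the serialized parameters (weights) file.
  std::ofstream par_out("nnmodel.par", std::ios_base::out | std::ios_base::binary);

  // "nnmodel" becomes the name of the generated artifact module/class.
  nnc::AclCppOpGenerator generator("nnmodel", par_out);

  // Visit the graph and build the DOM. The returned module is later translated
  // into C++ source/header files by the ACL soft backend code generators.
  const nnc::ArtifactModule &dom = generator.generate(graph);
  (void)dom;
}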

Constructor & Destructor Documentation

◆ AclCppOpGenerator()

nnc::AclCppOpGenerator::AclCppOpGenerator ( const std::string &  name,
std::ostream &  par_out 
)

Definition at line 35 of file AclCppOpGenerator.cpp.

36 : _parOut(par_out), _module(name), _constrBlock(nullptr), _infBlock(nullptr),
37 _clScheduler(AF::id("arm_compute::CLScheduler"))
38{
39}

Member Function Documentation

◆ genConvolution()

template<typename Op >
void nnc::AclCppOpGenerator::genConvolution ( Op &  op,
const string &  acl_func_name,
const string &  suffix 
)

Definition at line 494 of file AclCppOpGenerator.cpp.

495{
496 const auto *ir_input = op.getInput(0);
497 const auto *ir_weights = op.getInput(1);
498 const auto *ir_output = op.getOutput(0);
499
500 auto ir_weights_op = dynamic_cast<const ops::ConstantOp *>(ir_weights->getNode());
501 if (ir_weights_op == nullptr)
502 throw AclCppException("Unsupported operation type");
503
504 auto ir_weights_tensor = ir_weights_op->getValue();
505 if (op.getType() == Operation::Type::conv2D)
506 {
507 // [Co, Hk, Wk, Ci] -> [Co, Ci, Hk, Wk].
508 ir_weights_tensor = transposeTensor<0, 3, 1, 2>(ir_weights_tensor);
509 }
510 else
511 {
512 ir_weights_tensor = transposeTensor<3, 2, 0, 1>(ir_weights_tensor);
513 }
514
515 const Shape &ir_weights_shape = ir_weights_tensor.getShape();
516
517 // get output tensor name that is used as base for other names
518 const string output_tensor_name = tensorName(ir_output);
519
520 // Get the identifier of the input tensor in the DOM.
521 auto input = AF::id(tensorName(ir_input));
522
523 // Generate auxiliary tensor to hold transposed input of convolution in NCHW format
524 shared_ptr<ArtifactId> transposed_input =
525 genTransposeMIRtoACL(output_tensor_name + "_transposed_input", ir_input->getShape(), input);
526
527 // Create the transposed output tensor in the DOM.
528 const string transposed_output_name = output_tensor_name + "_transposed_output";
529 Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output->getShape());
530 shared_ptr<ArtifactId> transposed_output =
531 genTensor(transposed_output_name, transposed_output_shape);
532
533 string operation_name = output_tensor_name + suffix;
534
535 // Generate a tensor for weights (kernel) in the DOM.
536 auto weights = genTensor(operation_name + "_weights", ir_weights_shape);
537
538 // Create a local variable of type PadStrideInfo in the artifact constructor:
539 // PadStrideInfo pad_stride_info(stride_x, stride_y, pad_x, pad_y);
540 auto pad_stride_info_var = genPadStrideInfo(op, operation_name, _constrBlock);
541
542 auto pad_stride_info = pad_stride_info_var->use();
543
544 // The parameter for the conv_layer.config(&in, &weights, nullptr, &out, pad_stride_info)
545 // function call.
546 list<shared_ptr<ArtifactExpr>> config_params{AF::ref(transposed_input), AF::ref(weights),
547 AF::lit("nullptr"), AF::ref(transposed_output),
548 pad_stride_info};
549
550 // Add additional parameters for deconvolution.
551 if (op.getType() == Operation::Type::deConv2D)
552 {
553 config_params.push_back(AF::lit("0"));
554 config_params.push_back(AF::lit("0"));
555 }
556
557 // Create the convolution (/depthwise convolution/deconvolution) layer class instance.
558 shared_ptr<ArtifactId> layer = genLayer(acl_func_name, operation_name, config_params);
559
560 addToPersistentTensors(weights);
561 // Save the IR weights tensor to later read this in the artifact.
562 serializeTensor(weights, ir_weights_tensor);
563 genTensorAllocation(_infBlock, transposed_output);
564 genLayerExecution(layer);
565
566 // Generate auxiliary tensor to hold transposed output of convolution in NHWC format
567 shared_ptr<ArtifactId> output =
568 genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
569
570 genTensorDeallocation(_infBlock, transposed_input);
571 genTensorDeallocation(_infBlock, transposed_output);
572}
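In terms of plain ACL calls, the configuration and execution emitted by this method for a Conv2D correspond roughly to the sequence below. This is a hand-written illustration: the tensor names and the stride/padding values are placeholders, and in the real artifact configure() is emitted into the constructor while run() is emitted into Inference(); the NHWC-to-NCHW input permutation and the weight transposition described in the comments above are also omitted here.

#include "arm_compute/runtime/CL/CLFunctions.h"

// Illustrative equivalent of the generated configure/run sequence for Conv2D.
void configureConvolution(arm_compute::CLTensor &transposed_input,
                          arm_compute::CLTensor &weights,
                          arm_compute::CLTensor &transposed_output)
{
  // Example stride/padding values; the real ones come from genPadStrideInfo().
  arm_compute::PadStrideInfo pad_stride_info(/*stride_x=*/1, /*stride_y=*/1,
                                             /*pad_left=*/0, /*pad_right=*/0,
                                             /*pad_top=*/0, /*pad_bottom=*/0,
                                             arm_compute::DimensionRoundingType::FLOOR);

  arm_compute::CLConvolutionLayer conv_layer;
  // Mirrors config_params above: {&in, &weights, nullptr /*biases*/, &out, pad_stride_info}.
  conv_layer.configure(&transposed_input, &weights, /*biases=*/nullptr,
                       &transposed_output, pad_stride_info);

  // genLayerExecution() later emits the call that actually runs the layer.
  conv_layer.run();
}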

◆ generate()

const ArtifactModule & nnc::AclCppOpGenerator::generate ( mir::Graph *  g)

The main interface function of the class. Converts the model IR to the DOM.

Parameters
g - pointer to the model IR graph.
Returns
Reference to the top-level DOM entity.

Definition at line 41 of file AclCppOpGenerator.cpp.

42{
43 // Including headers.
44 _module.addHeaderSysInclude("fstream");
45 _module.addHeaderInclude("arm_compute/core/Types.h");
46 _module.addHeaderInclude("arm_compute/runtime/CL/CLFunctions.h");
47 _module.addHeaderInclude("arm_compute/runtime/CL/CLScheduler.h");
48 _module.addHeaderInclude("arm_compute/runtime/CL/CLBufferAllocator.h");
49 _module.addHeaderInclude("arm_compute/runtime/BlobLifetimeManager.h");
50 _module.addHeaderInclude("arm_compute/runtime/PoolManager.h");
51 _module.addHeaderInclude("arm_compute/runtime/MemoryManagerOnDemand.h");
52
53 // The general structure creation.
54 _artifactClass = _module.createClass(_module.name());
55 _constrBlock = _artifactClass->getConstrBlock();
56 _inferenceFunction = _artifactClass->func(true, "void", "Inference");
57 _infBlock = _inferenceFunction->getBlock();
58
59 // Input parameter stream preparation.
60 _parInVar = _artifactClass->var(false, "std::ifstream", "_parIn");
61 _parIn = _parInVar->use();
62 string par_file_name = _module.name() + ".par";
63 _constrBlock->call(
64 "open",
65 {AF::lit("\"" + par_file_name + "\""), AF::lit("std::ios_base::in | std::ios_base::binary")},
66 _parIn);
67 auto file_fail = _constrBlock->ifCond(AF::call("fail", {}, _parIn));
68 auto file_fail_block = file_fail->getBlock();
69 file_fail_block->addStatement(
70 AF::lit("throw std::string(\"Failed to open file: " + par_file_name + " for reading\")"));
71
72 // Traverse the computational graph.
73 g->accept(this);
74
75 // Generate all the deferred entities.
76 genNamed(g);
77 genPersistentTensorAllocations();
78 genDeserializations();
79 genFillings();
80
81 // Make sure all the OpenCL jobs are done executing:
82 _infBlock->call("sync", {}, AF::call("get", {}, _clScheduler, ArtifactCallType::scope));
83
84 return _module;
85}

References nnc::ArtifactModule::addHeaderInclude(), nnc::ArtifactModule::addHeaderSysInclude(), nnc::ArtifactBlock::call(), nnc::ArtifactFactory::call(), nnc::ArtifactModule::createClass(), nnc::ArtifactBlock::ifCond(), nnc::ArtifactFactory::lit(), nnc::ArtifactModule::name(), and nnc::scope.

Referenced by nnc::AclCppCodeGenerator::run() and multiple TEST() cases.
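
Putting the pieces above together, the emitted artifact is a class named after the module, with a constructor that opens the .par parameter file and an Inference() method that runs the configured layers. The sketch below is a hand-written reconstruction of that skeleton, not verbatim generator output; the class name "nnmodel" and any members beyond _parIn and Inference() are assumptions.

#include <fstream>
#include <string>
#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

class nnmodel // class name == module name passed to the generator
{
public:
  nnmodel()
  {
    // The constructor opens the parameter file produced alongside the artifact
    // and throws if it cannot be read (see the generated ifCond above).
    _parIn.open("nnmodel.par", std::ios_base::in | std::ios_base::binary);
    if (_parIn.fail())
      throw std::string("Failed to open file: nnmodel.par for reading");
    // ... tensor allocations, weight deserialization, layer configuration ...
  }

  void Inference()
  {
    // ... run the configured CL layers ...
    arm_compute::CLScheduler::get().sync(); // wait for all OpenCL jobs to finish
  }

private:
  std::ifstream _parIn;
  // ... persistent arm_compute::CLTensor members and layer objects ...
};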

◆ genPadStrideInfo()

template<typename Op >
shared_ptr< ArtifactVariable > nnc::AclCppOpGenerator::genPadStrideInfo ( const Op &  op,
const string &  prefix,
ArtifactBlock *  block 
)

Definition at line 203 of file AclCppOpGenerator.cpp.

205{
206 using AF = ArtifactFactory;
207
208 const Shape strides(op.getStrides());
209 assert(strides.rank() == 2);
210 auto &padding_before = op.getPaddingBefore();
211 auto &padding_after = op.getPaddingAfter();
212
213 string type_name = "arm_compute::PadStrideInfo";
214
215 string var_name = prefix + "_pad_stride_info";
216
217 list<std::shared_ptr<ArtifactExpr>> var_init_params = {
218 AF::lit(to_string(strides.dim(1))),
219 AF::lit(to_string(strides.dim(0))),
220 AF::lit(to_string(padding_before.at(1))),
221 AF::lit(to_string(padding_after.at(1))),
222 AF::lit(to_string(padding_before.at(0))),
223 AF::lit(to_string(padding_after.at(0))),
224 AF::lit("arm_compute::DimensionRoundingType::FLOOR")};
225
226 auto pad_stride_info_var = block->var(type_name, var_name, {}, var_init_params);
227
228 return pad_stride_info_var;
229}

References mir::Shape::dim(), mir::Shape::rank(), and nnc::ArtifactBlock::var().
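
The emitted artifact code is a single local variable declaration in the constructor block. For a hypothetical prefix "out_convolution_layer" with strides {2, 2} and symmetric padding of 1, it would look roughly like this; the argument order maps strides.dim(1), strides.dim(0), padding_before[1], padding_after[1], padding_before[0], padding_after[0] onto (stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom).

#include "arm_compute/core/Types.h"

// Example values; generated into the artifact constructor.
arm_compute::PadStrideInfo out_convolution_layer_pad_stride_info(
  2, 2, 1, 1, 1, 1, arm_compute::DimensionRoundingType::FLOOR);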

◆ genVectorInitializedVar()

template<typename T >
std::shared_ptr< ArtifactId > nnc::AclCppOpGenerator::genVectorInitializedVar ( ArtifactBlock *  block,
const string &  type,
const string &  name,
const vector< T > &  init 
)

Definition at line 715 of file AclCppOpGenerator.cpp.

717{
718 list<shared_ptr<ArtifactExpr>> dims;
719
720 for (const auto &v : init)
721 dims.push_back(AF::lit(to_string(v)));
722
723 auto shape_var = block->var(type, name, {}, dims);
724 auto shape_id = shape_var->use();
725 return shape_id;
726}

References nnc::ArtifactBlock::var().
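
The emitted code is simply a variable of the requested type whose constructor receives the vector elements as literal arguments. For example, a hypothetical call with type "arm_compute::TensorShape", name "out_shape" and init {1, 3, 224, 224} would generate roughly:

#include "arm_compute/core/TensorShape.h"

// Hypothetical output of genVectorInitializedVar(block, "arm_compute::TensorShape",
//                                                "out_shape", {1, 3, 224, 224}):
arm_compute::TensorShape out_shape(1, 3, 224, 224);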

◆ visit() [1/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::AddOp &  op)
override

Implementations of the MIR visitors.

Parameters
op

Definition at line 958 of file AclCppOpGenerator.cpp.

959{
960 assert(op.getNumInputs() == 2);
961 const auto *ir_lhs = op.getInput(0);
962 const auto *ir_rhs = op.getInput(1);
963 const auto *ir_output = op.getOutput(0);
964
965 // Create the output tensor in the DOM and obtain its identifier.
966 auto out = genTensor(ir_output);
967 addToPersistentTensors(out);
968
969 // Get the identifiers of the input tensors in the DOM.
970 auto lhs = AF::id(tensorName(ir_lhs));
971 auto rhs = AF::id(tensorName(ir_rhs));
972
973 genAddition(out->name() + "_" + "addition", 0, ir_rhs->getShape(), lhs, rhs, out);
974}

References mir::Operation::getInput(), mir::Operation::getNumInputs(), and mir::Operation::getOutput().

◆ visit() [2/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::AvgPool2DOp &  op)
override

Definition at line 253 of file AclCppOpGenerator.cpp.

254{
255 genPooling(op, "arm_compute::PoolingType::AVG", !op.getIncludePad());
256}

References mir::ops::AvgPool2DOp::getIncludePad().

◆ visit() [3/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::CappedReluOp &  op)
override

Definition at line 301 of file AclCppOpGenerator.cpp.

302{
303 genActivation(op, "LU_BOUNDED_RELU", op.getCap());
304}

References mir::ops::CappedReluOp::getCap().

◆ visit() [4/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::ConcatOp &  op)
override

Definition at line 87 of file AclCppOpGenerator.cpp.

88{
89 const auto &ir_inputs = op.getInputs();
90 const auto *ir_output = op.getOutput(0);
91
92 static const char *axis_names[] = {
93 "arm_compute::DataLayoutDimension::BATCHES", "arm_compute::DataLayoutDimension::CHANNEL",
94 "arm_compute::DataLayoutDimension::HEIGHT", "arm_compute::DataLayoutDimension::WIDTH"};
95
96 int axis = op.getAxis();
97 assert(axis >= 0 && axis < static_cast<int>(sizeof(axis_names) / sizeof(axis_names[0])) &&
98 "axis outside this range is not supported in ACL");
99 const char *axis_name = axis_names[axis];
100
101 auto out = genTensor(ir_output);
102 auto prefix = out->name() + "_concatenate_layer";
103 auto inputs_var = _constrBlock->var("std::vector<arm_compute::ICLTensor*>", prefix + "_inputs");
104 auto inputs = inputs_var->use();
105
106 for (const Operation::Output *ir_input : ir_inputs)
107 _constrBlock->call("push_back", {AF::ref(AF::id(tensorName(ir_input)))}, inputs);
108
109 auto layer =
110 genLayer("arm_compute::CLConcatenateLayer", prefix, {inputs, AF::ref(out), AF::lit(axis_name)});
111
112 addToPersistentTensors(out);
113 genLayerExecution(layer);
114}

References nnc::ArtifactBlock::call(), mir::ops::ConcatOp::getAxis(), mir::Operation::getInputs(), mir::Operation::getOutput(), nnc::ArtifactFactory::id(), nnc::ArtifactFactory::ref(), and nnc::ArtifactBlock::var().

◆ visit() [5/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::ConstantOp &  op)
override

Definition at line 338 of file AclCppOpGenerator.cpp.

339{
340 if (shouldSerializeConstant(op))
341 {
342 TensorVariant data = op.getValue();
343 shared_ptr<ArtifactId> out = genTensor(op.getOutput(0));
344 addToPersistentTensors(out);
345 serializeTensor(out, data);
346 }
347}

References mir::Operation::getOutput(), and mir::ops::ConstantOp::getValue().

◆ visit() [6/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::Conv2DOp &  op)
override

Definition at line 116 of file AclCppOpGenerator.cpp.

117{
118 assert(op.getNumGroups() == 1);
119 genConvolution(op, "arm_compute::CLConvolutionLayer", "_convolution_layer");
120}

References mir::ops::Conv2DOp::getNumGroups().

◆ visit() [7/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::DeConv2DOp &  op)
override

Definition at line 392 of file AclCppOpGenerator.cpp.

393{
394 genConvolution(op, "arm_compute::CLDeconvolutionLayer", "_deconvolution_layer");
395}

◆ visit() [8/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::DepthwiseConv2DOp &  op)
override

Definition at line 122 of file AclCppOpGenerator.cpp.

123{
124 genConvolution(op, "arm_compute::CLDepthwiseConvolutionLayer", "_depthwise_convolution_layer");
125}

◆ visit() [9/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::DivOp &  op)
override

Definition at line 976 of file AclCppOpGenerator.cpp.

976{ throw AclCppException("NYI"); }

◆ visit() [10/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::EluOp &  op)
override

Definition at line 397 of file AclCppOpGenerator.cpp.

398{
399 throw AclCppException("EluOp not supported by the ACL library yet.");
400}

◆ visit() [11/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::FullyConnectedOp &  op)
override

Definition at line 264 of file AclCppOpGenerator.cpp.

265{
266 assert(op.getNumInputs() == 2);
267 const auto *ir_input = op.getInput(0);
268 const auto *ir_weights = op.getInput(1);
269 const auto *ir_output = op.getOutput(0);
270
271 auto ir_weights_op = dynamic_cast<const mir::ops::ConstantOp *>(ir_weights->getNode());
272 if (ir_weights_op == nullptr)
273 throw AclCppException("Unsupported operation type");
274
275 const TensorVariant ir_weights_tensor = transposeTensor<1, 0>(ir_weights_op->getValue());
276 const Shape &ir_weights_shape = ir_weights_tensor.getShape();
277
278 // Get the input node tensor id in the DOM.
279 auto in = AF::id(tensorName(ir_input));
280
281 // Create the output tensor in the DOM.
282 if (ir_output->getShape().rank() != 2)
283 throw AclCppException("Unsupported number of dimensions in fc layer");
284 auto out = genTensor(ir_output);
285 string operation_name = out->name() + "_fully_connected_layer";
286
287 // Create the weights tensor in the DOM and use its id.
288 auto weights = genTensor(operation_name + "_weights", ir_weights_shape);
289
290 // Instantiate the CLFullyConnectedLayer object.
291 auto layer = genLayer("arm_compute::CLFullyConnectedLayer", operation_name,
292 {AF::ref(in), AF::ref(weights), AF::lit("nullptr"), AF::ref(out)});
293
294 addToPersistentTensors(weights);
295 // Serialize the weights tensor and generate the function to deserialize it in the artifact.
296 serializeTensor(weights, ir_weights_tensor);
297 addToPersistentTensors(out);
298 genLayerExecution(layer);
299}

References mir::Operation::getInput(), mir::Operation::getNumInputs(), mir::Operation::getOutput(), and mir::TensorVariant::getShape().

◆ visit() [12/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::GatherOp &  op)
override

Definition at line 941 of file AclCppOpGenerator.cpp.

942{
943 throw AclCppException("Unimplemented operation: GatherOp");
944}

◆ visit() [13/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::InputOp &  op)
override

Definition at line 306 of file AclCppOpGenerator.cpp.

307{
308 shared_ptr<ArtifactId> tensor;
309 tensor = genTensor(op.getOutput(0));
310 addToPersistentTensors(tensor);
311}

References mir::Operation::getOutput().

◆ visit() [14/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::LeakyReluOp &  op)
override

Definition at line 948 of file AclCppOpGenerator.cpp.

949{
950 genActivation(op, "LEAKY_RELU", op.getAlpha());
951}

References mir::ops::LeakyReluOp::getAlpha().

◆ visit() [15/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::MaxOp &  op)
override

Definition at line 978 of file AclCppOpGenerator.cpp.

978{ throw AclCppException("NYI"); }

◆ visit() [16/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::MaxPool2DOp &  op)
override

Definition at line 258 of file AclCppOpGenerator.cpp.

259{
260 // The value of 'exclude_padding' does not really matter for MAX pooling.
261 genPooling(op, "arm_compute::PoolingType::MAX", false);
262}

◆ visit() [17/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::MulOp &  op)
override

Definition at line 980 of file AclCppOpGenerator.cpp.

981{
982 assert(op.getNumInputs() == 2);
983 const auto *ir_lhs = op.getInput(0);
984 const auto *ir_rhs = op.getInput(1);
985 const auto *ir_output = op.getOutput(0);
986
987 // Create the output tensor in the DOM and obtain its identifier.
988 auto out = genTensor(ir_output);
989 addToPersistentTensors(out);
990
991 // Get the identifiers of the input tensors in the DOM.
992 auto lhs = AF::id(tensorName(ir_lhs));
993 auto rhs = AF::id(tensorName(ir_rhs));
994
995 genMultiplication(out->name() + "_" + "multiplication", 0, ir_rhs->getShape(), lhs, rhs, out);
996}

References mir::Operation::getInput(), mir::Operation::getNumInputs(), and mir::Operation::getOutput().

◆ visit() [18/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::OutputOp &  op)
override

Definition at line 953 of file AclCppOpGenerator.cpp.

954{
955 // No-op.
956}

◆ visit() [19/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::PadOp &  op)
override

Definition at line 402 of file AclCppOpGenerator.cpp.

403{
404 assert(op.getNumInputs() == 1);
405 const auto *ir_input = op.getInput(0);
406 const auto *ir_output = op.getOutput(0);
407
408 // Get the id of the input tensor.
409 auto input = AF::id(tensorName(ir_input));
410
411 // Create the output tensor in the DOM
412 auto out = genTensor(ir_output);
413 addToPersistentTensors(out);
414
415 // Generate PadLayer params
416 auto prefix = out->name() + "_pad_layer";
417 auto pad_list_decl = _constrBlock->var("arm_compute::PaddingList", prefix + "_pads");
418 auto pad_list = pad_list_decl->use();
419 const auto &padding_before = op.getPaddingBefore();
420 const auto &padding_after = op.getPaddingAfter();
421 for (int i = 0; i < ir_input->getShape().rank(); ++i)
422 {
423 auto pad_var = _constrBlock->var(
424 "arm_compute::PaddingInfo", prefix + "_pad_" + to_string(i), {},
425 {AF::lit(to_string(padding_before[i])), AF::lit(to_string(padding_after[i]))});
426 auto pad = pad_var->use();
427 _constrBlock->call("push_back", {pad}, pad_list);
428 }
429
430 // Generate PadLayer
431 // FIXME Set up the `constant_value` parameter.
432 assert(op.getPaddingValue() == 0.0f);
433 auto layer =
434 genLayer("arm_compute::CLPadLayer", prefix, {AF::ref(input), AF::ref(out), pad_list});
435 genLayerExecution(layer);
436}

References mir::Operation::getInput(), mir::Operation::getNumInputs(), mir::Operation::getOutput(), mir::ops::PadOp::getPaddingAfter(), mir::ops::PadOp::getPaddingBefore(), and mir::ops::PadOp::getPaddingValue().
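
The generated padding setup boils down to building an arm_compute::PaddingList with one (before, after) pair per dimension and configuring a CLPadLayer with it. A minimal hand-written equivalent, with hypothetical padding amounts and tensor names, is sketched below.

#include "arm_compute/runtime/CL/CLFunctions.h"

// Illustrative equivalent of the generated padding setup.
void configurePad(arm_compute::CLTensor &in, arm_compute::CLTensor &out)
{
  // PaddingInfo is a (before, after) pair; one entry per tensor dimension.
  arm_compute::PaddingList pads;
  pads.push_back(arm_compute::PaddingInfo(1, 1)); // dimension 0
  pads.push_back(arm_compute::PaddingInfo(2, 2)); // dimension 1

  arm_compute::CLPadLayer pad_layer;
  pad_layer.configure(&in, &out, pads); // constant_value defaults to 0
  pad_layer.run();
}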

◆ visit() [20/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::ReluOp &  op)
override

Definition at line 349 of file AclCppOpGenerator.cpp.

349{ genActivation(op, "RELU"); }

◆ visit() [21/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::ReshapeOp &  op)
override

Definition at line 351 of file AclCppOpGenerator.cpp.

352{
353 assert(op.getNumInputs() == 1);
354 const auto *ir_input = op.getInput(0);
355 const auto *ir_output = op.getOutput(0);
356
357 // Get the id of the input tensor in the generated artifact.
358 auto in = AF::id(tensorName(ir_input));
359
360 // Create the output tensor in the DOM and return its id.
361 const Shape &out_shape = ir_output->getShape();
362
363 // This check confirms that we can "safely" reshape data
364 // The only safe configuration of output shape is (1...1, N, 1 ... 1)
365 bool found_non_one = false;
366 for (int32_t i = 0; i < out_shape.rank(); ++i)
367 {
368 if (out_shape.dim(i) != 1)
369 {
370 if (found_non_one)
371 throw AclCppException("Unsupported result of reshape");
372 found_non_one = true;
373 }
374 }
375
376 shared_ptr<ArtifactId> out = genTensor(ir_output);
377
378 // Create an instance of the CLReshapeLayer class as a member of the artifact class.
379 auto layer = genLayer("arm_compute::CLReshapeLayer", out->name() + "_reshape_layer",
380 {AF::ref(in), AF::ref(out)});
381 addToPersistentTensors(out);
382 genLayerExecution(layer);
383}

References mir::Shape::dim(), mir::Operation::getInput(), mir::Operation::getNumInputs(), mir::Operation::getOutput(), and mir::Shape::rank().
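
Once the safety check passes, the resulting artifact code amounts to a CLReshapeLayer configured with the input and output tensors and executed during Inference(); roughly (hypothetical tensor names):

#include "arm_compute/runtime/CL/CLFunctions.h"

// Illustrative equivalent of the generated reshape.
void runReshape(arm_compute::CLTensor &in, arm_compute::CLTensor &out)
{
  arm_compute::CLReshapeLayer reshape_layer;
  reshape_layer.configure(&in, &out); // output shape is taken from 'out'
  reshape_layer.run();
}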

◆ visit() [22/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::ResizeOp &  op)
override

Definition at line 877 of file AclCppOpGenerator.cpp.

878{
879 throw AclCppException("Unimplemented operation: Resize");
880}

◆ visit() [23/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::SigmoidOp &  op)
override

Definition at line 946 of file AclCppOpGenerator.cpp.

946{ genActivation(op, "LOGISTIC"); }

◆ visit() [24/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::SliceOp &  op)
override

Definition at line 385 of file AclCppOpGenerator.cpp.

386{
387 throw AclCppException("Unimplemented operation: SliceOp");
388}

◆ visit() [25/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::SoftmaxOp &  op)
override

Definition at line 127 of file AclCppOpGenerator.cpp.

128{
129 assert(op.getNumInputs() == 1);
130 const auto *ir_input = op.getInput(0);
131 const auto *ir_output = op.getOutput(0);
132
133 auto in = AF::id(tensorName(ir_input));
134
135 int rank = ir_output->getShape().rank();
136 // CLPermute does not support all kinds of permutations now.
137 // rank can be more than 2 in our models, so we can not use CLTranspose.
138 // This means we can support tensors with no more than one axis > 1.
139 int axis = op.getAxis();
140 assert(axis == rank - 1);
141 int nof_long_axes = 0;
142
143 for (int i = 0; i < rank; ++i)
144 {
145 if (ir_output->getShape().dim(i) > 1)
146 ++nof_long_axes;
147 }
148
149 // TODO: Consider how to support Softmax on more general inputs.
150 if (nof_long_axes > 1)
151 throw AclCppException("Unsupported Softmax operation with several dimensions greater than 1");
152
153 // Create the output tensor.
154 shared_ptr<ArtifactId> output = genTensor(ir_output);
155 auto layer_name_prefix = output->name();
156
157 if (axis == 0)
158 {
159 // Simple version: do not need pre and post reshapes.
160 // Apply the softmax operation.
161 auto sm = genLayer("arm_compute::CLSoftmaxLayer", layer_name_prefix + "_softmax_layer",
162 {AF::ref(in), AF::ref(output)});
163 addToPersistentTensors(output);
164 genLayerExecution(sm);
165 }
166 else
167 {
168 // TODO refactor this code, it works only with 1 batch
169
170 // Need to reshape before the Softmax application and after it.
171 // Then we need two tensors for intermediate results. This is because we do a couple of
172 // auxiliary
173 // reshapes: one to transform the input tensor to a unidimensional tensor and the second to
174 // transform the result of the softmax operation back to the original form.
175 Shape sm_shape(ir_output->getShape());
176
177 std::swap(sm_shape.dim(axis), sm_shape.dim(-1));
178
179 auto tmp = genTensor(layer_name_prefix + "_tmp", sm_shape);
180 auto tmp2 = genTensor(layer_name_prefix + "_tmp2", sm_shape);
181
182 // Do the input permutation.
183 auto transp1 = genLayer("arm_compute::CLReshapeLayer", layer_name_prefix + "_transp_layer1",
184 {AF::ref(in), AF::ref(tmp)});
185 addToPersistentTensors(tmp);
186 genLayerExecution(transp1);
187
188 // Apply the softmax operation.
189 auto sm = genLayer("arm_compute::CLSoftmaxLayer", layer_name_prefix + "_softmax_layer",
190 {AF::ref(tmp), AF::ref(tmp2)});
191 addToPersistentTensors(tmp2);
192 genLayerExecution(sm);
193
194 // Reshape the output to the original form.
195 auto transp2 = genLayer("arm_compute::CLReshapeLayer", layer_name_prefix + "_transp_layer2",
196 {AF::ref(tmp2), AF::ref(output)});
197 addToPersistentTensors(output);
198 genLayerExecution(transp2);
199 }
200}

References mir::Shape::dim(), mir::ops::SoftmaxOp::getAxis(), mir::Operation::getInput(), mir::Operation::getNumInputs(), and mir::Operation::getOutput().
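
For the general case (axis != 0) the generated artifact therefore chains three layers: a reshape to the flattened form, the softmax itself, and a reshape back to the original shape. A minimal hand-written equivalent, with hypothetical tensor names and the tensors assumed to be already allocated, looks like this:

#include "arm_compute/runtime/CL/CLFunctions.h"

void runSoftmaxWithReshapes(arm_compute::CLTensor &in, arm_compute::CLTensor &tmp,
                            arm_compute::CLTensor &tmp2, arm_compute::CLTensor &out)
{
  arm_compute::CLReshapeLayer transp_layer1, transp_layer2;
  arm_compute::CLSoftmaxLayer softmax_layer;

  transp_layer1.configure(&in, &tmp);   // flatten the input
  softmax_layer.configure(&tmp, &tmp2); // softmax on the flattened tensor
  transp_layer2.configure(&tmp2, &out); // restore the original shape

  transp_layer1.run();
  softmax_layer.run();
  transp_layer2.run();
}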

◆ visit() [26/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::SqrtOp &  op)
override

Definition at line 834 of file AclCppOpGenerator.cpp.

835{
836 throw AclCppException("Unimplemented operation: Sqrt");
837}

◆ visit() [27/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::SqueezeOp &  op)
override

Definition at line 829 of file AclCppOpGenerator.cpp.

830{
831 throw AclCppException("Unimplemented operation: Squeeze");
832}

◆ visit() [28/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::SubOp &  op)
override

Definition at line 998 of file AclCppOpGenerator.cpp.

998{ throw AclCppException("NYI"); }

◆ visit() [29/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::TanhOp &  op)
override

Definition at line 390 of file AclCppOpGenerator.cpp.

390{ genActivation(op, "TANH"); }

◆ visit() [30/30]

void nnc::AclCppOpGenerator::visit ( mir::ops::TransposeOp &  op)
override

Definition at line 921 of file AclCppOpGenerator.cpp.

922{
923 assert(op.getNumInputs() == 1);
924 const auto *ir_input = op.getInput(0);
925 const auto *ir_output = op.getOutput(0);
926
927 // Get the input node tensor id in the DOM.
928 shared_ptr<ArtifactId> input = AF::id(tensorName(ir_input));
929 const vector<size_t> &mir_axis_order = op.getAxisOrder();
930
931 // Create the output tensor in the DOM.
932 if (ir_output->getShape().rank() != 4)
933 throw AclCppException("Unsupported number of dimensions in transpose operation");
934 // TODO replace transpose shape
935 shared_ptr<ArtifactId> output = genTensor(ir_output);
936
937 // Actual generation of operation and related stuff
938 genTranspose(input, output, mir_axis_order, false);
939}

References mir::ops::TransposeOp::getAxisOrder(), mir::Operation::getInput(), mir::Operation::getNumInputs(), and mir::Operation::getOutput().

◆ visit_fallback()

void nnc::AclCppOpGenerator::visit_fallback ( mir::Operation &  op)
overrideprotectedvirtual

Reimplemented from mir::Visitor.

Definition at line 1000 of file AclCppOpGenerator.cpp.

1000{ throw AclCppException("NYI"); }

The documentation for this class was generated from the following files:
AclCppOpGenerator.h
AclCppOpGenerator.cpp