ONE - On-device Neural Engine
KernelGenerator.cc
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "KernelGenerator.h"
18
19#include "ops/AddNLayer.h"
20#include "ops/ArgMinMaxLayer.h"
22#include "ops/BinaryArithmeticLayer.h"
23#include "ops/CompareLayer.h"
24#include "ops/ConcatLayer.h"
25#include "ops/ConvolutionLayer.h"
27#include "ops/DepthwiseConvolutionLayer.h"
28#include "ops/EinsumLayer.h"
29#include "ops/ElementwiseActivationLayer.h"
32#include "ops/ExpandDimsLayer.h"
33#include "ops/FillLayer.h"
34#include "ops/FullyConnectedLayer.h"
35#include "ops/GatherLayer.h"
36#include "ops/LSTMLayer.h"
37#include "ops/MeanLayer.h"
39#include "ops/OneHotLayer.h"
40#include "ops/OperationUtils.h"
41#include "ops/PackLayer.h"
42#include "ops/PadLayer.h"
43#include "ops/PoolLayer.h"
44#include "ops/PowLayer.h"
45#include "ops/QuantizeLayer.h"
46#include "ops/RangeLayer.h"
47#include "ops/RankLayer.h"
48#include "ops/ReduceLayer.h"
49#include "ops/ReshapeLayer.h"
51#include "ops/ReverseLayer.h"
52#include "ops/RoPELayer.h"
53#include "ops/SelectLayer.h"
54#include "ops/ShapeLayer.h"
55#include "ops/SliceLayer.h"
56#include "ops/SoftMaxLayer.h"
60#include "ops/SplitLayer.h"
61#include "ops/SplitVLayer.h"
62#include "ops/TileLayer.h"
63#include "ops/TransposeLayer.h"
64#include "ops/UnpackLayer.h"
66#include "ops/L2NormLayer.h"
71#include "ops/LogSoftMaxLayer.h"
73#include "ops/RmsNormLayer.h"
74
#include <backend/Backend.h>
#include <backend/IConfig.h>
#include <memory>
#include <util/Utils.h>
#include <util/logging.h>
#include <exec/DynamicShapeInferer.h>

#include <stdexcept>

namespace onert::backend::cpu
{

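// Local helpers that translate IR-level enum values into the CPU kernels'
// own enums. Anything without a mapping falls through to the default case
// and is rejected with a runtime_error.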
namespace
{
ops::ArithmeticType
convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
{
  switch (arithmetic_type_ir)
  {
    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
      return ops::ArithmeticType::kAdd;
    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
      return ops::ArithmeticType::kSub;
    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
      return ops::ArithmeticType::kMul;
    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
      return ops::ArithmeticType::kDiv;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseActivationType
convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseActivation::Type::ELU:
      return ops::ElementwiseActivationType::kElu;
    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
      return ops::ElementwiseActivationType::kLogistic;
    case ir::operation::ElementwiseActivation::Type::RELU:
      return ops::ElementwiseActivationType::kReLU;
    case ir::operation::ElementwiseActivation::Type::TANH:
      return ops::ElementwiseActivationType::kTanh;
    case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
      return ops::ElementwiseActivationType::kLeakyReLU;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseBinaryType
convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::FLOOR_DIV:
      return ops::ElementwiseBinaryType::kFloorDiv;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::FLOOR_MOD:
      return ops::ElementwiseBinaryType::kFloorMod;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
      return ops::ElementwiseBinaryType::kLogicalAnd;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
      return ops::ElementwiseBinaryType::kLogicalOr;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
      return ops::ElementwiseBinaryType::kMax;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
      return ops::ElementwiseBinaryType::kMin;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
{
  switch (type_ir)
  {
    // NOTE The case labels (ABS, CAST, COS, DEQUANTIZE, EXP, FLOOR, LOG,
    // LOGICAL_NOT, NEG, ROUND, RSQRT, SIN, SQRT, SQUARE, ZEROS_LIKE, ...),
    // each returning the matching ops::ElementwiseUnaryType enumerator,
    // were elided in this listing.
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::Pool2D::PoolType::AVG:
      return ops::PoolType::kAvg;
    case ir::operation::Pool2D::PoolType::MAX:
      return ops::PoolType::kMax;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
  switch (reduce_type_ir)
  {
    case ir::operation::Reduce::ReduceType::MAX:
      return ops::ReduceType::kMax;
    case ir::operation::Reduce::ReduceType::MIN:
      return ops::ReduceType::kMin;
    case ir::operation::Reduce::ReduceType::SUM:
      return ops::ReduceType::kSum;
    case ir::operation::Reduce::ReduceType::PROD:
      return ops::ReduceType::kProd;
    case ir::operation::Reduce::ReduceType::ANY:
      return ops::ReduceType::kAny;
    case ir::operation::Reduce::ReduceType::ALL:
      return ops::ReduceType::kAll;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}
} // namespace

KernelGenerator::KernelGenerator(
  const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
  const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
  const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
  const std::shared_ptr<ExternalContext> &external_context)
  : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()), _operations_ctx{graph.operations()},
    _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
    _external_context(external_context)
{
  // DO NOTHING
}

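// Builds the FunctionSequence for one operation: attaches a dynamic-tensor
// context so shapes can be re-inferred at run time, dispatches to the
// matching visit() below, and bumps the reference count of the operation's
// defined inputs and outputs.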
std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
  auto ret = std::make_unique<exec::FunctionSequence>();

  assert(_tensor_builder->dynamicTensorManager());
  assert(_tensor_reg);

  // Prepare to handle dynamic tensors later
  auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
  {
    dyn_ctx->op = &_operations_ctx.at(ind);
    dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_tensor_reg);
  }
  ret->dynamic_tensor_ctx(dyn_ctx);

  auto &op = _graph.operations().at(ind);
  op.accept(*this);
  assert(_return_fn); // _return_fn must have been generated
  ret->append(std::move(_return_fn));

  for (auto &&ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
  {
    auto tensor = _tensor_reg->getNativeTensor(ind);
    if (tensor)
    {
      tensor->increase_ref();
    }
  }
  return ret;
}
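
// Each visit() below follows the same recipe: resolve I/O tensors from the
// tensor registry, build the matching ops:: kernel, configure it, and hand
// it back through _return_fn. As a usage sketch (hypothetical, not from this
// file), a backend's kernel-generation stage would drive generate() once per
// operation:
//
//   KernelGenerator kgen{graph, tensor_builder, tensor_reg, kernel_builder,
//                        external_context};
//   for (const auto &op_index : operation_order) // placeholder ordering
//     code_map.emplace(op_index, kgen.generate(op_index));
//
// where operation_order and code_map are illustrative placeholders, not
// onert APIs.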

void KernelGenerator::visit(const ir::operation::AddN &node)
{
  const auto output_index{node.getOutputs().at(0)};

  std::vector<const IPortableTensor *> input_tensors;
  for (const auto &input_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::AddNLayer>();

  fn->configure(std::move(input_tensors), output_tensor);

  _return_fn = std::move(fn);
}

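// Conv2D: when any input shape is still dynamic, padding cannot be computed
// here, so the layer gets the raw padding parameters and resolves them at
// run time; for static shapes the explicit padding is precomputed below.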
void KernelGenerator::visit(const ir::operation::Conv2D &node)
{
  using ir::operation::Conv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  const auto stride = node.param().stride;
  const auto activation = node.param().activation;
  const auto &param_padding = node.param().padding;
  const auto dilation = node.param().dilation;

  const bool is_cacheable_weights = ker_tensor->is_constant();

  auto fn = std::make_unique<ops::ConvolutionLayer>();

  if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
  {
    fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
                  param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
                  stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
                  activation, ofm_tensor, is_cacheable_weights);

    _return_fn = std::move(fn);
    return;
  }
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);

  const auto padding =
    ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
                         dilation.width_factor, dilation.height_factor);

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
                padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
                dilation.width_factor, dilation.height_factor, activation, ofm_tensor,
                is_cacheable_weights);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
{
  using ir::operation::DepthwiseConv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};

  const auto stride = node.param().stride;
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
  // Kernel format is [1, kernel_height, kernel_width, depth_out].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);
  const auto dilation_width = node.param().dilation.width_factor;
  const auto dilation_height = node.param().dilation.height_factor;
  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
                                            ker_width, ker_height, dilation_width, dilation_height);
  const auto multiplier = node.param().multiplier;
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
                padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
                dilation_height, activation, ofm_tensor, _external_context);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Concat &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto rank = _ctx.at(ofm_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis);

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);

  std::vector<const IPortableTensor *> input_tensors;
  for (const auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  auto fn = std::make_unique<ops::ConcatLayer>();

  fn->configure(input_tensors, axis, output_tensor);

  _return_fn = std::move(fn);
}

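// BatchToSpaceND: the CROPS input is optional (the NNAPI form passes only
// two inputs), in which case the layer is configured with a null crops
// tensor.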
void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
  const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto input_alloc = _tensor_reg->getPortableTensor(input_index);
  auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);

  auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();

  IPortableTensor *crops_alloc = nullptr;
  const auto NNApiInputs = 2;

  if (node.getInputs().size() != NNApiInputs)
  {
    const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
    crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
  }

  fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Fill &node)
{
  const auto output_index{node.getOutputs().at(0)};
  // SHAPE input is used for shape inference
  const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto value_tensor = _tensor_reg->getPortableTensor(value_index);

  auto fn = std::make_unique<ops::FillLayer>();

  fn->configure(value_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
  using ir::operation::FullyConnected;

  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
  const auto activation = node.param().activation;
  const auto weights_format = node.param().weights_format;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
  auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::FullyConnectedLayer>();

  fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
                _external_context);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reshape &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  // optional 2nd input
  IPortableTensor *shape_tensor = nullptr;

  if (node.getInputs().size() == 2)
  {
    const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
    shape_tensor = _tensor_reg->getPortableTensor(shape_index);
  }

  auto fn = std::make_unique<ops::ReshapeLayer>();

  fn->configure(input_tensor, shape_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Squeeze &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  // Squeeze can share the same kernel with Reshape
  auto fn = std::make_unique<ops::ReshapeLayer>();

  fn->configure(input_tensor, nullptr, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Softmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};

  const auto beta = node.param().beta;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::SoftMaxLayer>();

  fn->configure(input_tensor, beta, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};

  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::BinaryArithmeticLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
                convertArithmeticType(node.param().arithmetic_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Comparison &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
  const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto comparison_type = node.param().comparison_type;

  auto fn = std::make_unique<ops::CompareLayer>();

  fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Gather &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
  const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);

  const auto rank = _ctx.at(input_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis);

  auto fn = std::make_unique<ops::GatherLayer>();

  fn->configure(input_tensor, indices_tensor, output_tensor, axis, _external_context.get());

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::OneHot &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
  const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
  const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
  const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};

  const auto axis = node.param().axis;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
  auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
  auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
  auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);

  assert(indices_tensor->data_type() == OperandType::INT32);
  assert(axis <= static_cast<int>(indices_tensor->getShape().rank()));

  auto fn = std::make_unique<ops::OneHotLayer>();

  fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Einsum &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
  std::vector<const IPortableTensor *> input_tensors;
  for (const auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  const auto &equation = node.param().equation;

  auto fn = std::make_unique<ops::EinsumLayer>();

  fn->configure(input_tensors, equation, output_tensor);

  _return_fn = std::move(fn);
}

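// Custom operations are not mapped to an ops:: layer; the user-supplied
// IKernelBuilder constructs the kernel from operand types/shapes and the
// raw userdata blob.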
void KernelGenerator::visit(const ir::operation::Custom &node)
{
  auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
                          std::vector<custom::TypeInfo> &types,
                          std::vector<IPortableTensor *> &tensors) {
    for (const auto &idx : opSeq)
    {
      const auto &operand = _ctx.at(idx);
      types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
      auto in_tensor = _tensor_reg->getPortableTensor(idx);
      tensors.emplace_back(in_tensor);
    }
  };

  backend::custom::CustomKernelConfigParams params{};

  fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
  fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);

  params.userdata = node.userdata().data;
  params.userdata_size = node.userdata().size;

  auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ElementwiseActivationLayer>();

  fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
                convertElementwiseActivationType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();

  fn->configure(lhs_tensor, rhs_tensor, output_tensor,
                convertElementwiseBinaryType(node.param().op_type));

  _return_fn = std::move(fn);
}

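// ElementwiseUnary: QUANTIZE gets a dedicated QuantizeLayer; every other
// unary op is dispatched through the shared ElementwiseUnaryLayer.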
void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
  {
    auto fn = std::make_unique<ops::QuantizeLayer>();
    fn->configure(input_tensor, output_tensor);
    _return_fn = std::move(fn);
  }
  else
  {
    auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
    fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
    _return_fn = std::move(fn);
  }
}

void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
  // AXIS input is used for output shape inference

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ExpandDimsLayer>();

  fn->configure(input_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pack &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto rank = _ctx.at(ofm_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis);

  assert(-rank <= axis && axis < rank);

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);

  std::vector<const IPortableTensor *> input_tensors;
  for (const auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  auto fn = std::make_unique<ops::PackLayer>();

  fn->configure(input_tensors, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Unpack &node)
{
  const auto input_index{node.getInputs().at(0)};

  const auto rank = _ctx.at(input_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis);

  assert(rank == 0 || (-rank <= axis && axis < rank));

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  std::vector<IPortableTensor *> output_tensors;
  for (const auto &output_idx : node.getOutputs())
    output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::UnpackLayer>();

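  // Normalize a negative axis into [0, rank) before handing it to the kernel.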
  uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);

  fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pad &node)
{
  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
  const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
  const auto output_index{node.getOutputs().at(0)};

  auto input = _tensor_reg->getPortableTensor(input_index);
  auto pad = _tensor_reg->getPortableTensor(pad_index);
  auto output = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::PadLayer>();

  IPortableTensor *value = nullptr;
  if (node.getInputs().size() == 3) // isPadV2
  {
    const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
    value = _tensor_reg->getPortableTensor(value_index);
  }

  fn->configure(input, pad, value, output);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Transpose &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
  const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);

  auto fn = std::make_unique<ops::TransposeLayer>();

  fn->configure(input_tensor, perm_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reduce &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
  const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};

  const auto keep_dims = node.param().keep_dims;
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);

  if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
  {
    auto fn = std::make_unique<ops::MeanLayer>();

    fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);

    _return_fn = std::move(fn);
  }
  else
  {
    auto fn = std::make_unique<ops::ReduceLayer>();

    const auto reduce_type = convertReduceType(node.param().reduce_type);
    fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);

    _return_fn = std::move(fn);
  }
}

void KernelGenerator::visit(const ir::operation::Select &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
  const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
  const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
  auto true_tensor = _tensor_reg->getPortableTensor(true_index);
  auto false_tensor = _tensor_reg->getPortableTensor(false_index);

  auto fn = std::make_unique<ops::SelectLayer>();

  fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Slice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
  auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);

  auto fn = std::make_unique<ops::SliceLayer>();

  fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::StridedSlice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
  auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
  auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);

  auto begin_mask = node.param().begin_mask;
  auto end_mask = node.param().end_mask;
  auto shrink_axis_mask = node.param().shrink_axis_mask;

  auto fn = std::make_unique<ops::StridedSliceLayer>();

  fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
                end_mask, shrink_axis_mask);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Split &node)
{
  const auto num_splits = node.param().num_splits;
  assert(num_splits == static_cast<int>(node.getOutputs().size()));

  const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
  const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};

  auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);

  std::vector<IPortableTensor *> out_tensors;
  for (const auto &output_idx : node.getOutputs())
    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::SplitLayer>();

  fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Shape &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::ShapeLayer>();

  fn->configure(ifm_tensor, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};

  auto align_corners = node.param().align_corners;
  auto half_pixel_centers = node.param().half_pixel_centers;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ResizeBilinearLayer>();

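  // The output size comes either from operation parameters (single-input
  // form) or from a SIZE tensor; a constant SIZE tensor is folded here at
  // generation time, while a dynamic one is resolved by the layer at run time.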
  if (node.getInputs().size() == 1)
  {
    fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
                  align_corners, half_pixel_centers);
  }
  else
  {
    assert(node.getInputs().size() == 2);
    const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
    auto size_tensor = _tensor_reg->getPortableTensor(size_index);
    if (size_tensor->is_constant())
    {
      auto size_vec = _ctx.at(size_index).asVector<int32_t>();
      const auto height_out = size_vec[0];
      const auto width_out = size_vec[1];
      fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
                    half_pixel_centers);
    }
    else
    {
      fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
    }
  }

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reverse &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);

  auto fn = std::make_unique<ops::ReverseLayer>();

  fn->configure(input_tensor, axis_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);

  auto fn = std::make_unique<ops::ArgMinMaxLayer>();

  fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};

  const auto kh = node.param().kh;
  const auto kw = node.param().kw;
  const auto stride = node.param().stride;
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
  const auto padding =
    ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::PoolLayer>();

  fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
                stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
                convertPoolType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pow &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::PowLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::L2Normalization &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(0)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto input_alloc = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::L2NormLayer>();

  fn->configure(input_alloc, output_alloc);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Range &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto start_index{node.getInputs().at(ir::operation::Range::START)};
  const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
  const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto start_tensor = _tensor_reg->getPortableTensor(start_index);
  auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
  auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);

  auto fn = std::make_unique<ops::RangeLayer>();

  fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Rank &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::RankLayer>();

  fn->configure(ifm_tensor, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::RmsNorm &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::RmsNorm::Input::INPUT)};
  const auto gamma_index{node.getInputs().at(ir::operation::RmsNorm::Input::GAMMA)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto gamma_tensor = _tensor_reg->getPortableTensor(gamma_index);
  auto epsilon = node.param().epsilon;

  auto fn = std::make_unique<ops::RmsNormLayer>();

  fn->configure(ifm_tensor, gamma_tensor, epsilon, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::SqDiffLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Tile &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
  const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);

  auto fn = std::make_unique<ops::TileLayer>();

  fn->configure(input_tensor, multiples_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::INPUT)};
  const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
  const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index);
  auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index);

  auto fn = std::make_unique<ops::MatrixBandPartLayer>();

  fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
  _return_fn = std::move(fn);
}

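// DetectionPostProcess (TFLite-style NMS): inputs, outputs, and scalar
// parameters are all bundled into one parameter struct consumed by the
// layer.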
void KernelGenerator::visit(const ir::operation::DetectionPostProcess &node)
{
  using NMS = ir::operation::DetectionPostProcess;

  ops::DetectionPostProcessLayer::DetectionPostProcessParameters parameters;
  parameters.scales.y = node.param().scale.y_scale;
  parameters.scales.x = node.param().scale.x_scale;
  parameters.scales.w = node.param().scale.w_scale;
  parameters.scales.h = node.param().scale.h_scale;

  parameters.iou_threshold = node.param().iou_threshold;
  parameters.score_threshold = node.param().score_threshold;
  parameters.max_boxes_per_class = node.param().max_boxes_per_class;
  parameters.max_detections = node.param().max_detections;
  parameters.num_classes = node.param().num_classes;
  parameters.center_box_format = node.param().center_size_boxes;

  auto boxes_index = node.getInputs().at(NMS::Input::BOXES);
  auto scores_index = node.getInputs().at(NMS::Input::SCORES);
  auto anchors_index = node.getInputs().at(NMS::Input::INPUT_ANCHORS);

  auto o_classes_index = node.getOutputs().at(NMS::Output::BOX_CLASSES);
  auto o_coords_index = node.getOutputs().at(NMS::Output::BOX_COORDS);
  auto o_scores_index = node.getOutputs().at(NMS::Output::BOX_SCORES);
  auto o_num_selected_index = node.getOutputs().at(NMS::Output::NUM_SELECTED);

  parameters.boxes_descr = _ctx.at(boxes_index).shape().dims();
  parameters.scrores_descr = _ctx.at(scores_index).shape().dims();

  parameters.boxes_input = _tensor_reg->getPortableTensor(boxes_index);
  parameters.scores_input = _tensor_reg->getPortableTensor(scores_index);
  parameters.anchors_input = _tensor_reg->getPortableTensor(anchors_index);

  parameters.box_classes_output = _tensor_reg->getPortableTensor(o_classes_index);
  parameters.box_coords_output = _tensor_reg->getPortableTensor(o_coords_index);
  parameters.box_scores_output = _tensor_reg->getPortableTensor(o_scores_index);
  parameters.num_selections_output = _tensor_reg->getPortableTensor(o_num_selected_index);

  auto fn = std::make_unique<ops::DetectionPostProcessLayer>();
  fn->configure(std::move(parameters));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  const auto adj_x = node.param().adj_x;
  const auto adj_y = node.param().adj_y;

  auto fn = std::make_unique<ops::BatchMatMulLayer>();

  fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
  const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);

  auto fn = std::make_unique<ops::BroadcastToLayer>();

  fn->configure(input_tensor, shape_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
  std::vector<const IPortableTensor *> input_tensors;
  for (const auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  const auto epsilon = node.param().epsilon;
  const auto is_training = node.param().is_training;
  const auto &data_format = node.param().data_format;

  auto fn = std::make_unique<ops::FusedBatchNormLayer>();

  fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};

  const auto beta = node.param().beta;
  const auto axis = node.param().axis;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::LogSoftMaxLayer>();

  fn->configure(input_tensor, beta, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
  const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
  const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
  auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);

  auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();

  fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
{
  const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
  const auto output_index{node.getOutputs().at(0)};
  auto block_size = node.param().block_size;

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::DepthToSpaceLayer>();

  fn->configure(input_tensor, block_size, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
  const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
  const auto output_index{node.getOutputs().at(0)};
  auto block_size = node.param().block_size;

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::SpaceToDepthLayer>();

  fn->configure(input_tensor, block_size, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
  const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto shape_alloc = _tensor_reg->getPortableTensor(shape_index);
  auto seed_alloc = _tensor_reg->getPortableTensor(seed_index);

  auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();

  fn->configure(shape_alloc, seed_alloc, output_alloc);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SplitV &node)
{
  const auto num_splits = node.param().num_splits;
  assert(num_splits == static_cast<int>(node.getOutputs().size()));

  const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
  const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
  const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};

  auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
  auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
  auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);

  std::vector<IPortableTensor *> out_tensors;
  for (const auto &output_idx : node.getOutputs())
    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::SplitVLayer>();

  fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);

  _return_fn = std::move(fn);
}

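// LSTM: many operands are optional (CIFG, peephole, projection, and layer
// normalization variants). Presence is detected via _ctx.exist() plus
// non-zero dimensions, and absent tensors are passed to the layer as
// nullptr.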
1378void KernelGenerator::visit(const ir::operation::LSTM &node)
1379{
1380 const auto scratch_buffer_index{
1381 node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
1382 const auto output_state_out_index{
1383 node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
1384 const auto cell_state_out_index{
1385 node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
1386 const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
1387
1388 const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
1389 const auto input_to_input_weights_index{
1390 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
1391 const auto input_to_forget_weights_index{
1392 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
1393 const auto input_to_cell_weights_index{
1394 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
1395 const auto input_to_output_weights_index{
1396 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
1397 const auto recurrent_to_input_weights_index{
1398 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
1399 const auto recurrent_to_forget_weights_index{
1400 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
1401 const auto recurrent_to_cell_weights_index{
1402 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
1403 const auto recurrent_to_output_weights_index{
1404 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
1405 const auto cell_to_input_weights_index{
1406 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
1407 const auto cell_to_forget_weights_index{
1408 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
1409 const auto cell_to_output_weights_index{
1410 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
1411 const auto input_gate_bias_index{
1412 node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
1413 const auto forget_gate_bias_index{
1414 node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
1415 const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
1416 const auto output_gate_bias_index{
1417 node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
1418 const auto projection_weights_index{
1419 node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
1420 const auto projection_bias_index{
1421 node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
1422 const auto output_state_in_index{
1423 node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
1424 const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
1425 const auto time_major = node.param().time_major;
1426
1427 // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
1428 // has_input_to_input_weights && has_recurrent_to_input_weights: no CIFG
1429 // !(has_input_to_input_weights && has_recurrent_to_input_weights): CIFG
1430 // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
1431 bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
1432 (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
1433 _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
1434 bool has_recurrent_to_input_weights =
1435 _ctx.exist(recurrent_to_input_weights_index) &&
1436 (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
1437 _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
1438
1439 // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
1440 // But the cell_to_input_weights does not exist in regular CIFG although peephole.
1441 // has_cell_to_forget_weights && has_cell_to_output_weights: peephole
1442 // !(has_cell_to_forget_weights && has_cell_to_output_weights): no peephole
1443 bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
1444 _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
1445 bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
1446 _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
1447
1448 bool has_input_gate_bias =
1449 _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0);
1450
1451 bool has_projection_weights = _ctx.exist(projection_weights_index) &&
1452 (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
1453 _ctx.at(projection_weights_index).shape().dim(1) != 0);
1454 bool has_projection_bias =
1455 _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0);
1456
1457 auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
1458 ? _tensor_reg->getPortableTensor(scratch_buffer_index)
1459 : nullptr; // optional
1460 auto output_state_out_tensor = _ctx.exist(output_state_out_index)
1461 ? _tensor_reg->getPortableTensor(output_state_out_index)
1462 : nullptr; // optional
1463 auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
1464 ? _tensor_reg->getPortableTensor(cell_state_out_index)
1465 : nullptr; // optional
1466 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1467
1468 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1469
1470 auto input_to_input_weights_tensor =
1471 has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
1472 : nullptr; // optional
1473 auto input_to_forget_weights_tensor =
1474 _tensor_reg->getPortableTensor(input_to_forget_weights_index);
1475 auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
1476 auto input_to_output_weights_tensor =
1477 _tensor_reg->getPortableTensor(input_to_output_weights_index);
1478 auto recurrent_to_input_weights_tensor =
1479 has_recurrent_to_input_weights
1480 ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
1481 : nullptr; // optional
1482 auto recurrent_to_forget_weights_tensor =
1483 _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
1484 auto recurrent_to_cell_weights_tensor =
1485 _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
1486 auto recurrent_to_output_weights_tensor =
1487 _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);
1488
1489 auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
1490 auto cell_to_forget_weights_tensor =
1491 has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
1492 : nullptr; // optional
1493 auto cell_to_output_weights_tensor =
1494 has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
1495 : nullptr; // optional
1496
1497 auto input_gate_bias_tensor =
1498 has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
1499 auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
1500 auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
1501 auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
1502 auto output_state_in_tensor = _tensor_reg->getPortableTensor(output_state_in_index);
1503 auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);
1504
1505 auto projection_weights_tensor = has_projection_weights
1506 ? _tensor_reg->getPortableTensor(projection_weights_index)
1507 : nullptr; // optional
1508 auto projection_bias_tensor = has_projection_bias
1509 ? _tensor_reg->getPortableTensor(projection_bias_index)
1510 : nullptr; // optional
1511
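 // Layer normalization weights are supplied only in the 24-input form of LSTM;
 // otherwise these pointers stay null and the layer skips normalization.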
1512 IPortableTensor *input_layer_norm_weights_tensor = nullptr;
1513 IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
1514 IPortableTensor *cell_layer_norm_weights_tensor = nullptr;
1515 IPortableTensor *output_layer_norm_weights_tensor = nullptr;
1516 if (node.getInputs().size() == 24)
1517 {
1518 const auto input_layer_norm_weights_index{
1519 node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
1520 const auto forget_layer_norm_weights_index{
1521 node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
1522 const auto cell_layer_norm_weights_index{
1523 node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
1524 const auto output_layer_norm_weights_index{
1525 node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};
1526
1527 input_layer_norm_weights_tensor =
1528 _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
1529 forget_layer_norm_weights_tensor =
1530 _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
1531 cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
1532 output_layer_norm_weights_tensor =
1533 _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
1534 }
1535
1536 auto fn = std::make_unique<ops::LSTMLayer>();
1537
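 // The aux_* inputs belong to the bidirectional-sequence form of this layer and
 // are unused here, hence the explicit nullptrs below.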
1538 fn->configure(
1539 input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
1540 input_to_cell_weights_tensor, input_to_output_weights_tensor, recurrent_to_input_weights_tensor,
1541 recurrent_to_forget_weights_tensor, recurrent_to_cell_weights_tensor,
1542 recurrent_to_output_weights_tensor, cell_to_input_weights_tensor, cell_to_forget_weights_tensor,
1543 cell_to_output_weights_tensor, input_layer_norm_weights_tensor,
1544 forget_layer_norm_weights_tensor, cell_layer_norm_weights_tensor,
1545 output_layer_norm_weights_tensor,
1546 /*aux_input=*/nullptr,
1547 /*aux_input_to_input_weights=*/nullptr,
1548 /*aux_input_to_forget_weights=*/nullptr,
1549 /*aux_input_to_cell_weights=*/nullptr,
1550 /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
1551 cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
1552 projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
1553 /*forward_sequence=*/true, time_major,
1554 /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
1555 output_tensor,
 1556 !_ctx.at(output_state_in_index).info().isVariable() /* non-variable means the frontend passed an empty buffer */,
1557 !_ctx.at(cell_state_in_index).info().isVariable());
1558
1559 _return_fn = std::move(fn);
1560}
1561
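// Each visit() resolves operand indices to portable tensors, builds the matching
// ops::*Layer, configures it, and returns the ready-to-run kernel via _return_fn.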
1562void KernelGenerator::visit(const ir::operation::RoPE &node)
1563{
1564 const auto input_index{node.getInputs().at(ir::operation::RoPE::Input::INPUT)};
1565 const auto sin_table{node.getInputs().at(ir::operation::RoPE::Input::SIN_TABLE)};
1566 const auto cos_table{node.getInputs().at(ir::operation::RoPE::Input::COS_TABLE)};
1567 const auto output_index{node.getOutputs().at(ir::operation::RoPE::Output::OUTPUT)};
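 // RoPE rotates pairs of input elements by position-dependent angles read from
 // the precomputed sin/cos tables, encoding token position in place.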
1568
1569 auto mode = ops::getRoPEMode(node.param().mode);
1570
1571 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1572 auto sin_tensor = _tensor_reg->getPortableTensor(sin_table);
1573 auto cos_tensor = _tensor_reg->getPortableTensor(cos_table);
1574 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1575
1576 auto fn = std::make_unique<ops::RoPELayer>();
1577
1578 fn->configure(input_tensor, sin_tensor, cos_tensor, mode, output_tensor);
1579 _return_fn = std::move(fn);
1580}
1581
1582} // namespace onert::backend::cpu
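For readers new to this backend, the following is a minimal, self-contained sketch of the visit/configure/_return_fn flow that every KernelGenerator::visit overload above follows. The Kernel, AddKernel, AddNode, and MiniKernelGenerator names are hypothetical stand-ins for illustration only, not onert's real API.

#include <iostream>
#include <memory>

// Hypothetical stand-ins: Kernel plays the role of the executable function
// interface, AddNode the role of an IR operation, and MiniKernelGenerator
// mirrors the build/configure/hand-back pattern of KernelGenerator.
struct Kernel
{
  virtual void run() = 0;
  virtual ~Kernel() = default;
};

struct AddKernel : Kernel
{
  float lhs = 0.f, rhs = 0.f;
  void configure(float l, float r) { lhs = l; rhs = r; }
  void run() override { std::cout << "add: " << (lhs + rhs) << '\n'; }
};

struct AddNode
{
  float lhs, rhs;
};

struct MiniKernelGenerator
{
  std::unique_ptr<Kernel> _return_fn;

  // Same shape as the visit() overloads above: build, configure, hand back.
  void visit(const AddNode &node)
  {
    auto fn = std::make_unique<AddKernel>();
    fn->configure(node.lhs, node.rhs);
    _return_fn = std::move(fn);
  }
};

int main()
{
  MiniKernelGenerator gen;
  gen.visit(AddNode{1.5f, 2.5f});
  gen._return_fn->run(); // prints "add: 4"
}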