ONE - On-device Neural Engine
KernelGenerator.cc
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "KernelGenerator.h"

#include "ops/AddNLayer.h"
#include "ops/ArgMinMaxLayer.h"
#include "ops/BatchToSpaceNDLayer.h"
#include "ops/BinaryArithmeticLayer.h"
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
#include "ops/DepthToSpaceLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
#include "ops/DynamicUpdateSliceLayer.h"
#include "ops/ElementwiseActivationLayer.h"
#include "ops/ElementwiseBinaryLayer.h"
#include "ops/ElementwiseUnaryLayer.h"
#include "ops/ExpandDimsLayer.h"
#include "ops/FillLayer.h"
#include "ops/FullyConnectedLayer.h"
#include "ops/GatherLayer.h"
#include "ops/LSTMLayer.h"
#include "ops/MeanLayer.h"
#include "ops/DetectionPostProcessLayer.h"
#include "ops/OneHotLayer.h"
#include "ops/OperationUtils.h"
#include "ops/PackLayer.h"
#include "ops/PadLayer.h"
#include "ops/PoolLayer.h"
#include "ops/PowLayer.h"
#include "ops/QuantizeLayer.h"
#include "ops/RangeLayer.h"
#include "ops/RankLayer.h"
#include "ops/ReduceLayer.h"
#include "ops/ReshapeLayer.h"
#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
#include "ops/RoPELayer.h"
#include "ops/SelectLayer.h"
#include "ops/ShapeLayer.h"
#include "ops/SliceLayer.h"
#include "ops/SoftMaxLayer.h"
#include "ops/SpaceToBatchNDLayer.h"
#include "ops/SpaceToDepthLayer.h"
#include "ops/SqDiffLayer.h"
#include "ops/SplitLayer.h"
#include "ops/SplitVLayer.h"
#include "ops/TileLayer.h"
#include "ops/TopKV2Layer.h"
#include "ops/TransposeLayer.h"
#include "ops/UnpackLayer.h"
#include "ops/StatelessRandomUniformLayer.h"
#include "ops/L2NormLayer.h"
#include "ops/BatchMatMulLayer.h"
#include "ops/BroadcastToLayer.h"
#include "ops/FusedBatchNormLayer.h"
#include "ops/LogSoftMaxLayer.h"
#include "ops/StridedSliceLayer.h"
#include "ops/RmsNormLayer.h"

#include <backend/Backend.h>
#include <backend/IConfig.h>
#include <memory>
#include <util/Utils.h>
#include <util/logging.h>

#include <stdexcept>

namespace onert::backend::cpu
{

namespace
{
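// Helper converters that map IR-level enum values onto the CPU backend's
// kernel enums. Any unsupported value falls through to a runtime_error.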
ops::ArithmeticType
convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
{
  switch (arithmetic_type_ir)
  {
    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
      return ops::ArithmeticType::kAdd;
    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
      return ops::ArithmeticType::kSub;
    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
      return ops::ArithmeticType::kMul;
    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
      return ops::ArithmeticType::kDiv;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseActivationType
convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseActivation::Type::ELU:
      return ops::ElementwiseActivationType::kElu;
    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
      return ops::ElementwiseActivationType::kLogistic;
    case ir::operation::ElementwiseActivation::Type::RELU:
      return ops::ElementwiseActivationType::kReLU;
    case ir::operation::ElementwiseActivation::Type::TANH:
      return ops::ElementwiseActivationType::kTanh;
    case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
      return ops::ElementwiseActivationType::kLeakyReLU;
    case ir::operation::ElementwiseActivation::Type::GELU:
      return ops::ElementwiseActivationType::kGELU;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseBinaryType
convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::FLOOR_DIV:
      return ops::ElementwiseBinaryType::kFloorDiv;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::FLOOR_MOD:
      return ops::ElementwiseBinaryType::kFloorMod;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
      return ops::ElementwiseBinaryType::kLogicalAnd;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
      return ops::ElementwiseBinaryType::kLogicalOr;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
      return ops::ElementwiseBinaryType::kMax;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
      return ops::ElementwiseBinaryType::kMin;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseUnary::Type::ABS:
      return ops::ElementwiseUnaryType::kAbs;
    case ir::operation::ElementwiseUnary::Type::CAST:
      return ops::ElementwiseUnaryType::kCast;
    case ir::operation::ElementwiseUnary::Type::COS:
      return ops::ElementwiseUnaryType::kCos;
    case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
      return ops::ElementwiseUnaryType::kDequantize;
    case ir::operation::ElementwiseUnary::Type::ERF:
      return ops::ElementwiseUnaryType::kErf;
    case ir::operation::ElementwiseUnary::Type::EXP:
      return ops::ElementwiseUnaryType::kExp;
    case ir::operation::ElementwiseUnary::Type::FLOOR:
      return ops::ElementwiseUnaryType::kFloor;
    case ir::operation::ElementwiseUnary::Type::LOG:
      return ops::ElementwiseUnaryType::kLog;
    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
      return ops::ElementwiseUnaryType::kLogicalNot;
    case ir::operation::ElementwiseUnary::Type::NEG:
      return ops::ElementwiseUnaryType::kNeg;
    case ir::operation::ElementwiseUnary::Type::QUANTIZE:
      return ops::ElementwiseUnaryType::kQuantize;
    case ir::operation::ElementwiseUnary::Type::ROUND:
      return ops::ElementwiseUnaryType::kRound;
    case ir::operation::ElementwiseUnary::Type::RSQRT:
      return ops::ElementwiseUnaryType::kRSqrt;
    case ir::operation::ElementwiseUnary::Type::SIN:
      return ops::ElementwiseUnaryType::kSin;
    case ir::operation::ElementwiseUnary::Type::SQRT:
      return ops::ElementwiseUnaryType::kSqrt;
    case ir::operation::ElementwiseUnary::Type::SQUARE:
      return ops::ElementwiseUnaryType::kSquare;
    case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
      return ops::ElementwiseUnaryType::kZerosLike;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::Pool2D::PoolType::AVG:
      return ops::PoolType::kAvg;
    case ir::operation::Pool2D::PoolType::MAX:
      return ops::PoolType::kMax;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
  switch (reduce_type_ir)
  {
    case ir::operation::Reduce::ReduceType::MAX:
      return ops::ReduceType::kMax;
    case ir::operation::Reduce::ReduceType::MIN:
      return ops::ReduceType::kMin;
    case ir::operation::Reduce::ReduceType::SUM:
      return ops::ReduceType::kSum;
    case ir::operation::Reduce::ReduceType::PROD:
      return ops::ReduceType::kProd;
    case ir::operation::Reduce::ReduceType::ANY:
      return ops::ReduceType::kAny;
    case ir::operation::Reduce::ReduceType::ALL:
      return ops::ReduceType::kAll;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}
} // namespace

KernelGenerator::KernelGenerator(
  const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
  const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
  const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
  const std::shared_ptr<ExternalContext> &external_context)
  : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()), _operations_ctx{graph.operations()},
    _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
    _external_context(external_context)
{
  // DO NOTHING
}
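// Builds a FunctionSequence for the operation at `ind`: attaches a dynamic
// tensor context for run-time shape inference, visits the node to create the
// kernel, and registers the kernel's input/output tensors.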

std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
  auto ret = std::make_unique<exec::FunctionSequence>();

  assert(_tensor_builder->dynamicTensorManager());
  assert(_tensor_reg);

  // Prepare to handle dynamic tensors later
  auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
  {
    dyn_ctx->op = &_operations_ctx.at(ind);
    dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_tensor_reg);
  }
  ret->dynamic_tensor_ctx(dyn_ctx);

  auto &op = _graph.operations().at(ind);
  op.accept(*this);
  assert(_return_fn); // _return_fn must have been generated
  ret->append(std::move(_return_fn));

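  // Hold a reference on every defined input/output tensor so its buffer is
  // not released while this sequence can still run.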
  for (auto &&ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
  {
    auto tensor = _tensor_reg->getNativeTensor(ind);
    if (tensor)
    {
      tensor->increase_ref();
    }
  }
  return ret;
}

void KernelGenerator::visit(const ir::operation::AddN &node)
{
  const auto output_index{node.getOutputs().at(0)};

  std::vector<const IPortableTensor *> input_tensors;
  for (const auto &input_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::AddNLayer>();

  fn->configure(std::move(input_tensors), output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Conv2D &node)
{
  using ir::operation::Conv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  const auto stride = node.param().stride;
  const auto activation = node.param().activation;
  const auto &param_padding = node.param().padding;
  const auto dilation = node.param().dilation;

  const bool is_cacheable_weights = ker_tensor->is_constant();

  auto fn = std::make_unique<ops::ConvolutionLayer>();
  if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
  {
    fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
                  param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
                  stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
                  activation, ofm_tensor, is_cacheable_weights);

    _return_fn = std::move(fn);
    return;
  }
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);

  const auto padding =
    ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
                         dilation.width_factor, dilation.height_factor);

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
                padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
                dilation.width_factor, dilation.height_factor, activation, ofm_tensor,
                is_cacheable_weights);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
{
  using ir::operation::DepthwiseConv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};

  const auto stride = node.param().stride;
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
  // Kernel format is [1, kernel_height, kernel_width, depth_out].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);
  const auto dilation_width = node.param().dilation.width_factor;
  const auto dilation_height = node.param().dilation.height_factor;
  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
                                            ker_width, ker_height, dilation_width, dilation_height);
  const auto multiplier = node.param().multiplier;
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
                padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
                dilation_height, activation, ofm_tensor, _external_context);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::DynamicUpdateSlice &node)
{
  using ir::operation::DynamicUpdateSlice;

  const auto output_index{node.getOutputs().at(0)};
  const auto operand_index{node.getInputs().at(DynamicUpdateSlice::Input::OPERAND)};
  const auto update_index{node.getInputs().at(DynamicUpdateSlice::Input::UPDATE)};
  const auto indices_index{node.getInputs().at(DynamicUpdateSlice::Input::INDICES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto operand_tensor = _tensor_reg->getPortableTensor(operand_index);
  auto update_tensor = _tensor_reg->getPortableTensor(update_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);

  auto fn = std::make_unique<ops::DynamicUpdateSliceLayer>();

  fn->configure(operand_tensor, update_tensor, indices_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Concat &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto rank = _ctx.at(ofm_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis);

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);

  std::vector<const IPortableTensor *> input_tensors;
  for (const auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  auto fn = std::make_unique<ops::ConcatLayer>();

  fn->configure(input_tensors, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
  const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto input_alloc = _tensor_reg->getPortableTensor(input_index);
  auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);

  auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();

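  // The NNAPI form of BatchToSpaceND has only two inputs; a third input, when
  // present, carries the crops data.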
  IPortableTensor *crops_alloc = nullptr;
  const auto NNApiInputs = 2;

  if (node.getInputs().size() != NNApiInputs)
  {
    const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
    crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
  }

  fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Fill &node)
{
  const auto output_index{node.getOutputs().at(0)};
  // SHAPE input is used for shape inference
  const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto value_tensor = _tensor_reg->getPortableTensor(value_index);

  auto fn = std::make_unique<ops::FillLayer>();

  fn->configure(value_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
  using ir::operation::FullyConnected;

  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
  const auto activation = node.param().activation;
  const auto weights_format = node.param().weights_format;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
  auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::FullyConnectedLayer>();

  fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
                _external_context);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reshape &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  // optional 2nd input
  IPortableTensor *shape_tensor = nullptr;

  if (node.getInputs().size() == 2)
  {
    const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
    shape_tensor = _tensor_reg->getPortableTensor(shape_index);
  }

  auto fn = std::make_unique<ops::ReshapeLayer>();

  fn->configure(input_tensor, shape_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Squeeze &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  // Squeeze can share the same kernel as Reshape
  auto fn = std::make_unique<ops::ReshapeLayer>();

  fn->configure(input_tensor, nullptr, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Softmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};

  const auto beta = node.param().beta;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::SoftMaxLayer>();

  fn->configure(input_tensor, beta, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};

  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::BinaryArithmeticLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
                convertArithmeticType(node.param().arithmetic_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Comparison &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
  const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto comparison_type = node.param().comparison_type;

  auto fn = std::make_unique<ops::CompareLayer>();

  fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Gather &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
  const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);

  const auto rank = _ctx.at(input_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis);

  auto fn = std::make_unique<ops::GatherLayer>();

  fn->configure(input_tensor, indices_tensor, output_tensor, axis, _external_context.get());

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::OneHot &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
  const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
  const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
  const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};

  const auto axis = node.param().axis;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
  auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
  auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
  auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);

  assert(indices_tensor->data_type() == OperandType::INT32);
  assert(axis <= static_cast<int>(indices_tensor->getShape().rank()));

  auto fn = std::make_unique<ops::OneHotLayer>();

  fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Custom &node)
{
  auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
                          std::vector<custom::TypeInfo> &types,
                          std::vector<IPortableTensor *> &tensors) {
    for (const auto &idx : opSeq)
    {
      const auto &operand = _ctx.at(idx);
      types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
      auto in_tensor = _tensor_reg->getPortableTensor(idx);
      tensors.emplace_back(in_tensor);
    }
  };
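  // Custom operations are not backed by a built-in layer; the user-supplied
  // kernel builder constructs the kernel from the collected type info,
  // tensors, and opaque user data.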

  custom::CustomKernelConfigParams params{};

  fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
  fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);

  params.userdata = node.userdata().data;
  params.userdata_size = node.userdata().size;

  auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ElementwiseActivationLayer>();

  fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
                node.param().approximate, convertElementwiseActivationType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();

  fn->configure(lhs_tensor, rhs_tensor, output_tensor,
                convertElementwiseBinaryType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

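  // QUANTIZE gets a dedicated kernel; every other unary op is dispatched
  // through the generic ElementwiseUnaryLayer.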
  if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
  {
    auto fn = std::make_unique<ops::QuantizeLayer>();
    fn->configure(input_tensor, output_tensor);
    _return_fn = std::move(fn);
  }
  else
  {
    auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
    fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
    _return_fn = std::move(fn);
  }
}

void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
  // AXIS input is used for output shape inference

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ExpandDimsLayer>();

  fn->configure(input_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pack &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto rank = _ctx.at(ofm_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis);

  assert(-rank <= axis && axis < rank);

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);

  std::vector<const IPortableTensor *> input_tensors;
  for (const auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  auto fn = std::make_unique<ops::PackLayer>();

  fn->configure(input_tensors, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Unpack &node)
{
  const auto input_index{node.getInputs().at(0)};

  const auto rank = _ctx.at(input_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis);

  assert(rank == 0 || (-rank <= axis && axis < rank));

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  std::vector<IPortableTensor *> output_tensors;
  for (const auto &output_idx : node.getOutputs())
    output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::UnpackLayer>();

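  // A negative axis counts from the back; fold it into [0, rank) before
  // handing it to the layer.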
  uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);

  fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pad &node)
{
  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
  const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
  const auto output_index{node.getOutputs().at(0)};

  auto input = _tensor_reg->getPortableTensor(input_index);
  auto pad = _tensor_reg->getPortableTensor(pad_index);
  auto output = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::PadLayer>();

  IPortableTensor *value = nullptr;
  if (node.getInputs().size() == 3) // isPadV2
  {
    const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
    value = _tensor_reg->getPortableTensor(value_index);
  }

  fn->configure(input, pad, value, output);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Transpose &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
  const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);

  auto fn = std::make_unique<ops::TransposeLayer>();

  fn->configure(input_tensor, perm_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reduce &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
  const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};

  const auto keep_dims = node.param().keep_dims;
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);

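  // MEAN has its own kernel; all other reduce types share ReduceLayer with a
  // converted reduce-type tag.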
  if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
  {
    auto fn = std::make_unique<ops::MeanLayer>();

    fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);

    _return_fn = std::move(fn);
  }
  else
  {
    auto fn = std::make_unique<ops::ReduceLayer>();

    const auto reduce_type = convertReduceType(node.param().reduce_type);
    fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);

    _return_fn = std::move(fn);
  }
}

void KernelGenerator::visit(const ir::operation::Select &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
  const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
  const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
  auto true_tensor = _tensor_reg->getPortableTensor(true_index);
  auto false_tensor = _tensor_reg->getPortableTensor(false_index);

  auto fn = std::make_unique<ops::SelectLayer>();

  fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Slice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
  auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);

  auto fn = std::make_unique<ops::SliceLayer>();

  fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::StridedSlice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
  auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
  auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);

  auto begin_mask = node.param().begin_mask;
  auto end_mask = node.param().end_mask;
  auto shrink_axis_mask = node.param().shrink_axis_mask;

  auto fn = std::make_unique<ops::StridedSliceLayer>();

  fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
                end_mask, shrink_axis_mask);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Split &node)
{
  const auto num_splits = node.param().num_splits;
  assert(num_splits == static_cast<int>(node.getOutputs().size()));

  const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
  const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};

  auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);

  std::vector<IPortableTensor *> out_tensors;
  for (const auto &output_idx : node.getOutputs())
    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::SplitLayer>();

  fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Shape &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::ShapeLayer>();

  fn->configure(ifm_tensor, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::TopKV2 &node)
{
  const auto outputValues_index{node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_VALUES)};
  const auto outputIndices_index{
    node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)};

  const auto inputData_index{node.getInputs().at(ir::operation::TopKV2::Input::INPUT)};

  const auto k = node.param().k;

  auto values_tensor = _tensor_reg->getPortableTensor(outputValues_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(outputIndices_index);
  auto input_tensor = _tensor_reg->getPortableTensor(inputData_index);

  auto fn = std::make_unique<ops::TopKV2Layer>();

  fn->configure(input_tensor, values_tensor, indices_tensor, k);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};

  auto align_corners = node.param().align_corners;
  auto half_pixel_centers = node.param().half_pixel_centers;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ResizeBilinearLayer>();

  if (node.getInputs().size() == 1)
  {
    fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
                  align_corners, half_pixel_centers);
  }
  else
  {
    assert(node.getInputs().size() == 2);
    const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
    auto size_tensor = _tensor_reg->getPortableTensor(size_index);
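    // A constant SIZE tensor can be folded into the configuration now;
    // otherwise the output size is read from the tensor at run time.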
    if (size_tensor->is_constant())
    {
      auto size_vec = _ctx.at(size_index).asVector<int32_t>();
      const auto height_out = size_vec[0];
      const auto width_out = size_vec[1];
      fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
                    half_pixel_centers);
    }
    else
    {
      fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
    }
  }

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reverse &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);

  auto fn = std::make_unique<ops::ReverseLayer>();

  fn->configure(input_tensor, axis_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);

  auto fn = std::make_unique<ops::ArgMinMaxLayer>();

  fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};

  const auto kh = node.param().kh;
  const auto kw = node.param().kw;
  const auto stride = node.param().stride;
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
  const auto padding =
    ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::PoolLayer>();

  fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
                stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
                convertPoolType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pow &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::PowLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::L2Normalization &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(0)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto input_alloc = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::L2NormLayer>();

  fn->configure(input_alloc, output_alloc);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Range &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto start_index{node.getInputs().at(ir::operation::Range::START)};
  const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
  const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto start_tensor = _tensor_reg->getPortableTensor(start_index);
  auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
  auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);

  auto fn = std::make_unique<ops::RangeLayer>();

  fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Rank &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::RankLayer>();

  fn->configure(ifm_tensor, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::RmsNorm &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::RmsNorm::Input::INPUT)};
  const auto gamma_index{node.getInputs().at(ir::operation::RmsNorm::Input::GAMMA)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto gamma_tensor = _tensor_reg->getPortableTensor(gamma_index);
  auto epsilon = node.param().epsilon;

  auto fn = std::make_unique<ops::RmsNormLayer>();

  fn->configure(ifm_tensor, gamma_tensor, epsilon, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::SqDiffLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Tile &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
  const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);

  auto fn = std::make_unique<ops::TileLayer>();

  fn->configure(input_tensor, multiples_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::DetectionPostProcess &node)
{
  using NMS = ir::operation::DetectionPostProcess;

  ops::DetectionPostProcessLayer::DetectionPostProcessParameters parameters;
  parameters.scales.y = node.param().scale.y_scale;
  parameters.scales.x = node.param().scale.x_scale;
  parameters.scales.w = node.param().scale.w_scale;
  parameters.scales.h = node.param().scale.h_scale;

  parameters.iou_threshold = node.param().iou_threshold;
  parameters.score_threshold = node.param().score_threshold;
  parameters.max_boxes_per_class = node.param().max_boxes_per_class;
  parameters.max_detections = node.param().max_detections;
  parameters.num_classes = node.param().num_classes;
  parameters.center_box_format = node.param().center_size_boxes;
  parameters.max_classes_per_detection = node.param().max_classes_per_detection;

  auto boxes_index = node.getInputs().at(NMS::Input::BOXES);
  auto scores_index = node.getInputs().at(NMS::Input::SCORES);
  auto anchors_index = node.getInputs().at(NMS::Input::INPUT_ANCHORS);

  auto o_classes_index = node.getOutputs().at(NMS::Output::BOX_CLASSES);
  auto o_coords_index = node.getOutputs().at(NMS::Output::BOX_COORDS);
  auto o_scores_index = node.getOutputs().at(NMS::Output::BOX_SCORES);
  auto o_num_selected_index = node.getOutputs().at(NMS::Output::NUM_SELECTED);

  parameters.boxes_descr = _ctx.at(boxes_index).shape().dims();
  parameters.scrores_descr = _ctx.at(scores_index).shape().dims();

  parameters.boxes_input = _tensor_reg->getPortableTensor(boxes_index);
  parameters.scores_input = _tensor_reg->getPortableTensor(scores_index);
  parameters.anchors_input = _tensor_reg->getPortableTensor(anchors_index);

  parameters.box_classes_output = _tensor_reg->getPortableTensor(o_classes_index);
  parameters.box_coords_output = _tensor_reg->getPortableTensor(o_coords_index);
  parameters.box_scores_output = _tensor_reg->getPortableTensor(o_scores_index);
  parameters.num_selections_output = _tensor_reg->getPortableTensor(o_num_selected_index);

  auto fn = std::make_unique<ops::DetectionPostProcessLayer>();
  fn->configure(std::move(parameters));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  const auto adj_x = node.param().adj_x;
  const auto adj_y = node.param().adj_y;

  auto fn = std::make_unique<ops::BatchMatMulLayer>();

  fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
  const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);

  auto fn = std::make_unique<ops::BroadcastToLayer>();

  fn->configure(input_tensor, shape_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
  std::vector<const IPortableTensor *> input_tensors;
  for (const auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  const auto epsilon = node.param().epsilon;
  const auto is_training = node.param().is_training;
  const auto &data_format = node.param().data_format;

  auto fn = std::make_unique<ops::FusedBatchNormLayer>();

  fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};

  const auto beta = node.param().beta;
  const auto axis = node.param().axis;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::LogSoftMaxLayer>();

  fn->configure(input_tensor, beta, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
  const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
  const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
  auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);

  auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();

  fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
{
  const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
  const auto output_index{node.getOutputs().at(0)};
  auto block_size = node.param().block_size;

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::DepthToSpaceLayer>();

  fn->configure(input_tensor, block_size, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
  const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
  const auto output_index{node.getOutputs().at(0)};
  auto block_size = node.param().block_size;

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::SpaceToDepthLayer>();

  fn->configure(input_tensor, block_size, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
  const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto shape_alloc = _tensor_reg->getPortableTensor(shape_index);
  auto seed_alloc = _tensor_reg->getPortableTensor(seed_index);

  auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();

  fn->configure(shape_alloc, seed_alloc, output_alloc);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SplitV &node)
{
  const auto num_splits = node.param().num_splits;
  assert(num_splits == static_cast<int>(node.getOutputs().size()));

  const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
  const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
  const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};

  auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
  auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
  auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);

  std::vector<IPortableTensor *> out_tensors;
  for (const auto &output_idx : node.getOutputs())
    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::SplitVLayer>();

  fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::LSTM &node)
{
  const auto scratch_buffer_index{
    node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
  const auto output_state_out_index{
    node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
  const auto cell_state_out_index{
    node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
  const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};

  const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
  const auto input_to_input_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
  const auto input_to_forget_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
  const auto input_to_cell_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
  const auto input_to_output_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
  const auto recurrent_to_input_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
  const auto recurrent_to_forget_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
  const auto recurrent_to_cell_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
  const auto recurrent_to_output_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
  const auto cell_to_input_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
  const auto cell_to_forget_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
  const auto cell_to_output_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
  const auto input_gate_bias_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
  const auto forget_gate_bias_index{
    node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
  const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
  const auto output_gate_bias_index{
    node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
  const auto projection_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
  const auto projection_bias_index{
    node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
  const auto output_state_in_index{
    node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
  const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
  const auto time_major = node.param().time_major;

  // NOTE The input_to_input_weights and recurrent_to_input_weights tensors do not exist in CIFG.
  //      has_input_to_input_weights && has_recurrent_to_input_weights: no CIFG
  //      !(has_input_to_input_weights && has_recurrent_to_input_weights): CIFG
  // NOTE The cell_to_input_weights tensor does not exist without peephole connections,
  //      even in a regular (non-CIFG) LSTM.
  bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
                                    (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
                                     _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
  bool has_recurrent_to_input_weights =
    _ctx.exist(recurrent_to_input_weights_index) &&
    (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
     _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);

  // NOTE The cell_to_forget_weights and cell_to_output_weights tensors exist with peephole
  //      connections, but the cell_to_input_weights tensor does not exist in CIFG even with
  //      peephole.
  //      has_cell_to_forget_weights && has_cell_to_output_weights: peephole
  //      !(has_cell_to_forget_weights && has_cell_to_output_weights): no peephole
  bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
                                    _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
  bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
                                    _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;

  bool has_input_gate_bias =
    _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0);

  bool has_projection_weights = _ctx.exist(projection_weights_index) &&
                                (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
                                 _ctx.at(projection_weights_index).shape().dim(1) != 0);
  bool has_projection_bias =
    _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0);

  auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
                                 ? _tensor_reg->getPortableTensor(scratch_buffer_index)
                                 : nullptr; // optional
  auto output_state_out_tensor = _ctx.exist(output_state_out_index)
                                   ? _tensor_reg->getPortableTensor(output_state_out_index)
                                   : nullptr; // optional
  auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
                                 ? _tensor_reg->getPortableTensor(cell_state_out_index)
                                 : nullptr; // optional
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto input_to_input_weights_tensor =
    has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
                               : nullptr; // optional
  auto input_to_forget_weights_tensor =
    _tensor_reg->getPortableTensor(input_to_forget_weights_index);
  auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
  auto input_to_output_weights_tensor =
    _tensor_reg->getPortableTensor(input_to_output_weights_index);
  auto recurrent_to_input_weights_tensor =
    has_recurrent_to_input_weights
      ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
      : nullptr; // optional
  auto recurrent_to_forget_weights_tensor =
    _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
  auto recurrent_to_cell_weights_tensor =
    _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
  auto recurrent_to_output_weights_tensor =
    _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);

  auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
  auto cell_to_forget_weights_tensor =
    has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
                               : nullptr; // optional
  auto cell_to_output_weights_tensor =
    has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
                               : nullptr; // optional

  auto input_gate_bias_tensor =
    has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
  auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
  auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
  auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
  auto output_state_in_tensor = _tensor_reg->getPortableTensor(output_state_in_index);
  auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);

  auto projection_weights_tensor = has_projection_weights
                                     ? _tensor_reg->getPortableTensor(projection_weights_index)
                                     : nullptr; // optional
  auto projection_bias_tensor = has_projection_bias
                                  ? _tensor_reg->getPortableTensor(projection_bias_index)
                                  : nullptr; // optional

  IPortableTensor *input_layer_norm_weights_tensor = nullptr;
  IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
  IPortableTensor *cell_layer_norm_weights_tensor = nullptr;
  IPortableTensor *output_layer_norm_weights_tensor = nullptr;
1524 if (node.getInputs().size() == 24) // 24 inputs means the four layer-normalization weights are provided
1525 {
1526 const auto input_layer_norm_weights_index{
1527 node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
1528 const auto forget_layer_norm_weights_index{
1529 node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
1530 const auto cell_layer_norm_weights_index{
1531 node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
1532 const auto output_layer_norm_weights_index{
1533 node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};
1534
1535 input_layer_norm_weights_tensor =
1536 _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
1537 forget_layer_norm_weights_tensor =
1538 _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
1539 cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
1540 output_layer_norm_weights_tensor =
1541 _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
1542 }
1543
1544 auto fn = std::make_unique<ops::LSTMLayer>();
1545
1546 fn->configure(
1547 input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
1548 input_to_cell_weights_tensor, input_to_output_weights_tensor, recurrent_to_input_weights_tensor,
1549 recurrent_to_forget_weights_tensor, recurrent_to_cell_weights_tensor,
1550 recurrent_to_output_weights_tensor, cell_to_input_weights_tensor, cell_to_forget_weights_tensor,
1551 cell_to_output_weights_tensor, input_layer_norm_weights_tensor,
1552 forget_layer_norm_weights_tensor, cell_layer_norm_weights_tensor,
1553 output_layer_norm_weights_tensor,
1554 /*aux_input=*/nullptr,
1555 /*aux_input_to_input_weights=*/nullptr,
1556 /*aux_input_to_forget_weights=*/nullptr,
1557 /*aux_input_to_cell_weights=*/nullptr,
1558 /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
1559 cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
1560 projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
1561 /*forward_sequence=*/true, time_major,
1562 /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
1563 output_tensor,
1564 !_ctx.at(output_state_in_index).info().isVariable() /* a non-variable state input means the frontend supplied an empty buffer */,
1565 !_ctx.at(cell_state_in_index).info().isVariable());
1566
1567 _return_fn = std::move(fn);
1568}
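The LSTM lowering above repeats one pattern for every optional input (CIFG, peephole, projection, and layer-normalization weights): probe the operand for presence, then resolve it to a registered portable tensor or to nullptr. The sketch below factors that pattern into a helper; the lambda name optionalTensor is hypothetical and not part of this file, though it assumes the same _tensor_reg member used above.

// Illustrative sketch only: hypothetical helper, same class context assumed.
auto optionalTensor = [&](const ir::OperandIndex &index, bool present) -> IPortableTensor * {
  // An optional operand resolves to its registered tensor when present,
  // and to nullptr otherwise; LSTMLayer treats nullptr as "input absent".
  return present ? _tensor_reg->getPortableTensor(index) : nullptr;
};

// Equivalent to the explicit ternaries above:
auto projection_weights_example = optionalTensor(projection_weights_index, has_projection_weights);
auto projection_bias_example = optionalTensor(projection_bias_index, has_projection_bias);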
1569
1570void KernelGenerator::visit(const ir::operation::RoPE &node)
1571{
1572 const auto input_index{node.getInputs().at(ir::operation::RoPE::Input::INPUT)};
1573 const auto sin_table{node.getInputs().at(ir::operation::RoPE::Input::SIN_TABLE)};
1574 const auto cos_table{node.getInputs().at(ir::operation::RoPE::Input::COS_TABLE)};
1575 const auto output_index{node.getOutputs().at(ir::operation::RoPE::Output::OUTPUT)};
1576
1577 auto mode = ops::getRoPEMode(node.param().mode);
1578
1579 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1580 auto sin_tensor = _tensor_reg->getPortableTensor(sin_table);
1581 auto cos_tensor = _tensor_reg->getPortableTensor(cos_table);
1582 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1583
1584 auto fn = std::make_unique<ops::RoPELayer>();
1585
1586 fn->configure(input_tensor, sin_tensor, cos_tensor, mode, output_tensor);
1587 _return_fn = std::move(fn);
1588}
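ops::getRoPEMode presumably plays the same role for RoPE that the convert* helpers at the top of this file play for other operations: it maps the IR-level mode enum onto the ops-level enum and rejects anything else. The sketch below illustrates that shape only; the enumerator spellings are assumptions for illustration, not verified definitions from the sources.

// Hedged sketch: enumerator names below are assumed, not taken from onert.
ops::RoPEMode getRoPEModeSketch(ir::operation::RoPE::RoPEMode mode_ir)
{
  switch (mode_ir)
  {
    case ir::operation::RoPE::RoPEMode::GPT_NEOX: // assumed spelling
      return ops::RoPEMode::kGptNeox;             // assumed spelling
    case ir::operation::RoPE::RoPEMode::GPT_J:    // assumed spelling
      return ops::RoPEMode::kGptJ;                // assumed spelling
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported RoPE mode");
  }
}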
1589
1590} // namespace onert::backend::cpu
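Every visit overload in this file follows the same contract: resolve operand indices to portable tensors through _tensor_reg, configure a concrete ops::*Layer, and hand the finished kernel back through _return_fn. The loop below is a hypothetical driver showing how a runtime might consume such a generator; the identifiers lowerGraph, topolSortOperations, and releaseFunction are illustrative assumptions, not the actual onert runtime API.

// Hypothetical sketch, assuming the includes already present in this file.
std::vector<std::unique_ptr<exec::IFunction>> lowerGraph(const ir::Graph &graph,
                                                         KernelGenerator &kernel_gen)
{
  std::vector<std::unique_ptr<exec::IFunction>> kernels;
  for (const auto &op_index : graph.topolSortOperations()) // assumed traversal helper
  {
    // Dispatch to the matching visit(...) overload above, which fills _return_fn.
    graph.operations().at(op_index).accept(kernel_gen);
    kernels.push_back(kernel_gen.releaseFunction()); // assumed accessor draining _return_fn
  }
  return kernels;
}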