ONE - On-device Neural Engine
KernelGenerator.cc
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "KernelGenerator.h"

#include "ops/AddNLayer.h"
#include "ops/ArgMinMaxLayer.h"
#include "ops/BatchMatMulLayer.h"
#include "ops/BatchToSpaceNDLayer.h"
#include "ops/BinaryArithmeticLayer.h"
#include "ops/BroadcastToLayer.h"
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
#include "ops/DepthToSpaceLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
#include "ops/DetectionPostProcessLayer.h"
#include "ops/EinsumLayer.h"
#include "ops/ElementwiseActivationLayer.h"
#include "ops/ElementwiseBinaryLayer.h"
#include "ops/ElementwiseUnaryLayer.h"
#include "ops/ExpandDimsLayer.h"
#include "ops/FillLayer.h"
#include "ops/FullyConnectedLayer.h"
#include "ops/FusedBatchNormLayer.h"
#include "ops/GatherLayer.h"
#include "ops/L2NormLayer.h"
#include "ops/LSTMLayer.h"
#include "ops/LogSoftMaxLayer.h"
#include "ops/MatrixBandPartLayer.h"
#include "ops/MeanLayer.h"
#include "ops/OneHotLayer.h"
#include "ops/OperationUtils.h"
#include "ops/PackLayer.h"
#include "ops/PadLayer.h"
#include "ops/PoolLayer.h"
#include "ops/PowLayer.h"
#include "ops/QuantizeLayer.h"
#include "ops/RangeLayer.h"
#include "ops/RankLayer.h"
#include "ops/ReduceLayer.h"
#include "ops/ReshapeLayer.h"
#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
#include "ops/RmsNormLayer.h"
#include "ops/RoPELayer.h"
#include "ops/SelectLayer.h"
#include "ops/ShapeLayer.h"
#include "ops/SliceLayer.h"
#include "ops/SoftMaxLayer.h"
#include "ops/SpaceToBatchNDLayer.h"
#include "ops/SpaceToDepthLayer.h"
#include "ops/SplitLayer.h"
#include "ops/SplitVLayer.h"
#include "ops/SqDiffLayer.h"
#include "ops/StatelessRandomUniformLayer.h"
#include "ops/StridedSliceLayer.h"
#include "ops/TileLayer.h"
#include "ops/TransposeLayer.h"
#include "ops/UnpackLayer.h"

#include <backend/Backend.h>
#include <backend/IConfig.h>
#include <memory>
#include <util/Utils.h>
#include <util/logging.h>

#include <stdexcept>

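// KernelGenerator lowers each IR operation of the given graph to a concrete
// CPU kernel (an ops::*Layer). Every visit() below follows the same pattern:
// resolve the operand indices, look the corresponding portable tensors up in
// the tensor registry, configure a freshly created layer, and hand it back to
// the caller through _return_fn.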
namespace onert
{
namespace backend
{
namespace cpu
{

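// Helpers translating IR-level enum values into the enum values the CPU
// kernels understand; anything unsupported surfaces as a std::runtime_error.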
namespace
{
ops::ArithmeticType
convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
{
  switch (arithmetic_type_ir)
  {
    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
      return ops::ArithmeticType::kAdd;
    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
      return ops::ArithmeticType::kSub;
    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
      return ops::ArithmeticType::kMul;
    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
      return ops::ArithmeticType::kDiv;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseActivationType
convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
{
  switch (type_ir)
  {
    // (per-type cases omitted in this listing; each IR activation kind maps to
    // its ops::ElementwiseActivationType counterpart)
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseBinaryType
convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
{
  switch (type_ir)
  {
    // (per-type cases omitted in this listing; each IR binary kind maps to its
    // ops::ElementwiseBinaryType counterpart)
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
{
  switch (type_ir)
  {
    // (per-type cases omitted in this listing; each IR unary kind maps to its
    // ops::ElementwiseUnaryType counterpart)
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::Pool2D::PoolType::AVG:
      return ops::PoolType::kAvg;
    case ir::operation::Pool2D::PoolType::MAX:
      return ops::PoolType::kMax;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
  switch (reduce_type_ir)
  {
    // (per-type cases omitted in this listing; each IR reduce kind maps to its
    // ops::ReduceType counterpart)
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}
} // namespace

KernelGenerator::KernelGenerator(
  const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
  const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
  const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
  const std::shared_ptr<ExternalContext> &external_context)
  : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()), _operations_ctx{graph.operations()},
    _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
    _external_context(external_context)
{
  // DO NOTHING
}

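// Wraps the kernel for one operation in an exec::FunctionSequence. A dynamic
// tensor context is attached so output shapes can be re-inferred at run time,
// and the reference count of every defined input/output native tensor is
// bumped, presumably so their buffers stay alive while the sequence is in use.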
std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
  auto ret = std::make_unique<exec::FunctionSequence>();

  assert(_tensor_builder->dynamicTensorManager());
  assert(_tensor_reg);

  // Prepare to handle dynamic tensors later
  auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
  {
    dyn_ctx->op = &_operations_ctx.at(ind);
    dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_tensor_reg);
  }
  ret->dynamic_tensor_ctx(dyn_ctx);

  auto &op = _graph.operations().at(ind);
  op.accept(*this);
  assert(_return_fn); // _return_fn must have been generated
  ret->append(std::move(_return_fn));

  for (auto &&ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
  {
    auto tensor = _tensor_reg->getNativeTensor(ind);
    if (tensor)
    {
      tensor->increase_ref();
    }
  }
  return ret;
}

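// Illustrative driver only (the real scheduling lives in onert's executor
// layer, and the names below are made up for this sketch):
//   KernelGenerator kg{graph, tensor_builder, tensor_reg, kernel_builder, ctx};
//   for (auto &&op_index : topologically_sorted_ops)
//     sequences.emplace_back(kg.generate(op_index));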
void KernelGenerator::visit(const ir::operation::AddN &node)
{
  const auto output_index{node.getOutputs().at(0)};

  std::vector<const IPortableTensor *> input_tensors;
  for (const auto &input_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::AddNLayer>();

  fn->configure(std::move(input_tensors), output_tensor);

  _return_fn = std::move(fn);
}

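// Conv2D has two configure() paths: with a dynamic input or kernel shape the
// padding can only be computed at execution time, while fully static shapes
// let the padding be pre-computed right here.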
void KernelGenerator::visit(const ir::operation::Conv2D &node)
{
  using ir::operation::Conv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  const auto stride = node.param().stride;
  const auto activation = node.param().activation;
  const auto &param_padding = node.param().padding;
  const auto dilation = node.param().dilation;

  const bool is_cacheable_weights = ker_tensor->is_constant();

  auto fn = std::make_unique<ops::ConvolutionLayer>();

  if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
  {
    fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
                  param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
                  stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
                  activation, ofm_tensor, is_cacheable_weights);

    _return_fn = std::move(fn);
    return;
  }
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);

  const auto padding =
    ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
                         dilation.width_factor, dilation.height_factor);

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
                padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
                dilation.width_factor, dilation.height_factor, activation, ofm_tensor,
                is_cacheable_weights);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
{
  using ir::operation::DepthwiseConv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};

  const auto stride = node.param().stride;
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
  // Kernel format is [1, kernel_height, kernel_width, depth_out].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);
  const auto dilation_width = node.param().dilation.width_factor;
  const auto dilation_height = node.param().dilation.height_factor;
  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
                                            ker_width, ker_height, dilation_width, dilation_height);
  const auto multiplier = node.param().multiplier;
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
                padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
                dilation_height, activation, ofm_tensor, _external_context);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Concat &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto rank = _ctx.at(ofm_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis);

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);

  std::vector<const IPortableTensor *> input_tensors;
  for (const auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  auto fn = std::make_unique<ops::ConcatLayer>();

  fn->configure(input_tensors, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
  const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto input_alloc = _tensor_reg->getPortableTensor(input_index);
  auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);

  auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();

  IPortableTensor *crops_alloc = nullptr;
  const auto NNApiInputs = 2;

  if (node.getInputs().size() != NNApiInputs)
  {
    const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
    crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
  }

  fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Fill &node)
{
  const auto output_index{node.getOutputs().at(0)};
  // SHAPE input is used for shape inference
  const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto value_tensor = _tensor_reg->getPortableTensor(value_index);

  auto fn = std::make_unique<ops::FillLayer>();

  fn->configure(value_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
  using ir::operation::FullyConnected;

  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
  const auto activation = node.param().activation;
  const auto weights_format = node.param().weights_format;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
  auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::FullyConnectedLayer>();

  fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
                _external_context);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reshape &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  // optional 2nd input
  IPortableTensor *shape_tensor = nullptr;

  if (node.getInputs().size() == 2)
  {
    const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
    shape_tensor = _tensor_reg->getPortableTensor(shape_index);
  }

  auto fn = std::make_unique<ops::ReshapeLayer>();

  fn->configure(input_tensor, shape_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Squeeze &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  // Squeeze can share the same kernel with Reshape
  auto fn = std::make_unique<ops::ReshapeLayer>();

  fn->configure(input_tensor, nullptr, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Softmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};

  const auto beta = node.param().beta;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::SoftMaxLayer>();

  fn->configure(input_tensor, beta, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};

  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::BinaryArithmeticLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
                convertArithmeticType(node.param().arithmetic_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Comparison &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
  const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto comparison_type = node.param().comparison_type;

  auto fn = std::make_unique<ops::CompareLayer>();

  fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Gather &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
  const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);

  const auto rank = _ctx.at(input_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis);

  auto fn = std::make_unique<ops::GatherLayer>();

  fn->configure(input_tensor, indices_tensor, output_tensor, axis, _external_context.get());

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::OneHot &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
  const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
  const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
  const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};

  const auto axis = node.param().axis;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
  auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
  auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
  auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);

  assert(indices_tensor->data_type() == OperandType::INT32);
  assert(axis <= static_cast<int>(indices_tensor->getShape().rank()));

  auto fn = std::make_unique<ops::OneHotLayer>();

  fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Einsum &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
  std::vector<const IPortableTensor *> input_tensors;
  for (const auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  const auto &equation = node.param().equation;

  auto fn = std::make_unique<ops::EinsumLayer>();

  fn->configure(input_tensors, equation, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Custom &node)
{
  auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
                          std::vector<custom::TypeInfo> &types,
                          std::vector<IPortableTensor *> &tensors) {
    for (const auto &idx : opSeq)
    {
      const auto &operand = _ctx.at(idx);
      types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
      auto in_tensor = _tensor_reg->getPortableTensor(idx);
      tensors.emplace_back(in_tensor);
    }
  };

  custom::CustomKernelConfigParams params{};

  fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
  fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);

  params.userdata = node.userdata().data;
  params.userdata_size = node.userdata().size;

  auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ElementwiseActivationLayer>();

  fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
                convertElementwiseActivationType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();

  fn->configure(lhs_tensor, rhs_tensor, output_tensor,
                convertElementwiseBinaryType(node.param().op_type));

  _return_fn = std::move(fn);
}

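// QUANTIZE is special-cased to a dedicated QuantizeLayer; every other unary
// op type goes through the generic ElementwiseUnaryLayer dispatch.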
void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
  {
    auto fn = std::make_unique<ops::QuantizeLayer>();
    fn->configure(input_tensor, output_tensor);
    _return_fn = std::move(fn);
  }
  else
  {
    auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
    fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
    _return_fn = std::move(fn);
  }
}

void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
  // AXIS input is used for output shape inference

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ExpandDimsLayer>();

  fn->configure(input_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pack &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto rank = _ctx.at(ofm_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis);

  assert(-rank <= axis && axis < rank);

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);

  std::vector<const IPortableTensor *> input_tensors;
  for (const auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  auto fn = std::make_unique<ops::PackLayer>();

  fn->configure(input_tensors, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Unpack &node)
{
  const auto input_index{node.getInputs().at(0)};

  const auto rank = _ctx.at(input_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis);

  assert(rank == 0 || (-rank <= axis && axis < rank));

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  std::vector<IPortableTensor *> output_tensors;
  for (const auto &output_idx : node.getOutputs())
    output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::UnpackLayer>();

  uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);

  fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pad &node)
{
  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
  const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
  const auto output_index{node.getOutputs().at(0)};

  auto input = _tensor_reg->getPortableTensor(input_index);
  auto pad = _tensor_reg->getPortableTensor(pad_index);
  auto output = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::PadLayer>();

  IPortableTensor *value = nullptr;
  if (node.getInputs().size() == 3) // isPadV2
  {
    const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
    value = _tensor_reg->getPortableTensor(value_index);
  }

  fn->configure(input, pad, value, output);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Transpose &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
  const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);

  auto fn = std::make_unique<ops::TransposeLayer>();

  fn->configure(input_tensor, perm_tensor, output_tensor);

  _return_fn = std::move(fn);
}

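// MEAN gets its own MeanLayer; all other reductions share ReduceLayer plus a
// converted ops::ReduceType.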
void KernelGenerator::visit(const ir::operation::Reduce &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
  const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};

  const auto keep_dims = node.param().keep_dims;
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);

  if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
  {
    auto fn = std::make_unique<ops::MeanLayer>();

    fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);

    _return_fn = std::move(fn);
  }
  else
  {
    auto fn = std::make_unique<ops::ReduceLayer>();

    const auto reduce_type = convertReduceType(node.param().reduce_type);
    fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);

    _return_fn = std::move(fn);
  }
}

void KernelGenerator::visit(const ir::operation::Select &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
  const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
  const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
  auto true_tensor = _tensor_reg->getPortableTensor(true_index);
  auto false_tensor = _tensor_reg->getPortableTensor(false_index);

  auto fn = std::make_unique<ops::SelectLayer>();

  fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Slice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
  auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);

  auto fn = std::make_unique<ops::SliceLayer>();

  fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::StridedSlice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
  auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
  auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);

  auto begin_mask = node.param().begin_mask;
  auto end_mask = node.param().end_mask;
  auto shrink_axis_mask = node.param().shrink_axis_mask;

  auto fn = std::make_unique<ops::StridedSliceLayer>();

  fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
                end_mask, shrink_axis_mask);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Split &node)
{
  const auto num_splits = node.param().num_splits;
  assert(num_splits == static_cast<int>(node.getOutputs().size()));

  const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
  const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};

  auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);

  std::vector<IPortableTensor *> out_tensors;
  for (const auto &output_idx : node.getOutputs())
    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::SplitLayer>();

  fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Shape &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::ShapeLayer>();

  fn->configure(ifm_tensor, ofm_tensor);

  _return_fn = std::move(fn);
}

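// ResizeBilinear receives its target size in one of three ways: baked into
// the operation params (single-input form), as a constant SIZE tensor whose
// values are read here, or as a runtime SIZE tensor forwarded to the layer.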
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};

  auto align_corners = node.param().align_corners;
  auto half_pixel_centers = node.param().half_pixel_centers;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ResizeBilinearLayer>();

  if (node.getInputs().size() == 1)
  {
    fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
                  align_corners, half_pixel_centers);
  }
  else
  {
    assert(node.getInputs().size() == 2);
    const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
    auto size_tensor = _tensor_reg->getPortableTensor(size_index);
    if (size_tensor->is_constant())
    {
      auto size_vec = _ctx.at(size_index).asVector<int32_t>();
      const auto height_out = size_vec[0];
      const auto width_out = size_vec[1];
      fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
                    half_pixel_centers);
    }
    else
    {
      fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
    }
  }

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reverse &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);

  auto fn = std::make_unique<ops::ReverseLayer>();

  fn->configure(input_tensor, axis_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);

  auto fn = std::make_unique<ops::ArgMinMaxLayer>();

  fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};

  const auto kh = node.param().kh;
  const auto kw = node.param().kw;
  const auto stride = node.param().stride;
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
  const auto padding =
    ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::PoolLayer>();

  fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
                stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
                convertPoolType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pow &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::PowLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::L2Normalization &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(0)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto input_alloc = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::L2NormLayer>();

  fn->configure(input_alloc, output_alloc);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Range &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto start_index{node.getInputs().at(ir::operation::Range::START)};
  const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
  const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto start_tensor = _tensor_reg->getPortableTensor(start_index);
  auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
  auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);

  auto fn = std::make_unique<ops::RangeLayer>();

  fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Rank &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  // NOTE Rank reuses Shape's input enum; both operations take a single INPUT.
  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::RankLayer>();

  fn->configure(ifm_tensor, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::RmsNorm &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::RmsNorm::Input::INPUT)};
  const auto gamma_index{node.getInputs().at(ir::operation::RmsNorm::Input::GAMMA)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto gamma_tensor = _tensor_reg->getPortableTensor(gamma_index);
  auto epsilon = node.param().epsilon;

  auto fn = std::make_unique<ops::RmsNormLayer>();

  fn->configure(ifm_tensor, gamma_tensor, epsilon, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::SqDiffLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Tile &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
  const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);

  auto fn = std::make_unique<ops::TileLayer>();

  fn->configure(input_tensor, multiples_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::INPUT)};
  const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
  const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index);
  auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index);

  auto fn = std::make_unique<ops::MatrixBandPartLayer>();

  fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
  _return_fn = std::move(fn);
}

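// DetectionPostProcess bundles its operands and thresholds into a single
// parameter struct rather than a long configure() argument list.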
void KernelGenerator::visit(const ir::operation::DetectionPostProcess &node)
{
  using NMS = ir::operation::DetectionPostProcess;

  ops::DetectionPostProcessLayer::DetectionPostProcessParameters parameters;
  parameters.scales.y = node.param().scale.y_scale;
  parameters.scales.x = node.param().scale.x_scale;
  parameters.scales.w = node.param().scale.w_scale;
  parameters.scales.h = node.param().scale.h_scale;

  parameters.iou_threshold = node.param().iou_threshold;
  parameters.score_threshold = node.param().score_threshold;
  parameters.max_boxes_per_class = node.param().max_boxes_per_class;
  parameters.max_detections = node.param().max_detections;
  parameters.num_classes = node.param().num_classes;
  parameters.center_box_format = node.param().center_size_boxes;

  auto boxes_index = node.getInputs().at(NMS::Input::BOXES);
  auto scores_index = node.getInputs().at(NMS::Input::SCORES);
  auto anchors_index = node.getInputs().at(NMS::Input::INPUT_ANCHORS);

  auto o_classes_index = node.getOutputs().at(NMS::Output::BOX_CLASSES);
  auto o_coords_index = node.getOutputs().at(NMS::Output::BOX_COORDS);
  auto o_scores_index = node.getOutputs().at(NMS::Output::BOX_SCORES);
  auto o_num_selected_index = node.getOutputs().at(NMS::Output::NUM_SELECTED);

  parameters.boxes_descr = _ctx.at(boxes_index).shape().dims();
  parameters.scrores_descr = _ctx.at(scores_index).shape().dims();

  parameters.boxes_input = _tensor_reg->getPortableTensor(boxes_index);
  parameters.scores_input = _tensor_reg->getPortableTensor(scores_index);
  parameters.anchors_input = _tensor_reg->getPortableTensor(anchors_index);

  parameters.box_classes_output = _tensor_reg->getPortableTensor(o_classes_index);
  parameters.box_coords_output = _tensor_reg->getPortableTensor(o_coords_index);
  parameters.box_scores_output = _tensor_reg->getPortableTensor(o_scores_index);
  parameters.num_selections_output = _tensor_reg->getPortableTensor(o_num_selected_index);

  auto fn = std::make_unique<ops::DetectionPostProcessLayer>();
  fn->configure(std::move(parameters));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  const auto adj_x = node.param().adj_x;
  const auto adj_y = node.param().adj_y;

  auto fn = std::make_unique<ops::BatchMatMulLayer>();

  fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
  const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);

  auto fn = std::make_unique<ops::BroadcastToLayer>();

  fn->configure(input_tensor, shape_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
  std::vector<const IPortableTensor *> input_tensors;
  for (const auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  const auto epsilon = node.param().epsilon;
  const auto is_training = node.param().is_training;
  const auto &data_format = node.param().data_format;

  auto fn = std::make_unique<ops::FusedBatchNormLayer>();

  fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};

  const auto beta = node.param().beta;
  const auto axis = node.param().axis;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::LogSoftMaxLayer>();

  fn->configure(input_tensor, beta, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
  const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
  const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
  auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);

  auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();

  fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
{
  const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
  const auto output_index{node.getOutputs().at(0)};
  auto block_size = node.param().block_size;

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::DepthToSpaceLayer>();

  fn->configure(input_tensor, block_size, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
  const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
  const auto output_index{node.getOutputs().at(0)};
  auto block_size = node.param().block_size;

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::SpaceToDepthLayer>();

  fn->configure(input_tensor, block_size, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
  const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto shape_alloc = _tensor_reg->getPortableTensor(shape_index);
  auto seed_alloc = _tensor_reg->getPortableTensor(seed_index);

  auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();

  fn->configure(shape_alloc, seed_alloc, output_alloc);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SplitV &node)
{
  const auto num_splits = node.param().num_splits;
  assert(num_splits == static_cast<int>(node.getOutputs().size()));

  const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
  const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
  const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};

  auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
  auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
  auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);

  std::vector<IPortableTensor *> out_tensors;
  for (const auto &output_idx : node.getOutputs())
    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::SplitVLayer>();

  fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);

  _return_fn = std::move(fn);
}

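// LSTM: many operands are optional. The presence of input-to-input and
// recurrent-to-input weights distinguishes a regular cell from a CIFG one,
// cell-to-* weights indicate a peephole cell, and a 24-input node additionally
// carries per-gate layer-normalization weights.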
1382void KernelGenerator::visit(const ir::operation::LSTM &node)
1383{
1384 const auto scratch_buffer_index{
1385 node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
1386 const auto output_state_out_index{
1387 node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
1388 const auto cell_state_out_index{
1389 node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
1390 const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
1391
1392 const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
1393 const auto input_to_input_weights_index{
1394 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
1395 const auto input_to_forget_weights_index{
1396 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
1397 const auto input_to_cell_weights_index{
1398 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
1399 const auto input_to_output_weights_index{
1400 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
1401 const auto recurrent_to_input_weights_index{
1402 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
1403 const auto recurrent_to_forget_weights_index{
1404 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
1405 const auto recurrent_to_cell_weights_index{
1406 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
1407 const auto recurrent_to_output_weights_index{
1408 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
1409 const auto cell_to_input_weights_index{
1410 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
1411 const auto cell_to_forget_weights_index{
1412 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
1413 const auto cell_to_output_weights_index{
1414 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
1415 const auto input_gate_bias_index{
1416 node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
1417 const auto forget_gate_bias_index{
1418 node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
1419 const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
1420 const auto output_gate_bias_index{
1421 node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
1422 const auto projection_weights_index{
1423 node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
1424 const auto projection_bias_index{
1425 node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
1426 const auto output_state_in_index{
1427 node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
1428 const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
1429 const auto time_major = node.param().time_major;
1430
1431 // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
1432 // has_input_to_input_weights && has_recurrent_to_input_weights: no CIFG
1433 // !(has_input_to_input_weights && has_recurrent_to_input_weights): CIFG
1434 // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
1435 bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
1436 (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
1437 _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
1438 bool has_recurrent_to_input_weights =
1439 _ctx.exist(recurrent_to_input_weights_index) &&
1440 (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
1441 _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
1442
1443 // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
1444 // But the cell_to_input_weights does not exist in regular CIFG although peephole.
1445 // has_cell_to_forget_weights && has_cell_to_output_weights: peephole
1446 // !(has_cell_to_forget_weights && has_cell_to_output_weights): no peephole
1447 bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
1448 _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
1449 bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
1450 _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
1451
1452 bool has_input_gate_bias =
1453 _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0);
1454
1455 bool has_projection_weights = _ctx.exist(projection_weights_index) &&
1456 (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
1457 _ctx.at(projection_weights_index).shape().dim(1) != 0);
1458 bool has_projection_bias =
1459 _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0);
1460
1461 auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
1462 ? _tensor_reg->getPortableTensor(scratch_buffer_index)
1463 : nullptr; // optional
1464 auto output_state_out_tensor = _ctx.exist(output_state_out_index)
1465 ? _tensor_reg->getPortableTensor(output_state_out_index)
1466 : nullptr; // optional
1467 auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
1468 ? _tensor_reg->getPortableTensor(cell_state_out_index)
1469 : nullptr; // optional
1470 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1471
1472 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1473
1474 auto input_to_input_weights_tensor =
1475 has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
1476 : nullptr; // optional
1477 auto input_to_forget_weights_tensor =
1478 _tensor_reg->getPortableTensor(input_to_forget_weights_index);
1479 auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
1480 auto input_to_output_weights_tensor =
1481 _tensor_reg->getPortableTensor(input_to_output_weights_index);
1482 auto recurrent_to_input_weights_tensor =
1483 has_recurrent_to_input_weights
1484 ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
1485 : nullptr; // optional
1486 auto recurrent_to_forget_weights_tensor =
1487 _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
1488 auto recurrent_to_cell_weights_tensor =
1489 _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
1490 auto recurrent_to_output_weights_tensor =
1491 _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);
1492
1493 auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
1494 auto cell_to_forget_weights_tensor =
1495 has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
1496 : nullptr; // optional
1497 auto cell_to_output_weights_tensor =
1498 has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
1499 : nullptr; // optional
1500
1501 auto input_gate_bias_tensor =
1502 has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
1503 auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
1504 auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
1505 auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
1506 auto output_state_in_tensor = _tensor_reg->getPortableTensor(output_state_in_index);
1507 auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);
1508
1509 auto projection_weights_tensor = has_projection_weights
1510 ? _tensor_reg->getPortableTensor(projection_weights_index)
1511 : nullptr; // optional
1512 auto projection_bias_tensor = has_projection_bias
1513 ? _tensor_reg->getPortableTensor(projection_bias_index)
1514 : nullptr; // optional
1515
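// The four layer normalization weights exist only when the node carries 24 inputs
// (the layer-norm LSTM variant), so these pointers stay null otherwise.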
1516 IPortableTensor *input_layer_norm_weights_tensor = nullptr;
1517 IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
1518 IPortableTensor *cell_layer_norm_weights_tensor = nullptr;
1519 IPortableTensor *output_layer_norm_weights_tensor = nullptr;
1520 if (node.getInputs().size() == 24)
1521 {
1522 const auto input_layer_norm_weights_index{
1523 node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
1524 const auto forget_layer_norm_weights_index{
1525 node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
1526 const auto cell_layer_norm_weights_index{
1527 node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
1528 const auto output_layer_norm_weights_index{
1529 node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};
1530
1531 input_layer_norm_weights_tensor =
1532 _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
1533 forget_layer_norm_weights_tensor =
1534 _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
1535 cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
1536 output_layer_norm_weights_tensor =
1537 _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
1538 }
1539
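// The aux_* arguments are only meaningful for the bidirectional variant of the kernel
// and are therefore passed as nullptr for this unidirectional LSTM.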
1540 auto fn = std::make_unique<ops::LSTMLayer>();
1541
1542 fn->configure(
1543 input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
1544 input_to_cell_weights_tensor, input_to_output_weights_tensor, recurrent_to_input_weights_tensor,
1545 recurrent_to_forget_weights_tensor, recurrent_to_cell_weights_tensor,
1546 recurrent_to_output_weights_tensor, cell_to_input_weights_tensor, cell_to_forget_weights_tensor,
1547 cell_to_output_weights_tensor, input_layer_norm_weights_tensor,
1548 forget_layer_norm_weights_tensor, cell_layer_norm_weights_tensor,
1549 output_layer_norm_weights_tensor,
1550 /*aux_input=*/nullptr,
1551 /*aux_input_to_input_weights=*/nullptr,
1552 /*aux_input_to_forget_weights=*/nullptr,
1553 /*aux_input_to_cell_weights=*/nullptr,
1554 /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
1555 cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
1556 projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
1557 /*forward_sequence=*/true, time_major,
1558 /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
1559 output_tensor,
1560 !_ctx.at(output_state_in_index).info().isVariable() /* a non-variable state input means an empty buffer from the frontend */,
1561 !_ctx.at(cell_state_in_index).info().isVariable());
1562
1563 _return_fn = std::move(fn);
1564}
1565
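// RoPE (Rotary Position Embedding) rotates the input tensor using the precomputed
// sin/cos tables, as selected by the node's mode parameter.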
1566void KernelGenerator::visit(const ir::operation::RoPE &node)
1567{
1568 const auto input_index{node.getInputs().at(ir::operation::RoPE::Input::INPUT)};
1569 const auto sin_table{node.getInputs().at(ir::operation::RoPE::Input::SIN_TABLE)};
1570 const auto cos_table{node.getInputs().at(ir::operation::RoPE::Input::COS_TABLE)};
1571 const auto output_index{node.getOutputs().at(ir::operation::RoPE::Output::OUTPUT)};
1572
1573 auto mode = ops::getRoPEMode(node.param().mode);
1574
1575 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1576 auto sin_tensor = _tensor_reg->getPortableTensor(sin_table);
1577 auto cos_tensor = _tensor_reg->getPortableTensor(cos_table);
1578 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1579
1580 auto fn = std::make_unique<ops::RoPELayer>();
1581
1582 fn->configure(input_tensor, sin_tensor, cos_tensor, mode, output_tensor);
1583 _return_fn = std::move(fn);
1584}
1585
1586} // namespace cpu
1587} // namespace backend
1588} // namespace onert