ONE - On-device Neural Engine
KernelGenerator.cc
1/*
2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "KernelGenerator.h"
18
19#include <arm_compute/runtime/CL/CLFunctions.h> // Include all ARM Compute CL functions
20#include <arm_compute/runtime/CL/CLFunctionsEx.h> // Include all ARM Compute EX CL functions
21
22#include <AclActivationBuilder.h>
23#include <AclFunction.h>
24#include <Convert.h>
25#include <Swizzle.h>
26
27#include "ir/Index.h"
28#include "ir/DataType.h"
29#include "ir/InternalType.h"
30#include "exec/NopFunction.h"
31#include "exec/FunctionSequence.h"
32#include "util/logging.h"
33#include "AclKernelGen.h"
34
35namespace onert
36{
37namespace backend
38{
39namespace acl_cl
40{
41
42using ::onert::backend::acl_common::asAclFunction;
43using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
44 ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
45
46KernelGenerator::KernelGenerator(
47 const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
48 const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
49 : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()), _operations_ctx(graph.operations()),
50 _tensor_builder(tensor_builder), _tensor_reg(tensor_reg)
51{
52 // DO NOTHING
53}
54
55std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
56{
57 auto ret = std::make_unique<exec::FunctionSequence>();
58 ret->enableDynamicShapeInferer(false);
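// Note: the acl_cl backend works on statically-shaped tensors only, so dynamic shape
// inference is disabled for every generated function sequence.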
59
60 const auto &op = _graph.operations().at(ind);
61 op.accept(*this);
62 ret->append(releaseFunction());
63 return ret;
64}
65
66void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
67{
68 const auto ofm_index{node.getOutputs().at(0)};
69 const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
70 const auto block_size_index{
71 node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
72
73 const auto NNApiInputs = 2;
74 if (node.getInputs().size() != NNApiInputs)
75 {
76 const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
77 if (!_ctx.at(crops_index).isConstant())
78 {
79 throw std::runtime_error("Non-constant crops NYI for acl_cl backend BatchToSpaceND");
80 }
81
82 auto crops = _ctx.at(crops_index).asVector<int32_t>();
83 for (auto &&crop : crops)
84 {
85 if (crop != 0)
86 {
87 throw std::runtime_error("Non-zero crops NYI for acl_cl backend BatchToSpaceND");
88 }
89 }
90 }
91
92 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
93 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
94
95 if (!_ctx.at(block_size_index).data())
96 throw std::runtime_error("ACL CL does not support dynamic block size for BatchToSpaceND");
97
98 auto block = _ctx.at(block_size_index).asVector<int32_t>();
99 int32_t height = block[0];
100 int32_t width = block[1];
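// The block_size operand holds [block_height, block_width]; CLBatchToSpaceLayer takes
// (block_x, block_y), which is why width is passed before height below.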
101
102 auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>(
103 ifm_tensor->handle(), width, height, ofm_tensor->handle());
104
105 _return_fn = asAclFunction(std::move(fn));
106}
107
108void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
109{
110 const auto ofm_index{node.getOutputs().at(0)};
111 const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
112 const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
113
114 const auto activation = node.param().activation;
115
116 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
117 auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
118 auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
119
120 const auto act_info = acl_common::asActivationLayerInfo(activation);
121
122 std::unique_ptr<arm_compute::IFunction> fn;
123 switch (node.param().arithmetic_type)
124 {
125 case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
126 {
127 arm_compute::CLArithmeticAddition::validate(lhs_tensor->info(), rhs_tensor->info(),
128 ofm_tensor->info(),
129 arm_compute::ConvertPolicy::SATURATE, act_info)
130 .throw_if_error();
131 fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>(
132 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
133 arm_compute::ConvertPolicy::SATURATE, act_info);
134 break;
135 }
136 case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
137 {
138 arm_compute::CLArithmeticSubtraction::validate(lhs_tensor->info(), rhs_tensor->info(),
139 ofm_tensor->info(),
140 arm_compute::ConvertPolicy::SATURATE, act_info)
141 .throw_if_error();
142 fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>(
143 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
144 arm_compute::ConvertPolicy::SATURATE, act_info);
145 break;
146 }
147 case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
148 {
149 arm_compute::CLPixelWiseMultiplication::validate(
150 lhs_tensor->info(), rhs_tensor->info(), ofm_tensor->info(), 1.0,
151 arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
152 act_info)
153 .throw_if_error();
154 fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>(
155 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
156 arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
157 act_info);
158 break;
159 }
160 case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
161 {
162 arm_compute::CLArithmeticDivision::validate(lhs_tensor->info(), rhs_tensor->info(),
163 ofm_tensor->info(), act_info)
164 .throw_if_error();
165 fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>(
166 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
167 break;
168 }
169 default:
170 assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
171 break;
172 }
173
174 _return_fn = asAclFunction(std::move(fn));
175}
176
177void KernelGenerator::visit(const ir::operation::Conv2D &node)
178{
179 using ir::operation::Conv2D;
180
181 const auto ofm_index{node.getOutputs().at(0)};
182 const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
183 const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
184 const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
185
186 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
187 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
188 // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
189 const auto &ker_shape = _ctx.at(ker_index).shape();
190 const auto ker_height = ker_shape.dim(1);
191 const auto ker_width = ker_shape.dim(2);
192
193 const auto stride = node.param().stride;
194 const auto padding =
195 ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
196 const auto activation = node.param().activation;
197
198 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
199 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
200 auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
201 auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
202
203 const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
204 const auto act_info = acl_common::asActivationLayerInfo(activation);
205
206 auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>(
207 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
208 ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
209 ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
210
211 _return_fn = asAclFunction(std::move(fn));
212}
213
214void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
215{
216 using ir::operation::DepthwiseConv2D;
217
218 const auto ofm_index{node.getOutputs().at(0)};
219 const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
220 const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
221 const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
222
223 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
224 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
225 // Kernel format is [1, kernel_height, kernel_width, depth_out].
226 const auto &ker_shape = _ctx.at(ker_index).shape();
227 const auto ker_height = ker_shape.dim(1);
228 const auto ker_width = ker_shape.dim(2);
229
230 const auto stride = node.param().stride;
231 const auto dilation = node.param().dilation;
232 const auto padding =
233 ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
234 dilation.width_factor, dilation.height_factor);
235 const auto multiplier = node.param().multiplier;
236 const auto activation = node.param().activation;
237
238 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
239 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
240 auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
241 auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
242
243 const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
244 const auto act_info = acl_common::asActivationLayerInfo(activation);
245 const auto dilation_info = acl_common::asDilation(dilation.width_factor, dilation.height_factor);
246
247 auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
248 ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
249 conv_info, multiplier, act_info, dilation_info);
250
251 _return_fn = asAclFunction(std::move(fn));
252}
253
254void KernelGenerator::visit(const ir::operation::Concat &node)
255{
256 const auto ofm_index{node.getOutputs().at(0)};
257
258 std::vector<ir::OperandIndex> input_indexes;
259
260 for (const auto &input : node.getInputs())
261 input_indexes.emplace_back(input);
262
263 const auto axis = node.param().axis;
264
265 // Concat elimination check
266 bool eliminated = _tensor_builder->areSubTensorsOf(ofm_index, node.getInputs());
267 if (eliminated)
268 {
269 // If concat eliminated, return a NOP IFunction
270 VERBOSE(acl_cl_KernelGenerator_Concat) << "Concat eliminated" << std::endl;
271 _return_fn = std::make_unique<exec::NopFunction>();
272 return;
273 }
274
275 auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
276 std::vector<const ::arm_compute::ICLTensor *> input_tensors;
277 for (const auto &ifm_ind : input_indexes)
278 input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
279
280 std::unique_ptr<::arm_compute::IFunction> fn;
281 if (input_indexes.size() < 2)
282 {
283 ::arm_compute::ICLTensor *input_tensor =
284 _tensor_reg->getAclTensor(input_indexes.at(0))->handle();
285
286 fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor, output_tensor->handle());
287 }
288 else
289 {
290 const auto rank = _ctx.at(ofm_index).shape().rank();
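// ToARMComputeAxis maps a frontend axis to ACL's reversed dimension order (roughly
// rank - axis - 1); e.g. on a rank-4 NHWC tensor, frontend axis 3 maps to ACL axis 0.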
291 const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis).value();
292 fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>(
293 input_tensors, output_tensor->handle(), fixed_axis);
294 }
295
296 _return_fn = asAclFunction(std::move(fn));
297}
298
299void KernelGenerator::visit(const ir::operation::FullyConnected &node)
300{
301 const auto output_index{node.getOutputs().at(0)};
302 auto output_tensor = _tensor_reg->getAclTensor(output_index);
303 const auto activation = node.param().activation;
304 if (node.param().weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32)
305 throw std::runtime_error(
306 "KernelGenerator(acl_cl): FullyConnected 16x1Float32 weights is not supported.");
307
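// kernelGenFullyConnected builds only the fully-connected function; the fused activation is
// appended separately below via ActivationBuilder (effectively a no-op when there is none).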
308 auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
309 arm_compute::CLFullyConnectedReshapingLayer>(
310 node, _ctx, _tensor_builder, _tensor_reg);
311 _return_fn = std::make_unique<exec::FunctionSequence>(
312 std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
313}
314
315void KernelGenerator::visit(const ir::operation::Reduce &node)
316{
317 const auto output_index{node.getOutputs().at(0)};
318 const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
319 const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
320 const auto keep_dims{node.param().keep_dims};
321 const auto reduce_type = node.param().reduce_type;
322
323 auto output_tensor = _tensor_reg->getAclTensor(output_index);
324 auto input_tensor = _tensor_reg->getAclTensor(input_index);
325
326 // Convert to ACL axes taking into account negative values and possible duplicates.
327 const auto &axes = _ctx.at(axes_index);
328 const auto input_rank = _ctx.at(input_index).shape().rank();
329
330 std::unique_ptr<arm_compute::IFunction> fn;
331 if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
332 {
333 const auto acl_axes = acl_common::asCoordinates(axes, input_rank);
334 fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes,
335 keep_dims, output_tensor->handle());
336 }
337 else
338 {
339 const auto acl_axes = acl_common::asSet(axes, input_rank);
340
341 fn = acl_common::generateLayer<arm_compute::CLReduceOperation>(
342 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
343 output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
344 }
345
346 _return_fn = asAclFunction(std::move(fn));
347}
348
349void KernelGenerator::visit(const ir::operation::Reshape &node)
350{
351 const auto output_index{node.getOutputs().at(0)};
352 const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
353
354 auto output_tensor = _tensor_reg->getAclTensor(output_index);
355 auto input_tensor = _tensor_reg->getAclTensor(input_index);
356
357 auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
358 output_tensor->handle());
359
360 _return_fn = asAclFunction(std::move(fn));
361}
362
363void KernelGenerator::visit(const ir::operation::Squeeze &node)
364{
365 // Squeeze is identical to reshape except that it has an optional dimensions input.
366 // In addition, the optional dims input is ignored, since the output tensor already has the
367 // squeezed shape produced by the frontend (e.g. freezer/toco)
368 // TODO Support multi-layout for frontend and backend
369 const auto output_index{node.getOutputs().at(0)};
370 const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
371 const auto dims{node.param().dims};
372 const auto ndim{node.param().ndim};
373 (void)dims;
374 (void)ndim;
375
376 auto output_tensor = _tensor_reg->getAclTensor(output_index);
377 auto input_tensor = _tensor_reg->getAclTensor(input_index);
378 auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
379 output_tensor->handle());
380 _return_fn = asAclFunction(std::move(fn));
381}
382
383void KernelGenerator::visit(const ir::operation::Softmax &node)
384{
385 const auto output_index{node.getOutputs().at(0)};
386 const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
387
388 const auto beta = node.param().beta;
389
390 auto output_tensor = _tensor_reg->getAclTensor(output_index);
391 auto input_tensor = _tensor_reg->getAclTensor(input_index);
392
393 auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
394 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
395 output_tensor->handle(), beta);
396
397 _return_fn = asAclFunction(std::move(fn));
398}
399
400void KernelGenerator::visit(const ir::operation::Slice &node)
401{
402 const auto output_index{node.getOutputs().at(0)};
403 const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
404 const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
405 const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
406
407 auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
408 auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
409
410 // Build the begin/end index data, reordered to match the ACL axis order of inputData
411 int input_rank = _ctx.at(input_index).shape().rank();
412 std::vector<int32_t> starts;
413 std::vector<int32_t> ends;
414 starts.resize(input_rank, 0);
415 ends.resize(input_rank, 0);
416 {
417 assert(_ctx.at(begins_index).data());
418 assert(_ctx.at(sizes_index).data());
419 auto beginData_base = _ctx.at(begins_index).data()->base();
420 auto sizeData_base = _ctx.at(sizes_index).data()->base();
421 [[maybe_unused]] const int beginData_size = _ctx.at(begins_index).shape().num_elements();
422 [[maybe_unused]] const int sizeData_size = _ctx.at(sizes_index).shape().num_elements();
423
424 using ir::DataType;
425
426 assert(_ctx.at(begins_index).typeInfo().type() == DataType::INT32);
427 assert(_ctx.at(sizes_index).typeInfo().type() == DataType::INT32);
428 assert(beginData_size == input_rank);
429 assert(sizeData_size == input_rank);
430
431 assert(beginData_base != nullptr);
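// CLSlice expects absolute end coordinates, so each axis gets ends[axis] = begin + size,
// with the axis index swizzled into ACL order via ToARMComputeAxis.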
432 for (int n = 0; n < input_rank; ++n)
433 {
434 auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n).value();
435
436 int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
437 starts[axis] = begin_value;
438
439 int32_t size_value = *(reinterpret_cast<const int32_t *>(sizeData_base) + n);
440 ends[axis] = begin_value + size_value;
441 }
442 }
443
444 ::arm_compute::Coordinates starts_set;
445 ::arm_compute::Coordinates ends_set;
446
447 for (size_t i = 0; i < starts.size(); ++i)
448 {
449 starts_set.set(i, starts[i]);
450 ends_set.set(i, ends[i]);
451 }
452
453 auto fn = acl_common::generateLayer<arm_compute::CLSlice>(
454 inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
455
456 _return_fn = asAclFunction(std::move(fn));
457}
458
459void KernelGenerator::visit(const ir::operation::StridedSlice &node)
460{
461 const auto output_index{node.getOutputs().at(0)};
462 const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
463 const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
464 const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
465 const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
466
467 auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
468 auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
469
470 // Build the start/end/stride index data, reordered to match the ACL axis order of inputData
471 int input_rank = _ctx.at(input_index).shape().rank();
472 std::vector<int32_t> starts;
473 std::vector<int32_t> ends;
474 std::vector<int32_t> strides;
475 starts.resize(input_rank, 0);
476 ends.resize(input_rank, 0);
477 strides.resize(input_rank, 0);
478 {
479 assert(_ctx.at(starts_index).data());
480 assert(_ctx.at(ends_index).data());
481 assert(_ctx.at(strides_index).data());
482 auto startData_base = _ctx.at(starts_index).data()->base();
483 auto endData_base = _ctx.at(ends_index).data()->base();
484 auto stridesData_base = _ctx.at(strides_index).data()->base();
485 [[maybe_unused]] const int startData_size = _ctx.at(starts_index).shape().num_elements();
486 [[maybe_unused]] const int endData_size = _ctx.at(ends_index).shape().num_elements();
487 [[maybe_unused]] const int stridesData_size = _ctx.at(strides_index).shape().num_elements();
488
489 using ir::DataType;
490
491 assert(_ctx.at(starts_index).typeInfo().type() == DataType::INT32);
492 assert(_ctx.at(ends_index).typeInfo().type() == DataType::INT32);
493 assert(_ctx.at(strides_index).typeInfo().type() == DataType::INT32);
494 assert(startData_size == input_rank);
495 assert(endData_size == input_rank);
496 assert(stridesData_size == input_rank);
497
498 assert(startData_base != nullptr);
499 for (int n = 0; n < input_rank; ++n)
500 {
501 auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n).value();
502
503 int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
504 starts[axis] = start_value;
505
506 int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n);
507 ends[axis] = end_value;
508
509 int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n);
510 strides[axis] = strides_value;
511 }
512 }
513
514 // Reorder the per-axis mask bits to match the ACL axis order of inputData
515 const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank);
516 const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank);
517 const auto shrink_axis_mask =
518 acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
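// Each mask carries one bit per axis, so e.g. with input_rank == 4 the bit for frontend
// axis 0 ends up at ACL bit position 3, mirroring the starts/ends/strides reordering above.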
519
520 ::arm_compute::Coordinates starts_set;
521 ::arm_compute::Coordinates ends_set;
522 ::arm_compute::BiStrides strides_set;
523
524 for (size_t i = 0; i < starts.size(); ++i)
525 {
526 starts_set.set(i, starts[i]);
527 ends_set.set(i, ends[i]);
528 strides_set.set(i, strides[i]);
529 }
530
531 // Disable applied dim_correction
532 if (inputData_tensor->num_dimensions() != inputData_tensor->info()->num_dimensions())
533 {
534 // The highest dimension is 1 and dim_correction has been applied to the input tensor
535 acl_common::disableDimCorrection(inputData_tensor);
536 }
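// "dim_correction" refers to ACL collapsing highest dimensions of size 1 in a TensorInfo;
// it is turned off here so the swizzled axis indices line up with the tensor's full rank,
// and turned back on after the layer has been configured.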
537
538 auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
539 inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
540 begin_mask, end_mask, shrink_axis_mask);
541
542 // Revert disabling applied dim_correction
543 if (inputData_tensor->dimension(0) == 1)
544 {
545 acl_common::enableDimCorrection(inputData_tensor);
546 }
547
548 _return_fn = asAclFunction(std::move(fn));
549}
550
551void KernelGenerator::visit(const ir::operation::Transpose &node)
552{
553 const auto ofm_idx{node.getOutputs().at(0)};
554 const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
555 const auto perm_idx{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
556
557 const auto rank = _ctx.at(ifm_idx).shape().rank();
558
559 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
560 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
561
562 const auto &perms = _ctx.at(perm_idx);
563 std::vector<int32_t> pv;
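// An empty permutation operand means "reverse all dimensions": pv becomes
// {rank-1, ..., 1, 0}, e.g. {2, 1, 0} for rank 3.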
564 if (perms.shape() == ir::Shape{0})
565 {
566 pv.resize(rank);
567 std::iota(pv.begin(), pv.end(), 0);
568 std::reverse(pv.begin(), pv.end());
569 }
570 else
571 {
572 pv = _ctx.at(perm_idx).asVector<int32_t>();
573 }
574
575 std::unique_ptr<arm_compute::IFunction> fn;
576 if (rank == 1)
577 {
578 fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), ofm_tensor->handle());
579 }
580 else if (rank == 2)
581 {
582 assert(pv.size() == 2 && pv.at(0) == 1 && pv.at(1) == 0);
583 fn = acl_common::generateLayer<arm_compute::CLTranspose>(ifm_tensor->handle(),
584 ofm_tensor->handle());
585 }
586 else
587 {
588 auto backend_pv = acl_common::getARMComputePermutationVector(rank, pv);
589
590 fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
591 ofm_tensor->handle(), backend_pv);
592 }
593
594 _return_fn = asAclFunction(std::move(fn));
595}
596
597void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
598{
599 const auto ofm_index{node.getOutputs().at(0)};
600 const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
601
602 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
603 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
604
605 const ::arm_compute::ActivationLayerInfo act_info =
606 acl_common::asActivationLayerInfo(node.param().op_type, node.param().alpha, node.param().beta);
607
608 auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
609 ifm_tensor->handle(), ofm_tensor->handle(), act_info);
610
611 _return_fn = asAclFunction(std::move(fn));
612}
613
614void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
615{
616 const auto output_index{node.getOutputs().at(0)};
617 const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
618 const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
619
620 auto output_tensor = _tensor_reg->getAclTensor(output_index);
621 auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
622 auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
623
624 std::unique_ptr<arm_compute::IFunction> fn;
625 switch (node.param().op_type)
626 {
627 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
628 {
629 fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>(
630 lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
631 arm_compute::BinaryLogicalOperation::AND);
632 break;
633 }
634 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
635 {
636 fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>(
637 lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
638 break;
639 }
640 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
641 {
642 fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>(
643 lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
644 break;
645 }
646 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
647 {
648 fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>(
649 lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
650 break;
651 }
652 default:
653 {
654 std::string err_msg("acl_cl KernelGenerator : " + node.name() +
655 "is not elementwise-binary operations");
656 assert(false && err_msg.c_str());
657 break;
658 }
659 }
660
661 _return_fn = asAclFunction(std::move(fn));
662}
663
664void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
665{
666 const auto output_index{node.getOutputs().at(0)};
667 const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
668
669 auto output_tensor = _tensor_reg->getAclTensor(output_index);
670 auto input_tensor = _tensor_reg->getAclTensor(input_index);
671
672 std::unique_ptr<arm_compute::IFunction> fn;
673 switch (node.param().op_type)
674 {
675 case ir::operation::ElementwiseUnary::Type::ABS:
676 {
677 const ::arm_compute::ActivationLayerInfo act_info{
678 ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
679
680 fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
681 input_tensor->handle(), output_tensor->handle(), act_info);
682 break;
683 }
684 case ir::operation::ElementwiseUnary::Type::CAST:
685 {
686 if (input_tensor->data_type() == output_tensor->data_type())
687 {
688 fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(),
689 output_tensor->handle());
690 }
691 else if (_ctx.at(input_index).typeInfo().type() == ir::DataType::BOOL8)
692 {
693 fn = acl_common::generateLayer<arm_compute::CLCastBool>(input_tensor->handle(),
694 output_tensor->handle());
695 }
696 else
697 {
698 // TODO Support converting float to int32 as round down
699 fn = acl_common::generateLayer<arm_compute::CLCast>(
700 input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
701 }
702 break;
703 }
704 case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
705 {
706 fn = acl_common::generateLayer<arm_compute::CLDequantizationLayer>(input_tensor->handle(),
707 output_tensor->handle());
708 break;
709 }
710 case ir::operation::ElementwiseUnary::Type::EXP:
711 {
712 fn = acl_common::generateLayer<arm_compute::CLExpLayer>(input_tensor->handle(),
713 output_tensor->handle());
714 break;
715 }
716 case ir::operation::ElementwiseUnary::Type::FLOOR:
717 {
718 fn = acl_common::generateLayer<arm_compute::CLFloor>(input_tensor->handle(),
719 output_tensor->handle());
720 break;
721 }
722 case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
723 {
724 fn = acl_common::generateLayer<arm_compute::CLBitwiseNot>(input_tensor->handle(),
725 output_tensor->handle());
726 break;
727 }
728 case ir::operation::ElementwiseUnary::Type::NEG:
729 {
730 fn = acl_common::generateLayer<arm_compute::CLNeg>(input_tensor->handle(),
731 output_tensor->handle());
732 break;
733 }
734 case ir::operation::ElementwiseUnary::Type::RSQRT:
735 {
736 fn = acl_common::generateLayer<arm_compute::CLRsqrtLayer>(input_tensor->handle(),
737 output_tensor->handle());
738 break;
739 }
740 case ir::operation::ElementwiseUnary::Type::SQRT:
741 {
742 const ::arm_compute::ActivationLayerInfo act_info{
743 ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
744
745 fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
746 input_tensor->handle(), output_tensor->handle(), act_info);
747 break;
748 }
749 default:
750 {
751 throw std::runtime_error("acl_cl KernelGenerator : " + node.name() + " is not supported yet");
752 break;
753 }
754 }
755
756 auto acl_fn = asAclFunction(std::move(fn));
757
758 _return_fn = std::move(acl_fn);
759}
760
761void KernelGenerator::visit(const ir::operation::ExpandDims &node)
762{
763 const auto output_index{node.getOutputs().at(0)};
764 const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
765
766 auto output_tensor = _tensor_reg->getAclTensor(output_index);
767 auto input_tensor = _tensor_reg->getAclTensor(input_index);
768
769 auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
770 output_tensor->handle());
771
772 _return_fn = asAclFunction(std::move(fn));
773}
774
775void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
776{
777 const auto ofm_index{node.getOutputs().at(0)};
778 const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
779 const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
780 const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
781
782 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
783 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
784 auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index);
785 auto beta_tensor = _tensor_reg->getAclTensor(beta_index);
786 auto epsilon = node.param().epsilon;
787 auto activation = node.param().activation;
788
789 auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>(
790 ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
791 epsilon);
792
793 _return_fn = std::make_unique<exec::FunctionSequence>(
794 asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
795}
796
797void KernelGenerator::visit(const ir::operation::LSTM &node)
798{
799 _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ICLTensor,
800 ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_reg);
801}
802
803void KernelGenerator::visit(const ir::operation::Comparison &node)
804{
805 const auto output_index{node.getOutputs().at(0)};
806 const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
807 const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
808
809 const auto comparison_type = node.param().comparison_type;
810
811 auto output_tensor = _tensor_reg->getAclTensor(output_index);
812 auto input0_tensor = _tensor_reg->getAclTensor(input0_index);
813 auto input1_tensor = _tensor_reg->getAclTensor(input1_index);
814
815 auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
816 input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
817 (arm_compute::ComparisonOperation)comparison_type);
818
819 _return_fn = asAclFunction(std::move(fn));
820}
821
822void KernelGenerator::visit(const ir::operation::OneHot &node)
823{
824 const auto output_idx{node.getOutputs().at(0)};
825 const auto indices_idx{node.getInputs().at(ir::operation::OneHot::Input::INDICES)};
826 const auto depth_idx{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
827 const auto onvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
828 const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
829 const auto depth = _ctx.at(depth_idx).asScalar<int32_t>();
830 assert(depth > 0);
831
832 auto output_tensor = _tensor_reg->getAclTensor(output_idx);
833 auto indices_tensor = _tensor_reg->getAclTensor(indices_idx);
834 auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx);
835
836 const size_t output_rank = _ctx.at(output_idx).shape().rank();
837 int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
838 axis = acl_common::ToARMComputeAxis(output_rank, axis).value();
839
840 if (output_tensor->num_dimensions() != output_tensor->info()->num_dimensions())
841 {
842 // The highest dimension is 1 and dim_correction has been applied to the output tensor
843 acl_common::disableDimCorrection(output_tensor);
844 }
845
846 std::unique_ptr<::arm_compute::IFunction> fn;
847 const auto &offvalue = _ctx.at(offvalue_idx);
848 if (offvalue.isConstant())
849 {
850 fn = acl_common::generateLayer<arm_compute::CLOneHot>(
851 indices_tensor->handle(), onvalue_tensor->handle(), output_tensor->handle(),
852 acl_common::asPixelValue(offvalue), static_cast<uint32_t>(depth), axis);
853 }
854 else
855 {
856 auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx);
857 fn = acl_common::generateLayer<arm_compute::CLOneHot>(
858 indices_tensor->handle(), onvalue_tensor->handle(), offvalue_tensor->handle(),
859 output_tensor->handle(), static_cast<uint32_t>(depth), axis);
860 }
861
862 if (output_tensor->dimension(0) == 1)
863 {
864 acl_common::enableDimCorrection(output_tensor);
865 }
866
867 _return_fn = asAclFunction(std::move(fn));
868}
869
870void KernelGenerator::visit(const ir::operation::Pack &node)
871{
872 const auto output_index{node.getOutputs().at(0)};
873 auto axis{node.param().axis};
874
875 const auto output_rank = _ctx.at(output_index).shape().rank();
876
877 std::vector<ir::OperandIndex> input_indexes;
878 for (const auto &input_index : node.getInputs())
879 input_indexes.emplace_back(input_index);
880
881 auto output = _tensor_reg->getAclTensor(output_index)->handle();
882 std::vector<arm_compute::ICLTensor *> inputs;
883 for (const auto &input_index : input_indexes)
884 inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
885
886 if (axis < 0)
887 axis += output_rank;
888 axis = acl_common::ToARMComputeAxis(output_rank, axis).value();
889
890 // Disable applied dim_correction
891 for (const auto &input_index : input_indexes)
892 {
893 const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
894 if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
895 {
896 // The highest dimension is 1 and dim_correction has been applied to the input tensor
897 acl_common::disableDimCorrection(input_tensor);
898 }
899 }
900
901 auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output);
902
903 // Revert disabling applied dim_correction
904 for (const auto &input_index : input_indexes)
905 {
906 const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
907 if (input_tensor->dimension(0) == 1)
908 {
909 acl_common::enableDimCorrection(input_tensor);
910 }
911 }
912
913 _return_fn = asAclFunction(std::move(fn));
914}
915
916void KernelGenerator::visit(const ir::operation::Pool2D &node)
917{
918 auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
919 node, _ctx, _tensor_reg, acl_common::convertPoolType(node.param().op_type));
920
921 const auto ofm_index{node.getOutputs().at(0)};
922 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
923 const auto activation = node.param().activation;
924 _return_fn = std::make_unique<exec::FunctionSequence>(
925 asAclFunction(std::move(raw_fn)),
926 ActivationBuilder::generate(activation, ofm_tensor->handle()));
927}
928
929void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
930{
931 const auto ofm_index{node.getOutputs().at(0)};
932 const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
933
934 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
935 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
936
937 auto fn = acl_common::generateLayer<arm_compute::CLScale>(
938 ifm_tensor->handle(), ofm_tensor->handle(),
939 ::arm_compute::ScaleKernelInfo{
940 ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
941 ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT});
942
943 _return_fn = asAclFunction(std::move(fn));
944}
945
946void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node)
947{
948 const auto ofm_index{node.getOutputs().at(0)};
949 const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)};
950
951 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
952 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
953
954 auto fn = acl_common::generateLayer<arm_compute::CLScale>(
955 ifm_tensor->handle(), ofm_tensor->handle(),
956 ::arm_compute::ScaleKernelInfo{
957 ::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE,
958 ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT});
959
960 _return_fn = asAclFunction(std::move(fn));
961}
962
963void KernelGenerator::visit(const ir::operation::RNN &node)
964{
965 const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
966 const auto hidden_state_out_index{
967 node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
968
969 const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
970 const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
971 const auto recurrent_weights_index{
972 node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
973 const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
974 const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
975
976 const auto activation = node.param().activation;
977
978 auto output_tensor = _tensor_reg->getAclTensor(output_index);
979 auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index);
980
981 auto input_tensor = _tensor_reg->getAclTensor(input_index);
982 auto weights_tensor = _tensor_reg->getAclTensor(weights_index);
983 auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index);
984 auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
985 auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index);
986 auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
987
988 auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
989 hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
990 _return_fn = asAclFunction(std::move(copy_layer));
991
992 auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
993 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
994 weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
995 hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
996 _return_fn = asAclFunction(std::move(fn));
997}
998
999void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
1000{
1001 const auto ofm_index{node.getOutputs().at(0)};
1002 const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
1003 const auto block_size_index{
1004 node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
1005 const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
1006
1007 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1008 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1009 auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
1010 auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index);
1011
1012 assert(_ctx.at(block_size_index).data());
1013 assert(_ctx.at(paddings_index).data());
1014
1015 auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>(
1016 ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
1017 ofm_tensor->handle());
1018
1019 _return_fn = asAclFunction(std::move(fn));
1020}
1021
1022void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
1023{
1024 const auto ofm_index{node.getOutputs().at(0)};
1025 const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
1026
1027 auto block_size = node.param().block_size;
1028
1029 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1030 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1031
1032 auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
1033 ifm_tensor->handle(), ofm_tensor->handle(), block_size);
1034
1035 _return_fn = asAclFunction(std::move(fn));
1036}
1037
1038void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
1039{
1040 const auto output_index{node.getOutputs().at(0)};
1041 const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
1042 const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
1043
1044 auto output_tensor = _tensor_reg->getAclTensor(output_index);
1045 auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
1046 auto values_tensor = _tensor_reg->getAclTensor(values_index);
1047
1048 auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
1049 values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
1050
1051 _return_fn = asAclFunction(std::move(fn));
1052}
1053
1054void KernelGenerator::visit(const ir::operation::L2Normalization &node)
1055{
1056 const auto ofm_index{node.getOutputs().at(0)};
1057 const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
1058
1059 // {CL|Neon}L2Normalization performs the reduction only along dimension 0
1060 // L2 Normalization always performs the reduction along the depth axis
1061 // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by
1062 // choosing normalization parameters as below
1063
1064 const auto &ifm_shape = _ctx.at(ifm_index).shape();
1065 // TODO Support optional constant dimension that normalization would be performed on
1066 const auto normalization_axis = _ctx.at(ifm_index).shape().rank() - 1;
1067 int32_t radius =
1068 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
1069 float alpha = 1.0f; // In the implementation to make alpha_ become 1
1070 float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
1071 float bias = 0.0f; // Don't offset the reduction.
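// With alpha = 1, beta = 0.5, bias = 0 and a window spanning all channels, the CROSS_MAP
// normalization evaluates to x / sqrt(sum_over_channels(x^2)), i.e. an L2 norm along depth.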
1072
1073 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1074 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1075
1076 const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
1077 radius, alpha, beta, bias, false);
1078
1079 auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
1080 ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
1081
1082 _return_fn = asAclFunction(std::move(fn));
1083}
1084
1085void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
1086{
1087 const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
1088 const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
1089
1090 const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
1091 const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
1092 const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
1093
1094 auto output_tensor = _tensor_reg->getAclTensor(output_index);
1095 auto hits_tensor = _tensor_reg->getAclTensor(hits_index);
1096
1097 auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
1098 auto keys_tensor = _tensor_reg->getAclTensor(keys_index);
1099 auto values_tensor = _tensor_reg->getAclTensor(values_index);
1100
1101 auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
1102 lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
1103 output_tensor->handle(), hits_tensor->handle());
1104
1105 _return_fn = asAclFunction(std::move(fn));
1106}
1107
1108void KernelGenerator::visit(const ir::operation::PReLU &node)
1109{
1110 const auto ofm_index{node.getOutputs().at(0)};
1111 const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
1112 const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
1113
1114 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1115 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1116 auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index);
1117
1118 auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>(
1119 ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
1120
1121 _return_fn = asAclFunction(std::move(fn));
1122}
1123
1124void KernelGenerator::visit(const ir::operation::TransposeConv &node)
1125{
1126 const auto ofm_index{node.getOutputs().at(0)};
1127 const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
1128 const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
1129
1130 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
1131 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
1132 const auto ker_shape = _ctx.at(ker_index).shape().asFeature();
1133
1134 const auto stride = node.param().stride;
1135
1136 assert((node.param().padding.type == ir::PaddingType::SAME) ||
1137 (node.param().padding.type == ir::PaddingType::VALID));
1138 auto padding = ir::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
1139 ker_shape.W, ker_shape.H);
1140 uint32_t invalid_horizontal = 0;
1141 uint32_t invalid_vertical = 0;
1142 if (node.param().padding.type == ir::PaddingType::VALID)
1143 {
1144 invalid_horizontal =
1145 ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
1146 invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
1147 }
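// Under VALID padding the requested output can exceed (ifm - 1) * stride + ker on an axis;
// that difference is passed to CLTransposeConvLayer as the invalid horizontal/vertical margin.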
1148
1149 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1150 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1151 auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
1152
1153 const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
1154
1155 auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>(
1156 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
1157 ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
1158 invalid_vertical);
1159
1160 _return_fn = asAclFunction(std::move(fn));
1161}
1162
1163void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
1164{
1165 const auto ofm_index{node.getOutputs().at(0)};
1166 const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
1167 const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
1168
1169 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1170 auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
1171 auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
1172
1173 auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>(
1174 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
1175
1176 _return_fn = asAclFunction(std::move(fn));
1177}
1178
1179void KernelGenerator::visit(const ir::operation::TopKV2 &node)
1180{
1181 const auto outputValues_index{node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_VALUES)};
1182 const auto outputIndices_index{
1183 node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)};
1184
1185 const auto inputData_index{node.getInputs().at(ir::operation::TopKV2::Input::INPUT)};
1186
1187 // Currently, only rank-1 or rank-2 (batched vector) input is supported.
1188 assert(_ctx.at(inputData_index).shape().rank() == 1 ||
1189 _ctx.at(inputData_index).shape().rank() == 2);
1190
1191 const auto k = node.param().k;
1192
1193 auto values_tensor = _tensor_reg->getAclTensor(outputValues_index);
1194 auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index);
1195 auto input_tensor = _tensor_reg->getAclTensor(inputData_index);
1196
1197 auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>(
1198 input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
1199
1200 _return_fn = asAclFunction(std::move(fn));
1201}
1202
1203void KernelGenerator::visit(const ir::operation::Gather &node)
1204{
1205 const auto ofm_index{node.getOutputs().at(0)};
1206
1207 const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
1208 const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
1209
1210 const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
1211 const auto axis_raw = node.param().axis;
1212 const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
1213 const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
1214
1215 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1216 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1217 auto indices_tensor = _tensor_reg->getAclTensor(indices_index);
1218
1219 // input is n-D, indices k-D, output is (n + k - 1)-D
1220 size_t n = ifm_rank;
1221 assert(n == ifm_tensor->num_dimensions());
1222 size_t k = _ctx.at(indices_index).shape().rank();
1223 assert(k == indices_tensor->num_dimensions());
1224
1225 // Disable applied dim_correction
1226 if (n != ifm_tensor->info()->num_dimensions())
1227 {
1228 // The highest dimension is 1 and dim_correction has been applied to the ifm tensor
1229 acl_common::disableDimCorrection(ifm_tensor);
1230 }
1231 if (k != indices_tensor->info()->num_dimensions())
1232 {
1233 // The highest dimension is 1 and dim_correction has been applied to the indices tensor
1234 acl_common::disableDimCorrection(indices_tensor);
1235 }
1236
1237 auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>(
1238 ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
1239
1240 // Revert disabling applied dim_correction
1241 if (ifm_tensor->dimension(0) == 1)
1242 {
1243 acl_common::enableDimCorrection(ifm_tensor);
1244 }
1245 if (indices_tensor->dimension(0) == 1)
1246 {
1247 acl_common::enableDimCorrection(indices_tensor);
1248 }
1249
1250 _return_fn = asAclFunction(std::move(fn));
1251}
1252
1253void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
1254{
1255 const auto ofm_index{node.getOutputs().at(0)};
1256 const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
1257 const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
1258
1259 auto ifm_shape = _ctx.at(ifm_index).shape();
1260 auto ofm_shape = _ctx.at(ofm_index).shape();
1261
1262 assert((ifm_shape.rank() - 1) == ofm_shape.rank());
1263
1264 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1265 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1266 const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
1267
1268 int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
1269 if (axis_value < 0)
1270 {
1271 axis_value += ifm_rank;
1272 }
1273
1274 auto acl_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
1275 auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
1276 : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
1277 auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>(
1278 ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type);
1279
1280 _return_fn = asAclFunction(std::move(fn));
1281}
1282
1283void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
1284{
1285 const auto ofm_index{node.getOutputs().at(0)};
1286 const auto ifm_index{
1287 node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
1288
1289 auto radius = node.param().radius;
1290 auto alpha = node.param().alpha;
1291 auto beta = node.param().beta;
1292 auto bias = node.param().bias;
1293
1294 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1295 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1296
1297 const auto norm_info = ::arm_compute::NormalizationLayerInfo(
1298 ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
1299
1300 auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
1301 ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
1302
1303 _return_fn = asAclFunction(std::move(fn));
1304}
1305
1306void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
1307{
1308 const auto output_index{node.getOutputs().at(0)};
1309 const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
1310
1311 auto block_size = node.param().block_size;
1312 assert(block_size > 0);
1313
1314 auto output_tensor = _tensor_reg->getAclTensor(output_index);
1315 auto input_tensor = _tensor_reg->getAclTensor(input_index);
1316
1317 auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>(
1318 input_tensor->handle(), output_tensor->handle(), block_size);
1319
1320 _return_fn = asAclFunction(std::move(fn));
1321}
1322
1323void KernelGenerator::visit(const ir::operation::Split &node)
1324{
1325 const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
1326 const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};
1327
1328 assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
1329 if (!_ctx.at(axis_index).isConstant())
1330 {
1331 throw std::runtime_error("Non-constant axis_index NYI for acl_cl backend");
1332 }
1333
1334 const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
1335 std::vector<ir::OperandIndex> output_indexes;
1336 for (const auto &output : node.getOutputs())
1337 output_indexes.emplace_back(output);
1338
1339 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1340 std::vector<arm_compute::ICLTensor *> output_tensors;
1341 for (const auto &ofm_ind : output_indexes)
1342 output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
1343
1344 auto axis = _ctx.at(axis_index).asScalar<int32_t>();
1345 if (axis < 0)
1346 axis += ifm_rank;
1347 axis = acl_common::ToARMComputeAxis(ifm_rank, axis).value();
1348
1349 auto fn =
1350 acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
1351
1352 _return_fn = asAclFunction(std::move(fn));
1353}
1354
1355void KernelGenerator::visit(const ir::operation::SplitV &node)
1356{
1357 const auto ifm_index{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
1358 const auto size_split_index{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
1359 const auto split_dim_index{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
1360
1361 assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
1362
1363 const size_t ifm_rank = _ctx.at(ifm_index).shape().rank();
1364 std::vector<ir::OperandIndex> output_indexes;
1365 for (const auto &output : node.getOutputs())
1366 output_indexes.emplace_back(output);
1367
1368 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1369 auto size_split_tensor = _tensor_reg->getAclTensor(size_split_index);
1370
1371 std::vector<arm_compute::ICLTensor *> output_tensors;
1372 for (const auto &ofm_ind : output_indexes)
1373 output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
1374
1375 auto fn = std::make_unique<arm_compute::CLSplitVEx>();
1376 const auto &split_dim_op = _ctx.at(split_dim_index);
1377 if (split_dim_op.isConstant())
1378 {
1379 int32_t split_dim = split_dim_op.asScalar<int32_t>();
1380 uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim;
1381
1382 if (ifm_tensor->num_dimensions() != ifm_tensor->info()->num_dimensions())
1383 {
1384 // The highest dimension is 1 and dim_correction has been applied to the ifm tensor
1385 acl_common::disableDimCorrection(ifm_tensor);
1386 }
1387
1388 split_dim_revised = acl_common::ToARMComputeAxis(ifm_rank, split_dim_revised).value();
1389 fn->configure(ifm_tensor->handle(), size_split_tensor->handle(), split_dim_revised,
1390 output_tensors, node.param().num_splits);
1391
1392 if (ifm_tensor->dimension(0) == 1)
1393 {
1394 acl_common::enableDimCorrection(ifm_tensor);
1395 }
1396 }
1397 else
1398 {
1399 throw std::runtime_error("Non-constant split_dim NYI for acl_cl backend");
1400 }
1401
1402 _return_fn = asAclFunction(std::move(fn));
1403}
1404
1405void KernelGenerator::visit(const ir::operation::Unpack &node)
1406{
1407 const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
1408 auto axis{node.param().axis};
1409
1410 const auto input_rank = _ctx.at(input_index).shape().rank();
1411
1412 std::vector<ir::OperandIndex> output_indexes;
1413 for (const auto &output_index : node.getOutputs())
1414 output_indexes.emplace_back(output_index);
1415
1416 auto input_tensor = _tensor_reg->getAclTensor(input_index);
1417 std::vector<arm_compute::ICLTensor *> outputs;
1418 for (const auto &output_index : output_indexes)
1419 outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
1420
1421 if (axis < 0)
1422 axis += input_rank;
1423 axis = acl_common::ToARMComputeAxis(input_rank, axis).value();
1424
1425 // Disable applied dim_correction
1426 if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
1427 {
1428 // The highest dimension is 1 and dim_correction has been applied to the input tensor
1429 acl_common::disableDimCorrection(input_tensor);
1430 }
1431
1432 auto fn =
1433 acl_common::generateLayer<arm_compute::CLUnstack>(input_tensor->handle(), outputs, axis);
1434
1435 // Revert disabling applied dim_correction
1436 if (input_tensor->dimension(0) == 1)
1437 {
1438 acl_common::enableDimCorrection(input_tensor);
1439 }
1440
1441 _return_fn = asAclFunction(std::move(fn));
1442}
1443
1444void KernelGenerator::visit(const ir::operation::Pad &node)
1445{
1446 const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
1447 const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
1448 const auto output_index{node.getOutputs().at(0)};
1449 assert(_ctx.at(pad_index).data());
1450
1451 auto rank = _ctx.at(input_index).shape().rank();
1452 auto pad_base = _ctx.at(pad_index).data()->base();
1453
1454 auto input_type = _ctx.at(input_index).typeInfo();
1455 auto data_type = acl_common::asDataType(input_type.type());
1456 auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.zero_point());
1457 const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
1458
1459 auto input = _tensor_reg->getAclTensor(input_index)->handle();
1460 auto output = _tensor_reg->getAclTensor(output_index)->handle();
1461
1462 ::arm_compute::PaddingList padding_list;
1463 padding_list.resize(rank);
1464 for (int32_t n = 0; n < rank; ++n)
1465 {
1466 const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
1467
1468 const auto axis = acl_common::ToARMComputeAxis(rank, n).value();
1469 padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
1470 }
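// The pad operand above is laid out as rank pairs of {pad_before, pad_after} values (hence the
// n * 2 stride), each IR axis is remapped with ToARMComputeAxis so padding_list is indexed in
// ACL's dimension order, and pixel_value carries the (quantization-aware) constant fill value.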
1471
1472 // Disable applied dim_correction
1473 const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
1474 if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
1475 {
1476 // This means that the highest dimension's value is 1 and dim_correction has been applied to the input tensor
1477 acl_common::disableDimCorrection(input_tensor);
1478 }
1479
1480 auto fn =
1481 acl_common::generateLayer<arm_compute::CLPadLayerEx>(input, output, padding_list, pixel_value);
1482
1483 // NOTE Do not revert disabling applied dim_correction for 4D inputs.
1484 // Reverting it would produce mismatched results caused by an incorrect offset_first_element in
1485 // ICLKernel::add_tensor_argument<3>().
1486 // We have to disable applied dim_correction, and not revert to enabling it, for the kernel that
1487 // slices 4D to 3D, because slicing the arm_compute::Window can cause an incorrect
1488 // offset_first_element if the tensor is 4D and its highest dimension is 1.
1489 if (input_tensor->num_dimensions() < 4 && input_tensor->dimension(0) == 1)
1490 {
1491 acl_common::enableDimCorrection(input_tensor);
1492 }
1493
1494 _return_fn = asAclFunction(std::move(fn));
1495}
1496
1497void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
1498{
1499 const auto ofm_index{node.getOutputs().at(0)};
1500 const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};
1501
1502 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1503 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1504
1505 auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
1506 ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
1507
1508 _return_fn = asAclFunction(std::move(fn));
1509}
1510
1511void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
1512{
1513 const auto ofm_index{node.getOutputs().at(0)};
1514 const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};
1515
1516 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1517 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1518
1519 auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
1520 ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
1521
1522 _return_fn = asAclFunction(std::move(fn));
1523}
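// NOTE Both ConvertFp32ToFp16 and ConvertFp16ToFp32 above are lowered to CLDepthConvertLayer with
// the SATURATE policy and a shift of 0; the conversion direction is presumably taken from the data
// types of the input and output tensors rather than from the operation itself.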
1524
1525void KernelGenerator::visit(const ir::operation::Reverse &node)
1526{
1527 const auto ofm_index{node.getOutputs().at(0)};
1528 const auto ifm_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)};
1529 const auto axis_index{node.getInputs().at(ir::operation::Reverse::Input::AXIS)};
1530
1531 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1532 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1533 auto axis_tensor = _tensor_reg->getAclTensor(axis_index);
1534
1535 // WORKAROUND: the acl_cl backend only allows the U32 type for the axis
1536 // ConstantInitializer will resolve the S32 type to U32
1537 if (_ctx.at(axis_index).isConstant() &&
1538 (axis_tensor->handle()->info()->data_type() == arm_compute::DataType::S32))
1539 {
1540 axis_tensor->handle()->info()->set_data_type(arm_compute::DataType::U32);
1541 }
1542
1543 auto fn = acl_common::generateLayer<arm_compute::CLReverse>(
1544 ifm_tensor->handle(), ofm_tensor->handle(), axis_tensor->handle(), false);
1545
1546 _return_fn = asAclFunction(std::move(fn));
1547}
1548
1549} // namespace acl_cl
1550} // namespace backend
1551} // namespace onert