ONE - On-device Neural Engine
KernelGenerator.cc
1/*
2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "KernelGenerator.h"
18
19#include <arm_compute/runtime/CL/CLFunctions.h> // Include all ARM Compute CL functions
20#include <arm_compute/runtime/CL/CLFunctionsEx.h> // Include all ARM Compute EX CL functions
21
22#include <AclActivationBuilder.h>
23#include <AclFunction.h>
24#include <Convert.h>
25#include <Swizzle.h>
26
27#include "ir/Index.h"
28#include "ir/DataType.h"
29#include "ir/InternalType.h"
30#include "exec/NopFunction.h"
31#include "exec/FunctionSequence.h"
32#include "util/logging.h"
33#include "AclKernelGen.h"
34
35namespace onert::backend::acl_cl
36{
37
38using ::onert::backend::acl_common::asAclFunction;
39using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
40 ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
41
42KernelGenerator::KernelGenerator(
43 const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
44 const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
45 : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()), _operations_ctx(graph.operations()),
46 _tensor_builder(tensor_builder), _tensor_reg(tensor_reg)
47{
48 // DO NOTHING
49}
50
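// generate() builds one exec::FunctionSequence per operation: it visits the node, appends the
// kernel produced by the visitor, and leaves dynamic shape inference disabled. A minimal usage
// sketch, assuming an executor-side caller (the names below are illustrative, not part of this
// file):
//   KernelGenerator kgen{graph, tensor_builder, tensor_reg};
//   auto fn_seq = kgen.generate(op_index); // one FunctionSequence per ir::OperationIndex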
51std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
52{
53 auto ret = std::make_unique<exec::FunctionSequence>();
54 ret->enableDynamicShapeInferer(false);
55
56 const auto &op = _graph.operations().at(ind);
57 op.accept(*this);
58 ret->append(releaseFunction());
59 return ret;
60}
61
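// BatchToSpaceND: the block size operand must be constant, and when the NN API provides a crops
// input it must also be constant and all-zero; anything else is rejected as NYI on acl_cl.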
62void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
63{
64 const auto ofm_index{node.getOutputs().at(0)};
65 const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
66 const auto block_size_index{
67 node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
68
69 const auto NNApiInputs = 2;
70 if (node.getInputs().size() != NNApiInputs)
71 {
72 const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
73 if (!_ctx.at(crops_index).isConstant())
74 {
75 throw std::runtime_error("Non-constant crops NYI for acl_cl backend BatchToSpaceND");
76 }
77
78 auto crops = _ctx.at(crops_index).asVector<int32_t>();
79 for (auto &&crop : crops)
80 {
81 if (crop != 0)
82 {
83 throw std::runtime_error("Non-zero crops NYI for acl_cl backend BatchToSpaceND");
84 }
85 }
86 }
87
88 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
89 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
90
91 if (!_ctx.at(block_size_index).data())
92 throw std::runtime_error("ACL CL does not support dynamic block size for BatchToSpaceND");
93
94 auto block = _ctx.at(block_size_index).asVector<int32_t>();
95 int32_t height = block[0];
96 int32_t width = block[1];
97
98 auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>(
99 ifm_tensor->handle(), width, height, ofm_tensor->handle());
100
101 _return_fn = asAclFunction(std::move(fn));
102}
103
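// BinaryArithmetic dispatches ADD/SUB/MUL/DIV to the corresponding ACL CL function, validating
// the tensor infos first and fusing the activation via ActivationLayerInfo.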
104void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
105{
106 const auto ofm_index{node.getOutputs().at(0)};
107 const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
108 const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
109
110 const auto activation = node.param().activation;
111
112 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
113 auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
114 auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
115
116 const auto act_info = acl_common::asActivationLayerInfo(activation);
117
118 std::unique_ptr<arm_compute::IFunction> fn;
119 switch (node.param().arithmetic_type)
120 {
121 case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
122 {
123 arm_compute::CLArithmeticAddition::validate(lhs_tensor->info(), rhs_tensor->info(),
124 ofm_tensor->info(),
125 arm_compute::ConvertPolicy::SATURATE, act_info)
126 .throw_if_error();
127 fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>(
128 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
129 arm_compute::ConvertPolicy::SATURATE, act_info);
130 break;
131 }
132 case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
133 {
134 arm_compute::CLArithmeticSubtraction::validate(lhs_tensor->info(), rhs_tensor->info(),
135 ofm_tensor->info(),
136 arm_compute::ConvertPolicy::SATURATE, act_info)
137 .throw_if_error();
138 fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>(
139 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
140 arm_compute::ConvertPolicy::SATURATE, act_info);
141 break;
142 }
143 case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
144 {
145 arm_compute::CLPixelWiseMultiplication::validate(
146 lhs_tensor->info(), rhs_tensor->info(), ofm_tensor->info(), 1.0,
147 arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
148 act_info)
149 .throw_if_error();
150 fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>(
151 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
152 arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
153 act_info);
154 break;
155 }
156 case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
157 {
158 arm_compute::CLArithmeticDivision::validate(lhs_tensor->info(), rhs_tensor->info(),
159 ofm_tensor->info(), act_info)
160 .throw_if_error();
161 fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>(
162 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
163 break;
164 }
165 default:
166 assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
167 break;
168 }
169
170 _return_fn = asAclFunction(std::move(fn));
171}
172
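// Conv2D: the padding is derived from the IR padding parameter and packed into PadStrideInfo,
// and the activation is fused into CLConvolutionLayer through ActivationLayerInfo.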
173void KernelGenerator::visit(const ir::operation::Conv2D &node)
174{
175 using ir::operation::Conv2D;
176
177 const auto ofm_index{node.getOutputs().at(0)};
178 const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
179 const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
180 const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
181
182 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
183 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
184 // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
185 const auto &ker_shape = _ctx.at(ker_index).shape();
186 const auto ker_height = ker_shape.dim(1);
187 const auto ker_width = ker_shape.dim(2);
188
189 const auto stride = node.param().stride;
190 const auto padding =
191 ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
192 const auto activation = node.param().activation;
193
194 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
195 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
196 auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
197 auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
198
199 const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
200 const auto act_info = acl_common::asActivationLayerInfo(activation);
201
202 auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>(
203 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
204 ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
205 ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
206
207 _return_fn = asAclFunction(std::move(fn));
208}
209
210void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
211{
212 using ir::operation::DepthwiseConv2D;
213
214 const auto ofm_index{node.getOutputs().at(0)};
215 const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
216 const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
217 const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
218
219 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
220 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
221 // Kernel format is [1, kernel_height, kernel_width, depth_out].
222 const auto &ker_shape = _ctx.at(ker_index).shape();
223 const auto ker_height = ker_shape.dim(1);
224 const auto ker_width = ker_shape.dim(2);
225
226 const auto stride = node.param().stride;
227 const auto dilation = node.param().dilation;
228 const auto padding =
229 ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
230 dilation.width_factor, dilation.height_factor);
231 const auto multiplier = node.param().multiplier;
232 const auto activation = node.param().activation;
233
234 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
235 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
236 auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
237 auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
238
239 const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
240 const auto act_info = acl_common::asActivationLayerInfo(activation);
241 const auto dilation_info = acl_common::asDilation(dilation.width_factor, dilation.height_factor);
242
243 auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
244 ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
245 conv_info, multiplier, act_info, dilation_info);
246
247 _return_fn = asAclFunction(std::move(fn));
248}
249
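// Concat: when every input is already a sub-tensor of the output (concat elimination), a
// NopFunction is returned; a single input degenerates to CLCopy, and the general case uses
// CLConcatenateLayer with the axis converted to ACL's reversed axis order.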
250void KernelGenerator::visit(const ir::operation::Concat &node)
251{
252 const auto ofm_index{node.getOutputs().at(0)};
253
254 std::vector<ir::OperandIndex> input_indexes;
255
256 for (const auto &input : node.getInputs())
257 input_indexes.emplace_back(input);
258
259 const auto axis = node.param().axis;
260
261 // Concat elimination check
262 bool eliminated = _tensor_builder->areSubTensorsOf(ofm_index, node.getInputs());
263 if (eliminated)
264 {
265 // If concat eliminated, return a NOP IFunction
266 VERBOSE(acl_cl_KernelGenerator_Concat) << "Concat eliminated" << std::endl;
267 _return_fn = std::make_unique<exec::NopFunction>();
268 return;
269 }
270
271 auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
272 std::vector<const ::arm_compute::ICLTensor *> input_tensors;
273 for (const auto &ifm_ind : input_indexes)
274 input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
275
276 std::unique_ptr<::arm_compute::IFunction> fn;
277 if (input_indexes.size() < 2)
278 {
279 ::arm_compute::ICLTensor *input_tensor =
280 _tensor_reg->getAclTensor(input_indexes.at(0))->handle();
281
282 fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor, output_tensor->handle());
283 }
284 else
285 {
286 const auto rank = _ctx.at(ofm_index).shape().rank();
287 const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis).value();
288 fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>(
289 input_tensors, output_tensor->handle(), fixed_axis);
290 }
291
292 _return_fn = asAclFunction(std::move(fn));
293}
294
295void KernelGenerator::visit(const ir::operation::FullyConnected &node)
296{
297 const auto output_index{node.getOutputs().at(0)};
298 auto output_tensor = _tensor_reg->getAclTensor(output_index);
299 const auto activation = node.param().activation;
300 if (node.param().weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32)
301 throw std::runtime_error(
302 "KernelGenerator(acl_cl): FullyConnected 16x1Float32 weights is not supported.");
303
304 auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
305 ::arm_compute::CLFullyConnectedReshapingLayer>(
306 node, _ctx, _tensor_builder, _tensor_reg);
307 _return_fn = std::make_unique<exec::FunctionSequence>(
308 std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
309}
310
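// Reduce: MEAN maps to CLReduceMean, while the other reduce types go through CLReduceOperation
// with the axes converted to ACL coordinates (negative values and duplicates resolved).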
311void KernelGenerator::visit(const ir::operation::Reduce &node)
312{
313 const auto output_index{node.getOutputs().at(0)};
314 const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
315 const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
316 const auto keep_dims{node.param().keep_dims};
317 const auto reduce_type = node.param().reduce_type;
318
319 auto output_tensor = _tensor_reg->getAclTensor(output_index);
320 auto input_tensor = _tensor_reg->getAclTensor(input_index);
321
322 // Convert to ACL axes taking into account negative values and possible duplicates.
323 const auto &axes = _ctx.at(axes_index);
324 const auto input_rank = _ctx.at(input_index).shape().rank();
325
326 std::unique_ptr<arm_compute::IFunction> fn;
327 if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
328 {
329 const auto acl_axes = acl_common::asCoordinates(axes, input_rank);
330 fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes,
331 keep_dims, output_tensor->handle());
332 }
333 else
334 {
335 const auto acl_axes = acl_common::asSet(axes, input_rank);
336
337 fn = acl_common::generateLayer<arm_compute::CLReduceOperation>(
338 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
339 output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
340 }
341
342 _return_fn = asAclFunction(std::move(fn));
343}
344
345void KernelGenerator::visit(const ir::operation::Reshape &node)
346{
347 const auto output_index{node.getOutputs().at(0)};
348 const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
349
350 auto output_tensor = _tensor_reg->getAclTensor(output_index);
351 auto input_tensor = _tensor_reg->getAclTensor(input_index);
352
353 auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
354 output_tensor->handle());
355
356 _return_fn = asAclFunction(std::move(fn));
357}
358
359void KernelGenerator::visit(const ir::operation::Squeeze &node)
360{
361 // Squeeze is identical to reshape except that it has an optional dimensions input.
362 // In addition, the optional dims input is ignored since the output tensor already has the
363 // squeezed shape (produced by the freezer and toco).
364 const auto output_index{node.getOutputs().at(0)};
365 const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
366 const auto dims{node.param().dims};
367 const auto ndim{node.param().ndim};
368 (void)dims;
369 (void)ndim;
370
371 auto output_tensor = _tensor_reg->getAclTensor(output_index);
372 auto input_tensor = _tensor_reg->getAclTensor(input_index);
373 auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
374 output_tensor->handle());
375 _return_fn = asAclFunction(std::move(fn));
376}
377
378void KernelGenerator::visit(const ir::operation::Softmax &node)
379{
380 const auto output_index{node.getOutputs().at(0)};
381 const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
382
383 const auto beta = node.param().beta;
384
385 auto output_tensor = _tensor_reg->getAclTensor(output_index);
386 auto input_tensor = _tensor_reg->getAclTensor(input_index);
387
388 auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
389 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
390 output_tensor->handle(), beta);
391
392 _return_fn = asAclFunction(std::move(fn));
393}
394
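// Slice: the constant begins/sizes operands are converted to ACL start/end coordinates
// (end = begin + size), with each axis remapped by ToARMComputeAxis before calling CLSlice.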
395void KernelGenerator::visit(const ir::operation::Slice &node)
396{
397 const auto output_index{node.getOutputs().at(0)};
398 const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
399 const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
400 const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
401
402 auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
403 auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
404
405 // Set up the begin/end indices, reordered to match the ACL axis order of inputData
406 int input_rank = _ctx.at(input_index).shape().rank();
407 std::vector<int32_t> starts;
408 std::vector<int32_t> ends;
409 starts.resize(input_rank, 0);
410 ends.resize(input_rank, 0);
411 {
412 assert(_ctx.at(begins_index).data());
413 assert(_ctx.at(sizes_index).data());
414 auto beginData_base = _ctx.at(begins_index).data()->base();
415 auto sizeData_base = _ctx.at(sizes_index).data()->base();
416 [[maybe_unused]] const int beginData_size = _ctx.at(begins_index).shape().num_elements();
417 [[maybe_unused]] const int sizeData_size = _ctx.at(sizes_index).shape().num_elements();
418
419 using ir::DataType;
420
421 assert(_ctx.at(begins_index).typeInfo().type() == DataType::INT32);
422 assert(_ctx.at(sizes_index).typeInfo().type() == DataType::INT32);
423 assert(beginData_size == input_rank);
424 assert(sizeData_size == input_rank);
425
426 assert(beginData_base != nullptr);
427 for (int n = 0; n < input_rank; ++n)
428 {
429 auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n).value();
430
431 int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
432 starts[axis] = begin_value;
433
434 int32_t size_value = *(reinterpret_cast<const int32_t *>(sizeData_base) + n);
435 ends[axis] = begin_value + size_value;
436 }
437 }
438
439 ::arm_compute::Coordinates starts_set;
440 ::arm_compute::Coordinates ends_set;
441
442 for (size_t i = 0; i < starts.size(); ++i)
443 {
444 starts_set.set(i, starts[i]);
445 ends_set.set(i, ends[i]);
446 }
447
448 auto fn = acl_common::generateLayer<arm_compute::CLSlice>(
449 inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
450
451 _return_fn = asAclFunction(std::move(fn));
452}
453
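// StridedSlice: constant starts/ends/strides are remapped to ACL axis order and the
// begin/end/shrink-axis masks are bit-reordered to match; dim_correction is temporarily
// disabled when the tensor's reported rank differs from its ACL tensor info.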
454void KernelGenerator::visit(const ir::operation::StridedSlice &node)
455{
456 const auto output_index{node.getOutputs().at(0)};
457 const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
458 const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
459 const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
460 const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
461
462 auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
463 auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
464
465 // Set up the start/end/stride indices, reordered to match the ACL axis order of inputData
466 int input_rank = _ctx.at(input_index).shape().rank();
467 std::vector<int32_t> starts;
468 std::vector<int32_t> ends;
469 std::vector<int32_t> strides;
470 starts.resize(input_rank, 0);
471 ends.resize(input_rank, 0);
472 strides.resize(input_rank, 0);
473 {
474 assert(_ctx.at(starts_index).data());
475 assert(_ctx.at(ends_index).data());
476 assert(_ctx.at(strides_index).data());
477 auto startData_base = _ctx.at(starts_index).data()->base();
478 auto endData_base = _ctx.at(ends_index).data()->base();
479 auto stridesData_base = _ctx.at(strides_index).data()->base();
480 [[maybe_unused]] const int startData_size = _ctx.at(starts_index).shape().num_elements();
481 [[maybe_unused]] const int endData_size = _ctx.at(ends_index).shape().num_elements();
482 [[maybe_unused]] const int stridesData_size = _ctx.at(strides_index).shape().num_elements();
483
484 using ir::DataType;
485
486 assert(_ctx.at(starts_index).typeInfo().type() == DataType::INT32);
487 assert(_ctx.at(ends_index).typeInfo().type() == DataType::INT32);
488 assert(_ctx.at(strides_index).typeInfo().type() == DataType::INT32);
489 assert(startData_size == input_rank);
490 assert(endData_size == input_rank);
491 assert(stridesData_size == input_rank);
492
493 assert(startData_base != nullptr);
494 for (int n = 0; n < input_rank; ++n)
495 {
496 auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n).value();
497
498 int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
499 starts[axis] = start_value;
500
501 int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n);
502 ends[axis] = end_value;
503
504 int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n);
505 strides[axis] = strides_value;
506 }
507 }
508
509 // Reorder the mask bits to match the ACL axis order of inputData
510 const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank);
511 const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank);
512 const auto shrink_axis_mask =
513 acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
514
515 ::arm_compute::Coordinates starts_set;
516 ::arm_compute::Coordinates ends_set;
517 ::arm_compute::BiStrides strides_set;
518
519 for (size_t i = 0; i < starts.size(); ++i)
520 {
521 starts_set.set(i, starts[i]);
522 ends_set.set(i, ends[i]);
523 strides_set.set(i, strides[i]);
524 }
525
526 // Disable applied dim_correction
527 if (inputData_tensor->num_dimensions() != inputData_tensor->info()->num_dimensions())
528 {
529 // This means that high dimension's value is 1 and input tensor is applied dim_correction
530 acl_common::disableDimCorrection(inputData_tensor);
531 }
532
533 auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
534 inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
535 begin_mask, end_mask, shrink_axis_mask);
536
537 // Revert disabling applied dim_correction
538 if (inputData_tensor->dimension(0) == 1)
539 {
540 acl_common::enableDimCorrection(inputData_tensor);
541 }
542
543 _return_fn = asAclFunction(std::move(fn));
544}
545
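// Transpose: an empty permutation operand means "reverse all axes"; rank 1 falls back to CLCopy,
// rank 2 to CLTranspose, and higher ranks use CLPermute with an ACL permutation vector.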
546void KernelGenerator::visit(const ir::operation::Transpose &node)
547{
548 const auto ofm_idx{node.getOutputs().at(0)};
549 const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
550 const auto perm_idx{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
551
552 const auto rank = _ctx.at(ifm_idx).shape().rank();
553
554 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
555 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
556
557 const auto &perms = _ctx.at(perm_idx);
558 std::vector<int32_t> pv;
559 if (perms.shape() == ir::Shape{0})
560 {
561 pv.resize(rank);
562 std::iota(pv.begin(), pv.end(), 0);
563 std::reverse(pv.begin(), pv.end());
564 }
565 else
566 {
567 pv = _ctx.at(perm_idx).asVector<int32_t>();
568 }
569
570 std::unique_ptr<arm_compute::IFunction> fn;
571 if (rank == 1)
572 {
573 fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), ofm_tensor->handle());
574 }
575 else if (rank == 2)
576 {
577 assert(pv.size() == 2 && pv.at(0) == 1 && pv.at(1) == 0);
578 fn = acl_common::generateLayer<arm_compute::CLTranspose>(ifm_tensor->handle(),
579 ofm_tensor->handle());
580 }
581 else
582 {
583 auto backend_pv = acl_common::getARMComputePermutationVector(rank, pv);
584
585 fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
586 ofm_tensor->handle(), backend_pv);
587 }
588
589 _return_fn = asAclFunction(std::move(fn));
590}
591
592void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
593{
594 const auto ofm_index{node.getOutputs().at(0)};
595 const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
596
597 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
598 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
599
600 const ::arm_compute::ActivationLayerInfo act_info =
601 acl_common::asActivationLayerInfo(node.param().op_type, node.param().alpha, node.param().beta);
602
603 auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
604 ifm_tensor->handle(), ofm_tensor->handle(), act_info);
605
606 _return_fn = asAclFunction(std::move(fn));
607}
608
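// ElementwiseBinary lowers LOGICAL_AND, LOGICAL_OR, MAX and MIN to CLBinaryLogicalOp,
// CLBitwiseOr, CLElementwiseMax and CLElementwiseMin, respectively.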
609void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
610{
611 const auto output_index{node.getOutputs().at(0)};
612 const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
613 const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
614
615 auto output_tensor = _tensor_reg->getAclTensor(output_index);
616 auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
617 auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
618
619 std::unique_ptr<arm_compute::IFunction> fn;
620 switch (node.param().op_type)
621 {
622 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
623 {
624 fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>(
625 lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
626 arm_compute::BinaryLogicalOperation::AND);
627 break;
628 }
629 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
630 {
631 fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>(
632 lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
633 break;
634 }
635 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
636 {
637 fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>(
638 lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
639 break;
640 }
641 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
642 {
643 fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>(
644 lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
645 break;
646 }
647 default:
648 {
649 std::string err_msg("acl_cl KernelGenerator : " + node.name() +
650 " is not an elementwise-binary operation");
651 assert(false && err_msg.c_str());
652 break;
653 }
654 }
655
656 _return_fn = asAclFunction(std::move(fn));
657}
658
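// ElementwiseUnary: ABS and SQRT reuse CLActivationLayer with the matching activation function;
// CAST chooses CLCopy, CLCastBool or CLCast depending on the input/output types; the remaining
// types map one-to-one onto dedicated CL layers.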
659void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
660{
661 const auto output_index{node.getOutputs().at(0)};
662 const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
663
664 auto output_tensor = _tensor_reg->getAclTensor(output_index);
665 auto input_tensor = _tensor_reg->getAclTensor(input_index);
666
667 std::unique_ptr<arm_compute::IFunction> fn;
668 switch (node.param().op_type)
669 {
670 case ir::operation::ElementwiseUnary::Type::ABS:
671 {
672 const ::arm_compute::ActivationLayerInfo act_info{
673 ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
674
675 fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
676 input_tensor->handle(), output_tensor->handle(), act_info);
677 break;
678 }
679 case ir::operation::ElementwiseUnary::Type::CAST:
680 {
681 if (input_tensor->data_type() == output_tensor->data_type())
682 {
683 fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(),
684 output_tensor->handle());
685 }
686 else if (_ctx.at(input_index).typeInfo().type() == ir::DataType::BOOL8)
687 {
688 fn = acl_common::generateLayer<arm_compute::CLCastBool>(input_tensor->handle(),
689 output_tensor->handle());
690 }
691 else
692 {
693 // TODO Support converting float to int32 as round down
694 fn = acl_common::generateLayer<arm_compute::CLCast>(
695 input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
696 }
697 break;
698 }
699 case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
700 {
701 fn = acl_common::generateLayer<arm_compute::CLDequantizationLayer>(input_tensor->handle(),
702 output_tensor->handle());
703 break;
704 }
705 case ir::operation::ElementwiseUnary::Type::EXP:
706 {
707 fn = acl_common::generateLayer<arm_compute::CLExpLayer>(input_tensor->handle(),
708 output_tensor->handle());
709 break;
710 }
711 case ir::operation::ElementwiseUnary::Type::FLOOR:
712 {
713 fn = acl_common::generateLayer<arm_compute::CLFloor>(input_tensor->handle(),
714 output_tensor->handle());
715 break;
716 }
717 case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
718 {
719 fn = acl_common::generateLayer<arm_compute::CLBitwiseNot>(input_tensor->handle(),
720 output_tensor->handle());
721 break;
722 }
723 case ir::operation::ElementwiseUnary::Type::NEG:
724 {
725 fn = acl_common::generateLayer<arm_compute::CLNeg>(input_tensor->handle(),
726 output_tensor->handle());
727 break;
728 }
729 case ir::operation::ElementwiseUnary::Type::RSQRT:
730 {
731 fn = acl_common::generateLayer<arm_compute::CLRsqrtLayer>(input_tensor->handle(),
732 output_tensor->handle());
733 break;
734 }
735 case ir::operation::ElementwiseUnary::Type::SQRT:
736 {
737 const ::arm_compute::ActivationLayerInfo act_info{
738 ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
739
740 fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
741 input_tensor->handle(), output_tensor->handle(), act_info);
742 break;
743 }
744 default:
745 {
746 throw std::runtime_error("acl_cl KernelGenerator : " + node.name() + " is not supported yet");
747 break;
748 }
749 }
750
751 auto acl_fn = asAclFunction(std::move(fn));
752
753 _return_fn = std::move(acl_fn);
754}
755
756void KernelGenerator::visit(const ir::operation::ExpandDims &node)
757{
758 const auto output_index{node.getOutputs().at(0)};
759 const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
760
761 auto output_tensor = _tensor_reg->getAclTensor(output_index);
762 auto input_tensor = _tensor_reg->getAclTensor(input_index);
763
764 auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
765 output_tensor->handle());
766
767 _return_fn = asAclFunction(std::move(fn));
768}
769
770void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
771{
772 const auto ofm_index{node.getOutputs().at(0)};
773 const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
774 const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
775 const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
776
777 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
778 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
779 auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index);
780 auto beta_tensor = _tensor_reg->getAclTensor(beta_index);
781 auto epsilon = node.param().epsilon;
782 auto activation = node.param().activation;
783
784 auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>(
785 ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
786 epsilon);
787
788 _return_fn = std::make_unique<exec::FunctionSequence>(
789 asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
790}
791
792void KernelGenerator::visit(const ir::operation::LSTM &node)
793{
794 _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ICLTensor,
795 ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_reg);
796}
797
798void KernelGenerator::visit(const ir::operation::Comparison &node)
799{
800 const auto output_index{node.getOutputs().at(0)};
801 const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
802 const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
803
804 const auto comparison_type = node.param().comparison_type;
805
806 auto output_tensor = _tensor_reg->getAclTensor(output_index);
807 auto input0_tensor = _tensor_reg->getAclTensor(input0_index);
808 auto input1_tensor = _tensor_reg->getAclTensor(input1_index);
809
810 auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
811 input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
812 (arm_compute::ComparisonOperation)comparison_type);
813
814 _return_fn = asAclFunction(std::move(fn));
815}
816
817void KernelGenerator::visit(const ir::operation::OneHot &node)
818{
819 const auto output_idx{node.getOutputs().at(0)};
820 const auto indices_idx{node.getInputs().at(ir::operation::OneHot::Input::INDICES)};
821 const auto depth_idx{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
822 const auto onvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
823 const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
824 const auto depth = _ctx.at(depth_idx).asScalar<int32_t>();
825 assert(depth > 0);
826
827 auto output_tensor = _tensor_reg->getAclTensor(output_idx);
828 auto indices_tensor = _tensor_reg->getAclTensor(indices_idx);
829 auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx);
830
831 const size_t output_rank = _ctx.at(output_idx).shape().rank();
832 int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
833 axis = acl_common::ToARMComputeAxis(output_rank, axis).value();
834
835 if (output_tensor->num_dimensions() != output_tensor->info()->num_dimensions())
836 {
837 // This means that high dimension's value is 1 and output_tensor is applied dim_correction
838 acl_common::disableDimCorrection(output_tensor);
839 }
840
841 std::unique_ptr<::arm_compute::IFunction> fn;
842 const auto &offvalue = _ctx.at(offvalue_idx);
843 if (offvalue.isConstant())
844 {
845 fn = acl_common::generateLayer<arm_compute::CLOneHot>(
846 indices_tensor->handle(), onvalue_tensor->handle(), output_tensor->handle(),
847 acl_common::asPixelValue(offvalue), static_cast<uint32_t>(depth), axis);
848 }
849 else
850 {
851 auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx);
852 fn = acl_common::generateLayer<arm_compute::CLOneHot>(
853 indices_tensor->handle(), onvalue_tensor->handle(), offvalue_tensor->handle(),
854 output_tensor->handle(), static_cast<uint32_t>(depth), axis);
855 }
856
857 if (output_tensor->dimension(0) == 1)
858 {
859 acl_common::enableDimCorrection(output_tensor);
860 }
861
862 _return_fn = asAclFunction(std::move(fn));
863}
864
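// Pack stacks the inputs along the ACL-converted axis with CLStackLayer, temporarily disabling
// dim_correction on inputs whose reported rank differs from their ACL tensor info and
// re-enabling it afterwards.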
865void KernelGenerator::visit(const ir::operation::Pack &node)
866{
867 const auto output_index{node.getOutputs().at(0)};
868 auto axis{node.param().axis};
869
870 const auto output_rank = _ctx.at(output_index).shape().rank();
871
872 std::vector<ir::OperandIndex> input_indexes;
873 for (const auto &input_index : node.getInputs())
874 input_indexes.emplace_back(input_index);
875
876 auto output = _tensor_reg->getAclTensor(output_index)->handle();
877 std::vector<arm_compute::ICLTensor *> inputs;
878 for (const auto &input_index : input_indexes)
879 inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
880
881 if (axis < 0)
882 axis += output_rank;
883 axis = acl_common::ToARMComputeAxis(output_rank, axis).value();
884
885 // Disable applied dim_correction
886 for (const auto &input_index : input_indexes)
887 {
888 const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
889 if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
890 {
891 // This means that high dimension's value is 1 and input tensor is applied dim_correction
892 acl_common::disableDimCorrection(input_tensor);
893 }
894 }
895
896 auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output);
897
898 // Revert disabling applied dim_correction
899 for (const auto &input_index : input_indexes)
900 {
901 const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
902 if (input_tensor->dimension(0) == 1)
903 {
904 acl_common::enableDimCorrection(input_tensor);
905 }
906 }
907
908 _return_fn = asAclFunction(std::move(fn));
909}
910
911void KernelGenerator::visit(const ir::operation::Pool2D &node)
912{
913 auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
914 node, _ctx, _tensor_reg, acl_common::convertPoolType(node.param().op_type));
915
916 const auto ofm_index{node.getOutputs().at(0)};
917 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
918 const auto activation = node.param().activation;
919 _return_fn = std::make_unique<exec::FunctionSequence>(
920 asAclFunction(std::move(raw_fn)),
921 ActivationBuilder::generate(activation, ofm_tensor->handle()));
922}
923
924void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
925{
926 const auto ofm_index{node.getOutputs().at(0)};
927 const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
928
929 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
930 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
931
932 auto fn = acl_common::generateLayer<arm_compute::CLScale>(
933 ifm_tensor->handle(), ofm_tensor->handle(),
934 ::arm_compute::ScaleKernelInfo{
935 ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
936 ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT});
937
938 _return_fn = asAclFunction(std::move(fn));
939}
940
941void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node)
942{
943 const auto ofm_index{node.getOutputs().at(0)};
944 const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)};
945
946 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
947 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
948
949 auto fn = acl_common::generateLayer<arm_compute::CLScale>(
950 ifm_tensor->handle(), ofm_tensor->handle(),
951 ::arm_compute::ScaleKernelInfo{
952 ::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE,
953 ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT});
954
955 _return_fn = asAclFunction(std::move(fn));
956}
957
958void KernelGenerator::visit(const ir::operation::RNN &node)
959{
960 const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
961 const auto hidden_state_out_index{
962 node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
963
964 const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
965 const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
966 const auto recurrent_weights_index{
967 node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
968 const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
969 const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
970
971 const auto activation = node.param().activation;
972
973 auto output_tensor = _tensor_reg->getAclTensor(output_index);
974 auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index);
975
976 auto input_tensor = _tensor_reg->getAclTensor(input_index);
977 auto weights_tensor = _tensor_reg->getAclTensor(weights_index);
978 auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index);
979 auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
980 auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index);
981 auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
982
983 auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
984 hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
985 _return_fn = asAclFunction(std::move(copy_layer));
986
987 auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
988 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
989 weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
990 hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
991 _return_fn = asAclFunction(std::move(fn));
992}
993
994void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
995{
996 const auto ofm_index{node.getOutputs().at(0)};
997 const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
998 const auto block_size_index{
999 node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
1000 const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
1001
1002 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1003 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1004 auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
1005 auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index);
1006
1007 assert(_ctx.at(block_size_index).data());
1008 assert(_ctx.at(paddings_index).data());
1009
1010 auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>(
1011 ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
1012 ofm_tensor->handle());
1013
1014 _return_fn = asAclFunction(std::move(fn));
1015}
1016
1017void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
1018{
1019 const auto ofm_index{node.getOutputs().at(0)};
1020 const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
1021
1022 auto block_size = node.param().block_size;
1023
1024 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1025 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1026
1027 auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
1028 ifm_tensor->handle(), ofm_tensor->handle(), block_size);
1029
1030 _return_fn = asAclFunction(std::move(fn));
1031}
1032
1033void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
1034{
1035 const auto output_index{node.getOutputs().at(0)};
1036 const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
1037 const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
1038
1039 auto output_tensor = _tensor_reg->getAclTensor(output_index);
1040 auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
1041 auto values_tensor = _tensor_reg->getAclTensor(values_index);
1042
1043 auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
1044 values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
1045
1046 _return_fn = asAclFunction(std::move(fn));
1047}
1048
1049void KernelGenerator::visit(const ir::operation::L2Normalization &node)
1050{
1051 const auto ofm_index{node.getOutputs().at(0)};
1052 const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
1053
1054 // {CL|Neon}L2Normalization performs the reduction only along dimension 0
1055 // L2 Normalization always performs the reduction along the depth axis
1056 // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by
1057 // choosing normalization parameters as below
1058
1059 const auto &ifm_shape = _ctx.at(ifm_index).shape();
1060 // TODO Support optional constant dimension that normalization would be performed on
1061 const auto normalization_axis = _ctx.at(ifm_index).shape().rank() - 1;
1062 int32_t radius =
1063 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
1064 float alpha = 1.0f; // In the implementation to make alpha_ become 1
1065 float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
1066 float bias = 0.0f; // Don't offset the reduction.
1067
1068 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1069 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1070
1071 const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
1072 radius, alpha, beta, bias, false);
1073
1074 auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
1075 ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
1076
1077 _return_fn = asAclFunction(std::move(fn));
1078}
1079
1080void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
1081{
1082 const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
1083 const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
1084
1085 const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
1086 const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
1087 const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
1088
1089 auto output_tensor = _tensor_reg->getAclTensor(output_index);
1090 auto hits_tensor = _tensor_reg->getAclTensor(hits_index);
1091
1092 auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
1093 auto keys_tensor = _tensor_reg->getAclTensor(keys_index);
1094 auto values_tensor = _tensor_reg->getAclTensor(values_index);
1095
1096 auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
1097 lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
1098 output_tensor->handle(), hits_tensor->handle());
1099
1100 _return_fn = asAclFunction(std::move(fn));
1101}
1102
1103void KernelGenerator::visit(const ir::operation::PReLU &node)
1104{
1105 const auto ofm_index{node.getOutputs().at(0)};
1106 const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
1107 const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
1108
1109 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1110 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1111 auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index);
1112
1113 auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>(
1114 ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
1115
1116 _return_fn = asAclFunction(std::move(fn));
1117}
1118
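// TransposeConv: for VALID padding the invalid right/bottom margins are computed from the
// output, input and kernel extents so CLTransposeConvLayer matches the expected output size;
// no bias tensor is passed (nullptr).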
1119void KernelGenerator::visit(const ir::operation::TransposeConv &node)
1120{
1121 const auto ofm_index{node.getOutputs().at(0)};
1122 const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
1123 const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
1124
1125 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
1126 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
1127 const auto ker_shape = _ctx.at(ker_index).shape().asFeature();
1128
1129 const auto stride = node.param().stride;
1130
1131 assert((node.param().padding.type == ir::PaddingType::SAME) ||
1132 (node.param().padding.type == ir::PaddingType::VALID));
1133 auto padding = ir::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
1134 ker_shape.W, ker_shape.H);
1135 uint32_t invalid_horizontal = 0;
1136 uint32_t invalid_vertical = 0;
1137 if (node.param().padding.type == ir::PaddingType::VALID)
1138 {
1139 invalid_horizontal =
1140 ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
1141 invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
1142 }
1143
1144 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1145 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1146 auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
1147
1148 const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
1149
1150 auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>(
1151 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
1152 ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
1153 invalid_vertical);
1154
1155 _return_fn = asAclFunction(std::move(fn));
1156}
1157
1158void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
1159{
1160 const auto ofm_index{node.getOutputs().at(0)};
1161 const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
1162 const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
1163
1164 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1165 auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
1166 auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
1167
1168 auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>(
1169 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
1170
1171 _return_fn = asAclFunction(std::move(fn));
1172}
1173
1174void KernelGenerator::visit(const ir::operation::TopKV2 &node)
1175{
1176 const auto outputValues_index{node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_VALUES)};
1177 const auto outputIndices_index{
1178 node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)};
1179
1180 const auto inputData_index{node.getInputs().at(ir::operation::TopKV2::Input::INPUT)};
1181
1182 // Currently, only 1-D or 2-D input is supported.
1183 assert(_ctx.at(inputData_index).shape().rank() == 1 ||
1184 _ctx.at(inputData_index).shape().rank() == 2);
1185
1186 const auto k = node.param().k;
1187
1188 auto values_tensor = _tensor_reg->getAclTensor(outputValues_index);
1189 auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index);
1190 auto input_tensor = _tensor_reg->getAclTensor(inputData_index);
1191
1192 auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>(
1193 input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
1194
1195 _return_fn = asAclFunction(std::move(fn));
1196}
1197
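// Gather: with an n-D input and k-D indices the output is (n + k - 1)-D; the gather axis is
// converted to ACL axis order, and dim_correction is temporarily disabled on tensors whose
// reported rank differs from their ACL tensor info.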
1198void KernelGenerator::visit(const ir::operation::Gather &node)
1199{
1200 const auto ofm_index{node.getOutputs().at(0)};
1201
1202 const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
1203 const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
1204
1205 const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
1206 const auto axis_raw = node.param().axis;
1207 const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
1208 const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
1209
1210 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1211 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1212 auto indices_tensor = _tensor_reg->getAclTensor(indices_index);
1213
1214 // input is n-D, indices k-D, output is (n + k - 1)-D
1215 size_t n = ifm_rank;
1216 assert(n == ifm_tensor->num_dimensions());
1217 size_t k = _ctx.at(indices_index).shape().rank();
1218 assert(k == indices_tensor->num_dimensions());
1219
1220 // Disable applied dim_correction
1221 if (n != ifm_tensor->info()->num_dimensions())
1222 {
1223 // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
1224 acl_common::disableDimCorrection(ifm_tensor);
1225 }
1226 if (k != indices_tensor->info()->num_dimensions())
1227 {
1228 // This means that high dimension's value is 1 and indices tensor is applied dim_correction
1229 acl_common::disableDimCorrection(indices_tensor);
1230 }
1231
1232 auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>(
1233 ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
1234
1235 // Revert disabling applied dim_correction
1236 if (ifm_tensor->dimension(0) == 1)
1237 {
1238 acl_common::enableDimCorrection(ifm_tensor);
1239 }
1240 if (indices_tensor->dimension(0) == 1)
1241 {
1242 acl_common::enableDimCorrection(indices_tensor);
1243 }
1244
1245 _return_fn = asAclFunction(std::move(fn));
1246}
1247
1248void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
1249{
1250 const auto ofm_index{node.getOutputs().at(0)};
1251 const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
1252 const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
1253
1254 auto ifm_shape = _ctx.at(ifm_index).shape();
1255 auto ofm_shape = _ctx.at(ofm_index).shape();
1256
1257 assert((ifm_shape.rank() - 1) == ofm_shape.rank());
1258
1259 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1260 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1261 const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
1262
1263 int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
1264 if (axis_value < 0)
1265 {
1266 axis_value += ifm_rank;
1267 }
1268
1269 auto acl_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
1270 auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
1271 : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
1272 auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>(
1273 ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type);
1274
1275 _return_fn = asAclFunction(std::move(fn));
1276}
1277
1278void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
1279{
1280 const auto ofm_index{node.getOutputs().at(0)};
1281 const auto ifm_index{
1282 node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
1283
1284 auto radius = node.param().radius;
1285 auto alpha = node.param().alpha;
1286 auto beta = node.param().beta;
1287 auto bias = node.param().bias;
1288
1289 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1290 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1291
1292 const auto norm_info = ::arm_compute::NormalizationLayerInfo(
1293 ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
1294
1295 auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
1296 ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
1297
1298 _return_fn = asAclFunction(std::move(fn));
1299}
1300
1301void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
1302{
1303 const auto output_index{node.getOutputs().at(0)};
1304 const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
1305
1306 auto block_size = node.param().block_size;
1307 assert(block_size > 0);
1308
1309 auto output_tensor = _tensor_reg->getAclTensor(output_index);
1310 auto input_tensor = _tensor_reg->getAclTensor(input_index);
1311
1312 auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>(
1313 input_tensor->handle(), output_tensor->handle(), block_size);
1314
1315 _return_fn = asAclFunction(std::move(fn));
1316}
1317
1318void KernelGenerator::visit(const ir::operation::Split &node)
1319{
1320 const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
1321 const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};
1322
1323 assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
1324 if (!_ctx.at(axis_index).isConstant())
1325 {
1326 throw std::runtime_error("Non-constant axis_index NYI for acl_cl backend");
1327 }
1328
1329 const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
1330 std::vector<ir::OperandIndex> output_indexes;
1331 for (const auto &output : node.getOutputs())
1332 output_indexes.emplace_back(output);
1333
1334 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1335 std::vector<arm_compute::ICLTensor *> output_tensors;
1336 for (const auto &ofm_ind : output_indexes)
1337 output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
1338
1339 auto axis = _ctx.at(axis_index).asScalar<int32_t>();
1340 if (axis < 0)
1341 axis += ifm_rank;
1342 axis = acl_common::ToARMComputeAxis(ifm_rank, axis).value();
1343
1344 auto fn =
1345 acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
1346
1347 _return_fn = asAclFunction(std::move(fn));
1348}
1349
1350void KernelGenerator::visit(const ir::operation::SplitV &node)
1351{
1352 const auto ifm_index{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
1353 const auto size_split_index{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
1354 const auto split_dim_index{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
1355
1356 assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
1357
1358 const size_t ifm_rank = _ctx.at(ifm_index).shape().rank();
1359 std::vector<ir::OperandIndex> output_indexes;
1360 for (const auto &output : node.getOutputs())
1361 output_indexes.emplace_back(output);
1362
1363 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1364 auto size_split_tensor = _tensor_reg->getAclTensor(size_split_index);
1365
1366 std::vector<arm_compute::ICLTensor *> output_tensors;
1367 for (const auto &ofm_ind : output_indexes)
1368 output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
1369
1370 auto fn = std::make_unique<arm_compute::CLSplitVEx>();
1371 const auto &split_dim_op = _ctx.at(split_dim_index);
1372 if (split_dim_op.isConstant())
1373 {
1374 int32_t split_dim = split_dim_op.asScalar<int32_t>();
1375 uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim;
1376
1377 if (ifm_tensor->num_dimensions() != ifm_tensor->info()->num_dimensions())
1378 {
1379 // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
1380 acl_common::disableDimCorrection(ifm_tensor);
1381 }
1382
1383 split_dim_revised = acl_common::ToARMComputeAxis(ifm_rank, split_dim_revised).value();
1384 fn->configure(ifm_tensor->handle(), size_split_tensor->handle(), split_dim_revised,
1385 output_tensors, node.param().num_splits);
1386
1387 if (ifm_tensor->dimension(0) == 1)
1388 {
1389 acl_common::enableDimCorrection(ifm_tensor);
1390 }
1391 }
1392 else
1393 {
1394 throw std::runtime_error("Non-constant split_dim NYI for acl_cl backend");
1395 }
1396
1397 _return_fn = asAclFunction(std::move(fn));
1398}
1399
1400void KernelGenerator::visit(const ir::operation::Unpack &node)
1401{
1402 const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
1403 auto axis{node.param().axis};
1404
1405 const auto input_rank = _ctx.at(input_index).shape().rank();
1406
1407 std::vector<ir::OperandIndex> output_indexes;
1408 for (const auto &output_index : node.getOutputs())
1409 output_indexes.emplace_back(output_index);
1410
1411 auto input_tensor = _tensor_reg->getAclTensor(input_index);
1412 std::vector<arm_compute::ICLTensor *> outputs;
1413 for (const auto &output_index : output_indexes)
1414 outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
1415
1416 if (axis < 0)
1417 axis += input_rank;
1418 axis = acl_common::ToARMComputeAxis(input_rank, axis).value();
1419
1420 // Disable applied dim_correction
1421 if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
1422 {
1423 // This means that high dimension's value is 1 and input tensor is applied dim_correction
1424 acl_common::disableDimCorrection(input_tensor);
1425 }
1426
1427 auto fn =
1428 acl_common::generateLayer<arm_compute::CLUnstack>(input_tensor->handle(), outputs, axis);
1429
1430 // Revert disabling applied dim_correction
1431 if (input_tensor->dimension(0) == 1)
1432 {
1433 acl_common::enableDimCorrection(input_tensor);
1434 }
1435
1436 _return_fn = asAclFunction(std::move(fn));
1437}
1438
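// Pad: the constant pad operand supplies (before, after) pairs per axis, remapped into an ACL
// PaddingList; for 4D inputs dim_correction is deliberately not re-enabled (see the NOTE below)
// to avoid an incorrect offset_first_element when a 4D window is sliced to 3D.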
1439void KernelGenerator::visit(const ir::operation::Pad &node)
1440{
1441 const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
1442 const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
1443 const auto output_index{node.getOutputs().at(0)};
1444 assert(_ctx.at(pad_index).data());
1445
1446 auto rank = _ctx.at(input_index).shape().rank();
1447 auto pad_base = _ctx.at(pad_index).data()->base();
1448
1449 auto input_type = _ctx.at(input_index).typeInfo();
1450 auto data_type = acl_common::asDataType(input_type.type());
1451 auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.zero_point());
1452 const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
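// NOTE: The padded region is filled with the value 0 expressed in the input's data type;
// for quantized types, PixelValue is assumed to encode 0 using the given quantization info.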
1453
1454 auto input = _tensor_reg->getAclTensor(input_index)->handle();
1455 auto output = _tensor_reg->getAclTensor(output_index)->handle();
1456
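// NOTE: The PAD operand is assumed to hold {front, back} padding pairs, one pair per input
// dimension; each pair is stored at the corresponding ACL axis position via ToARMComputeAxis().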
1457 ::arm_compute::PaddingList padding_list;
1458 padding_list.resize(rank);
1459 for (int32_t n = 0; n < rank; ++n)
1460 {
1461 const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
1462
1463 const auto axis = acl_common::ToARMComputeAxis(rank, n).value();
1464 padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
1465 }
1466
1467 // Disable applied dim_correction
1468 const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
1469 if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
1470 {
1471 // This means that the highest dimension's value is 1 and dim_correction has been applied to the input tensor
1472 acl_common::disableDimCorrection(input_tensor);
1473 }
1474
1475 auto fn =
1476 acl_common::generateLayer<arm_compute::CLPadLayerEx>(input, output, padding_list, pixel_value);
1477
1478 // NOTE Do not revert disabling applied dim_correction for 4D tensors.
1479 // Doing so would produce a mismatched result because of an incorrect offset_first_element in
1480 // ICLKernel::add_tensor_argument<3>().
1481 // We have to disable applied dim_correction and not re-enable it for kernels that slice
1482 // 4D to 3D, because slicing an arm_compute::Window can cause an incorrect offset_first_element if
1483 // the used tensor is 4D and its highest dimension is 1.
1484 if (input_tensor->num_dimensions() < 4 && input_tensor->dimension(0) == 1)
1485 {
1486 acl_common::enableDimCorrection(input_tensor);
1487 }
1488
1489 _return_fn = asAclFunction(std::move(fn));
1490}
1491
1492void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
1493{
1494 const auto ofm_index{node.getOutputs().at(0)};
1495 const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};
1496
1497 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1498 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1499
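// NOTE: The trailing 0 passed to CLDepthConvertLayer is its shift argument, which is assumed
// to apply only to integer conversions and is therefore unused for float <-> half conversion.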
1500 auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
1501 ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
1502
1503 _return_fn = asAclFunction(std::move(fn));
1504}
1505
1506void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
1507{
1508 const auto ofm_index{node.getOutputs().at(0)};
1509 const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};
1510
1511 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1512 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1513
1514 auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
1515 ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
1516
1517 _return_fn = asAclFunction(std::move(fn));
1518}
1519
1520void KernelGenerator::visit(const ir::operation::Reverse &node)
1521{
1522 const auto ofm_index{node.getOutputs().at(0)};
1523 const auto ifm_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)};
1524 const auto axis_index{node.getInputs().at(ir::operation::Reverse::Input::AXIS)};
1525
1526 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1527 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1528 auto axis_tensor = _tensor_reg->getAclTensor(axis_index);
1529
1530 // WORKAROUND: the acl_cl backend only allows the U32 type for axis;
1531 // ConstantInitializer will resolve the S32 type to U32.
1532 if (_ctx.at(axis_index).isConstant() &&
1533 (axis_tensor->handle()->info()->data_type() == arm_compute::DataType::S32))
1534 {
1535 axis_tensor->handle()->info()->set_data_type(arm_compute::DataType::U32);
1536 }
1537
1538 auto fn = acl_common::generateLayer<arm_compute::CLReverse>(
1539 ifm_tensor->handle(), ofm_tensor->handle(), axis_tensor->handle(), false);
1540
1541 _return_fn = asAclFunction(std::move(fn));
1542}
1543
1544} // namespace onert::backend::acl_cl