ONE - On-device Neural Engine
KernelGenerator.cc
1/*
2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "KernelGenerator.h"
18
19#include <arm_compute/runtime/CL/CLFunctions.h> // Include all ARM Compute CL functions
20#include <arm_compute/runtime/CL/CLFunctionsEx.h> // Include all ARM Compute EX CL functions
21
22#include <AclActivationBuilder.h>
23#include <AclFunction.h>
24#include <Convert.h>
25#include <Swizzle.h>
26
27#include "ir/Index.h"
28#include "ir/DataType.h"
29#include "ir/InternalType.h"
30#include "exec/NopFunction.h"
31#include "exec/FunctionSequence.h"
32#include "util/logging.h"
33#include "AclKernelGen.h"
34
35namespace onert::backend::acl_cl
36{
37
38using ::onert::backend::acl_common::asAclFunction;
39using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
40 ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
41
42KernelGenerator::KernelGenerator(
43 const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
44 const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
45 : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()), _operations_ctx(graph.operations()),
46 _tensor_builder(tensor_builder), _tensor_reg(tensor_reg)
47{
48 // DO NOTHING
49}
50
51std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
52{
53 auto ret = std::make_unique<exec::FunctionSequence>();
54 ret->enableDynamicShapeInferer(false);
55
56 const auto &op = _graph.operations().at(ind);
57 op.accept(*this);
58 ret->append(releaseFunction());
59 return ret;
60}
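// Each visit() below follows the same pattern: look up the node's operand indexes, resolve them
// to ACL tensors through _tensor_reg, build an arm_compute CL function with
// acl_common::generateLayer<...>, wrap it with asAclFunction, and store it in _return_fn.
// generate() dispatches to the matching visit() via op.accept(*this) and appends the produced
// function to the returned FunctionSequence (releaseFunction() appears to hand back _return_fn;
// it comes from the KernelGeneratorBase base class).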
61
62void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
63{
64 const auto ofm_index{node.getOutputs().at(0)};
65 const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
66 const auto block_size_index{
67 node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
68
69 const auto NNApiInputs = 2;
70 if (node.getInputs().size() != NNApiInputs)
71 {
72 const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
73 if (!_ctx.at(crops_index).isConstant())
74 {
75 throw std::runtime_error("Non-constant crops NYI for acl_cl backend BatchToSpaceND");
76 }
77
78 auto crops = _ctx.at(crops_index).asVector<int32_t>();
79 for (auto &&crop : crops)
80 {
81 if (crop != 0)
82 {
83 throw std::runtime_error("Non-zero crops NYI for acl_cl backend BatchToSpaceND");
84 }
85 }
86 }
87
88 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
89 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
90
91 if (!_ctx.at(block_size_index).data())
92 throw std::runtime_error("ACL CL does not support dynamic block size for BatchToSpaceND");
93
94 auto block = _ctx.at(block_size_index).asVector<int32_t>();
95 int32_t height = block[0];
96 int32_t width = block[1];
97
98 auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>(
99 ifm_tensor->handle(), width, height, ofm_tensor->handle());
100
101 _return_fn = asAclFunction(std::move(fn));
102}
103
104void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
105{
106 const auto ofm_index{node.getOutputs().at(0)};
107 const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
108 const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
109
110 const auto activation = node.param().activation;
111
112 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
113 auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
114 auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
115
116 const auto act_info = acl_common::asActivationLayerInfo(activation);
117
118 std::unique_ptr<arm_compute::IFunction> fn;
119 switch (node.param().arithmetic_type)
120 {
121 case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
122 {
123 arm_compute::CLArithmeticAddition::validate(lhs_tensor->info(), rhs_tensor->info(),
124 ofm_tensor->info(),
125 arm_compute::ConvertPolicy::SATURATE, act_info)
126 .throw_if_error();
127 fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>(
128 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
129 arm_compute::ConvertPolicy::SATURATE, act_info);
130 break;
131 }
132 case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
133 {
134 arm_compute::CLArithmeticSubtraction::validate(lhs_tensor->info(), rhs_tensor->info(),
135 ofm_tensor->info(),
136 arm_compute::ConvertPolicy::SATURATE, act_info)
137 .throw_if_error();
138 fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>(
139 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
140 arm_compute::ConvertPolicy::SATURATE, act_info);
141 break;
142 }
143 case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
144 {
145 arm_compute::CLPixelWiseMultiplication::validate(
146 lhs_tensor->info(), rhs_tensor->info(), ofm_tensor->info(), 1.0,
147 arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
148 act_info)
149 .throw_if_error();
150 fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>(
151 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
152 arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
153 act_info);
154 break;
155 }
156 case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
157 {
158 arm_compute::CLArithmeticDivision::validate(lhs_tensor->info(), rhs_tensor->info(),
159 ofm_tensor->info(), act_info)
160 .throw_if_error();
161 fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>(
162 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
163 break;
164 }
165 default:
166 assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
167 break;
168 }
169
170 _return_fn = asAclFunction(std::move(fn));
171}
172
173void KernelGenerator::visit(const ir::operation::Conv2D &node)
174{
175 using ir::operation::Conv2D;
176
177 const auto ofm_index{node.getOutputs().at(0)};
178 const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
179 const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
180 const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
181
182 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
183 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
184 // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
185 const auto &ker_shape = _ctx.at(ker_index).shape();
186 const auto ker_height = ker_shape.dim(1);
187 const auto ker_width = ker_shape.dim(2);
188
189 const auto stride = node.param().stride;
190 const auto padding =
191 ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
192 const auto activation = node.param().activation;
193
194 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
195 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
196 auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
197 auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
198
199 const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
200 const auto act_info = acl_common::asActivationLayerInfo(activation);
201
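// Note: judging from ACL's CLConvolutionLayer::configure() signature, the explicit
// ::arm_compute::WeightsInfo() and ::arm_compute::Size2D(1U, 1U) arguments passed below are the
// defaults, i.e. weights that are not pre-reshaped and a 1x1 dilation.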
202 auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>(
203 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
204 ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
205 ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
206
207 _return_fn = asAclFunction(std::move(fn));
208}
209
210void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
211{
212 using ir::operation::DepthwiseConv2D;
213
214 const auto ofm_index{node.getOutputs().at(0)};
215 const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
216 const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
217 const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
218
219 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
220 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
221 // Kernel format is [1, kernel_height, kernel_width, depth_out].
222 const auto &ker_shape = _ctx.at(ker_index).shape();
223 const auto ker_height = ker_shape.dim(1);
224 const auto ker_width = ker_shape.dim(2);
225
226 const auto stride = node.param().stride;
227 const auto dilation = node.param().dilation;
228 const auto padding =
229 ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
230 dilation.width_factor, dilation.height_factor);
231 const auto multiplier = node.param().multiplier;
232 const auto activation = node.param().activation;
233
234 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
235 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
236 auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
237 auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
238
239 const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
240 const auto act_info = acl_common::asActivationLayerInfo(activation);
241 const auto dilation_info = acl_common::asDilation(dilation.width_factor, dilation.height_factor);
242
243 auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
244 ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
245 conv_info, multiplier, act_info, dilation_info);
246
247 _return_fn = asAclFunction(std::move(fn));
248}
249
250void KernelGenerator::visit(const ir::operation::Concat &node)
251{
252 const auto ofm_index{node.getOutputs().at(0)};
253
254 std::vector<ir::OperandIndex> input_indexes;
255
256 for (const auto &input : node.getInputs())
257 input_indexes.emplace_back(input);
258
259 const auto axis = node.param().axis;
260
261 // Concat elimination check
262 bool eliminated = _tensor_builder->areSubTensorsOf(ofm_index, node.getInputs());
263 if (eliminated)
264 {
265 // If concat eliminated, return a NOP IFunction
266 VERBOSE(acl_cl_KernelGenerator_Concat) << "Concat eliminated" << std::endl;
267 _return_fn = std::make_unique<exec::NopFunction>();
268 return;
269 }
270
271 auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
272 std::vector<const ::arm_compute::ICLTensor *> input_tensors;
273 for (const auto &ifm_ind : input_indexes)
274 input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
275
276 std::unique_ptr<::arm_compute::IFunction> fn;
277 if (input_indexes.size() < 2)
278 {
279 ::arm_compute::ICLTensor *input_tensor =
280 _tensor_reg->getAclTensor(input_indexes.at(0))->handle();
281
282 fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor, output_tensor->handle());
283 }
284 else
285 {
286 const auto rank = _ctx.at(ofm_index).shape().rank();
287 const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis).value();
288 fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>(
289 input_tensors, output_tensor->handle(), fixed_axis);
290 }
291
292 _return_fn = asAclFunction(std::move(fn));
293}
294
295void KernelGenerator::visit(const ir::operation::FullyConnected &node)
296{
297 const auto output_index{node.getOutputs().at(0)};
298 auto output_tensor = _tensor_reg->getAclTensor(output_index);
299 const auto activation = node.param().activation;
300 if (node.param().weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32)
301 throw std::runtime_error(
302 "KernelGenerator(acl_cl): FullyConnected 16x1Float32 weights is not supported.");
303
304 auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
305 ::arm_compute::CLFullyConnectedReshapingLayer>(
306 node, _ctx, _tensor_builder, _tensor_reg);
307 _return_fn = std::make_unique<exec::FunctionSequence>(
308 std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
309}
310
311void KernelGenerator::visit(const ir::operation::Reduce &node)
312{
313 const auto output_index{node.getOutputs().at(0)};
314 const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
315 const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
316 const auto keep_dims{node.param().keep_dims};
317 const auto reduce_type = node.param().reduce_type;
318
319 auto output_tensor = _tensor_reg->getAclTensor(output_index);
320 auto input_tensor = _tensor_reg->getAclTensor(input_index);
321
322 // Convert to ACL axes taking into account negative values and possible duplicates.
323 const auto &axes = _ctx.at(axes_index);
324 const auto input_rank = _ctx.at(input_index).shape().rank();
325
326 std::unique_ptr<arm_compute::IFunction> fn;
327 if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
328 {
329 const auto acl_axes = acl_common::asCoordinates(axes, input_rank);
330 fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes,
331 keep_dims, output_tensor->handle());
332 }
333 else
334 {
335 const auto acl_axes = acl_common::asSet(axes, input_rank);
336
337 fn = acl_common::generateLayer<arm_compute::CLReduceOperation>(
338 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
339 output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
340 }
341
342 _return_fn = asAclFunction(std::move(fn));
343}
344
345void KernelGenerator::visit(const ir::operation::Reshape &node)
346{
347 const auto output_index{node.getOutputs().at(0)};
348 const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
349
350 auto output_tensor = _tensor_reg->getAclTensor(output_index);
351 auto input_tensor = _tensor_reg->getAclTensor(input_index);
352
353 auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
354 output_tensor->handle());
355
356 _return_fn = asAclFunction(std::move(fn));
357}
358
359void KernelGenerator::visit(const ir::operation::Squeeze &node)
360{
361 // Squeeze is identical to reshape except that it has an optional dimensions input.
362 // In addition, the optional dims input is ignored, since the output tensor already has the
363 // squeezed shape produced by the freezer and toco.
364 // TODO Support multi-layout for frontend and backend
365 const auto output_index{node.getOutputs().at(0)};
366 const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
367 const auto dims{node.param().dims};
368 const auto ndim{node.param().ndim};
369 (void)dims;
370 (void)ndim;
371
372 auto output_tensor = _tensor_reg->getAclTensor(output_index);
373 auto input_tensor = _tensor_reg->getAclTensor(input_index);
374 auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
375 output_tensor->handle());
376 _return_fn = asAclFunction(std::move(fn));
377}
378
379void KernelGenerator::visit(const ir::operation::Softmax &node)
380{
381 const auto output_index{node.getOutputs().at(0)};
382 const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
383
384 const auto beta = node.param().beta;
385
386 auto output_tensor = _tensor_reg->getAclTensor(output_index);
387 auto input_tensor = _tensor_reg->getAclTensor(input_index);
388
389 auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
390 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
391 output_tensor->handle(), beta);
392
393 _return_fn = asAclFunction(std::move(fn));
394}
395
396void KernelGenerator::visit(const ir::operation::Slice &node)
397{
398 const auto output_index{node.getOutputs().at(0)};
399 const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
400 const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
401 const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
402
403 auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
404 auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
405
406 // Set up the slice indices (starts/ends) following the axis order of inputData
407 int input_rank = _ctx.at(input_index).shape().rank();
408 std::vector<int32_t> starts;
409 std::vector<int32_t> ends;
410 starts.resize(input_rank, 0);
411 ends.resize(input_rank, 0);
412 {
413 assert(_ctx.at(begins_index).data());
414 assert(_ctx.at(sizes_index).data());
415 auto beginData_base = _ctx.at(begins_index).data()->base();
416 auto sizeData_base = _ctx.at(sizes_index).data()->base();
417 [[maybe_unused]] const int beginData_size = _ctx.at(begins_index).shape().num_elements();
418 [[maybe_unused]] const int sizeData_size = _ctx.at(sizes_index).shape().num_elements();
419
420 using ir::DataType;
421
422 assert(_ctx.at(begins_index).typeInfo().type() == DataType::INT32);
423 assert(_ctx.at(sizes_index).typeInfo().type() == DataType::INT32);
424 assert(beginData_size == input_rank);
425 assert(sizeData_size == input_rank);
426
427 assert(beginData_base != nullptr);
428 for (int n = 0; n < input_rank; ++n)
429 {
430 auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n).value();
431
432 int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
433 starts[axis] = begin_value;
434
435 int32_t size_value = *(reinterpret_cast<const int32_t *>(sizeData_base) + n);
436 ends[axis] = begin_value + size_value;
437 }
438 }
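// The loop above converts each (begin, size) pair into the (start, end) coordinates that CLSlice
// expects (end = begin + size), and places them at the corresponding ACL axis via
// ToARMComputeAxis; ACL stores dimensions in reverse order, so axis n of a rank-r tensor
// typically maps to ACL axis r - n - 1.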
439
440 ::arm_compute::Coordinates starts_set;
441 ::arm_compute::Coordinates ends_set;
442
443 for (size_t i = 0; i < starts.size(); ++i)
444 {
445 starts_set.set(i, starts[i]);
446 ends_set.set(i, ends[i]);
447 }
448
449 auto fn = acl_common::generateLayer<arm_compute::CLSlice>(
450 inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
451
452 _return_fn = asAclFunction(std::move(fn));
453}
454
455void KernelGenerator::visit(const ir::operation::StridedSlice &node)
456{
457 const auto output_index{node.getOutputs().at(0)};
458 const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
459 const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
460 const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
461 const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
462
463 auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
464 auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
465
466 // Set up the slice indices (starts/ends/strides) following the axis order of inputData
467 int input_rank = _ctx.at(input_index).shape().rank();
468 std::vector<int32_t> starts;
469 std::vector<int32_t> ends;
470 std::vector<int32_t> strides;
471 starts.resize(input_rank, 0);
472 ends.resize(input_rank, 0);
473 strides.resize(input_rank, 0);
474 {
475 assert(_ctx.at(starts_index).data());
476 assert(_ctx.at(ends_index).data());
477 assert(_ctx.at(strides_index).data());
478 auto startData_base = _ctx.at(starts_index).data()->base();
479 auto endData_base = _ctx.at(ends_index).data()->base();
480 auto stridesData_base = _ctx.at(strides_index).data()->base();
481 [[maybe_unused]] const int startData_size = _ctx.at(starts_index).shape().num_elements();
482 [[maybe_unused]] const int endData_size = _ctx.at(ends_index).shape().num_elements();
483 [[maybe_unused]] const int stridesData_size = _ctx.at(strides_index).shape().num_elements();
484
485 using ir::DataType;
486
487 assert(_ctx.at(starts_index).typeInfo().type() == DataType::INT32);
488 assert(_ctx.at(ends_index).typeInfo().type() == DataType::INT32);
489 assert(_ctx.at(strides_index).typeInfo().type() == DataType::INT32);
490 assert(startData_size == input_rank);
491 assert(endData_size == input_rank);
492 assert(stridesData_size == input_rank);
493
494 assert(startData_base != nullptr);
495 for (int n = 0; n < input_rank; ++n)
496 {
497 auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n).value();
498
499 int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
500 starts[axis] = start_value;
501
502 int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n);
503 ends[axis] = end_value;
504
505 int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n);
506 strides[axis] = strides_value;
507 }
508 }
509
510 // Reorder the mask bits to follow the axis order of inputData
511 const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank);
512 const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank);
513 const auto shrink_axis_mask =
514 acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
515
516 ::arm_compute::Coordinates starts_set;
517 ::arm_compute::Coordinates ends_set;
518 ::arm_compute::BiStrides strides_set;
519
520 for (size_t i = 0; i < starts.size(); ++i)
521 {
522 starts_set.set(i, starts[i]);
523 ends_set.set(i, ends[i]);
524 strides_set.set(i, strides[i]);
525 }
526
527 // Disable applied dim_correction
528 if (inputData_tensor->num_dimensions() != inputData_tensor->info()->num_dimensions())
529 {
530 // This means that high dimension's value is 1 and input tensor is applied dim_correction
531 acl_common::disableDimCorrection(inputData_tensor);
532 }
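// Background on the dim_correction handling here and below (an inference about ACL behaviour,
// not stated in this file): arm_compute::TensorShape normally drops trailing dimensions of
// size 1, so when the registered rank and info()->num_dimensions() disagree, the correction is
// temporarily disabled so the kernel sees coordinates and masks for the original rank, and is
// re-enabled after the layer is configured.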
533
534 auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
535 inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
536 begin_mask, end_mask, shrink_axis_mask);
537
538 // Revert disabling applied dim_correction
539 if (inputData_tensor->dimension(0) == 1)
540 {
541 acl_common::enableDimCorrection(inputData_tensor);
542 }
543
544 _return_fn = asAclFunction(std::move(fn));
545}
546
547void KernelGenerator::visit(const ir::operation::Transpose &node)
548{
549 const auto ofm_idx{node.getOutputs().at(0)};
550 const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
551 const auto perm_idx{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
552
553 const auto rank = _ctx.at(ifm_idx).shape().rank();
554
555 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
556 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
557
558 const auto &perms = _ctx.at(perm_idx);
559 std::vector<int32_t> pv;
560 if (perms.shape() == ir::Shape{0})
561 {
562 pv.resize(rank);
563 std::iota(pv.begin(), pv.end(), 0);
564 std::reverse(pv.begin(), pv.end());
565 }
566 else
567 {
568 pv = _ctx.at(perm_idx).asVector<int32_t>();
569 }
570
571 std::unique_ptr<arm_compute::IFunction> fn;
572 if (rank == 1)
573 {
574 fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), ofm_tensor->handle());
575 }
576 else if (rank == 2)
577 {
578 assert(pv.size() == 2 && pv.at(0) == 1 && pv.at(1) == 0);
579 fn = acl_common::generateLayer<arm_compute::CLTranspose>(ifm_tensor->handle(),
580 ofm_tensor->handle());
581 }
582 else
583 {
584 auto backend_pv = acl_common::getARMComputePermutationVector(rank, pv);
585
586 fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
587 ofm_tensor->handle(), backend_pv);
588 }
589
590 _return_fn = asAclFunction(std::move(fn));
591}
592
593void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
594{
595 const auto ofm_index{node.getOutputs().at(0)};
596 const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
597
598 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
599 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
600
601 const ::arm_compute::ActivationLayerInfo act_info =
602 acl_common::asActivationLayerInfo(node.param().op_type, node.param().alpha, node.param().beta);
603
604 auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
605 ifm_tensor->handle(), ofm_tensor->handle(), act_info);
606
607 _return_fn = asAclFunction(std::move(fn));
608}
609
610void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
611{
612 const auto output_index{node.getOutputs().at(0)};
613 const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
614 const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
615
616 auto output_tensor = _tensor_reg->getAclTensor(output_index);
617 auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
618 auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
619
620 std::unique_ptr<arm_compute::IFunction> fn;
621 switch (node.param().op_type)
622 {
623 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
624 {
625 fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>(
626 lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
627 arm_compute::BinaryLogicalOperation::AND);
628 break;
629 }
630 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
631 {
632 fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>(
633 lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
634 break;
635 }
636 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
637 {
638 fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>(
639 lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
640 break;
641 }
642 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
643 {
644 fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>(
645 lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
646 break;
647 }
648 default:
649 {
650 std::string err_msg("acl_cl KernelGenerator : " + node.name() +
651 " is not an elementwise-binary operation");
652 assert(false && err_msg.c_str());
653 break;
654 }
655 }
656
657 _return_fn = asAclFunction(std::move(fn));
658}
659
660void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
661{
662 const auto output_index{node.getOutputs().at(0)};
663 const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
664
665 auto output_tensor = _tensor_reg->getAclTensor(output_index);
666 auto input_tensor = _tensor_reg->getAclTensor(input_index);
667
668 std::unique_ptr<arm_compute::IFunction> fn;
669 switch (node.param().op_type)
670 {
671 case ir::operation::ElementwiseUnary::Type::ABS:
672 {
673 const ::arm_compute::ActivationLayerInfo act_info{
674 ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
675
676 fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
677 input_tensor->handle(), output_tensor->handle(), act_info);
678 break;
679 }
680 case ir::operation::ElementwiseUnary::Type::CAST:
681 {
682 if (input_tensor->data_type() == output_tensor->data_type())
683 {
684 fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(),
685 output_tensor->handle());
686 }
687 else if (_ctx.at(input_index).typeInfo().type() == ir::DataType::BOOL8)
688 {
689 fn = acl_common::generateLayer<arm_compute::CLCastBool>(input_tensor->handle(),
690 output_tensor->handle());
691 }
692 else
693 {
694 // TODO Support converting float to int32 as round down
695 fn = acl_common::generateLayer<arm_compute::CLCast>(
696 input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
697 }
698 break;
699 }
700 case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
701 {
702 fn = acl_common::generateLayer<arm_compute::CLDequantizationLayer>(input_tensor->handle(),
703 output_tensor->handle());
704 break;
705 }
706 case ir::operation::ElementwiseUnary::Type::EXP:
707 {
708 fn = acl_common::generateLayer<arm_compute::CLExpLayer>(input_tensor->handle(),
709 output_tensor->handle());
710 break;
711 }
712 case ir::operation::ElementwiseUnary::Type::FLOOR:
713 {
714 fn = acl_common::generateLayer<arm_compute::CLFloor>(input_tensor->handle(),
715 output_tensor->handle());
716 break;
717 }
718 case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
719 {
720 fn = acl_common::generateLayer<arm_compute::CLBitwiseNot>(input_tensor->handle(),
721 output_tensor->handle());
722 break;
723 }
724 case ir::operation::ElementwiseUnary::Type::NEG:
725 {
726 fn = acl_common::generateLayer<arm_compute::CLNeg>(input_tensor->handle(),
727 output_tensor->handle());
728 break;
729 }
730 case ir::operation::ElementwiseUnary::Type::RSQRT:
731 {
732 fn = acl_common::generateLayer<arm_compute::CLRsqrtLayer>(input_tensor->handle(),
733 output_tensor->handle());
734 break;
735 }
736 case ir::operation::ElementwiseUnary::Type::SQRT:
737 {
738 const ::arm_compute::ActivationLayerInfo act_info{
739 ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
740
741 fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
742 input_tensor->handle(), output_tensor->handle(), act_info);
743 break;
744 }
745 default:
746 {
747 throw std::runtime_error("acl_cl KernelGenerator : " + node.name() + " is not supported yet");
748 break;
749 }
750 }
751
752 auto acl_fn = asAclFunction(std::move(fn));
753
754 _return_fn = std::move(acl_fn);
755}
756
757void KernelGenerator::visit(const ir::operation::ExpandDims &node)
758{
759 const auto output_index{node.getOutputs().at(0)};
760 const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
761
762 auto output_tensor = _tensor_reg->getAclTensor(output_index);
763 auto input_tensor = _tensor_reg->getAclTensor(input_index);
764
765 auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
766 output_tensor->handle());
767
768 _return_fn = asAclFunction(std::move(fn));
769}
770
771void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
772{
773 const auto ofm_index{node.getOutputs().at(0)};
774 const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
775 const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
776 const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
777
778 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
779 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
780 auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index);
781 auto beta_tensor = _tensor_reg->getAclTensor(beta_index);
782 auto epsilon = node.param().epsilon;
783 auto activation = node.param().activation;
784
785 auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>(
786 ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
787 epsilon);
788
789 _return_fn = std::make_unique<exec::FunctionSequence>(
790 asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
791}
792
793void KernelGenerator::visit(const ir::operation::LSTM &node)
794{
795 _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ICLTensor,
796 ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_reg);
797}
798
799void KernelGenerator::visit(const ir::operation::Comparison &node)
800{
801 const auto output_index{node.getOutputs().at(0)};
802 const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
803 const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
804
805 const auto comparison_type = node.param().comparison_type;
806
807 auto output_tensor = _tensor_reg->getAclTensor(output_index);
808 auto input0_tensor = _tensor_reg->getAclTensor(input0_index);
809 auto input1_tensor = _tensor_reg->getAclTensor(input1_index);
810
811 auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
812 input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
813 (arm_compute::ComparisonOperation)comparison_type);
814
815 _return_fn = asAclFunction(std::move(fn));
816}
817
818void KernelGenerator::visit(const ir::operation::OneHot &node)
819{
820 const auto output_idx{node.getOutputs().at(0)};
821 const auto indices_idx{node.getInputs().at(ir::operation::OneHot::Input::INDICES)};
822 const auto depth_idx{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
823 const auto onvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
824 const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
825 const auto depth = _ctx.at(depth_idx).asScalar<int32_t>();
826 assert(depth > 0);
827
828 auto output_tensor = _tensor_reg->getAclTensor(output_idx);
829 auto indices_tensor = _tensor_reg->getAclTensor(indices_idx);
830 auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx);
831
832 const size_t output_rank = _ctx.at(output_idx).shape().rank();
833 int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
834 axis = acl_common::ToARMComputeAxis(output_rank, axis).value();
835
836 if (output_tensor->num_dimensions() != output_tensor->info()->num_dimensions())
837 {
838 // This means that high dimension's value is 1 and output_tensor is applied dim_correction
839 acl_common::disableDimCorrection(output_tensor);
840 }
841
842 std::unique_ptr<::arm_compute::IFunction> fn;
843 const auto &offvalue = _ctx.at(offvalue_idx);
844 if (offvalue.isConstant())
845 {
846 fn = acl_common::generateLayer<arm_compute::CLOneHot>(
847 indices_tensor->handle(), onvalue_tensor->handle(), output_tensor->handle(),
848 acl_common::asPixelValue(offvalue), static_cast<uint32_t>(depth), axis);
849 }
850 else
851 {
852 auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx);
853 fn = acl_common::generateLayer<arm_compute::CLOneHot>(
854 indices_tensor->handle(), onvalue_tensor->handle(), offvalue_tensor->handle(),
855 output_tensor->handle(), static_cast<uint32_t>(depth), axis);
856 }
857
858 if (output_tensor->dimension(0) == 1)
859 {
860 acl_common::enableDimCorrection(output_tensor);
861 }
862
863 _return_fn = asAclFunction(std::move(fn));
864}
865
866void KernelGenerator::visit(const ir::operation::Pack &node)
867{
868 const auto output_index{node.getOutputs().at(0)};
869 auto axis{node.param().axis};
870
871 const auto output_rank = _ctx.at(output_index).shape().rank();
872
873 std::vector<ir::OperandIndex> input_indexes;
874 for (const auto &input_index : node.getInputs())
875 input_indexes.emplace_back(input_index);
876
877 auto output = _tensor_reg->getAclTensor(output_index)->handle();
878 std::vector<arm_compute::ICLTensor *> inputs;
879 for (const auto &input_index : input_indexes)
880 inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
881
882 if (axis < 0)
883 axis += output_rank;
884 axis = acl_common::ToARMComputeAxis(output_rank, axis).value();
885
886 // Disable applied dim_correction
887 for (const auto &input_index : input_indexes)
888 {
889 const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
890 if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
891 {
892 // This means that high dimension's value is 1 and input tensor is applied dim_correction
893 acl_common::disableDimCorrection(input_tensor);
894 }
895 }
896
897 auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output);
898
899 // Revert disabling applied dim_correction
900 for (const auto &input_index : input_indexes)
901 {
902 const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
903 if (input_tensor->dimension(0) == 1)
904 {
905 acl_common::enableDimCorrection(input_tensor);
906 }
907 }
908
909 _return_fn = asAclFunction(std::move(fn));
910}
911
912void KernelGenerator::visit(const ir::operation::Pool2D &node)
913{
914 auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
915 node, _ctx, _tensor_reg, acl_common::convertPoolType(node.param().op_type));
916
917 const auto ofm_index{node.getOutputs().at(0)};
918 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
919 const auto activation = node.param().activation;
920 _return_fn = std::make_unique<exec::FunctionSequence>(
921 asAclFunction(std::move(raw_fn)),
922 ActivationBuilder::generate(activation, ofm_tensor->handle()));
923}
924
925void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
926{
927 const auto ofm_index{node.getOutputs().at(0)};
928 const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
929
930 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
931 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
932
933 auto fn = acl_common::generateLayer<arm_compute::CLScale>(
934 ifm_tensor->handle(), ofm_tensor->handle(),
935 ::arm_compute::ScaleKernelInfo{
936 ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
937 ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT});
938
939 _return_fn = asAclFunction(std::move(fn));
940}
941
942void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node)
943{
944 const auto ofm_index{node.getOutputs().at(0)};
945 const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)};
946
947 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
948 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
949
950 auto fn = acl_common::generateLayer<arm_compute::CLScale>(
951 ifm_tensor->handle(), ofm_tensor->handle(),
952 ::arm_compute::ScaleKernelInfo{
953 ::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE,
954 ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT});
955
956 _return_fn = asAclFunction(std::move(fn));
957}
958
959void KernelGenerator::visit(const ir::operation::RNN &node)
960{
961 const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
962 const auto hidden_state_out_index{
963 node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
964
965 const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
966 const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
967 const auto recurrent_weights_index{
968 node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
969 const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
970 const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
971
972 const auto activation = node.param().activation;
973
974 auto output_tensor = _tensor_reg->getAclTensor(output_index);
975 auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index);
976
977 auto input_tensor = _tensor_reg->getAclTensor(input_index);
978 auto weights_tensor = _tensor_reg->getAclTensor(weights_index);
979 auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index);
980 auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
981 auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index);
982 auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
983
984 auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
985 hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
986 _return_fn = asAclFunction(std::move(copy_layer));
987
988 auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
989 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
990 weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
991 hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
992 _return_fn = asAclFunction(std::move(fn));
993}
994
995void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
996{
997 const auto ofm_index{node.getOutputs().at(0)};
998 const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
999 const auto block_size_index{
1000 node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
1001 const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
1002
1003 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1004 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1005 auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
1006 auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index);
1007
1008 assert(_ctx.at(block_size_index).data());
1009 assert(_ctx.at(paddings_index).data());
1010
1011 auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>(
1012 ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
1013 ofm_tensor->handle());
1014
1015 _return_fn = asAclFunction(std::move(fn));
1016}
1017
1018void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
1019{
1020 const auto ofm_index{node.getOutputs().at(0)};
1021 const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
1022
1023 auto block_size = node.param().block_size;
1024
1025 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1026 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1027
1028 auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
1029 ifm_tensor->handle(), ofm_tensor->handle(), block_size);
1030
1031 _return_fn = asAclFunction(std::move(fn));
1032}
1033
1034void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
1035{
1036 const auto output_index{node.getOutputs().at(0)};
1037 const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
1038 const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
1039
1040 auto output_tensor = _tensor_reg->getAclTensor(output_index);
1041 auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
1042 auto values_tensor = _tensor_reg->getAclTensor(values_index);
1043
1044 auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
1045 values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
1046
1047 _return_fn = asAclFunction(std::move(fn));
1048}
1049
1050void KernelGenerator::visit(const ir::operation::L2Normalization &node)
1051{
1052 const auto ofm_index{node.getOutputs().at(0)};
1053 const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
1054
1055 // {CL|Neon}L2Normalization performs the reduction only along dimension 0
1056 // L2 Normalization always performs the reduction along the depth axis
1057 // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by
1058 // choosing normalization parameters as below
1059
1060 const auto &ifm_shape = _ctx.at(ifm_index).shape();
1061 // TODO Support optional constant dimension that normalization would be performed on
1062 const auto normalization_axis = _ctx.at(ifm_index).shape().rank() - 1;
1063 int32_t radius =
1064 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
1065 float alpha = 1.0f; // In the implementation to make alpha_ become 1
1066 float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
1067 float bias = 0.0f; // Don't offset the reduction.
1068
1069 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1070 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1071
1072 const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
1073 radius, alpha, beta, bias, false);
1074
1075 auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
1076 ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
1077
1078 _return_fn = asAclFunction(std::move(fn));
1079}
1080
1081void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
1082{
1083 const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
1084 const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
1085
1086 const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
1087 const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
1088 const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
1089
1090 auto output_tensor = _tensor_reg->getAclTensor(output_index);
1091 auto hits_tensor = _tensor_reg->getAclTensor(hits_index);
1092
1093 auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
1094 auto keys_tensor = _tensor_reg->getAclTensor(keys_index);
1095 auto values_tensor = _tensor_reg->getAclTensor(values_index);
1096
1097 auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
1098 lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
1099 output_tensor->handle(), hits_tensor->handle());
1100
1101 _return_fn = asAclFunction(std::move(fn));
1102}
1103
1104void KernelGenerator::visit(const ir::operation::PReLU &node)
1105{
1106 const auto ofm_index{node.getOutputs().at(0)};
1107 const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
1108 const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
1109
1110 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1111 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1112 auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index);
1113
1114 auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>(
1115 ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
1116
1117 _return_fn = asAclFunction(std::move(fn));
1118}
1119
1120void KernelGenerator::visit(const ir::operation::TransposeConv &node)
1121{
1122 const auto ofm_index{node.getOutputs().at(0)};
1123 const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
1124 const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
1125
1126 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
1127 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
1128 const auto ker_shape = _ctx.at(ker_index).shape().asFeature();
1129
1130 const auto stride = node.param().stride;
1131
1132 assert((node.param().padding.type == ir::PaddingType::SAME) ||
1133 (node.param().padding.type == ir::PaddingType::VALID));
1134 auto padding = ir::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
1135 ker_shape.W, ker_shape.H);
1136 uint32_t invalid_horizontal = 0;
1137 uint32_t invalid_vertical = 0;
1138 if (node.param().padding.type == ir::PaddingType::VALID)
1139 {
1140 invalid_horizontal =
1141 ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
1142 invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
1143 }
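// invalid_horizontal/invalid_vertical count the rightmost/bottom output columns/rows that the
// transposed convolution cannot fill when the model-specified output shape exceeds
// (ifm - 1) * stride + ker. Hypothetical example (numbers not taken from this file):
// ifm_shape.W = 2, stride.horizontal = 2, ker_shape.W = 3, ofm_shape.W = 6 gives
// invalid_horizontal = 6 - (1 + (2 - 1) * 2) - (3 - 1) = 1.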
1144
1145 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1146 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1147 auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
1148
1149 const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
1150
1151 auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>(
1152 _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
1153 ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
1154 invalid_vertical);
1155
1156 _return_fn = asAclFunction(std::move(fn));
1157}
1158
1159void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
1160{
1161 const auto ofm_index{node.getOutputs().at(0)};
1162 const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
1163 const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
1164
1165 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1166 auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
1167 auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
1168
1169 auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>(
1170 lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
1171
1172 _return_fn = asAclFunction(std::move(fn));
1173}
1174
1175void KernelGenerator::visit(const ir::operation::TopKV2 &node)
1176{
1177 const auto outputValues_index{node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_VALUES)};
1178 const auto outputIndices_index{
1179 node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)};
1180
1181 const auto inputData_index{node.getInputs().at(ir::operation::TopKV2::Input::INPUT)};
1182
1183 // Currently, only 1-D or 2-D input is supported.
1184 assert(_ctx.at(inputData_index).shape().rank() == 1 ||
1185 _ctx.at(inputData_index).shape().rank() == 2);
1186
1187 const auto k = node.param().k;
1188
1189 auto values_tensor = _tensor_reg->getAclTensor(outputValues_index);
1190 auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index);
1191 auto input_tensor = _tensor_reg->getAclTensor(inputData_index);
1192
1193 auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>(
1194 input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
1195
1196 _return_fn = asAclFunction(std::move(fn));
1197}
1198
1199void KernelGenerator::visit(const ir::operation::Gather &node)
1200{
1201 const auto ofm_index{node.getOutputs().at(0)};
1202
1203 const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
1204 const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
1205
1206 const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
1207 const auto axis_raw = node.param().axis;
1208 const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
1209 const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
1210
1211 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1212 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1213 auto indices_tensor = _tensor_reg->getAclTensor(indices_index);
1214
1215 // input is n-D, indices k-D, output is (n + k - 1)-D
1216 size_t n = ifm_rank;
1217 assert(n == ifm_tensor->num_dimensions());
1218 size_t k = _ctx.at(indices_index).shape().rank();
1219 assert(k == indices_tensor->num_dimensions());
1220
1221 // Disable applied dim_correction
1222 if (n != ifm_tensor->info()->num_dimensions())
1223 {
1224 // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
1225 acl_common::disableDimCorrection(ifm_tensor);
1226 }
1227 if (k != indices_tensor->info()->num_dimensions())
1228 {
1229 // This means that high dimension's value is 1 and indices tensor is applied dim_correction
1230 acl_common::disableDimCorrection(indices_tensor);
1231 }
1232
1233 auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>(
1234 ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
1235
1236 // Revert disabling applied dim_correction
1237 if (ifm_tensor->dimension(0) == 1)
1238 {
1239 acl_common::enableDimCorrection(ifm_tensor);
1240 }
1241 if (indices_tensor->dimension(0) == 1)
1242 {
1243 acl_common::enableDimCorrection(indices_tensor);
1244 }
1245
1246 _return_fn = asAclFunction(std::move(fn));
1247}
1248
1249void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
1250{
1251 const auto ofm_index{node.getOutputs().at(0)};
1252 const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
1253 const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
1254
1255 auto ifm_shape = _ctx.at(ifm_index).shape();
1256 auto ofm_shape = _ctx.at(ofm_index).shape();
1257
1258 assert((ifm_shape.rank() - 1) == ofm_shape.rank());
1259
1260 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1261 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1262 const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
1263
1264 int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
1265 if (axis_value < 0)
1266 {
1267 axis_value += ifm_rank;
1268 }
1269
1270 auto acl_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
1271 auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
1272 : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
1273 auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>(
1274 ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type);
1275
1276 _return_fn = asAclFunction(std::move(fn));
1277}
1278
1279void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
1280{
1281 const auto ofm_index{node.getOutputs().at(0)};
1282 const auto ifm_index{
1283 node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
1284
1285 auto radius = node.param().radius;
1286 auto alpha = node.param().alpha;
1287 auto beta = node.param().beta;
1288 auto bias = node.param().bias;
1289
1290 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1291 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1292
1293 const auto norm_info = ::arm_compute::NormalizationLayerInfo(
1294 ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
1295
1296 auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
1297 ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
1298
1299 _return_fn = asAclFunction(std::move(fn));
1300}
1301
1302void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
1303{
1304 const auto output_index{node.getOutputs().at(0)};
1305 const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
1306
1307 auto block_size = node.param().block_size;
1308 assert(block_size > 0);
1309
1310 auto output_tensor = _tensor_reg->getAclTensor(output_index);
1311 auto input_tensor = _tensor_reg->getAclTensor(input_index);
1312
1313 auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>(
1314 input_tensor->handle(), output_tensor->handle(), block_size);
1315
1316 _return_fn = asAclFunction(std::move(fn));
1317}
1318
1319void KernelGenerator::visit(const ir::operation::Split &node)
1320{
1321 const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
1322 const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};
1323
1324 assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
1325 if (!_ctx.at(axis_index).isConstant())
1326 {
1327 throw std::runtime_error("Non-constant axis_index NYI for acl_cl backend");
1328 }
1329
1330 const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
1331 std::vector<ir::OperandIndex> output_indexes;
1332 for (const auto &output : node.getOutputs())
1333 output_indexes.emplace_back(output);
1334
1335 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1336 std::vector<arm_compute::ICLTensor *> output_tensors;
1337 for (const auto &ofm_ind : output_indexes)
1338 output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
1339
1340 auto axis = _ctx.at(axis_index).asScalar<int32_t>();
1341 if (axis < 0)
1342 axis += ifm_rank;
1343 axis = acl_common::ToARMComputeAxis(ifm_rank, axis).value();
1344
1345 auto fn =
1346 acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
1347
1348 _return_fn = asAclFunction(std::move(fn));
1349}
1350
1351void KernelGenerator::visit(const ir::operation::SplitV &node)
1352{
1353 const auto ifm_index{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
1354 const auto size_split_index{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
1355 const auto split_dim_index{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
1356
1357 assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
1358
1359 const size_t ifm_rank = _ctx.at(ifm_index).shape().rank();
1360 std::vector<ir::OperandIndex> output_indexes;
1361 for (const auto &output : node.getOutputs())
1362 output_indexes.emplace_back(output);
1363
1364 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1365 auto size_split_tensor = _tensor_reg->getAclTensor(size_split_index);
1366
1367 std::vector<arm_compute::ICLTensor *> output_tensors;
1368 for (const auto &ofm_ind : output_indexes)
1369 output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
1370
1371 auto fn = std::make_unique<arm_compute::CLSplitVEx>();
1372 const auto &split_dim_op = _ctx.at(split_dim_index);
1373 if (split_dim_op.isConstant())
1374 {
1375 int32_t split_dim = split_dim_op.asScalar<int32_t>();
1376 uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim;
1377
1378 if (ifm_tensor->num_dimensions() != ifm_tensor->info()->num_dimensions())
1379 {
1380 // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
1381 acl_common::disableDimCorrection(ifm_tensor);
1382 }
1383
1384 split_dim_revised = acl_common::ToARMComputeAxis(ifm_rank, split_dim_revised).value();
1385 fn->configure(ifm_tensor->handle(), size_split_tensor->handle(), split_dim_revised,
1386 output_tensors, node.param().num_splits);
1387
1388 if (ifm_tensor->dimension(0) == 1)
1389 {
1390 acl_common::enableDimCorrection(ifm_tensor);
1391 }
1392 }
1393 else
1394 {
1395 throw std::runtime_error("Non-constant split_dim NYI for acl_cl backend");
1396 }
1397
1398 _return_fn = asAclFunction(std::move(fn));
1399}
1400
1401void KernelGenerator::visit(const ir::operation::Unpack &node)
1402{
1403 const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
1404 auto axis{node.param().axis};
1405
1406 const auto input_rank = _ctx.at(input_index).shape().rank();
1407
1408 std::vector<ir::OperandIndex> output_indexes;
1409 for (const auto &output_index : node.getOutputs())
1410 output_indexes.emplace_back(output_index);
1411
1412 auto input_tensor = _tensor_reg->getAclTensor(input_index);
1413 std::vector<arm_compute::ICLTensor *> outputs;
1414 for (const auto &output_index : output_indexes)
1415 outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
1416
1417 if (axis < 0)
1418 axis += input_rank;
1419 axis = acl_common::ToARMComputeAxis(input_rank, axis).value();
1420
1421 // Disable applied dim_correction
1422 if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
1423 {
1424 // This means that high dimension's value is 1 and input tensor is applied dim_correction
1425 acl_common::disableDimCorrection(input_tensor);
1426 }
1427
1428 auto fn =
1429 acl_common::generateLayer<arm_compute::CLUnstack>(input_tensor->handle(), outputs, axis);
1430
1431 // Revert disabling applied dim_correction
1432 if (input_tensor->dimension(0) == 1)
1433 {
1434 acl_common::enableDimCorrection(input_tensor);
1435 }
1436
1437 _return_fn = asAclFunction(std::move(fn));
1438}
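// --- Illustrative sketch, not part of the original source ---
// SplitV and Unpack above share the same bracketing pattern: disable
// dim_correction when the tensor's registered and reported dimension counts
// disagree, configure the kernel, then re-enable dim_correction afterwards.
// A hypothetical RAII-style guard expressing that pattern is sketched below
// (the class name is illustrative and not part of the acl_common API); note
// that some visitors, e.g. Pad below, intentionally skip the re-enable step
// and would not use it.
template <typename T_Tensor> class DimCorrectionScopeSketch
{
public:
  explicit DimCorrectionScopeSketch(T_Tensor *tensor) : _tensor{tensor}
  {
    // A mismatch means the highest dimension is 1 and dim_correction was applied.
    if (_tensor->num_dimensions() != _tensor->info()->num_dimensions())
      acl_common::disableDimCorrection(_tensor);
  }
  ~DimCorrectionScopeSketch()
  {
    // Mirror the re-enable condition used by the visitors above.
    if (_tensor->dimension(0) == 1)
      acl_common::enableDimCorrection(_tensor);
  }

private:
  T_Tensor *_tensor;
};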
1439
1440void KernelGenerator::visit(const ir::operation::Pad &node)
1441{
1442 const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
1443 const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
1444 const auto output_index{node.getOutputs().at(0)};
1445 assert(_ctx.at(pad_index).data());
1446
1447 auto rank = _ctx.at(input_index).shape().rank();
1448 auto pad_base = _ctx.at(pad_index).data()->base();
1449
1450 auto input_type = _ctx.at(input_index).typeInfo();
1451 auto data_type = acl_common::asDataType(input_type.type());
1452 auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.zero_point());
1453 const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
1454
1455 auto input = _tensor_reg->getAclTensor(input_index)->handle();
1456 auto output = _tensor_reg->getAclTensor(output_index)->handle();
1457
1458 ::arm_compute::PaddingList padding_list;
1459 padding_list.resize(rank);
1460 for (int32_t n = 0; n < rank; ++n)
1461 {
1462 const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
1463
1464 const auto axis = acl_common::ToARMComputeAxis(rank, n).value();
1465 padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
1466 }
1467
1468 // Disable applied dim_correction
1469 const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
1470 if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
1471 {
1472 // The highest dimension's value is 1, so dim_correction has been applied to the input tensor
1473 acl_common::disableDimCorrection(input_tensor);
1474 }
1475
1476 auto fn =
1477 acl_common::generateLayer<arm_compute::CLPadLayerEx>(input, output, padding_list, pixel_value);
1478
1479 // NOTE Do not revert the disabled dim_correction for 4D tensors.
1480 // Reverting it would produce mismatched results due to an incorrect offset_first_element in
1481 // ICLKernel::add_tensor_argument<3>().
1482 // dim_correction must stay disabled (not re-enabled) for kernels that slice 4D tensors into 3D,
1483 // because slicing an arm_compute::Window can cause an incorrect offset_first_element when the
1484 // tensor is 4D and its highest dimension is 1.
1485 if (input_tensor->num_dimensions() < 4 && input_tensor->dimension(0) == 1)
1486 {
1487 acl_common::enableDimCorrection(input_tensor);
1488 }
1489
1490 _return_fn = asAclFunction(std::move(fn));
1491}
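// --- Illustrative sketch, not part of the original source ---
// The PAD operand consumed above is a flattened (rank x 2) int32 table: row n
// holds the {before, after} padding amounts for frontend dimension n, and the
// loop stores each row at the swizzled ACL axis. A hypothetical, ACL-free
// restatement of that loop using std::vector (the function name is
// illustrative, and the axis reversal is an assumed simplification of
// ToARMComputeAxis):
inline std::vector<std::pair<uint32_t, uint32_t>> buildPaddingSketch(const int32_t *pad_base,
                                                                     int32_t rank)
{
  std::vector<std::pair<uint32_t, uint32_t>> padding(rank);
  for (int32_t n = 0; n < rank; ++n)
  {
    const int32_t *from = pad_base + n * 2; // {before, after} for frontend dim n
    const uint32_t acl_axis = rank - 1 - n; // assumed axis reversal (see Swizzle.h)
    padding[acl_axis] = {static_cast<uint32_t>(from[0]), static_cast<uint32_t>(from[1])};
  }
  return padding;
}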
1492
1493void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
1494{
1495 const auto ofm_index{node.getOutputs().at(0)};
1496 const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};
1497
1498 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1499 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1500
1501 auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
1502 ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
1503
1504 _return_fn = asAclFunction(std::move(fn));
1505}
1506
1507void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
1508{
1509 const auto ofm_index{node.getOutputs().at(0)};
1510 const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};
1511
1512 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1513 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1514
1515 auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
1516 ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
1517
1518 _return_fn = asAclFunction(std::move(fn));
1519}
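// --- Illustrative sketch, not part of the original source ---
// ConvertFp32ToFp16 and ConvertFp16ToFp32 above build the identical
// CLDepthConvertLayer configuration; the conversion direction is determined
// entirely by the data types of the two tensors. A hypothetical shared helper
// that both visitors could call (the function name is illustrative):
inline std::unique_ptr<exec::IFunction>
generateDepthConvertSketch(::arm_compute::ICLTensor *ifm, ::arm_compute::ICLTensor *ofm)
{
  // SATURATE clamps values that do not fit the destination type; the trailing
  // 0 is the shift argument, passed as 0 here exactly as in the visitors above.
  auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
    ifm, ofm, ::arm_compute::ConvertPolicy::SATURATE, 0);
  return asAclFunction(std::move(fn));
}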
1520
1521void KernelGenerator::visit(const ir::operation::Reverse &node)
1522{
1523 const auto ofm_index{node.getOutputs().at(0)};
1524 const auto ifm_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)};
1525 const auto axis_index{node.getInputs().at(ir::operation::Reverse::Input::AXIS)};
1526
1527 auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
1528 auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
1529 auto axis_tensor = _tensor_reg->getAclTensor(axis_index);
1530
1531 // WORKAROUND: the acl_cl backend allows only the U32 type for axis;
1532 // ConstantInitializer will resolve the S32 type to U32
1533 if (_ctx.at(axis_index).isConstant() &&
1534 (axis_tensor->handle()->info()->data_type() == arm_compute::DataType::S32))
1535 {
1536 axis_tensor->handle()->info()->set_data_type(arm_compute::DataType::U32);
1537 }
1538
1539 auto fn = acl_common::generateLayer<arm_compute::CLReverse>(
1540 ifm_tensor->handle(), ofm_tensor->handle(), axis_tensor->handle(), false);
1541
1542 _return_fn = asAclFunction(std::move(fn));
1543}
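// --- Illustrative sketch, not part of the original source ---
// The workaround above re-tags a constant S32 axis tensor as U32 in place,
// since only a U32 axis is accepted here (per the workaround note); the bit
// pattern of the non-negative axis values is identical in both types. A
// hypothetical helper capturing just that re-tagging step (the function name
// is illustrative):
inline void retagAxisAsU32Sketch(::arm_compute::ICLTensor *axis)
{
  if (axis->info()->data_type() == arm_compute::DataType::S32)
    axis->info()->set_data_type(arm_compute::DataType::U32);
}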
1544
1545} // namespace onert::backend::acl_cl