#include <arm_compute/runtime/CL/CLFunctions.h>

#include "ir/DataType.h"

using ::onert::backend::acl_common::asAclFunction;
KernelGenerator::KernelGenerator(
  const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
  const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
  : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()), _operations_ctx(graph.operations()),
    _tensor_builder(tensor_builder), _tensor_reg(tensor_reg)
{
}
std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
  auto ret = std::make_unique<exec::FunctionSequence>();
  ret->enableDynamicShapeInferer(false);
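  // acl_cl kernels are configured for fixed tensor shapes, so dynamic shape
  // inference stays disabled for every function sequence this backend emits.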
  const auto block_size_index{
    node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};

  const auto NNApiInputs = 2;
  if (node.getInputs().size() != NNApiInputs)
  {
    const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
    if (!_ctx.at(crops_index).isConstant())
    {
      throw std::runtime_error("Non-constant crops NYI for acl_cl backend BatchToSpaceND");
    }

    auto crops = _ctx.at(crops_index).asVector<int32_t>();
    for (auto &&crop : crops)
    {
      if (crop != 0)
      {
        throw std::runtime_error("Non-zero crops NYI for acl_cl backend BatchToSpaceND");
      }
    }
  }

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);

  if (!_ctx.at(block_size_index).data())
    throw std::runtime_error("ACL CL does not support dynamic block size for BatchToSpaceND");

  auto block = _ctx.at(block_size_index).asVector<int32_t>();
  int32_t height = block[0];
  int32_t width = block[1];

  auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>(
    ifm_tensor->handle(), width, height, ofm_tensor->handle());
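  // Note: the block size is handed to CLBatchToSpaceLayer as scalar width/height
  // values, which is why a non-constant block-size operand is rejected above.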
void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};

  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);

  const auto act_info = acl_common::asActivationLayerInfo(activation);

  std::unique_ptr<arm_compute::IFunction> fn;
  switch (node.param().arithmetic_type)
  {
    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
    {
      arm_compute::CLArithmeticAddition::validate(lhs_tensor->info(), rhs_tensor->info(),
                                                  ofm_tensor->info(),
                                                  arm_compute::ConvertPolicy::SATURATE, act_info)
        .throw_if_error();
      fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>(
        lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
        arm_compute::ConvertPolicy::SATURATE, act_info);
      break;
    }
    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
    {
      arm_compute::CLArithmeticSubtraction::validate(lhs_tensor->info(), rhs_tensor->info(),
                                                     ofm_tensor->info(),
                                                     arm_compute::ConvertPolicy::SATURATE, act_info)
        .throw_if_error();
      fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>(
        lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
        arm_compute::ConvertPolicy::SATURATE, act_info);
      break;
    }
    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
    {
      arm_compute::CLPixelWiseMultiplication::validate(
        lhs_tensor->info(), rhs_tensor->info(), ofm_tensor->info(), 1.0,
        arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
        act_info)
        .throw_if_error();
      fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>(
        lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0,
        arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
        act_info);
      break;
    }
    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
    {
      arm_compute::CLArithmeticDivision::validate(lhs_tensor->info(), rhs_tensor->info(),
                                                  ofm_tensor->info(), act_info)
        .throw_if_error();
      fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>(
        lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
      break;
    }
    default:
      assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
      break;
  }
void KernelGenerator::visit(const ir::operation::Conv2D &node)
{
  using ir::operation::Conv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};

  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();

  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);

  const auto stride = node.param().stride;
  const auto padding =
    ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
  auto bias_tensor = _tensor_reg->getAclTensor(bias_index);

  const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
  const auto act_info = acl_common::asActivationLayerInfo(activation);

  auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>(
    _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
    ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
    ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
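  // Note: the shared internal buffer manager supplies ACL's scratch memory,
  // Size2D(1U, 1U) means no dilation, and the fused activation is again handled
  // inside CLConvolutionLayer via act_info.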
void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
{
  using ir::operation::DepthwiseConv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};

  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();

  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);

  const auto stride = node.param().stride;
  const auto dilation = node.param().dilation;
  const auto padding =
    ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
                         dilation.width_factor, dilation.height_factor);
  const auto multiplier = node.param().multiplier;
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
  auto bias_tensor = _tensor_reg->getAclTensor(bias_index);

  const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
  const auto act_info = acl_common::asActivationLayerInfo(activation);
  const auto dilation_info = acl_common::asDilation(dilation.width_factor, dilation.height_factor);

  auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
    ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
    conv_info, multiplier, act_info, dilation_info);
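  // Note: the depth multiplier and the dilation factors are forwarded directly to
  // CLDepthwiseConvolutionLayer, and the padding above is computed with the
  // dilation taken into account.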
void KernelGenerator::visit(const ir::operation::Concat &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  std::vector<ir::OperandIndex> input_indexes;
  for (const auto &input : node.getInputs())
    input_indexes.emplace_back(input);

  const auto axis = node.param().axis;

  // If every input is already a sub-tensor of the output, the data is laid out
  // correctly by the tensor builder and no kernel is needed.
  bool eliminated = _tensor_builder->areSubTensorsOf(ofm_index, node.getInputs());
  if (eliminated)
  {
    VERBOSE(acl_cl_KernelGenerator_Concat) << "Concat eliminated" << std::endl;
    _return_fn = std::make_unique<exec::NopFunction>();
    return;
  }
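  // Otherwise a real kernel is generated: a plain copy for a single input, or
  // CLConcatenateLayer along the ACL-converted axis for two or more inputs.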
  auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
  std::vector<const ::arm_compute::ICLTensor *> input_tensors;
  for (const auto &ifm_ind : input_indexes)
    input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());

  std::unique_ptr<::arm_compute::IFunction> fn;
  if (input_indexes.size() < 2)
  {
    ::arm_compute::ICLTensor *input_tensor =
      _tensor_reg->getAclTensor(input_indexes.at(0))->handle();
    fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor, output_tensor->handle());
  }
  else
  {
    const auto rank = _ctx.at(ofm_index).shape().rank();
    const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis).value();
    fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>(
      input_tensors, output_tensor->handle(), fixed_axis);
  }
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
  const auto output_index{node.getOutputs().at(0)};
  auto output_tensor = _tensor_reg->getAclTensor(output_index);
  const auto activation = node.param().activation;
  if (node.param().weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32)
    throw std::runtime_error(
      "KernelGenerator(acl_cl): FullyConnected 16x1Float32 weights is not supported.");

  auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
                                                ::arm_compute::CLFullyConnectedReshapingLayer>(
    node, _ctx, _tensor_builder, _tensor_reg);
  _return_fn = std::make_unique<exec::FunctionSequence>(
    std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
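  // The fully-connected layer itself comes from the shared acl_common helper; the
  // activation is not fused but appended as a second function in the sequence.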
void KernelGenerator::visit(const ir::operation::Reduce &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
  const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
  const auto keep_dims{node.param().keep_dims};
  const auto reduce_type = node.param().reduce_type;

  auto output_tensor = _tensor_reg->getAclTensor(output_index);
  auto input_tensor = _tensor_reg->getAclTensor(input_index);

  const auto &axes = _ctx.at(axes_index);
  const auto input_rank = _ctx.at(input_index).shape().rank();

  std::unique_ptr<arm_compute::IFunction> fn;
  if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
  {
    const auto acl_axes = acl_common::asCoordinates(axes, input_rank);
    fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes,
                                                              keep_dims, output_tensor->handle());
  }
  else
  {
    const auto acl_axes = acl_common::asSet(axes, input_rank);
    fn = acl_common::generateLayer<arm_compute::CLReduceOperation>(
      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
      output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
  }
void KernelGenerator::visit(const ir::operation::Reshape &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};

  auto output_tensor = _tensor_reg->getAclTensor(output_index);
  auto input_tensor = _tensor_reg->getAclTensor(input_index);

  auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
                                                                   output_tensor->handle());
void KernelGenerator::visit(const ir::operation::Squeeze &node)
{
  // On this backend Squeeze is just a Reshape: the output operand already carries
  // the squeezed shape, so dims/ndim are not needed to configure the kernel.
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
  const auto dims{node.param().dims};
  const auto ndim{node.param().ndim};
  (void)dims;
  (void)ndim;

  auto output_tensor = _tensor_reg->getAclTensor(output_index);
  auto input_tensor = _tensor_reg->getAclTensor(input_index);
  auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
                                                                   output_tensor->handle());
void KernelGenerator::visit(const ir::operation::Softmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};

  const auto beta = node.param().beta;

  auto output_tensor = _tensor_reg->getAclTensor(output_index);
  auto input_tensor = _tensor_reg->getAclTensor(input_index);

  auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
    _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
    output_tensor->handle(), beta);
void KernelGenerator::visit(const ir::operation::Slice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};

  auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
  auto inputData_tensor = _tensor_reg->getAclTensor(input_index);

  int input_rank = _ctx.at(input_index).shape().rank();
  std::vector<int32_t> starts;
  std::vector<int32_t> ends;
  starts.resize(input_rank, 0);
  ends.resize(input_rank, 0);

  assert(_ctx.at(begins_index).data());
  assert(_ctx.at(sizes_index).data());
  auto beginData_base = _ctx.at(begins_index).data()->base();
  auto sizeData_base = _ctx.at(sizes_index).data()->base();
  [[maybe_unused]] const int beginData_size = _ctx.at(begins_index).shape().num_elements();
  [[maybe_unused]] const int sizeData_size = _ctx.at(sizes_index).shape().num_elements();

  using ir::DataType;

  assert(_ctx.at(begins_index).typeInfo().type() == DataType::INT32);
  assert(_ctx.at(sizes_index).typeInfo().type() == DataType::INT32);
  assert(beginData_size == input_rank);
  assert(sizeData_size == input_rank);

  assert(beginData_base != nullptr);
  for (int n = 0; n < input_rank; ++n)
  {
    auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n).value();

    int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
    starts[axis] = begin_value;

    int32_t size_value = *(reinterpret_cast<const int32_t *>(sizeData_base) + n);
    ends[axis] = begin_value + size_value;
  }
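  // The frontend describes a slice as (begin, size) per axis while CLSlice expects
  // absolute end coordinates, hence end = begin + size; ToARMComputeAxis maps each
  // frontend axis onto ACL's reversed coordinate order.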
  ::arm_compute::Coordinates starts_set;
  ::arm_compute::Coordinates ends_set;

  for (size_t i = 0; i < starts.size(); ++i)
  {
    starts_set.set(i, starts[i]);
    ends_set.set(i, ends[i]);
  }

  auto fn = acl_common::generateLayer<arm_compute::CLSlice>(
    inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
void KernelGenerator::visit(const ir::operation::StridedSlice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};

  auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
  auto inputData_tensor = _tensor_reg->getAclTensor(input_index);

  int input_rank = _ctx.at(input_index).shape().rank();
  std::vector<int32_t> starts;
  std::vector<int32_t> ends;
  std::vector<int32_t> strides;
  starts.resize(input_rank, 0);
  ends.resize(input_rank, 0);
  strides.resize(input_rank, 0);

  assert(_ctx.at(starts_index).data());
  assert(_ctx.at(ends_index).data());
  assert(_ctx.at(strides_index).data());
  auto startData_base = _ctx.at(starts_index).data()->base();
  auto endData_base = _ctx.at(ends_index).data()->base();
  auto stridesData_base = _ctx.at(strides_index).data()->base();
  [[maybe_unused]] const int startData_size = _ctx.at(starts_index).shape().num_elements();
  [[maybe_unused]] const int endData_size = _ctx.at(ends_index).shape().num_elements();
  [[maybe_unused]] const int stridesData_size = _ctx.at(strides_index).shape().num_elements();

  using ir::DataType;

  assert(_ctx.at(starts_index).typeInfo().type() == DataType::INT32);
  assert(_ctx.at(ends_index).typeInfo().type() == DataType::INT32);
  assert(_ctx.at(strides_index).typeInfo().type() == DataType::INT32);
  assert(startData_size == input_rank);
  assert(endData_size == input_rank);
  assert(stridesData_size == input_rank);

  assert(startData_base != nullptr);
  for (int n = 0; n < input_rank; ++n)
  {
    auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n).value();

    int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
    starts[axis] = start_value;

    int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n);
    ends[axis] = end_value;

    int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n);
    strides[axis] = strides_value;
  }

  // The begin/end/shrink-axis masks are bit-reordered to match ACL's axis order.
  const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank);
  const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank);
  const auto shrink_axis_mask =
    acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);

  ::arm_compute::Coordinates starts_set;
  ::arm_compute::Coordinates ends_set;
  ::arm_compute::BiStrides strides_set;

  for (size_t i = 0; i < starts.size(); ++i)
  {
    starts_set.set(i, starts[i]);
    ends_set.set(i, ends[i]);
    strides_set.set(i, strides[i]);
  }

  // Disable dim correction if ACL collapsed the tensor rank (highest dimension is 1).
  if (inputData_tensor->num_dimensions() != inputData_tensor->info()->num_dimensions())
  {
    acl_common::disableDimCorrection(inputData_tensor);
  }

  auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
    inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
    begin_mask, end_mask, shrink_axis_mask);
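  // Re-enable dim correction afterwards if it was disabled for a collapsed shape.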
  if (inputData_tensor->dimension(0) == 1)
  {
    acl_common::enableDimCorrection(inputData_tensor);
  }
void KernelGenerator::visit(const ir::operation::Transpose &node)
{
  const auto ofm_idx{node.getOutputs().at(0)};
  const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
  const auto perm_idx{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};

  const auto rank = _ctx.at(ifm_idx).shape().rank();

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);

  const auto &perms = _ctx.at(perm_idx);
  std::vector<int32_t> pv;
  if (perms.shape() == ir::Shape{0})
  {
    pv.resize(rank);
    std::iota(pv.begin(), pv.end(), 0);
    std::reverse(pv.begin(), pv.end());
  }
  else
  {
    pv = _ctx.at(perm_idx).asVector<int32_t>();
  }

  std::unique_ptr<arm_compute::IFunction> fn;
  if (rank == 1)
  {
    fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), ofm_tensor->handle());
  }
  else if (rank == 2)
  {
    assert(pv.size() == 2 && pv.at(0) == 1 && pv.at(1) == 0);
    fn = acl_common::generateLayer<arm_compute::CLTranspose>(ifm_tensor->handle(),
                                                             ofm_tensor->handle());
  }
  else
  {
    auto backend_pv = acl_common::getARMComputePermutationVector(rank, pv);

    fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
                                                           ofm_tensor->handle(), backend_pv);
  }
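  // Note: rank-1 tensors reduce to a plain copy, rank-2 uses CLTranspose, and higher
  // ranks go through CLPermute with the permutation converted to ACL's axis order.
  // An empty permutation operand means "reverse all dimensions".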
void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);

  const ::arm_compute::ActivationLayerInfo act_info =
    acl_common::asActivationLayerInfo(node.param().op_type, node.param().alpha, node.param().beta);

  auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
    ifm_tensor->handle(), ofm_tensor->handle(), act_info);
void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};

  auto output_tensor = _tensor_reg->getAclTensor(output_index);
  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);

  std::unique_ptr<arm_compute::IFunction> fn;
  switch (node.param().op_type)
  {
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
    {
      fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>(
        lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
        arm_compute::BinaryLogicalOperation::AND);
      break;
    }
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
    {
      fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>(
        lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
      break;
    }
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
    {
      fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>(
        lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
      break;
    }
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
    {
      fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>(
        lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
      break;
    }
    default:
    {
      std::string err_msg("acl_cl KernelGenerator : " + node.name() +
                          " is not an elementwise-binary operation");
      assert(false && err_msg.c_str());
      break;
    }
  }
void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};

  auto output_tensor = _tensor_reg->getAclTensor(output_index);
  auto input_tensor = _tensor_reg->getAclTensor(input_index);

  std::unique_ptr<arm_compute::IFunction> fn;
  switch (node.param().op_type)
  {
    case ir::operation::ElementwiseUnary::Type::ABS:
    {
      const ::arm_compute::ActivationLayerInfo act_info{
        ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};

      fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
        input_tensor->handle(), output_tensor->handle(), act_info);
      break;
    }
    case ir::operation::ElementwiseUnary::Type::CAST:
    {
      // A cast to the same type is a copy, BOOL8 input uses the CLCastBool extension
      // kernel, and every other type pair goes through CLCast with saturation.
      if (input_tensor->data_type() == output_tensor->data_type())
      {
        fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(),
                                                            output_tensor->handle());
      }
      else if (_ctx.at(input_index).typeInfo().type() == ir::DataType::BOOL8)
      {
        fn = acl_common::generateLayer<arm_compute::CLCastBool>(input_tensor->handle(),
                                                                output_tensor->handle());
      }
      else
      {
        fn = acl_common::generateLayer<arm_compute::CLCast>(
          input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
      }
      break;
    }
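    // The remaining unary types map either onto a dedicated ACL kernel (DEQUANTIZE,
    // EXP, FLOOR, LOGICAL_NOT, NEG, RSQRT) or onto CLActivationLayer (SQRT).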
    case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
    {
      fn = acl_common::generateLayer<arm_compute::CLDequantizationLayer>(input_tensor->handle(),
                                                                         output_tensor->handle());
      break;
    }
    case ir::operation::ElementwiseUnary::Type::EXP:
    {
      fn = acl_common::generateLayer<arm_compute::CLExpLayer>(input_tensor->handle(),
                                                              output_tensor->handle());
      break;
    }
    case ir::operation::ElementwiseUnary::Type::FLOOR:
    {
      fn = acl_common::generateLayer<arm_compute::CLFloor>(input_tensor->handle(),
                                                           output_tensor->handle());
      break;
    }
    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
    {
      fn = acl_common::generateLayer<arm_compute::CLBitwiseNot>(input_tensor->handle(),
                                                                output_tensor->handle());
      break;
    }
    case ir::operation::ElementwiseUnary::Type::NEG:
    {
      fn = acl_common::generateLayer<arm_compute::CLNeg>(input_tensor->handle(),
                                                         output_tensor->handle());
      break;
    }
    case ir::operation::ElementwiseUnary::Type::RSQRT:
    {
      fn = acl_common::generateLayer<arm_compute::CLRsqrtLayer>(input_tensor->handle(),
                                                                output_tensor->handle());
      break;
    }
    case ir::operation::ElementwiseUnary::Type::SQRT:
    {
      const ::arm_compute::ActivationLayerInfo act_info{
        ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};

      fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
        input_tensor->handle(), output_tensor->handle(), act_info);
      break;
    }
    default:
      throw std::runtime_error("acl_cl KernelGenerator : " + node.name() + " is not supported yet");
  }
void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};

  auto output_tensor = _tensor_reg->getAclTensor(output_index);
  auto input_tensor = _tensor_reg->getAclTensor(input_index);

  auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
                                                                   output_tensor->handle());
void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
  const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
  const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
  auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index);
  auto beta_tensor = _tensor_reg->getAclTensor(beta_index);
  auto epsilon = node.param().epsilon;
  auto activation = node.param().activation;

  auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>(
    ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
    epsilon);

  _return_fn = std::make_unique<exec::FunctionSequence>(
    asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
  _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ICLTensor,
                                         ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_reg);
}
void KernelGenerator::visit(const ir::operation::Comparison &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
  const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};

  const auto comparison_type = node.param().comparison_type;

  auto output_tensor = _tensor_reg->getAclTensor(output_index);
  auto input0_tensor = _tensor_reg->getAclTensor(input0_index);
  auto input1_tensor = _tensor_reg->getAclTensor(input1_index);

  auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
    input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
    (arm_compute::ComparisonOperation)comparison_type);
void KernelGenerator::visit(const ir::operation::OneHot &node)
{
  const auto output_idx{node.getOutputs().at(0)};
  const auto indices_idx{node.getInputs().at(ir::operation::OneHot::Input::INDICES)};
  const auto depth_idx{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
  const auto onvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
  const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
  const auto depth = _ctx.at(depth_idx).asScalar<int32_t>();

  auto output_tensor = _tensor_reg->getAclTensor(output_idx);
  auto indices_tensor = _tensor_reg->getAclTensor(indices_idx);
  auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx);

  const size_t output_rank = _ctx.at(output_idx).shape().rank();
  int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
  axis = acl_common::ToARMComputeAxis(output_rank, axis).value();

  std::unique_ptr<::arm_compute::IFunction> fn;
  const auto &offvalue = _ctx.at(offvalue_idx);
  if (offvalue.isConstant())
  {
    fn = acl_common::generateLayer<arm_compute::CLOneHot>(
      indices_tensor->handle(), onvalue_tensor->handle(), output_tensor->handle(),
      acl_common::asPixelValue(offvalue), static_cast<uint32_t>(depth), axis);
  }
  else
  {
    auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx);
    fn = acl_common::generateLayer<arm_compute::CLOneHot>(
      indices_tensor->handle(), onvalue_tensor->handle(), offvalue_tensor->handle(),
      output_tensor->handle(), static_cast<uint32_t>(depth), axis);
  }
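  // Note: CLOneHot has two configure overloads, one taking the off-value as a
  // constant PixelValue and one taking it as an extra input tensor when the
  // operand is not constant.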
void KernelGenerator::visit(const ir::operation::Pack &node)
{
  const auto output_index{node.getOutputs().at(0)};
  auto axis{node.param().axis};

  const auto output_rank = _ctx.at(output_index).shape().rank();

  std::vector<ir::OperandIndex> input_indexes;
  for (const auto &input_index : node.getInputs())
    input_indexes.emplace_back(input_index);

  auto output = _tensor_reg->getAclTensor(output_index)->handle();
  std::vector<arm_compute::ICLTensor *> inputs;
  for (const auto &input_index : input_indexes)
    inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());

  if (axis < 0)
    axis += output_rank;
  axis = acl_common::ToARMComputeAxis(output_rank, axis).value();

  // Disable dim correction for inputs whose rank ACL has collapsed (highest dim is 1).
  for (const auto &input_index : input_indexes)
  {
    const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
    if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
    {
      acl_common::disableDimCorrection(input_tensor);
    }
  }

  auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output);
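  // Dim correction is re-enabled afterwards for the inputs it was disabled on.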
  for (const auto &input_index : input_indexes)
  {
    const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
    if (input_tensor->dimension(0) == 1)
    {
      acl_common::enableDimCorrection(input_tensor);
    }
  }
void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
    node, _ctx, _tensor_reg, acl_common::convertPoolType(node.param().op_type));

  const auto ofm_index{node.getOutputs().at(0)};
  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  const auto activation = node.param().activation;
  _return_fn = std::make_unique<exec::FunctionSequence>(
    asAclFunction(std::move(raw_fn)),
    ActivationBuilder::generate(activation, ofm_tensor->handle()));
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);

  auto fn = acl_common::generateLayer<arm_compute::CLScale>(
    ifm_tensor->handle(), ofm_tensor->handle(),
    ::arm_compute::ScaleKernelInfo{
      ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
      ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT});
void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);

  auto fn = acl_common::generateLayer<arm_compute::CLScale>(
    ifm_tensor->handle(), ofm_tensor->handle(),
    ::arm_compute::ScaleKernelInfo{
      ::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE,
      ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT});
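  // Note: ResizeBilinear and ResizeNearestNeighbor are both lowered to CLScale;
  // only the InterpolationPolicy differs.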
void KernelGenerator::visit(const ir::operation::RNN &node)
{
  const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
  const auto hidden_state_out_index{
    node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};

  const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
  const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
  const auto recurrent_weights_index{
    node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
  const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
  const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};

  const auto activation = node.param().activation;

  auto output_tensor = _tensor_reg->getAclTensor(output_index);
  auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index);

  auto input_tensor = _tensor_reg->getAclTensor(input_index);
  auto weights_tensor = _tensor_reg->getAclTensor(weights_index);
  auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index);
  auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
  auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index);
  auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);

  auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
    hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
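  // The previous hidden state is copied into the hidden-state output first;
  // CLRNNLayer then reads and updates that same tensor as the recurrent state.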
  auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
    _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
    weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
    hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
  const auto block_size_index{
    node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
  const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
  auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
  auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index);

  assert(_ctx.at(block_size_index).data());
  assert(_ctx.at(paddings_index).data());

  auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>(
    ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
    ofm_tensor->handle());
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};

  auto block_size = node.param().block_size;

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);

  auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
    ifm_tensor->handle(), ofm_tensor->handle(), block_size);
void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
  const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};

  auto output_tensor = _tensor_reg->getAclTensor(output_index);
  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
  auto values_tensor = _tensor_reg->getAclTensor(values_index);

  auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
    values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
void KernelGenerator::visit(const ir::operation::L2Normalization &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};

  // CLNormalizationLayer is reused as an L2 normalization over the last (depth)
  // axis by choosing the normalization parameters accordingly.
  const auto &ifm_shape = _ctx.at(ifm_index).shape();
  const auto normalization_axis = _ctx.at(ifm_index).shape().rank() - 1;
  int32_t radius = 2 * ifm_shape.dim(normalization_axis) + 1;
  float alpha = 1.0f;
  float beta = 0.5f;
  float bias = 0.0f;

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);

  const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
                                                               radius, alpha, beta, bias, false);

  auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
    ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
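  // With a window spanning the whole axis (2 * dim + 1), alpha = 1, bias = 0 and
  // beta = 0.5, the cross-map normalization divides each element by the square
  // root of the sum of squares, i.e. it performs L2 normalization.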
void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
{
  const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
  const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};

  const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
  const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
  const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};

  auto output_tensor = _tensor_reg->getAclTensor(output_index);
  auto hits_tensor = _tensor_reg->getAclTensor(hits_index);

  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
  auto keys_tensor = _tensor_reg->getAclTensor(keys_index);
  auto values_tensor = _tensor_reg->getAclTensor(values_index);

  auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
    lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
    output_tensor->handle(), hits_tensor->handle());
void KernelGenerator::visit(const ir::operation::PReLU &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
  const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
  auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index);

  auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>(
    ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
void KernelGenerator::visit(const ir::operation::TransposeConv &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
  const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};

  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
  const auto ker_shape = _ctx.at(ker_index).shape().asFeature();

  const auto stride = node.param().stride;
  const auto padding = ir::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
                                            ker_shape.W, ker_shape.H);
  uint32_t invalid_horizontal = 0;
  uint32_t invalid_vertical = 0;
  if (node.param().padding.type == ir::PaddingType::VALID)
  {
    invalid_horizontal =
      ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
    invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
  }
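  // With VALID padding the deconvolution output can be wider/taller than what the
  // input and stride alone produce; the leftover right/bottom margins are passed to
  // CLTransposeConvLayer as "invalid" columns/rows.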
  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getAclTensor(ker_index);

  const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);

  auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>(
    _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
    ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
    invalid_vertical);
void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);

  auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>(
    lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
void KernelGenerator::visit(const ir::operation::TopKV2 &node)
{
  const auto outputValues_index{node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_VALUES)};
  const auto outputIndices_index{
    node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)};

  const auto inputData_index{node.getInputs().at(ir::operation::TopKV2::Input::INPUT)};

  // Currently only 1-D or 2-D input is supported.
  assert(_ctx.at(inputData_index).shape().rank() == 1 ||
         _ctx.at(inputData_index).shape().rank() == 2);

  const auto k = node.param().k;

  auto values_tensor = _tensor_reg->getAclTensor(outputValues_index);
  auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index);
  auto input_tensor = _tensor_reg->getAclTensor(inputData_index);

  auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>(
    input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
void KernelGenerator::visit(const ir::operation::Gather &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
  const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};

  const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
  const auto axis_raw = node.param().axis;
  const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
  const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
  auto indices_tensor = _tensor_reg->getAclTensor(indices_index);

  size_t n = ifm_rank;
  assert(n == ifm_tensor->num_dimensions());
  size_t k = _ctx.at(indices_index).shape().rank();
  assert(k == indices_tensor->num_dimensions());

  // Disable dim correction if ACL collapsed the rank of either tensor.
  if (n != ifm_tensor->info()->num_dimensions())
  {
    acl_common::disableDimCorrection(ifm_tensor);
  }
  if (k != indices_tensor->info()->num_dimensions())
  {
    acl_common::disableDimCorrection(indices_tensor);
  }

  auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>(
    ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);

  // Re-enable dim correction afterwards.
  if (ifm_tensor->dimension(0) == 1)
  {
    acl_common::enableDimCorrection(ifm_tensor);
  }
  if (indices_tensor->dimension(0) == 1)
  {
    acl_common::enableDimCorrection(indices_tensor);
  }
void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};

  auto ifm_shape = _ctx.at(ifm_index).shape();
  auto ofm_shape = _ctx.at(ofm_index).shape();

  assert((ifm_shape.rank() - 1) == ofm_shape.rank());

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
  const auto ifm_rank = _ctx.at(ifm_index).shape().rank();

  int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
  if (axis_value < 0)
  {
    axis_value += ifm_rank;
  }

  auto acl_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
  auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
                                             : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
  auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>(
    ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type);
void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{
    node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};

  auto radius = node.param().radius;
  auto alpha = node.param().alpha;
  auto beta = node.param().beta;
  auto bias = node.param().bias;

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);

  const auto norm_info = ::arm_compute::NormalizationLayerInfo(
    ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);

  auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
    ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};

  auto block_size = node.param().block_size;
  assert(block_size > 0);

  auto output_tensor = _tensor_reg->getAclTensor(output_index);
  auto input_tensor = _tensor_reg->getAclTensor(input_index);

  auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>(
    input_tensor->handle(), output_tensor->handle(), block_size);
void KernelGenerator::visit(const ir::operation::Split &node)
{
  const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};

  assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
  if (!_ctx.at(axis_index).isConstant())
  {
    throw std::runtime_error("Non-constant axis_index NYI for acl_cl backend");
  }

  const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
  std::vector<ir::OperandIndex> output_indexes;
  for (const auto &output : node.getOutputs())
    output_indexes.emplace_back(output);

  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
  std::vector<arm_compute::ICLTensor *> output_tensors;
  for (const auto &ofm_ind : output_indexes)
    output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());

  auto axis = _ctx.at(axis_index).asScalar<int32_t>();
  if (axis < 0)
    axis += ifm_rank;
  axis = acl_common::ToARMComputeAxis(ifm_rank, axis).value();

  auto fn =
    acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
void KernelGenerator::visit(const ir::operation::SplitV &node)
{
  const auto ifm_index{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
  const auto size_split_index{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
  const auto split_dim_index{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};

  assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));

  const size_t ifm_rank = _ctx.at(ifm_index).shape().rank();
  std::vector<ir::OperandIndex> output_indexes;
  for (const auto &output : node.getOutputs())
    output_indexes.emplace_back(output);

  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
  auto size_split_tensor = _tensor_reg->getAclTensor(size_split_index);

  std::vector<arm_compute::ICLTensor *> output_tensors;
  for (const auto &ofm_ind : output_indexes)
    output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());

  auto fn = std::make_unique<arm_compute::CLSplitVEx>();
  const auto &split_dim_op = _ctx.at(split_dim_index);
  if (split_dim_op.isConstant())
  {
    int32_t split_dim = split_dim_op.asScalar<int32_t>();
    uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim;

    if (ifm_tensor->num_dimensions() != ifm_tensor->info()->num_dimensions())
    {
      // The highest dimension is 1 and dim correction was applied to the input tensor.
      acl_common::disableDimCorrection(ifm_tensor);
    }

    fn->configure(ifm_tensor->handle(), size_split_tensor->handle(), split_dim_revised,
                  output_tensors, node.param().num_splits);

    if (ifm_tensor->dimension(0) == 1)
    {
      acl_common::enableDimCorrection(ifm_tensor);
    }
  }
  else
  {
    throw std::runtime_error("Non-constant split_dim NYI for acl_cl backend");
  }
void KernelGenerator::visit(const ir::operation::Unpack &node)
{
  const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
  auto axis{node.param().axis};

  const auto input_rank = _ctx.at(input_index).shape().rank();

  std::vector<ir::OperandIndex> output_indexes;
  for (const auto &output_index : node.getOutputs())
    output_indexes.emplace_back(output_index);

  auto input_tensor = _tensor_reg->getAclTensor(input_index);
  std::vector<arm_compute::ICLTensor *> outputs;
  for (const auto &output_index : output_indexes)
    outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());

  if (axis < 0)
    axis += input_rank;
  axis = acl_common::ToARMComputeAxis(input_rank, axis).value();

  // Disable dim correction if ACL collapsed the input rank (highest dimension is 1).
  if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
  {
    acl_common::disableDimCorrection(input_tensor);
  }

  auto fn =
    acl_common::generateLayer<arm_compute::CLUnstack>(input_tensor->handle(), outputs, axis);

  // Re-enable dim correction afterwards.
  if (input_tensor->dimension(0) == 1)
  {
    acl_common::enableDimCorrection(input_tensor);
  }
void KernelGenerator::visit(const ir::operation::Pad &node)
{
  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
  const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
  const auto output_index{node.getOutputs().at(0)};
  assert(_ctx.at(pad_index).data());

  auto rank = _ctx.at(input_index).shape().rank();
  auto pad_base = _ctx.at(pad_index).data()->base();

  auto input_type = _ctx.at(input_index).typeInfo();
  auto data_type = acl_common::asDataType(input_type.type());
  auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.zero_point());
  const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
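  // The padding constant is a typed PixelValue that carries the input's quantization
  // info, so zero-padding of quantized tensors uses the proper zero-point value.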
  auto input = _tensor_reg->getAclTensor(input_index)->handle();
  auto output = _tensor_reg->getAclTensor(output_index)->handle();

  ::arm_compute::PaddingList padding_list;
  padding_list.resize(rank);
  for (int32_t n = 0; n < rank; ++n)
  {
    const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);

    const auto axis = acl_common::ToARMComputeAxis(rank, n).value();
    padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
  }

  const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
  if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
  {
    // The highest dimension is 1 and dim correction was applied to the input tensor.
    acl_common::disableDimCorrection(input_tensor);
  }

  auto fn =
    acl_common::generateLayer<arm_compute::CLPadLayerEx>(input, output, padding_list, pixel_value);

  // Dim correction is only re-enabled for tensors below rank 4.
  if (input_tensor->num_dimensions() < 4 && input_tensor->dimension(0) == 1)
  {
    acl_common::enableDimCorrection(input_tensor);
  }
void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);

  auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
    ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);

  auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
    ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
void KernelGenerator::visit(const ir::operation::Reverse &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::Reverse::Input::AXIS)};

  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
  auto axis_tensor = _tensor_reg->getAclTensor(axis_index);

  // WORKAROUND: the acl_cl backend only allows a U32 axis tensor.
  if (_ctx.at(axis_index).isConstant() &&
      (axis_tensor->handle()->info()->data_type() == arm_compute::DataType::S32))
  {
    axis_tensor->handle()->info()->set_data_type(arm_compute::DataType::U32);
  }

  auto fn = acl_common::generateLayer<arm_compute::CLReverse>(
    ifm_tensor->handle(), ofm_tensor->handle(), axis_tensor->handle(), false);
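  // CLReverse expects an unsigned 32-bit axis tensor, so a constant S32 axis
  // operand is simply relabeled as U32 before the layer is configured.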