Data Structures
class	AclActivationBuilder

class	AclBackendContext

class	AclConstantInitializer

class	AclFunction

class	AclInternalBufferManager
	class for InternalBufferManager which has arm_compute::IMemoryManager pointer More...

class	AclLinearMemoryManager

class	AclMemoryManager

class	AclSubTensorAnalyzer
	Class to analyze tensor subsumption. More...

class	AclTensorBuilder

class	AclTensorManager

class	AclTensorRegistry
	Tensor registry class for acl backends. More...

class	ARMComputeAxis

class	IACLTensor
	Class representing Tensor for ACL. More...

struct	IInternalBufferManager
	Interface for InternalBufferManager which has arm_compute::IMemoryManager pointer. More...

Typedefs
using	float16 = uint16_t

Functions
template<typename T >
void	copyInit (const onert::ir::Operand &model_obj, onert::backend::ITensor &obj)

template<typename T >
void	initReverseOrder (const ir::Operand &model_obj, backend::ITensor &obj)

void	enableDimCorrection (IACLTensor *tensor)

void	disableDimCorrection (IACLTensor *tensor)

template<typename Layer , typename... Args>
std::unique_ptr< arm_compute::IFunction >	generateLayer (Args &&...args)

template<typename Layer , typename... Args>
std::unique_ptr< arm_compute::IFunction >	generateLayer (std::shared_ptr< arm_compute::IMemoryManager > memory_manager, Args &&...args)

template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorRegistry >
std::unique_ptr< exec::IFunction >	kernelGenLSTM (const ir::operation::LSTM &node, const ir::Operands &operands, const std::shared_ptr< T_TensorRegistry > &tensor_reg)

template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorBuilder , typename T_TensorRegistry >
std::unique_ptr< exec::IFunction >	kernelGenFullyConnected (const ir::operation::FullyConnected &node, const ir::Operands &operands, const std::shared_ptr< T_TensorBuilder > &tensor_builder, const std::shared_ptr< T_TensorRegistry > &tensor_reg)

template<typename T_ACLLayer , typename T_PoolOp , typename T_AclTensorRegistry >
std::unique_ptr<::arm_compute::IFunction >	kernelGenPool2D (const T_PoolOp &node, const ir::Operands &operands, const std::shared_ptr< T_AclTensorRegistry > &tensor_reg, ::arm_compute::PoolingType pooling_type)

template<typename T_MemoryManager , typename T_PoolManager , typename T_LifetimeManager >
std::shared_ptr< T_MemoryManager >	createMemoryManager ()

::arm_compute::TensorShape	asTensorShape (const ir::Shape &shape, bool apply_dim_correction)

::arm_compute::Coordinates	asTensorCoordinate (const ir::Coordinates &coord)

::arm_compute::DataType	asDataType (const ir::DataType type)

::arm_compute::QuantizationInfo	asQuantizationInfo (const float scale, const int32_t offset)

::arm_compute::TensorInfo	asTensorInfo (const ir::Shape &shape, const ir::TypeInfo &typeInfo, bool apply_dim_correction)

::arm_compute::PadStrideInfo	asPadStrideInfo (const ir::ExplicitPadding &padding, const ir::Stride &stride)

::arm_compute::ActivationLayerInfo	asActivationLayerInfo (const ir::Activation act_code)

::arm_compute::ActivationLayerInfo	asActivationLayerInfo (const ir::operation::ElementwiseActivation::Type op_type, float alpha, float beta)

arm_compute::Coordinates	asCoordinates (const ir::Operand &operand, int32_t rank)

std::set< uint32_t >	asSet (const ir::Operand &operand, int32_t rank)

std::unique_ptr< AclFunction >	asAclFunction (std::unique_ptr<::arm_compute::IFunction > &&layer)

ir::DataType	asRuntimeDataType (::arm_compute::DataType data_type)

arm_compute::PoolingType	convertPoolType (ir::operation::Pool2D::PoolType pool_type_ir)

arm_compute::ReductionOperation	convertReduceType (ir::operation::Reduce::ReduceType reduce_type_ir)

arm_compute::PixelValue	asPixelValue (const ir::Operand &operand)

arm_compute::Size2D	asDilation (uint32_t dilation_width, uint32_t dilation_height)

template<typename T_Function >
std::unique_ptr< T_Function >	asFunction (std::unique_ptr<::arm_compute::IFunction > &&fn)

ARMComputeAxis	ToARMComputeAxis (uint32_t rank, uint32_t axis)

inline ::arm_compute::Coordinates	getARMComputeAxises (uint32_t rank)

inline ::arm_compute::PermutationVector	getARMComputePermutationVector (uint32_t rank, const std::vector< int32_t > runtime_pv)

template<typename T >
T	ReorderBits (T in, size_t numOfBits)

Typedef Documentation

◆ float16

using onert::backend::acl_common::float16 = typedef uint16_t

Definition at line 117 of file AclConstantInitializer.cc.

Function Documentation

◆ asAclFunction()

std::unique_ptr< AclFunction > onert::backend::acl_common::asAclFunction ( std::unique_ptr<::arm_compute::IFunction > && layer )

Definition at line 246 of file Convert.cc.

{
  return std::make_unique<AclFunction>(std::move(layer));
}

◆ asActivationLayerInfo() [1/2]

arm_compute::ActivationLayerInfo onert::backend::acl_common::asActivationLayerInfo ( const ir::Activation act_code )

Definition at line 131 of file Convert.cc.

{
  switch (act_code)
  {
    case ir::Activation::NONE:
      return ::arm_compute::ActivationLayerInfo{};
    case ir::Activation::RELU:
      return ::arm_compute::ActivationLayerInfo{
        ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
    case ir::Activation::RELU1:
      return ::arm_compute::ActivationLayerInfo{
        ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
    case ir::Activation::RELU6:
      return ::arm_compute::ActivationLayerInfo{
        ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
    // Cases for activation of LSTM.
    case ir::Activation::TANH:
      return ::arm_compute::ActivationLayerInfo{
        ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
    case ir::Activation::SIGMOID:
      // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
      // TODO In ACL and nnapi sepc, currently, Logistic's L always is 1, k always is 1, x0 always
      // 0(always sigmoid) regardless of values of the parameter.
      //      If ACL support non-sigmoid logistic, should fix param values.
      return ::arm_compute::ActivationLayerInfo{
        ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
    default:
      throw std::runtime_error{"Not supported internal activation, yet"};
      break;
  }
}

References onert::ir::NONE, onert::ir::RELU, onert::ir::RELU1, onert::ir::RELU6, onert::ir::SIGMOID, and onert::ir::TANH.

Referenced by kernelGenLSTM().

◆ asActivationLayerInfo() [2/2]

arm_compute::ActivationLayerInfo onert::backend::acl_common::asActivationLayerInfo	(	const ir::operation::ElementwiseActivation::Type	op_type,
		float	alpha,
		float	beta
	)

Definition at line 164 of file Convert.cc.

{
  switch (op_type)
  {
    case ir::operation::ElementwiseActivation::Type::RELU:
      if (beta == 0.f)
      {
        if (alpha == ir::operation::ElementwiseActivation::infinity)
        {
          return ::arm_compute::ActivationLayerInfo{
            ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
        }
        else
        {
          return ::arm_compute::ActivationLayerInfo{
            ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
        }
      }
      else
      {
        return ::arm_compute::ActivationLayerInfo{
          ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
      }
    case ir::operation::ElementwiseActivation::Type::TANH:
      return ::arm_compute::ActivationLayerInfo{
        ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
      // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
      // TODO In ACL and nnapi sepc, currently, Logistic's L always is 1, k always is 1, x0 always
      // 0(always sigmoid) regardless of values of the parameter.
      //      If ACL support non-sigmoid logistic, should fix param values.
      return ::arm_compute::ActivationLayerInfo{
        ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
    case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
      return ::arm_compute::ActivationLayerInfo{
        ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
    default:
      throw std::runtime_error{"Not supported internal elementwise activation, yet"};
      break;
  }
}

References onert::ir::operation::ElementwiseActivation::infinity, onert::ir::operation::ElementwiseActivation::LEAKY_RELU, onert::ir::operation::ElementwiseActivation::LOGISTIC, onert::ir::operation::ElementwiseActivation::RELU, and onert::ir::operation::ElementwiseActivation::TANH.

◆ asCoordinates()

arm_compute::Coordinates onert::backend::acl_common::asCoordinates	(	const ir::Operand &	operand,
		int32_t	rank
	)

Definition at line 207 of file Convert.cc.

{
  std::set<uint32_t> axes = asSet(operand, rank);
 
  arm_compute::Coordinates reduce_axes;
  for (const int32_t axis : axes)
  {
    reduce_axes.set(reduce_axes.num_dimensions(), axis);
  }
 
  return reduce_axes;
}

References asSet().

◆ asDataType()

arm_compute::DataType onert::backend::acl_common::asDataType ( const ir::DataType type )

Definition at line 71 of file Convert.cc.

{
  switch (type)
  {
    case ir::DataType::FLOAT32:
      return ::arm_compute::DataType::F32;
    case ir::DataType::INT32:
      return ::arm_compute::DataType::S32;
    case ir::DataType::UINT32:
      return ::arm_compute::DataType::U32;
    case ir::DataType::QUANT_UINT8_ASYMM:
      return ::arm_compute::DataType::QASYMM8;
    case ir::DataType::BOOL8:
    case ir::DataType::UINT8:
      return ::arm_compute::DataType::U8;
    case ir::DataType::QUANT_INT8_SYMM:
      return ::arm_compute::DataType::QSYMM8;
    case ir::DataType::QUANT_INT8_ASYMM:
      return ::arm_compute::DataType::QASYMM8_SIGNED;
    case ir::DataType::FLOAT16:
      return ::arm_compute::DataType::F16;
    case ir::DataType::INT64:
      return ::arm_compute::DataType::S64;
    case ir::DataType::QUANT_INT16_ASYMM:
      return ::arm_compute::DataType::QASYMM16;
    case ir::DataType::QUANT_INT8_SYMM_PER_CHANNEL:
      return ::arm_compute::DataType::QSYMM8_PER_CHANNEL;
    default:
      throw std::runtime_error("Not supported internal data type, yet");
      break;
  }
}

Referenced by asTensorInfo().

◆ asDilation()

arm_compute::Size2D onert::backend::acl_common::asDilation	(	uint32_t	dilation_width,
		uint32_t	dilation_height
	)

Definition at line 330 of file Convert.cc.

{
  assert(dilation_width != 0);
  assert(dilation_height != 0);
 
  return arm_compute::Size2D(dilation_width, dilation_height);
}

◆ asFunction()

template<typename T_Function >

std::unique_ptr< T_Function > onert::backend::acl_common::asFunction ( std::unique_ptr<::arm_compute::IFunction > && fn )

Definition at line 67 of file Convert.h.

{
  return std::make_unique<T_Function>(std::move(fn));
}

◆ asPadStrideInfo()

arm_compute::PadStrideInfo onert::backend::acl_common::asPadStrideInfo	(	const ir::ExplicitPadding &	padding,
		const ir::Stride &	stride
	)

Definition at line 119 of file Convert.cc.

{
  return ::arm_compute::PadStrideInfo{stride.horizontal,
                                      stride.vertical,
                                      padding.left,
                                      padding.right,
                                      padding.top,
                                      padding.bottom,
                                      ::arm_compute::DimensionRoundingType::FLOOR};
}

References onert::ir::ExplicitPadding::bottom, onert::ir::Stride::horizontal, onert::ir::ExplicitPadding::left, onert::ir::ExplicitPadding::right, onert::ir::ExplicitPadding::top, and onert::ir::Stride::vertical.

Referenced by kernelGenPool2D().

◆ asPixelValue()

arm_compute::PixelValue onert::backend::acl_common::asPixelValue ( const ir::Operand & operand )

Definition at line 309 of file Convert.cc.

{
  assert(operand.isConstant());
  assert(operand.shape().num_elements() == 1);
  switch (operand.typeInfo().type())
  {
    case ir::DataType::INT32:
      return arm_compute::PixelValue(operand.asScalar<int32_t>());
    case ir::DataType::INT64:
      return arm_compute::PixelValue(operand.asScalar<int64_t>());
    case ir::DataType::UINT32:
      return arm_compute::PixelValue(operand.asScalar<uint64_t>());
    case ir::DataType::UINT8:
      return arm_compute::PixelValue(operand.asScalar<uint8_t>());
    case ir::DataType::FLOAT32:
      return arm_compute::PixelValue(operand.asScalar<float>());
    default:
      throw std::runtime_error("asPixelValue : Not supported datatype yet");
  }
}

References onert::ir::Operand::asScalar(), onert::ir::Operand::isConstant(), onert::ir::Operand::shape(), onert::ir::TypeInfo::type(), and onert::ir::Operand::typeInfo().

◆ asQuantizationInfo()

::arm_compute::QuantizationInfo onert::backend::acl_common::asQuantizationInfo	(	const float	scale,
		const int32_t	offset
	)

Definition at line 104 of file Convert.cc.

{
  return ::arm_compute::QuantizationInfo(scale, offset);
}

References offset().

Referenced by asTensorInfo().

◆ asRuntimeDataType()

ir::DataType onert::backend::acl_common::asRuntimeDataType ( ::arm_compute::DataType data_type )

Definition at line 251 of file Convert.cc.

{
  switch (data_type)
  {
    case ::arm_compute::DataType::F32:
      return ir::DataType::FLOAT32;
    case ::arm_compute::DataType::S32:
      return ir::DataType::INT32;
    case ::arm_compute::DataType::U32:
      return ir::DataType::UINT32;
    case ::arm_compute::DataType::QASYMM8:
      return ir::DataType::QUANT_UINT8_ASYMM;
    case ::arm_compute::DataType::QASYMM8_SIGNED:
      return ir::DataType::QUANT_INT8_ASYMM;
    case ::arm_compute::DataType::U8:
      return ir::DataType::UINT8;
    case ::arm_compute::DataType::QSYMM8:
      return ir::DataType::QUANT_INT8_SYMM;
    case ::arm_compute::DataType::F16:
      return ir::DataType::FLOAT16;
    case ::arm_compute::DataType::S64:
      return ir::DataType::INT64;
    default:
      throw std::runtime_error{"Not supported acl data type, yet"};
      break;
  }
}

Referenced by onert::backend::acl_common::IACLTensor::data_type().

◆ asSet()

std::set< uint32_t > onert::backend::acl_common::asSet	(	const ir::Operand &	operand,
		int32_t	rank
	)

Definition at line 220 of file Convert.cc.

{
  std::set<std::uint32_t> axes;
 
  for (size_t i = 0; i < operand.shape().num_elements(); ++i)
  {
    int32_t axis = 0;
    switch (operand.typeInfo().type())
    {
      case ir::DataType::INT32:
        axis = reinterpret_cast<const int32_t *>(operand.data()->base())[i];
        break;
      case ir::DataType::INT64:
        axis = reinterpret_cast<const int64_t *>(operand.data()->base())[i];
        break;
      default:
        throw std::runtime_error("acl_common::asSet: Not supported data type");
    }
    if (axis < 0)
      axis += rank;
    axes.insert(ToARMComputeAxis(rank, axis).value());
  }
 
  return axes;
}

References onert::ir::Operand::data(), onert::ir::Operand::shape(), ToARMComputeAxis(), onert::ir::TypeInfo::type(), and onert::ir::Operand::typeInfo().

Referenced by asCoordinates().

◆ asTensorCoordinate()

arm_compute::Coordinates onert::backend::acl_common::asTensorCoordinate ( const ir::Coordinates & coord )

Definition at line 55 of file Convert.cc.

{
  const uint32_t rank = coord.size();
 
  ::arm_compute::Coordinates res{};
 
  res.set_num_dimensions(rank);
 
  for (uint32_t axis = 0; axis < rank; ++axis)
  {
    res.set(ToARMComputeAxis(rank, axis).value(), coord[axis]);
  }
 
  return res;
}

References onert::ir::Coordinates::size(), and ToARMComputeAxis().

◆ asTensorInfo()

arm_compute::TensorInfo onert::backend::acl_common::asTensorInfo	(	const ir::Shape &	shape,
		const ir::TypeInfo &	typeInfo,
		bool	apply_dim_correction
	)

Definition at line 109 of file Convert.cc.

{
  ::arm_compute::TensorInfo info(asTensorShape(shape, apply_dim_correction), 1,
                                 asDataType(typeInfo.type()),
                                 asQuantizationInfo(typeInfo.scale(), typeInfo.zero_point()));
  info.set_data_layout(::arm_compute::DataLayout::NHWC);
  return info;
}

References asDataType(), asQuantizationInfo(), asTensorShape(), info, onert::ir::TypeInfo::scale(), onert::ir::TypeInfo::type(), and onert::ir::TypeInfo::zero_point().

◆ asTensorShape()

arm_compute::TensorShape onert::backend::acl_common::asTensorShape	(	const ir::Shape &	shape,
		bool	apply_dim_correction
	)

Definition at line 27 of file Convert.cc.

{
  // If shape's rank is 0, the tensor is a scalar
  // Sometimes, some ACL kernel can use a scalar as tensor. But ACL does not allocate buffer for
  // tensor having rank as 0.
  const auto tensor_shape = shape.rank() == 0 ? ir::Shape{1} : shape;
 
  const uint32_t rank = tensor_shape.rank();
 
  ::arm_compute::TensorShape res{};
 
  res.set_num_dimensions(rank);
 
  for (uint32_t axis = 0; axis < rank; ++axis)
  {
    // NOTE In some cases, in incorrect dimensions is required.
    // For example, intput_size is 1 in LSTM. The input-to-input weights([num_units, input_size]) of
    // LSTM is used as the weight of the FullyConnected.
    // The FullyConnected's weight must be greater or equal than 2-dimensions.
    // However, if the dimension correction is applied to input_to_input_weights with input_size
    // equal to 1, it will be changed to 1-D.
    // So input_to_input_weights is not used by the weight of FullyConnected.
    res.set(ToARMComputeAxis(rank, axis).value(), tensor_shape.dim(axis), apply_dim_correction);
  }
 
  return res;
}

References loco::TensorShape::dim(), loco::TensorShape::rank(), and ToARMComputeAxis().

Referenced by asTensorInfo(), and kernelGenFullyConnected().

◆ convertPoolType()

arm_compute::PoolingType onert::backend::acl_common::convertPoolType ( ir::operation::Pool2D::PoolType pool_type_ir )

Definition at line 279 of file Convert.cc.

{
  switch (pool_type_ir)
  {
    case ir::operation::Pool2D::PoolType::AVG:
      return arm_compute::PoolingType::AVG;
    case ir::operation::Pool2D::PoolType::L2:
      return arm_compute::PoolingType::L2;
    case ir::operation::Pool2D::PoolType::MAX:
      return arm_compute::PoolingType::MAX;
    default:
      throw std::runtime_error("convertPoolType: Not supported operation yet");
  }
}

References onert::ir::operation::Pool2D::AVG, onert::ir::operation::Pool2D::L2, and onert::ir::operation::Pool2D::MAX.

◆ convertReduceType()

arm_compute::ReductionOperation onert::backend::acl_common::convertReduceType ( ir::operation::Reduce::ReduceType reduce_type_ir )

Definition at line 294 of file Convert.cc.

{
  switch (reduce_type_ir)
  {
    case ir::operation::Reduce::ReduceType::MAX:
      return arm_compute::ReductionOperation::MAX;
    case ir::operation::Reduce::ReduceType::MIN:
      return arm_compute::ReductionOperation::MIN;
    case ir::operation::Reduce::ReduceType::SUM:
      return arm_compute::ReductionOperation::SUM;
    default:
      throw std::runtime_error("convertReduceType: Not supported operation yet");
  }
}

References onert::ir::operation::Reduce::MAX, onert::ir::operation::Reduce::MIN, and onert::ir::operation::Reduce::SUM.

◆ copyInit()

template<typename T >

void onert::backend::acl_common::copyInit	(	const onert::ir::Operand &	model_obj,
		onert::backend::ITensor &	obj
	)

Definition at line 123 of file AclConstantInitializer.h.

{
  Init<T>(model_obj, obj);
}

◆ createMemoryManager()

template<typename T_MemoryManager , typename T_PoolManager , typename T_LifetimeManager >

std::shared_ptr< T_MemoryManager > onert::backend::acl_common::createMemoryManager ( )

Definition at line 30 of file AclLinearMemoryManager.h.

{
  std::shared_ptr<T_LifetimeManager> lifetime_mgr = std::make_shared<T_LifetimeManager>();
  std::shared_ptr<T_PoolManager> pool_mgr = std::make_shared<T_PoolManager>();
 
  std::shared_ptr<T_MemoryManager> mem_mgr =
    std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr);
  return mem_mgr;
}

◆ disableDimCorrection()

void onert::backend::acl_common::disableDimCorrection ( IACLTensor * tensor )

Definition at line 36 of file AclKernelGen.h.

{
  size_t input_rank = tensor->getShape().rank();
  const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
    .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), false);
}

◆ enableDimCorrection()

void onert::backend::acl_common::enableDimCorrection ( IACLTensor * tensor )

Definition at line 29 of file AclKernelGen.h.

{
  size_t input_rank = tensor->getShape().rank();
  const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
    .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), true);
}

◆ generateLayer() [1/2]

template<typename Layer , typename... Args>

std::unique_ptr< arm_compute::IFunction > onert::backend::acl_common::generateLayer ( Args &&... args )

Definition at line 44 of file AclKernelGen.h.

{
  auto l = std::make_unique<Layer>();
 
  l->configure(std::forward<Args>(args)...);
 
  return l;
}

◆ generateLayer() [2/2]

template<typename Layer , typename... Args>

std::unique_ptr< arm_compute::IFunction > onert::backend::acl_common::generateLayer	(	std::shared_ptr< arm_compute::IMemoryManager >	memory_manager,
		Args &&...	args
	)

Definition at line 55 of file AclKernelGen.h.

{
  auto l = std::make_unique<Layer>(memory_manager);
 
  l->configure(std::forward<Args>(args)...);
 
  return l;
}

◆ getARMComputeAxises()

inline ::arm_compute::Coordinates onert::backend::acl_common::getARMComputeAxises ( uint32_t rank )

Definition at line 52 of file Swizzle.h.

{
  ::arm_compute::Coordinates res{};
 
  res.set_num_dimensions(rank);
 
  for (uint32_t axis = 0; axis < rank; ++axis)
  {
    res.set(axis, ToARMComputeAxis(rank, axis).value());
  }
 
  return res;
}

References ToARMComputeAxis().

Referenced by getARMComputePermutationVector().

◆ getARMComputePermutationVector()

inline ::arm_compute::PermutationVector onert::backend::acl_common::getARMComputePermutationVector	(	uint32_t	rank,
		const std::vector< int32_t >	runtime_pv
	)

Definition at line 68 of file Swizzle.h.

{
  // rank upto 4 is supported
  assert(rank <= 4);
  assert(runtime_pv.size() > 0);
 
  int new_pv[4] = {0};
  ::arm_compute::Coordinates axises = getARMComputeAxises(rank);
 
  for (uint32_t i = 0; i < rank; ++i)
  {
    new_pv[axises[i]] = ToARMComputeAxis(rank, runtime_pv[i]).value();
  }
 
  ::arm_compute::PermutationVector ACL_PV =
    ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]};
  ACL_PV.set_num_dimensions(rank);
 
  return ACL_PV;
}

References getARMComputeAxises(), ToARMComputeAxis(), and onert::backend::acl_common::ARMComputeAxis::value().

◆ initReverseOrder()

template<typename T >

void onert::backend::acl_common::initReverseOrder	(	const ir::Operand &	model_obj,
		backend::ITensor &	obj
	)

Definition at line 129 of file AclConstantInitializer.h.

{
  assert(model_obj.data());
  const auto &shape = model_obj.shape();
  const auto base = reinterpret_cast<const T *>(model_obj.data()->base());
  assert(model_obj.shape().rank() == 1);
  obj.access([&](ITensor &tensor) {
    for (size_t i = 0; i < shape.num_elements(); ++i)
    {
      const T value = base[shape.num_elements() - i - 1];
      T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset({static_cast<T>(i)}));
      *into = value;
    }
  });
}

References onert::ir::Operand::data(), and onert::ir::Operand::shape().

◆ kernelGenFullyConnected()

template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorBuilder , typename T_TensorRegistry >

std::unique_ptr< exec::IFunction > onert::backend::acl_common::kernelGenFullyConnected	(	const ir::operation::FullyConnected &	node,
		const ir::Operands &	operands,
		const std::shared_ptr< T_TensorBuilder > &	tensor_builder,
		const std::shared_ptr< T_TensorRegistry > &	tensor_reg
	)

Definition at line 224 of file AclKernelGen.h.

{
  using ir::operation::FullyConnected;
 
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
 
  const auto input_rank = operands.at(input_index).shape().rank();
 
  [[maybe_unused]] const auto output_size =
    operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
  assert(bias_index.undefined() || operands.at(bias_index).shape().dim(0) == output_size);
  assert(operands.at(weight_index).shape().dim(0) == output_size);
  const auto batch_size =
    operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
  const auto input_size =
    operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
 
  // Check for reshaping input's shape into rank-2
  bool needs_reshape = false;
  ir::Shape reshape(2);
  if (input_rank == 3 || input_rank == 4)
  {
    const auto &ifm_shape = operands.at(input_index).shape();
    [[maybe_unused]] auto feature_size = 1;
    for (int i = 0; i < ifm_shape.rank(); ++i)
    {
      feature_size *= ifm_shape.dim(i);
    }
 
    assert(feature_size == batch_size * input_size);
 
    // for reshaping
    needs_reshape = true;
    reshape.dim(0) = batch_size; /* H */
    reshape.dim(1) = input_size; /* W */
  }
 
  auto output_tensor = tensor_reg->getAclTensor(output_index);
  const auto input_tensor = tensor_reg->getAclTensor(input_index);
  const auto weight_tensor = tensor_reg->getAclTensor(weight_index);
  const auto bias_tensor = bias_index.undefined() ? nullptr : tensor_reg->getAclTensor(bias_index);
 
  typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL;
  if (operands.at(weight_index).isConstant())
  {
    kernel_type = T_ACLLayer::KernelType::PREPROCESSED_WEIGHTS;
    assert(operands.at(weight_index).data());
  }
 
  auto fn = generateLayer<T_ACLLayer>(
    tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
    weight_tensor->handle(), bias_tensor != nullptr ? bias_tensor->handle() : nullptr,
    output_tensor->handle(), needs_reshape, asTensorShape(reshape), kernel_type);
 
  return std::make_unique<T_FunctionWrapper>(std::move(fn));
}

References asTensorShape(), onert::util::ObjectManager< Index, Object >::at(), onert::ir::OperandIndexSequence::at(), bias_tensor, onert::ir::Operation::getInputs(), and onert::ir::Operation::getOutputs().

◆ kernelGenLSTM()

template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorRegistry >

std::unique_ptr< exec::IFunction > onert::backend::acl_common::kernelGenLSTM	(	const ir::operation::LSTM &	node,
		const ir::Operands &	operands,
		const std::shared_ptr< T_TensorRegistry > &	tensor_reg
	)

Definition at line 66 of file AclKernelGen.h.

{
  // TODO Support dynamic rnn
  // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
  const auto scratch_buffer_index{
    node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
  const auto output_state_out_index{
    node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
  const auto cell_state_out_index{
    node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
  const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
 
  const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
  const auto input_to_input_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
  const auto input_to_forget_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
  const auto input_to_cell_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
  const auto input_to_output_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
  const auto recurrent_to_input_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
  const auto recurrent_to_forget_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
  const auto recurrent_to_cell_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
  const auto recurrent_to_output_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
  const auto cell_to_input_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
  const auto cell_to_forget_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
  const auto cell_to_output_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
  const auto input_gate_bias_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
  const auto forget_gate_bias_index{
    node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
  const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
  const auto output_gate_bias_index{
    node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
  const auto projection_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
  const auto projection_bias_index{
    node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
  const auto output_state_in_index{
    node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
  const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
  const auto cell_threshold = node.param().cell_threshold;
  const auto projection_threshold = node.param().projection_threshold;
 
  bool has_input_to_input_weights = operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
                                    operands.at(input_to_input_weights_index).shape().dim(1) != 0;
  bool has_recurrent_to_input_weights =
    operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
    operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
  bool has_cell_to_forget_weights = operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
  bool has_cell_to_output_weights = operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
  bool has_projection_weights = operands.at(projection_weights_index).shape().dim(0) != 0 &&
                                operands.at(projection_weights_index).shape().dim(1) != 0;
  bool has_projection_bias = operands.at(projection_bias_index).shape().dim(0);
 
  // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
  // true: no CIFG
  // false: CIFG
  // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
  bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
 
  // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
  // But the cell_to_input_weights does not exist in regular CIFG although peephole.
  // true: peephole
  // false: no peephole
  bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
 
  // NOTE Although the projection weights has data the projection bias may not have data.
  bool has_projection_param = has_projection_weights;
 
  const auto activation = node.param().activation;
  const auto cell_clip = cell_threshold;
  const auto projection_clip = projection_threshold;
  assert(cell_clip >= 0.f && projection_clip >= 0.f);
 
  auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index);
  auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index);
  auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index);
  auto output_tensor = tensor_reg->getAclTensor(output_index);
 
  auto input_tensor = tensor_reg->getAclTensor(input_index);
 
  auto input_to_forget_weights_tensor = tensor_reg->getAclTensor(input_to_forget_weights_index);
  auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index);
  auto input_to_output_weights_tensor = tensor_reg->getAclTensor(input_to_output_weights_index);
  auto recurrent_to_forget_weights_tensor =
    tensor_reg->getAclTensor(recurrent_to_forget_weights_index);
  auto recurrent_to_cell_weights_tensor = tensor_reg->getAclTensor(recurrent_to_cell_weights_index);
  auto recurrent_to_output_weights_tensor =
    tensor_reg->getAclTensor(recurrent_to_output_weights_index);
 
  auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index);
  auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index);
  auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index);
  auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index);
  auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index);
 
  auto act_info = asActivationLayerInfo(activation);
 
  ::arm_compute::LSTMParams<T_Tensor> lstm_params{};
  if (has_cifg_param)
  {
    auto input_to_input_weights_tensor =
      tensor_reg->getAclTensor(input_to_input_weights_index); // optional
    auto recurrent_to_input_weights_tensor =
      tensor_reg->getAclTensor(recurrent_to_input_weights_index); // optional
    auto cell_to_input_weights_handle =
      has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index)->handle()
                         : nullptr; // optional (non-cifg && peephole)
    auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index); // optional
    lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
                                recurrent_to_input_weights_tensor->handle(),
                                cell_to_input_weights_handle, input_gate_bias_tensor->handle());
  }
  if (has_peephole_param)
  {
    auto cell_to_forget_weights_tensor =
      tensor_reg->getAclTensor(cell_to_forget_weights_index); // optional
    auto cell_to_output_weights_tensor =
      tensor_reg->getAclTensor(cell_to_output_weights_index); // optional
    lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
                                    cell_to_output_weights_tensor->handle());
  }
  if (has_projection_param)
  {
    auto projection_weights_tensor = tensor_reg->getAclTensor(projection_weights_index); // optional
    auto projection_bias_handle = has_projection_bias
                                    ? tensor_reg->getAclTensor(projection_bias_index)->handle()
                                    : nullptr; // optional
    lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
  }
 
  auto fn = generateLayer<T_ACLLayer>(
    input_tensor->handle(), input_to_forget_weights_tensor->handle(),
    input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
    recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
    recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
    cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), output_state_in_tensor->handle(),
    cell_state_in_tensor->handle(), scratch_buffer_tensor->handle(),
    output_state_out_tensor->handle(), cell_state_out_tensor->handle(), output_tensor->handle(),
    lstm_params, act_info, cell_clip, projection_clip);
 
  return std::make_unique<T_FunctionWrapper>(std::move(fn));
}

◆ kernelGenPool2D()

template<typename T_ACLLayer , typename T_PoolOp , typename T_AclTensorRegistry >

std::unique_ptr<::arm_compute::IFunction > onert::backend::acl_common::kernelGenPool2D	(	const T_PoolOp &	node,
		const ir::Operands &	operands,
		const std::shared_ptr< T_AclTensorRegistry > &	tensor_reg,
		::arm_compute::PoolingType	pooling_type
	)

Definition at line 288 of file AclKernelGen.h.

{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(0)};
 
  const auto ofm_shape = operands.at(ofm_index).shape().asFeature();
  const auto ifm_shape = operands.at(ifm_index).shape().asFeature();
 
  const auto kh = node.param().kh;
  const auto kw = node.param().kw;
  const auto stride = node.param().stride;
  const auto padding =
    ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
 
  VERBOSE(Pool2DParam) << "IFM_H: " << ifm_shape.H << std::endl;
  VERBOSE(Pool2DParam) << "IFM_W: " << ifm_shape.W << std::endl;
  VERBOSE(Pool2DParam) << "OFM_H: " << ofm_shape.H << std::endl;
  VERBOSE(Pool2DParam) << "OFM_W: " << ofm_shape.W << std::endl;
  VERBOSE(Pool2DParam) << "KER_H: " << kh << std::endl;
  VERBOSE(Pool2DParam) << "KER_W: " << kw << std::endl;
  VERBOSE(Pool2DParam) << "STRIDE_H: " << stride.vertical << std::endl;
  VERBOSE(Pool2DParam) << "STRIDE_W: " << stride.horizontal << std::endl;
  VERBOSE(Pool2DParam) << "PAD(T): " << padding.top << std::endl;
  VERBOSE(Pool2DParam) << "PAD(B): " << padding.bottom << std::endl;
  VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
  VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
 
  auto ofm_tensor = tensor_reg->getAclTensor(ofm_index);
  auto ifm_tensor = tensor_reg->getAclTensor(ifm_index);
 
  ::arm_compute::PoolingLayerInfo info{
    pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
    asPadStrideInfo(padding, stride), true /* exclude_padding */};
 
  auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info);
 
  return fn;
}

References asPadStrideInfo(), onert::util::ObjectManager< Index, Object >::at(), onert::ir::calculatePadding(), info, and VERBOSE.

◆ ReorderBits()

template<typename T >

T onert::backend::acl_common::ReorderBits	(	T	in,
		size_t	numOfBits
	)

inline

Definition at line 89 of file Swizzle.h.

{
  assert(numOfBits > 0);
  T out = 0;
  for (int32_t i = numOfBits - 1; i >= 0; --i)
  {
    const uint32_t toShift = numOfBits - ToARMComputeAxis(numOfBits, i).value() - 1;
    out += ((in & 1) << toShift);
    in >>= 1;
  }
  return out;
}

References ToARMComputeAxis(), and onert::backend::acl_common::ARMComputeAxis::value().

◆ ToARMComputeAxis()

ARMComputeAxis onert::backend::acl_common::ToARMComputeAxis	(	uint32_t	rank,
		uint32_t	axis
	)

inline

Definition at line 45 of file Swizzle.h.

{
  assert(rank > axis);
 
  return ARMComputeAxis{(rank - axis) - 1};
}

Referenced by asSet(), asTensorCoordinate(), asTensorShape(), getARMComputeAxises(), getARMComputePermutationVector(), ReorderBits(), and onert::backend::acl_cl::ConstantInitializer::visit().

Data Structures

Typedefs

Functions

Typedef Documentation

◆ float16

Function Documentation

◆ asAclFunction()

◆ asActivationLayerInfo() [1/2]

◆ asActivationLayerInfo() [2/2]

◆ asCoordinates()

◆ asDataType()

◆ asDilation()

◆ asFunction()

◆ asPadStrideInfo()

◆ asPixelValue()

◆ asQuantizationInfo()

◆ asRuntimeDataType()

◆ asSet()

◆ asTensorCoordinate()

◆ asTensorInfo()

◆ asTensorShape()

◆ convertPoolType()

◆ convertReduceType()

◆ copyInit()

◆ createMemoryManager()

◆ disableDimCorrection()

◆ enableDimCorrection()

◆ generateLayer() [1/2]

◆ generateLayer() [2/2]

◆ getARMComputeAxises()

◆ getARMComputePermutationVector()

◆ initReverseOrder()

◆ kernelGenFullyConnected()

◆ kernelGenLSTM()

◆ kernelGenPool2D()

◆ ReorderBits()

◆ ToARMComputeAxis()