ONE - On-device Neural Engine
onert::backend::acl_common Namespace Reference

Data Structures

class  AclActivationBuilder
 
class  AclBackendContext
 
class  AclConstantInitializer
 
class  AclFunction
 
class  AclInternalBufferManager
 Class for InternalBufferManager, which has an arm_compute::IMemoryManager pointer. More...
 
class  AclLinearMemoryManager
 
class  AclMemoryManager
 
class  AclSubTensorAnalyzer
 Class to analyze tensor subsumption. More...
 
class  AclTensorBuilder
 
class  AclTensorManager
 
class  AclTensorRegistry
 Tensor registry class for acl backends. More...
 
class  ARMComputeAxis
 
class  IACLTensor
 Class representing Tensor for ACL. More...
 
struct  IInternalBufferManager
 Interface for InternalBufferManager, which has an arm_compute::IMemoryManager pointer. More...
 

Typedefs

using float16 = uint16_t
 

Functions

template<typename T >
void copyInit (const onert::ir::Operand &model_obj, onert::backend::ITensor &obj)
 
template<typename T >
void initReverseOrder (const ir::Operand &model_obj, backend::ITensor &obj)
 
void enableDimCorrection (IACLTensor *tensor)
 
void disableDimCorrection (IACLTensor *tensor)
 
template<typename Layer , typename... Args>
std::unique_ptr< arm_compute::IFunction > generateLayer (Args &&...args)
 
template<typename Layer , typename... Args>
std::unique_ptr< arm_compute::IFunction > generateLayer (std::shared_ptr< arm_compute::IMemoryManager > memory_manager, Args &&...args)
 
template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorRegistry >
std::unique_ptr< exec::IFunction > kernelGenLSTM (const ir::operation::LSTM &node, const ir::Operands &operands, const std::shared_ptr< T_TensorRegistry > &tensor_reg)
 
template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorBuilder , typename T_TensorRegistry >
std::unique_ptr< exec::IFunction > kernelGenFullyConnected (const ir::operation::FullyConnected &node, const ir::Operands &operands, const std::shared_ptr< T_TensorBuilder > &tensor_builder, const std::shared_ptr< T_TensorRegistry > &tensor_reg)
 
template<typename T_ACLLayer , typename T_PoolOp , typename T_AclTensorRegistry >
std::unique_ptr<::arm_compute::IFunction > kernelGenPool2D (const T_PoolOp &node, const ir::Operands &operands, const std::shared_ptr< T_AclTensorRegistry > &tensor_reg, ::arm_compute::PoolingType pooling_type)
 
template<typename T_MemoryManager , typename T_PoolManager , typename T_LifetimeManager >
std::shared_ptr< T_MemoryManager > createMemoryManager ()
 
::arm_compute::TensorShape asTensorShape (const ir::Shape &shape, bool apply_dim_correction)
 
::arm_compute::Coordinates asTensorCoordinate (const ir::Coordinates &coord)
 
::arm_compute::DataType asDataType (const ir::DataType type)
 
::arm_compute::QuantizationInfo asQuantizationInfo (const float scale, const int32_t offset)
 
::arm_compute::TensorInfo asTensorInfo (const ir::Shape &shape, const ir::TypeInfo &typeInfo, bool apply_dim_correction)
 
::arm_compute::PadStrideInfo asPadStrideInfo (const ir::ExplicitPadding &padding, const ir::Stride &stride)
 
::arm_compute::ActivationLayerInfo asActivationLayerInfo (const ir::Activation act_code)
 
::arm_compute::ActivationLayerInfo asActivationLayerInfo (const ir::operation::ElementwiseActivation::Type op_type, float alpha, float beta)
 
arm_compute::Coordinates asCoordinates (const ir::Operand &operand, int32_t rank)
 
std::set< uint32_t > asSet (const ir::Operand &operand, int32_t rank)
 
std::unique_ptr< AclFunctionasAclFunction (std::unique_ptr<::arm_compute::IFunction > &&layer)
 
ir::DataType asRuntimeDataType (::arm_compute::DataType data_type)
 
arm_compute::PoolingType convertPoolType (ir::operation::Pool2D::PoolType pool_type_ir)
 
arm_compute::ReductionOperation convertReduceType (ir::operation::Reduce::ReduceType reduce_type_ir)
 
arm_compute::PixelValue asPixelValue (const ir::Operand &operand)
 
arm_compute::Size2D asDilation (uint32_t dilation_width, uint32_t dilation_height)
 
template<typename T_Function >
std::unique_ptr< T_Function > asFunction (std::unique_ptr<::arm_compute::IFunction > &&fn)
 
ARMComputeAxis ToARMComputeAxis (uint32_t rank, uint32_t axis)
 
inline ::arm_compute::Coordinates getARMComputeAxises (uint32_t rank)
 
inline ::arm_compute::PermutationVector getARMComputePermutationVector (uint32_t rank, const std::vector< int32_t > runtime_pv)
 
template<typename T >
T ReorderBits (T in, size_t numOfBits)
 

Typedef Documentation

◆ float16

using onert::backend::acl_common::float16 = uint16_t

Definition at line 117 of file AclConstantInitializer.cc.

Function Documentation

◆ asAclFunction()

std::unique_ptr< AclFunction > onert::backend::acl_common::asAclFunction ( std::unique_ptr<::arm_compute::IFunction > &&  layer)

Definition at line 246 of file Convert.cc.

247{
248 return std::make_unique<AclFunction>(std::move(layer));
249}
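
A typical use, sketched here with a hypothetical kernel-generator context (the CLCopy layer and tensor handles are illustrative, not from this file):

  // Wrap a configured ACL layer so the runtime can run it through exec::IFunction.
  auto layer = generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), ofm_tensor->handle());
  auto fn = asAclFunction(std::move(layer)); // std::unique_ptr<AclFunction>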

◆ asActivationLayerInfo() [1/2]

arm_compute::ActivationLayerInfo onert::backend::acl_common::asActivationLayerInfo ( const ir::Activation  act_code)

Definition at line 131 of file Convert.cc.

132{
133 switch (act_code)
134 {
135 case ir::Activation::NONE:
136 return ::arm_compute::ActivationLayerInfo{};
137 case ir::Activation::RELU:
138 return ::arm_compute::ActivationLayerInfo{
139 ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
140 case ir::Activation::RELU1:
141 return ::arm_compute::ActivationLayerInfo{
142 ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
143 case ir::Activation::RELU6:
144 return ::arm_compute::ActivationLayerInfo{
145 ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
146 // Cases for activation of LSTM.
147 case ir::Activation::TANH:
148 return ::arm_compute::ActivationLayerInfo{
149 ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
150 case ir::Activation::SIGMOID:
151 // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
 152 // TODO In the ACL and NNAPI specs, Logistic's L is always 1, k is always 1, and x0 is always
 153 // 0 (always sigmoid) regardless of the parameter values.
 154 // If ACL supports a non-sigmoid logistic, these param values should be fixed.
155 return ::arm_compute::ActivationLayerInfo{
156 ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
157 default:
158 throw std::runtime_error{"Not supported internal activation, yet"};
159 break;
160 }
161}

References onert::ir::NONE, onert::ir::RELU, onert::ir::RELU1, onert::ir::RELU6, onert::ir::SIGMOID, and onert::ir::TANH.

Referenced by kernelGenLSTM().
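
A small sketch of the mapping; a() and b() are ACL's own accessors for the alpha/beta parameters:

  // RELU6 becomes a bounded ReLU clamped to [0, 6].
  auto act = asActivationLayerInfo(ir::Activation::RELU6);
  assert(act.activation() ==
         ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);
  assert(act.a() == 6.0f && act.b() == 0.0f);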

◆ asActivationLayerInfo() [2/2]

arm_compute::ActivationLayerInfo onert::backend::acl_common::asActivationLayerInfo ( const ir::operation::ElementwiseActivation::Type  op_type,
float  alpha,
float  beta 
)

Definition at line 164 of file Convert.cc.

166{
167 switch (op_type)
168 {
169 case ir::operation::ElementwiseActivation::Type::RELU:
170 if (beta == 0.f)
171 {
172 if (alpha == ir::operation::ElementwiseActivation::infinity)
173 {
174 return ::arm_compute::ActivationLayerInfo{
175 ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
176 }
177 else
178 {
179 return ::arm_compute::ActivationLayerInfo{
180 ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
181 }
182 }
183 else
184 {
185 return ::arm_compute::ActivationLayerInfo{
186 ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
187 }
188 case ir::operation::ElementwiseActivation::Type::TANH:
189 return ::arm_compute::ActivationLayerInfo{
190 ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
191 case ir::operation::ElementwiseActivation::Type::LOGISTIC:
192 // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
 193 // TODO In the ACL and NNAPI specs, Logistic's L is always 1, k is always 1, and x0 is always
 194 // 0 (always sigmoid) regardless of the parameter values.
 195 // If ACL supports a non-sigmoid logistic, these param values should be fixed.
196 return ::arm_compute::ActivationLayerInfo{
197 ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
198 case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
199 return ::arm_compute::ActivationLayerInfo{
200 ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
201 default:
202 throw std::runtime_error{"Not supported internal elementwise activation, yet"};
203 break;
204 }
205}

References onert::ir::operation::ElementwiseActivation::infinity, onert::ir::operation::ElementwiseActivation::LEAKY_RELU, onert::ir::operation::ElementwiseActivation::LOGISTIC, onert::ir::operation::ElementwiseActivation::RELU, and onert::ir::operation::ElementwiseActivation::TANH.

◆ asCoordinates()

arm_compute::Coordinates onert::backend::acl_common::asCoordinates ( const ir::Operand operand,
int32_t  rank 
)

Definition at line 207 of file Convert.cc.

208{
209 std::set<uint32_t> axes = asSet(operand, rank);
210
211 arm_compute::Coordinates reduce_axes;
212 for (const int32_t axis : axes)
213 {
214 reduce_axes.set(reduce_axes.num_dimensions(), axis);
215 }
216
217 return reduce_axes;
218}

References asSet().

◆ asDataType()

arm_compute::DataType onert::backend::acl_common::asDataType ( const ir::DataType  type)

Definition at line 71 of file Convert.cc.

72{
73 switch (type)
74 {
75 case ir::DataType::FLOAT32:
76 return ::arm_compute::DataType::F32;
77 case ir::DataType::INT32:
78 return ::arm_compute::DataType::S32;
79 case ir::DataType::UINT32:
80 return ::arm_compute::DataType::U32;
81 case ir::DataType::QUANT_UINT8_ASYMM:
82 return ::arm_compute::DataType::QASYMM8;
83 case ir::DataType::BOOL8:
84 case ir::DataType::UINT8:
85 return ::arm_compute::DataType::U8;
86 case ir::DataType::QUANT_INT8_SYMM:
87 return ::arm_compute::DataType::QSYMM8;
88 case ir::DataType::QUANT_INT8_ASYMM:
89 return ::arm_compute::DataType::QASYMM8_SIGNED;
90 case ir::DataType::FLOAT16:
91 return ::arm_compute::DataType::F16;
92 case ir::DataType::INT64:
93 return ::arm_compute::DataType::S64;
94 case ir::DataType::QUANT_INT16_ASYMM:
95 return ::arm_compute::DataType::QASYMM16;
96 case ir::DataType::QUANT_INT8_SYMM_PER_CHANNEL:
97 return ::arm_compute::DataType::QSYMM8_PER_CHANNEL;
98 default:
99 throw std::runtime_error("Not supported internal data type, yet");
100 break;
101 }
102}

Referenced by asTensorInfo().

◆ asDilation()

arm_compute::Size2D onert::backend::acl_common::asDilation ( uint32_t  dilation_width,
uint32_t  dilation_height 
)

Definition at line 330 of file Convert.cc.

331{
332 assert(dilation_width != 0);
333 assert(dilation_height != 0);
334
335 return arm_compute::Size2D(dilation_width, dilation_height);
336}

◆ asFunction()

template<typename T_Function >
std::unique_ptr< T_Function > onert::backend::acl_common::asFunction ( std::unique_ptr<::arm_compute::IFunction > &&  fn)

Definition at line 67 of file Convert.h.

68{
69 return std::make_unique<T_Function>(std::move(fn));
70}

◆ asPadStrideInfo()

arm_compute::PadStrideInfo onert::backend::acl_common::asPadStrideInfo ( const ir::ExplicitPadding padding,
const ir::Stride stride 
)

Definition at line 119 of file Convert.cc.

121{
122 return ::arm_compute::PadStrideInfo{stride.horizontal,
123 stride.vertical,
124 padding.left,
125 padding.right,
126 padding.top,
127 padding.bottom,
128 ::arm_compute::DimensionRoundingType::FLOOR};
129}

References onert::ir::ExplicitPadding::bottom, onert::ir::Stride::horizontal, onert::ir::ExplicitPadding::left, onert::ir::ExplicitPadding::right, onert::ir::ExplicitPadding::top, and onert::ir::Stride::vertical.

Referenced by kernelGenPool2D().
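
A minimal sketch, assuming ir::ExplicitPadding and ir::Stride are plain structs with the members listed in the References line above:

  ir::ExplicitPadding padding;
  padding.left = padding.right = 1;
  padding.top = padding.bottom = 1;
  ir::Stride stride;
  stride.horizontal = stride.vertical = 2;
  // Rounding is hard-coded to FLOOR, matching how onert computes output shapes.
  ::arm_compute::PadStrideInfo psi = asPadStrideInfo(padding, stride);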

◆ asPixelValue()

arm_compute::PixelValue onert::backend::acl_common::asPixelValue ( const ir::Operand operand)

Definition at line 309 of file Convert.cc.

310{
311 assert(operand.isConstant());
312 assert(operand.shape().num_elements() == 1);
313 switch (operand.typeInfo().type())
314 {
315 case ir::DataType::INT32:
316 return arm_compute::PixelValue(operand.asScalar<int32_t>());
317 case ir::DataType::INT64:
318 return arm_compute::PixelValue(operand.asScalar<int64_t>());
319 case ir::DataType::UINT32:
320 return arm_compute::PixelValue(operand.asScalar<uint64_t>());
321 case ir::DataType::UINT8:
322 return arm_compute::PixelValue(operand.asScalar<uint8_t>());
323 case ir::DataType::FLOAT32:
324 return arm_compute::PixelValue(operand.asScalar<float>());
325 default:
326 throw std::runtime_error("asPixelValue : Not supported datatype yet");
327 }
328}

References onert::ir::Operand::asScalar(), onert::ir::Operand::isConstant(), onert::ir::Operand::shape(), onert::ir::TypeInfo::type(), and onert::ir::Operand::typeInfo().

◆ asQuantizationInfo()

::arm_compute::QuantizationInfo onert::backend::acl_common::asQuantizationInfo ( const float  scale,
const int32_t  offset 
)

Definition at line 104 of file Convert.cc.

105{
106 return ::arm_compute::QuantizationInfo(scale, offset);
107}

Referenced by asTensorInfo().

◆ asRuntimeDataType()

ir::DataType onert::backend::acl_common::asRuntimeDataType ( ::arm_compute::DataType  data_type)

Definition at line 251 of file Convert.cc.

252{
253 switch (data_type)
254 {
255 case ::arm_compute::DataType::F32:
256 return ir::DataType::FLOAT32;
257 case ::arm_compute::DataType::S32:
258 return ir::DataType::INT32;
259 case ::arm_compute::DataType::U32:
260 return ir::DataType::UINT32;
261 case ::arm_compute::DataType::QASYMM8:
262 return ir::DataType::QUANT_UINT8_ASYMM;
263 case ::arm_compute::DataType::QASYMM8_SIGNED:
264 return ir::DataType::QUANT_INT8_ASYMM;
265 case ::arm_compute::DataType::U8:
266 return ir::DataType::UINT8;
267 case ::arm_compute::DataType::QSYMM8:
268 return ir::DataType::QUANT_INT8_SYMM;
269 case ::arm_compute::DataType::F16:
270 return ir::DataType::FLOAT16;
271 case ::arm_compute::DataType::S64:
272 return ir::DataType::INT64;
273 default:
274 throw std::runtime_error{"Not supported acl data type, yet"};
275 break;
276 }
277}

Referenced by onert::backend::acl_common::IACLTensor::data_type().
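
The two switch statements make asDataType() and asRuntimeDataType() inverses for the types both sides support; BOOL8 is the notable exception, since it is collapsed into U8 on the ACL side:

  assert(asRuntimeDataType(asDataType(ir::DataType::FLOAT32)) == ir::DataType::FLOAT32);
  // BOOL8 does not round-trip: it comes back as UINT8.
  assert(asRuntimeDataType(asDataType(ir::DataType::BOOL8)) == ir::DataType::UINT8);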

◆ asSet()

std::set< uint32_t > onert::backend::acl_common::asSet ( const ir::Operand operand,
int32_t  rank 
)

Definition at line 220 of file Convert.cc.

221{
222 std::set<std::uint32_t> axes;
223
224 for (size_t i = 0; i < operand.shape().num_elements(); ++i)
225 {
226 int32_t axis = 0;
227 switch (operand.typeInfo().type())
228 {
229 case ir::DataType::INT32:
230 axis = reinterpret_cast<const int32_t *>(operand.data()->base())[i];
231 break;
232 case ir::DataType::INT64:
233 axis = reinterpret_cast<const int64_t *>(operand.data()->base())[i];
234 break;
235 default:
236 throw std::runtime_error("acl_common::asSet: Not supported data type");
237 }
238 if (axis < 0)
239 axis += rank;
240 axes.insert(ToARMComputeAxis(rank, axis).value());
241 }
242
243 return axes;
244}

References onert::ir::Operand::data(), onert::ir::Operand::shape(), ToARMComputeAxis(), onert::ir::TypeInfo::type(), and onert::ir::Operand::typeInfo().

Referenced by asCoordinates().

◆ asTensorCoordinate()

arm_compute::Coordinates onert::backend::acl_common::asTensorCoordinate ( const ir::Coordinates coord)

Definition at line 55 of file Convert.cc.

56{
57 const uint32_t rank = coord.size();
58
59 ::arm_compute::Coordinates res{};
60
61 res.set_num_dimensions(rank);
62
63 for (uint32_t axis = 0; axis < rank; ++axis)
64 {
65 res.set(ToARMComputeAxis(rank, axis).value(), coord[axis]);
66 }
67
68 return res;
69}

References onert::ir::Coordinates::size(), and ToARMComputeAxis().

◆ asTensorInfo()

arm_compute::TensorInfo onert::backend::acl_common::asTensorInfo ( const ir::Shape shape,
const ir::TypeInfo typeInfo,
bool  apply_dim_correction 
)

Definition at line 109 of file Convert.cc.

111{
112 ::arm_compute::TensorInfo info(asTensorShape(shape, apply_dim_correction), 1,
113 asDataType(typeInfo.type()),
114 asQuantizationInfo(typeInfo.scale(), typeInfo.zero_point()));
115 info.set_data_layout(::arm_compute::DataLayout::NHWC);
116 return info;
117}

References asDataType(), asQuantizationInfo(), asTensorShape(), onert::ir::TypeInfo::scale(), onert::ir::TypeInfo::type(), and onert::ir::TypeInfo::zero_point().
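
A sketch of typical use; the ir::Shape and ir::TypeInfo constructor forms shown here are assumptions for illustration:

  ir::Shape shape{1, 224, 224, 3};
  ir::TypeInfo type_info{ir::DataType::QUANT_UINT8_ASYMM}; // scale/zero-point as stored in the IR
  auto info = asTensorInfo(shape, type_info, /*apply_dim_correction=*/true);
  // Every TensorInfo produced here is tagged NHWC; the axis swizzling itself is
  // done by asTensorShape, not by the data layout.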

◆ asTensorShape()

arm_compute::TensorShape onert::backend::acl_common::asTensorShape ( const ir::Shape shape,
bool  apply_dim_correction 
)

Definition at line 27 of file Convert.cc.

28{
 29 // If the shape's rank is 0, the tensor is a scalar.
 30 // Some ACL kernels can use a scalar as a tensor, but ACL does not allocate a buffer for a
 31 // tensor whose rank is 0.
32 const auto tensor_shape = shape.rank() == 0 ? ir::Shape{1} : shape;
33
34 const uint32_t rank = tensor_shape.rank();
35
36 ::arm_compute::TensorShape res{};
37
38 res.set_num_dimensions(rank);
39
40 for (uint32_t axis = 0; axis < rank; ++axis)
41 {
 42 // NOTE In some cases, keeping the "incorrect" dimensions is required.
 43 // For example, input_size is 1 in LSTM. The input-to-input weights ([num_units, input_size])
 44 // of LSTM are used as the weight of the FullyConnected.
 45 // The FullyConnected weight must have at least 2 dimensions.
 46 // However, if dimension correction were applied to input_to_input_weights with input_size
 47 // equal to 1, it would be collapsed to 1-D and could no longer serve as the FullyConnected
 48 // weight.
49 res.set(ToARMComputeAxis(rank, axis).value(), tensor_shape.dim(axis), apply_dim_correction);
50 }
51
52 return res;
53}

References onert::ir::Shape::dim(), onert::ir::Shape::rank(), and ToARMComputeAxis().

Referenced by asTensorInfo(), and kernelGenFullyConnected().
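
ToARMComputeAxis(rank, axis) maps an IR axis to rank - axis - 1 (its body is not shown on this page, but the ReorderBits entry below relies on the same identity), so the IR dimensions land in the ACL shape in reversed order:

  // IR shape {2, 3, 4}: IR axis 0 -> ACL axis 2, IR axis 1 -> ACL axis 1, IR axis 2 -> ACL axis 0
  auto acl_shape = asTensorShape(ir::Shape{2, 3, 4}, true);
  assert(acl_shape[0] == 4 && acl_shape[1] == 3 && acl_shape[2] == 2);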

◆ convertPoolType()

arm_compute::PoolingType onert::backend::acl_common::convertPoolType ( ir::operation::Pool2D::PoolType  pool_type_ir)

Definition at line 279 of file Convert.cc.

280{
281 switch (pool_type_ir)
282 {
283 case ir::operation::Pool2D::PoolType::AVG:
284 return arm_compute::PoolingType::AVG;
285 case ir::operation::Pool2D::PoolType::L2:
286 return arm_compute::PoolingType::L2;
287 case ir::operation::Pool2D::PoolType::MAX:
288 return arm_compute::PoolingType::MAX;
289 default:
290 throw std::runtime_error("convertPoolType: Not supported operation yet");
291 }
292}

References onert::ir::operation::Pool2D::AVG, onert::ir::operation::Pool2D::L2, and onert::ir::operation::Pool2D::MAX.

◆ convertReduceType()

arm_compute::ReductionOperation onert::backend::acl_common::convertReduceType ( ir::operation::Reduce::ReduceType  reduce_type_ir)

Definition at line 294 of file Convert.cc.

295{
296 switch (reduce_type_ir)
297 {
298 case ir::operation::Reduce::ReduceType::MAX:
299 return arm_compute::ReductionOperation::MAX;
300 case ir::operation::Reduce::ReduceType::MIN:
301 return arm_compute::ReductionOperation::MIN;
302 case ir::operation::Reduce::ReduceType::SUM:
303 return arm_compute::ReductionOperation::SUM;
304 default:
305 throw std::runtime_error("convertReduceType: Not supported operation yet");
306 }
307}

References onert::ir::operation::Reduce::MAX, onert::ir::operation::Reduce::MIN, and onert::ir::operation::Reduce::SUM.

◆ copyInit()

template<typename T >
void onert::backend::acl_common::copyInit ( const onert::ir::Operand model_obj,
onert::backend::ITensor obj 
)

Definition at line 123 of file AclConstantInitializer.h.

124{
125 Init<T>(model_obj, obj);
126}

◆ createMemoryManager()

template<typename T_MemoryManager , typename T_PoolManager , typename T_LifetimeManager >
std::shared_ptr< T_MemoryManager > onert::backend::acl_common::createMemoryManager ( )

Definition at line 30 of file AclLinearMemoryManager.h.

31{
32 std::shared_ptr<T_LifetimeManager> lifetime_mgr = std::make_shared<T_LifetimeManager>();
33 std::shared_ptr<T_PoolManager> pool_mgr = std::make_shared<T_PoolManager>();
34
35 std::shared_ptr<T_MemoryManager> mem_mgr =
36 std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr);
37 return mem_mgr;
38}
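
A minimal sketch with concrete Arm Compute Library types; the actual backends instantiate this template with their own aliases:

  auto mem_mgr = createMemoryManager<arm_compute::MemoryManagerOnDemand,
                                     arm_compute::PoolManager,
                                     arm_compute::BlobLifetimeManager>();
  // MemoryManagerOnDemand's constructor takes (lifetime_manager, pool_manager),
  // matching the make_shared call above.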

◆ disableDimCorrection()

void onert::backend::acl_common::disableDimCorrection ( IACLTensor tensor)

Definition at line 36 of file AclKernelGen.h.

37{
38 size_t input_rank = tensor->getShape().rank();
39 const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
40 .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), false);
41}

◆ enableDimCorrection()

void onert::backend::acl_common::enableDimCorrection ( IACLTensor tensor)

Definition at line 29 of file AclKernelGen.h.

30{
31 size_t input_rank = tensor->getShape().rank();
32 const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
33 .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), true);
34}

◆ generateLayer() [1/2]

template<typename Layer , typename... Args>
std::unique_ptr< arm_compute::IFunction > onert::backend::acl_common::generateLayer ( Args &&...  args)

Definition at line 44 of file AclKernelGen.h.

45{
46 auto l = std::make_unique<Layer>();
47
48 l->configure(std::forward<Args>(args)...);
49
50 return l;
51}

◆ generateLayer() [2/2]

template<typename Layer , typename... Args>
std::unique_ptr< arm_compute::IFunction > onert::backend::acl_common::generateLayer ( std::shared_ptr< arm_compute::IMemoryManager >  memory_manager,
Args &&...  args 
)

Definition at line 55 of file AclKernelGen.h.

56{
57 auto l = std::make_unique<Layer>(memory_manager);
58
59 l->configure(std::forward<Args>(args)...);
60
61 return l;
62}
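
Both overloads forward the trailing arguments to Layer::configure(); this second one additionally passes the memory manager to the layer's constructor, for layers that allocate internal scratch tensors. A sketch (the tensor handles are illustrative):

  auto fn = generateLayer<arm_compute::CLActivationLayer>(
      ifm_tensor->handle(), ofm_tensor->handle(),
      asActivationLayerInfo(ir::Activation::RELU));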

◆ getARMComputeAxises()

inline ::arm_compute::Coordinates onert::backend::acl_common::getARMComputeAxises ( uint32_t  rank)

Definition at line 52 of file Swizzle.h.

53{
54 ::arm_compute::Coordinates res{};
55
56 res.set_num_dimensions(rank);
57
58 for (uint32_t axis = 0; axis < rank; ++axis)
59 {
60 res.set(axis, ToARMComputeAxis(rank, axis).value());
61 }
62
63 return res;
64}

References ToARMComputeAxis().

Referenced by getARMComputePermutationVector().

◆ getARMComputePermutationVector()

inline ::arm_compute::PermutationVector onert::backend::acl_common::getARMComputePermutationVector ( uint32_t  rank,
const std::vector< int32_t >  runtime_pv 
)

Definition at line 68 of file Swizzle.h.

69{
 70 // ranks up to 4 are supported
71 assert(rank <= 4);
72 assert(runtime_pv.size() > 0);
73
74 int new_pv[4] = {0};
75 ::arm_compute::Coordinates axises = getARMComputeAxises(rank);
76
77 for (uint32_t i = 0; i < rank; ++i)
78 {
79 new_pv[axises[i]] = ToARMComputeAxis(rank, runtime_pv[i]).value();
80 }
81
82 ::arm_compute::PermutationVector ACL_PV =
83 ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]};
84 ACL_PV.set_num_dimensions(rank);
85
86 return ACL_PV;
87}

References getARMComputeAxises(), ToARMComputeAxis(), and onert::backend::acl_common::ARMComputeAxis::value().
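
Both the slot index and the stored axis value go through ToARMComputeAxis, so the two reversals cancel for an identity permutation. A sketch:

  auto pv = getARMComputePermutationVector(4, {0, 1, 2, 3});
  for (uint32_t i = 0; i < 4; ++i)
    assert(pv[i] == i); // identity in, identity out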

◆ initReverseOrder()

template<typename T >
void onert::backend::acl_common::initReverseOrder ( const ir::Operand model_obj,
backend::ITensor obj 
)

Definition at line 129 of file AclConstantInitializer.h.

130{
131 assert(model_obj.data());
132 const auto &shape = model_obj.shape();
133 const auto base = reinterpret_cast<const T *>(model_obj.data()->base());
134 assert(model_obj.shape().rank() == 1);
135 obj.access([&](ITensor &tensor) {
136 for (size_t i = 0; i < shape.num_elements(); ++i)
137 {
138 const T value = base[shape.num_elements() - i - 1];
139 T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset({static_cast<T>(i)}));
140 *into = value;
141 }
142 });
143}

References onert::ir::Operand::data(), and onert::ir::Operand::shape().

◆ kernelGenFullyConnected()

template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorBuilder , typename T_TensorRegistry >
std::unique_ptr< exec::IFunction > onert::backend::acl_common::kernelGenFullyConnected ( const ir::operation::FullyConnected node,
const ir::Operands operands,
const std::shared_ptr< T_TensorBuilder > &  tensor_builder,
const std::shared_ptr< T_TensorRegistry > &  tensor_reg 
)

Definition at line 224 of file AclKernelGen.h.

227{
229
230 const auto output_index{node.getOutputs().at(0)};
231 const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
232 const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
233 const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
234
235 const auto input_rank = operands.at(input_index).shape().rank();
236
237 [[maybe_unused]] const auto output_size =
238 operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
239 assert(bias_index.undefined() || operands.at(bias_index).shape().dim(0) == output_size);
240 assert(operands.at(weight_index).shape().dim(0) == output_size);
241 const auto batch_size =
242 operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
243 const auto input_size =
244 operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
245
246 // Check for reshaping input's shape into rank-2
247 bool needs_reshape = false;
248 ir::Shape reshape(2);
249 if (input_rank == 3 || input_rank == 4)
250 {
251 const auto &ifm_shape = operands.at(input_index).shape();
252 [[maybe_unused]] auto feature_size = 1;
253 for (int i = 0; i < ifm_shape.rank(); ++i)
254 {
255 feature_size *= ifm_shape.dim(i);
256 }
257
258 assert(feature_size == batch_size * input_size);
259
260 // for reshaping
261 needs_reshape = true;
262 reshape.dim(0) = batch_size; /* H */
263 reshape.dim(1) = input_size; /* W */
264 }
265
266 auto output_tensor = tensor_reg->getAclTensor(output_index);
267 const auto input_tensor = tensor_reg->getAclTensor(input_index);
268 const auto weight_tensor = tensor_reg->getAclTensor(weight_index);
269 const auto bias_tensor = bias_index.undefined() ? nullptr : tensor_reg->getAclTensor(bias_index);
270
271 typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL;
272 if (operands.at(weight_index).isConstant())
273 {
274 kernel_type = T_ACLLayer::KernelType::PREPROCESSED_WEIGHTS;
275 assert(operands.at(weight_index).data());
276 }
277
278 auto fn = generateLayer<T_ACLLayer>(
279 tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
280 weight_tensor->handle(), bias_tensor != nullptr ? bias_tensor->handle() : nullptr,
281 output_tensor->handle(), needs_reshape, asTensorShape(reshape), kernel_type);
282
283 return std::make_unique<T_FunctionWrapper>(std::move(fn));
284}

References asTensorShape(), onert::util::ObjectManager< Index, Object >::at(), onert::ir::OperandIndexSequence::at(), onert::ir::Operation::getInputs(), and onert::ir::Operation::getOutputs().

◆ kernelGenLSTM()

template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorRegistry >
std::unique_ptr< exec::IFunction > onert::backend::acl_common::kernelGenLSTM ( const ir::operation::LSTM node,
const ir::Operands operands,
const std::shared_ptr< T_TensorRegistry > &  tensor_reg 
)

Definition at line 66 of file AclKernelGen.h.

69{
70 // TODO Support dynamic rnn
71 // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
72 const auto scratch_buffer_index{
73 node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
74 const auto output_state_out_index{
75 node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
76 const auto cell_state_out_index{
77 node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
78 const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
79
80 const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
81 const auto input_to_input_weights_index{
82 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
83 const auto input_to_forget_weights_index{
84 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
85 const auto input_to_cell_weights_index{
86 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
87 const auto input_to_output_weights_index{
88 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
89 const auto recurrent_to_input_weights_index{
90 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
91 const auto recurrent_to_forget_weights_index{
92 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
93 const auto recurrent_to_cell_weights_index{
94 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
95 const auto recurrent_to_output_weights_index{
96 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
97 const auto cell_to_input_weights_index{
98 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
99 const auto cell_to_forget_weights_index{
100 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
101 const auto cell_to_output_weights_index{
102 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
103 const auto input_gate_bias_index{
104 node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
105 const auto forget_gate_bias_index{
106 node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
107 const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
108 const auto output_gate_bias_index{
109 node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
110 const auto projection_weights_index{
111 node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
112 const auto projection_bias_index{
113 node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
114 const auto output_state_in_index{
115 node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
116 const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
117 const auto cell_threshold = node.param().cell_threshold;
118 const auto projection_threshold = node.param().projection_threshold;
119
120 bool has_input_to_input_weights = operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
121 operands.at(input_to_input_weights_index).shape().dim(1) != 0;
122 bool has_recurrent_to_input_weights =
123 operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
124 operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
125 bool has_cell_to_forget_weights = operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
126 bool has_cell_to_output_weights = operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
127 bool has_projection_weights = operands.at(projection_weights_index).shape().dim(0) != 0 &&
128 operands.at(projection_weights_index).shape().dim(1) != 0;
129 bool has_projection_bias = operands.at(projection_bias_index).shape().dim(0);
130
131 // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
132 // true: no CIFG
133 // false: CIFG
 134 // NOTE The cell_to_input_weights does not exist in non-peephole mode, even for a regular (non-CIFG) LSTM.
135 bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
136
 137 // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole mode.
 138 // But the cell_to_input_weights does not exist in CIFG mode, even with peephole.
139 // true: peephole
140 // false: no peephole
141 bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
142
 143 // NOTE Although the projection weights have data, the projection bias may not.
144 bool has_projection_param = has_projection_weights;
145
146 const auto activation = node.param().activation;
147 const auto cell_clip = cell_threshold;
148 const auto projection_clip = projection_threshold;
149 assert(cell_clip >= 0.f && projection_clip >= 0.f);
150
151 auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index);
152 auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index);
153 auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index);
154 auto output_tensor = tensor_reg->getAclTensor(output_index);
155
156 auto input_tensor = tensor_reg->getAclTensor(input_index);
157
158 auto input_to_forget_weights_tensor = tensor_reg->getAclTensor(input_to_forget_weights_index);
159 auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index);
160 auto input_to_output_weights_tensor = tensor_reg->getAclTensor(input_to_output_weights_index);
161 auto recurrent_to_forget_weights_tensor =
162 tensor_reg->getAclTensor(recurrent_to_forget_weights_index);
163 auto recurrent_to_cell_weights_tensor = tensor_reg->getAclTensor(recurrent_to_cell_weights_index);
164 auto recurrent_to_output_weights_tensor =
165 tensor_reg->getAclTensor(recurrent_to_output_weights_index);
166
167 auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index);
168 auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index);
169 auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index);
170 auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index);
171 auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index);
172
173 auto act_info = asActivationLayerInfo(activation);
174
175 ::arm_compute::LSTMParams<T_Tensor> lstm_params{};
176 if (has_cifg_param)
177 {
178 auto input_to_input_weights_tensor =
179 tensor_reg->getAclTensor(input_to_input_weights_index); // optional
180 auto recurrent_to_input_weights_tensor =
181 tensor_reg->getAclTensor(recurrent_to_input_weights_index); // optional
182 auto cell_to_input_weights_handle =
183 has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index)->handle()
184 : nullptr; // optional (non-cifg && peephole)
185 auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index); // optional
186 lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
187 recurrent_to_input_weights_tensor->handle(),
188 cell_to_input_weights_handle, input_gate_bias_tensor->handle());
189 }
190 if (has_peephole_param)
191 {
192 auto cell_to_forget_weights_tensor =
193 tensor_reg->getAclTensor(cell_to_forget_weights_index); // optional
194 auto cell_to_output_weights_tensor =
195 tensor_reg->getAclTensor(cell_to_output_weights_index); // optional
196 lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
197 cell_to_output_weights_tensor->handle());
198 }
199 if (has_projection_param)
200 {
201 auto projection_weights_tensor = tensor_reg->getAclTensor(projection_weights_index); // optional
202 auto projection_bias_handle = has_projection_bias
203 ? tensor_reg->getAclTensor(projection_bias_index)->handle()
204 : nullptr; // optional
205 lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
206 }
207
208 auto fn = generateLayer<T_ACLLayer>(
209 input_tensor->handle(), input_to_forget_weights_tensor->handle(),
210 input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
211 recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
212 recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
213 cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), output_state_in_tensor->handle(),
214 cell_state_in_tensor->handle(), scratch_buffer_tensor->handle(),
215 output_state_out_tensor->handle(), cell_state_out_tensor->handle(), output_tensor->handle(),
216 lstm_params, act_info, cell_clip, projection_clip);
217
218 return std::make_unique<T_FunctionWrapper>(std::move(fn));
219}

References onert::ir::operation::LSTM::Param::activation, asActivationLayerInfo(), onert::util::ObjectManager< Index, Object >::at(), onert::ir::OperandIndexSequence::at(), onert::ir::operation::LSTM::CELL_BIAS, onert::ir::operation::LSTM::CELL_STATE_IN, onert::ir::operation::LSTM::CELL_STATE_OUT, onert::ir::operation::LSTM::Param::cell_threshold, onert::ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS, onert::ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS, onert::ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS, onert::ir::operation::LSTM::FORGET_GATE_BIAS, onert::ir::Operation::getInputs(), onert::ir::Operation::getOutputs(), onert::ir::operation::LSTM::INPUT, onert::ir::operation::LSTM::INPUT_GATE_BIAS, onert::ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS, onert::ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS, onert::ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS, onert::ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS, onert::ir::operation::LSTM::OUTPUT, onert::ir::operation::LSTM::OUTPUT_GATE_BIAS, onert::ir::operation::LSTM::OUTPUT_STATE_IN, onert::ir::operation::LSTM::OUTPUT_STATE_OUT, onert::ir::operation::LSTM::param(), onert::ir::operation::LSTM::PROJECTION_BIAS, onert::ir::operation::LSTM::Param::projection_threshold, onert::ir::operation::LSTM::PROJECTION_WEIGHTS, onert::ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS, onert::ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS, onert::ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS, onert::ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS, and onert::ir::operation::LSTM::SCRATCH_BUFFER.

◆ kernelGenPool2D()

template<typename T_ACLLayer , typename T_PoolOp , typename T_AclTensorRegistry >
std::unique_ptr<::arm_compute::IFunction > onert::backend::acl_common::kernelGenPool2D ( const T_PoolOp &  node,
const ir::Operands operands,
const std::shared_ptr< T_AclTensorRegistry > &  tensor_reg,
::arm_compute::PoolingType  pooling_type 
)

Definition at line 288 of file AclKernelGen.h.

291{
292 const auto ofm_index{node.getOutputs().at(0)};
293 const auto ifm_index{node.getInputs().at(0)};
294
295 const auto ofm_shape = operands.at(ofm_index).shape().asFeature();
296 const auto ifm_shape = operands.at(ifm_index).shape().asFeature();
297
298 const auto kh = node.param().kh;
299 const auto kw = node.param().kw;
300 const auto stride = node.param().stride;
301 const auto padding =
302 ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
303
304 VERBOSE(Pool2DParam) << "IFM_H: " << ifm_shape.H << std::endl;
305 VERBOSE(Pool2DParam) << "IFM_W: " << ifm_shape.W << std::endl;
306 VERBOSE(Pool2DParam) << "OFM_H: " << ofm_shape.H << std::endl;
307 VERBOSE(Pool2DParam) << "OFM_W: " << ofm_shape.W << std::endl;
308 VERBOSE(Pool2DParam) << "KER_H: " << kh << std::endl;
309 VERBOSE(Pool2DParam) << "KER_W: " << kw << std::endl;
310 VERBOSE(Pool2DParam) << "STRIDE_H: " << stride.vertical << std::endl;
311 VERBOSE(Pool2DParam) << "STRIDE_W: " << stride.horizontal << std::endl;
312 VERBOSE(Pool2DParam) << "PAD(T): " << padding.top << std::endl;
313 VERBOSE(Pool2DParam) << "PAD(B): " << padding.bottom << std::endl;
314 VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
315 VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
316
317 auto ofm_tensor = tensor_reg->getAclTensor(ofm_index);
318 auto ifm_tensor = tensor_reg->getAclTensor(ifm_index);
319
320 ::arm_compute::PoolingLayerInfo info{
321 pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
322 asPadStrideInfo(padding, stride), true /* exclude_padding */};
323
324 auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info);
325
326 return fn;
327}

References asPadStrideInfo(), onert::util::ObjectManager< Index, Object >::at(), onert::ir::calculatePadding(), and VERBOSE.

◆ ReorderBits()

template<typename T >
T onert::backend::acl_common::ReorderBits ( T  in,
size_t  numOfBits 
)
inline

Definition at line 89 of file Swizzle.h.

90{
91 assert(numOfBits > 0);
92 T out = 0;
93 for (int32_t i = numOfBits - 1; i >= 0; --i)
94 {
95 const uint32_t toShift = numOfBits - ToARMComputeAxis(numOfBits, i).value() - 1;
96 out += ((in & 1) << toShift);
97 in >>= 1;
98 }
99 return out;
100}

References ToARMComputeAxis(), and onert::backend::acl_common::ARMComputeAxis::value().
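
With ToARMComputeAxis(numOfBits, i) equal to numOfBits - i - 1, the shift amount toShift reduces to i, so the function reverses the low numOfBits bits of the input:

  assert(ReorderBits<uint32_t>(0b0011u, 4) == 0b1100u); // 3 -> 12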

◆ ToARMComputeAxis()

ARMComputeAxis onert::backend::acl_common::ToARMComputeAxis ( uint32_t  rank,
uint32_t  axis 
)
inline

Definition at line 45 of file Swizzle.h.