ONE - On-device Neural Engine
onert::backend::acl_common Namespace Reference

Data Structures

class  AclActivationBuilder
 
class  AclBackendContext
 
class  AclConstantInitializer
 
class  AclFunction
 
class  AclInternalBufferManager
 Class for InternalBufferManager which has arm_compute::IMemoryManager pointer. More...
 
class  AclLinearMemoryManager
 
class  AclMemoryManager
 
class  AclSubTensorAnalyzer
 Class to analyze tensor subsumption. More...
 
class  AclTensorBuilder
 
class  AclTensorManager
 
class  AclTensorRegistry
 Tensor registry class for acl backends. More...
 
class  ARMComputeAxis
 
class  IACLTensor
 Class representing Tensor for ACL. More...
 
struct  IInternalBufferManager
 Interface for InternalBufferManager which has arm_compute::IMemoryManager pointer. More...
 

Typedefs

using float16 = uint16_t
 

Functions

template<typename T >
void copyInit (const onert::ir::Operand &model_obj, onert::backend::ITensor &obj)
 
template<typename T >
void initReverseOrder (const ir::Operand &model_obj, backend::ITensor &obj)
 
void enableDimCorrection (IACLTensor *tensor)
 
void disableDimCorrection (IACLTensor *tensor)
 
template<typename Layer , typename... Args>
std::unique_ptr< arm_compute::IFunction > generateLayer (Args &&...args)
 
template<typename Layer , typename... Args>
std::unique_ptr< arm_compute::IFunction > generateLayer (std::shared_ptr< arm_compute::IMemoryManager > memory_manager, Args &&...args)
 
template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorRegistry >
std::unique_ptr< exec::IFunction > kernelGenLSTM (const ir::operation::LSTM &node, const ir::Operands &operands, const std::shared_ptr< T_TensorRegistry > &tensor_reg)
 
template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorBuilder , typename T_TensorRegistry >
std::unique_ptr< exec::IFunction > kernelGenFullyConnected (const ir::operation::FullyConnected &node, const ir::Operands &operands, const std::shared_ptr< T_TensorBuilder > &tensor_builder, const std::shared_ptr< T_TensorRegistry > &tensor_reg)
 
template<typename T_ACLLayer , typename T_PoolOp , typename T_AclTensorRegistry >
std::unique_ptr<::arm_compute::IFunction > kernelGenPool2D (const T_PoolOp &node, const ir::Operands &operands, const std::shared_ptr< T_AclTensorRegistry > &tensor_reg, ::arm_compute::PoolingType pooling_type)
 
template<typename T_MemoryManager , typename T_PoolManager , typename T_LifetimeManager >
std::shared_ptr< T_MemoryManager > createMemoryManager ()
 
::arm_compute::TensorShape asTensorShape (const ir::Shape &shape, bool apply_dim_correction)
 
::arm_compute::Coordinates asTensorCoordinate (const ir::Coordinates &coord)
 
::arm_compute::DataType asDataType (const ir::DataType type)
 
::arm_compute::QuantizationInfo asQuantizationInfo (const float scale, const int32_t offset)
 
::arm_compute::TensorInfo asTensorInfo (const ir::Shape &shape, const ir::TypeInfo &typeInfo, bool apply_dim_correction)
 
::arm_compute::PadStrideInfo asPadStrideInfo (const ir::ExplicitPadding &padding, const ir::Stride &stride)
 
::arm_compute::ActivationLayerInfo asActivationLayerInfo (const ir::Activation act_code)
 
::arm_compute::ActivationLayerInfo asActivationLayerInfo (const ir::operation::ElementwiseActivation::Type op_type, float alpha, float beta)
 
arm_compute::Coordinates asCoordinates (const ir::Operand &operand, int32_t rank)
 
std::set< uint32_t > asSet (const ir::Operand &operand, int32_t rank)
 
std::unique_ptr< AclFunctionasAclFunction (std::unique_ptr<::arm_compute::IFunction > &&layer)
 
ir::DataType asRuntimeDataType (::arm_compute::DataType data_type)
 
arm_compute::PoolingType convertPoolType (ir::operation::Pool2D::PoolType pool_type_ir)
 
arm_compute::ReductionOperation convertReduceType (ir::operation::Reduce::ReduceType reduce_type_ir)
 
arm_compute::PixelValue asPixelValue (const ir::Operand &operand)
 
arm_compute::Size2D asDilation (uint32_t dilation_width, uint32_t dilation_height)
 
template<typename T_Function >
std::unique_ptr< T_Function > asFunction (std::unique_ptr<::arm_compute::IFunction > &&fn)
 
ARMComputeAxis ToARMComputeAxis (uint32_t rank, uint32_t axis)
 
inline ::arm_compute::Coordinates getARMComputeAxises (uint32_t rank)
 
inline ::arm_compute::PermutationVector getARMComputePermutationVector (uint32_t rank, const std::vector< int32_t > runtime_pv)
 
template<typename T >
T ReorderBits (T in, size_t numOfBits)
 

Typedef Documentation

◆ float16

using onert::backend::acl_common::float16 = typedef uint16_t

Definition at line 117 of file AclConstantInitializer.cc.

Function Documentation

◆ asAclFunction()

std::unique_ptr< AclFunction > onert::backend::acl_common::asAclFunction ( std::unique_ptr<::arm_compute::IFunction > &&  layer)

Definition at line 246 of file Convert.cc.

247{
248 return std::make_unique<AclFunction>(std::move(layer));
249}
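
For example, a backend kernel generator typically configures an ACL layer with generateLayer() and then hands it to the runtime through asAclFunction(). The sketch below is illustrative only: input_tensor and output_tensor stand for IACLTensor pointers taken from a tensor registry, and CLActivationLayer is just one possible ACL layer.

// Hedged sketch: wrap a configured ACL layer for execution by the runtime.
auto layer = generateLayer<::arm_compute::CLActivationLayer>(
    input_tensor->handle(), output_tensor->handle(),
    asActivationLayerInfo(ir::Activation::RELU));
std::unique_ptr<exec::IFunction> fn = asAclFunction(std::move(layer));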

◆ asActivationLayerInfo() [1/2]

arm_compute::ActivationLayerInfo onert::backend::acl_common::asActivationLayerInfo ( const ir::Activation  act_code)

Definition at line 131 of file Convert.cc.

132{
133 switch (act_code)
134 {
135 case ir::Activation::NONE:
136 return ::arm_compute::ActivationLayerInfo{};
137 case ir::Activation::RELU:
138 return ::arm_compute::ActivationLayerInfo{
139 ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
140 case ir::Activation::RELU1:
141 return ::arm_compute::ActivationLayerInfo{
142 ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
143 case ir::Activation::RELU6:
144 return ::arm_compute::ActivationLayerInfo{
145 ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
146 // Cases for activation of LSTM.
147 case ir::Activation::TANH:
148 return ::arm_compute::ActivationLayerInfo{
149 ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
150 case ir::Activation::SIGMOID:
151 // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
 152 // TODO In ACL and nnapi spec, currently, Logistic's L always is 1, k always is 1, x0 always
153 // 0(always sigmoid) regardless of values of the parameter.
154 // If ACL support non-sigmoid logistic, should fix param values.
155 return ::arm_compute::ActivationLayerInfo{
156 ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
157 default:
158 throw std::runtime_error{"Not supported internal activation, yet"};
159 break;
160 }
161}

References onert::ir::NONE, onert::ir::RELU, onert::ir::RELU1, onert::ir::RELU6, onert::ir::SIGMOID, and onert::ir::TANH.

Referenced by kernelGenLSTM().
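
As a quick illustration of the mapping above, a fused RELU6 from the IR becomes an ACL LU_BOUNDED_RELU clamped to [0, 6]:

const auto act_info = asActivationLayerInfo(ir::Activation::RELU6);
// act_info.activation() == ActivationFunction::LU_BOUNDED_RELU
// act_info.a() == 6.0f (upper bound), act_info.b() == 0.0f (lower bound)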

◆ asActivationLayerInfo() [2/2]

arm_compute::ActivationLayerInfo onert::backend::acl_common::asActivationLayerInfo ( const ir::operation::ElementwiseActivation::Type  op_type,
float  alpha,
float  beta 
)

Definition at line 164 of file Convert.cc.

166{
167 switch (op_type)
168 {
169 case ir::operation::ElementwiseActivation::Type::RELU:
170 if (beta == 0.f)
171 {
172 if (alpha == ir::operation::ElementwiseActivation::infinity)
173 {
174 return ::arm_compute::ActivationLayerInfo{
175 ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
176 }
177 else
178 {
179 return ::arm_compute::ActivationLayerInfo{
180 ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
181 }
182 }
183 else
184 {
185 return ::arm_compute::ActivationLayerInfo{
186 ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
187 }
188 case ir::operation::ElementwiseActivation::Type::TANH:
189 return ::arm_compute::ActivationLayerInfo{
190 ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
191 case ir::operation::ElementwiseActivation::Type::LOGISTIC:
192 // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
 194 // TODO In ACL and nnapi spec, currently, Logistic's L always is 1, k always is 1, x0 always
194 // 0(always sigmoid) regardless of values of the parameter.
195 // If ACL support non-sigmoid logistic, should fix param values.
196 return ::arm_compute::ActivationLayerInfo{
197 ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
198 case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
199 return ::arm_compute::ActivationLayerInfo{
200 ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
201 default:
202 throw std::runtime_error{"Not supported internal elementwise activation, yet"};
203 break;
204 }
205}

References onert::ir::operation::ElementwiseActivation::infinity, onert::ir::operation::ElementwiseActivation::LEAKY_RELU, onert::ir::operation::ElementwiseActivation::LOGISTIC, onert::ir::operation::ElementwiseActivation::RELU, and onert::ir::operation::ElementwiseActivation::TANH.

◆ asCoordinates()

arm_compute::Coordinates onert::backend::acl_common::asCoordinates ( const ir::Operand operand,
int32_t  rank 
)

Definition at line 207 of file Convert.cc.

208{
209 std::set<uint32_t> axes = asSet(operand, rank);
210
211 arm_compute::Coordinates reduce_axes;
212 for (const int32_t axis : axes)
213 {
214 reduce_axes.set(reduce_axes.num_dimensions(), axis);
215 }
216
217 return reduce_axes;
218}
std::set< uint32_t > asSet(const ir::Operand &operand, int32_t rank)
Definition Convert.cc:220

References asSet().

◆ asDataType()

arm_compute::DataType onert::backend::acl_common::asDataType ( const ir::DataType  type)

Definition at line 71 of file Convert.cc.

72{
73 switch (type)
74 {
75 case ir::DataType::FLOAT32:
76 return ::arm_compute::DataType::F32;
77 case ir::DataType::INT32:
78 return ::arm_compute::DataType::S32;
79 case ir::DataType::UINT32:
80 return ::arm_compute::DataType::U32;
81 case ir::DataType::QUANT_UINT8_ASYMM:
82 return ::arm_compute::DataType::QASYMM8;
83 case ir::DataType::BOOL8:
84 case ir::DataType::UINT8:
85 return ::arm_compute::DataType::U8;
86 case ir::DataType::QUANT_INT8_SYMM:
87 return ::arm_compute::DataType::QSYMM8;
88 case ir::DataType::QUANT_INT8_ASYMM:
89 return ::arm_compute::DataType::QASYMM8_SIGNED;
90 case ir::DataType::FLOAT16:
91 return ::arm_compute::DataType::F16;
92 case ir::DataType::INT64:
93 return ::arm_compute::DataType::S64;
94 case ir::DataType::QUANT_INT16_SYMM:
95 return ::arm_compute::DataType::QSYMM16;
96 case ir::DataType::QUANT_INT8_SYMM_PER_CHANNEL:
97 return ::arm_compute::DataType::QSYMM8_PER_CHANNEL;
98 default:
99 throw std::runtime_error("Not supported internal data type, yet");
100 break;
101 }
102}
int32_t type

References type.

Referenced by asTensorInfo().

◆ asDilation()

arm_compute::Size2D onert::backend::acl_common::asDilation ( uint32_t  dilation_width,
uint32_t  dilation_height 
)

Definition at line 332 of file Convert.cc.

333{
334 assert(dilation_width != 0);
335 assert(dilation_height != 0);
336
337 return arm_compute::Size2D(dilation_width, dilation_height);
338}

◆ asFunction()

template<typename T_Function >
std::unique_ptr< T_Function > onert::backend::acl_common::asFunction ( std::unique_ptr<::arm_compute::IFunction > &&  fn)

Definition at line 64 of file Convert.h.

65{
66 return std::make_unique<T_Function>(std::move(fn));
67}

◆ asPadStrideInfo()

arm_compute::PadStrideInfo onert::backend::acl_common::asPadStrideInfo ( const ir::ExplicitPadding padding,
const ir::Stride stride 
)

Definition at line 119 of file Convert.cc.

121{
122 return ::arm_compute::PadStrideInfo{stride.horizontal,
123 stride.vertical,
124 padding.left,
125 padding.right,
126 padding.top,
127 padding.bottom,
128 ::arm_compute::DimensionRoundingType::FLOOR};
129}

References onert::ir::ExplicitPadding::bottom, onert::ir::Stride::horizontal, onert::ir::ExplicitPadding::left, onert::ir::ExplicitPadding::right, onert::ir::ExplicitPadding::top, and onert::ir::Stride::vertical.

Referenced by kernelGenPool2D().

◆ asPixelValue()

arm_compute::PixelValue onert::backend::acl_common::asPixelValue ( const ir::Operand operand)

Definition at line 311 of file Convert.cc.

312{
313 assert(operand.isConstant());
314 assert(operand.shape().num_elements() == 1);
315 switch (operand.typeInfo().type())
316 {
317 case ir::DataType::INT32:
318 return arm_compute::PixelValue(operand.asScalar<int32_t>());
319 case ir::DataType::INT64:
320 return arm_compute::PixelValue(operand.asScalar<int64_t>());
321 case ir::DataType::UINT32:
322 return arm_compute::PixelValue(operand.asScalar<uint64_t>());
323 case ir::DataType::UINT8:
324 return arm_compute::PixelValue(operand.asScalar<uint8_t>());
325 case ir::DataType::FLOAT32:
326 return arm_compute::PixelValue(operand.asScalar<float>());
327 default:
328 throw std::runtime_error("asPixelValue : Not supported datatype yet");
329 }
330}
const TypeInfo & typeInfo(void) const
Definition Operand.h:45
T asScalar(void) const
Definition Operand.h:86
const Shape & shape(void) const
Definition Operand.h:44
bool isConstant(void) const
Get true if Operand is const, otherwise false a.
Definition Operand.h:77
DataType type() const
Definition TypeInfo.h:63

References onert::ir::Operand::asScalar(), onert::ir::Operand::isConstant(), onert::ir::Operand::shape(), onert::ir::TypeInfo::type(), and onert::ir::Operand::typeInfo().

◆ asQuantizationInfo()

::arm_compute::QuantizationInfo onert::backend::acl_common::asQuantizationInfo ( const float  scale,
const int32_t  offset 
)

Definition at line 104 of file Convert.cc.

105{
106 return ::arm_compute::QuantizationInfo(scale, offset);
107}
__global uchar * offset(const Image *img, int x, int y)
Definition helpers.h:540

References offset().

Referenced by asTensorInfo().

◆ asRuntimeDataType()

ir::DataType onert::backend::acl_common::asRuntimeDataType ( ::arm_compute::DataType  data_type)

Definition at line 251 of file Convert.cc.

252{
253 switch (data_type)
254 {
255 case ::arm_compute::DataType::F32:
256 return ir::DataType::FLOAT32;
257 case ::arm_compute::DataType::S32:
258 return ir::DataType::INT32;
259 case ::arm_compute::DataType::U32:
260 return ir::DataType::UINT32;
261 case ::arm_compute::DataType::QASYMM8:
262 return ir::DataType::QUANT_UINT8_ASYMM;
263 case ::arm_compute::DataType::QASYMM8_SIGNED:
264 return ir::DataType::QUANT_INT8_ASYMM;
265 case ::arm_compute::DataType::U8:
266 return ir::DataType::UINT8;
267 case ::arm_compute::DataType::QSYMM8:
268 return ir::DataType::QUANT_INT8_SYMM;
269 case ::arm_compute::DataType::F16:
270 return ir::DataType::FLOAT16;
271 case ::arm_compute::DataType::S64:
272 return ir::DataType::INT64;
273 case ::arm_compute::DataType::QSYMM16:
274 return ir::DataType::QUANT_INT16_SYMM;
275 default:
276 throw std::runtime_error{"Not supported acl data type, yet"};
277 break;
278 }
279}

Referenced by onert::backend::acl_common::IACLTensor::data_type().
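
asRuntimeDataType() inverts asDataType() for every type listed above except BOOL8, which shares arm_compute::DataType::U8 with UINT8 and therefore maps back to UINT8. A minimal round-trip sketch:

// Round-trip for a supported type; unsupported ACL types throw std::runtime_error.
const auto acl_type = asDataType(ir::DataType::QUANT_UINT8_ASYMM); // QASYMM8
assert(asRuntimeDataType(acl_type) == ir::DataType::QUANT_UINT8_ASYMM);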

◆ asSet()

std::set< uint32_t > onert::backend::acl_common::asSet ( const ir::Operand operand,
int32_t  rank 
)

Definition at line 220 of file Convert.cc.

221{
222 std::set<std::uint32_t> axes;
223
224 for (size_t i = 0; i < operand.shape().num_elements(); ++i)
225 {
226 int32_t axis = 0;
227 switch (operand.typeInfo().type())
228 {
229 case ir::DataType::INT32:
230 axis = reinterpret_cast<const int32_t *>(operand.data()->base())[i];
231 break;
232 case ir::DataType::INT64:
233 axis = reinterpret_cast<const int64_t *>(operand.data()->base())[i];
234 break;
235 default:
236 throw std::runtime_error("acl_common::asSet: Not supported data type");
237 }
238 if (axis < 0)
239 axis += rank;
240 axes.insert(ToARMComputeAxis(rank, axis).value());
241 }
242
243 return axes;
244}
void data(std::shared_ptr< Data > &&data)
Definition Operand.h:62

References onert::ir::Operand::data(), onert::ir::Operand::shape(), ToARMComputeAxis(), onert::ir::TypeInfo::type(), and onert::ir::Operand::typeInfo().

Referenced by asCoordinates().
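
For example, for a rank-4 reduction whose constant axes operand holds {-1, 0}, the negative axis is first wrapped to 3 and each axis is then swizzled with ToARMComputeAxis() before insertion; asCoordinates() simply repacks the resulting set into arm_compute::Coordinates. In code (axes_operand is a placeholder for such a constant INT32 operand):

// Hedged sketch: convert a constant axes operand of a rank-4 reduction.
std::set<uint32_t> axes = asSet(axes_operand, /*rank=*/4);
arm_compute::Coordinates reduce_axes = asCoordinates(axes_operand, /*rank=*/4);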

◆ asTensorCoordinate()

arm_compute::Coordinates onert::backend::acl_common::asTensorCoordinate ( const ir::Coordinates coord)

Definition at line 55 of file Convert.cc.

56{
57 const uint32_t rank = coord.size();
58
59 ::arm_compute::Coordinates res{};
60
61 res.set_num_dimensions(rank);
62
63 for (uint32_t axis = 0; axis < rank; ++axis)
64 {
65 res.set(ToARMComputeAxis(rank, axis).value(), coord[axis]);
66 }
67
68 return res;
69}
size_t size() const
Return size of coordinates.
Definition Coordinates.h:93

References onert::ir::Coordinates::size(), and ToARMComputeAxis().

◆ asTensorInfo()

arm_compute::TensorInfo onert::backend::acl_common::asTensorInfo ( const ir::Shape shape,
const ir::TypeInfo typeInfo,
bool  apply_dim_correction 
)

Definition at line 109 of file Convert.cc.

111{
112 ::arm_compute::TensorInfo info(asTensorShape(shape, apply_dim_correction), 1,
113 asDataType(typeInfo.type()),
114 asQuantizationInfo(typeInfo.scale(), typeInfo.zero_point()));
115 info.set_data_layout(::arm_compute::DataLayout::NHWC);
116 return info;
117}
float scale() const
Definition TypeInfo.h:64
int32_t zero_point() const
Definition TypeInfo.h:66
volatile const char info[]
::arm_compute::TensorShape asTensorShape(const ir::Shape &shape, bool apply_dim_correction)
Definition Convert.cc:27
::arm_compute::DataType asDataType(const ir::DataType type)
Definition Convert.cc:71
::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset)
Definition Convert.cc:104

References asDataType(), asQuantizationInfo(), asTensorShape(), info, onert::ir::TypeInfo::scale(), onert::ir::TypeInfo::type(), and onert::ir::TypeInfo::zero_point().
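
Putting the shape, data-type and quantization conversions together, the ACL-side descriptor for an operand can be built in one call. A hedged sketch, where shape and type_info describe a QUANT_UINT8_ASYMM operand (their construction is elided):

// Combines asTensorShape(), asDataType() and asQuantizationInfo(),
// and fixes the data layout to NHWC.
::arm_compute::TensorInfo info =
    asTensorInfo(shape, type_info, /*apply_dim_correction=*/true);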

◆ asTensorShape()

arm_compute::TensorShape onert::backend::acl_common::asTensorShape ( const ir::Shape shape,
bool  apply_dim_correction 
)

Definition at line 27 of file Convert.cc.

28{
29 // If shape's rank is 0, the tensor is a scalar
30 // Sometimes, some ACL kernel can use a scalar as tensor. But ACL does not allocate buffer for
31 // tensor having rank as 0.
32 const auto tensor_shape = shape.rank() == 0 ? ir::Shape{1} : shape;
33
34 const uint32_t rank = tensor_shape.rank();
35
36 ::arm_compute::TensorShape res{};
37
38 res.set_num_dimensions(rank);
39
40 for (uint32_t axis = 0; axis < rank; ++axis)
41 {
 42 // NOTE In some cases, incorrect dimensions are required.
 43 // For example, input_size is 1 in LSTM. The input-to-input weights([num_units, input_size]) of
 44 // LSTM is used as the weight of the FullyConnected.
 45 // The FullyConnected's weight must have at least 2 dimensions.
46 // However, if the dimension correction is applied to input_to_input_weights with input_size
47 // equal to 1, it will be changed to 1-D.
48 // So input_to_input_weights is not used by the weight of FullyConnected.
49 res.set(ToARMComputeAxis(rank, axis).value(), tensor_shape.dim(axis), apply_dim_correction);
50 }
51
52 return res;
53}
const Dimension & dim(uint32_t axis) const
Definition TensorShape.h:38
uint32_t rank(void) const
Definition TensorShape.h:35

References loco::TensorShape::dim(), loco::TensorShape::rank(), and ToARMComputeAxis().

Referenced by asTensorInfo(), and kernelGenFullyConnected().
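
Because every axis is routed through ToARMComputeAxis(), the dimension order of the resulting TensorShape is reversed relative to the IR shape. A worked sketch, assuming the usual swizzle mapping axis -> rank - axis - 1:

// ir::Shape {2, 3, 4, 5} becomes a TensorShape with
// dim(0) == 5, dim(1) == 4, dim(2) == 3, dim(3) == 2.
const auto acl_shape = asTensorShape(ir::Shape{2, 3, 4, 5}, /*apply_dim_correction=*/true);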

◆ convertPoolType()

arm_compute::PoolingType onert::backend::acl_common::convertPoolType ( ir::operation::Pool2D::PoolType  pool_type_ir)

Definition at line 281 of file Convert.cc.

282{
283 switch (pool_type_ir)
284 {
285 case ir::operation::Pool2D::PoolType::AVG:
286 return arm_compute::PoolingType::AVG;
287 case ir::operation::Pool2D::PoolType::L2:
288 return arm_compute::PoolingType::L2;
289 case ir::operation::Pool2D::PoolType::MAX:
290 return arm_compute::PoolingType::MAX;
291 default:
292 throw std::runtime_error("convertPoolType: Not supported operation yet");
293 }
294}

References onert::ir::operation::Pool2D::AVG, onert::ir::operation::Pool2D::L2, and onert::ir::operation::Pool2D::MAX.

◆ convertReduceType()

arm_compute::ReductionOperation onert::backend::acl_common::convertReduceType ( ir::operation::Reduce::ReduceType  reduce_type_ir)

Definition at line 296 of file Convert.cc.

297{
298 switch (reduce_type_ir)
299 {
300 case ir::operation::Reduce::ReduceType::MAX:
301 return arm_compute::ReductionOperation::MAX;
302 case ir::operation::Reduce::ReduceType::MIN:
303 return arm_compute::ReductionOperation::MIN;
304 case ir::operation::Reduce::ReduceType::SUM:
305 return arm_compute::ReductionOperation::SUM;
306 default:
307 throw std::runtime_error("convertReduceType: Not supported operation yet");
308 }
309}

References onert::ir::operation::Reduce::MAX, onert::ir::operation::Reduce::MIN, and onert::ir::operation::Reduce::SUM.

◆ copyInit()

template<typename T >
void onert::backend::acl_common::copyInit ( const onert::ir::Operand model_obj,
onert::backend::ITensor obj 
)

Definition at line 123 of file AclConstantInitializer.h.

124{
125 Init<T>(model_obj, obj);
126}

◆ createMemoryManager()

template<typename T_MemoryManager , typename T_PoolManager , typename T_LifetimeManager >
std::shared_ptr< T_MemoryManager > onert::backend::acl_common::createMemoryManager ( )

Definition at line 30 of file AclLinearMemoryManager.h.

31{
32 std::shared_ptr<T_LifetimeManager> lifetime_mgr = std::make_shared<T_LifetimeManager>();
33 std::shared_ptr<T_PoolManager> pool_mgr = std::make_shared<T_PoolManager>();
34
35 std::shared_ptr<T_MemoryManager> mem_mgr =
36 std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr);
37 return mem_mgr;
38}
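
The template parameters are typically ACL's own manager types. A hedged sketch for the CL backend (the concrete types may differ per backend):

// Build an on-demand ACL memory manager from a blob lifetime manager and a pool manager.
auto mem_mgr = createMemoryManager<::arm_compute::MemoryManagerOnDemand,
                                   ::arm_compute::PoolManager,
                                   ::arm_compute::BlobLifetimeManager>();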

◆ disableDimCorrection()

void onert::backend::acl_common::disableDimCorrection ( IACLTensor tensor)

Definition at line 29 of file AclKernelGen.cc.

30{
31 size_t input_rank = tensor->getShape().rank();
32 const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
33 .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), false);
34}

◆ enableDimCorrection()

void onert::backend::acl_common::enableDimCorrection ( IACLTensor tensor)

Definition at line 22 of file AclKernelGen.cc.

23{
24 size_t input_rank = tensor->getShape().rank();
25 const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
26 .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), true);
27}
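
Both helpers only flip the dimension-correction flag of the tensor's last dimension in its arm_compute::TensorInfo. A hedged usage pattern is to turn correction off before configuring a layer that must see trailing dimensions of size 1, then restore the default afterwards (acl_tensor is a placeholder IACLTensor pointer):

disableDimCorrection(acl_tensor); // keep trailing size-1 dimensions visible
// ... configure an ACL layer that relies on the full rank ...
enableDimCorrection(acl_tensor);  // restore the default behaviour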

◆ generateLayer() [1/2]

template<typename Layer , typename... Args>
std::unique_ptr< arm_compute::IFunction > onert::backend::acl_common::generateLayer ( Args &&...  args)

Definition at line 41 of file AclKernelGen.h.

42{
43 auto l = std::make_unique<Layer>();
44
45 l->configure(std::forward<Args>(args)...);
46
47 return l;
48}

◆ generateLayer() [2/2]

template<typename Layer , typename... Args>
std::unique_ptr< arm_compute::IFunction > onert::backend::acl_common::generateLayer ( std::shared_ptr< arm_compute::IMemoryManager >  memory_manager,
Args &&...  args 
)

Definition at line 52 of file AclKernelGen.h.

53{
54 auto l = std::make_unique<Layer>(memory_manager);
55
56 l->configure(std::forward<Args>(args)...);
57
58 return l;
59}
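
This overload forwards the memory manager to the layer's constructor before calling configure(). A hedged example with an ACL layer that accepts an IMemoryManager (mem_mgr, input and output are placeholders):

// CLSoftmaxLayer takes a memory manager in its constructor; the remaining
// arguments are forwarded to configure().
auto fn = generateLayer<::arm_compute::CLSoftmaxLayer>(
    mem_mgr, input->handle(), output->handle(), /*beta=*/1.0f);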

◆ getARMComputeAxises()

inline ::arm_compute::Coordinates onert::backend::acl_common::getARMComputeAxises ( uint32_t  rank)

Definition at line 52 of file Swizzle.h.

53{
54 ::arm_compute::Coordinates res{};
55
56 res.set_num_dimensions(rank);
57
58 for (uint32_t axis = 0; axis < rank; ++axis)
59 {
60 res.set(axis, ToARMComputeAxis(rank, axis).value());
61 }
62
63 return res;
64}

References ToARMComputeAxis().

Referenced by getARMComputePermutationVector().

◆ getARMComputePermutationVector()

inline ::arm_compute::PermutationVector onert::backend::acl_common::getARMComputePermutationVector ( uint32_t  rank,
const std::vector< int32_t >  runtime_pv 
)

Definition at line 68 of file Swizzle.h.

69{
 70 // rank up to 4 is supported
71 assert(rank <= 4);
72 assert(runtime_pv.size() > 0);
73
74 int new_pv[4] = {0};
75 ::arm_compute::Coordinates axises = getARMComputeAxises(rank);
76
77 for (uint32_t i = 0; i < rank; ++i)
78 {
79 new_pv[axises[i]] = ToARMComputeAxis(rank, runtime_pv[i]).value();
80 }
81
82 ::arm_compute::PermutationVector ACL_PV =
83 ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]};
84 ACL_PV.set_num_dimensions(rank);
85
86 return ACL_PV;
87}

References getARMComputeAxises(), ToARMComputeAxis(), and onert::backend::acl_common::ARMComputeAxis::value().

◆ initReverseOrder()

template<typename T >
void onert::backend::acl_common::initReverseOrder ( const ir::Operand model_obj,
backend::ITensor obj 
)

Definition at line 129 of file AclConstantInitializer.h.

130{
131 assert(model_obj.data());
132 const auto &shape = model_obj.shape();
133 const auto base = reinterpret_cast<const T *>(model_obj.data()->base());
134 assert(model_obj.shape().rank() == 1);
135 obj.access([&](ITensor &tensor) {
136 for (size_t i = 0; i < shape.num_elements(); ++i)
137 {
138 const T value = base[shape.num_elements() - i - 1];
139 T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset({static_cast<T>(i)}));
140 *into = value;
141 }
142 });
143}

References onert::ir::Operand::data(), and onert::ir::Operand::shape().

◆ kernelGenFullyConnected()

template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorBuilder , typename T_TensorRegistry >
std::unique_ptr< exec::IFunction > onert::backend::acl_common::kernelGenFullyConnected ( const ir::operation::FullyConnected node,
const ir::Operands operands,
const std::shared_ptr< T_TensorBuilder > &  tensor_builder,
const std::shared_ptr< T_TensorRegistry > &  tensor_reg 
)

Definition at line 221 of file AclKernelGen.h.

224{
226
227 const auto output_index{node.getOutputs().at(0)};
228 const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
229 const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
230 const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
231
232 const auto input_rank = operands.at(input_index).shape().rank();
233
234 [[maybe_unused]] const auto output_size =
235 operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
236 assert(bias_index.undefined() || operands.at(bias_index).shape().dim(0) == output_size);
237 assert(operands.at(weight_index).shape().dim(0) == output_size);
238 const auto batch_size =
239 operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
240 const auto input_size =
241 operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
242
243 // Check for reshaping input's shape into rank-2
244 bool needs_reshape = false;
245 ir::Shape reshape(2);
246 if (input_rank == 3 || input_rank == 4)
247 {
248 const auto &ifm_shape = operands.at(input_index).shape();
249 [[maybe_unused]] auto feature_size = 1;
250 for (int i = 0; i < ifm_shape.rank(); ++i)
251 {
252 feature_size *= ifm_shape.dim(i);
253 }
254
255 assert(feature_size == batch_size * input_size);
256
257 // for reshaping
258 needs_reshape = true;
259 reshape.dim(0) = batch_size; /* H */
260 reshape.dim(1) = input_size; /* W */
261 }
262
263 auto output_tensor = tensor_reg->getAclTensor(output_index);
264 const auto input_tensor = tensor_reg->getAclTensor(input_index);
265 const auto weight_tensor = tensor_reg->getAclTensor(weight_index);
266 const auto bias_tensor = bias_index.undefined() ? nullptr : tensor_reg->getAclTensor(bias_index);
267
268 typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL;
269 if (operands.at(weight_index).isConstant())
270 {
271 kernel_type = T_ACLLayer::KernelType::PREPROCESSED_WEIGHTS;
272 assert(operands.at(weight_index).data());
273 }
274
275 auto fn = generateLayer<T_ACLLayer>(
276 tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
277 weight_tensor->handle(), bias_tensor != nullptr ? bias_tensor->handle() : nullptr,
278 output_tensor->handle(), needs_reshape, asTensorShape(reshape), kernel_type);
279
280 return std::make_unique<T_FunctionWrapper>(std::move(fn));
281}
const OperandIndex & at(IOIndex set_index) const
const OperandIndexSequence & getOutputs() const override
Definition Operation.h:54
OperandIndexSequence & getInputs()
Definition Operation.h:51
const Object & at(const Index &index) const
Get the object that is associated with the given index.
CLTensor bias_tensor

References asTensorShape(), onert::util::ObjectManager< Index, Object >::at(), onert::ir::OperandIndexSequence::at(), bias_tensor, onert::ir::Operation::getInputs(), and onert::ir::Operation::getOutputs().
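
A concrete backend instantiates this template with its own function wrapper, tensor and layer types. A hedged sketch of an acl_cl-style instantiation (CLFullyConnectedReshapingLayer is assumed to be the ARMComputeEx layer used by that backend):

auto fn = kernelGenFullyConnected<AclFunction, ::arm_compute::ICLTensor,
                                  ::arm_compute::CLFullyConnectedReshapingLayer>(
    node, operands, tensor_builder, tensor_reg);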

◆ kernelGenLSTM()

template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorRegistry >
std::unique_ptr< exec::IFunction > onert::backend::acl_common::kernelGenLSTM ( const ir::operation::LSTM node,
const ir::Operands operands,
const std::shared_ptr< T_TensorRegistry > &  tensor_reg 
)

Definition at line 63 of file AclKernelGen.h.

66{
67 // TODO Support dynamic rnn
68 // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
69 const auto scratch_buffer_index{
70 node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
71 const auto output_state_out_index{
72 node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
73 const auto cell_state_out_index{
74 node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
75 const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
76
77 const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
78 const auto input_to_input_weights_index{
79 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
80 const auto input_to_forget_weights_index{
81 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
82 const auto input_to_cell_weights_index{
83 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
84 const auto input_to_output_weights_index{
85 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
86 const auto recurrent_to_input_weights_index{
87 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
88 const auto recurrent_to_forget_weights_index{
89 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
90 const auto recurrent_to_cell_weights_index{
91 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
92 const auto recurrent_to_output_weights_index{
93 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
94 const auto cell_to_input_weights_index{
95 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
96 const auto cell_to_forget_weights_index{
97 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
98 const auto cell_to_output_weights_index{
99 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
100 const auto input_gate_bias_index{
101 node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
102 const auto forget_gate_bias_index{
103 node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
104 const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
105 const auto output_gate_bias_index{
106 node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
107 const auto projection_weights_index{
108 node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
109 const auto projection_bias_index{
110 node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
111 const auto output_state_in_index{
112 node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
113 const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
114 const auto cell_threshold = node.param().cell_threshold;
115 const auto projection_threshold = node.param().projection_threshold;
116
117 bool has_input_to_input_weights = operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
118 operands.at(input_to_input_weights_index).shape().dim(1) != 0;
119 bool has_recurrent_to_input_weights =
120 operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
121 operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
122 bool has_cell_to_forget_weights = operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
123 bool has_cell_to_output_weights = operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
124 bool has_projection_weights = operands.at(projection_weights_index).shape().dim(0) != 0 &&
125 operands.at(projection_weights_index).shape().dim(1) != 0;
126 bool has_projection_bias = operands.at(projection_bias_index).shape().dim(0);
127
128 // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
129 // true: no CIFG
130 // false: CIFG
131 // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
132 bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
133
134 // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
135 // But the cell_to_input_weights does not exist in regular CIFG although peephole.
136 // true: peephole
137 // false: no peephole
138 bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
139
140 // NOTE Although the projection weights has data the projection bias may not have data.
141 bool has_projection_param = has_projection_weights;
142
143 const auto activation = node.param().activation;
144 const auto cell_clip = cell_threshold;
145 const auto projection_clip = projection_threshold;
146 assert(cell_clip >= 0.f && projection_clip >= 0.f);
147
148 auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index);
149 auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index);
150 auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index);
151 auto output_tensor = tensor_reg->getAclTensor(output_index);
152
153 auto input_tensor = tensor_reg->getAclTensor(input_index);
154
155 auto input_to_forget_weights_tensor = tensor_reg->getAclTensor(input_to_forget_weights_index);
156 auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index);
157 auto input_to_output_weights_tensor = tensor_reg->getAclTensor(input_to_output_weights_index);
158 auto recurrent_to_forget_weights_tensor =
159 tensor_reg->getAclTensor(recurrent_to_forget_weights_index);
160 auto recurrent_to_cell_weights_tensor = tensor_reg->getAclTensor(recurrent_to_cell_weights_index);
161 auto recurrent_to_output_weights_tensor =
162 tensor_reg->getAclTensor(recurrent_to_output_weights_index);
163
164 auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index);
165 auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index);
166 auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index);
167 auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index);
168 auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index);
169
170 auto act_info = asActivationLayerInfo(activation);
171
172 ::arm_compute::LSTMParams<T_Tensor> lstm_params{};
173 if (has_cifg_param)
174 {
175 auto input_to_input_weights_tensor =
176 tensor_reg->getAclTensor(input_to_input_weights_index); // optional
177 auto recurrent_to_input_weights_tensor =
178 tensor_reg->getAclTensor(recurrent_to_input_weights_index); // optional
179 auto cell_to_input_weights_handle =
180 has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index)->handle()
181 : nullptr; // optional (non-cifg && peephole)
182 auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index); // optional
183 lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
184 recurrent_to_input_weights_tensor->handle(),
185 cell_to_input_weights_handle, input_gate_bias_tensor->handle());
186 }
187 if (has_peephole_param)
188 {
189 auto cell_to_forget_weights_tensor =
190 tensor_reg->getAclTensor(cell_to_forget_weights_index); // optional
191 auto cell_to_output_weights_tensor =
192 tensor_reg->getAclTensor(cell_to_output_weights_index); // optional
193 lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
194 cell_to_output_weights_tensor->handle());
195 }
196 if (has_projection_param)
197 {
198 auto projection_weights_tensor = tensor_reg->getAclTensor(projection_weights_index); // optional
199 auto projection_bias_handle = has_projection_bias
200 ? tensor_reg->getAclTensor(projection_bias_index)->handle()
201 : nullptr; // optional
202 lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
203 }
204
205 auto fn = generateLayer<T_ACLLayer>(
206 input_tensor->handle(), input_to_forget_weights_tensor->handle(),
207 input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
208 recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
209 recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
210 cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), output_state_in_tensor->handle(),
211 cell_state_in_tensor->handle(), scratch_buffer_tensor->handle(),
212 output_state_out_tensor->handle(), cell_state_out_tensor->handle(), output_tensor->handle(),
213 lstm_params, act_info, cell_clip, projection_clip);
214
215 return std::make_unique<T_FunctionWrapper>(std::move(fn));
216}
const Param & param() const
Definition LSTM.h:82
::arm_compute::ActivationLayerInfo asActivationLayerInfo(const ir::Activation act_code)
Definition Convert.cc:131

References onert::ir::operation::LSTM::Param::activation, asActivationLayerInfo(), onert::util::ObjectManager< Index, Object >::at(), onert::ir::OperandIndexSequence::at(), onert::ir::operation::LSTM::CELL_BIAS, onert::ir::operation::LSTM::CELL_STATE_IN, onert::ir::operation::LSTM::CELL_STATE_OUT, onert::ir::operation::LSTM::Param::cell_threshold, onert::ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS, onert::ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS, onert::ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS, onert::ir::operation::LSTM::FORGET_GATE_BIAS, onert::ir::Operation::getInputs(), onert::ir::Operation::getOutputs(), onert::ir::operation::LSTM::INPUT, onert::ir::operation::LSTM::INPUT_GATE_BIAS, onert::ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS, onert::ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS, onert::ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS, onert::ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS, onert::ir::operation::LSTM::OUTPUT, onert::ir::operation::LSTM::OUTPUT_GATE_BIAS, onert::ir::operation::LSTM::OUTPUT_STATE_IN, onert::ir::operation::LSTM::OUTPUT_STATE_OUT, onert::ir::operation::LSTM::param(), onert::ir::operation::LSTM::PROJECTION_BIAS, onert::ir::operation::LSTM::Param::projection_threshold, onert::ir::operation::LSTM::PROJECTION_WEIGHTS, onert::ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS, onert::ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS, onert::ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS, onert::ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS, and onert::ir::operation::LSTM::SCRATCH_BUFFER.

◆ kernelGenPool2D()

template<typename T_ACLLayer , typename T_PoolOp , typename T_AclTensorRegistry >
std::unique_ptr<::arm_compute::IFunction > onert::backend::acl_common::kernelGenPool2D ( const T_PoolOp &  node,
const ir::Operands operands,
const std::shared_ptr< T_AclTensorRegistry > &  tensor_reg,
::arm_compute::PoolingType  pooling_type 
)

Definition at line 285 of file AclKernelGen.h.

288{
289 const auto ofm_index{node.getOutputs().at(0)};
290 const auto ifm_index{node.getInputs().at(0)};
291
292 const auto ofm_shape = operands.at(ofm_index).shape().asFeature();
293 const auto ifm_shape = operands.at(ifm_index).shape().asFeature();
294
295 const auto kh = node.param().kh;
296 const auto kw = node.param().kw;
297 const auto stride = node.param().stride;
298 const auto padding =
299 ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
300
301 VERBOSE(Pool2DParam) << "IFM_H: " << ifm_shape.H << std::endl;
302 VERBOSE(Pool2DParam) << "IFM_W: " << ifm_shape.W << std::endl;
303 VERBOSE(Pool2DParam) << "OFM_H: " << ofm_shape.H << std::endl;
304 VERBOSE(Pool2DParam) << "OFM_W: " << ofm_shape.W << std::endl;
305 VERBOSE(Pool2DParam) << "KER_H: " << kh << std::endl;
306 VERBOSE(Pool2DParam) << "KER_W: " << kw << std::endl;
307 VERBOSE(Pool2DParam) << "STRIDE_H: " << stride.vertical << std::endl;
308 VERBOSE(Pool2DParam) << "STRIDE_W: " << stride.horizontal << std::endl;
309 VERBOSE(Pool2DParam) << "PAD(T): " << padding.top << std::endl;
310 VERBOSE(Pool2DParam) << "PAD(B): " << padding.bottom << std::endl;
311 VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
312 VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
313
314 auto ofm_tensor = tensor_reg->getAclTensor(ofm_index);
315 auto ifm_tensor = tensor_reg->getAclTensor(ifm_index);
316
317 ::arm_compute::PoolingLayerInfo info{
318 pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
319 asPadStrideInfo(padding, stride), true /* exclude_padding */};
320
321 auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info);
322
323 return fn;
324}
#define VERBOSE(name, lv)
Definition Log.h:71

References asPadStrideInfo(), onert::util::ObjectManager< Index, Object >::at(), onert::ir::calculatePadding(), info, and VERBOSE.
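
The pooling type is converted separately with convertPoolType(), so a max-pooling kernel on the acl_cl backend could be generated roughly as follows and then wrapped for the runtime:

// Hedged sketch: CLPoolingLayer is the ACL layer assumed here.
auto raw_fn = kernelGenPool2D<::arm_compute::CLPoolingLayer>(
    node, operands, tensor_reg, convertPoolType(ir::operation::Pool2D::PoolType::MAX));
auto fn = asAclFunction(std::move(raw_fn));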

◆ ReorderBits()

template<typename T >
T onert::backend::acl_common::ReorderBits ( T  in,
size_t  numOfBits 
)
inline

Definition at line 89 of file Swizzle.h.

90{
91 assert(numOfBits > 0);
92 T out = 0;
93 for (int32_t i = numOfBits - 1; i >= 0; --i)
94 {
95 const uint32_t toShift = numOfBits - ToARMComputeAxis(numOfBits, i).value() - 1;
96 out += ((in & 1) << toShift);
97 in >>= 1;
98 }
99 return out;
100}
ARMComputeAxis ToARMComputeAxis(uint32_t rank, uint32_t axis)
Definition Swizzle.h:45

References ToARMComputeAxis(), and onert::backend::acl_common::ARMComputeAxis::value().
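
ReorderBits() applies the same axis swizzle to a bitmask, which is useful when an IR attribute encodes one bit per axis. Assuming ToARMComputeAxis() reverses the axis order (axis -> numOfBits - axis - 1), the bits are simply mirrored:

// Worked sketch under the reversal assumption: 0b0011 over 4 bits becomes 0b1100.
assert(ReorderBits<uint32_t>(0b0011u, 4) == 0b1100u);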

◆ ToARMComputeAxis()

ARMComputeAxis onert::backend::acl_common::ToARMComputeAxis ( uint32_t  rank,
uint32_t  axis 
)
inline

Definition at line 45 of file Swizzle.h.