ONE - On-device Neural Engine
onert::backend::acl_common Namespace Reference

Data Structures

class  AclActivationBuilder
 
class  AclBackendContext
 
class  AclConstantInitializer
 
class  AclFunction
 
class  AclInternalBufferManager
 Class for InternalBufferManager that holds an arm_compute::IMemoryManager pointer.
 
class  AclLinearMemoryManager
 
class  AclMemoryManager
 
class  AclSubTensorAnalyzer
 Class to analyze tensor subsumption.
 
class  AclTensorBuilder
 
class  AclTensorManager
 
class  AclTensorRegistry
 Tensor registry class for ACL backends.
 
class  ARMComputeAxis
 
class  IACLTensor
 Class representing a Tensor for ACL.
 
struct  IInternalBufferManager
 Interface for InternalBufferManager that holds an arm_compute::IMemoryManager pointer.
 

Typedefs

using float16 = uint16_t
 

Functions

template<typename T >
void copyInit (const onert::ir::Operand &model_obj, onert::backend::ITensor &obj)
 
template<typename T >
void initReverseOrder (const ir::Operand &model_obj, backend::ITensor &obj)
 
void enableDimCorrection (IACLTensor *tensor)
 
void disableDimCorrection (IACLTensor *tensor)
 
template<typename Layer , typename... Args>
std::unique_ptr< arm_compute::IFunction > generateLayer (Args &&...args)
 
template<typename Layer , typename... Args>
std::unique_ptr< arm_compute::IFunction > generateLayer (std::shared_ptr< arm_compute::IMemoryManager > memory_manager, Args &&...args)
 
template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorRegistry >
std::unique_ptr< exec::IFunction > kernelGenLSTM (const ir::operation::LSTM &node, const ir::Operands &operands, const std::shared_ptr< T_TensorRegistry > &tensor_reg)
 
template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorBuilder , typename T_TensorRegistry >
std::unique_ptr< exec::IFunction > kernelGenFullyConnected (const ir::operation::FullyConnected &node, const ir::Operands &operands, const std::shared_ptr< T_TensorBuilder > &tensor_builder, const std::shared_ptr< T_TensorRegistry > &tensor_reg)
 
template<typename T_ACLLayer , typename T_PoolOp , typename T_AclTensorRegistry >
std::unique_ptr<::arm_compute::IFunction > kernelGenPool2D (const T_PoolOp &node, const ir::Operands &operands, const std::shared_ptr< T_AclTensorRegistry > &tensor_reg, ::arm_compute::PoolingType pooling_type)
 
template<typename T_MemoryManager , typename T_PoolManager , typename T_LifetimeManager >
std::shared_ptr< T_MemoryManager > createMemoryManager ()
 
::arm_compute::TensorShape asTensorShape (const ir::Shape &shape, bool apply_dim_correction)
 
::arm_compute::Coordinates asTensorCoordinate (const ir::Coordinates &coord)
 
::arm_compute::DataType asDataType (const ir::DataType type)
 
::arm_compute::QuantizationInfo asQuantizationInfo (const float scale, const int32_t offset)
 
::arm_compute::TensorInfo asTensorInfo (const ir::Shape &shape, const ir::TypeInfo &typeInfo, bool apply_dim_correction)
 
::arm_compute::PadStrideInfo asPadStrideInfo (const ir::ExplicitPadding &padding, const ir::Stride &stride)
 
::arm_compute::ActivationLayerInfo asActivationLayerInfo (const ir::Activation act_code)
 
::arm_compute::ActivationLayerInfo asActivationLayerInfo (const ir::operation::ElementwiseActivation::Type op_type, float alpha, float beta)
 
arm_compute::Coordinates asCoordinates (const ir::Operand &operand, int32_t rank)
 
std::set< uint32_t > asSet (const ir::Operand &operand, int32_t rank)
 
std::unique_ptr< AclFunctionasAclFunction (std::unique_ptr<::arm_compute::IFunction > &&layer)
 
ir::DataType asRuntimeDataType (::arm_compute::DataType data_type)
 
arm_compute::PoolingType convertPoolType (ir::operation::Pool2D::PoolType pool_type_ir)
 
arm_compute::ReductionOperation convertReduceType (ir::operation::Reduce::ReduceType reduce_type_ir)
 
arm_compute::PixelValue asPixelValue (const ir::Operand &operand)
 
arm_compute::Size2D asDilation (uint32_t dilation_width, uint32_t dilation_height)
 
template<typename T_Function >
std::unique_ptr< T_Function > asFunction (std::unique_ptr<::arm_compute::IFunction > &&fn)
 
ARMComputeAxis ToARMComputeAxis (uint32_t rank, uint32_t axis)
 
inline ::arm_compute::Coordinates getARMComputeAxises (uint32_t rank)
 
inline ::arm_compute::PermutationVector getARMComputePermutationVector (uint32_t rank, const std::vector< int32_t > runtime_pv)
 
template<typename T >
T ReorderBits (T in, size_t numOfBits)
 

Typedef Documentation

◆ float16

using onert::backend::acl_common::float16 = uint16_t

Definition at line 121 of file AclConstantInitializer.cc.

Function Documentation

◆ asAclFunction()

std::unique_ptr< AclFunction > onert::backend::acl_common::asAclFunction ( std::unique_ptr<::arm_compute::IFunction > &&  layer)

Definition at line 250 of file Convert.cc.

251{
252 return std::make_unique<AclFunction>(std::move(layer));
253}
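
A typical use combines generateLayer() (documented below) with asAclFunction(): configure an ACL function object, then wrap it so it can be returned as an executable kernel. The sketch below is illustrative only; arm_compute::CLActivationLayer and the two tensor handles are assumptions, not taken from this page.

// Hedged sketch: configure an ACL layer and wrap it for the runtime.
// CLActivationLayer, ifm_tensor and ofm_tensor are assumed for illustration.
auto layer = generateLayer<arm_compute::CLActivationLayer>(
  ifm_tensor->handle(), ofm_tensor->handle(),
  asActivationLayerInfo(ir::Activation::RELU6));
auto fn = asAclFunction(std::move(layer)); // std::unique_ptr<AclFunction>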

◆ asActivationLayerInfo() [1/2]

arm_compute::ActivationLayerInfo onert::backend::acl_common::asActivationLayerInfo ( const ir::Activation  act_code)

Definition at line 135 of file Convert.cc.

136{
137 switch (act_code)
138 {
139 case ir::Activation::NONE:
140 return ::arm_compute::ActivationLayerInfo{};
141 case ir::Activation::RELU:
142 return ::arm_compute::ActivationLayerInfo{
143 ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
144 case ir::Activation::RELU1:
145 return ::arm_compute::ActivationLayerInfo{
146 ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
147 case ir::Activation::RELU6:
148 return ::arm_compute::ActivationLayerInfo{
149 ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
150 // Cases for activation of LSTM.
151 case ir::Activation::TANH:
152 return ::arm_compute::ActivationLayerInfo{
153 ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
154 case ir::Activation::SIGMOID:
155 // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
 156 // TODO In ACL and the NNAPI spec, Logistic currently always has L=1, k=1 and x0=0
 157 // (i.e. it is always sigmoid) regardless of the parameter values.
 158 // If ACL supports a non-sigmoid logistic, the parameter values should be fixed.
159 return ::arm_compute::ActivationLayerInfo{
160 ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
161 default:
162 throw std::runtime_error{"Not supported internal activation, yet"};
163 break;
164 }
165}

References onert::ir::NONE, onert::ir::RELU, onert::ir::RELU1, onert::ir::RELU6, onert::ir::SIGMOID, and onert::ir::TANH.

Referenced by kernelGenLSTM().

◆ asActivationLayerInfo() [2/2]

arm_compute::ActivationLayerInfo onert::backend::acl_common::asActivationLayerInfo ( const ir::operation::ElementwiseActivation::Type  op_type,
float  alpha,
float  beta 
)

Definition at line 168 of file Convert.cc.

170{
171 switch (op_type)
172 {
173 case ir::operation::ElementwiseActivation::Type::RELU:
174 if (beta == 0.f)
175 {
176 if (alpha == ir::operation::ElementwiseActivation::infinity)
177 {
178 return ::arm_compute::ActivationLayerInfo{
179 ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
180 }
181 else
182 {
183 return ::arm_compute::ActivationLayerInfo{
184 ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
185 }
186 }
187 else
188 {
189 return ::arm_compute::ActivationLayerInfo{
190 ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
191 }
192 case ir::operation::ElementwiseActivation::Type::TANH:
193 return ::arm_compute::ActivationLayerInfo{
194 ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
195 case ir::operation::ElementwiseActivation::Type::LOGISTIC:
196 // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
 197 // TODO In ACL and the NNAPI spec, Logistic currently always has L=1, k=1 and x0=0
 198 // (i.e. it is always sigmoid) regardless of the parameter values.
 199 // If ACL supports a non-sigmoid logistic, the parameter values should be fixed.
200 return ::arm_compute::ActivationLayerInfo{
201 ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
202 case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
203 return ::arm_compute::ActivationLayerInfo{
204 ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
205 default:
206 throw std::runtime_error{"Not supported internal elementwise activation, yet"};
207 break;
208 }
209}

References onert::ir::operation::ElementwiseActivation::infinity, onert::ir::operation::ElementwiseActivation::LEAKY_RELU, onert::ir::operation::ElementwiseActivation::LOGISTIC, onert::ir::operation::ElementwiseActivation::RELU, and onert::ir::operation::ElementwiseActivation::TANH.

◆ asCoordinates()

arm_compute::Coordinates onert::backend::acl_common::asCoordinates ( const ir::Operand operand,
int32_t  rank 
)

Definition at line 211 of file Convert.cc.

212{
213 std::set<uint32_t> axes = asSet(operand, rank);
214
215 arm_compute::Coordinates reduce_axes;
216 for (const int32_t axis : axes)
217 {
218 reduce_axes.set(reduce_axes.num_dimensions(), axis);
219 }
220
221 return reduce_axes;
222}

References asSet().

◆ asDataType()

arm_compute::DataType onert::backend::acl_common::asDataType ( const ir::DataType  type)

Definition at line 75 of file Convert.cc.

76{
77 switch (type)
78 {
79 case ir::DataType::FLOAT32:
80 return ::arm_compute::DataType::F32;
81 case ir::DataType::INT32:
82 return ::arm_compute::DataType::S32;
83 case ir::DataType::UINT32:
84 return ::arm_compute::DataType::U32;
85 case ir::DataType::QUANT_UINT8_ASYMM:
86 return ::arm_compute::DataType::QASYMM8;
87 case ir::DataType::BOOL8:
88 case ir::DataType::UINT8:
89 return ::arm_compute::DataType::U8;
90 case ir::DataType::QUANT_INT8_SYMM:
91 return ::arm_compute::DataType::QSYMM8;
92 case ir::DataType::QUANT_INT8_ASYMM:
93 return ::arm_compute::DataType::QASYMM8_SIGNED;
94 case ir::DataType::FLOAT16:
95 return ::arm_compute::DataType::F16;
96 case ir::DataType::INT64:
97 return ::arm_compute::DataType::S64;
98 case ir::DataType::QUANT_INT16_ASYMM:
99 return ::arm_compute::DataType::QASYMM16;
100 case ir::DataType::QUANT_INT8_SYMM_PER_CHANNEL:
101 return ::arm_compute::DataType::QSYMM8_PER_CHANNEL;
102 default:
103 throw std::runtime_error("Not supported internal data type, yet");
104 break;
105 }
106}

Referenced by asTensorInfo().

◆ asDilation()

arm_compute::Size2D onert::backend::acl_common::asDilation ( uint32_t  dilation_width,
uint32_t  dilation_height 
)

Definition at line 334 of file Convert.cc.

335{
336 assert(dilation_width != 0);
337 assert(dilation_height != 0);
338
339 return arm_compute::Size2D(dilation_width, dilation_height);
340}

◆ asFunction()

template<typename T_Function >
std::unique_ptr< T_Function > onert::backend::acl_common::asFunction ( std::unique_ptr<::arm_compute::IFunction > &&  fn)

Definition at line 71 of file Convert.h.

72{
73 return std::make_unique<T_Function>(std::move(fn));
74}

◆ asPadStrideInfo()

arm_compute::PadStrideInfo onert::backend::acl_common::asPadStrideInfo ( const ir::ExplicitPadding padding,
const ir::Stride stride 
)

Definition at line 123 of file Convert.cc.

125{
126 return ::arm_compute::PadStrideInfo{stride.horizontal,
127 stride.vertical,
128 padding.left,
129 padding.right,
130 padding.top,
131 padding.bottom,
132 ::arm_compute::DimensionRoundingType::FLOOR};
133}

References onert::ir::ExplicitPadding::bottom, onert::ir::Stride::horizontal, onert::ir::ExplicitPadding::left, onert::ir::ExplicitPadding::right, onert::ir::ExplicitPadding::top, and onert::ir::Stride::vertical.

Referenced by kernelGenPool2D().
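
A small sketch of how the arguments can be built, assuming ir::ExplicitPadding and ir::Stride are plain structs whose members can be assigned directly (only the member names referenced above are taken from this page):

// Hedged sketch: 1-pixel explicit padding on every side with a 2x2 stride.
ir::ExplicitPadding padding;
padding.left = 1; padding.right = 1;
padding.top = 1;  padding.bottom = 1;
ir::Stride stride;
stride.horizontal = 2; stride.vertical = 2;
auto pad_stride = asPadStrideInfo(padding, stride); // FLOOR rounding, as above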

◆ asPixelValue()

arm_compute::PixelValue onert::backend::acl_common::asPixelValue ( const ir::Operand operand)

Definition at line 313 of file Convert.cc.

314{
315 assert(operand.isConstant());
316 assert(operand.shape().num_elements() == 1);
317 switch (operand.typeInfo().type())
318 {
319 case ir::DataType::INT32:
320 return arm_compute::PixelValue(operand.asScalar<int32_t>());
321 case ir::DataType::INT64:
322 return arm_compute::PixelValue(operand.asScalar<int64_t>());
323 case ir::DataType::UINT32:
324 return arm_compute::PixelValue(operand.asScalar<uint64_t>());
325 case ir::DataType::UINT8:
326 return arm_compute::PixelValue(operand.asScalar<uint8_t>());
327 case ir::DataType::FLOAT32:
328 return arm_compute::PixelValue(operand.asScalar<float>());
329 default:
330 throw std::runtime_error("asPixelValue : Not supported datatype yet");
331 }
332}

References onert::ir::Operand::asScalar(), onert::ir::Operand::isConstant(), onert::ir::Operand::shape(), onert::ir::TypeInfo::type(), and onert::ir::Operand::typeInfo().

◆ asQuantizationInfo()

::arm_compute::QuantizationInfo onert::backend::acl_common::asQuantizationInfo ( const float  scale,
const int32_t  offset 
)

Definition at line 108 of file Convert.cc.

109{
110 return ::arm_compute::QuantizationInfo(scale, offset);
111}

Referenced by asTensorInfo().

◆ asRuntimeDataType()

ir::DataType onert::backend::acl_common::asRuntimeDataType ( ::arm_compute::DataType  data_type)

Definition at line 255 of file Convert.cc.

256{
257 switch (data_type)
258 {
259 case ::arm_compute::DataType::F32:
260 return ir::DataType::FLOAT32;
261 case ::arm_compute::DataType::S32:
262 return ir::DataType::INT32;
263 case ::arm_compute::DataType::U32:
264 return ir::DataType::UINT32;
265 case ::arm_compute::DataType::QASYMM8:
266 return ir::DataType::QUANT_UINT8_ASYMM;
267 case ::arm_compute::DataType::QASYMM8_SIGNED:
268 return ir::DataType::QUANT_INT8_ASYMM;
269 case ::arm_compute::DataType::U8:
270 return ir::DataType::UINT8;
271 case ::arm_compute::DataType::QSYMM8:
272 return ir::DataType::QUANT_INT8_SYMM;
273 case ::arm_compute::DataType::F16:
274 return ir::DataType::FLOAT16;
275 case ::arm_compute::DataType::S64:
276 return ir::DataType::INT64;
277 default:
278 throw std::runtime_error{"Not supported acl data type, yet"};
279 break;
280 }
281}

Referenced by onert::backend::acl_common::IACLTensor::data_type().
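
Taken together with asDataType() above, this conversion round-trips for the types both functions support; a short sketch:

// Round-trip sketch based on the two switch statements on this page.
auto acl_type = asDataType(ir::DataType::FLOAT16); // ::arm_compute::DataType::F16
auto ir_type = asRuntimeDataType(acl_type);        // ir::DataType::FLOAT16 again
// Note the mapping is not one-to-one: BOOL8 and UINT8 both map to U8,
// and U8 maps back to UINT8.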

◆ asSet()

std::set< uint32_t > onert::backend::acl_common::asSet ( const ir::Operand operand,
int32_t  rank 
)

Definition at line 224 of file Convert.cc.

225{
226 std::set<std::uint32_t> axes;
227
228 for (size_t i = 0; i < operand.shape().num_elements(); ++i)
229 {
230 int32_t axis = 0;
231 switch (operand.typeInfo().type())
232 {
233 case ir::DataType::INT32:
234 axis = reinterpret_cast<const int32_t *>(operand.data()->base())[i];
235 break;
236 case ir::DataType::INT64:
237 axis = reinterpret_cast<const int64_t *>(operand.data()->base())[i];
238 break;
239 default:
240 throw std::runtime_error("acl_common::asSet: Not supported data type");
241 }
242 if (axis < 0)
243 axis += rank;
244 axes.insert(ToARMComputeAxis(rank, axis).value());
245 }
246
247 return axes;
248}

References onert::ir::Operand::data(), onert::ir::Operand::shape(), ToARMComputeAxis(), onert::ir::TypeInfo::type(), and onert::ir::Operand::typeInfo().

Referenced by asCoordinates().

◆ asTensorCoordinate()

arm_compute::Coordinates onert::backend::acl_common::asTensorCoordinate ( const ir::Coordinates coord)

Definition at line 59 of file Convert.cc.

60{
61 const uint32_t rank = coord.size();
62
63 ::arm_compute::Coordinates res{};
64
65 res.set_num_dimensions(rank);
66
67 for (uint32_t axis = 0; axis < rank; ++axis)
68 {
69 res.set(ToARMComputeAxis(rank, axis).value(), coord[axis]);
70 }
71
72 return res;
73}

References onert::ir::Coordinates::size(), and ToARMComputeAxis().

◆ asTensorInfo()

arm_compute::TensorInfo onert::backend::acl_common::asTensorInfo ( const ir::Shape shape,
const ir::TypeInfo typeInfo,
bool  apply_dim_correction 
)

Definition at line 113 of file Convert.cc.

115{
116 ::arm_compute::TensorInfo info(asTensorShape(shape, apply_dim_correction), 1,
117 asDataType(typeInfo.type()),
118 asQuantizationInfo(typeInfo.scale(), typeInfo.zero_point()));
119 info.set_data_layout(::arm_compute::DataLayout::NHWC);
120 return info;
121}

References asDataType(), asQuantizationInfo(), asTensorShape(), onert::ir::TypeInfo::scale(), onert::ir::TypeInfo::type(), and onert::ir::TypeInfo::zero_point().
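
A minimal sketch of building a TensorInfo for a quantized tensor. The shape, scale and zero point are made-up values, and it is assumed that ir::Shape accepts an initializer list and that ir::TypeInfo can be constructed from a data type, scale and zero point:

// Hedged sketch: NHWC QASYMM8 tensor info with illustrative quantization params.
ir::Shape shape{1, 224, 224, 3};
ir::TypeInfo type_info{ir::DataType::QUANT_UINT8_ASYMM, /*scale=*/0.0078125f, /*zero_point=*/128};
auto acl_info = asTensorInfo(shape, type_info, /*apply_dim_correction=*/true);
// acl_info carries the converted shape, QASYMM8 data type, quantization info and NHWC layout.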

◆ asTensorShape()

arm_compute::TensorShape onert::backend::acl_common::asTensorShape ( const ir::Shape shape,
bool  apply_dim_correction 
)

Definition at line 31 of file Convert.cc.

32{
 33 // If the shape's rank is 0, the tensor is a scalar.
 34 // Some ACL kernels can use a scalar as a tensor, but ACL does not allocate a buffer for
 35 // a tensor whose rank is 0.
36 const auto tensor_shape = shape.rank() == 0 ? ir::Shape{1} : shape;
37
38 const uint32_t rank = tensor_shape.rank();
39
40 ::arm_compute::TensorShape res{};
41
42 res.set_num_dimensions(rank);
43
44 for (uint32_t axis = 0; axis < rank; ++axis)
45 {
 46 // NOTE In some cases, an uncorrected dimension is required.
 47 // For example, input_size is 1 in LSTM. The input-to-input weights ([num_units, input_size]) of
 48 // LSTM are used as the weight of a FullyConnected.
 49 // The FullyConnected weight must have at least 2 dimensions.
 50 // However, if dimension correction were applied to input_to_input_weights with input_size
 51 // equal to 1, it would be reduced to 1-D,
 52 // and input_to_input_weights could then not be used as the FullyConnected weight.
53 res.set(ToARMComputeAxis(rank, axis).value(), tensor_shape.dim(axis), apply_dim_correction);
54 }
55
56 return res;
57}

References onert::ir::Shape::dim(), onert::ir::Shape::rank(), and ToARMComputeAxis().

Referenced by asTensorInfo(), and kernelGenFullyConnected().
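
As the note in the code explains, a rank-0 IR shape is promoted to a 1-D shape of size 1. A hedged example, assuming a default-constructed ir::Shape has rank 0:

// Hedged sketch: scalar promotion performed by asTensorShape().
ir::Shape scalar_shape{}; // rank 0
auto acl_shape = asTensorShape(scalar_shape, /*apply_dim_correction=*/true);
// acl_shape.num_dimensions() == 1 and acl_shape[0] == 1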

◆ convertPoolType()

arm_compute::PoolingType onert::backend::acl_common::convertPoolType ( ir::operation::Pool2D::PoolType  pool_type_ir)

Definition at line 283 of file Convert.cc.

284{
285 switch (pool_type_ir)
286 {
287 case ir::operation::Pool2D::PoolType::AVG:
288 return arm_compute::PoolingType::AVG;
289 case ir::operation::Pool2D::PoolType::L2:
290 return arm_compute::PoolingType::L2;
291 case ir::operation::Pool2D::PoolType::MAX:
292 return arm_compute::PoolingType::MAX;
293 default:
294 throw std::runtime_error("convertPoolType: Not supported operation yet");
295 }
296}

References onert::ir::operation::Pool2D::AVG, onert::ir::operation::Pool2D::L2, and onert::ir::operation::Pool2D::MAX.

◆ convertReduceType()

arm_compute::ReductionOperation onert::backend::acl_common::convertReduceType ( ir::operation::Reduce::ReduceType  reduce_type_ir)

Definition at line 298 of file Convert.cc.

299{
300 switch (reduce_type_ir)
301 {
302 case ir::operation::Reduce::ReduceType::MAX:
303 return arm_compute::ReductionOperation::MAX;
304 case ir::operation::Reduce::ReduceType::MIN:
305 return arm_compute::ReductionOperation::MIN;
306 case ir::operation::Reduce::ReduceType::SUM:
307 return arm_compute::ReductionOperation::SUM;
308 default:
309 throw std::runtime_error("convertReduceType: Not supported operation yet");
310 }
311}

References onert::ir::operation::Reduce::MAX, onert::ir::operation::Reduce::MIN, and onert::ir::operation::Reduce::SUM.

◆ copyInit()

template<typename T >
void onert::backend::acl_common::copyInit ( const onert::ir::Operand model_obj,
onert::backend::ITensor obj 
)

Definition at line 127 of file AclConstantInitializer.h.

128{
129 Init<T>(model_obj, obj);
130}

◆ createMemoryManager()

template<typename T_MemoryManager , typename T_PoolManager , typename T_LifetimeManager >
std::shared_ptr< T_MemoryManager > onert::backend::acl_common::createMemoryManager ( )

Definition at line 34 of file AclLinearMemoryManager.h.

35{
36 std::shared_ptr<T_LifetimeManager> lifetime_mgr = std::make_shared<T_LifetimeManager>();
37 std::shared_ptr<T_PoolManager> pool_mgr = std::make_shared<T_PoolManager>();
38
39 std::shared_ptr<T_MemoryManager> mem_mgr =
40 std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr);
41 return mem_mgr;
42}
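
A hedged sketch of instantiating this with the Compute Library's standard manager types; MemoryManagerOnDemand, PoolManager and BlobLifetimeManager are assumptions about the caller, not taken from this page. The result is typically handed to the memory-manager overload of generateLayer().

// Hedged sketch: build an ACL memory manager for lifetime-based allocation.
auto mem_mgr = createMemoryManager<arm_compute::MemoryManagerOnDemand,
                                   arm_compute::PoolManager,
                                   arm_compute::BlobLifetimeManager>();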

◆ disableDimCorrection()

void onert::backend::acl_common::disableDimCorrection ( IACLTensor tensor)

Definition at line 40 of file AclKernelGen.h.

41{
42 size_t input_rank = tensor->getShape().rank();
43 const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
44 .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), false);
45}

◆ enableDimCorrection()

void onert::backend::acl_common::enableDimCorrection ( IACLTensor tensor)

Definition at line 33 of file AclKernelGen.h.

34{
35 size_t input_rank = tensor->getShape().rank();
36 const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
37 .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), true);
38}
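
enableDimCorrection() and disableDimCorrection() toggle the dimension-correction flag on a tensor's last dimension. A hedged sketch of the typical pattern; the tensor and the layer being configured are assumptions for illustration:

// Hedged sketch: configure a layer that must see the uncorrected last dimension.
disableDimCorrection(input_tensor);  // input_tensor: IACLTensor*, assumed
// ... generateLayer<SomeAclLayer>(...) using input_tensor->handle() ...
enableDimCorrection(input_tensor);   // restore the default behaviour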

◆ generateLayer() [1/2]

template<typename Layer , typename... Args>
std::unique_ptr< arm_compute::IFunction > onert::backend::acl_common::generateLayer ( Args &&...  args)

Definition at line 48 of file AclKernelGen.h.

49{
50 auto l = std::make_unique<Layer>();
51
52 l->configure(std::forward<Args>(args)...);
53
54 return l;
55}

◆ generateLayer() [2/2]

template<typename Layer , typename... Args>
std::unique_ptr< arm_compute::IFunction > onert::backend::acl_common::generateLayer ( std::shared_ptr< arm_compute::IMemoryManager >  memory_manager,
Args &&...  args 
)

Definition at line 59 of file AclKernelGen.h.

60{
61 auto l = std::make_unique<Layer>(memory_manager);
62
63 l->configure(std::forward<Args>(args)...);
64
65 return l;
66}

◆ getARMComputeAxises()

inline ::arm_compute::Coordinates onert::backend::acl_common::getARMComputeAxises ( uint32_t  rank)

Definition at line 56 of file Swizzle.h.

57{
58 ::arm_compute::Coordinates res{};
59
60 res.set_num_dimensions(rank);
61
62 for (uint32_t axis = 0; axis < rank; ++axis)
63 {
64 res.set(axis, ToARMComputeAxis(rank, axis).value());
65 }
66
67 return res;
68}

References ToARMComputeAxis().

Referenced by getARMComputePermutationVector().

◆ getARMComputePermutationVector()

inline ::arm_compute::PermutationVector onert::backend::acl_common::getARMComputePermutationVector ( uint32_t  rank,
const std::vector< int32_t >  runtime_pv 
)

Definition at line 72 of file Swizzle.h.

73{
 74 // rank up to 4 is supported
75 assert(rank <= 4);
76 assert(runtime_pv.size() > 0);
77
78 int new_pv[4] = {0};
79 ::arm_compute::Coordinates axises = getARMComputeAxises(rank);
80
81 for (uint32_t i = 0; i < rank; ++i)
82 {
83 new_pv[axises[i]] = ToARMComputeAxis(rank, runtime_pv[i]).value();
84 }
85
86 ::arm_compute::PermutationVector ACL_PV =
87 ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]};
88 ACL_PV.set_num_dimensions(rank);
89
90 return ACL_PV;
91}

References getARMComputeAxises(), ToARMComputeAxis(), and onert::backend::acl_common::ARMComputeAxis::value().

◆ initReverseOrder()

template<typename T >
void onert::backend::acl_common::initReverseOrder ( const ir::Operand model_obj,
backend::ITensor obj 
)

Definition at line 133 of file AclConstantInitializer.h.

134{
135 assert(model_obj.data());
136 const auto &shape = model_obj.shape();
137 const auto base = reinterpret_cast<const T *>(model_obj.data()->base());
138 assert(model_obj.shape().rank() == 1);
139 obj.access([&](ITensor &tensor) {
140 for (size_t i = 0; i < shape.num_elements(); ++i)
141 {
142 const T value = base[shape.num_elements() - i - 1];
143 T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset({static_cast<T>(i)}));
144 *into = value;
145 }
146 });
147}

References onert::ir::Operand::data(), and onert::ir::Operand::shape().

◆ kernelGenFullyConnected()

template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorBuilder , typename T_TensorRegistry >
std::unique_ptr< exec::IFunction > onert::backend::acl_common::kernelGenFullyConnected ( const ir::operation::FullyConnected node,
const ir::Operands operands,
const std::shared_ptr< T_TensorBuilder > &  tensor_builder,
const std::shared_ptr< T_TensorRegistry > &  tensor_reg 
)

Definition at line 228 of file AclKernelGen.h.

231{
232 using ir::operation::FullyConnected;
233
234 const auto output_index{node.getOutputs().at(0)};
235 const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
236 const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
237 const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
238
239 const auto input_rank = operands.at(input_index).shape().rank();
240
241 [[maybe_unused]] const auto output_size =
242 operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
243 assert(bias_index.undefined() || operands.at(bias_index).shape().dim(0) == output_size);
244 assert(operands.at(weight_index).shape().dim(0) == output_size);
245 const auto batch_size =
246 operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
247 const auto input_size =
248 operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
249
250 // Check for reshaping input's shape into rank-2
251 bool needs_reshape = false;
252 ir::Shape reshape(2);
253 if (input_rank == 3 || input_rank == 4)
254 {
255 const auto &ifm_shape = operands.at(input_index).shape();
256 [[maybe_unused]] auto feature_size = 1;
257 for (int i = 0; i < ifm_shape.rank(); ++i)
258 {
259 feature_size *= ifm_shape.dim(i);
260 }
261
262 assert(feature_size == batch_size * input_size);
263
264 // for reshaping
265 needs_reshape = true;
266 reshape.dim(0) = batch_size; /* H */
267 reshape.dim(1) = input_size; /* W */
268 }
269
270 auto output_tensor = tensor_reg->getAclTensor(output_index);
271 const auto input_tensor = tensor_reg->getAclTensor(input_index);
272 const auto weight_tensor = tensor_reg->getAclTensor(weight_index);
273 const auto bias_tensor = bias_index.undefined() ? nullptr : tensor_reg->getAclTensor(bias_index);
274
275 typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL;
276 if (operands.at(weight_index).isConstant())
277 {
278 kernel_type = T_ACLLayer::KernelType::PREPROCESSED_WEIGHTS;
279 assert(operands.at(weight_index).data());
280 }
281
282 auto fn = generateLayer<T_ACLLayer>(
283 tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
284 weight_tensor->handle(), bias_tensor != nullptr ? bias_tensor->handle() : nullptr,
285 output_tensor->handle(), needs_reshape, asTensorShape(reshape), kernel_type);
286
287 return std::make_unique<T_FunctionWrapper>(std::move(fn));
288}

References asTensorShape(), onert::util::ObjectManager< Index, Object >::at(), onert::ir::OperandIndexSequence::at(), onert::ir::Operation::getInputs(), and onert::ir::Operation::getOutputs().

◆ kernelGenLSTM()

template<typename T_FunctionWrapper , typename T_Tensor , typename T_ACLLayer , typename T_TensorRegistry >
std::unique_ptr< exec::IFunction > onert::backend::acl_common::kernelGenLSTM ( const ir::operation::LSTM node,
const ir::Operands operands,
const std::shared_ptr< T_TensorRegistry > &  tensor_reg 
)

Definition at line 70 of file AclKernelGen.h.

73{
 74 // TODO Support dynamic RNN
 75 // TODO Fix subtle error in the case of non-CIFG, non-peephole, and no projection.
76 const auto scratch_buffer_index{
77 node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
78 const auto output_state_out_index{
79 node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
80 const auto cell_state_out_index{
81 node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
82 const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
83
84 const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
85 const auto input_to_input_weights_index{
86 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
87 const auto input_to_forget_weights_index{
88 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
89 const auto input_to_cell_weights_index{
90 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
91 const auto input_to_output_weights_index{
92 node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
93 const auto recurrent_to_input_weights_index{
94 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
95 const auto recurrent_to_forget_weights_index{
96 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
97 const auto recurrent_to_cell_weights_index{
98 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
99 const auto recurrent_to_output_weights_index{
100 node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
101 const auto cell_to_input_weights_index{
102 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
103 const auto cell_to_forget_weights_index{
104 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
105 const auto cell_to_output_weights_index{
106 node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
107 const auto input_gate_bias_index{
108 node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
109 const auto forget_gate_bias_index{
110 node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
111 const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
112 const auto output_gate_bias_index{
113 node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
114 const auto projection_weights_index{
115 node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
116 const auto projection_bias_index{
117 node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
118 const auto output_state_in_index{
119 node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
120 const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
121 const auto cell_threshold = node.param().cell_threshold;
122 const auto projection_threshold = node.param().projection_threshold;
123
124 bool has_input_to_input_weights = operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
125 operands.at(input_to_input_weights_index).shape().dim(1) != 0;
126 bool has_recurrent_to_input_weights =
127 operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
128 operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
129 bool has_cell_to_forget_weights = operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
130 bool has_cell_to_output_weights = operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
131 bool has_projection_weights = operands.at(projection_weights_index).shape().dim(0) != 0 &&
132 operands.at(projection_weights_index).shape().dim(1) != 0;
133 bool has_projection_bias = operands.at(projection_bias_index).shape().dim(0);
134
135 // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
136 // true: no CIFG
137 // false: CIFG
 138 // NOTE The cell_to_input_weights does not exist for non-peephole LSTM, even when it is non-CIFG.
139 bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
140
141 // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
 142 // But the cell_to_input_weights does not exist for CIFG LSTM, even with peephole.
143 // true: peephole
144 // false: no peephole
145 bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
146
 147 // NOTE Although the projection weights have data, the projection bias may not.
148 bool has_projection_param = has_projection_weights;
149
150 const auto activation = node.param().activation;
151 const auto cell_clip = cell_threshold;
152 const auto projection_clip = projection_threshold;
153 assert(cell_clip >= 0.f && projection_clip >= 0.f);
154
155 auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index);
156 auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index);
157 auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index);
158 auto output_tensor = tensor_reg->getAclTensor(output_index);
159
160 auto input_tensor = tensor_reg->getAclTensor(input_index);
161
162 auto input_to_forget_weights_tensor = tensor_reg->getAclTensor(input_to_forget_weights_index);
163 auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index);
164 auto input_to_output_weights_tensor = tensor_reg->getAclTensor(input_to_output_weights_index);
165 auto recurrent_to_forget_weights_tensor =
166 tensor_reg->getAclTensor(recurrent_to_forget_weights_index);
167 auto recurrent_to_cell_weights_tensor = tensor_reg->getAclTensor(recurrent_to_cell_weights_index);
168 auto recurrent_to_output_weights_tensor =
169 tensor_reg->getAclTensor(recurrent_to_output_weights_index);
170
171 auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index);
172 auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index);
173 auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index);
174 auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index);
175 auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index);
176
177 auto act_info = asActivationLayerInfo(activation);
178
179 ::arm_compute::LSTMParams<T_Tensor> lstm_params{};
180 if (has_cifg_param)
181 {
182 auto input_to_input_weights_tensor =
183 tensor_reg->getAclTensor(input_to_input_weights_index); // optional
184 auto recurrent_to_input_weights_tensor =
185 tensor_reg->getAclTensor(recurrent_to_input_weights_index); // optional
186 auto cell_to_input_weights_handle =
187 has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index)->handle()
188 : nullptr; // optional (non-cifg && peephole)
189 auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index); // optional
190 lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
191 recurrent_to_input_weights_tensor->handle(),
192 cell_to_input_weights_handle, input_gate_bias_tensor->handle());
193 }
194 if (has_peephole_param)
195 {
196 auto cell_to_forget_weights_tensor =
197 tensor_reg->getAclTensor(cell_to_forget_weights_index); // optional
198 auto cell_to_output_weights_tensor =
199 tensor_reg->getAclTensor(cell_to_output_weights_index); // optional
200 lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
201 cell_to_output_weights_tensor->handle());
202 }
203 if (has_projection_param)
204 {
205 auto projection_weights_tensor = tensor_reg->getAclTensor(projection_weights_index); // optional
206 auto projection_bias_handle = has_projection_bias
207 ? tensor_reg->getAclTensor(projection_bias_index)->handle()
208 : nullptr; // optional
209 lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
210 }
211
212 auto fn = generateLayer<T_ACLLayer>(
213 input_tensor->handle(), input_to_forget_weights_tensor->handle(),
214 input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
215 recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
216 recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
217 cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), output_state_in_tensor->handle(),
218 cell_state_in_tensor->handle(), scratch_buffer_tensor->handle(),
219 output_state_out_tensor->handle(), cell_state_out_tensor->handle(), output_tensor->handle(),
220 lstm_params, act_info, cell_clip, projection_clip);
221
222 return std::make_unique<T_FunctionWrapper>(std::move(fn));
223}

References onert::ir::operation::LSTM::Param::activation, asActivationLayerInfo(), onert::util::ObjectManager< Index, Object >::at(), onert::ir::OperandIndexSequence::at(), onert::ir::operation::LSTM::CELL_BIAS, onert::ir::operation::LSTM::CELL_STATE_IN, onert::ir::operation::LSTM::CELL_STATE_OUT, onert::ir::operation::LSTM::Param::cell_threshold, onert::ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS, onert::ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS, onert::ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS, onert::ir::operation::LSTM::FORGET_GATE_BIAS, onert::ir::Operation::getInputs(), onert::ir::Operation::getOutputs(), onert::ir::operation::LSTM::INPUT, onert::ir::operation::LSTM::INPUT_GATE_BIAS, onert::ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS, onert::ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS, onert::ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS, onert::ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS, onert::ir::operation::LSTM::OUTPUT, onert::ir::operation::LSTM::OUTPUT_GATE_BIAS, onert::ir::operation::LSTM::OUTPUT_STATE_IN, onert::ir::operation::LSTM::OUTPUT_STATE_OUT, onert::ir::operation::LSTM::param(), onert::ir::operation::LSTM::PROJECTION_BIAS, onert::ir::operation::LSTM::Param::projection_threshold, onert::ir::operation::LSTM::PROJECTION_WEIGHTS, onert::ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS, onert::ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS, onert::ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS, onert::ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS, and onert::ir::operation::LSTM::SCRATCH_BUFFER.

◆ kernelGenPool2D()

template<typename T_ACLLayer , typename T_PoolOp , typename T_AclTensorRegistry >
std::unique_ptr<::arm_compute::IFunction > onert::backend::acl_common::kernelGenPool2D ( const T_PoolOp &  node,
const ir::Operands operands,
const std::shared_ptr< T_AclTensorRegistry > &  tensor_reg,
::arm_compute::PoolingType  pooling_type 
)

Definition at line 292 of file AclKernelGen.h.

295{
296 const auto ofm_index{node.getOutputs().at(0)};
297 const auto ifm_index{node.getInputs().at(0)};
298
299 const auto ofm_shape = operands.at(ofm_index).shape().asFeature();
300 const auto ifm_shape = operands.at(ifm_index).shape().asFeature();
301
302 const auto kh = node.param().kh;
303 const auto kw = node.param().kw;
304 const auto stride = node.param().stride;
305 const auto padding =
306 ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
307
308 VERBOSE(Pool2DParam) << "IFM_H: " << ifm_shape.H << std::endl;
309 VERBOSE(Pool2DParam) << "IFM_W: " << ifm_shape.W << std::endl;
310 VERBOSE(Pool2DParam) << "OFM_H: " << ofm_shape.H << std::endl;
311 VERBOSE(Pool2DParam) << "OFM_W: " << ofm_shape.W << std::endl;
312 VERBOSE(Pool2DParam) << "KER_H: " << kh << std::endl;
313 VERBOSE(Pool2DParam) << "KER_W: " << kw << std::endl;
314 VERBOSE(Pool2DParam) << "STRIDE_H: " << stride.vertical << std::endl;
315 VERBOSE(Pool2DParam) << "STRIDE_W: " << stride.horizontal << std::endl;
316 VERBOSE(Pool2DParam) << "PAD(T): " << padding.top << std::endl;
317 VERBOSE(Pool2DParam) << "PAD(B): " << padding.bottom << std::endl;
318 VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
319 VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
320
321 auto ofm_tensor = tensor_reg->getAclTensor(ofm_index);
322 auto ifm_tensor = tensor_reg->getAclTensor(ifm_index);
323
324 ::arm_compute::PoolingLayerInfo info{
325 pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
326 asPadStrideInfo(padding, stride), true /* exclude_padding */};
327
328 auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info);
329
330 return fn;
331}

References asPadStrideInfo(), onert::util::ObjectManager< Index, Object >::at(), onert::ir::calculatePadding(), and VERBOSE.
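
A hedged usage sketch; the Pool2D node, the operands set, the tensor registry and arm_compute::CLPoolingLayer are assumptions for illustration:

// Hedged sketch: generate an average-pooling function for a CL-based backend.
auto raw_fn = kernelGenPool2D<::arm_compute::CLPoolingLayer>(
  node, operands, tensor_reg, ::arm_compute::PoolingType::AVG);
auto fn = asAclFunction(std::move(raw_fn));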

◆ ReorderBits()

template<typename T >
T onert::backend::acl_common::ReorderBits ( T  in,
size_t  numOfBits 
)
inline

Definition at line 93 of file Swizzle.h.

94{
95 assert(numOfBits > 0);
96 T out = 0;
97 for (int32_t i = numOfBits - 1; i >= 0; --i)
98 {
99 const uint32_t toShift = numOfBits - ToARMComputeAxis(numOfBits, i).value() - 1;
100 out += ((in & 1) << toShift);
101 in >>= 1;
102 }
103 return out;
104}

References ToARMComputeAxis(), and onert::backend::acl_common::ARMComputeAxis::value().

◆ ToARMComputeAxis()

ARMComputeAxis onert::backend::acl_common::ToARMComputeAxis ( uint32_t  rank,
uint32_t  axis 
)
inline