ONE - On-device Neural Engine
Namespace reference: onert::backend::cpu::ops
Enumerations | |
| enum class | ArithmeticType { kAdd , kSub , kMul , kDiv } |
| enum class | ElementwiseActivationType { kElu , kLogistic , kReLU , kTanh , kLeakyReLU , kGELU } |
| enum class | ElementwiseBinaryType { kFloorDiv , kFloorMod , kLogicalAnd , kLogicalOr , kMax , kMin } |
| enum class | ElementwiseUnaryType { kAbs , kCast , kCos , kDequantize , kErf , kExp , kFloor , kLog , kLogicalNot , kNeg , kQuantize , kRound , kRSqrt , kSin , kSqrt , kSquare , kZerosLike } |
| enum class | PoolType { kAvg , kL2 , kMax } |
| enum class | ReduceType { kSum , kProd , kMax , kMin , kAny , kAll , kInvalid } |
Functions | |
| int32_t | blockSizeFor (int32_t index, int32_t block_size=32) |
| template<typename T > | |
| Array< T > | toArray (uint8_t *ptr, std::vector< int32_t > &descr) |
| template<typename InputT , typename OutputT > | |
| void | affineQuantize (const IPortableTensor *input, IPortableTensor *output) |
| uint32_t | getNumberOfDimensions (const IPortableTensor *tensor) |
| uint32_t | getNumberOfElements (const IPortableTensor *tensor) |
| uint32_t | getSizeOfDimension (const IPortableTensor *tensor, uint32_t dimensionIdx) |
| void | QuantizeMultiplier (double double_multiplier, int32_t *quantized_multiplier, int *shift) |
| void | GetQuantizedConvolutionMultiplier (const IPortableTensor *input, const IPortableTensor *filter, const IPortableTensor *bias, const IPortableTensor *output, double *multiplier) |
| void | GetQuantizedConvolutionMultipliersAndShifts (float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size, int num_channels, std::vector< int32_t > &per_channel_output_multiplier, std::vector< int > &per_channel_output_shift) |
| void | QuantizeMultiplierGreaterThanOne (double double_multiplier, int32_t *quantized_multiplier, int *left_shift) |
| void | CalculateActivationRangeQuantized (ir::Activation activation, const IPortableTensor *output, int32_t *act_min, int32_t *act_max) |
| bool | HaveSameShapes (const IPortableTensor *input1, const IPortableTensor *input2) |
| int32_t | CalculateInputRadius (int input_integer_bits, int input_left_shift) |
| uint32_t | sizeOfData (OperandType type, const std::vector< int32_t > &dimensions) |
| nnfw::cker::PaddingType | getPaddingType (ir::PaddingType ir_padding_type) |
| std::vector< int32_t > | getReducerAxes (const IPortableTensor *axes) |
| nnfw::cker::RoPEMode | getRoPEMode (ir::operation::RoPE::RoPEMode rope_mode) |
| nnfw::cker::Shape | getExtendedTensorShape (const IPortableTensor *tensor) |
| nnfw::cker::Shape | getShape (const IPortableTensor *tensor) |
| nnfw::cker::FusedActivationFunctionType | convertActivationType (const ir::Activation activation) |
| int32_t | getAxis (uint32_t rank, int32_t axis) |
| template<typename T > | |
| const T * | getBuffer (const IPortableTensor *tensor) |
| template<typename T > | |
| T * | getBuffer (IPortableTensor *tensor) |
| template<> | |
| const bool * | getBuffer (const IPortableTensor *tensor) |
| template<> | |
| bool * | getBuffer (IPortableTensor *tensor) |
| template<typename T > | |
| void | GetRawShape (const IPortableTensor *input, T *output_data) |
enum class onert::backend::cpu::ops::ArithmeticType (strong / scoped enum)
| Enumerator | |
|---|---|
| kAdd | |
| kSub | |
| kMul | |
| kDiv | |
Definition at line 28 of file BinaryArithmeticLayer.h.
enum class onert::backend::cpu::ops::ElementwiseActivationType (strong / scoped enum)
| Enumerator | |
|---|---|
| kElu | |
| kLogistic | |
| kReLU | |
| kTanh | |
| kLeakyReLU | |
| kGELU | |
Definition at line 27 of file ElementwiseActivationLayer.h.
enum class onert::backend::cpu::ops::ElementwiseBinaryType (strong / scoped enum)
| Enumerator | |
|---|---|
| kFloorDiv | |
| kFloorMod | |
| kLogicalAnd | |
| kLogicalOr | |
| kMax | |
| kMin | |
Definition at line 27 of file ElementwiseBinaryLayer.h.
enum class onert::backend::cpu::ops::ElementwiseUnaryType (strong / scoped enum)
| Enumerator | |
|---|---|
| kAbs | |
| kCast | |
| kCos | |
| kDequantize | |
| kErf | |
| kExp | |
| kFloor | |
| kLog | |
| kLogicalNot | |
| kNeg | |
| kQuantize | |
| kRound | |
| kRSqrt | |
| kSin | |
| kSqrt | |
| kSquare | |
| kZerosLike | |
Definition at line 27 of file ElementwiseUnaryLayer.h.
enum class onert::backend::cpu::ops::PoolType (strong / scoped enum)
| Enumerator | |
|---|---|
| kAvg | |
| kL2 | |
| kMax | |
Definition at line 28 of file Pool2DLayer.h.
enum class onert::backend::cpu::ops::ReduceType (strong / scoped enum)
| Enumerator | |
|---|---|
| kSum | |
| kProd | |
| kMax | |
| kMin | |
| kAny | |
| kAll | |
| kInvalid | |
| void onert::backend::cpu::ops::affineQuantize | ( | const IPortableTensor * | input, |
| IPortableTensor * | output | ||
| ) |
Definition at line 479 of file ElementwiseUnaryLayer.cc.
References getShape(), and nnfw::cker::Quantize().
| int32_t onert::backend::cpu::ops::blockSizeFor | ( | int32_t | index, |
| int32_t | block_size = 32 |
||
| ) |
Calculate the block-aligned size that includes the given index.
| index | The current token position (0-based index) |
| block_size | The minimum memory access unit (default: 32) |
This function calculates the minimum block-aligned memory size needed to access data from position 0 up to and including the specified index.
Example with block_size = 32: indices 0-31 require a size of 32; index 32 requires a size of 64. (NOTE: the original code example was lost in extraction.)
This ensures block-aligned memory access for optimal performance.
Definition at line 157 of file AttentionLayer.cc.
| void onert::backend::cpu::ops::CalculateActivationRangeQuantized | ( | ir::Activation | activation, |
| const IPortableTensor * | output, | ||
| int32_t * | act_min, | ||
| int32_t * | act_max | ||
| ) |
Definition at line 138 of file OperationUtils.cc.
References onert::ir::NONE, onert::ir::RELU, onert::ir::RELU1, onert::ir::RELU6, and onert::ir::SIGMOID.
Referenced by onert::backend::cpu::ops::PoolLayer::configure(), onert::backend::cpu::ops::DepthwiseConvolutionLayer::convQ8i(), onert::backend::cpu::ops::DepthwiseConvolutionLayer::convQ8uPerChannel(), onert::backend::cpu::ops::DepthwiseConvolutionLayer::convQ8uPerTensor(), and onert::backend::cpu::ops::FullyConnectedLayer::fullyConnectedQuant8().
| int32_t onert::backend::cpu::ops::CalculateInputRadius | ( | int | input_integer_bits, |
| int | input_left_shift | ||
| ) |
Definition at line 219 of file OperationUtils.cc.
| nnfw::cker::FusedActivationFunctionType onert::backend::cpu::ops::convertActivationType | ( | const ir::Activation | activation | ) |
inline
Definition at line 106 of file OperationUtils.h.
References nnfw::cker::kNone, nnfw::cker::kRelu, nnfw::cker::kRelu1, nnfw::cker::kRelu6, nnfw::cker::kSigmoid, nnfw::cker::kTanh, onert::ir::NONE, onert::ir::RELU, onert::ir::RELU1, onert::ir::RELU6, onert::ir::SIGMOID, and onert::ir::TANH.
Referenced by onert::backend::cpu::ops::FullyConnectedLayer::fullyConnected16x1Float32(), onert::backend::cpu::ops::FullyConnectedLayer::fullyConnectedFloat32(), onert::backend::cpu::ops::FullyConnectedLayer::fullyConnectedHybrid(), onert::backend::cpu::ops::FullyConnectedLayer::fullyConnectedSparseWeight(), and onert::backend::cpu::ops::LSTMLayer::LSTMFloat().
| int32_t onert::backend::cpu::ops::getAxis | ( | uint32_t | rank, | int32_t | axis | ) |
inline
Definition at line 127 of file OperationUtils.h.
| const T * onert::backend::cpu::ops::getBuffer | ( | const IPortableTensor * | tensor | ) |
Definition at line 169 of file OperationUtils.h.
References getBuffer().
Referenced by getBuffer(), and getBuffer().
template<> const bool * onert::backend::cpu::ops::getBuffer | ( | const IPortableTensor * | tensor | ) | — bool specialization
inline
Definition at line 179 of file OperationUtils.h.
References getBuffer().
| T * onert::backend::cpu::ops::getBuffer | ( | IPortableTensor * | tensor | ) |
Definition at line 174 of file OperationUtils.h.
References getBuffer().
template<> bool * onert::backend::cpu::ops::getBuffer | ( | IPortableTensor * | tensor | ) | — bool specialization
inline
Definition at line 185 of file OperationUtils.h.
References getBuffer().
| nnfw::cker::Shape onert::backend::cpu::ops::getExtendedTensorShape | ( | const IPortableTensor * | tensor | ) |
inline
Definition at line 67 of file OperationUtils.h.
| uint32_t onert::backend::cpu::ops::getNumberOfDimensions | ( | const IPortableTensor * | tensor | ) |
Definition at line 26 of file OperationUtils.cc.
References getNumberOfDimensions().
| uint32_t onert::backend::cpu::ops::getNumberOfElements | ( | const IPortableTensor * | tensor | ) |
Definition at line 32 of file OperationUtils.cc.
References getNumberOfElements().
| nnfw::cker::PaddingType onert::backend::cpu::ops::getPaddingType | ( | ir::PaddingType | ir_padding_type | ) |
Definition at line 262 of file OperationUtils.cc.
References onert::ir::EXPLICIT, getPaddingType(), nnfw::cker::kNone, nnfw::cker::kSame, nnfw::cker::kValid, onert::ir::SAME, and onert::ir::VALID.
Referenced by getPaddingType(), and onert::backend::cpu::ops::ConvolutionLayer::prepare().
| void onert::backend::cpu::ops::GetQuantizedConvolutionMultiplier | ( | const IPortableTensor * | input, |
| const IPortableTensor * | filter, | ||
| const IPortableTensor * | bias, | ||
| const IPortableTensor * | output, | ||
| double * | multiplier | ||
| ) |
Definition at line 77 of file OperationUtils.cc.
Referenced by onert::backend::cpu::ops::DepthwiseConvolutionLayer::convQ8uPerTensor(), and onert::backend::cpu::ops::FullyConnectedLayer::fullyConnectedQuant8().
| void onert::backend::cpu::ops::GetQuantizedConvolutionMultipliersAndShifts | ( | float | input_scale, |
| float | output_scale, | ||
| const float * | filter_scales, | ||
| size_t | filter_scales_size, | ||
| int | num_channels, | ||
| std::vector< int32_t > & | per_channel_output_multiplier, | ||
| std::vector< int > & | per_channel_output_shift | ||
| ) |
Definition at line 93 of file OperationUtils.cc.
References QuantizeMultiplier().
Referenced by onert::backend::cpu::ops::ConvolutionLayer::prepare().
| void onert::backend::cpu::ops::GetRawShape | ( | const IPortableTensor * | input, |
| T * | output_data | ||
| ) |
Definition at line 53 of file ShapeLayer.cc.
Referenced by onert::backend::cpu::ops::ShapeLayer::run().
| std::vector< int32_t > onert::backend::cpu::ops::getReducerAxes | ( | const IPortableTensor * | axes | ) |
Definition at line 278 of file OperationUtils.cc.
References onert::backend::IPortableTensor::data_type(), and onert::backend::IPortableTensor::getShape().
Referenced by onert::backend::train::ops::MeanLayer::backward(), onert::backend::cpu::ops::MeanLayer::MeanFloat32(), onert::backend::cpu::ops::MeanLayer::MeanQuant8(), and onert::backend::cpu::ops::ReduceLayer::run().
| nnfw::cker::RoPEMode onert::backend::cpu::ops::getRoPEMode | ( | ir::operation::RoPE::RoPEMode | rope_mode | ) |
Definition at line 305 of file OperationUtils.cc.
References onert::ir::operation::RoPE::GPT_J, onert::ir::operation::RoPE::GPT_NEOX, nnfw::cker::kGptJ, and nnfw::cker::kGptNeox.
| nnfw::cker::Shape onert::backend::cpu::ops::getShape | ( | const IPortableTensor * | tensor | ) |
inline
Definition at line 89 of file OperationUtils.h.
References nnfw::cker::Shape::DimsData().
Referenced by affineQuantize(), onert::backend::cpu::ops::BatchMatMulLayer::batchMatMulFloat32(), onert::backend::cpu::ops::BatchToSpaceNDLayer::batchToSpaceNDGeneric(), onert::backend::cpu::ops::ConcatLayer::concatenationGeneral(), onert::backend::cpu::ops::ConcatLayer::concatenationQuant8(), onert::backend::cpu::ops::AttentionLayer::configure(), onert::backend::cpu::ops::ElementwiseActivationLayer::configure(), onert::backend::cpu::ops::DepthwiseConvolutionLayer::convFloat32(), onert::backend::cpu::ops::DepthwiseConvolutionLayer::convQ8i(), onert::backend::cpu::ops::DepthwiseConvolutionLayer::convQ8iHybridPerChannel(), onert::backend::cpu::ops::DepthwiseConvolutionLayer::convQ8uPerChannel(), onert::backend::cpu::ops::DepthwiseConvolutionLayer::convQ8uPerTensor(), onert::backend::cpu::ops::ElementwiseActivationLayer::EvalUsingLookupTable(), onert::backend::cpu::ops::FullyConnectedLayer::fullyConnected16x1Float32(), onert::backend::cpu::ops::FullyConnectedLayer::fullyConnectedFloat32(), onert::backend::cpu::ops::FullyConnectedLayer::fullyConnectedHybrid(), onert::backend::cpu::ops::FullyConnectedLayer::fullyConnectedQuant8(), onert::backend::cpu::ops::FullyConnectedLayer::fullyConnectedSparseWeight(), onert::backend::cpu::ops::FusedBatchNormLayer::fusedbatchnormFloat32(), onert::backend::cpu::ops::LogSoftMaxLayer::logsoftmaxFloat32(), onert::backend::cpu::ops::LogSoftMaxLayer::logsoftmaxQuant8(), onert::backend::cpu::ops::MeanLayer::MeanFloat32(), onert::backend::cpu::ops::MeanLayer::MeanQuant8(), onert::backend::cpu::ops::OneHotLayer::oneHotImpl(), onert::backend::cpu::ops::PackLayer::packImpl(), onert::backend::cpu::ops::PadLayer::padImpl(), onert::backend::cpu::ops::PowLayer::powFloat32(), onert::backend::cpu::ops::ConvolutionLayer::prepare(), onert::backend::cpu::ops::FullyConnectedLayer::prepare(), onert::backend::cpu::ops::AddNLayer::run(), onert::backend::cpu::ops::BroadcastToLayer::run(), onert::backend::cpu::ops::DynamicUpdateSliceLayer::run(), 
onert::backend::cpu::ops::QuantizeLayer::run(), onert::backend::cpu::ops::FillLayer::run(), onert::backend::cpu::ops::L2NormLayer::run(), onert::backend::cpu::ops::ReduceLayer::run(), onert::backend::cpu::ops::ResizeBilinearLayer::run(), onert::backend::cpu::ops::ReverseLayer::run(), onert::backend::cpu::ops::RmsNormLayer::run(), onert::backend::cpu::ops::TopKV2Layer::run(), onert::backend::cpu::ops::SoftMaxLayer::softmaxFloat32(), onert::backend::cpu::ops::SoftMaxLayer::softmaxQuant8(), onert::backend::cpu::ops::SplitLayer::split(), onert::backend::cpu::ops::SplitVLayer::splitV(), onert::backend::cpu::ops::SqDiffLayer::SqDiffFloat32(), onert::backend::cpu::ops::StatelessRandomUniformLayer::StatelessRandomUniformFloat32(), onert::backend::cpu::ops::TileLayer::tileFloat32(), and onert::backend::cpu::ops::TransposeLayer::transpose().
| uint32_t onert::backend::cpu::ops::getSizeOfDimension | ( | const IPortableTensor * | tensor, |
| uint32_t | dimensionIdx | ||
| ) |
Definition at line 44 of file OperationUtils.cc.
References getSizeOfDimension().
| bool onert::backend::cpu::ops::HaveSameShapes | ( | const IPortableTensor * | input1, |
| const IPortableTensor * | input2 | ||
| ) |
Definition at line 195 of file OperationUtils.cc.
References getNumberOfDimensions(), and onert::backend::IPortableTensor::getShape().
Referenced by onert::backend::cpu::ops::PowLayer::powFloat32(), and onert::backend::cpu::ops::SelectLayer::run().
| void onert::backend::cpu::ops::QuantizeMultiplier | ( | double | double_multiplier, |
| int32_t * | quantized_multiplier, | ||
| int * | shift | ||
| ) |
Definition at line 56 of file OperationUtils.cc.
Referenced by onert::backend::cpu::ops::QuantizeLayer::configure(), onert::backend::cpu::ops::DepthwiseConvolutionLayer::convQ8uPerTensor(), onert::backend::cpu::ops::FullyConnectedLayer::fullyConnectedQuant8(), and GetQuantizedConvolutionMultipliersAndShifts().
| void onert::backend::cpu::ops::QuantizeMultiplierGreaterThanOne | ( | double | double_multiplier, |
| int32_t * | quantized_multiplier, | ||
| int * | left_shift | ||
| ) |
Definition at line 121 of file OperationUtils.cc.
| uint32_t onert::backend::cpu::ops::sizeOfData | ( | OperandType | type, |
| const std::vector< int32_t > & | dimensions | ||
| ) |
Definition at line 229 of file OperationUtils.cc.
Referenced by onert::backend::cpu::ops::SplitLayer::split(), and onert::backend::cpu::ops::SplitVLayer::splitV().
| Array< T > onert::backend::cpu::ops::toArray | ( | uint8_t * | ptr, |
| std::vector< int32_t > & | descr | ||
| ) |
Definition at line 313 of file DetectionPostProcessLayer.cc.
References ndarray::Shape::dim().