ONE - On-device Neural Engine
onert_micro::execute::pal Namespace Reference

Data Structures

struct  AddFn
 
struct  DivFn
 
struct  FloorDivFn
 
struct  FloorModFn
 
struct  MaximumFn
 
struct  MinimumFn
 
struct  MulFn
 
struct  NdArrayDesc
 
struct  SquaredDifferenceFn
 
struct  SubFn
 
struct  TanhFunctor
 

Functions

OMStatus Add (const core::ArithmeticQuantParams &params, const uint32_t flat_size, const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data)
 
template<typename T1 , typename T2 , typename T3 >
OMStatus ArgMax (const core::OMRuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, const core::OMRuntimeShape &output_shape, T2 *output_data)
 
OMStatus AveragePool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
OMStatus ConvPerChannel (const core::ConvQuant &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int32_t *bias_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
OMStatus DepthwiseConvPerChannel (const core::ConvQuant &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int32_t *bias_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
template<>
OMStatus FullyConnected< int8_t > (const core::FullyConnectedParams &params, const int8_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int32_t *bias_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
template<>
OMStatus FullyConnected (const core::FullyConnectedParams &params, const int16_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int64_t *bias_data, const core::OMRuntimeShape &output_shape, int16_t *output_data)
 
OMStatus MaxPool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
OMStatus Mul (const core::ArithmeticQuantParams &params, const uint32_t flat_size, const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data)
 
template<>
OMStatus ReLUCommon< int8_t > (const int flat_size, const int8_t *input_data, int8_t *output_data, const float alpha, const bool is_relu_6)
 
template<>
OMStatus Softmax< int8_t, int8_t > (const core::SoftmaxParams &params, const int8_t *input_data, int8_t *output_data)
 
OMStatus SVDF (const core::SVDFQuantParams &params, const int8_t *input_data, const int8_t *weights_feature_data, const int8_t *weights_time_data, const int32_t *bias_data, int8_t *state_data, int8_t *output_data, const core::OMRuntimeShape &input_shape, const core::OMRuntimeShape &weights_feature_shape, const core::OMRuntimeShape &weights_time_shape, const core::OMRuntimeShape &bias_shape, const core::OMRuntimeShape &output_shape)
 
template<typename T >
OMStatus Abs (const core::OMRuntimeShape &shape, const T *input_data, T *output_data)
 
int8_t AddFunc (int8_t x, int8_t y, const core::ArithmeticQuantParams &params)
 
template<typename T >
OMStatus Add (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastAdd4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus BroadcastAdd4DSlow (const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const int8_t *input1_data, const core::OMRuntimeShape &input2_shape, const int8_t *input2_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
template<typename T >
OMStatus AddN (const size_t flat_size, const size_t num_inputs, const T *const *input_data, T *output_data)
 
template<typename T1 , typename T2 , typename T3 , typename Cmp >
OMStatus ArgMinMax (const core::OMRuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, const core::OMRuntimeShape &output_shape, T2 *output_data, const Cmp &cmp)
 
template<typename T , typename Fn >
OMStatus ArithmeticOp (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
void ElementWise (const uint32_t size, const core::ArithmeticQuantParams &params, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
template<typename T , typename Fn >
void ArithmeticOpScalar (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input_data, const T scalar_value, T *output_data)
 
template<typename T , typename Fn >
OMStatus BroadcastArithmeticOp4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
void BroadcastInput1 (int size, const core::ArithmeticQuantParams &params, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
template<typename T >
void BroadcastInput2 (int size, const core::ArithmeticQuantParams &params, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
template<typename T >
void BroadcastRecursiveDimensions (const core::ArithmeticQuantParams &params, int dimension, size_t *input1_offset_p, size_t *input2_offset_p, size_t *output_offset, size_t *compressed_input1_stride, size_t *compressed_input2_stride, size_t *compressed_output_shape, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
template<typename T >
void BroadcastBinaryFunction6DSlow (const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
OMStatus AveragePool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T >
OMStatus BatchToSpaceND (const core::OMRuntimeShape &unextended_input1_shape, const T *input1_data, const core::OMRuntimeShape &unextended_input2_shape, const int32_t *block_shape_data, const core::OMRuntimeShape &unextended_input3_shape, const int32_t *crops_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T , typename Fn >
OMStatus BinaryOp (const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T , typename Fn >
OMStatus BroadcastBinaryOp4DSlow (const core::OMRuntimeShape &input1_shape, const float *input1_data, const core::OMRuntimeShape &input2_shape, const float *input2_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename FromT , typename ToT >
OMStatus Cast (const core::OMRuntimeShape &input_shape, const FromT *input_data, const core::OMRuntimeShape &output_shape, ToT *output_data)
 
template<typename T >
OMStatus Ceil (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
bool LessFn (T lhs, T rhs)
 
template<typename T >
bool LessEqualFn (T lhs, T rhs)
 
template<typename T >
bool EqualFn (T lhs, T rhs)
 
template<typename T >
bool GreaterFn (T lhs, T rhs)
 
template<typename T >
bool GreaterEqualFn (T lhs, T rhs)
 
template<typename T >
bool NotEqualFn (T lhs, T rhs)
 
template<typename T >
void ComparisonNoScaling (const int64_t flat_size, const T *input1_data, const T *input2_data, bool *output_data, bool F(T, T))
 
template<typename T , typename AccType >
void BroadcastComparison4DSlowWithScaling (const core::ComparisonParams &op_params, const core::OMRuntimeShape &unextended_input1_shape, const T *input1_data, const core::OMRuntimeShape &unextended_input2_shape, const T *input2_data, const core::OMRuntimeShape &unextended_output_shape, bool *output_data, bool F(AccType, AccType))
 
template<typename T , typename AccType >
void ComparisonWithScaling (const core::ComparisonParams &op_params, const int64_t flat_size, const T *input1_data, const T *input2_data, bool *output_data, bool F(AccType, AccType))
 
template<typename T >
void BroadcastComparison4DSlowNoScaling (const core::ComparisonParams &op_params, const core::OMRuntimeShape &unextended_input1_shape, const T *input1_data, const core::OMRuntimeShape &unextended_input2_shape, const T *input2_data, const core::OMRuntimeShape &unextended_output_shape, bool *output_data, bool F(T, T))
 
template<typename Scalar >
OMStatus Concatenation (core::ConcatenationParams &params, std::vector< uint32_t > &input_shapes, std::vector< const Scalar * > &input_data, const core::OMRuntimeShape &output_shape, Scalar *output_data)
 
OMStatus ConvFloat (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &filter_shape, const float *filter_data, const float *bias_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T >
OMStatus Cos (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus DepthwiseConv2D (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &filter_shape, const T *filter_data, const T *bias_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<>
OMStatus DepthwiseConv2D< float > (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &filter_shape, const float *filter_data, const float *bias_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename InputT , typename OutputT >
OMStatus Dequantize (const core::QuantizationParams op_params, const uint32_t flat_size, const InputT *input_data, OutputT *output_data)
 
template<typename T >
OMStatus Div (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastDiv4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus Elu (const int flat_size, const float *input_data, float *output_data)
 
template<typename T >
OMStatus Exp (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Fill (const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Floor (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
void FloorDiv (const int flat_size, const float *input1_data, const float *input2_data, float *output_data)
 
void BroadcastFloorDiv4DSlow (const core::OMRuntimeShape &input1_shape, const float *input1_data, const core::OMRuntimeShape &input2_shape, const float *input2_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
void FloorMod (const int flat_size, const float *input1_data, const float *input2_data, float *output_data)
 
void BroadcastFloorMod4DSlow (const core::OMRuntimeShape &input1_shape, const float *input1_data, const core::OMRuntimeShape &input2_shape, const float *input2_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename InputType , typename WeightType , typename OutputType , typename BiasType >
OMStatus FullyConnected (const core::FullyConnectedParams &params, const InputType *input_data, const core::OMRuntimeShape &filter_shape, const WeightType *filter_data, const BiasType *bias_data, const core::OMRuntimeShape &output_shape, OutputType *output_data)
 
template<typename WeightType >
OMStatus FullyConnected (const core::FullyConnectedParams &params, const float *input_data, const core::OMRuntimeShape &filter_shape, const WeightType *filter_data, const float *bias_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename ParamsT , typename IndicesT >
OMStatus GatherND (core::OMRuntimeShape params_shape, const ParamsT *param_data, core::OMRuntimeShape indices_shape, const IndicesT *index_data, ParamsT *output_data)
 
OMStatus GRU (const float *input_data, const float *weight_input_data, const float *weight_hidden_data, const float *bias_input_data, const float *bias_hidden_data, const float *hidden_state_data, float *output_data, float *output_input_data, float *output_hidden_data, const core::OMRuntimeShape &input_shape, const core::OMRuntimeShape &output_shape, const core::OMRuntimeShape &weight_input_shape, const core::OMRuntimeShape &weight_hidden_shape, const size_t intermediate_buffer_size, float *intermediate_buffer)
 
OMStatus L2Normalization (const core::L2NormalizationParams &params, const float *input_data, float *output_data)
 
OMStatus L2Pool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T >
OMStatus Log (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus Logistic (const int flat_size, const float *input_data, float *output_data)
 
OMStatus Logistic (const int flat_size, const int8_t *input_data, float input_scale, int input_zero_point, int8_t *output_data, float output_scale, int output_zero_point)
 
OMStatus LogSoftmax (const core::LogSoftmaxParams &params, const float *input_data, float *output_data)
 
OMStatus Maximum (const int flat_size, const float *input1_data, const float *input2_data, float *output_data)
 
OMStatus BroadcastMaximum4DSlow (const core::OMRuntimeShape &input1_shape, const float *input1_data, const core::OMRuntimeShape &input2_shape, const float *input2_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
OMStatus MaxPool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
OMStatus Minimum (const int flat_size, const float *input1_data, const float *input2_data, float *output_data)
 
template<typename T >
OMStatus BroadcastMinimum4DSlow (const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Mul (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastMul4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus BroadcastMul6DSlow (const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Neg (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus Pad (const core::PadParams &op_params, const core::OMRuntimeShape &input_shape, const float *input_data, const float pad_value, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename InputT , typename OutputT >
OMStatus Quantize (const core::QuantizationParams op_params, const uint32_t flat_size, const InputT *input_data, OutputT *output_data)
 
template<typename Type >
OMStatus ReLUCommon (const int flat_size, const Type *input_data, Type *output_data, const float alpha, const bool is_relu_6)
 
template<typename T >
OMStatus Round (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<>
OMStatus Round< float > (const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T >
OMStatus Rsqrt (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Sin (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus SISOOperation (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data, std::function< T(T)> const &func)
 
template<typename T >
OMStatus Slice (const core::SliceParams &op_params, const core::OMRuntimeShape &input_shape, const T *input_data, T *output_data)
 
template<typename T , typename U >
OMStatus Softmax (const core::SoftmaxParams &params, const T *input_data, U *output_data)
 
template<>
OMStatus Softmax< float, float > (const core::SoftmaxParams &params, const float *input_data, float *output_data)
 
template<typename T >
OMStatus SpaceToBatchND (const core::OMRuntimeShape &unextended_input1_shape, const T *input1_data, const core::OMRuntimeShape &unextended_input2_shape, const int32_t *block_shape_data, const core::OMRuntimeShape &unextended_input3_shape, const int32_t *paddings_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T >
OMStatus SpaceToDepth (const int32_t block_size, const core::OMRuntimeShape &unextended_input_shape, const T *input_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T >
OMStatus Split (const core::SplitParams &params, const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, int32_t axis_value)
 
template<typename T >
OMStatus Sqrt (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Square (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus SquaredDifference (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastSquaredDifference4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus StridedSlice (core::StridedSliceParams &op_params, const core::OMRuntimeShape &unextended_input_shape, const T *input_data, T *output_data)
 
int8_t SubFunc (int8_t x, int8_t y, const core::ArithmeticQuantParams &params)
 
template<typename T >
OMStatus Sub (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastSub4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus BroadcastSub4DSlow (const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const int8_t *input1_data, const core::OMRuntimeShape &input2_shape, const int8_t *input2_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
OMStatus SVDF (const float *input_data, const float *weights_feature_data, const float *weights_time_data, const float *bias_data, float *state_data, float *scratch_data, float *output_data, const int rank, const int input_size, const int batch_size, const int num_filters, const int num_units, const int memory_size, const circle::ActivationFunctionType activation)
 
template<typename T >
OMStatus Tanh (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T , int N>
OMStatus TransposeImpl (const core::TransposeParams &params, const core::OMRuntimeShape &unextended_input_shape, const T *input_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T , int N = 5>
OMStatus Transpose (const core::TransposeParams &params, const core::OMRuntimeShape &unextended_input_shape, const T *input_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T >
OMStatus TransposeConv (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &filter_shape, const T *filter_data, const T *bias_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<>
OMStatus TransposeConv< float > (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &filter_shape, const float *filter_data, const float *bias_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T , typename Fn >
OMStatus UnaryOp (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Unpack (const core::SplitParams &params, const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, int32_t axis_value)
 
std::pair< uint32_t, uint32_t > getUpLowerWeightTensorDepth (core::OpTrainableRankType rank, const uint32_t output_depth)
 
std::int32_t saturatingRoundingDoublingHighMul (std::int32_t a, std::int32_t b)
 
int32_t roundingDivideByPOT (int32_t x, int32_t exponent)
 
int32_t multiplyByQuantizedMultiplier (int32_t x, int32_t quantized_multiplier, int shift)
 
int32_t multiplyByQuantizedMultiplierSmallerThanOneExp (int32_t x, int32_t quantized_multiplier, int left_shift)
 
template<typename P >
void getActivationParams (const P &params, int32_t *min, int32_t *max)
 
template<typename P >
void getActivationParams (const P &params, float *min, float *max)
 
template<typename P >
void getActivationParams (const P &params, int64_t *min, int64_t *max)
 
size_t reducedOutputOffset (const int32_t num_dims, const int32_t *dims, const int32_t *index, const int32_t num_axis, const int32_t *axis)
 
bool nextIndex (const int32_t num_dims, const int32_t *dims, int32_t *current)
 
int MatchingDim (const core::OMRuntimeShape &shape1, int index1, const core::OMRuntimeShape &shape2, int index2)
 
int flatSizeSkipDim (const int32_t *dims_data, int skip_dim, int num_dims)
 
int offset (const int32_t *dims_data, int i0, int i1, int i2, int i3)
 
int offset (const int32_t *dims_data, int i0, int i1, int i2, int i3, int i4)
 
template<typename T >
T activationFunctionWithMinMax (T x, T output_activation_min, T output_activation_max)
 
template<int MAX_DIM = 6>
bool ReduceDimensionsForBroadcast (const core::OMRuntimeShape &input1_shape, const core::OMRuntimeShape &input2_shape, size_t *compressed_input1_stride, size_t *compressed_input2_stride, size_t *compressed_output_shape)
 
template<int N>
void copyDimsToDesc (const core::OMRuntimeShape &input_shape, NdArrayDesc< N > *desc_out)
 
template<int N, int DIM, typename Calc >
std::enable_if< DIM==N-1, void >::type NDOpsHelperImpl (const NdArrayDesc< N > &output, const Calc &calc, int indexes[N])
 
template<int N, int DIM, typename Calc >
std::enable_if< DIM!=N-1, void >::type NDOpsHelperImpl (const NdArrayDesc< N > &output, const Calc &calc, int indexes[N])
 
template<int N, typename Calc >
void NDOpsHelper (const NdArrayDesc< N > &output, const Calc &calc)
 
template<int N>
void NdArrayDescsForElementwiseBroadcast (const core::OMRuntimeShape &input0_shape, const core::OMRuntimeShape &input1_shape, NdArrayDesc< N > *desc0_out, NdArrayDesc< N > *desc1_out)
 
int subscriptToIndex (const NdArrayDesc< 4 > &desc, int i0, int i1, int i2, int i3)
 
int subscriptToIndex (const NdArrayDesc< 5 > &desc, int indexes[5])
 
bool processBroadcastShapes (const core::OMRuntimeShape &shape0, const core::OMRuntimeShape &shape1, core::BinaryArithmeticBroadcastParams *params)
 
template<>
OMStatus AddN< int8_t > (const size_t flat_size, const size_t num_inputs, const int8_t *const *input_data, int8_t *output_data)
 
template<>
OMStatus AddN< int16_t > (const size_t flat_size, const size_t num_inputs, const int16_t *const *input_data, int16_t *output_data)
 
template<typename T1 , typename T2 , typename T3 >
OMStatus ArgMin (const core::OMRuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, const core::OMRuntimeShape &output_shape, T2 *output_data)
 
template<typename InputType , typename OutputType >
OMStatus Mul (const core::ArithmeticQuantParams &params, uint32_t size, const InputType *input1_data, const InputType *input2_data, OutputType *output_data)
 
OMStatus Sub (const core::ArithmeticQuantParams &params, const uint32_t flat_size, const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data)
 

Variables

constexpr int MAX_INDICES_ND = 5
 

Function Documentation

◆ Abs()

template<typename T >
OMStatus onert_micro::execute::pal::Abs ( const core::OMRuntimeShape shape,
const T *  input_data,
T *  output_data 
)
inline

Definition at line 33 of file PALAbs.h.

34{
35 const uint32_t flat_size = shape.flatSize();
36
37 if (flat_size == -1)
38 return UnknownError;
39
40 assert(input_data != nullptr);
41 assert(output_data != nullptr);
42
43 for (uint32_t i = 0; i < flat_size; ++i)
44 {
45 output_data[i] = std::abs(input_data[i]);
46 }
47
48 return Ok;
49}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, and onert_micro::UnknownError.

◆ activationFunctionWithMinMax()

template<typename T >
T onert_micro::execute::pal::activationFunctionWithMinMax ( T  x,
T  output_activation_min,
T  output_activation_max 
)
inline

Definition at line 231 of file PALUtils.h.

232{
233 using std::max;
234 using std::min;
235 return min(max(x, output_activation_min), output_activation_max);
236}

Referenced by DepthwiseConv2D< float >(), L2Pool(), and TransposeConv< float >().
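
The helper clamps a value to the fused-activation range, so a fused ReLU6 becomes a clamp to [0, 6]. A minimal standalone sketch that reproduces the same behaviour without the onert-micro headers:

  #include <algorithm>
  #include <cassert>

  // Same ordering as activationFunctionWithMinMax: apply the lower bound first, then the upper bound.
  template <typename T> T clampToActivationRange(T x, T act_min, T act_max)
  {
    return std::min(std::max(x, act_min), act_max);
  }

  int main()
  {
    assert(clampToActivationRange(7.5f, 0.0f, 6.0f) == 6.0f);  // ReLU6 saturates at 6
    assert(clampToActivationRange(-1.0f, 0.0f, 6.0f) == 0.0f); // negative inputs clamp to 0
    return 0;
  }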

◆ Add() [1/2]

OMStatus onert_micro::execute::pal::Add ( const core::ArithmeticQuantParams params,
const uint32_t  flat_size,
const int8_t *  input1_data,
const int8_t *  input2_data,
int8_t *  output_data 
)

Definition at line 33 of file PALAdd.h.

35{
36 auto status = arm_elementwise_add_s8(
37 input1_data, input2_data, params.input1_offset, params.input1_multiplier, params.input1_shift,
38 params.input2_offset, params.input2_multiplier, params.input2_shift, params.left_shift,
39 output_data, params.output_offset, params.output_multiplier, params.output_shift,
40 params.quantized_activation_min, params.quantized_activation_max, flat_size);
41
42 assert(status == ARM_CMSIS_NN_SUCCESS);
43 if (status != ARM_CMSIS_NN_SUCCESS)
44 return UnknownError;
45
46 return Ok;
47}

References onert_micro::core::ArithmeticQuantParams::input1_multiplier, onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input1_shift, onert_micro::core::ArithmeticQuantParams::input2_multiplier, onert_micro::core::ArithmeticQuantParams::input2_offset, onert_micro::core::ArithmeticQuantParams::input2_shift, onert_micro::core::ArithmeticQuantParams::left_shift, onert_micro::Ok, onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, onert_micro::core::ArithmeticQuantParams::quantized_activation_min, and onert_micro::UnknownError.

◆ Add() [2/2]

template<typename T >
OMStatus onert_micro::execute::pal::Add ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 50 of file PALAddCommon.h.

52{
53 ArithmeticOp<T, AddFn<T>>(params, flat_size, input1_data, input2_data, output_data);
54 return Ok;
55}

References onert_micro::Ok.

◆ AddFunc()

int8_t onert_micro::execute::pal::AddFunc ( int8_t  x,
int8_t  y,
const core::ArithmeticQuantParams params 
)

Definition at line 30 of file PALAddCommon.h.

31{
32 const int32_t input1_val = params.input1_offset + x;
33 const int32_t input2_val = params.input2_offset + y;
34 const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
35 const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
36 const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
37 shifted_input1_val, params.input1_multiplier, params.input1_shift);
38 const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
39 shifted_input2_val, params.input2_multiplier, params.input2_shift);
40 const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
41 const int32_t raw_output = multiplyByQuantizedMultiplierSmallerThanOneExp(
42 raw_sum, params.output_multiplier, params.output_shift) +
43 params.output_offset;
44 const int32_t clamped_output = std::min(params.quantized_activation_max,
45 std::max(params.quantized_activation_min, raw_output));
46 return static_cast<int8_t>(clamped_output);
47}

References onert_micro::core::ArithmeticQuantParams::input1_multiplier, onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input1_shift, onert_micro::core::ArithmeticQuantParams::input2_multiplier, onert_micro::core::ArithmeticQuantParams::input2_offset, onert_micro::core::ArithmeticQuantParams::input2_shift, onert_micro::core::ArithmeticQuantParams::left_shift, multiplyByQuantizedMultiplierSmallerThanOneExp(), onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, and onert_micro::core::ArithmeticQuantParams::quantized_activation_min.

Referenced by BroadcastAdd4DSlow().
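
AddFunc follows the usual TFLite-style quantized add: both int8 inputs are shifted back by their zero-point offsets, pre-scaled by left_shift, rescaled to a common scale with multiplyByQuantizedMultiplierSmallerThanOneExp(), summed, requantized with the output multiplier and shift, offset by output_offset and clamped. A hedged, self-contained sketch of the fixed-point helpers this relies on (the namespace's saturatingRoundingDoublingHighMul() and roundingDivideByPOT(); the real PALUtils.h implementations may differ in detail):

  #include <cstdint>
  #include <limits>

  // Rounding doubling high multiply: (a * b * 2) >> 32 with round-to-nearest and saturation.
  inline int32_t saturatingRoundingDoublingHighMulSketch(int32_t a, int32_t b)
  {
    const bool overflow = (a == b) && (a == std::numeric_limits<int32_t>::min());
    const int64_t ab_64 = static_cast<int64_t>(a) * static_cast<int64_t>(b);
    const int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
    const int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
    return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32;
  }

  // Rounding arithmetic right shift by `exponent` (round half away from zero).
  inline int32_t roundingDivideByPOTSketch(int32_t x, int exponent)
  {
    const int32_t mask = static_cast<int32_t>((1ll << exponent) - 1);
    const int32_t remainder = x & mask;
    const int32_t threshold = (mask >> 1) + ((x < 0) ? 1 : 0);
    return (x >> exponent) + ((remainder > threshold) ? 1 : 0);
  }

  // multiplyByQuantizedMultiplierSmallerThanOneExp(x, multiplier, shift) then amounts to
  // roundingDivideByPOTSketch(saturatingRoundingDoublingHighMulSketch(x, multiplier), -shift),
  // where `shift` is the non-positive exponent stored in ArithmeticQuantParams.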

◆ AddN()

template<typename T >
OMStatus onert_micro::execute::pal::AddN ( const size_t  flat_size,
const size_t  num_inputs,
const T *const *  input_data,
T *  output_data 
)

Definition at line 32 of file PALAddNCommon.h.

34{
35 // All inputs and output should have the same shape, this is checked during
36 // Prepare stage.
37 for (size_t i = 0; i < flat_size; ++i)
38 {
39 T x = 0;
40 for (size_t j = 0; j < num_inputs; ++j)
41 {
42 x += input_data[j][i];
43 }
44 output_data[i] = x;
45 }
46 return Ok;
47}

References onert_micro::Ok.
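
The inputs are handed over as an array of pointers, one per summand, all with the same flat size. A hypothetical call (assuming the header that declares this template is on the include path):

  const float in0[4] = {1.f, 2.f, 3.f, 4.f};
  const float in1[4] = {10.f, 20.f, 30.f, 40.f};
  const float *inputs[2] = {in0, in1};
  float out[4] = {};
  // flat_size = 4 elements per tensor, num_inputs = 2 tensors; shapes were validated in Prepare.
  onert_micro::execute::pal::AddN<float>(4, 2, inputs, out); // out becomes {11, 22, 33, 44}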

◆ AddN< int16_t >()

template<>
OMStatus onert_micro::execute::pal::AddN< int16_t > ( const size_t  flat_size,
const size_t  num_inputs,
const int16_t *const *  input_data,
int16_t *  output_data 
)

Definition at line 38 of file PALAddN.h.

40{
41 assert(false && "Not IMPL yet");
42 return UnsupportedOp;
43}

References onert_micro::UnsupportedOp.

◆ AddN< int8_t >()

template<>
OMStatus onert_micro::execute::pal::AddN< int8_t > ( const size_t  flat_size,
const size_t  num_inputs,
const int8_t *const *  input_data,
int8_t *  output_data 
)

Definition at line 30 of file PALAddN.h.

32{
33 assert(false && "Not IMPL yet");
34 return UnsupportedOp;
35}

References onert_micro::UnsupportedOp.

◆ ArgMax()

template<typename T1 , typename T2 , typename T3 >
OMStatus onert_micro::execute::pal::ArgMax ( const core::OMRuntimeShape input1_shape,
const T1 *  input1_data,
const T3 *  input2_data,
const core::OMRuntimeShape output_shape,
T2 *  output_data 
)

Definition at line 30 of file PALArgMax.h.

32{
33 return ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data,
34 std::greater<T1>());
35}

References ArgMinMax(), and output_shape.

◆ ArgMin()

template<typename T1 , typename T2 , typename T3 >
OMStatus onert_micro::execute::pal::ArgMin ( const core::OMRuntimeShape input1_shape,
const T1 *  input1_data,
const T3 *  input2_data,
const core::OMRuntimeShape output_shape,
T2 *  output_data 
)

Definition at line 30 of file PALArgMin.h.

32{
33 return ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data,
34 std::less<T1>());
35}

References ArgMinMax(), and output_shape.

◆ ArgMinMax()

template<typename T1 , typename T2 , typename T3 , typename Cmp >
OMStatus onert_micro::execute::pal::ArgMinMax ( const core::OMRuntimeShape input1_shape,
const T1 *  input1_data,
const T3 *  input2_data,
const core::OMRuntimeShape output_shape,
T2 *  output_data,
const Cmp &  cmp 
)

Definition at line 32 of file PALArgMinMaxCommon.h.

35{
36 int axis = input2_data[0];
37 if (axis < 0)
38 {
39 axis += input1_shape.dimensionsCount();
40 }
41 const int axis_size = input1_shape.dims(axis);
42
43 int outer_size = 1;
44 for (int i = 0; i < axis; ++i)
45 {
46 outer_size *= input1_shape.dims(i);
47 }
48
49 int inner_size = 1;
50 const int dims_count = input1_shape.dimensionsCount();
51 for (int i = axis + 1; i < dims_count; ++i)
52 {
53 inner_size *= input1_shape.dims(i);
54 }
55 for (int outer = 0; outer < outer_size; ++outer)
56 {
57 for (int inner = 0; inner < inner_size; ++inner)
58 {
59 auto min_max_value = input1_data[outer * axis_size * inner_size + inner];
60 T2 min_max_index = 0;
61 for (int i = 1; i < axis_size; ++i)
62 {
63 const auto &curr_value = input1_data[(outer * axis_size + i) * inner_size + inner];
64 if (cmp(curr_value, min_max_value))
65 {
66 min_max_value = curr_value;
67 min_max_index = static_cast<T2>(i);
68 }
69 }
70 output_data[outer * inner_size + inner] = min_max_index;
71 }
72 }
73 return Ok;
74}

References onert_micro::core::OMRuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dims(), and onert_micro::Ok.

Referenced by ArgMax(), and ArgMin().
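
The loop treats the input as an outer_size x axis_size x inner_size volume built around the reduction axis: everything before the axis is folded into outer_size, everything after it into inner_size, and the comparator picks a winning index along axis_size. A standalone sketch of the same decomposition for argmax over axis 1 of a {2, 3, 4} tensor (outer_size = 2, axis_size = 3, inner_size = 4):

  #include <cstdint>
  #include <vector>

  std::vector<int32_t> argmaxAxis1(const std::vector<float> &input) // input.size() == 2 * 3 * 4
  {
    const int outer_size = 2, axis_size = 3, inner_size = 4;
    std::vector<int32_t> output(outer_size * inner_size, 0);
    for (int outer = 0; outer < outer_size; ++outer)
      for (int inner = 0; inner < inner_size; ++inner)
        for (int i = 1; i < axis_size; ++i)
        {
          const int best = output[outer * inner_size + inner];
          const float best_val = input[(outer * axis_size + best) * inner_size + inner];
          const float curr_val = input[(outer * axis_size + i) * inner_size + inner];
          if (curr_val > best_val) // std::greater, as ArgMax() passes
            output[outer * inner_size + inner] = i;
        }
    return output;
  }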

◆ ArithmeticOp()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::ArithmeticOp ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 54 of file PALArithmeticOpCommon.h.

56{
57 T activation_min, activation_max;
58 getActivationParams(params, &activation_min, &activation_max);
59
60 Fn func;
61 for (int i = 0; i < flat_size; ++i)
62 output_data[i] =
63 std::min(std::max(func(input1_data[i], input2_data[i]), activation_min), activation_max);
64
65 return Ok;
66}

References getActivationParams(), and onert_micro::Ok.
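
ArithmeticOp is parameterised on a small functor type Fn such as AddFn, MulFn or SubFn from the Data Structures list above; the functor supplies the raw element-wise operation and ArithmeticOp clamps the result to the activation range. A hedged sketch of what such a functor presumably looks like (the exact operator() signature is an assumption, not copied from the headers):

  template <typename T> struct AddFnSketch
  {
    T operator()(T lhs, T rhs) const { return lhs + rhs; } // raw element-wise add
  };
  // ArithmeticOp<float, AddFnSketch<float>>(params, flat_size, in1, in2, out) would then
  // clamp each sum to the [activation_min, activation_max] range from getActivationParams().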

◆ ArithmeticOpScalar()

template<typename T , typename Fn >
void onert_micro::execute::pal::ArithmeticOpScalar ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input_data,
const T  scalar_value,
T *  output_data 
)
inline

Definition at line 80 of file PALArithmeticOpCommon.h.

83{
84 T activation_min, activation_max;
85 getActivationParams(params, &activation_min, &activation_max);
86
87 for (int i = 0; i < flat_size; ++i)
88 output_data[i] =
89 std::min(std::max(func(input_data[i], scalar_value), activation_min), activation_max);
90}

References getActivationParams().

◆ AveragePool() [1/2]

OMStatus onert_micro::execute::pal::AveragePool ( const core::Pool2DParams params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)

Definition at line 33 of file PALAveragePool2DCommon.h.

36{
37 const int32_t batches = input_shape.dims(0);
38 const int32_t depth = output_shape.dims(3);
39 const int32_t input_height = input_shape.dims(1);
40 const int32_t input_width = input_shape.dims(2);
41 const int32_t output_height = output_shape.dims(1);
42 const int32_t output_width = output_shape.dims(2);
43 const int32_t stride_height = params.stride_h;
44 const int32_t stride_width = params.stride_w;
45 for (int batch = 0; batch < batches; ++batch)
46 {
47 for (int out_y = 0; out_y < output_height; ++out_y)
48 {
49 for (int out_x = 0; out_x < output_width; ++out_x)
50 {
51 for (int channel = 0; channel < depth; ++channel)
52 {
53 const int in_x_origin = (out_x * stride_width) - params.pad_w;
54 const int in_y_origin = (out_y * stride_height) - params.pad_h;
55 // Compute the boundaries of the filter region clamped so as to
56 // ensure that the filter window fits in the input array.
57 const int filter_x_start = std::max(0, -in_x_origin);
58 const int filter_x_end = std::min(static_cast<int32_t>(params.filter_w),
59 static_cast<int32_t>(input_width - in_x_origin));
60 const int filter_y_start = std::max(0, -in_y_origin);
61 const int filter_y_end = std::min(static_cast<int32_t>(params.filter_h),
62 static_cast<int32_t>(input_height - in_y_origin));
63
64 float total = 0.f;
65 float filter_count = 0;
66
67 for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
68 {
69 for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
70 {
71 const int in_x = in_x_origin + filter_x;
72 const int in_y = in_y_origin + filter_y;
73
74 const int input_data_offset =
75 ((batch * input_shape.dims(1) + in_y) * input_shape.dims(2) + in_x) *
76 input_shape.dims(3) +
77 channel;
78
79 total += input_data[input_data_offset];
80 filter_count++;
81 }
82 }
83 const int output_data_offset =
 84 ((batch * output_shape.dims(1) + out_y) * output_shape.dims(2) + out_x) *
 85 output_shape.dims(3) +
 86 channel;
87
88 assert(filter_count != 0);
89 const float average = total / filter_count;
90
91 output_data[output_data_offset] =
92 std::min(std::max(average, params.activation_min), params.activation_max);
93 }
94 }
95 }
96 }
97 return Ok;
98}

References onert_micro::core::Pool2DParams::activation_max, onert_micro::core::Pool2DParams::activation_min, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.
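
Note that the filter window is clamped against the input borders and the divisor is the number of elements actually visited (filter_count), so border outputs are averages over a smaller window rather than zero-padded averages. A standalone illustration of the clamping arithmetic for one output column, assuming filter_w = 2, stride_w = 2, pad_w = 1 and a 4-wide input:

  #include <algorithm>

  const int out_x = 0, stride_w = 2, pad_w = 1, filter_w = 2, input_width = 4;
  const int in_x_origin = out_x * stride_w - pad_w;                       // -1: window starts left of the input
  const int filter_x_start = std::max(0, -in_x_origin);                   // 1: skip the out-of-bounds column
  const int filter_x_end = std::min(filter_w, input_width - in_x_origin); // 2
  // Only filter_x = 1 is visited, so this output averages over a single column in x.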

◆ AveragePool() [2/2]

OMStatus onert_micro::execute::pal::AveragePool ( const core::Pool2DParams params,
const core::OMRuntimeShape input_shape,
const int8_t *  input_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 32 of file PALAveragePool2D.h.

35{
36 cmsis_nn_dims input_dims;
37 cmsis_nn_dims output_dims;
38 cmsis_nn_pool_params pool_params;
39 cmsis_nn_dims filter_dims;
40 cmsis_nn_context ctx;
41
42 const int depth = input_shape.dims(3);
43 const int output_width = output_shape.dims(2);
44
45 input_dims.n = 1;
46 input_dims.h = input_shape.dims(1);
47 input_dims.w = input_shape.dims(2);
48 input_dims.c = depth;
49
50 output_dims.n = 1;
51 output_dims.h = output_shape.dims(1);
52 output_dims.w = output_width;
53 output_dims.c = depth;
54
55 pool_params.stride.h = params.stride_h;
56 pool_params.stride.w = params.stride_w;
57 pool_params.padding.h = params.pad_h;
58 pool_params.padding.w = params.pad_w;
59 pool_params.activation.min = params.quantized_activation_min;
60 pool_params.activation.max = params.quantized_activation_max;
61
62 filter_dims.n = 1;
63 filter_dims.h = params.filter_h;
64 filter_dims.w = params.filter_w;
65 filter_dims.c = 1;
66
67 const int32_t buffer_size = arm_avgpool_s8_get_buffer_size(output_width, depth);
68 int8_t *buffer = nullptr;
69 if (buffer_size > 0)
70 {
71 buffer = new int8_t[buffer_size];
72 }
73
74 ctx.buf = buffer;
75 ctx.size = buffer_size;
76
77 auto res = arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, &output_dims,
78 output_data);
79
80 if (buffer_size > 0)
81 delete[] buffer;
82
83 assert(res == ARM_CMSIS_NN_SUCCESS);
84 if (res != ARM_CMSIS_NN_SUCCESS)
85 return CmsisNNError;
86
87 return Ok;
88}

References onert_micro::CmsisNNError, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::quantized_activation_max, onert_micro::core::Pool2DParams::quantized_activation_min, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.

◆ BatchToSpaceND()

template<typename T >
OMStatus onert_micro::execute::pal::BatchToSpaceND ( const core::OMRuntimeShape unextended_input1_shape,
const T *  input1_data,
const core::OMRuntimeShape unextended_input2_shape,
const int32_t *  block_shape_data,
const core::OMRuntimeShape unextended_input3_shape,
const int32_t *  crops_data,
const core::OMRuntimeShape unextended_output_shape,
T *  output_data 
)
inline

Definition at line 51 of file PALBatchToSpaceNDCommon.h.

55{
56 const core::OMRuntimeShape input1_shape = extendShapeBatchToSpace(unextended_input1_shape);
57 const core::OMRuntimeShape output_shape = extendShapeBatchToSpace(unextended_output_shape);
58
59 const int output_width = output_shape.dims(2);
60 const int output_height = output_shape.dims(1);
61 const int output_batch_size = output_shape.dims(0);
62
63 const int depth = input1_shape.dims(3);
64 const int input_width = input1_shape.dims(2);
65 const int input_height = input1_shape.dims(1);
66 const int input_batch_size = input1_shape.dims(0);
67
68 const int block_shape_height = block_shape_data[0];
69 const int block_shape_width =
70 unextended_input1_shape.dimensionsCount() == 4 ? block_shape_data[1] : 1;
71 const int crops_top = crops_data[0];
72 const int crops_left = unextended_input1_shape.dimensionsCount() == 4 ? crops_data[2] : 0;
73 for (int in_batch = 0; in_batch < input_batch_size; ++in_batch)
74 {
75 const int out_batch = in_batch % output_batch_size;
76 const int spatial_offset = in_batch / output_batch_size;
77 for (int in_h = 0; in_h < input_height; ++in_h)
78 {
79 const int out_h = in_h * block_shape_height + spatial_offset / block_shape_width - crops_top;
80 if (out_h < 0 || out_h >= output_height)
81 {
82 continue;
83 }
84 for (int in_w = 0; in_w < input_width; ++in_w)
85 {
86 const int out_w =
87 in_w * block_shape_width + spatial_offset % block_shape_width - crops_left;
88
89 if (out_w < 0 || out_w >= output_width)
90 {
91 continue;
92 }
93 T *out = output_data + offset(output_shape.dimsData(), out_batch, out_h, out_w, 0);
94 const T *in = input1_data + offset(input1_shape.dimsData(), in_batch, in_h, in_w, 0);
95 memcpy(out, in, depth * sizeof(T));
96 }
97 }
98 }
99 return Ok;
100}

References onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), offset(), onert_micro::Ok, and output_shape.
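
Each input batch is folded back into the spatial grid: out_batch = in_batch % output_batch_size picks the output image and spatial_offset = in_batch / output_batch_size picks the position inside the block. A small worked example with block_shape = {2, 2}, 4 input batches, 1 output batch and no crops:

  const int output_batch_size = 1, block_h = 2, block_w = 2, crops_top = 0, crops_left = 0;
  const int in_batch = 3, in_h = 0, in_w = 0;
  const int spatial_offset = in_batch / output_batch_size;                  // 3
  const int out_h = in_h * block_h + spatial_offset / block_w - crops_top;  // 0 * 2 + 1 = 1
  const int out_w = in_w * block_w + spatial_offset % block_w - crops_left; // 0 * 2 + 1 = 1
  // Pixel (0, 0) of input batch 3 therefore lands at (1, 1) of the single output image.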

◆ BinaryOp()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::BinaryOp ( const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)
inline

Definition at line 62 of file PALBinaryOpCommon.h.

64{
65 Fn func;
66 for (int i = 0; i < flat_size; ++i)
67 {
68 output_data[i] = func(input1_data[i], input2_data[i]);
69 }
70 return Ok;
71}

References onert_micro::Ok.

◆ BroadcastAdd4DSlow() [1/2]

OMStatus onert_micro::execute::pal::BroadcastAdd4DSlow ( const core::ArithmeticQuantParams params,
const core::OMRuntimeShape input1_shape,
const int8_t *  input1_data,
const core::OMRuntimeShape input2_shape,
const int8_t *  input2_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 68 of file PALAddCommon.h.

72{
73 BroadcastBinaryFunction6DSlow(params, input1_shape, input1_data, input2_shape, input2_data,
74 output_shape, output_data, AddFunc);
75 return Ok;
76}

References AddFunc(), BroadcastBinaryFunction6DSlow(), onert_micro::Ok, and output_shape.

◆ BroadcastAdd4DSlow() [2/2]

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastAdd4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 58 of file PALAddCommon.h.

62{
63 BroadcastArithmeticOp4DSlow<T, AddFn<T>>(params, input1_shape, input1_data, input2_shape,
64 input2_data, output_shape, output_data);
65 return Ok;
66}

References onert_micro::Ok, and output_shape.

◆ BroadcastArithmeticOp4DSlow()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::BroadcastArithmeticOp4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 93 of file PALArithmeticOpCommon.h.

 97{
 98 NdArrayDesc<4> desc1;
 99 NdArrayDesc<4> desc2;
100 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
101 const core::OMRuntimeShape extended_output_shape =
102 core::OMRuntimeShape::extendedShape(4, output_shape);
103
104 T activation_min, activation_max;
105 getActivationParams(params, &activation_min, &activation_max);
106
107 // In Tensorflow, the dimensions are canonically named (batch_number, row,
108 // col, channel), with extents (batches, height, width, depth), with the
109 // trailing dimension changing most rapidly (channels has the smallest stride,
110 // typically 1 element).
111 //
112 // In generated C code, we store arrays with the dimensions reversed. The
113 // first dimension has smallest stride.
114 //
115 // We name our variables by their Tensorflow convention, but generate C code
116 // nesting loops such that the innermost loop has the smallest stride for the
117 // best cache behavior.
118 Fn func;
119 for (int b = 0; b < extended_output_shape.dims(0); ++b)
120 {
121 for (int y = 0; y < extended_output_shape.dims(1); ++y)
122 {
123 for (int x = 0; x < extended_output_shape.dims(2); ++x)
124 {
125 for (int c = 0; c < extended_output_shape.dims(3); ++c)
126 {
127 const int output_data_offset =
128 ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
129 extended_output_shape.dims(3) +
130 c;
131
132 output_data[output_data_offset] =
133 std::min(std::max(func(input1_data[subscriptToIndex(desc1, b, y, x, c)],
134 input2_data[subscriptToIndex(desc2, b, y, x, c)]),
135 activation_min),
136 activation_max);
137 }
138 }
139 }
140 }
141 return Ok;
142}

References desc1, desc2, onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), getActivationParams(), NdArrayDescsForElementwiseBroadcast(), onert_micro::Ok, output_shape, and subscriptToIndex().
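
The output offset is the usual row-major NHWC flattening ((b * H + y) * W + x) * C + c, while subscriptToIndex() maps the same four indices through each input's NdArrayDesc, which carries a stride of 0 along any dimension the input does not actually extend over, so the smaller operand is re-read across the broadcast dimensions. A standalone sketch of that stride trick; the struct below is illustrative and not the library's NdArrayDesc:

  struct DescSketch { int strides[4]; }; // stride is 0 where the input's extent was 1

  inline int flatIndexSketch(const DescSketch &d, int b, int y, int x, int c)
  {
    return b * d.strides[0] + y * d.strides[1] + x * d.strides[2] + c * d.strides[3];
  }
  // A {1, 1, 1, C} tensor gets strides {0, 0, 0, 1}: every (b, y, x) re-reads the same C
  // values, while the full-sized input advances with its ordinary NHWC strides.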

◆ BroadcastBinaryFunction6DSlow()

template<typename T >
void onert_micro::execute::pal::BroadcastBinaryFunction6DSlow ( const core::ArithmeticQuantParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 224 of file PALArithmeticOpCommon.h.

229{
230 constexpr int kMaxBroadcastDim = 6;
231
232 // In Tensorflow, the dimensions are canonically named (batch_number, row,
233 // col, channel), with extents (batches, height, width, depth), with the
234 // trailing dimension changing most rapidly (channels has the smallest stride,
235 // typically 1 element).
236 //
237 // In generated C code, we store arrays with the dimensions reversed. The
238 // first dimension has smallest stride.
239 //
240 // We name our variables by their Tensorflow convention, but generate C code
241 // nesting loops such that the innermost loop has the smallest stride for the
242 // best cache behavior.
243 size_t compressed_input1_stride[kMaxBroadcastDim];
244 size_t compressed_input2_stride[kMaxBroadcastDim];
245 size_t compressed_output_shape[kMaxBroadcastDim];
246 bool broadcastable_shape = ReduceDimensionsForBroadcast<kMaxBroadcastDim>(
247 input1_shape, input2_shape, compressed_input1_stride, compressed_input2_stride,
248 compressed_output_shape);
249 // Skip broadcasting for degenerate shapes.
250 if (!broadcastable_shape)
251 {
252 return;
253 }
254
255 size_t input1_offset = 0;
256 size_t input2_offset = 0;
257 size_t output_offset = 0;
258 BroadcastRecursiveDimensions(params, kMaxBroadcastDim - 1, &input1_offset, &input2_offset,
259 &output_offset, compressed_input1_stride, compressed_input2_stride,
260 compressed_output_shape, input1_data, input2_data, output_data,
261 binary_func);
262}

References BroadcastRecursiveDimensions().

Referenced by BroadcastAdd4DSlow(), and BroadcastSub4DSlow().

◆ BroadcastBinaryOp4DSlow()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::BroadcastBinaryOp4DSlow ( const core::OMRuntimeShape input1_shape,
const float *  input1_data,
const core::OMRuntimeShape input2_shape,
const float *  input2_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 75 of file PALBinaryOpCommon.h.

 78{
 79 NdArrayDesc<4> desc1;
 80 NdArrayDesc<4> desc2;
 81 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
82
83 const core::OMRuntimeShape extended_output_shape =
84 core::OMRuntimeShape::extendedShape(4, output_shape);
85
86 // In Tensorflow, the dimensions are canonically named (batch_number, row,
87 // col, channel), with extents (batches, height, width, depth), with the
88 // trailing dimension changing most rapidly (channels has the smallest stride,
89 // typically 1 element).
90 //
91 // In generated C code, we store arrays with the dimensions reversed. The
92 // first dimension has smallest stride.
93 //
94 // We name our variables by their Tensorflow convention, but generate C code
95 // nesting loops such that the innermost loop has the smallest stride for the
96 // best cache behavior.
97
98 Fn func;
99 for (int b = 0; b < extended_output_shape.dims(0); ++b)
100 {
101 for (int y = 0; y < extended_output_shape.dims(1); ++y)
102 {
103 for (int x = 0; x < extended_output_shape.dims(2); ++x)
104 {
105 for (int c = 0; c < extended_output_shape.dims(3); ++c)
106 {
107 const int output_data_offset =
108 ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
109 extended_output_shape.dims(3) +
110 c;
111
112 output_data[output_data_offset] = func(input1_data[subscriptToIndex(desc1, b, y, x, c)],
113 input2_data[subscriptToIndex(desc2, b, y, x, c)]);
114 }
115 }
116 }
117 }
118 return Ok;
119}

References desc1, desc2, onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), NdArrayDescsForElementwiseBroadcast(), onert_micro::Ok, output_shape, and subscriptToIndex().

◆ BroadcastComparison4DSlowNoScaling()

template<typename T >
void onert_micro::execute::pal::BroadcastComparison4DSlowNoScaling ( const core::ComparisonParams op_params,
const core::OMRuntimeShape unextended_input1_shape,
const T *  input1_data,
const core::OMRuntimeShape unextended_input2_shape,
const T *  input2_data,
const core::OMRuntimeShape unextended_output_shape,
bool *  output_data,
bool  F(T, T) 
)
inline

Definition at line 149 of file PALComparisons.h.

153{
154 const BroadcastComparison4DSlowCommon dims = BroadcastComparison4DSlowPreprocess(
155 unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
156
157 for (int b = 0; b < dims.output_shape.dims(0); ++b)
158 {
159 for (int y = 0; y < dims.output_shape.dims(1); ++y)
160 {
161 for (int x = 0; x < dims.output_shape.dims(2); ++x)
162 {
163 for (int c = 0; c < dims.output_shape.dims(3); ++c)
164 {
165 const int output_data_offset =
166 ((b * dims.output_shape.dims(1) + y) * dims.output_shape.dims(2) + x) *
167 dims.output_shape.dims(3) +
168 c;
169 output_data[output_data_offset] =
170 F(input1_data[subscriptToIndex(dims.desc1, b, y, x, c)],
171 input2_data[subscriptToIndex(dims.desc2, b, y, x, c)]);
172 }
173 }
174 }
175 }
176}

References subscriptToIndex().

◆ BroadcastComparison4DSlowWithScaling()

template<typename T , typename AccType >
void onert_micro::execute::pal::BroadcastComparison4DSlowWithScaling ( const core::ComparisonParams op_params,
const core::OMRuntimeShape unextended_input1_shape,
const T *  input1_data,
const core::OMRuntimeShape unextended_input2_shape,
const T *  input2_data,
const core::OMRuntimeShape unextended_output_shape,
bool *  output_data,
bool  F(AccType, AccType) 
)
inline

Definition at line 75 of file PALComparisons.h.

79{
80 const BroadcastComparison4DSlowCommon dims = BroadcastComparison4DSlowPreprocess(
81 unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
82
83 int left_shift = op_params.left_shift;
84 int32_t input1_offset = op_params.input1_offset;
85 int32_t input1_multiplier = op_params.input1_multiplier;
86 int input1_shift = op_params.input1_shift;
87 int32_t input2_offset = op_params.input2_offset;
88 int32_t input2_multiplier = op_params.input2_multiplier;
89 int input2_shift = op_params.input2_shift;
90
91 for (int b = 0; b < dims.output_shape.dims(0); ++b)
92 {
93 for (int y = 0; y < dims.output_shape.dims(1); ++y)
94 {
95 for (int x = 0; x < dims.output_shape.dims(2); ++x)
96 {
97 for (int c = 0; c < dims.output_shape.dims(3); ++c)
98 {
99 const int32_t input1_val =
100 input1_offset + input1_data[subscriptToIndex(dims.desc1, b, y, x, c)];
101 const int32_t input2_val =
102 input2_offset + input2_data[subscriptToIndex(dims.desc2, b, y, x, c)];
103 const int32_t shifted_input1_val = input1_val * (1 << left_shift);
104 const int32_t shifted_input2_val = input2_val * (1 << left_shift);
105 const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
106 shifted_input1_val, input1_multiplier, input1_shift);
107 const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
108 shifted_input2_val, input2_multiplier, input2_shift);
109
110 const int output_data_offset =
111 ((b * dims.output_shape.dims(1) + y) * dims.output_shape.dims(2) + x) *
112 dims.output_shape.dims(3) +
113 c;
114 output_data[output_data_offset] = F(scaled_input1_val, scaled_input2_val);
115 }
116 }
117 }
118 }
119}

References onert_micro::core::ComparisonParams::input1_multiplier, onert_micro::core::ComparisonParams::input1_offset, onert_micro::core::ComparisonParams::input1_shift, onert_micro::core::ComparisonParams::input2_multiplier, onert_micro::core::ComparisonParams::input2_offset, onert_micro::core::ComparisonParams::input2_shift, onert_micro::core::ComparisonParams::left_shift, multiplyByQuantizedMultiplierSmallerThanOneExp(), and subscriptToIndex().

◆ BroadcastDiv4DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastDiv4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 39 of file PALDivCommon.h.

43{
44 BroadcastArithmeticOp4DSlow<T, DivFn<T>>(params, input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46 return Ok;
47}

References onert_micro::Ok, and output_shape.
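
A minimal usage sketch for the float path follows. It assumes core::BinaryArithmeticBroadcastParams is default-constructible and that its float activation bounds are named float_activation_min / float_activation_max (the fields read by getActivationParams()); the shapes are built with the OMRuntimeShape(rank) + setDim() pattern used in the GRU() listing below.

#include <limits>
// Assumes the relevant PAL header (PALDivCommon.h) and its dependencies are on the include path.

using namespace onert_micro;

void broadcast_div_sketch()
{
  core::OMRuntimeShape in1_shape(4), in2_shape(4), out_shape(4);
  in1_shape.setDim(0, 1); in1_shape.setDim(1, 1); in1_shape.setDim(2, 2); in1_shape.setDim(3, 2);
  in2_shape.setDim(0, 1); in2_shape.setDim(1, 1); in2_shape.setDim(2, 2); in2_shape.setDim(3, 1);
  out_shape.setDim(0, 1); out_shape.setDim(1, 1); out_shape.setDim(2, 2); out_shape.setDim(3, 2);

  const float input1[] = {2.f, 4.f, 9.f, 12.f};
  const float input2[] = {2.f, 3.f}; // broadcast along the innermost dimension
  float output[4] = {};

  core::BinaryArithmeticBroadcastParams params{};
  params.float_activation_min = std::numeric_limits<float>::lowest(); // assumed field name
  params.float_activation_max = std::numeric_limits<float>::max();    // assumed field name

  execute::pal::BroadcastDiv4DSlow<float>(params, in1_shape, input1, in2_shape, input2,
                                          out_shape, output);
  // output == {1.f, 2.f, 3.f, 4.f}
}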

◆ BroadcastFloorDiv4DSlow()

void onert_micro::execute::pal::BroadcastFloorDiv4DSlow ( const core::OMRuntimeShape input1_shape,
const float *  input1_data,
const core::OMRuntimeShape input2_shape,
const float *  input2_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 40 of file PALFloorDivCommon.h.

43{
44 BroadcastBinaryOp4DSlow<float, FloorDivFn<float>>(input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46}

References output_shape.

◆ BroadcastFloorMod4DSlow()

void onert_micro::execute::pal::BroadcastFloorMod4DSlow ( const core::OMRuntimeShape input1_shape,
const float *  input1_data,
const core::OMRuntimeShape input2_shape,
const float *  input2_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 40 of file PALFloorModCommon.h.

43{
44 BroadcastBinaryOp4DSlow<float, FloorModFn<float>>(input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46}

References output_shape.

◆ BroadcastInput1()

template<typename T >
void onert_micro::execute::pal::BroadcastInput1 ( int  size,
const core::ArithmeticQuantParams params,
const T *  input1_data,
const T *  input2_data,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 145 of file PALArithmeticOpCommon.h.

148{
149 for (int i = 0; i < size; ++i)
150 {
151 output_data[i] = binary_func(input1_data[0], input2_data[i], params);
152 }
153}

References size.

◆ BroadcastInput2()

template<typename T >
void onert_micro::execute::pal::BroadcastInput2 ( int  size,
const core::ArithmeticQuantParams params,
const T *  input1_data,
const T *  input2_data,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 156 of file PALArithmeticOpCommon.h.

159{
160 for (int i = 0; i < size; ++i)
161 {
162 output_data[i] = binary_func(input1_data[i], input2_data[0], params);
163 }
164}

References size.
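
BroadcastInput1() and BroadcastInput2() are the scalar-broadcast leaves used by BroadcastRecursiveDimensions(): one operand stays at index 0 while the other advances element by element. A minimal sketch, assuming core::ArithmeticQuantParams is default-constructible (the lambda used as binary_func ignores it):

#include <cstdint>
// Assumes the relevant PAL header (PALArithmeticOpCommon.h) is on the include path.

using namespace onert_micro;

void broadcast_input1_sketch()
{
  const int8_t scalar[] = {3};
  const int8_t vec[] = {1, 2, 3, 4};
  int8_t out[4] = {};
  core::ArithmeticQuantParams params{}; // not consulted by the functor below

  int8_t (*add)(int8_t, int8_t, const core::ArithmeticQuantParams &) =
    [](int8_t x, int8_t y, const core::ArithmeticQuantParams &) -> int8_t {
      return static_cast<int8_t>(x + y);
    };

  // input1 is the broadcast operand: out[i] = add(scalar[0], vec[i], params)
  execute::pal::BroadcastInput1<int8_t>(4, params, scalar, vec, out, add);
  // out == {4, 5, 6, 7}; BroadcastInput2() is the mirror image, broadcasting input2 instead.
}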

◆ BroadcastMaximum4DSlow()

OMStatus onert_micro::execute::pal::BroadcastMaximum4DSlow ( const core::OMRuntimeShape input1_shape,
const float *  input1_data,
const core::OMRuntimeShape input2_shape,
const float *  input2_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 38 of file PALMaximumCommon.h.

43{
44 return BroadcastBinaryOp4DSlow<float, MaximumFn<float>>(input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46}

References output_shape.

◆ BroadcastMinimum4DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastMinimum4DSlow ( const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 40 of file PALMinimumCommon.h.

43{
44 return BroadcastBinaryOp4DSlow<float, MinimumFn<float>>(input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46}

References output_shape.

◆ BroadcastMul4DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastMul4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 44 of file PALMulCommon.h.

48{
49 BroadcastArithmeticOp4DSlow<T, MulFn<T>>(params, input1_shape, input1_data, input2_shape,
50 input2_data, output_shape, output_data);
51 return Ok;
52}

References onert_micro::Ok, and output_shape.

◆ BroadcastMul6DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastMul6DSlow ( const core::ArithmeticQuantParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 55 of file PALMulCommon.h.

59{
60 NdArrayDesc<kMaxMulBroadcastDim> desc1;
61 NdArrayDesc<kMaxMulBroadcastDim> desc2;
62 // The input shapes are extended as part of NdArrayDesc initialization.
63 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
64 const core::OMRuntimeShape extended_output_shape =
65 core::OMRuntimeShape::extendedShape(kMaxMulBroadcastDim, output_shape);
66 // Cache output shape dimensions.
67 int32_t extended_output_shape_dims[kMaxMulBroadcastDim];
68 std::memcpy(extended_output_shape_dims, extended_output_shape.dimsData(),
69 sizeof(extended_output_shape_dims));
70
71 size_t input1_offset_a = 0;
72 size_t input2_offset_a = 0;
73 size_t output_offset_a = 0;
74 for (int a = 0; a < extended_output_shape_dims[0]; ++a)
75 {
76 size_t input1_offset_d = input1_offset_a;
77 size_t input2_offset_d = input2_offset_a;
78 size_t output_offset_d = output_offset_a;
79 for (int d = 0; d < extended_output_shape_dims[1]; ++d)
80 {
81 size_t input1_offset_b = input1_offset_d;
82 size_t input2_offset_b = input2_offset_d;
83 size_t output_offset_b = output_offset_d;
84 for (int b = 0; b < extended_output_shape_dims[2]; ++b)
85 {
86 size_t input1_offset_y = input1_offset_b;
87 size_t input2_offset_y = input2_offset_b;
88 size_t output_offset_y = output_offset_b;
89 for (int y = 0; y < extended_output_shape_dims[3]; ++y)
90 {
91 size_t input1_offset_x = input1_offset_y;
92 size_t input2_offset_x = input2_offset_y;
93 size_t output_offset_x = output_offset_y;
94 for (int x = 0; x < extended_output_shape_dims[4]; ++x)
95 {
96 size_t input1_offset_c = input1_offset_x;
97 size_t input2_offset_c = input2_offset_x;
98 size_t output_offset_c = output_offset_x;
99 for (int c = 0; c < extended_output_shape_dims[5]; ++c)
100 {
101 const int32_t input1_val = params.input1_offset + input1_data[input1_offset_c];
102 const int32_t input2_val = params.input2_offset + input2_data[input2_offset_c];
103 const int32_t unclamped_result =
104 params.output_offset + multiplyByQuantizedMultiplier(input1_val * input2_val,
105 params.output_multiplier,
106 params.output_shift);
107 const int32_t clamped_output =
108 std::min(params.quantized_activation_max,
109 std::max(params.quantized_activation_min, unclamped_result));
110 output_data[output_offset_c] = static_cast<T>(clamped_output);
111 input1_offset_c += desc1.strides[5];
112 input2_offset_c += desc2.strides[5];
113 ++output_offset_c;
114 }
115 input1_offset_x += desc1.strides[4];
116 input2_offset_x += desc2.strides[4];
117 output_offset_x += extended_output_shape_dims[5];
118 }
119 input1_offset_y += desc1.strides[3];
120 input2_offset_y += desc2.strides[3];
121 output_offset_y += extended_output_shape_dims[4] * extended_output_shape_dims[5];
122 }
123 input1_offset_b += desc1.strides[2];
124 input2_offset_b += desc2.strides[2];
125 output_offset_b += extended_output_shape_dims[3] * extended_output_shape_dims[4] *
126 extended_output_shape_dims[5];
127 }
128 input1_offset_d += desc1.strides[1];
129 input2_offset_d += desc2.strides[1];
130 output_offset_d += extended_output_shape_dims[2] * extended_output_shape_dims[3] *
131 extended_output_shape_dims[4] * extended_output_shape_dims[5];
132 }
133 input1_offset_a += desc1.strides[0];
134 input2_offset_a += desc2.strides[0];
135 output_offset_a += extended_output_shape_dims[1] * extended_output_shape_dims[2] *
136 extended_output_shape_dims[3] * extended_output_shape_dims[4] *
137 extended_output_shape_dims[5];
138 }
139 return Ok;
140}

References desc1, desc2, onert_micro::core::OMRuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input2_offset, multiplyByQuantizedMultiplier(), NdArrayDescsForElementwiseBroadcast(), onert_micro::Ok, onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, output_shape, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, and onert_micro::core::ArithmeticQuantParams::quantized_activation_min.

◆ BroadcastRecursiveDimensions()

template<typename T >
void onert_micro::execute::pal::BroadcastRecursiveDimensions ( const core::ArithmeticQuantParams params,
int  dimension,
size_t *  input1_offset_p,
size_t *  input2_offset_p,
size_t *  output_offset,
size_t *  compressed_input1_stride,
size_t *  compressed_input2_stride,
size_t *  compressed_output_shape,
const T *  input1_data,
const T *  input2_data,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 167 of file PALArithmeticOpCommon.h.

173{
174 if (dimension > 0)
175 {
176 for (size_t c = 0; c < compressed_output_shape[dimension]; ++c)
177 {
178 size_t input1_offset_c = *input1_offset_p;
179 size_t input2_offset_c = *input2_offset_p;
180 BroadcastRecursiveDimensions(params, dimension - 1, &input1_offset_c, &input2_offset_c,
181 output_offset, compressed_input1_stride,
182 compressed_input2_stride, compressed_output_shape, input1_data,
183 input2_data, output_data, binary_func);
184 *input1_offset_p += compressed_input1_stride[dimension];
185 *input2_offset_p += compressed_input2_stride[dimension];
186 }
187 }
188 else
189 {
190 assert(dimension == 0);
191 bool input1_is_broadcast = compressed_input1_stride[dimension] == 0;
192 bool input2_is_broadcast = compressed_input2_stride[dimension] == 0;
193 assert(!(input1_is_broadcast && input2_is_broadcast));
194 const T *input1_data_ptr = input1_data + *input1_offset_p;
195 const T *input2_data_ptr = input2_data + *input2_offset_p;
196 T *output_data_ptr = output_data + *output_offset;
197 if (input1_is_broadcast)
198 {
199 // input1 is broadcast.
200 BroadcastInput1<T>(compressed_output_shape[dimension], params, input1_data_ptr,
201 input2_data_ptr, output_data_ptr, binary_func);
202 *input2_offset_p += compressed_output_shape[dimension];
203 }
204 else if (input2_is_broadcast)
205 {
206 // input2 is broadcast.
207 BroadcastInput2<T>(compressed_output_shape[dimension], params, input1_data_ptr,
208 input2_data_ptr, output_data_ptr, binary_func);
209 *input1_offset_p += compressed_output_shape[dimension];
210 }
211 else
212 {
213 // Add element-wise.
214 ElementWise<T>(compressed_output_shape[dimension], params, input1_data_ptr, input2_data_ptr,
215 output_data_ptr, binary_func);
216 *input1_offset_p += compressed_output_shape[dimension];
217 *input2_offset_p += compressed_output_shape[dimension];
218 }
219 *output_offset += compressed_output_shape[dimension];
220 }
221}

References BroadcastRecursiveDimensions().

Referenced by BroadcastBinaryFunction6DSlow(), and BroadcastRecursiveDimensions().

◆ BroadcastSquaredDifference4DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastSquaredDifference4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 38 of file PALSquaredDifferenceCommon.h.

42{
43 BroadcastArithmeticOp4DSlow<T, SquaredDifferenceFn<T>>(
44 params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data);
45 return Ok;
46}

References onert_micro::Ok, and output_shape.

◆ BroadcastSub4DSlow() [1/2]

OMStatus onert_micro::execute::pal::BroadcastSub4DSlow ( const core::ArithmeticQuantParams params,
const core::OMRuntimeShape input1_shape,
const int8_t *  input1_data,
const core::OMRuntimeShape input2_shape,
const int8_t *  input2_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 67 of file PALSubCommon.h.

71{
72 BroadcastBinaryFunction6DSlow(params, input1_shape, input1_data, input2_shape, input2_data,
73 output_shape, output_data, SubFunc);
74 return Ok;
75}

References BroadcastBinaryFunction6DSlow(), onert_micro::Ok, output_shape, and SubFunc().

◆ BroadcastSub4DSlow() [2/2]

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastSub4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 57 of file PALSubCommon.h.

61{
62 BroadcastArithmeticOp4DSlow<T, SubFn<T>>(params, input1_shape, input1_data, input2_shape,
63 input2_data, output_shape, output_data);
64 return Ok;
65}

References onert_micro::Ok, and output_shape.

◆ Cast()

template<typename FromT , typename ToT >
OMStatus onert_micro::execute::pal::Cast ( const core::OMRuntimeShape input_shape,
const FromT *  input_data,
const core::OMRuntimeShape output_shape,
ToT *  output_data 
)
inline

Definition at line 34 of file PALCast.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = static_cast<ToT>(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.
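
A short usage sketch, casting four int32 values to float. The shape is built with the OMRuntimeShape(rank) + setDim() pattern used in the GRU() listing below; Cast() only requires the input and output shapes to match.

#include <cstdint>
// Assumes the relevant PAL header (PALCast.h) is on the include path.

using namespace onert_micro;

void cast_sketch()
{
  core::OMRuntimeShape shape(1);
  shape.setDim(0, 4);

  const int32_t input[] = {1, -2, 3, -4};
  float output[4] = {};

  OMStatus status = execute::pal::Cast<int32_t, float>(shape, input, shape, output);
  // status == Ok, output == {1.f, -2.f, 3.f, -4.f}
  (void)status;
}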

◆ Ceil()

template<typename T >
OMStatus onert_micro::execute::pal::Ceil ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALCeil.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::ceil(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ ComparisonNoScaling()

template<typename T >
void onert_micro::execute::pal::ComparisonNoScaling ( const int64_t  flat_size,
const T *  input1_data,
const T *  input2_data,
bool *  output_data,
bool   F(T, T) 
)
inline

Definition at line 65 of file PALComparisons.h.

67{
68 for (int64_t i = 0; i < flat_size; ++i)
69 {
70 output_data[i] = F(input1_data[i], input2_data[i]);
71 }
72}
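
ComparisonNoScaling() applies the predicate F element by element with no requantization; the comparison helpers on this page (EqualFn(), GreaterFn(), GreaterEqualFn(), ...) are the intended arguments for F. A minimal sketch:

// Assumes the relevant PAL header (PALComparisons.h) is on the include path.
using namespace onert_micro;

void comparison_no_scaling_sketch()
{
  const float lhs[] = {1.f, 5.f, 2.f, 7.f};
  const float rhs[] = {3.f, 4.f, 2.f, 6.f};
  bool result[4] = {};

  execute::pal::ComparisonNoScaling<float>(4, lhs, rhs, result, execute::pal::GreaterFn<float>);
  // result == {false, true, false, true}
}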

◆ ComparisonWithScaling()

template<typename T , typename AccType >
void onert_micro::execute::pal::ComparisonWithScaling ( const core::ComparisonParams op_params,
const int64_t  flat_size,
const T *  input1_data,
const T *  input2_data,
bool *  output_data,
bool   F(AccType, AccType) 
)
inline

Definition at line 122 of file PALComparisons.h.

125{
126 int left_shift = op_params.left_shift;
127 int32_t input1_offset = op_params.input1_offset;
128 int32_t input1_multiplier = op_params.input1_multiplier;
129 int input1_shift = op_params.input1_shift;
130 int32_t input2_offset = op_params.input2_offset;
131 int32_t input2_multiplier = op_params.input2_multiplier;
132 int input2_shift = op_params.input2_shift;
133
134 for (int64_t i = 0; i < flat_size; ++i)
135 {
136 const int32_t input1_val = input1_offset + input1_data[i];
137 const int32_t input2_val = input2_offset + input2_data[i];
138 const int32_t shifted_input1_val = input1_val * (1 << left_shift);
139 const int32_t shifted_input2_val = input2_val * (1 << left_shift);
140 const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
141 shifted_input1_val, input1_multiplier, input1_shift);
142 const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
143 shifted_input2_val, input2_multiplier, input2_shift);
144 output_data[i] = F(scaled_input1_val, scaled_input2_val);
145 }
146}

References onert_micro::core::ComparisonParams::input1_multiplier, onert_micro::core::ComparisonParams::input1_offset, onert_micro::core::ComparisonParams::input1_shift, onert_micro::core::ComparisonParams::input2_multiplier, onert_micro::core::ComparisonParams::input2_offset, onert_micro::core::ComparisonParams::input2_shift, onert_micro::core::ComparisonParams::left_shift, and multiplyByQuantizedMultiplierSmallerThanOneExp().

◆ Concatenation()

template<typename Scalar >
OMStatus onert_micro::execute::pal::Concatenation ( core::ConcatenationParams params,
std::vector< uint32_t > &  input_shapes,
std::vector< const Scalar * > &  input_data,
const core::OMRuntimeShape output_shape,
Scalar *  output_data 
)

Definition at line 30 of file PALConcatenation.h.

33{
34 int axis = params.axis;
35 int inputs_count = params.num_inputs;
36 const int concat_dimensions = output_shape.dimensionsCount();
37
38 int64_t concat_size = 0;
39 for (int i = 0; i < inputs_count; i++)
40 {
41 concat_size += input_shapes[i];
42 }
43 int64_t outer_size = 1;
44 for (int i = 0; i < axis; ++i)
45 {
46 outer_size *= output_shape.dims(i);
47 }
48 // For all input arrays,
49 int64_t base_inner_size = 1;
50 for (int i = axis + 1; i < concat_dimensions; ++i)
51 {
52 base_inner_size *= output_shape.dims(i);
53 }
54
55 Scalar *output_ptr = output_data;
56 for (int k = 0; k < outer_size; k++)
57 {
58 for (int i = 0; i < inputs_count; ++i)
59 {
60 const int copy_size = input_shapes[i] * base_inner_size;
61 const Scalar *input_ptr = input_data[i] + k * copy_size;
62 memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar));
63 output_ptr += copy_size;
64 }
65 }
66
67 return Ok;
68}

References onert_micro::core::ConcatenationParams::axis, luci_interpreter::RuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::ConcatenationParams::num_inputs, onert_micro::Ok, and output_shape.
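
In this kernel, input_shapes holds the size of the concatenation axis for each input, and all remaining dimensions are read from output_shape. A hedged sketch concatenating two [1, 2] float tensors along axis 0, assuming core::ConcatenationParams is default-constructible with the axis and num_inputs fields referenced above:

#include <cstdint>
#include <vector>
// Assumes the relevant PAL header (PALConcatenation.h) is on the include path.

using namespace onert_micro;

void concatenation_sketch()
{
  core::ConcatenationParams params{};
  params.axis = 0;
  params.num_inputs = 2;

  const float in0[] = {1.f, 2.f};
  const float in1[] = {3.f, 4.f};
  std::vector<uint32_t> axis_sizes = {1, 1};      // axis-0 extent of each input
  std::vector<const float *> inputs = {in0, in1};

  core::OMRuntimeShape out_shape(2);
  out_shape.setDim(0, 2);
  out_shape.setDim(1, 2);
  float output[4] = {};

  execute::pal::Concatenation<float>(params, axis_sizes, inputs, out_shape, output);
  // output == {1, 2, 3, 4}
}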

◆ ConvFloat()

OMStatus onert_micro::execute::pal::ConvFloat ( const core::FloatConv2D *  params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape filter_shape,
const float *  filter_data,
const float *  bias_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)

Definition at line 31 of file PALConv2DCommon.h.

35{
36 const int stride_width = params->stride_w;
37 const int stride_height = params->stride_h;
38 const int dilation_width_factor = params->dilation_width_factor;
39 const int dilation_height_factor = params->dilation_height_factor;
40 const int pad_width = params->pad_w;
41 const int pad_height = params->pad_h;
42 const float output_activation_min = params->activation_min;
43 const float output_activation_max = params->activation_max;
44
45 const auto batches = input_shape.dims(0);
46 const int input_height = input_shape.dims(1);
47 const int input_width = input_shape.dims(2);
48 const int input_depth = input_shape.dims(3);
49 const int output_depth = filter_shape.dims(0);
50 const int filter_height = filter_shape.dims(1);
51 const int filter_width = filter_shape.dims(2);
52 const int output_height = output_shape.dims(1);
53 const int output_width = output_shape.dims(2);
54 for (int batch = 0; batch < batches; ++batch)
55 {
56 for (int out_y = 0; out_y < output_height; ++out_y)
57 {
58 const int in_y_origin = (out_y * stride_height) - pad_height;
59 for (int out_x = 0; out_x < output_width; ++out_x)
60 {
61 const int in_x_origin = (out_x * stride_width) - pad_width;
62 for (int out_channel = 0; out_channel < output_depth; ++out_channel)
63 {
64 float total = 0.f;
65 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
66 {
67 const int in_y = in_y_origin + dilation_height_factor * filter_y;
68 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
69 {
70 const int in_x = in_x_origin + dilation_width_factor * filter_x;
71
72 // Zero padding by omitting the areas outside the image.
73 const bool is_point_inside_image =
74 (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
75
76 if (!is_point_inside_image)
77 {
78 continue;
79 }
80
81 for (int in_channel = 0; in_channel < input_depth; ++in_channel)
82 {
83 const int input_data_offset =
84 ((batch * input_height + in_y) * input_width + in_x) * input_depth + in_channel;
85
86 const int filter_data_offset =
87 ((out_channel * filter_height + filter_y) * filter_width + filter_x) *
88 input_depth +
89 in_channel;
90
91 const float input_value = input_data[input_data_offset];
92 const float filter_value = filter_data[filter_data_offset];
93 total += (input_value * filter_value);
94 }
95 }
96 }
97 // float bias_value = 0.0f;
98 if (bias_data)
99 {
100 total += bias_data[out_channel];
101 }
102
103 const int output_data_offset =
104 ((batch * output_height + out_y) * output_width + out_x) * output_depth + out_channel;
105
106 output_data[output_data_offset] =
107 std::min(std::max(total, output_activation_min), output_activation_max);
108 }
109 }
110 }
111 }
112 return Ok;
113}

References onert_micro::core::FloatConv2D::activation_max, onert_micro::core::FloatConv2D::activation_min, onert_micro::core::FloatConv2D::dilation_height_factor, onert_micro::core::FloatConv2D::dilation_width_factor, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::Ok, output_shape, onert_micro::core::FloatConv2D::pad_h, onert_micro::core::FloatConv2D::pad_w, onert_micro::core::FloatConv2D::stride_h, and onert_micro::core::FloatConv2D::stride_w.
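
A hedged sketch of a single ConvFloat() call: a 3x3 single-channel NHWC input, one 2x2 filter, stride 1, no padding, no dilation. core::FloatConv2D is assumed to be default-constructible with exactly the fields read in the listing above; note that the kernel takes the parameter struct by pointer.

#include <limits>
// Assumes the relevant PAL header (PALConv2DCommon.h) is on the include path.

using namespace onert_micro;

void conv_float_sketch()
{
  core::FloatConv2D params{};
  params.stride_w = 1;
  params.stride_h = 1;
  params.dilation_width_factor = 1;
  params.dilation_height_factor = 1;
  params.pad_w = 0;
  params.pad_h = 0;
  params.activation_min = std::numeric_limits<float>::lowest();
  params.activation_max = std::numeric_limits<float>::max();

  core::OMRuntimeShape in_shape(4), filter_shape(4), out_shape(4);
  // NHWC input [1, 3, 3, 1]
  in_shape.setDim(0, 1); in_shape.setDim(1, 3); in_shape.setDim(2, 3); in_shape.setDim(3, 1);
  // OHWI filter [1, 2, 2, 1]
  filter_shape.setDim(0, 1); filter_shape.setDim(1, 2); filter_shape.setDim(2, 2); filter_shape.setDim(3, 1);
  // output [1, 2, 2, 1]
  out_shape.setDim(0, 1); out_shape.setDim(1, 2); out_shape.setDim(2, 2); out_shape.setDim(3, 1);

  const float input[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  const float filter[4] = {1, 0, 0, 1}; // sums the top-left and bottom-right of each window
  float output[4] = {};

  execute::pal::ConvFloat(&params, in_shape, input, filter_shape, filter,
                          /*bias_data=*/nullptr, out_shape, output);
  // output == {6, 8, 12, 14}
}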

◆ ConvPerChannel()

OMStatus onert_micro::execute::pal::ConvPerChannel ( const core::ConvQuant params,
const core::OMRuntimeShape input_shape,
const int8_t *  input_data,
const core::OMRuntimeShape filter_shape,
const int8_t *  filter_data,
const int32_t *  bias_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 36 of file PALConv2D.h.

40{
41 cmsis_nn_conv_params conv_params;
42 conv_params.dilation.h = params.dilation_height_factor;
43 conv_params.dilation.w = params.dilation_width_factor;
44
45 assert(conv_params.dilation.h == 1);
46 assert(conv_params.dilation.w == 1);
47
48 conv_params.input_offset = params.input_offset;
49 conv_params.output_offset = params.output_offset;
50 conv_params.stride.h = params.stride_height;
51 conv_params.stride.w = params.stride_width;
52 conv_params.padding.h = params.pad_h;
53 conv_params.padding.w = params.pad_w;
54 conv_params.activation.min = params.quantized_activation_min;
55 conv_params.activation.max = params.quantized_activation_max;
56
57 cmsis_nn_per_channel_quant_params quant_params;
58 quant_params.multiplier = const_cast<int32_t *>(params.per_channel_output_multiplier.data());
59 quant_params.shift = const_cast<int32_t *>(
60 reinterpret_cast<const int32_t *>(params.per_channel_output_shift.data()));
61
62 assert(conv_params.activation.min <= conv_params.activation.max);
63 const int batch_size = input_shape.dims(0);
64 const int input_depth = input_shape.dims(3);
65 const int output_depth = filter_shape.dims(0);
66
67 cmsis_nn_dims input_dims;
68 input_dims.n = batch_size;
69 input_dims.h = input_shape.dims(1);
70 input_dims.w = input_shape.dims(2);
71 input_dims.c = input_depth;
72
73 cmsis_nn_dims filter_dims;
74 filter_dims.n = output_depth;
75 filter_dims.h = filter_shape.dims(1);
76 filter_dims.w = filter_shape.dims(2);
77 filter_dims.c = input_depth;
78
79 cmsis_nn_dims bias_dims;
80 bias_dims.n = 1;
81 bias_dims.h = 1;
82 bias_dims.w = 1;
83 bias_dims.c = output_depth;
84
85 cmsis_nn_dims output_dims;
86 output_dims.n = batch_size;
87 output_dims.h = output_shape.dims(1);
88 output_dims.w = output_shape.dims(2);
89 output_dims.c = output_depth;
90
91 auto buf_size =
92 arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims, &filter_dims, &output_dims);
93
94 auto buffer = std::make_unique<int8_t[]>(buf_size);
95 assert(buffer != nullptr);
96
97 cmsis_nn_context ctx;
98 ctx.buf = buffer.get();
99 ctx.size = buf_size;
100
101 auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
102 &filter_dims, filter_data, &bias_dims, bias_data, &output_dims,
103 output_data);
104
105 assert(res == ARM_CMSIS_NN_SUCCESS);
106 if (res != ARM_CMSIS_NN_SUCCESS)
107 return CmsisNNError;
108 return Ok;
109}

References onert_micro::CmsisNNError, onert_micro::core::ConvQuant::dilation_height_factor, onert_micro::core::ConvQuant::dilation_width_factor, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::ConvQuant::input_offset, onert_micro::Ok, onert_micro::core::ConvQuant::output_offset, output_shape, onert_micro::core::ConvQuant::pad_h, onert_micro::core::ConvQuant::pad_w, onert_micro::core::ConvQuant::per_channel_output_multiplier, onert_micro::core::ConvQuant::per_channel_output_shift, onert_micro::core::ConvQuant::quantized_activation_max, onert_micro::core::ConvQuant::quantized_activation_min, onert_micro::core::ConvQuant::stride_height, and onert_micro::core::ConvQuant::stride_width.

◆ copyDimsToDesc()

template<int N>
void onert_micro::execute::pal::copyDimsToDesc ( const core::OMRuntimeShape input_shape,
NdArrayDesc< N > *  desc_out 
)
inline

Definition at line 53 of file ProcessBroadcastShapes.h.

54{
55 int desc_stride = 1;
56 for (int i = N - 1; i >= 0; --i)
57 {
58 desc_out->extents[i] = input_shape.dims(i);
59 desc_out->strides[i] = desc_stride;
60 desc_stride *= input_shape.dims(i);
61 }
62}

References onert_micro::core::OMRuntimeShape::dims(), onert_micro::execute::pal::NdArrayDesc< N >::extents, and onert_micro::execute::pal::NdArrayDesc< N >::strides.

Referenced by TransposeImpl().

◆ Cos()

template<typename T >
OMStatus onert_micro::execute::pal::Cos ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALCosCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::cos(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ DepthwiseConv2D()

template<typename T >
OMStatus onert_micro::execute::pal::DepthwiseConv2D ( const core::FloatConv2D *  params,
const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape filter_shape,
const T *  filter_data,
const T *  bias_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALDepthwiseConv2DCommon.h.

37{
38 assert(false && "Not IMPL yet");
39}

◆ DepthwiseConv2D< float >()

template<>
OMStatus onert_micro::execute::pal::DepthwiseConv2D< float > ( const core::FloatConv2D *  params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape filter_shape,
const float *  filter_data,
const float *  bias_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 41 of file PALDepthwiseConv2DCommon.h.

46{
47 const int stride_width = params->stride_w;
48 const int stride_height = params->stride_h;
49 const int dilation_width_factor = params->dilation_width_factor;
50 const int dilation_height_factor = params->dilation_height_factor;
51 const int pad_width = params->pad_w;
52 const int pad_height = params->pad_h;
53 const int depth_multiplier = params->depth_multiplier;
54 const float output_activation_min = params->activation_min;
55 const float output_activation_max = params->activation_max;
56
57 const auto batches = input_shape.dims(0);
58 const int input_height = input_shape.dims(1);
59 const int input_width = input_shape.dims(2);
60 const int input_depth = input_shape.dims(3);
61 const int output_depth = filter_shape.dims(0);
62 const int filter_height = filter_shape.dims(1);
63 const int filter_width = filter_shape.dims(2);
64 const int output_height = output_shape.dims(1);
65 const int output_width = output_shape.dims(2);
66 for (int b = 0; b < batches; ++b)
67 {
68 for (int out_y = 0; out_y < output_height; ++out_y)
69 {
70 for (int out_x = 0; out_x < output_width; ++out_x)
71 {
72 for (int ic = 0; ic < input_depth; ++ic)
73 {
74 for (int m = 0; m < depth_multiplier; m++)
75 {
76 const int oc = m + ic * depth_multiplier;
77 const int in_x_origin = (out_x * stride_width) - pad_width;
78 const int in_y_origin = (out_y * stride_height) - pad_height;
79 float total = 0.f;
80 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
81 {
82 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
83 {
84 const int in_x = in_x_origin + dilation_width_factor * filter_x;
85 const int in_y = in_y_origin + dilation_height_factor * filter_y;
86 // If the location is outside the bounds of the input image,
87 // use zero as a default value.
88 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
89 {
90 float input_value = input_data[offset(input_shape.dimsData(), b, in_y, in_x, ic)];
91 float filter_value =
92 filter_data[offset(filter_shape.dimsData(), 0, filter_y, filter_x, oc)];
93 total += (input_value * filter_value);
94 }
95 }
96 }
97 float bias_value = 0.0f;
98 if (bias_data)
99 {
100 bias_value = bias_data[oc];
101 }
102 output_data[offset(output_shape.dimsData(), b, out_y, out_x, oc)] =
103 activationFunctionWithMinMax(total + bias_value, output_activation_min,
104 output_activation_max);
105 }
106 }
107 }
108 }
109 }
110 return Ok;
111}

References onert_micro::core::FloatConv2D::activation_max, onert_micro::core::FloatConv2D::activation_min, activationFunctionWithMinMax(), onert_micro::core::FloatConv2D::depth_multiplier, onert_micro::core::FloatConv2D::dilation_height_factor, onert_micro::core::FloatConv2D::dilation_width_factor, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), m, offset(), onert_micro::Ok, output_shape, onert_micro::core::FloatConv2D::pad_h, onert_micro::core::FloatConv2D::pad_w, onert_micro::core::FloatConv2D::stride_h, and onert_micro::core::FloatConv2D::stride_w.

◆ DepthwiseConvPerChannel()

OMStatus onert_micro::execute::pal::DepthwiseConvPerChannel ( const core::ConvQuant params,
const core::OMRuntimeShape input_shape,
const int8_t *  input_data,
const core::OMRuntimeShape filter_shape,
const int8_t *  filter_data,
const int32_t *  bias_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 36 of file PALDepthwiseConv2D.h.

41{
42 cmsis_nn_dw_conv_params conv_params;
43 conv_params.dilation.h = params.dilation_height_factor;
44 conv_params.dilation.w = params.dilation_width_factor;
45
46 assert(conv_params.dilation.h == 1);
47 assert(conv_params.dilation.w == 1);
48
49 conv_params.input_offset = params.input_offset;
50 conv_params.output_offset = params.output_offset;
51 conv_params.stride.h = params.stride_height;
52 conv_params.stride.w = params.stride_width;
53 conv_params.padding.h = params.pad_h;
54 conv_params.padding.w = params.pad_w;
55 conv_params.activation.min = params.quantized_activation_min;
56 conv_params.activation.max = params.quantized_activation_max;
57 conv_params.ch_mult = params.depth_multiplier;
58
59 cmsis_nn_per_channel_quant_params quant_params;
60 quant_params.multiplier = const_cast<int32_t *>(params.per_channel_output_multiplier.data());
61 quant_params.shift = const_cast<int32_t *>(
62 reinterpret_cast<const int32_t *>(params.per_channel_output_shift.data()));
63
64 assert(conv_params.activation.min <= conv_params.activation.max);
65 const int batch_size = input_shape.dims(0);
66 const int input_depth = input_shape.dims(3);
67 const int output_depth = filter_shape.dims(0);
68
69 cmsis_nn_dims input_dims;
70 input_dims.n = batch_size;
71 input_dims.h = input_shape.dims(1);
72 input_dims.w = input_shape.dims(2);
73 input_dims.c = input_depth;
74
75 cmsis_nn_dims filter_dims;
76 filter_dims.n = output_depth;
77 filter_dims.h = filter_shape.dims(1);
78 filter_dims.w = filter_shape.dims(2);
79 filter_dims.c = input_depth;
80
81 cmsis_nn_dims bias_dims;
82 bias_dims.n = 1;
83 bias_dims.h = 1;
84 bias_dims.w = 1;
85 bias_dims.c = output_depth;
86
87 cmsis_nn_dims output_dims;
88 output_dims.n = batch_size;
89 output_dims.h = output_shape.dims(1);
90 output_dims.w = output_shape.dims(2);
91 output_dims.c = output_depth;
92
93 auto buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
94 &filter_dims, &output_dims);
95
96 auto buffer = std::make_unique<int8_t[]>(buf_size);
97 assert(buffer != nullptr);
98
99 cmsis_nn_context ctx;
100 ctx.buf = buffer.get();
101 ctx.size = buf_size;
102
103 auto res = arm_depthwise_conv_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims,
104 input_data, &filter_dims, filter_data, &bias_dims,
105 bias_data, &output_dims, output_data);
106
107 assert(res == ARM_CMSIS_NN_SUCCESS);
108 if (res != ARM_CMSIS_NN_SUCCESS)
109 return CmsisNNError;
110 return Ok;
111}

References onert_micro::CmsisNNError, onert_micro::core::ConvQuant::depth_multiplier, onert_micro::core::ConvQuant::dilation_height_factor, onert_micro::core::ConvQuant::dilation_width_factor, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::ConvQuant::input_offset, onert_micro::Ok, onert_micro::core::ConvQuant::output_offset, output_shape, onert_micro::core::ConvQuant::pad_h, onert_micro::core::ConvQuant::pad_w, onert_micro::core::ConvQuant::per_channel_output_multiplier, onert_micro::core::ConvQuant::per_channel_output_shift, onert_micro::core::ConvQuant::quantized_activation_max, onert_micro::core::ConvQuant::quantized_activation_min, onert_micro::core::ConvQuant::stride_height, and onert_micro::core::ConvQuant::stride_width.

◆ Dequantize()

template<typename InputT , typename OutputT >
OMStatus onert_micro::execute::pal::Dequantize ( const core::QuantizationParams  op_params,
const uint32_t  flat_size,
const InputT *  input_data,
OutputT *  output_data 
)

Definition at line 35 of file PALDequantize.h.

37{
38 const int32_t zero_point = op_params.zero_point;
39 const double scale = op_params.scale;
40
41 for (uint32_t i = 0; i < flat_size; i++)
42 {
43 const int32_t val = input_data[i];
44 const auto result = static_cast<OutputT>(scale * (val - zero_point));
45 output_data[i] = result;
46 }
47 return Ok;
48}

References onert_micro::Ok, onert_micro::core::QuantizationParams::scale, and onert_micro::core::QuantizationParams::zero_point.
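
A minimal sketch, assuming core::QuantizationParams is default-constructible with the zero_point and scale fields referenced above:

#include <cstdint>
// Assumes the relevant PAL header (PALDequantize.h) is on the include path.

using namespace onert_micro;

void dequantize_sketch()
{
  core::QuantizationParams op_params{};
  op_params.zero_point = -1;
  op_params.scale = 0.5;

  const int8_t input[] = {-1, 1, 3, 5};
  float output[4] = {};

  execute::pal::Dequantize<int8_t, float>(op_params, 4, input, output);
  // output == {0.f, 1.f, 2.f, 3.f}   (scale * (value - zero_point))
}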

◆ Div()

template<typename T >
OMStatus onert_micro::execute::pal::Div ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 31 of file PALDivCommon.h.

33{
34 ArithmeticOp<T, DivFn<T>>(params, flat_size, input1_data, input2_data, output_data);
35 return Ok;
36}

References onert_micro::Ok.

◆ ElementWise()

template<typename T >
void onert_micro::execute::pal::ElementWise ( const uint32_t  size,
const core::ArithmeticQuantParams params,
const T *  input1_data,
const T *  input2_data,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 69 of file PALArithmeticOpCommon.h.

72{
73 for (int i = 0; i < size; ++i)
74 {
75 output_data[i] = binary_func(input1_data[i], input2_data[i], params);
76 }
77}

References size.

Referenced by Sub().

◆ Elu()

OMStatus onert_micro::execute::pal::Elu ( const int  flat_size,
const float *  input_data,
float *  output_data 
)
inline

Definition at line 32 of file PALElu.h.

33{
34 for (int i = 0; i < flat_size; i++)
35 {
36 float val = input_data[i];
37 float result = val < 0.0f ? std::exp(val) - 1 : val;
38 output_data[i] = result;
39 }
40
41 return Ok;
42}

References onert_micro::Ok.
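
A minimal sketch: negative inputs map to exp(x) - 1, non-negative inputs pass through unchanged.

// Assumes the relevant PAL header (PALElu.h) is on the include path.
using namespace onert_micro;

void elu_sketch()
{
  const float input[] = {-1.f, 0.f, 2.f};
  float output[3] = {};

  execute::pal::Elu(3, input, output);
  // output[0] is approximately -0.632 (exp(-1) - 1), output[1] == 0.f, output[2] == 2.f
}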

◆ EqualFn()

template<typename T >
bool onert_micro::execute::pal::EqualFn ( T  lhs,
T  rhs 
)
inline

Definition at line 59 of file PALComparisons.h.

59{ return lhs == rhs; }

◆ Exp()

template<typename T >
OMStatus onert_micro::execute::pal::Exp ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALExpCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::exp(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ Fill()

template<typename T >
OMStatus onert_micro::execute::pal::Fill ( const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 32 of file PALFill.h.

33{
34 const uint32_t flat_size = output_shape.flatSize();
35
36 if (flat_size == -1)
37 return UnknownError;
38
39 assert(input_data != nullptr);
40 assert(output_data != nullptr);
41
42 for (int i = 0; i < flat_size; ++i)
43 {
44 output_data[i] = input_data[0];
45 }
46
47 return Ok;
48}

References luci_interpreter::RuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.
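
A minimal sketch: Fill() reads only input_data[0] and writes it to every element of the output.

#include <cstdint>
// Assumes the relevant PAL header (PALFill.h) is on the include path.

using namespace onert_micro;

void fill_sketch()
{
  core::OMRuntimeShape out_shape(2);
  out_shape.setDim(0, 2);
  out_shape.setDim(1, 3);

  const int32_t value[] = {7};
  int32_t output[6] = {};

  execute::pal::Fill<int32_t>(value, out_shape, output);
  // output == {7, 7, 7, 7, 7, 7}
}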

◆ flatSizeSkipDim()

int onert_micro::execute::pal::flatSizeSkipDim ( const int32_t *  dims_data,
int  skip_dim,
int  num_dims 
)
inline

Definition at line 210 of file PALUtils.h.

211{
212 int flat_size = 1;
213 for (int i = 0; i < num_dims; ++i)
214 {
215 flat_size *= (i == skip_dim) ? 1 : dims_data[i];
216 }
217 return flat_size;
218}

Referenced by FullyConnected(), FullyConnected(), FullyConnected(), and FullyConnected< int8_t >().
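
A worked example: for dims {2, 3, 4}, skipping dimension 1 yields 2 * 4 = 8. The FullyConnected() kernels use this to derive the batch count from every output dimension except the last.

#include <cstdint>
// Assumes PALUtils.h is on the include path.

using namespace onert_micro;

void flat_size_skip_dim_sketch()
{
  const int32_t dims[] = {2, 3, 4};
  const int batches = execute::pal::flatSizeSkipDim(dims, /*skip_dim=*/1, /*num_dims=*/3);
  // batches == 8
  (void)batches;
}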

◆ Floor()

template<typename T >
OMStatus onert_micro::execute::pal::Floor ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 33 of file PALFloorCommon.h.

35{
36 const uint32_t flat_size = input_shape.flatSize();
37
38 if (flat_size == -1)
39 return UnknownError;
40
41 assert(input_data != nullptr);
42 assert(output_data != nullptr);
43
44 // check that input and output dimensions are equal
45 int N = input_shape.dimensionsCount();
46 assert(N == output_shape.dimensionsCount());
47
48 // check that sizes of all dimensions are equal
49 for (int i = 0; i < N; ++i)
50 {
51 assert(input_shape.dims(i) == output_shape.dims(i));
52 }
53
54 for (int i = 0; i < flat_size; i++)
55 {
56 output_data[i] = std::floor(input_data[i]);
57 }
58
59 return Ok;
60}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ FloorDiv()

void onert_micro::execute::pal::FloorDiv ( const int  flat_size,
const float *  input1_data,
const float *  input2_data,
float *  output_data 
)
inline

Definition at line 32 of file PALFloorDivCommon.h.

34{
35 BinaryOp<float, FloorDivFn<float>>(flat_size, input1_data, input2_data, output_data);
36}

◆ FloorMod()

void onert_micro::execute::pal::FloorMod ( const int  flat_size,
const float *  input1_data,
const float *  input2_data,
float *  output_data 
)
inline

Definition at line 32 of file PALFloorModCommon.h.

34{
35 BinaryOp<float, FloorModFn<float>>(flat_size, input1_data, input2_data, output_data);
36}

◆ FullyConnected() [1/3]

template<typename WeightType >
OMStatus onert_micro::execute::pal::FullyConnected ( const core::FullyConnectedParams params,
const float *  input_data,
const core::OMRuntimeShape filter_shape,
const WeightType *  filter_data,
const float *  bias_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 80 of file PALFullyConnectedCommon.h.

84{
85 const float output_activation_min = params.float_activation_min;
86 const float output_activation_max = params.float_activation_max;
87
88 const int batches = flatSizeSkipDim(output_shape.dimsData(), output_shape.dimensionsCount() - 1,
89 output_shape.dimensionsCount());
90 const int output_depth = output_shape.dims(output_shape.dimensionsCount() - 1);
91 const int accum_depth = filter_shape.dims(filter_shape.dimensionsCount() - 1);
92
93 for (int b = 0; b < batches; ++b)
94 {
95 const float *weight_scale_ptr = params.weights_scales;
96 for (int out_c = 0; out_c < output_depth; ++out_c)
97 {
98 float total = 0.f;
99 for (int d = 0; d < accum_depth; ++d)
100 {
101 auto input_value = input_data[b * accum_depth + d];
102 if (std::is_same<WeightType, float>::value)
103 {
104 total += input_value * filter_data[out_c * accum_depth + d];
105 }
106 else
107 {
108 const float filter_scale = *weight_scale_ptr;
109 const float filter_value =
110 static_cast<float>(filter_data[out_c * accum_depth + d]) * filter_scale;
111 total += input_value * filter_value;
112 }
113 }
114 float bias_value = 0.0f;
115 if (bias_data)
116 {
117 bias_value = bias_data[out_c];
118 }
119 output_data[out_c + output_depth * b] =
120 std::min(std::max(total + bias_value, output_activation_min), output_activation_max);
121
122 if (std::is_same<WeightType, int8_t>::value)
123 {
124 if (params.is_channel_wise_quant)
125 weight_scale_ptr++;
126 }
127 }
128 }
129 return Ok;
130}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), flatSizeSkipDim(), onert_micro::core::FullyConnectedParams::float_activation_max, onert_micro::core::FullyConnectedParams::float_activation_min, onert_micro::core::FullyConnectedParams::is_channel_wise_quant, onert_micro::Ok, output_shape, and onert_micro::core::FullyConnectedParams::weights_scales.

◆ FullyConnected() [2/3]

template<typename InputType , typename WeightType , typename OutputType , typename BiasType >
OMStatus onert_micro::execute::pal::FullyConnected ( const core::FullyConnectedParams params,
const InputType *  input_data,
const core::OMRuntimeShape filter_shape,
const WeightType *  filter_data,
const BiasType *  bias_data,
const core::OMRuntimeShape output_shape,
OutputType *  output_data 
)

Definition at line 34 of file PALFullyConnectedCommon.h.

38{
39 const int32_t input_offset = params.input_offset;
40 const int32_t filter_offset = params.weights_offset;
41 const int32_t output_offset = params.output_offset;
42 const int32_t output_multiplier = params.output_multiplier;
43 const int output_shift = params.output_shift;
44 const int32_t output_activation_min = params.quantized_activation_min;
45 const int32_t output_activation_max = params.quantized_activation_max;
46
47 const int filter_dim_count = filter_shape.dimensionsCount();
48 const int output_dim_count = output_shape.dimensionsCount();
49 const int batches =
50 flatSizeSkipDim(output_shape.dimsData(), output_dim_count - 1, output_dim_count);
51 const int output_depth = output_shape.dims(output_dim_count - 1);
52
53 const int accum_depth = filter_shape.dims(filter_dim_count - 1);
54 for (int b = 0; b < batches; ++b)
55 {
56 for (int out_c = 0; out_c < output_depth; ++out_c)
57 {
58 BiasType acc = 0;
59 for (int d = 0; d < accum_depth; ++d)
60 {
61 int32_t input_val = input_data[b * accum_depth + d];
62 int32_t filter_val = filter_data[out_c * accum_depth + d];
63 acc += (filter_val + filter_offset) * (input_val + input_offset);
64 }
65 if (bias_data)
66 {
67 acc += bias_data[out_c];
68 }
69 int32_t acc_scaled = multiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
70 acc_scaled += output_offset;
71 acc_scaled = std::max(acc_scaled, output_activation_min);
72 acc_scaled = std::min(acc_scaled, output_activation_max);
73 output_data[out_c + output_depth * b] = static_cast<OutputType>(acc_scaled);
74 }
75 }
76 return Ok;
77}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), flatSizeSkipDim(), onert_micro::core::FullyConnectedParams::input_offset, multiplyByQuantizedMultiplier(), onert_micro::Ok, onert_micro::core::FullyConnectedParams::output_multiplier, onert_micro::core::FullyConnectedParams::output_offset, output_shape, onert_micro::core::FullyConnectedParams::output_shift, onert_micro::core::FullyConnectedParams::quantized_activation_max, onert_micro::core::FullyConnectedParams::quantized_activation_min, and onert_micro::core::FullyConnectedParams::weights_offset.

◆ FullyConnected() [3/3]

template<>
OMStatus onert_micro::execute::pal::FullyConnected ( const core::FullyConnectedParams params,
const int16_t *  input_data,
const core::OMRuntimeShape filter_shape,
const int8_t *  filter_data,
const int64_t *  bias_data,
const core::OMRuntimeShape output_shape,
int16_t *  output_data 
)

Definition at line 98 of file PALFullyConnected.h.

102{
103 const int filter_dim_count = filter_shape.dimensionsCount();
104 const int output_dim_count = output_shape.dimensionsCount();
105 const int batches =
106 flatSizeSkipDim(output_shape.dimsData(), output_dim_count - 1, output_dim_count);
107 const int output_depth = output_shape.dims(output_dim_count - 1);
108 const int accum_depth = filter_shape.dims(filter_dim_count - 1);
109
110 cmsis_nn_fc_params fc_params;
111 fc_params.input_offset = params.input_offset;
112 fc_params.output_offset = params.output_offset;
113 fc_params.filter_offset = params.weights_offset;
114 fc_params.activation.min = params.quantized_activation_min;
115 fc_params.activation.max = params.quantized_activation_max;
116
117 cmsis_nn_per_tensor_quant_params quant_params;
118 quant_params.multiplier = params.output_multiplier;
119 quant_params.shift = params.output_shift;
120
121 cmsis_nn_dims input_dims;
122 input_dims.n = batches;
123 input_dims.h = 1;
124 input_dims.w = 1;
125 input_dims.c = accum_depth;
126
127 cmsis_nn_dims filter_dims;
128 filter_dims.n = accum_depth;
129 filter_dims.h = 1;
130 filter_dims.w = 1;
131 filter_dims.c = output_depth;
132
133 cmsis_nn_dims bias_dims;
134 bias_dims.n = 1;
135 bias_dims.h = 1;
136 bias_dims.w = 1;
137 bias_dims.c = output_depth;
138
139 cmsis_nn_dims output_dims;
140 output_dims.n = batches;
141 output_dims.h = 1;
142 output_dims.w = 1;
143 output_dims.c = output_depth;
144
145 int32_t buf_size = arm_fully_connected_s16_get_buffer_size(&filter_dims);
146 auto buffer = std::make_unique<int8_t[]>(buf_size);
147 assert(buffer != nullptr);
148
149 cmsis_nn_context ctx;
150 ctx.buf = buffer.get();
151 ctx.size = buf_size;
152
153 auto res =
154 arm_fully_connected_s16(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
155 filter_data, &bias_dims, bias_data, &output_dims, output_data);
156 assert(res == ARM_CMSIS_NN_SUCCESS);
157
158 if (res != ARM_CMSIS_NN_SUCCESS)
159 return CmsisNNError;
160
161 return Ok;
162}

References onert_micro::CmsisNNError, luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), flatSizeSkipDim(), onert_micro::core::FullyConnectedParams::input_offset, onert_micro::Ok, onert_micro::core::FullyConnectedParams::output_multiplier, onert_micro::core::FullyConnectedParams::output_offset, output_shape, onert_micro::core::FullyConnectedParams::output_shift, onert_micro::core::FullyConnectedParams::quantized_activation_max, onert_micro::core::FullyConnectedParams::quantized_activation_min, and onert_micro::core::FullyConnectedParams::weights_offset.

◆ FullyConnected< int8_t >()

template<>
OMStatus onert_micro::execute::pal::FullyConnected< int8_t > ( const core::FullyConnectedParams params,
const int8_t *  input_data,
const core::OMRuntimeShape filter_shape,
const int8_t *  filter_data,
const int32_t *  bias_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 32 of file PALFullyConnected.h.

36{
37 const int filter_dim_count = filter_shape.dimensionsCount();
38 const int output_dim_count = output_shape.dimensionsCount();
39 const int batches =
40 flatSizeSkipDim(output_shape.dimsData(), output_dim_count - 1, output_dim_count);
41 const int output_depth = output_shape.dims(output_dim_count - 1);
42 const int accum_depth = filter_shape.dims(filter_dim_count - 1);
43
44 cmsis_nn_fc_params fc_params;
45 fc_params.input_offset = params.input_offset;
46 fc_params.output_offset = params.output_offset;
47 fc_params.filter_offset = params.weights_offset;
48 fc_params.activation.min = params.quantized_activation_min;
49 fc_params.activation.max = params.quantized_activation_max;
50
51 cmsis_nn_per_tensor_quant_params quant_params;
52 quant_params.multiplier = params.output_multiplier;
53 quant_params.shift = params.output_shift;
54
55 cmsis_nn_dims input_dims;
56 input_dims.n = batches;
57 input_dims.h = 1;
58 input_dims.w = 1;
59 input_dims.c = accum_depth;
60
61 cmsis_nn_dims filter_dims;
62 filter_dims.n = accum_depth;
63 filter_dims.h = 1;
64 filter_dims.w = 1;
65 filter_dims.c = output_depth;
66
67 cmsis_nn_dims bias_dims;
68 bias_dims.n = 1;
69 bias_dims.h = 1;
70 bias_dims.w = 1;
71 bias_dims.c = output_depth;
72
73 cmsis_nn_dims output_dims;
74 output_dims.n = batches;
75 output_dims.h = 1;
76 output_dims.w = 1;
77 output_dims.c = output_depth;
78
79 int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
80 auto buffer = std::make_unique<int8_t[]>(buf_size);
81 assert(buffer != nullptr);
82
83 cmsis_nn_context ctx;
84 ctx.buf = buffer.get();
85 ctx.size = buf_size;
86
87 auto res =
88 arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
89 filter_data, &bias_dims, bias_data, &output_dims, output_data);
90 assert(res == ARM_CMSIS_NN_SUCCESS);
91 if (res != ARM_CMSIS_NN_SUCCESS)
92 return CmsisNNError;
93
94 return Ok;
95}

References onert_micro::CmsisNNError, luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), flatSizeSkipDim(), onert_micro::core::FullyConnectedParams::input_offset, onert_micro::Ok, onert_micro::core::FullyConnectedParams::output_multiplier, onert_micro::core::FullyConnectedParams::output_offset, output_shape, onert_micro::core::FullyConnectedParams::output_shift, onert_micro::core::FullyConnectedParams::quantized_activation_max, onert_micro::core::FullyConnectedParams::quantized_activation_min, and onert_micro::core::FullyConnectedParams::weights_offset.

◆ GatherND()

template<typename ParamsT , typename IndicesT >
OMStatus onert_micro::execute::pal::GatherND ( core::OMRuntimeShape  params_shape,
const ParamsT *  param_data,
core::OMRuntimeShape  indices_shape,
const IndicesT *  index_data,
ParamsT *  output_data 
)
inline

Definition at line 35 of file PALGatherND.h.

38{
39 const int indices_dims = indices_shape.dimensionsCount();
40 const int indices_nd = indices_shape.dims(indices_dims - 1);
41 const int params_dims = params_shape.dimensionsCount();
42
43 int n_slices = 1;
44 for (int i = 0; i < indices_dims - 1; ++i)
45 {
46 n_slices *= indices_shape.dims(i);
47 }
48
49 // If indices[-1] == params.rank, fetch single elements.
50 // If indices[-1] < params.rank, fetch slices.
51 int slice_size = 1;
52 for (int i = indices_nd; i < params_dims; ++i)
53 {
54 slice_size *= params_shape.dims(i);
55 }
56
57 int params_flat_size = params_shape.flatSize();
58 int remain_flat_size = params_flat_size;
59
60 // Number of elements per dimension
61 int dims_to_count[MAX_INDICES_ND];
62 for (int i = 0; i < indices_nd; ++i)
63 {
64 dims_to_count[i] = remain_flat_size / params_shape.dims(i);
65 remain_flat_size = dims_to_count[i];
66 }
67
68 for (int i = 0; i < n_slices; ++i)
69 {
70 int from_pos = 0;
71 for (int j = 0; j < indices_nd; ++j)
72 {
73 int offset = i * indices_nd + j;
74 IndicesT index = index_data[offset];
75 from_pos += index * dims_to_count[j];
76 }
77 if (from_pos < 0 || from_pos + slice_size > params_flat_size)
78 {
79 assert(false && "GatherND error");
80 return UnknownError;
81 }
82 std::memcpy(output_data + i * slice_size, param_data + from_pos, sizeof(ParamsT) * slice_size);
83 }
84
85 return Ok;
86}

References onert_micro::core::OMRuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::flatSize(), MAX_INDICES_ND, offset(), onert_micro::Ok, and onert_micro::UnknownError.
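
A worked sketch: with a [3, 2] float matrix and indices of shape [2, 1], each index selects one row (the last indices dimension is 1, which is less than the params rank, so whole slices of size 2 are copied).

#include <cstdint>
// Assumes the relevant PAL header (PALGatherND.h) is on the include path.

using namespace onert_micro;

void gather_nd_sketch()
{
  core::OMRuntimeShape params_shape(2), indices_shape(2);
  params_shape.setDim(0, 3);  params_shape.setDim(1, 2);
  indices_shape.setDim(0, 2); indices_shape.setDim(1, 1);

  const float params_data[] = {1.f, 2.f,   // row 0
                               3.f, 4.f,   // row 1
                               5.f, 6.f};  // row 2
  const int32_t indices[] = {2, 0};
  float output[4] = {};

  execute::pal::GatherND<float, int32_t>(params_shape, params_data, indices_shape, indices, output);
  // output == {5, 6, 1, 2}
}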

◆ getActivationParams() [1/3]

template<typename P >
void onert_micro::execute::pal::getActivationParams ( const P &  params,
float *  min,
float *  max 
)
inline

Definition at line 126 of file PALUtils.h.

127{
128 *min = params.float_activation_min;
129 *max = params.float_activation_max;
130}

◆ getActivationParams() [2/3]

template<typename P >
void onert_micro::execute::pal::getActivationParams ( const P &  params,
int32_t *  min,
int32_t *  max 
)
inline

Definition at line 120 of file PALUtils.h.

121{
122 *min = params.int32_activation_min;
123 *max = params.int32_activation_max;
124}

Referenced by ArithmeticOp(), ArithmeticOpScalar(), and BroadcastArithmeticOp4DSlow().

◆ getActivationParams() [3/3]

template<typename P >
void onert_micro::execute::pal::getActivationParams ( const P &  params,
int64_t *  min,
int64_t *  max 
)
inline

Definition at line 132 of file PALUtils.h.

133{
134 *min = params.int64_activation_min;
135 *max = params.int64_activation_max;
136}

◆ getUpLowerWeightTensorDepth()

std::pair< uint32_t, uint32_t > onert_micro::execute::pal::getUpLowerWeightTensorDepth ( core::OpTrainableRankType  rank,
const uint32_t  output_depth 
)
inline

Definition at line 30 of file PALUtils.h.

32{
33 std::pair<uint32_t, uint32_t> result(0u, output_depth);
34
35 switch (rank)
36 {
37 case core::ALL:
38 break;
39 case core::UP_1_2_PART:
40 result.second = static_cast<uint32_t>(static_cast<float>(output_depth) / 2.f);
41 break;
42 case core::LOWER_1_2_PART:
43 result.first = static_cast<uint32_t>(static_cast<float>(output_depth) / 2.f);
44 break;
45 default:
46 assert("Unsupported type");
47 break;
48 }
49
50 return result;
51}

References onert_micro::core::ALL, onert_micro::core::LOWER_1_2_PART, and onert_micro::core::UP_1_2_PART.

Referenced by onert_micro::train::pal::Conv2DWeightGrad(), and onert_micro::train::pal::FullyConnectedWeightGrad().
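
A worked example with output_depth == 8: the rank type selects which half of the channel range a training kernel is allowed to update (first is the lower bound, second the upper bound).

// Assumes PALUtils.h is on the include path.
using namespace onert_micro;

void rank_range_sketch()
{
  auto all   = execute::pal::getUpLowerWeightTensorDepth(core::ALL, 8);            // {0, 8}
  auto upper = execute::pal::getUpLowerWeightTensorDepth(core::UP_1_2_PART, 8);    // {0, 4}
  auto lower = execute::pal::getUpLowerWeightTensorDepth(core::LOWER_1_2_PART, 8); // {4, 8}
  (void)all; (void)upper; (void)lower;
}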

◆ GreaterEqualFn()

template<typename T >
bool onert_micro::execute::pal::GreaterEqualFn ( T  lhs,
T  rhs 
)
inline

Definition at line 61 of file PALComparisons.h.

61{ return lhs >= rhs; }

◆ GreaterFn()

template<typename T >
bool onert_micro::execute::pal::GreaterFn ( T  lhs,
T  rhs 
)
inline

Definition at line 60 of file PALComparisons.h.

60{ return lhs > rhs; }

◆ GRU()

OMStatus onert_micro::execute::pal::GRU ( const float *  input_data,
const float *  weight_input_data,
const float *  weight_hidden_data,
const float *  bias_input_data,
const float *  bias_hidden_data,
const float *  hidden_state_data,
float *  output_data,
float *  output_input_data,
float *  output_hidden_data,
const core::OMRuntimeShape input_shape,
const core::OMRuntimeShape output_shape,
const core::OMRuntimeShape weight_input_shape,
const core::OMRuntimeShape weight_hidden_shape,
const size_t  intermediate_buffer_size,
float *  intermediate_buffer 
)

Definition at line 183 of file PALGRUCommon.h.

191{
192 const int32_t time = input_shape.dims(0);
193
194 core::OMRuntimeShape output_shape_fc(2);
195 output_shape_fc.setDim(0, 1);
196 output_shape_fc.setDim(1, weight_hidden_shape.dims(0));
197
198 std::memcpy(output_data, hidden_state_data, output_shape.flatSize() * sizeof(float));
199
200 for (int i = 0; i < time; ++i)
201 {
202 calculateGRU(input_data, weight_input_data, weight_hidden_data, bias_input_data,
203 bias_hidden_data, output_data, input_shape, output_shape, weight_input_shape,
204 weight_hidden_shape, output_input_data, output_hidden_data, output_shape_fc,
205 intermediate_buffer);
206 input_data += input_shape.dims(2);
207 if (intermediate_buffer_size != 0)
208 {
209 assert(intermediate_buffer != nullptr);
210 intermediate_buffer += intermediate_buffer_size;
211 }
212 }
213 return Ok;
214}

References onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::core::OMRuntimeShape::setDim().

◆ L2Normalization()

OMStatus onert_micro::execute::pal::L2Normalization ( const core::L2NormalizationParams params,
const float *  input_data,
float *  output_data 
)
inline

Definition at line 33 of file PALL2Normalize.h.

35{
36
37 const int outer_size = params.num_rows;
38 const int depth = params.row_size;
39 const int epsilon = params.epsilon;
40
41 for (int i = 0; i < outer_size; ++i)
42 {
43 float squared_l2_norm = 0;
44 for (int c = 0; c < depth; ++c)
45 {
46 const float val = input_data[depth * i + c];
47 squared_l2_norm += val * val;
48 }
49 float l2_norm = std::sqrt(squared_l2_norm);
50 l2_norm = std::max(l2_norm, static_cast<float>(epsilon));
51 for (int c = 0; c < depth; ++c)
52 {
53 output_data[depth * i + c] = input_data[depth * i + c] / l2_norm;
54 }
55 }
56
57 return Ok;
58}

References onert_micro::core::L2NormalizationParams::epsilon, onert_micro::core::L2NormalizationParams::num_rows, onert_micro::Ok, and onert_micro::core::L2NormalizationParams::row_size.
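
A minimal usage sketch, assuming the include path below and that core::L2NormalizationParams is a plain struct whose documented num_rows, row_size, and epsilon fields can be assigned directly:

#include <cstdio>

#include "PALL2Normalize.h" // assumed include path; expected to declare core::L2NormalizationParams

int main()
{
  onert_micro::core::L2NormalizationParams params;
  params.num_rows = 1; // one row of depth 3
  params.row_size = 3;
  params.epsilon = 1e-6f;

  const float input[3] = {3.0f, 0.0f, 4.0f};
  float output[3] = {};
  onert_micro::execute::pal::L2Normalization(params, input, output);

  // The row is divided by its L2 norm (5), giving {0.6, 0.0, 0.8}.
  std::printf("%.2f %.2f %.2f\n", output[0], output[1], output[2]);
  return 0;
}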

◆ L2Pool()

OMStatus onert_micro::execute::pal::L2Pool ( const core::Pool2DParams params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)

Definition at line 34 of file PALL2Pool2DCommon.h.

37{
38 const int32_t batches = input_shape.dims(0);
39 const int32_t depth = output_shape.dims(3);
40 const int32_t input_height = input_shape.dims(1);
41 const int32_t input_width = input_shape.dims(2);
42 const int32_t output_height = output_shape.dims(1);
43 const int32_t output_width = output_shape.dims(2);
44 const int32_t stride_height = params.stride_h;
45 const int32_t stride_width = params.stride_w;
46 for (int batch = 0; batch < batches; ++batch)
47 {
48 for (int out_y = 0; out_y < output_height; ++out_y)
49 {
50 for (int out_x = 0; out_x < output_width; ++out_x)
51 {
52 for (int channel = 0; channel < depth; ++channel)
53 {
54 const int in_x_origin = (out_x * stride_width) - params.pad_w;
55 const int in_y_origin = (out_y * stride_height) - params.pad_h;
56 // Compute the boundaries of the filter region clamped so as to
57 // ensure that the filter window fits in the input array.
58 const int filter_x_start = std::max(0, -in_x_origin);
59 const int filter_x_end = std::min(params.filter_w, input_width - in_x_origin);
60 const int filter_y_start = std::max(0, -in_y_origin);
61 const int filter_y_end = std::min(params.filter_h, input_height - in_y_origin);
62 float sum_squares = 0.f;
63 int filter_count = 0;
64 for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
65 {
66 for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
67 {
68 const int in_x = in_x_origin + filter_x;
69 const int in_y = in_y_origin + filter_y;
70 const float val =
71 input_data[offset(input_shape.dimsData(), batch, in_y, in_x, channel)];
72 sum_squares += val * val;
73 filter_count++;
74 }
75 }
76 assert(filter_count != 0);
 77 if (filter_count == 0)
 78 {
 79 std::cerr << "filter_count is zero" << std::endl;
 80 return FailedCheckCondition;
 81 }
82 const float l2pool_result = std::sqrt(sum_squares / filter_count);
83 output_data[offset(output_shape.dimsData(), batch, out_y, out_x, channel)] =
84 activationFunctionWithMinMax(l2pool_result, params.activation_min,
85 params.activation_max);
86 }
87 }
88 }
89 }
90 return Ok;
91}

References onert_micro::core::Pool2DParams::activation_max, onert_micro::core::Pool2DParams::activation_min, activationFunctionWithMinMax(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), onert_micro::FailedCheckCondition, onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, offset(), onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.

◆ LessEqualFn()

template<typename T >
bool onert_micro::execute::pal::LessEqualFn ( T  lhs,
T  rhs 
)
inline

Definition at line 58 of file PALComparisons.h.

58{ return lhs <= rhs; }

◆ LessFn()

template<typename T >
bool onert_micro::execute::pal::LessFn ( T  lhs,
T  rhs 
)
inline

Definition at line 57 of file PALComparisons.h.

57{ return lhs < rhs; }

◆ Log()

template<typename T >
OMStatus onert_micro::execute::pal::Log ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALLogCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::log(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ Logistic() [1/2]

OMStatus onert_micro::execute::pal::Logistic ( const int  flat_size,
const float *  input_data,
float *  output_data 
)
inline

Definition at line 32 of file PALLogistic.h.

33{
34 const float cutoff_upper = 16.619047164916992188f;
35 const float cutoff_lower = -9.f;
36
37 // Rational for using approximation in reference kernel.
38 // 0. This approximation gives enough precision for float.
39 // 1. This works around an issue on an embedded chipset where exp() does not
40 // return correctly as expected - exp(x) should return inf when overflown
41 // not 1.701417 IEEE 754 defines representation for inf.
42 // 2. This will speed up calculation and is matching the behavior in the
43 // optimized kernels. (check the definition of scalar_logistic_op<float>)
44
45 for (int i = 0; i < flat_size; i++)
46 {
47 float val = input_data[i];
48 float result;
49 if (val > cutoff_upper)
50 {
51 result = 1.0f;
52 }
53 else if (val < cutoff_lower)
54 {
55 result = std::exp(val);
56 }
57 else
58 {
59 result = 1.f / (1.f + std::exp(-val));
60 }
61 output_data[i] = result;
62 }
63 return Ok;
64}

References onert_micro::Ok.

◆ Logistic() [2/2]

OMStatus onert_micro::execute::pal::Logistic ( const int  flat_size,
const int8_t *  input_data,
float  input_scale,
int  input_zero_point,
int8_t *  output_data,
float  output_scale,
int  output_zero_point 
)
inline

Definition at line 66 of file PALLogistic.h.

69{
70 const float cutoff_upper = 16.619047164916992188f;
71 const float cutoff_lower = -9.f;
72
73 // Rational for using approximation in reference kernel.
74 // 0. This approximation gives enough precision for float.
75 // 1. This works around an issue on an embedded chipset where exp() does not
76 // return correctly as expected - exp(x) should return inf when overflown
77 // not 1.701417 IEEE 754 defines representation for inf.
78 // 2. This will speed up calculation and is matching the behavior in the
79 // optimized kernels. (check the definition of scalar_logistic_op<float>)
80
81 for (int i = 0; i < flat_size; i++)
82 {
83 // Dequantize.
84 float val = static_cast<float>((input_data[i] - input_zero_point) * input_scale);
85 float result;
86 if (val > cutoff_upper)
87 {
88 result = 1.0f;
89 }
90 else if (val < cutoff_lower)
91 {
92 result = std::exp(val);
93 }
94 else
95 {
96 result = 1.f / (1.f + std::exp(-val));
97 }
98 // Requantize
99 int8_t output = static_cast<int8_t>(std::round(result / output_scale) + output_zero_point);
100 output_data[i] = output;
101 }
102 return Ok;
103}

References onert_micro::Ok.

◆ LogSoftmax()

OMStatus onert_micro::execute::pal::LogSoftmax ( const core::LogSoftmaxParams params,
const float *  input_data,
float *  output_data 
)
inline

Definition at line 32 of file PALLogSoftmax.h.

34{
35 const int outer_size = params.num_rows;
36 const int depth = params.row_size;
37
38 for (int i = 0; i < outer_size; ++i)
39 {
40 // Find max element value which we'll use to ensure numerical stability
41 // taking advantage of the following equality:
42 // log(exp(x[i])/sum(exp(x[i]))) == log(exp(x[i]+C)/sum(exp(x[i]+C)))
43 float max = std::numeric_limits<float>::lowest();
44 for (int c = 0; c < depth; ++c)
45 {
46 max = std::max(max, input_data[i * depth + c]);
47 }
48
49 // Compute sum.
50 float sum = 0.f;
51 for (int c = 0; c < depth; ++c)
52 {
53 sum += std::exp(input_data[i * depth + c] - max);
54 }
55
56 // Compute result.
57 const float log_sum = std::log(sum);
58 for (int c = 0; c < depth; ++c)
59 {
60 output_data[i * depth + c] = input_data[i * depth + c] - max - log_sum;
61 }
62 }
63
64 return Ok;
65}

References onert_micro::core::LogSoftmaxParams::num_rows, onert_micro::Ok, and onert_micro::core::LogSoftmaxParams::row_size.

◆ MatchingDim()

int onert_micro::execute::pal::MatchingDim ( const core::OMRuntimeShape shape1,
int  index1,
const core::OMRuntimeShape shape2,
int  index2 
)
inline

Definition at line 200 of file PALUtils.h.

202{
203 assert(shape1.dims(index1) == shape2.dims(index2));
204 return shape1.dims(index1);
205}

References onert_micro::core::OMRuntimeShape::dims().

◆ Maximum()

OMStatus onert_micro::execute::pal::Maximum ( const int  flat_size,
const float *  input1_data,
const float *  input2_data,
float *  output_data 
)
inline

Definition at line 32 of file PALMaximumCommon.h.

34{
35 return BinaryOp<float, MaximumFn<float>>(flat_size, input1_data, input2_data, output_data);
36}

◆ MaxPool() [1/2]

OMStatus onert_micro::execute::pal::MaxPool ( const core::Pool2DParams params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)

Definition at line 32 of file PALMaxPool2DCommon.h.

35{
36 const int32_t batches = input_shape.dims(0);
37 const int32_t depth = output_shape.dims(3);
38 const int32_t input_height = input_shape.dims(1);
39 const int32_t input_width = input_shape.dims(2);
40 const int32_t output_height = output_shape.dims(1);
41 const int32_t output_width = output_shape.dims(2);
42 const int32_t stride_height = params.stride_h;
43 const int32_t stride_width = params.stride_w;
44 for (int batch = 0; batch < batches; ++batch)
45 {
46 for (int out_y = 0; out_y < output_height; ++out_y)
47 {
48 for (int out_x = 0; out_x < output_width; ++out_x)
49 {
50 for (int channel = 0; channel < depth; ++channel)
51 {
52 const int in_x_origin = (out_x * stride_width) - params.pad_w;
53 const int in_y_origin = (out_y * stride_height) - params.pad_h;
54 // Compute the boundaries of the filter region clamped so as to
55 // ensure that the filter window fits in the input array.
56 const int filter_x_start = std::max(0, -in_x_origin);
57 const int filter_x_end = std::min(params.filter_w, input_width - in_x_origin);
58 const int filter_y_start = std::max(0, -in_y_origin);
59 const int filter_y_end = std::min(params.filter_h, input_height - in_y_origin);
60 float max = std::numeric_limits<float>::lowest();
61 for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
62 {
63 for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
64 {
65 const int in_x = in_x_origin + filter_x;
66 const int in_y = in_y_origin + filter_y;
67
68 const int input_data_offset =
69 ((batch * input_shape.dims(1) + in_y) * input_shape.dims(2) + in_x) *
70 input_shape.dims(3) +
71 channel;
72
73 max = std::max(max, input_data[input_data_offset]);
74 }
75 }
76 const int output_data_offset =
77 ((batch * output_shape.dims(1) + out_y) * output_shape.dims(2) + out_x) *
 78 output_shape.dims(3) +
 79 channel;
80
81 output_data[output_data_offset] =
82 std::min(std::max(max, params.activation_min), params.activation_max);
83 }
84 }
85 }
86 }
87 return Ok;
88}

References onert_micro::core::Pool2DParams::activation_max, onert_micro::core::Pool2DParams::activation_min, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.

◆ MaxPool() [2/2]

OMStatus onert_micro::execute::pal::MaxPool ( const core::Pool2DParams params,
const core::OMRuntimeShape input_shape,
const int8_t *  input_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 32 of file PALMaxPool2D.h.

35{
36 cmsis_nn_dims input_dims;
37 cmsis_nn_dims output_dims;
38 cmsis_nn_pool_params pool_params;
39 cmsis_nn_dims filter_dims;
40 cmsis_nn_context ctx;
41
42 const int depth = input_shape.dims(3);
43 const int output_width = output_shape.dims(2);
44
45 input_dims.n = 1;
46 input_dims.h = input_shape.dims(1);
47 input_dims.w = input_shape.dims(2);
48 input_dims.c = depth;
49
50 output_dims.n = 1;
51 output_dims.h = output_shape.dims(1);
52 output_dims.w = output_width;
53 output_dims.c = depth;
54
55 pool_params.stride.h = params.stride_h;
56 pool_params.stride.w = params.stride_w;
57 pool_params.padding.h = params.pad_h;
58 pool_params.padding.w = params.pad_w;
59 pool_params.activation.min = params.quantized_activation_min;
60 pool_params.activation.max = params.quantized_activation_max;
61
62 filter_dims.n = 1;
63 filter_dims.h = params.filter_h;
64 filter_dims.w = params.filter_w;
65 filter_dims.c = 1;
66
67 auto res = arm_max_pool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims,
68 &output_dims, output_data);
69
70 assert(res == ARM_CMSIS_NN_SUCCESS);
71 if (res != ARM_CMSIS_NN_SUCCESS)
72 return CmsisNNError;
73
74 return Ok;
75}

References onert_micro::CmsisNNError, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::quantized_activation_max, onert_micro::core::Pool2DParams::quantized_activation_min, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.

◆ Minimum()

OMStatus onert_micro::execute::pal::Minimum ( const int  flat_size,
const float *  input1_data,
const float *  input2_data,
float *  output_data 
)
inline

Definition at line 32 of file PALMinimumCommon.h.

34{
35 return BinaryOp<float, MinimumFn<float>>(flat_size, input1_data, input2_data, output_data);
36}

◆ Mul() [1/3]

OMStatus onert_micro::execute::pal::Mul ( const core::ArithmeticQuantParams params,
const uint32_t  flat_size,
const int8_t *  input1_data,
const int8_t *  input2_data,
int8_t *  output_data 
)

◆ Mul() [2/3]

template<typename InputType , typename OutputType >
OMStatus onert_micro::execute::pal::Mul ( const core::ArithmeticQuantParams params,
uint32_t  size,
const InputType *  input1_data,
const InputType *  input2_data,
OutputType *  output_data 
)

Definition at line 31 of file PALMul.h.

33{
34 for (int i = 0; i < size; ++i)
35 {
36 const int32_t input1_val = params.input1_offset + input1_data[i];
37 const int32_t input2_val = params.input2_offset + input2_data[i];
38 const int32_t unclamped_result =
39 params.output_offset + multiplyByQuantizedMultiplier(input1_val * input2_val,
40 params.output_multiplier,
41 params.output_shift);
42 const int32_t clamped_output = std::min(
43 params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
44 output_data[i] = static_cast<OutputType>(clamped_output);
45 }
46 return Ok;
47}

References onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input2_offset, multiplyByQuantizedMultiplier(), onert_micro::Ok, onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, onert_micro::core::ArithmeticQuantParams::quantized_activation_min, and size.

◆ Mul() [3/3]

template<typename T >
OMStatus onert_micro::execute::pal::Mul ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 36 of file PALMulCommon.h.

38{
39 ArithmeticOp<T, MulFn<T>>(params, flat_size, input1_data, input2_data, output_data);
40 return Ok;
41}

References onert_micro::Ok.

◆ multiplyByQuantizedMultiplier()

int32_t onert_micro::execute::pal::multiplyByQuantizedMultiplier ( int32_t  x,
int32_t  quantized_multiplier,
int  shift 
)
inline

Definition at line 104 of file PALUtils.h.

105{
106 int left_shift = shift > 0 ? shift : 0;
107 int right_shift = shift > 0 ? 0 : -shift;
108 return roundingDivideByPOT(
109 saturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
110}

References roundingDivideByPOT(), and saturatingRoundingDoublingHighMul().

Referenced by BroadcastMul6DSlow(), FullyConnected(), and Mul().
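
A hedged sketch of how the fixed-point helpers above are typically combined: a real rescale factor is split into a normalized Q31 multiplier plus a power-of-two shift, and multiplyByQuantizedMultiplier() applies both to an int32 accumulator. The include path and the example factor 0.15 are assumptions for the illustration, not values taken from the library.

#include <cmath>
#include <cstdint>
#include <cstdio>

#include "PALUtils.h" // assumed include path for multiplyByQuantizedMultiplier()

int main()
{
  // Represent s = 0.15 as quantized_multiplier * 2^shift with the
  // multiplier normalized into [0.5, 1) and stored in Q31: 0.15 = 0.6 * 2^-2.
  const double s = 0.15;
  int shift = 0;
  const double mantissa = std::frexp(s, &shift); // 0.6, shift = -2
  const auto quantized_multiplier = static_cast<int32_t>(std::round(mantissa * (1ll << 31)));

  // Apply the multiplier to an accumulator the same way the quantized kernels do.
  const int32_t acc = 12345;
  const int32_t scaled =
    onert_micro::execute::pal::multiplyByQuantizedMultiplier(acc, quantized_multiplier, shift);

  std::printf("%d * %.2f ~= %d\n", acc, s, scaled); // roughly 1852, i.e. 12345 * 0.15
  return 0;
}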

◆ multiplyByQuantizedMultiplierSmallerThanOneExp()

int32_t onert_micro::execute::pal::multiplyByQuantizedMultiplierSmallerThanOneExp ( int32_t  x,
int32_t  quantized_multiplier,
int  left_shift 
)
inline

Definition at line 112 of file PALUtils.h.

115{
116 return roundingDivideByPOT(saturatingRoundingDoublingHighMul(x, quantized_multiplier),
117 -left_shift);
118}

References roundingDivideByPOT(), and saturatingRoundingDoublingHighMul().

Referenced by AddFunc(), BroadcastComparison4DSlowWithScaling(), ComparisonWithScaling(), and SubFunc().

◆ NdArrayDescsForElementwiseBroadcast()

template<int N>
void onert_micro::execute::pal::NdArrayDescsForElementwiseBroadcast ( const core::OMRuntimeShape input0_shape,
const core::OMRuntimeShape input1_shape,
NdArrayDesc< N > *  desc0_out,
NdArrayDesc< N > *  desc1_out 
)
inline

Definition at line 94 of file ProcessBroadcastShapes.h.

98{
99
100 auto extended_input0_shape = core::OMRuntimeShape::extendedShape(N, input0_shape);
101 auto extended_input1_shape = core::OMRuntimeShape::extendedShape(N, input1_shape);
102
103 // Copy dims to desc, calculating strides.
104 copyDimsToDesc<N>(extended_input0_shape, desc0_out);
105 copyDimsToDesc<N>(extended_input1_shape, desc1_out);
106
107 // Walk over each dimension. If the extents are equal do nothing.
108 // Otherwise, set the desc with extent 1 to have extent equal to the other and
109 // stride 0.
110 for (int i = 0; i < N; ++i)
111 {
112 const int extent0 = extended_input0_shape.dims(i);
113 const int extent1 = extended_input1_shape.dims(i);
114 if (extent0 != extent1)
115 {
116 if (extent0 == 1)
117 {
118 desc0_out->strides[i] = 0;
119 desc0_out->extents[i] = extent1;
120 }
121 else
122 {
123 desc1_out->strides[i] = 0;
124 desc1_out->extents[i] = extent0;
125 }
126 }
127 }
128}

References onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::execute::pal::NdArrayDesc< N >::extents, and onert_micro::execute::pal::NdArrayDesc< N >::strides.

Referenced by BroadcastArithmeticOp4DSlow(), BroadcastBinaryOp4DSlow(), and BroadcastMul6DSlow().

◆ NDOpsHelper()

template<int N, typename Calc >
void onert_micro::execute::pal::NDOpsHelper ( const NdArrayDesc< N > &  output,
const Calc &  calc 
)
inline

Definition at line 87 of file ProcessBroadcastShapes.h.

88{
89 int indexes[N] = {0};
90 NDOpsHelperImpl<N, 0, Calc>(output, calc, indexes);
91}

◆ NDOpsHelperImpl() [1/2]

template<int N, int DIM, typename Calc >
std::enable_if< DIM==N-1, void >::type onert_micro::execute::pal::NDOpsHelperImpl ( const NdArrayDesc< N > &  output,
const Calc &  calc,
int  indexes[N] 
)

Definition at line 65 of file ProcessBroadcastShapes.h.

67{
68 for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM])
69 {
70 calc(indexes);
71 }
72}

◆ NDOpsHelperImpl() [2/2]

template<int N, int DIM, typename Calc >
std::enable_if< DIM!=N-1, void >::type onert_micro::execute::pal::NDOpsHelperImpl ( const NdArrayDesc< N > &  output,
const Calc &  calc,
int  indexes[N] 
)

Definition at line 75 of file ProcessBroadcastShapes.h.

77{
78 for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM])
79 {
80 NDOpsHelperImpl<N, DIM + 1, Calc>(output, calc, indexes);
81 }
82}

◆ Neg()

template<typename T >
OMStatus onert_micro::execute::pal::Neg ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 30 of file PALNegCommon.h.

32{
33 const uint32_t flat_size = input_shape.flatSize();
34
35 if (flat_size == -1)
36 return UnknownError;
37
38 assert(input_data != nullptr);
39 assert(output_data != nullptr);
40
41 assert(input_shape == output_shape);
42
43 for (int i = 0; i < flat_size; i++)
44 {
45 output_data[i] = -(input_data[i]);
46 }
47
48 return Ok;
49}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ nextIndex()

bool onert_micro::execute::pal::nextIndex ( const int32_t  num_dims,
const int32_t *  dims,
int32_t *  current 
)
inline

Definition at line 175 of file PALUtils.h.

176{
177 if (num_dims == 0)
178 {
179 return false;
180 }
181 int carry = 1;
182 for (int idx = num_dims - 1; idx >= 0; --idx)
183 {
184 int current_val = current[idx] + carry;
185 if (dims[idx] == current_val)
186 {
187 current[idx] = 0;
188 }
189 else
190 {
191 current[idx] = current_val;
192 carry = 0;
193 break;
194 }
195 }
196 return (carry == 0);
197}
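
A small usage sketch (include path assumed): nextIndex() advances a multi-dimensional index in row-major order and returns false once it wraps past the last position, so a do/while loop visits every coordinate of the shape exactly once.

#include <cstdint>
#include <cstdio>

#include "PALUtils.h" // assumed include path for nextIndex()

int main()
{
  const int32_t dims[2] = {2, 3};
  int32_t index[2] = {0, 0};
  // Prints (0,0) (0,1) (0,2) (1,0) (1,1) (1,2).
  do
  {
    std::printf("(%d, %d)\n", index[0], index[1]);
  } while (onert_micro::execute::pal::nextIndex(2, dims, index));
  return 0;
}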

◆ NotEqualFn()

template<typename T >
bool onert_micro::execute::pal::NotEqualFn ( T  lhs,
T  rhs 
)
inline

Definition at line 62 of file PALComparisons.h.

62{ return lhs != rhs; }

◆ offset() [1/2]

int onert_micro::execute::pal::offset ( const int32_t *  dims_data,
int  i0,
int  i1,
int  i2,
int  i3 
)
inline

Definition at line 220 of file PALUtils.h.

221{
222 return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3;
223}

Referenced by BatchToSpaceND(), DepthwiseConv2D< float >(), GatherND(), L2Pool(), reducedOutputOffset(), SpaceToBatchND(), SpaceToDepth(), and TransposeConv< float >().
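
A worked example of the NHWC flattening performed by offset() (include path assumed): for dims {2, 4, 4, 3}, the element at (batch 1, y 2, x 3, channel 1) lands at ((1*4 + 2)*4 + 3)*3 + 1 = 82.

#include <cstdint>
#include <cstdio>

#include "PALUtils.h" // assumed include path for offset()

int main()
{
  const int32_t dims[4] = {2, 4, 4, 3}; // N, H, W, C
  std::printf("%d\n", onert_micro::execute::pal::offset(dims, 1, 2, 3, 1)); // prints 82
  return 0;
}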

◆ offset() [2/2]

int onert_micro::execute::pal::offset ( const int32_t *  dims_data,
int  i0,
int  i1,
int  i2,
int  i3,
int  i4 
)
inline

Definition at line 225 of file PALUtils.h.

226{
227 return (((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3) * dims_data[4] + i4;
228}

◆ Pad()

OMStatus onert_micro::execute::pal::Pad ( const core::PadParams op_params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const float  pad_value,
const core::OMRuntimeShape output_shape,
float *  output_data 
)

Definition at line 35 of file PALPad.h.

38{
39 // Runtime calls are currently fixed at 5 dimensions. Copy inputs so we can
40 // pad them to 5 dims (yes, we are "padding the padding").
41 int left_padding_copy[padKernelMaxDimensionCount];
42 for (int &i : left_padding_copy)
43 {
44 i = 0;
45 }
46 for (int i = 0; i < op_params.left_padding_count; ++i)
47 {
48 left_padding_copy[i + padKernelMaxDimensionCount - op_params.left_padding_count] =
49 op_params.left_padding[i];
50 }
51 int right_padding_copy[padKernelMaxDimensionCount];
52 for (int &i : right_padding_copy)
53 {
54 i = 0;
55 }
56 for (int i = 0; i < op_params.right_padding_count; ++i)
57 {
58 right_padding_copy[i + padKernelMaxDimensionCount - op_params.right_padding_count] =
59 op_params.right_padding[i];
60 }
61 const auto extended_output =
62 core::OMRuntimeShape::extendedShape(padKernelMaxDimensionCount, output_shape);
63 const int output_batch = extended_output.dims(0);
64 const int output_plane = extended_output.dims(1);
65 const int output_height = extended_output.dims(2);
66 const int output_width = extended_output.dims(3);
67 const int output_depth = extended_output.dims(4);
68
69 const int left_b_padding = left_padding_copy[0];
70 const int left_p_padding = left_padding_copy[1];
71 const int left_h_padding = left_padding_copy[2];
72 const int left_w_padding = left_padding_copy[3];
73 const int left_d_padding = left_padding_copy[4];
74
75 const int right_b_padding = right_padding_copy[0];
76 const int right_p_padding = right_padding_copy[1];
77 const int right_h_padding = right_padding_copy[2];
78 const int right_w_padding = right_padding_copy[3];
79 const int right_d_padding = right_padding_copy[4];
80
81 const float *in_ptr = input_data;
82 float *out_ptr = output_data;
83 for (int out_b = 0; out_b < output_batch; ++out_b)
84 {
85 for (int out_p = 0; out_p < output_plane; ++out_p)
86 {
87 for (int out_h = 0; out_h < output_height; ++out_h)
88 {
89 for (int out_w = 0; out_w < output_width; ++out_w)
90 {
91 for (int out_d = 0; out_d < output_depth; ++out_d)
92 {
93 if (out_b < left_b_padding || out_b >= output_batch - right_b_padding ||
94 out_p < left_p_padding || out_p >= output_plane - right_p_padding ||
95 out_h < left_h_padding || out_h >= output_height - right_h_padding ||
96 out_w < left_w_padding || out_w >= output_width - right_w_padding ||
97 out_d < left_d_padding || out_d >= output_depth - right_d_padding)
98 {
99 *out_ptr++ = pad_value;
100 }
101 else
102 {
103 *out_ptr++ = *in_ptr++;
104 }
105 }
106 }
107 }
108 }
109 }
110
111 return Ok;
112}

References onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::core::PadParams::left_padding, onert_micro::core::PadParams::left_padding_count, onert_micro::Ok, output_shape, onert_micro::core::PadParams::right_padding, and onert_micro::core::PadParams::right_padding_count.

◆ processBroadcastShapes()

bool onert_micro::execute::pal::processBroadcastShapes ( const core::OMRuntimeShape shape0,
const core::OMRuntimeShape shape1,
core::BinaryArithmeticBroadcastParams params 
)
inline

Definition at line 155 of file ProcessBroadcastShapes.h.

158{
159 const int dims_count = std::max(shape0.dimensionsCount(), shape1.dimensionsCount());
160
161 params->broadcast_category = core::BroadcastableOpCategory::kGenericBroadcast;
162
163 auto extended_shape0 = core::OMRuntimeShape::extendedShape(dims_count, shape0);
164 auto extended_shape1 = core::OMRuntimeShape::extendedShape(dims_count, shape1);
165
166 // Check for "exact" match, implicitly accepting any scalar shapes.
167 if (extended_shape0 == extended_shape1)
168 {
169 params->broadcast_category = core::BroadcastableOpCategory::kNonBroadcast;
170 return false;
171 }
172
173 if (shape0.flatSize() == 1)
174 {
175 params->broadcast_category = core::BroadcastableOpCategory::kScalarFirstBroadcast;
176 return true;
177 }
178 else if (shape1.flatSize() == 1)
179 {
180 params->broadcast_category = core::BroadcastableOpCategory::kScalarSecondBroadcast;
181 return true;
182 }
183
184 for (int i = dims_count - 1; i >= 0; --i)
185 {
186 if (extended_shape0.dims(i) == extended_shape1.dims(i))
187 {
188 continue;
189 }
190 else if (extended_shape0.dims(i) == 1)
191 {
192 params->broadcast_category = core::BroadcastableOpCategory::kFirstInputBroadcastsFast;
193 return true;
194 }
195 else if (extended_shape1.dims(i) == 1)
196 {
197 params->broadcast_category = core::BroadcastableOpCategory::kSecondInputBroadcastsFast;
198 return true;
199 }
200 else
201 {
202 // This case is erroneous: there is a dimension that does not match and
203 // is not a broadcast from one shape to the other.
204 params->broadcast_category = core::BroadcastableOpCategory::kGenericBroadcast;
205 return true;
206 }
207 }
208
209 return false;
210}

References onert_micro::core::BinaryArithmeticBroadcastParams::broadcast_category, onert_micro::core::OMRuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::core::kFirstInputBroadcastsFast, onert_micro::core::kGenericBroadcast, onert_micro::core::kNonBroadcast, onert_micro::core::kScalarFirstBroadcast, onert_micro::core::kScalarSecondBroadcast, and onert_micro::core::kSecondInputBroadcastsFast.
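
A usage sketch, assuming the include path below and that core::BinaryArithmeticBroadcastParams can be value-initialized; the OMRuntimeShape construction mirrors the setDim() pattern shown in the GRU listing above.

#include <cstdio>

#include "ProcessBroadcastShapes.h" // assumed include path

int main()
{
  namespace pal = onert_micro::execute::pal;
  using onert_micro::core::OMRuntimeShape;

  OMRuntimeShape shape0(2); // {2, 3}
  shape0.setDim(0, 2);
  shape0.setDim(1, 3);
  OMRuntimeShape shape1(2); // {1, 3}
  shape1.setDim(0, 1);
  shape1.setDim(1, 3);

  onert_micro::core::BinaryArithmeticBroadcastParams params{};
  // Returns true and sets params.broadcast_category to kSecondInputBroadcastsFast,
  // because shape1 broadcasts along dimension 0.
  const bool needs_broadcast = pal::processBroadcastShapes(shape0, shape1, &params);
  std::printf("needs_broadcast = %d\n", needs_broadcast ? 1 : 0);
  return 0;
}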

◆ Quantize()

template<typename InputT , typename OutputT >
OMStatus onert_micro::execute::pal::Quantize ( const core::QuantizationParams  op_params,
const uint32_t  flat_size,
const InputT *  input_data,
OutputT *  output_data 
)

Definition at line 35 of file PALQuantize.h.

37{
38 const int32_t zero_point = op_params.zero_point;
39 const double scale = op_params.scale;
40 static constexpr int32_t min_val = std::numeric_limits<OutputT>::min();
41 static constexpr int32_t max_val = std::numeric_limits<OutputT>::max();
42
43 for (int i = 0; i < flat_size; i++)
44 {
45 const InputT val = input_data[i];
46 int32_t unclamped =
47 static_cast<int32_t>(std::round(val / static_cast<float>(scale))) + zero_point;
48 int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
49 output_data[i] = clamped;
50 }
51
52 return Ok;
53}

References onert_micro::Ok, onert_micro::core::QuantizationParams::scale, and onert_micro::core::QuantizationParams::zero_point.
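
A minimal sketch of the affine quantization q = round(x / scale) + zero_point with clamping to the output type's range; the include path is an assumption, and core::QuantizationParams is assumed to be a plain struct with the documented scale and zero_point fields.

#include <cstdint>
#include <cstdio>

#include "PALQuantize.h" // assumed include path; expected to declare core::QuantizationParams

int main()
{
  onert_micro::core::QuantizationParams params;
  params.scale = 0.5;
  params.zero_point = 10;

  const float input[4] = {-1.0f, 0.0f, 0.25f, 100.0f};
  int8_t output[4] = {};
  onert_micro::execute::pal::Quantize(params, 4, input, output);

  // Expected: 8 10 11 127 (the last value saturates at the int8 maximum).
  for (int8_t q : output)
    std::printf("%d ", q);
  std::printf("\n");
  return 0;
}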

◆ ReduceDimensionsForBroadcast()

template<int MAX_DIM = 6>
bool onert_micro::execute::pal::ReduceDimensionsForBroadcast ( const core::OMRuntimeShape input1_shape,
const core::OMRuntimeShape input2_shape,
size_t *  compressed_input1_stride,
size_t *  compressed_input2_stride,
size_t *  compressed_output_shape 
)

Definition at line 242 of file PALUtils.h.

246{
247 size_t num_compressed_dims = 0;
248 size_t compressed_input1_shape[MAX_DIM];
249 size_t compressed_input2_shape[MAX_DIM];
250 std::fill(compressed_input1_shape, compressed_input1_shape + MAX_DIM, 1);
251 std::fill(compressed_input2_shape, compressed_input2_shape + MAX_DIM, 1);
252 std::fill(compressed_output_shape, compressed_output_shape + MAX_DIM, 1);
253 bool broadcast_input1 = false;
254 bool broadcast_input2 = false;
255 bool first_nonunit = true;
256
257 if (input1_shape.dimensionsCount() < 0 || input2_shape.dimensionsCount() < 0)
258 {
259 return false;
260 }
261 const size_t num_input1_dims = input1_shape.dimensionsCount();
262 const size_t num_input2_dims = input2_shape.dimensionsCount();
263 const int32_t *input1_dims = input1_shape.dimsData();
264 const int32_t *input2_dims = input2_shape.dimsData();
265 const size_t num_common_dims = std::min(num_input1_dims, num_input2_dims);
266 for (size_t i = 1; i <= num_common_dims; i++)
267 {
268 if (input1_dims[num_input1_dims - i] < 0 || input2_dims[num_input2_dims - i] < 0)
269 {
270 return false;
271 }
272 const size_t input1_dim = input1_dims[num_input1_dims - i];
273 const size_t input2_dim = input2_dims[num_input2_dims - i];
274 if (input1_dim == 0 || input2_dim == 0)
275 {
276 return false;
277 }
278 if (input1_dim == 1 && input2_dim == 1)
279 {
280 continue;
281 }
282 assert(!broadcast_input1 || !broadcast_input2);
283
284 if (input1_dim == 1)
285 {
286 if (!broadcast_input1)
287 {
288 broadcast_input1 = true;
289 broadcast_input2 = false;
290 num_compressed_dims++;
291 }
292 compressed_input2_shape[num_compressed_dims - 1] *= input2_dim;
293 compressed_output_shape[num_compressed_dims - 1] *= input2_dim;
294 }
295 else if (input2_dim == 1)
296 {
297 if (!broadcast_input2)
298 {
299 broadcast_input1 = false;
300 broadcast_input2 = true;
301 num_compressed_dims++;
302 }
303 compressed_input1_shape[num_compressed_dims - 1] *= input1_dim;
304 compressed_output_shape[num_compressed_dims - 1] *= input1_dim;
305 }
306 else
307 {
308 assert(input1_dim == input2_dim);
309 if (broadcast_input1 || broadcast_input2 || first_nonunit)
310 {
311 broadcast_input1 = false;
312 broadcast_input2 = false;
313 num_compressed_dims++;
314 }
315 compressed_input1_shape[num_compressed_dims - 1] *= input1_dim;
316 compressed_input2_shape[num_compressed_dims - 1] *= input1_dim;
317 compressed_output_shape[num_compressed_dims - 1] *= input1_dim;
318 }
319 first_nonunit = false;
320 }
321 if (num_input1_dims > num_input2_dims)
322 {
323 if (!broadcast_input2)
324 {
325 num_compressed_dims++;
326 }
327 for (size_t i = 0; i < num_input1_dims - num_input2_dims; i++)
328 {
329 if (input1_dims[i] < 0)
330 return false;
331 const size_t input1_dim = input1_dims[i];
332 if (input1_dim == 0)
333 {
334 return false;
335 }
336 compressed_input1_shape[num_compressed_dims - 1] *= input1_dim;
337 compressed_output_shape[num_compressed_dims - 1] *= input1_dim;
338 }
339 }
340 else if (num_input2_dims > num_input1_dims)
341 {
342 if (!broadcast_input1)
343 {
344 num_compressed_dims++;
345 }
346 for (size_t i = 0; i < num_input2_dims - num_input1_dims; i++)
347 {
348 if (input2_dims[i] < 0)
349 return false;
350 const size_t input2_dim = input2_dims[i];
351 if (input2_dim == 0)
352 {
353 return false;
354 }
355 compressed_input2_shape[num_compressed_dims - 1] *= input2_dim;
356 compressed_output_shape[num_compressed_dims - 1] *= input2_dim;
357 }
358 }
359 num_compressed_dims = (num_compressed_dims > 1) ? num_compressed_dims : 1;
360
361 int input1_stride = 1;
362 int input2_stride = 1;
363 for (int i = 0; i < MAX_DIM; ++i)
364 {
365 compressed_input1_stride[i] = input1_stride;
366 input1_stride *= compressed_input1_shape[i];
367 compressed_input2_stride[i] = input2_stride;
368 input2_stride *= compressed_input2_shape[i];
369 }
370 for (int i = 0; i < MAX_DIM; ++i)
371 {
372 if (compressed_input1_shape[i] != compressed_input2_shape[i])
373 {
374 if (compressed_input1_shape[i] == 1)
375 {
376 compressed_input1_stride[i] = 0;
377 }
378 else
379 {
380 assert(compressed_input2_shape[i] == 1);
381 compressed_input2_stride[i] = 0;
382 }
383 }
384 }
385 return true;
386}

References onert_micro::core::OMRuntimeShape::dimensionsCount(), and onert_micro::core::OMRuntimeShape::dimsData().

◆ reducedOutputOffset()

size_t onert_micro::execute::pal::reducedOutputOffset ( const int32_t  num_dims,
const int32_t *  dims,
const int32_t *  index,
const int32_t  num_axis,
const int32_t *  axis 
)
inline

Definition at line 143 of file PALUtils.h.

145{
146 if (num_dims == 0)
147 {
148 return 0;
149 }
150 size_t offset = 0;
151 for (int idx = 0; idx < num_dims; ++idx)
152 {
153 // if we need to skip this axis
154 bool is_axis = false;
155 if (axis != nullptr)
156 {
157 for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
158 {
159 if (idx == axis[axis_idx])
160 {
161 is_axis = true;
162 break;
163 }
164 }
165 }
166 if (!is_axis)
167 {
168 offset = offset * static_cast<size_t>(dims[idx]) + static_cast<size_t>(index[idx]);
169 }
170 }
171 return offset;
172}

References offset().
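
A worked example (include path assumed): reducing the shape {2, 3, 4} over axis 1 means that axis is skipped when flattening, so the index (1, 2, 3) maps to output offset 1 * 4 + 3 = 7.

#include <cstddef>
#include <cstdint>
#include <cstdio>

#include "PALUtils.h" // assumed include path for reducedOutputOffset()

int main()
{
  const int32_t dims[3] = {2, 3, 4};
  const int32_t index[3] = {1, 2, 3};
  const int32_t axis[1] = {1}; // the reduced axis
  const size_t out = onert_micro::execute::pal::reducedOutputOffset(3, dims, index, 1, axis);
  std::printf("%zu\n", out); // prints 7
  return 0;
}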

◆ ReLUCommon()

template<typename Type >
OMStatus onert_micro::execute::pal::ReLUCommon ( const int  flat_size,
const Type *  input_data,
Type *  output_data,
const float  alpha,
const bool  is_relu_6 
)

Definition at line 33 of file PALReluCommon.h.

35{
36 const Type relu_6_value = 6.0f;
37 for (int i = 0; i < flat_size; i++)
38 {
39 const Type val = input_data[i];
40 Type result = val > 0 ? val : val * alpha;
41 result = is_relu_6 ? (result > relu_6_value ? relu_6_value : result) : result;
42 output_data[i] = result;
43 }
44
45 return Ok;
46}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_relu_common().
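
A usage sketch for the float path (include path assumed): with alpha = 0.1 and is_relu_6 = false this behaves as leaky ReLU, scaling negatives by alpha and passing positives through.

#include <cstdio>

#include "PALReluCommon.h" // assumed include path for ReLUCommon()

int main()
{
  const float input[4] = {-2.0f, -0.5f, 0.0f, 3.0f};
  float output[4] = {};
  onert_micro::execute::pal::ReLUCommon(4, input, output, /*alpha=*/0.1f, /*is_relu_6=*/false);

  // Expected: -0.20 -0.05 0.00 3.00
  std::printf("%.2f %.2f %.2f %.2f\n", output[0], output[1], output[2], output[3]);
  return 0;
}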

◆ ReLUCommon< int8_t >()

template<>
OMStatus onert_micro::execute::pal::ReLUCommon< int8_t > ( const int  flat_size,
const int8_t *  input_data,
int8_t *  output_data,
const float  alpha,
const bool  is_relu_6 
)

Definition at line 33 of file PALRelu.h.

35{
36 // 1. Relu
37 if (is_relu_6 == false && alpha == 0)
38 {
39 memcpy(output_data, input_data, flat_size);
40 arm_relu_q7(output_data, flat_size);
41 }
42 // 2. Relu6
43 else if (is_relu_6 && alpha == 0)
44 {
45 memcpy(output_data, input_data, flat_size);
46 arm_relu6_s8(output_data, flat_size);
47 }
48 // 3. Leaky_Relu not supported by cmsis_nn
49 else if (alpha != 0)
50 {
51 for (int i = 0; i < flat_size; i++)
52 {
53 const int8_t val = input_data[i];
54 int8_t result = val > 0 ? val : val * alpha;
55 output_data[i] = result;
56 }
57 }
58
59 return Ok;
60}

References onert_micro::Ok.

◆ Round()

template<typename T >
OMStatus onert_micro::execute::pal::Round ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

◆ Round< float >()

template<>
OMStatus onert_micro::execute::pal::Round< float > ( const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 38 of file PALRoundCommon.h.

40{
41 const uint32_t flat_size = input_shape.flatSize();
42
43 if (flat_size == -1)
44 return UnknownError;
45
46 assert(input_data != nullptr);
47 assert(output_data != nullptr);
48
49 assert(input_shape == output_shape);
50
51 for (int i = 0; i < flat_size; i++)
52 {
53 // Note that this implementation matches that of tensorFlow tf.round
54 // and corresponds to the bankers rounding method.
55 auto floor_val = std::floor(input_data[i]);
56 auto diff = input_data[i] - floor_val;
57 if ((diff < 0.5f) || ((diff == 0.5f) && (static_cast<int>(floor_val) % 2 == 0)))
58 {
59 output_data[i] = floor_val;
60 }
61 else
62 {
63 output_data[i] = floor_val + 1.0f;
64 }
65 }
66
67 return Ok;
68}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ roundingDivideByPOT()

int32_t onert_micro::execute::pal::roundingDivideByPOT ( int32_t  x,
int32_t  exponent 
)
inline

Definition at line 92 of file PALUtils.h.

93{
94 assert(exponent >= 0);
95 assert(exponent <= 31);
96 const int32_t mask = int32_t((1ll << exponent) - 1);
97 const int32_t zero = int32_t(0);
98 const int32_t one = int32_t(1);
99 const int32_t remainder = x & mask;
100 const int32_t threshold = (mask >> 1) + ((x < zero ? one : zero) & one);
101 return (x >> exponent) + ((remainder > threshold ? one : zero) & one);
102}

Referenced by multiplyByQuantizedMultiplier(), and multiplyByQuantizedMultiplierSmallerThanOneExp().

◆ Rsqrt()

template<typename T >
OMStatus onert_micro::execute::pal::Rsqrt ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALRsqrtCommon.h.

36{
37 return SISOOperation<T>(input_shape, input_data, output_shape, output_data,
38 [](T arg) -> T { return 1.f / std::sqrt(arg); });
39}

References output_shape.

◆ saturatingRoundingDoublingHighMul()

std::int32_t onert_micro::execute::pal::saturatingRoundingDoublingHighMul ( std::int32_t  a,
std::int32_t  b 
)
inline

Definition at line 79 of file PALUtils.h.

80{
81 bool overflow = a == b && a == std::numeric_limits<std::int32_t>::min();
82 std::int64_t a_64(a);
83 std::int64_t b_64(b);
84 std::int64_t ab_64 = a_64 * b_64;
85 std::int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
86 std::int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
87 return overflow ? std::numeric_limits<std::int32_t>::max() : ab_x2_high32;
88}

Referenced by multiplyByQuantizedMultiplier(), and multiplyByQuantizedMultiplierSmallerThanOneExp().

◆ Sin()

template<typename T >
OMStatus onert_micro::execute::pal::Sin ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 35 of file PALSinCommon.h.

37{
38 const uint32_t flat_size = input_shape.flatSize();
39
40 if (flat_size == -1)
41 return UnknownError;
42
43 assert(input_data != nullptr);
44 assert(output_data != nullptr);
45
46 assert(input_shape == output_shape);
47
48 for (int i = 0; i < flat_size; i++)
49 {
50 output_data[i] = std::sin(input_data[i]);
51 }
52
53 return Ok;
54}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ SISOOperation()

template<typename T >
OMStatus onert_micro::execute::pal::SISOOperation ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data,
std::function< T(T)> const &  func 
)
inline

Definition at line 31 of file PALSISOOperation.h.

34{
35 const uint32_t flat_size = input_shape.flatSize();
36
37 if (flat_size == -1)
38 return UnknownError;
39
40 assert(input_data != nullptr);
41 assert(output_data != nullptr);
42
43 assert(input_shape == output_shape);
44
45 for (int i = 0; i < flat_size; i++)
46 {
47 output_data[i] = func(input_data[i]);
48 }
49
50 return Ok;
51}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.
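
SISOOperation() is the shared single-input/single-output helper behind Rsqrt(), Sqrt(), and similar kernels: it checks the shapes and applies an arbitrary unary function elementwise. A hedged sketch with an assumed include path, reusing the OMRuntimeShape construction pattern shown earlier:

#include <cstdio>

#include "PALSISOOperation.h" // assumed include path

int main()
{
  using onert_micro::core::OMRuntimeShape;

  OMRuntimeShape shape(1); // a 1-D tensor of 3 elements
  shape.setDim(0, 3);

  const float input[3] = {0.0f, 1.0f, 2.0f};
  float output[3] = {};
  // Apply x -> 2x elementwise, the same way Sqrt/Rsqrt pass their lambdas.
  onert_micro::execute::pal::SISOOperation<float>(shape, input, shape, output,
                                                  [](float x) { return x * 2.0f; });

  std::printf("%.1f %.1f %.1f\n", output[0], output[1], output[2]); // 0.0 2.0 4.0
  return 0;
}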

◆ Slice()

template<typename T >
OMStatus onert_micro::execute::pal::Slice ( const core::SliceParams op_params,
const core::OMRuntimeShape input_shape,
const T *  input_data,
T *  output_data 
)

Definition at line 29 of file PALSlice.h.

31{
32 const core::OMRuntimeShape ext_shape = core::OMRuntimeShape::extendedShape(5, input_shape);
33 const int begin_count = op_params.begin_count;
34 const int size_count = op_params.size_count;
35 // We front-pad the begin and size vectors.
36 int start[5];
37 int stop[5];
38 for (int i = 0; i < 5; ++i)
39 {
40 int padded_i = 5 - i;
41 start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
42 stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
43 ? ext_shape.dims(i)
44 : start[i] + op_params.size[size_count - padded_i];
45 }
46
47 for (int i0 = start[0]; i0 < stop[0]; ++i0)
48 {
49 for (int i1 = start[1]; i1 < stop[1]; ++i1)
50 {
51 for (int i2 = start[2]; i2 < stop[2]; ++i2)
52 {
53 for (int i3 = start[3]; i3 < stop[3]; ++i3)
54 {
55 for (int i4 = start[4]; i4 < stop[4]; ++i4)
56 {
57 auto position =
58 (((i0 * ext_shape.dims(1) + i1) * ext_shape.dims(2) + i2) * ext_shape.dims(3) + i3) *
59 ext_shape.dims(4) +
60 i4;
61 *output_data++ = input_data[position];
62 }
63 }
64 }
65 }
66 }
67 return Ok;
68}

References onert_micro::core::SliceParams::begin, begin_count, onert_micro::core::SliceParams::begin_count, onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::Ok, onert_micro::core::SliceParams::size, size_count, and onert_micro::core::SliceParams::size_count.

◆ Softmax()

template<typename T , typename U >
OMStatus onert_micro::execute::pal::Softmax ( const core::SoftmaxParams params,
const T *  input_data,
U *  output_data 
)

Definition at line 33 of file PALSoftmaxCommon.h.

34{
35 const int outer_size = params.num_rows;
36 const int depth = params.row_size;
37 const double beta = params.beta;
38
39 const float input_scale = params.input_scale;
40 const float output_scale = params.output_scale;
41
42 const int input_zp = params.input_zp;
43 const int output_zp = params.output_zp;
44
45 for (int i = 0; i < outer_size; ++i)
46 {
47 // Find max element value which we'll use to ensure numerical stability
48 // taking advantage of the following equality:
49 // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
50 float max = std::numeric_limits<float>::lowest();
51 for (int c = 0; c < depth; ++c)
52 {
53 auto t = input_data[i * depth + c] - input_zp;
54 auto t_f = static_cast<float>(input_data[i * depth + c] - input_zp);
55 float cur_val = static_cast<float>(input_data[i * depth + c] - input_zp) * input_scale;
56 max = std::max(max, cur_val);
57 }
58
59 static constexpr int32_t min_val = std::numeric_limits<U>::min();
60 static constexpr int32_t max_val = std::numeric_limits<U>::max();
61 // Compute sum.
62 float sum = 0.f;
63 for (int c = 0; c < depth; ++c)
64 {
65 float cur_val = static_cast<float>(input_data[i * depth + c] - input_zp) * input_scale;
66 const auto exp_c = static_cast<float>(std::exp((cur_val - max) * beta));
67 sum += exp_c;
68 }
69
70 // Compute result.
71 for (int c = 0; c < depth; ++c)
72 {
73 float cur_val = static_cast<float>(input_data[i * depth + c] - input_zp) * input_scale;
74 const auto exp_c = static_cast<float>(std::exp((cur_val - max) * beta));
75 float softmax_val = exp_c / sum;
76 auto unclamped = static_cast<int32_t>(std::round(softmax_val / output_scale) +
77 static_cast<float>(output_zp));
78 int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
79 output_data[i * depth + c] = static_cast<U>(clamped);
80 }
81 }
82 return Ok;
83}

References onert_micro::core::SoftmaxParams::beta, onert_micro::core::SoftmaxParams::input_scale, onert_micro::core::SoftmaxParams::input_zp, onert_micro::core::SoftmaxParams::num_rows, onert_micro::Ok, onert_micro::core::SoftmaxParams::output_scale, onert_micro::core::SoftmaxParams::output_zp, and onert_micro::core::SoftmaxParams::row_size.

◆ Softmax< float, float >()

template<>
OMStatus onert_micro::execute::pal::Softmax< float, float > ( const core::SoftmaxParams params,
const float *  input_data,
float *  output_data 
)

Definition at line 86 of file PALSoftmaxCommon.h.

88{
89 const int outer_size = params.num_rows;
90 const int depth = params.row_size;
91 const double beta = params.beta;
92
93 for (int i = 0; i < outer_size; ++i)
94 {
95 // Find max element value which we'll use to ensure numerical stability
96 // taking advantage of the following equality:
97 // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
98 float max = std::numeric_limits<float>::lowest();
99 for (int c = 0; c < depth; ++c)
100 {
101 max = std::max(max, input_data[i * depth + c]);
102 }
103
104 // Compute sum.
105 float sum = 0.f;
106 for (int c = 0; c < depth; ++c)
107 {
108 const float exp_c = std::exp((input_data[i * depth + c] - max) * static_cast<float>(beta));
109 output_data[i * depth + c] = exp_c;
110 sum += exp_c;
111 }
112
113 assert(sum != 0);
114
115 if (sum == 0)
116 return UnknownError;
117
118 // Compute result.
119 for (int c = 0; c < depth; ++c)
120 {
121 output_data[i * depth + c] = output_data[i * depth + c] / sum;
122 }
123 }
124 return Ok;
125}

References onert_micro::core::SoftmaxParams::beta, onert_micro::core::SoftmaxParams::num_rows, onert_micro::Ok, onert_micro::core::SoftmaxParams::row_size, and onert_micro::UnknownError.
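
A minimal usage sketch for the float specialization, assuming the include path below and that core::SoftmaxParams is a plain struct whose documented num_rows, row_size, and beta fields can be assigned directly:

#include <cstdio>

#include "PALSoftmaxCommon.h" // assumed include path; expected to declare core::SoftmaxParams

int main()
{
  onert_micro::core::SoftmaxParams params;
  params.num_rows = 1; // one row of 3 logits
  params.row_size = 3;
  params.beta = 1.0;   // beta = 1 gives the standard softmax

  const float logits[3] = {1.0f, 2.0f, 3.0f};
  float probs[3] = {};
  onert_micro::execute::pal::Softmax<float, float>(params, logits, probs);

  // Expected: approximately 0.090 0.245 0.665
  std::printf("%.3f %.3f %.3f\n", probs[0], probs[1], probs[2]);
  return 0;
}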

◆ Softmax< int8_t, int8_t >()

◆ SpaceToBatchND()

template<typename T >
OMStatus onert_micro::execute::pal::SpaceToBatchND ( const core::OMRuntimeShape unextended_input1_shape,
const T *  input1_data,
const core::OMRuntimeShape unextended_input2_shape,
const int32_t *  block_shape_data,
const core::OMRuntimeShape unextended_input3_shape,
const int32_t *  paddings_data,
const core::OMRuntimeShape unextended_output_shape,
T *  output_data 
)
inline

Definition at line 51 of file PALSpaceToBatchNDCommon.h.

55{
56 // Extends the input/output shape from 3D to 4D if needed, NHC -> NH1C.
57 const core::OMRuntimeShape input1_shape = extendShapeSpaceToBatch(unextended_input1_shape);
58 const core::OMRuntimeShape output_shape = extendShapeSpaceToBatch(unextended_output_shape);
59
60 const int depth = input1_shape.dims(3);
61 const int input_width = input1_shape.dims(2);
62 const int input_height = input1_shape.dims(1);
63 const int input_batch_size = input1_shape.dims(0);
64
65 const int output_width = output_shape.dims(2);
66 const int output_height = output_shape.dims(1);
67 const int output_batch_size = output_shape.dims(0);
68
69 const int block_shape_height = block_shape_data[0];
70 const int block_shape_width =
71 unextended_input1_shape.dimensionsCount() == 4 ? block_shape_data[1] : 1;
72 const int padding_top = paddings_data[0];
73 const int padding_left = unextended_input1_shape.dimensionsCount() == 4 ? paddings_data[2] : 0;
74
75 const int32_t pad_value = 0;
76
77 for (int out_b = 0; out_b < output_batch_size; ++out_b)
78 {
79 int input_batch = out_b % input_batch_size;
80 int shift_w = (out_b / input_batch_size) % block_shape_width;
81 int shift_h = (out_b / input_batch_size) / block_shape_width;
82 for (int out_h = 0; out_h < output_height; ++out_h)
83 {
84 for (int out_w = 0; out_w < output_width; ++out_w)
85 {
86 T *out = output_data + offset(output_shape.dimsData(), out_b, out_h, out_w, 0);
87 if (out_h * block_shape_height + shift_h < padding_top ||
88 out_h * block_shape_height + shift_h >= padding_top + input_height ||
89 out_w * block_shape_width + shift_w < padding_left ||
90 out_w * block_shape_width + shift_w >= padding_left + input_width)
91 {
92 // This may not execute correctly when pad_value != 0 and T != uint8.
93 memset(out, pad_value, depth * sizeof(T));
94 }
95 else
96 {
97 const T *in =
98 input1_data + offset(input1_shape.dimsData(), input_batch,
99 (out_h * block_shape_height + shift_h) - padding_top,
100 (out_w * block_shape_width + shift_w) - padding_left, 0);
101 memcpy(out, in, depth * sizeof(T));
102 }
103 }
104 }
105 }
106 return Ok;
107}

References onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), offset(), onert_micro::Ok, and output_shape.

◆ SpaceToDepth()

template<typename T >
OMStatus onert_micro::execute::pal::SpaceToDepth ( const int32_t  block_size,
const core::OMRuntimeShape unextended_input_shape,
const T *  input_data,
const core::OMRuntimeShape unextended_output_shape,
T *  output_data 
)
inline

Definition at line 32 of file PALSpaceToDepthCommon.h.

36{
37 if (block_size == 0)
38 {
 39 return FailedCheckCondition;
 40 }
41
42 const core::OMRuntimeShape input_shape =
43 core::OMRuntimeShape::extendedShape(4, unextended_input_shape);
 44 const core::OMRuntimeShape output_shape =
 45 core::OMRuntimeShape::extendedShape(4, unextended_output_shape);
46
47 const int input_depth = input_shape.dims(3);
48 const int input_width = input_shape.dims(2);
49 const int input_height = input_shape.dims(1);
50 const int input_batch = input_shape.dims(0);
51
52 for (int in_b = 0; in_b < input_batch; ++in_b)
53 {
54 for (int in_h = 0; in_h < input_height; ++in_h)
55 {
56 for (int in_w = 0; in_w < input_width; ++in_w)
57 {
58 for (int in_d = 0; in_d < input_depth; ++in_d)
59 {
60 const int out_d =
61 in_d + ((in_h % block_size) * block_size + in_w % block_size) * input_depth;
62 const int out_w = in_w / block_size;
63 const int out_h = in_h / block_size;
64 const int out_b = in_b;
65
66 const int input_index = offset(input_shape.dimsData(), in_b, in_h, in_w, in_d);
67 const int output_index = offset(output_shape.dimsData(), out_b, out_h, out_w, out_d);
68
69 output_data[output_index] = input_data[input_index];
70 }
71 }
72 }
73 }
74 return Ok;
75}

References onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::FailedCheckCondition, offset(), onert_micro::Ok, and output_shape.

◆ Split()

template<typename T >
OMStatus onert_micro::execute::pal::Split ( const core::SplitParams params,
const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
int32_t  axis_value 
)

Definition at line 36 of file PALSplit.h.

38{
39 const auto output_count = params.num_outputs;
40
41 const auto split_dimensions = input_shape.dimensionsCount();
42
43 assert(axis_value < split_dimensions);
44 assert(output_shape.dimensionsCount() == split_dimensions);
45
46 int64_t outer_size = 1;
47 for (uint32_t i = 0; i < axis_value; ++i)
48 {
49 outer_size *= input_shape.dims(i);
50 }
51
52 int64_t base_inner_size = 1;
53 for (uint32_t i = axis_value + 1; i < split_dimensions; ++i)
54 {
55 base_inner_size *= input_shape.dims(i);
56 }
57
58 assert(input_data != nullptr);
59 for (int64_t k = 0; k < outer_size; ++k)
60 {
61 for (uint32_t i = 0; i < output_count; ++i)
62 {
63 T *output_data = core::utils::castOutputData<T>(params.output_data[i]);
64 assert(output_data != nullptr);
65 const auto copy_size = output_shape.dims(axis_value) * base_inner_size;
66 T *output_ptr = output_data + k * copy_size;
67 assert(output_ptr != nullptr);
68 for (int64_t j = 0; j < copy_size; ++j)
69 output_ptr[j] = input_data[j];
70 input_data += copy_size;
71 }
72 }
73 return Ok;
74}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::SplitParams::num_outputs, onert_micro::Ok, onert_micro::core::SplitParams::output_data, and output_shape.

◆ Sqrt()

template<typename T >
OMStatus onert_micro::execute::pal::Sqrt ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALSqrtCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::sqrt(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ Square()

template<typename T >
OMStatus onert_micro::execute::pal::Square ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALSquareCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = (input_data[i]) * (input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ SquaredDifference()

template<typename T >
OMStatus onert_micro::execute::pal::SquaredDifference ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 29 of file PALSquaredDifferenceCommon.h.

31{
32 ArithmeticOp<T, SquaredDifferenceFn<T>>(params, flat_size, input1_data, input2_data, output_data);
33 return Ok;
34}

References onert_micro::Ok.

◆ StridedSlice()

template<typename T >
OMStatus onert_micro::execute::pal::StridedSlice ( core::StridedSliceParams op_params,
const core::OMRuntimeShape unextended_input_shape,
const T *  input_data,
T *  output_data 
)

Definition at line 206 of file PALStridedSlice.h.

209{
210 const core::OMRuntimeShape input_shape =
211 core::OMRuntimeShape::extendedShape(5, unextended_input_shape);
212
213 // Reverse and pad to 5 dimensions because that is what the runtime code
214 // requires (ie. all shapes must be 5D and are given backwards).
215 stridedSlicePadIndices(&op_params, 5);
216
217 const int start_0 = startForAxis(op_params, input_shape, 0);
218 const int stop_0 = stopForAxis(op_params, input_shape, 0, start_0);
219 const int start_1 = startForAxis(op_params, input_shape, 1);
220 const int stop_1 = stopForAxis(op_params, input_shape, 1, start_1);
221 const int start_2 = startForAxis(op_params, input_shape, 2);
222 const int stop_2 = stopForAxis(op_params, input_shape, 2, start_2);
223 const int start_3 = startForAxis(op_params, input_shape, 3);
224 const int stop_3 = stopForAxis(op_params, input_shape, 3, start_3);
225 const int start_4 = startForAxis(op_params, input_shape, 4);
226 const int stop_4 = stopForAxis(op_params, input_shape, 4, start_4);
227
228 for (int offset_0 = start_0 * input_shape.dims(1), end_0 = stop_0 * input_shape.dims(1),
229 step_0 = op_params.strides[0] * input_shape.dims(1);
230 !loopCondition(offset_0, end_0, op_params.strides[0]); offset_0 += step_0)
231 {
232 for (int offset_1 = (offset_0 + start_1) * input_shape.dims(2),
233 end_1 = (offset_0 + stop_1) * input_shape.dims(2),
234 step_1 = op_params.strides[1] * input_shape.dims(2);
235 !loopCondition(offset_1, end_1, op_params.strides[1]); offset_1 += step_1)
236 {
237 for (int offset_2 = (offset_1 + start_2) * input_shape.dims(3),
238 end_2 = (offset_1 + stop_2) * input_shape.dims(3),
239 step_2 = op_params.strides[2] * input_shape.dims(3);
240 !loopCondition(offset_2, end_2, op_params.strides[2]); offset_2 += step_2)
241 {
242 for (int offset_3 = (offset_2 + start_3) * input_shape.dims(4),
243 end_3 = (offset_2 + stop_3) * input_shape.dims(4),
244 step_3 = op_params.strides[3] * input_shape.dims(4);
245 !loopCondition(offset_3, end_3, op_params.strides[3]); offset_3 += step_3)
246 {
247 for (int offset_4 = offset_3 + start_4, end_4 = offset_3 + stop_4;
248 !loopCondition(offset_4, end_4, op_params.strides[4]);
249 offset_4 += op_params.strides[4])
250 {
251 *output_data++ = input_data[offset_4];
252 }
253 }
254 }
255 }
256 }
257 return Ok;
258}

References onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::Ok, and onert_micro::core::StridedSliceParams::strides.
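The 5-D loop nest above generalizes an ordinary strided copy; startForAxis()/stopForAxis() additionally resolve negative indices and the begin/end/shrink-axis masks carried in StridedSliceParams. A plain 2-D illustration of the same access pattern (not library code):

void stridedCopyExample()
{
  const int rows = 4, cols = 5;
  int input[rows * cols];
  for (int i = 0; i < rows * cols; ++i)
    input[i] = i;

  // Equivalent of input[0:4:2, 1:4:1] -> a 2x3 result.
  int output[2 * 3];
  int *out = output;
  for (int r = 0; r < rows; r += 2)
    for (int c = 1; c < 4; ++c)
      *out++ = input[r * cols + c]; // {1, 2, 3, 11, 12, 13}
}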

◆ Sub() [1/2]

OMStatus onert_micro::execute::pal::Sub ( const core::ArithmeticQuantParams &  params,
const uint32_t  flat_size,
const int8_t *  input1_data,
const int8_t *  input2_data,
int8_t *  output_data 
)

Definition at line 31 of file PALSub.h.

33{
34 ElementWise(flat_size, params, input1_data, input2_data, output_data, SubFunc);
35 return Ok;
36}

References ElementWise(), onert_micro::Ok, and SubFunc().

◆ Sub() [2/2]

template<typename T >
OMStatus onert_micro::execute::pal::Sub ( const core::BinaryArithmeticBroadcastParams &  params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 49 of file PALSubCommon.h.

51{
52 ArithmeticOp<T, SubFn<T>>(params, flat_size, input1_data, input2_data, output_data);
53 return Ok;
54}

References onert_micro::Ok.

◆ SubFunc()

int8_t onert_micro::execute::pal::SubFunc ( int8_t  x,
int8_t  y,
const core::ArithmeticQuantParams &  params 
)

Definition at line 29 of file PALSubCommon.h.

30{
31 const int32_t input1_val = params.input1_offset + x;
32 const int32_t input2_val = params.input2_offset + y;
33 const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
34 const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
35 const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
36 shifted_input1_val, params.input1_multiplier, params.input1_shift);
37 const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
38 shifted_input2_val, params.input2_multiplier, params.input2_shift);
39 const int32_t raw_sum = scaled_input1_val - scaled_input2_val;
40 const int32_t raw_output = multiplyByQuantizedMultiplierSmallerThanOneExp(
41 raw_sum, params.output_multiplier, params.output_shift) +
42 params.output_offset;
43 const int32_t clamped_output = std::min(params.quantized_activation_max,
44 std::max(params.quantized_activation_min, raw_output));
45 return static_cast<int8_t>(clamped_output);
46}

References onert_micro::core::ArithmeticQuantParams::input1_multiplier, onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input1_shift, onert_micro::core::ArithmeticQuantParams::input2_multiplier, onert_micro::core::ArithmeticQuantParams::input2_offset, onert_micro::core::ArithmeticQuantParams::input2_shift, onert_micro::core::ArithmeticQuantParams::left_shift, multiplyByQuantizedMultiplierSmallerThanOneExp(), onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, and onert_micro::core::ArithmeticQuantParams::quantized_activation_min.

Referenced by BroadcastSub4DSlow(), and Sub().
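The fixed-point pipeline above approximates dequantize → subtract → requantize. A float reference of that computation is sketched below; the scales and zero points are made-up example values, not parameters used anywhere in the library.

#include <algorithm>
#include <cmath>
#include <cstdint>

int8_t subFloatReference(int8_t x, int8_t y)
{
  const float s1 = 0.5f, s2 = 0.25f, s_out = 0.75f; // example input/output scales
  const int z1 = 0, z2 = 0, z_out = -5;             // example zero points

  const float real1 = s1 * (x - z1);
  const float real2 = s2 * (y - z2);
  const float real_out = real1 - real2;

  int32_t q = static_cast<int32_t>(std::round(real_out / s_out)) + z_out;
  q = std::min(127, std::max(-128, q)); // mirrors the activation clamp
  return static_cast<int8_t>(q);
}

SubFunc performs the same computation with integer-only arithmetic: the input offsets fold in the zero points, left_shift and the input multipliers/shifts rescale both operands to a common resolution, and output_multiplier/output_shift map the difference back to the output scale before the clamp.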

◆ subscriptToIndex() [1/2]

int onert_micro::execute::pal::subscriptToIndex ( const NdArrayDesc< 4 > &  desc,
int  i0,
int  i1,
int  i2,
int  i3 
)
inline

◆ subscriptToIndex() [2/2]

int onert_micro::execute::pal::subscriptToIndex ( const NdArrayDesc< 5 > &  desc,
int  indexes[5] 
)
inline

Definition at line 135 of file ProcessBroadcastShapes.h.

136{
137 return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
138 indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] + indexes[4] * desc.strides[4];
139}

References onert_micro::execute::pal::NdArrayDesc< N >::strides.
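Worked example, assuming the dense row-major strides that copyDimsToDesc() fills in (innermost stride 1, each outer stride = inner extent * inner stride):

// A 2x1x3x1x4 array has strides {12, 12, 4, 4, 1}.
int strides[5] = {12, 12, 4, 4, 1};
int indexes[5] = {1, 0, 2, 0, 3};
int flat = 0;
for (int d = 0; d < 5; ++d)
  flat += indexes[d] * strides[d]; // 23, the same dot product subscriptToIndex() returns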

◆ SVDF() [1/2]

OMStatus onert_micro::execute::pal::SVDF ( const core::SVDFQuantParams &  params,
const int8_t *  input_data,
const int8_t *  weights_feature_data,
const int8_t *  weights_time_data,
const int32_t *  bias_data,
int8_t *  state_data,
int8_t *  output_data,
const core::OMRuntimeShape &  input_shape,
const core::OMRuntimeShape &  weights_feature_shape,
const core::OMRuntimeShape &  weights_time_shape,
const core::OMRuntimeShape &  bias_shape,
const core::OMRuntimeShape &  output_shape 
)

Definition at line 35 of file PALSVDF.h.

42{
43 cmsis_nn_dims input_dims;
44 input_dims.n = input_shape.dims(0);
45 input_dims.h = input_shape.dims(1);
46
47 cmsis_nn_dims weights_feature_dims;
48 weights_feature_dims.n = weights_feature_shape.dims(0);
49 weights_feature_dims.h = weights_feature_shape.dims(1);
50
51 cmsis_nn_dims weights_time_dims;
52 weights_time_dims.n = weights_time_shape.dims(0);
53 weights_time_dims.h = weights_time_shape.dims(1);
54
55 cmsis_nn_dims bias_dims;
56 bias_dims.n = bias_shape.dims(0);
57
58 cmsis_nn_dims state_dims;
59 state_dims.n = bias_shape.dims(0);
60 state_dims.h = bias_shape.dims(1);
61
62 cmsis_nn_dims output_dims;
63 output_dims.n = output_shape.dims(0);
64 output_dims.h = output_shape.dims(1);
65
66 cmsis_nn_svdf_params svdf_params;
67 svdf_params.rank = params.rank;
68 svdf_params.input_offset = params.input_zero_point;
69 svdf_params.output_offset = params.output_zero_point;
70
71 svdf_params.input_activation.min = INT16_MIN;
72 svdf_params.input_activation.max = INT16_MAX;
73
74 svdf_params.output_activation.min = INT8_MIN;
75 svdf_params.output_activation.max = INT8_MAX;
76
77 cmsis_nn_per_tensor_quant_params in_quant_params;
78 in_quant_params.multiplier = params.effective_scale_1_a;
79 in_quant_params.shift = params.effective_scale_1_b;
80
81 cmsis_nn_per_tensor_quant_params out_quant_params;
82 out_quant_params.multiplier = params.effective_scale_2_a;
83 out_quant_params.shift = params.effective_scale_2_b;
84
85 const int batch_size = input_shape.dims(0);
86 const int input_size = input_shape.dims(1);
87 const int num_filters = weights_feature_shape.dims(0);
88 const int num_units = num_filters / params.rank;
89
90 uint8_t *scratch_tensor_data;
91 OMStatus status = core::memory::OMMemoryManager::allocateMemory(
92 batch_size * num_filters * sizeof(int32_t), &scratch_tensor_data);
93 assert(status == Ok);
94 if (status != Ok)
95 return status;
96
97 uint8_t *scratch_output_tensor_data;
98 status = core::memory::OMMemoryManager::allocateMemory(batch_size * num_units * sizeof(int32_t),
99 &scratch_output_tensor_data);
100 assert(status == Ok);
101 if (status != Ok)
102 return status;
103
104 cmsis_nn_context scratch_ctx;
105 scratch_ctx.buf = reinterpret_cast<int32_t *>(scratch_tensor_data);
106
107 cmsis_nn_context scratch_output_ctx;
108 scratch_output_ctx.buf = reinterpret_cast<int32_t *>(scratch_output_tensor_data);
109
110 arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params,
111 &input_dims, input_data, &state_dims, state_data, &weights_feature_dims,
112 weights_feature_data, &weights_time_dims, weights_time_data, &bias_dims, bias_data,
113 &output_dims, output_data);
114
115 core::memory::OMMemoryManager::deallocateMemory(scratch_tensor_data);
116 core::memory::OMMemoryManager::deallocateMemory(scratch_output_tensor_data);
117
118 return Ok;
119}

References onert_micro::core::memory::OMMemoryManager::allocateMemory(), onert_micro::core::memory::OMMemoryManager::deallocateMemory(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::SVDFQuantParams::effective_scale_1_a, onert_micro::core::SVDFQuantParams::effective_scale_1_b, onert_micro::core::SVDFQuantParams::effective_scale_2_a, onert_micro::core::SVDFQuantParams::effective_scale_2_b, onert_micro::core::SVDFQuantParams::input_zero_point, onert_micro::Ok, output_shape, onert_micro::core::SVDFQuantParams::output_zero_point, and onert_micro::core::SVDFQuantParams::rank.

◆ SVDF() [2/2]

OMStatus onert_micro::execute::pal::SVDF ( const float *  input_data,
const float *  weights_feature_data,
const float *  weights_time_data,
const float *  bias_data,
float *  state_data,
float *  scratch_data,
float *  output_data,
const int  rank,
const int  input_size,
const int  batch_size,
const int  num_filters,
const int  num_units,
const int  memory_size,
const circle::ActivationFunctionType  activation 
)

Definition at line 138 of file PALSVDFCommon.h.

143{
144 // Left shift the activation_state.
145 {
146 float *new_state_start = state_data;
147 const float *old_state_start = state_data + 1;
148 const float *old_state_end = state_data + batch_size * num_filters * memory_size;
149 while (old_state_start != old_state_end)
150 {
151 *new_state_start++ = *old_state_start++;
152 }
153 }
154
155 // Note: no need to clear the latest activation, matmul is not accumulative.
156
157 // Compute conv1d(inputs, weights_feature).
158 // The activation_state's rightmost column is used to save current cycle
159 // activation. This is achieved by starting at state_ptr[memory_size - 1] and
160 // having the stride equal to memory_size.
161
162 // Perform batched matrix vector multiply operation:
163 {
164 const float *matrix = weights_feature_data;
165 const float *vector = input_data;
166 float *result = &state_data[memory_size - 1];
167 float *result_in_batch = result;
168 for (int i = 0; i < batch_size; ++i)
169 {
170 const float *matrix_ptr = matrix;
171 for (int j = 0; j < num_filters; ++j)
172 {
173 float dot_prod = 0.0f;
174 const float *vector_in_batch = vector + i * input_size;
175 for (int k = 0; k < input_size; ++k)
176 {
177 dot_prod += *matrix_ptr++ * *vector_in_batch++;
178 }
179 *result_in_batch = dot_prod;
180 result_in_batch += memory_size;
181 }
182 }
183 }
184
185 applyTimeWeightsBiasAndActivation(batch_size, memory_size, num_filters, num_units, rank,
186 weights_time_data, bias_data, activation, state_data,
187 scratch_data, output_data);
188 return Ok;
189}

References onert_micro::Ok.
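A hypothetical caller sketch with tiny, made-up dimensions (rank = 1, batch = 1, input_size = 2, num_filters = 2, num_units = 2, memory_size = 3). The scratch buffer is assumed to hold batch_size * num_filters floats, as in the reference SVDF this routine follows; verify against the kernel that allocates it.

#include <cassert>

void svdfFloatExample()
{
  float input[1 * 2] = {0.5f, -1.0f};
  float weights_feature[2 * 2] = {0.1f, 0.2f, 0.3f, 0.4f};
  float weights_time[2 * 3] = {0.5f, 0.5f, 0.5f, 0.25f, 0.25f, 0.25f};
  float bias[2] = {0.f, 0.f};
  float state[1 * 2 * 3] = {}; // persists across invocations
  float scratch[1 * 2];        // assumed batch_size * num_filters
  float output[1 * 2];

  const onert_micro::OMStatus status = onert_micro::execute::pal::SVDF(
    input, weights_feature, weights_time, bias, state, scratch, output,
    /*rank=*/1, /*input_size=*/2, /*batch_size=*/1, /*num_filters=*/2,
    /*num_units=*/2, /*memory_size=*/3, circle::ActivationFunctionType_NONE);
  assert(status == onert_micro::Ok);
}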

◆ Tanh()

template<typename T >
OMStatus onert_micro::execute::pal::Tanh ( const core::OMRuntimeShape &  input_shape,
const T *  input_data,
const core::OMRuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 31 of file PALTanhCommon.h.

33{
34 const uint32_t flat_size = input_shape.flatSize();
35
36 if (flat_size == -1)
37 return UnknownError;
38
39 assert(input_data != nullptr);
40 assert(output_data != nullptr);
41
42 assert(input_shape == output_shape);
43
44 for (int i = 0; i < flat_size; i++)
45 {
46 output_data[i] = std::tanh(input_data[i]);
47 }
48
49 return Ok;
50}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ Transpose()

template<typename T , int N = 5>
OMStatus onert_micro::execute::pal::Transpose ( const core::TransposeParams &  params,
const core::OMRuntimeShape &  unextended_input_shape,
const T *  input_data,
const core::OMRuntimeShape &  unextended_output_shape,
T *  output_data 
)
inline

Definition at line 78 of file PALTransposeCommon.h.

81{
82 // Transpose kernel only does rearranging values not numeric evaluations on
83 // each cell. It's safe to implement per size of scalar type and this trick
84 // keeps the total code size in a reasonable range.
85 OMStatus status;
86 switch (sizeof(T))
87 {
88 case 1:
89 status = TransposeImpl<int8_t, N>(
90 params, unextended_input_shape, reinterpret_cast<const int8_t *>(input_data),
91 unextended_output_shape, reinterpret_cast<int8_t *>(output_data));
92 break;
93 case 2:
94 status = TransposeImpl<int16_t, N>(
95 params, unextended_input_shape, reinterpret_cast<const int16_t *>(input_data),
96 unextended_output_shape, reinterpret_cast<int16_t *>(output_data));
97 break;
98
99 case 4:
100 status = TransposeImpl<int32_t, N>(
101 params, unextended_input_shape, reinterpret_cast<const int32_t *>(input_data),
102 unextended_output_shape, reinterpret_cast<int32_t *>(output_data));
103 break;
104 case 8:
105 status = TransposeImpl<int64_t, N>(
106 params, unextended_input_shape, reinterpret_cast<const int64_t *>(input_data),
107 unextended_output_shape, reinterpret_cast<int64_t *>(output_data));
108 break;
109 default:
110 status = UnknownError;
111 break;
112 }
113 return status;
114}

References onert_micro::UnknownError.
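A hypothetical caller sketch. It assumes core::TransposeParams carries the TFLite-style perm_count / perm[] fields and that OMRuntimeShape has a (dims_count, dims_data) constructor, as in the Sqrt sketch above; both should be checked against the onert-micro headers.

#include <cstdint>

void transposeExample()
{
  const int32_t in_dims[2] = {2, 3};
  const int32_t out_dims[2] = {3, 2};
  const onert_micro::core::OMRuntimeShape input_shape(2, in_dims);
  const onert_micro::core::OMRuntimeShape output_shape(2, out_dims);

  onert_micro::core::TransposeParams params;
  params.perm_count = 2;
  params.perm[0] = 1; // output dim 0 comes from input dim 1
  params.perm[1] = 0;

  const float input[6] = {1, 2, 3, 4, 5, 6}; // [[1,2,3],[4,5,6]]
  float output[6];
  onert_micro::execute::pal::Transpose<float>(params, input_shape, input,
                                              output_shape, output);
  // output is {1, 4, 2, 5, 3, 6}, i.e. [[1,4],[2,5],[3,6]]
}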

◆ TransposeConv()

template<typename T >
OMStatus onert_micro::execute::pal::TransposeConv ( const core::FloatConv2D *  params,
const core::OMRuntimeShape &  input_shape,
const T *  input_data,
const core::OMRuntimeShape &  filter_shape,
const T *  filter_data,
const T *  bias_data,
const core::OMRuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 35 of file PALTransposeConvCommon.h.

38{
39 assert(false && "Not IMPL yet");
40}

◆ TransposeConv< float >()

template<>
OMStatus onert_micro::execute::pal::TransposeConv< float > ( const core::FloatConv2D *  params,
const core::OMRuntimeShape &  input_shape,
const float *  input_data,
const core::OMRuntimeShape &  filter_shape,
const float *  filter_data,
const float *  bias_data,
const core::OMRuntimeShape &  output_shape,
float *  output_data 
)
inline

Definition at line 42 of file PALTransposeConvCommon.h.

47{
48 const int stride_width = params->stride_w;
49 const int stride_height = params->stride_h;
50 const int pad_width = params->pad_w;
51 const int pad_height = params->pad_h;
52
53 const int batches = input_shape.dims(0);
54 const int input_depth = input_shape.dims(3);
55 const int output_depth = filter_shape.dims(0);
56 const int input_height = input_shape.dims(1);
57 const int input_width = input_shape.dims(2);
58 const int filter_height = filter_shape.dims(1);
59 const int filter_width = filter_shape.dims(2);
60 const int output_height = output_shape.dims(1);
61 const int output_width = output_shape.dims(2);
62 const float output_activation_min = params->activation_min;
63 const float output_activation_max = params->activation_max;
64
65 // Although transpose convolution simplifies to convolution with transposed
66 // weights for strides of 1, non-unitary striding complicates matters. To
67 // keep this reference implementation as clear as possible, we use a
68 // "scatter" access pattern, where we loop through all the input elements,
69 // computing their influence on the output, rather than looping through the
70 // output elements in the typical "gather" access pattern of a conv. We
71 // therefore must initialize the output array to zero.
72 const int num_elements = output_shape.flatSize();
73 for (int i = 0; i < num_elements; i++)
74 {
75 output_data[i] = 0.0f;
76 }
77
78 // Loop through input elements one at a time.
79 for (int batch = 0; batch < batches; ++batch)
80 {
81 for (int in_y = 0; in_y < input_height; ++in_y)
82 {
83 for (int in_x = 0; in_x < input_width; ++in_x)
84 {
85 for (int in_channel = 0; in_channel < input_depth; ++in_channel)
86 {
87 // Loop through the output elements it will influence
88 const int out_x_origin = (in_x * stride_width) - pad_width;
89 const int out_y_origin = (in_y * stride_height) - pad_height;
90 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
91 {
92 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
93 {
94 for (int out_channel = 0; out_channel < output_depth; ++out_channel)
95 {
96 // Compute output element location
97 const int out_x = out_x_origin + filter_x;
98 const int out_y = out_y_origin + filter_y;
99 // We cannot accumulate out of bounds
100 if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
101 (out_y < output_height))
102 {
103 float input_value =
104 input_data[offset(input_shape.dimsData(), batch, in_y, in_x, in_channel)];
105 float filter_value = filter_data[offset(filter_shape.dimsData(), out_channel,
106 filter_y, filter_x, in_channel)];
107 output_data[offset(output_shape.dimsData(), batch, out_y, out_x, out_channel)] +=
108 input_value * filter_value;
109 }
110 }
111 }
112 }
113 }
114 }
115 }
116 }
117
118 for (int batch = 0; batch < batches; ++batch)
119 {
120 for (int out_y = 0; out_y < output_height; ++out_y)
121 {
122 for (int out_x = 0; out_x < output_width; ++out_x)
123 {
124 for (int out_channel = 0; out_channel < output_depth; ++out_channel)
125 {
126 float acc =
127 output_data[offset(output_shape.dimsData(), batch, out_y, out_x, out_channel)];
128 if (bias_data)
129 acc += bias_data[out_channel];
130
131 output_data[offset(output_shape.dimsData(), batch, out_y, out_x, out_channel)] =
132 activationFunctionWithMinMax(acc, output_activation_min, output_activation_max);
133 }
134 }
135 }
136 }
137 return Ok;
138}

References onert_micro::core::FloatConv2D::activation_max, onert_micro::core::FloatConv2D::activation_min, activationFunctionWithMinMax(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), luci_interpreter::RuntimeShape::flatSize(), offset(), onert_micro::Ok, output_shape, onert_micro::core::FloatConv2D::pad_h, onert_micro::core::FloatConv2D::pad_w, onert_micro::core::FloatConv2D::stride_h, and onert_micro::core::FloatConv2D::stride_w.
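As a shape sanity check for the scatter pattern above (illustrative only, not library code): input column in_x writes to output columns [in_x*stride_w - pad_w, in_x*stride_w - pad_w + filter_width), so for consistent shapes the output extent along each spatial axis follows the usual transpose-convolution relation.

int transposeConvOutSize(int in, int stride, int filter, int pad)
{
  // output = (input - 1) * stride + filter - 2 * pad
  return (in - 1) * stride + filter - 2 * pad;
}
// e.g. transposeConvOutSize(2, 2, 3, 0) == 5: in_x = 0 covers columns 0..2,
// in_x = 1 covers columns 2..4.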

◆ TransposeImpl()

template<typename T , int N>
OMStatus onert_micro::execute::pal::TransposeImpl ( const core::TransposeParams &  params,
const core::OMRuntimeShape &  unextended_input_shape,
const T *  input_data,
const core::OMRuntimeShape &  unextended_output_shape,
T *  output_data 
)
inline

Definition at line 36 of file PALTransposeCommon.h.

40{
41 const int unextended_input_size = unextended_input_shape.dimensionsCount();
42 const int unextended_output_size = unextended_output_shape.dimensionsCount();
43
44 const int input_ext_size = N - unextended_input_size;
45 const int output_ext_size = N - unextended_output_size;
46 NdArrayDesc<N> input_desc;
47 NdArrayDesc<N> output_desc;
48 copyDimsToDesc(core::OMRuntimeShape::extendedShape(N, unextended_input_shape), &input_desc);
49 copyDimsToDesc(core::OMRuntimeShape::extendedShape(N, unextended_output_shape), &output_desc);
50
51 // The perm data is extended to match the output, each index incremented by
52 // the amount of front padding of the input shape.
53 int extended_perm[N];
54 for (int i = 0; i < N; ++i)
55 {
56 extended_perm[i] = i < output_ext_size ? i : params.perm[i - output_ext_size] + input_ext_size;
57 }
58
59 // Permutes the input shape so we don't need to permute the indexes inside
60 // the loop. Check to make sure output_dims is matching input_dims.
61 NdArrayDesc<N> perm_input_desc;
62 for (int k = 0; k < N; ++k)
63 {
64 perm_input_desc.extents[k] = input_desc.extents[extended_perm[k]];
65 perm_input_desc.strides[k] = input_desc.strides[extended_perm[k]];
66 }
67
68 // Naive transpose loop (iterate on output index and compute input index).
69 auto tranpose_func = [&](int indexes[N]) {
70 output_data[subscriptToIndex(output_desc, indexes)] =
71 input_data[subscriptToIndex(perm_input_desc, indexes)];
72 };
73 NDOpsHelper<N>(output_desc, tranpose_func);
74 return Ok;
75}

References copyDimsToDesc(), onert_micro::core::OMRuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::execute::pal::NdArrayDesc< N >::extents, onert_micro::Ok, onert_micro::core::TransposeParams::perm, onert_micro::execute::pal::NdArrayDesc< N >::strides, and subscriptToIndex().
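Worked example of the perm extension: a rank-3 transpose with perm = {2, 0, 1} padded to N = 5 gives input_ext_size = output_ext_size = 2, so extended_perm = {0, 1, perm[0]+2, perm[1]+2, perm[2]+2} = {0, 1, 4, 2, 3}. The two front padding dimensions stay in place and only the real axes are permuted behind them.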

◆ UnaryOp()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::UnaryOp ( const core::OMRuntimeShape &  input_shape,
const T *  input_data,
const core::OMRuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 40 of file PALUnaryOpCommon.h.

42{
43 Fn func;
44 const uint32_t flat_size = input_shape.flatSize();
45
46 if (flat_size == -1)
47 return UnknownError;
48
49 assert(input_data != nullptr);
50 assert(output_data != nullptr);
51
52 assert(input_shape == output_shape);
53 for (int i = 0; i < flat_size; ++i)
54 {
55 output_data[i] = func(input_data[i]);
56 }
57 return Ok;
58}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.
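Fn is any functor with a call operator taking and returning T. A hypothetical functor and call, with OMRuntimeShape construction assumed as in the Sqrt sketch above:

#include <cstdint>

struct NegateFn
{
  float operator()(float v) const { return -v; }
};

void unaryOpExample()
{
  const int32_t dims[1] = {4};
  const onert_micro::core::OMRuntimeShape shape(1, dims);

  const float in[4] = {1.f, -2.f, 3.f, -4.f};
  float out[4];
  onert_micro::execute::pal::UnaryOp<float, NegateFn>(shape, in, shape, out);
  // out == {-1, 2, -3, 4}
}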

◆ Unpack()

template<typename T >
OMStatus onert_micro::execute::pal::Unpack ( const core::SplitParams &  params,
const core::OMRuntimeShape &  input_shape,
const T *  input_data,
const core::OMRuntimeShape &  output_shape,
int32_t  axis_value 
)

Definition at line 31 of file PALUnpack.h.

33{
34 const auto dimensions = input_shape.dimensionsCount();
35
36 if (axis_value < 0)
37 {
38 axis_value += dimensions;
39 }
40
41 int outer_size = 1;
42 for (int i = 0; i < axis_value; ++i)
43 {
44 outer_size *= input_shape.dims(i);
45 }
46 int copy_size = 1;
47 for (int i = axis_value + 1; i < dimensions; ++i)
48 {
49 copy_size *= input_shape.dims(i);
50 }
51 int output_size = 1;
52 for (int i = 0; i < output_shape.dimensionsCount(); ++i)
53 {
54 output_size *= output_shape.dims(i);
55 }
56
57 for (int i = 0; i < params.num_outputs; ++i)
58 {
59 T *output_data = core::utils::castOutputData<T>(params.output_data[i]);
60 assert(output_data != nullptr);
61 for (int k = 0; k < outer_size; ++k)
62 {
63 T *output_ptr = output_data + copy_size * k;
64 int loc = k * params.num_outputs * copy_size + i * copy_size;
65 const T *input_ptr = input_data + loc;
66 for (int j = 0; j < copy_size; ++j)
67 output_ptr[j] = input_ptr[j];
68 }
69 }
70 return Ok;
71}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::SplitParams::num_outputs, onert_micro::Ok, onert_micro::core::SplitParams::output_data, and output_shape.
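Worked example of the copy pattern: unpacking a {2, 3, 4} input along axis 1 yields num_outputs = 3 tensors of shape {2, 4}, with outer_size = 2 and copy_size = 4. The source offset is loc = k * num_outputs * copy_size + i * copy_size = k * 12 + i * 4, so output i receives input elements [i*4, i*4+4) and [12 + i*4, 12 + i*4 + 4), i.e. the slice input[:, i, :].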

Variable Documentation

◆ MAX_INDICES_ND

constexpr int onert_micro::execute::pal::MAX_INDICES_ND = 5
constexpr

Definition at line 32 of file PALGatherND.h.

Referenced by GatherND().