ONE - On-device Neural Engine
Loading...
Searching...
No Matches
onert_micro::execute::pal Namespace Reference

Data Structures

struct  AddFn
 
struct  DivFn
 
struct  FloorDivFn
 
struct  FloorModFn
 
struct  LogicalAndFn
 
struct  LogicalOrFn
 
struct  MaximumFn
 
struct  MinimumFn
 
struct  MulFn
 
struct  NdArrayDesc
 
struct  ReduceProductFn
 
struct  ReduceProductFn< int8_t >
 
struct  ReduceSumFn
 
struct  ReduceSumFn< int8_t >
 
struct  SquaredDifferenceFn
 
struct  SubFn
 
struct  TanhFunctor
 

Functions

OMStatus Add (const core::ArithmeticQuantParams &params, const uint32_t flat_size, const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data)
 
template<typename T1 , typename T2 , typename T3 >
OMStatus ArgMax (const core::OMRuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, const core::OMRuntimeShape &output_shape, T2 *output_data)
 
OMStatus AveragePool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
OMStatus ConvPerChannel (const core::ConvQuant &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int32_t *bias_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
OMStatus DepthwiseConvPerChannel (const core::ConvQuant &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int32_t *bias_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
template<>
OMStatus FullyConnected< int8_t > (const core::FullyConnectedParams &params, const int8_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int32_t *bias_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
template<>
OMStatus FullyConnected (const core::FullyConnectedParams &params, const int16_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int64_t *bias_data, const core::OMRuntimeShape &output_shape, int16_t *output_data)
 
OMStatus MaxPool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
OMStatus Mul (const core::ArithmeticQuantParams &params, const uint32_t flat_size, const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data)
 
template<>
OMStatus ReLUCommon< int8_t > (const int flat_size, const int8_t *input_data, int8_t *output_data, const float alpha, const bool is_relu_6)
 
template<>
OMStatus Softmax< int8_t, int8_t > (const core::SoftmaxParams &params, const int8_t *input_data, int8_t *output_data)
 
OMStatus SVDF (const core::SVDFQuantParams &params, const int8_t *input_data, const int8_t *weights_feature_data, const int8_t *weights_time_data, const int32_t *bias_data, int8_t *state_data, int8_t *output_data, const core::OMRuntimeShape &input_shape, const core::OMRuntimeShape &weights_feature_shape, const core::OMRuntimeShape &weights_time_shape, const core::OMRuntimeShape &bias_shape, const core::OMRuntimeShape &output_shape)
 
template<typename T >
OMStatus Abs (const core::OMRuntimeShape &shape, const T *input_data, T *output_data)
 
int8_t AddFunc (int8_t x, int8_t y, const core::ArithmeticQuantParams &params)
 
template<typename T >
OMStatus Add (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastAdd4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus BroadcastAdd4DSlow (const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const int8_t *input1_data, const core::OMRuntimeShape &input2_shape, const int8_t *input2_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
template<typename T >
OMStatus AddN (const size_t flat_size, const size_t num_inputs, const T *const *input_data, T *output_data)
 
template<typename T1 , typename T2 , typename T3 , typename Cmp >
OMStatus ArgMinMax (const core::OMRuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, const core::OMRuntimeShape &output_shape, T2 *output_data, const Cmp &cmp)
 
template<typename T , typename Fn >
OMStatus ArithmeticOp (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T , typename Fn >
OMStatus QuantizedArithmeticOp (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const onert_micro::core::QuantizationParams &input1_qparams, const T *input1_data, const onert_micro::core::QuantizationParams &input2_qparams, const T *input2_data, const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
 
template<typename T >
void ElementWise (const uint32_t size, const core::ArithmeticQuantParams &params, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
template<typename T , typename Fn >
void ArithmeticOpScalar (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input_data, const T scalar_value, T *output_data)
 
template<typename T , typename Fn >
OMStatus BroadcastArithmeticOp4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T , typename Fn >
OMStatus QuantizedBroadcastArithmeticOp4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const onert_micro::core::QuantizationParams &input1_qparams, const T *input1_data, const core::OMRuntimeShape &input2_shape, const onert_micro::core::QuantizationParams &input2_qparams, const T *input2_data, const core::OMRuntimeShape &output_shape, const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
 
template<typename T >
void BroadcastInput1 (int size, const core::ArithmeticQuantParams &params, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
template<typename T >
void BroadcastInput2 (int size, const core::ArithmeticQuantParams &params, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
template<typename T >
void BroadcastRecursiveDimensions (const core::ArithmeticQuantParams &params, int dimension, size_t *input1_offset_p, size_t *input2_offset_p, size_t *output_offset, size_t *compressed_input1_stride, size_t *compressed_input2_stride, size_t *compressed_output_shape, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
template<typename T >
void BroadcastBinaryFunction6DSlow (const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
OMStatus AveragePool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T >
OMStatus BatchToSpaceND (const core::OMRuntimeShape &unextended_input1_shape, const T *input1_data, const core::OMRuntimeShape &unextended_input2_shape, const int32_t *block_shape_data, const core::OMRuntimeShape &unextended_input3_shape, const int32_t *crops_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T , typename Fn >
OMStatus BinaryOp (const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T , typename Fn >
OMStatus BroadcastBinaryOp4DSlow (const core::OMRuntimeShape &input1_shape, const float *input1_data, const core::OMRuntimeShape &input2_shape, const float *input2_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename FromT , typename ToT >
OMStatus Cast (const core::OMRuntimeShape &input_shape, const FromT *input_data, const core::OMRuntimeShape &output_shape, ToT *output_data)
 
template<typename T >
OMStatus Ceil (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
bool LessFn (T lhs, T rhs)
 
template<typename T >
bool LessEqualFn (T lhs, T rhs)
 
template<typename T >
bool EqualFn (T lhs, T rhs)
 
template<typename T >
bool GreaterFn (T lhs, T rhs)
 
template<typename T >
bool GreaterEqualFn (T lhs, T rhs)
 
template<typename T >
bool NotEqualFn (T lhs, T rhs)
 
template<typename T >
void ComparisonNoScaling (const int64_t flat_size, const T *input1_data, const T *input2_data, bool *output_data, bool F(T, T))
 
template<typename T , typename AccType >
void BroadcastComparison4DSlowWithScaling (const core::ComparisonParams &op_params, const core::OMRuntimeShape &unextended_input1_shape, const T *input1_data, const core::OMRuntimeShape &unextended_input2_shape, const T *input2_data, const core::OMRuntimeShape &unextended_output_shape, bool *output_data, bool F(AccType, AccType))
 
template<typename T , typename AccType >
void ComparisonWithScaling (const core::ComparisonParams &op_params, const int64_t flat_size, const T *input1_data, const T *input2_data, bool *output_data, bool F(AccType, AccType))
 
template<typename T >
void BroadcastComparison4DSlowNoScaling (const core::ComparisonParams &op_params, const core::OMRuntimeShape &unextended_input1_shape, const T *input1_data, const core::OMRuntimeShape &unextended_input2_shape, const T *input2_data, const core::OMRuntimeShape &unextended_output_shape, bool *output_data, bool F(T, T))
 
template<typename Scalar >
OMStatus Concatenation (core::ConcatenationParams &params, std::vector< uint32_t > &input_shapes, std::vector< const Scalar * > &input_data, const core::OMRuntimeShape &output_shape, Scalar *output_data)
 
OMStatus ConvFloat (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &filter_shape, const float *filter_data, const float *bias_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T >
OMStatus Cos (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus DepthwiseConv2D (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &filter_shape, const T *filter_data, const T *bias_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<>
OMStatus DepthwiseConv2D< float > (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &filter_shape, const float *filter_data, const float *bias_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename InputT , typename OutputT >
OMStatus Dequantize (const core::QuantizationParams op_params, const uint32_t flat_size, const InputT *input_data, OutputT *output_data)
 
template<typename T >
OMStatus Div (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastDiv4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus Elu (const int flat_size, const float *input_data, float *output_data)
 
template<typename T >
OMStatus Exp (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Fill (const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Floor (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
void FloorDiv (const int flat_size, const float *input1_data, const float *input2_data, float *output_data)
 
void BroadcastFloorDiv4DSlow (const core::OMRuntimeShape &input1_shape, const float *input1_data, const core::OMRuntimeShape &input2_shape, const float *input2_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
void FloorMod (const int flat_size, const float *input1_data, const float *input2_data, float *output_data)
 
void BroadcastFloorMod4DSlow (const core::OMRuntimeShape &input1_shape, const float *input1_data, const core::OMRuntimeShape &input2_shape, const float *input2_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename InputType , typename WeightType , typename OutputType , typename BiasType >
OMStatus FullyConnected (const core::FullyConnectedParams &params, const InputType *input_data, const core::OMRuntimeShape &filter_shape, const WeightType *filter_data, const BiasType *bias_data, const core::OMRuntimeShape &output_shape, OutputType *output_data)
 
template<typename WeightType >
OMStatus FullyConnected (const core::FullyConnectedParams &params, const float *input_data, const core::OMRuntimeShape &filter_shape, const WeightType *filter_data, const float *bias_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename ParamsT , typename IndicesT >
OMStatus GatherND (core::OMRuntimeShape params_shape, const ParamsT *param_data, core::OMRuntimeShape indices_shape, const IndicesT *index_data, ParamsT *output_data)
 
OMStatus GRU (const float *input_data, const float *weight_input_data, const float *weight_hidden_data, const float *bias_input_data, const float *bias_hidden_data, const float *hidden_state_data, float *output_data, float *output_input_data, float *output_hidden_data, const core::OMRuntimeShape &input_shape, const core::OMRuntimeShape &output_shape, const core::OMRuntimeShape &weight_input_shape, const core::OMRuntimeShape &weight_hidden_shape, const size_t intermediate_buffer_size, float *intermediate_buffer)
 
OMStatus L2Normalization (const core::L2NormalizationParams &params, const float *input_data, float *output_data)
 
OMStatus L2Pool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T >
OMStatus Log (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<class Fn >
OMStatus LogicalCommon (const int flat_size, const bool *input1_data, const bool *input2_data, bool *output_data)
 
OMStatus LogicalNot (const int flat_size, const bool *input_data, bool *output_data)
 
OMStatus Logistic (const int flat_size, const float *input_data, float *output_data)
 
OMStatus Logistic (const int flat_size, const int8_t *input_data, float input_scale, int input_zero_point, int8_t *output_data, float output_scale, int output_zero_point)
 
OMStatus LogSoftmax (const core::LogSoftmaxParams &params, const float *input_data, float *output_data)
 
OMStatus Maximum (const int flat_size, const float *input1_data, const float *input2_data, float *output_data)
 
OMStatus BroadcastMaximum4DSlow (const core::OMRuntimeShape &input1_shape, const float *input1_data, const core::OMRuntimeShape &input2_shape, const float *input2_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
OMStatus MaxPool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
OMStatus Minimum (const int flat_size, const float *input1_data, const float *input2_data, float *output_data)
 
template<typename T >
OMStatus BroadcastMinimum4DSlow (const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Mul (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastMul4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus BroadcastMul6DSlow (const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Neg (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus Pad (const core::PadParams &op_params, const core::OMRuntimeShape &input_shape, const float *input_data, const float pad_value, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T >
OMStatus QuantizedZeroPad (const core::PadParams &op_params, const core::OMRuntimeShape &input_shape, const onert_micro::core::QuantizationParams &input_qparams, const T *input_data, const core::OMRuntimeShape &output_shape, const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
 
template<typename InputT , typename OutputT >
OMStatus Quantize (const core::QuantizationParams op_params, const uint32_t flat_size, const InputT *input_data, OutputT *output_data)
 
template<typename Type >
OMStatus ReLUCommon (const int flat_size, const Type *input_data, Type *output_data, const float alpha, const bool is_relu_6)
 
template<typename T >
OMStatus Round (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<>
OMStatus Round< float > (const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T >
OMStatus Rsqrt (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus QuantizedRsqrt (const core::OMRuntimeShape &input_shape, const onert_micro::core::QuantizationParams &input_qparams, const T *input_data, const core::OMRuntimeShape &output_shape, const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
 
template<typename T >
OMStatus Sin (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus SISOOperation (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data, std::function< T(T)> const &func)
 
template<typename T >
OMStatus SISOOperation (const core::OMRuntimeShape &input_shape, const onert_micro::core::QuantizationParams &input_qparams, const T *input_data, const core::OMRuntimeShape &output_shape, const onert_micro::core::QuantizationParams &output_qparams, T *output_data, std::function< float(float)> const &func)
 
template<typename T >
OMStatus Slice (const core::SliceParams &op_params, const core::OMRuntimeShape &input_shape, const T *input_data, T *output_data)
 
template<typename T , typename U >
OMStatus Softmax (const core::SoftmaxParams &params, const T *input_data, U *output_data)
 
template<>
OMStatus Softmax< float, float > (const core::SoftmaxParams &params, const float *input_data, float *output_data)
 
template<typename T >
OMStatus SpaceToBatchND (const core::OMRuntimeShape &unextended_input1_shape, const T *input1_data, const core::OMRuntimeShape &unextended_input2_shape, const int32_t *block_shape_data, const core::OMRuntimeShape &unextended_input3_shape, const int32_t *paddings_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T >
OMStatus SpaceToDepth (const int32_t block_size, const core::OMRuntimeShape &unextended_input_shape, const T *input_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T >
OMStatus Split (const core::SplitParams &params, const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, int32_t axis_value)
 
template<typename T >
OMStatus Sqrt (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Square (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus SquaredDifference (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastSquaredDifference4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus QuantizedBroadcastSquaredDifference4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const onert_micro::core::QuantizationParams &input1_qparams, const T *input1_data, const core::OMRuntimeShape &input2_shape, const onert_micro::core::QuantizationParams &input2_qparams, const T *input2_data, const core::OMRuntimeShape &output_shape, const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
 
template<typename T >
OMStatus QuantizedSquaredDifference (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const onert_micro::core::QuantizationParams &input1_qparams, const T *input1_data, const onert_micro::core::QuantizationParams &input2_qparams, const T *input2_data, const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
 
template<typename T >
OMStatus StridedSlice (core::StridedSliceParams &op_params, const core::OMRuntimeShape &unextended_input_shape, const T *input_data, T *output_data)
 
int8_t SubFunc (int8_t x, int8_t y, const core::ArithmeticQuantParams &params)
 
template<typename T >
OMStatus Sub (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastSub4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus BroadcastSub4DSlow (const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const int8_t *input1_data, const core::OMRuntimeShape &input2_shape, const int8_t *input2_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
OMStatus SVDF (const float *input_data, const float *weights_feature_data, const float *weights_time_data, const float *bias_data, float *state_data, float *scratch_data, float *output_data, const int rank, const int input_size, const int batch_size, const int num_filters, const int num_units, const int memory_size, const circle::ActivationFunctionType activation)
 
template<typename T >
OMStatus Tanh (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus QuantizedTanh (const core::OMRuntimeShape &input_shape, const onert_micro::core::QuantizationParams &input_qparams, const T *input_data, const core::OMRuntimeShape &output_shape, const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
 
template<typename T , int N>
OMStatus TransposeImpl (const core::TransposeParams &params, const core::OMRuntimeShape &unextended_input_shape, const T *input_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T , int N = 5>
OMStatus Transpose (const core::TransposeParams &params, const core::OMRuntimeShape &unextended_input_shape, const T *input_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T >
OMStatus TransposeConv (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &filter_shape, const T *filter_data, const T *bias_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<>
OMStatus TransposeConv< float > (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &filter_shape, const float *filter_data, const float *bias_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T , typename Fn >
OMStatus UnaryOp (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Unpack (const core::SplitParams &params, const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, int32_t axis_value)
 
std::pair< uint32_t, uint32_t > getUpLowerWeightTensorDepth (core::OpTrainableRankType rank, const uint32_t output_depth)
 
std::int32_t saturatingRoundingDoublingHighMul (std::int32_t a, std::int32_t b)
 
int32_t roundingDivideByPOT (int32_t x, int32_t exponent)
 
int32_t multiplyByQuantizedMultiplier (int32_t x, int32_t quantized_multiplier, int shift)
 
int32_t multiplyByQuantizedMultiplierSmallerThanOneExp (int32_t x, int32_t quantized_multiplier, int left_shift)
 
template<typename P >
void getActivationParams (const P &params, int32_t *min, int32_t *max)
 
template<typename P >
void getActivationParams (const P &params, float *min, float *max)
 
template<typename P >
void getActivationParams (const P &params, int64_t *min, int64_t *max)
 
size_t reducedOutputOffset (const int32_t num_dims, const int32_t *dims, const int32_t *index, const int32_t num_axis, const int32_t *axis)
 
bool nextIndex (const int32_t num_dims, const int32_t *dims, int32_t *current)
 
int MatchingDim (const core::OMRuntimeShape &shape1, int index1, const core::OMRuntimeShape &shape2, int index2)
 
int flatSizeSkipDim (const int32_t *dims_data, int skip_dim, int num_dims)
 
int offset (const int32_t *dims_data, int i0, int i1, int i2, int i3)
 
int offset (const int32_t *dims_data, int i0, int i1, int i2, int i3, int i4)
 
template<typename T >
T activationFunctionWithMinMax (T x, T output_activation_min, T output_activation_max)
 
template<int MAX_DIM = 6>
bool ReduceDimensionsForBroadcast (const core::OMRuntimeShape &input1_shape, const core::OMRuntimeShape &input2_shape, size_t *compressed_input1_stride, size_t *compressed_input2_stride, size_t *compressed_output_shape)
 
template<int N>
void copyDimsToDesc (const core::OMRuntimeShape &input_shape, NdArrayDesc< N > *desc_out)
 
template<int N, int DIM, typename Calc >
std::enable_if< DIM==N-1, void >::type NDOpsHelperImpl (const NdArrayDesc< N > &output, const Calc &calc, int indexes[N])
 
template<int N, int DIM, typename Calc >
std::enable_if< DIM!=N-1, void >::type NDOpsHelperImpl (const NdArrayDesc< N > &output, const Calc &calc, int indexes[N])
 
template<int N, typename Calc >
void NDOpsHelper (const NdArrayDesc< N > &output, const Calc &calc)
 
template<int N>
void NdArrayDescsForElementwiseBroadcast (const core::OMRuntimeShape &input0_shape, const core::OMRuntimeShape &input1_shape, NdArrayDesc< N > *desc0_out, NdArrayDesc< N > *desc1_out)
 
int subscriptToIndex (const NdArrayDesc< 4 > &desc, int i0, int i1, int i2, int i3)
 
int subscriptToIndex (const NdArrayDesc< 5 > &desc, int indexes[5])
 
bool processBroadcastShapes (const core::OMRuntimeShape &shape0, const core::OMRuntimeShape &shape1, core::BinaryArithmeticBroadcastParams *params)
 
template<>
OMStatus AddN< int8_t > (const size_t flat_size, const size_t num_inputs, const int8_t *const *input_data, int8_t *output_data)
 
template<>
OMStatus AddN< int16_t > (const size_t flat_size, const size_t num_inputs, const int16_t *const *input_data, int16_t *output_data)
 
template<typename T1 , typename T2 , typename T3 >
OMStatus ArgMin (const core::OMRuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, const core::OMRuntimeShape &output_shape, T2 *output_data)
 
template<typename T , typename AccumT >
OMStatus BatchMatMul (const OMRuntimeShape &lhs_shape, const T *lhs_data, const OMRuntimeShape &rhs_shape, const T *rhs_data, const OMRuntimeShape &output_shape, T *output_data)
 
template<typename InputType , typename OutputType >
OMStatus Mul (const core::ArithmeticQuantParams &params, uint32_t size, const InputType *input1_data, const InputType *input2_data, OutputType *output_data)
 
bool resolveAxis (const int num_dims, const int *axis, const int64_t num_axis, int *out_axis, int *out_num_axis)
 
template<typename T >
bool ReduceGeneric (const T *input_data, const int *input_dims, const int input_num_dims, T *output_data, const int *axis, const int64_t num_axis_dimensions, T init_value, const int output_flat_size, T reducer(const T, const T))
 
template<typename T >
bool reduceSumImpl (const T *input_data, const int *input_dims, const int input_num_dims, T *output_data, const int *axis, const int num_axis, const int num_outputs)
 
template<typename T >
void MeanROWH (const OMRuntimeShape &unextended_input_shape, const T *input_data, const OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T , template< typename > class ReduceFn>
bool ReduceGeneric (OMReduceDataContext< T > &ctx)
 
template<typename T , template< typename > class ReduceFn>
bool Reduce (OMReduceDataContext< T > &ctx, bool mean=false)
 
template<typename D , typename T >
void Select (const core::OMRuntimeShape &input_condition_shape, const D *input_condition_data, const core::OMRuntimeShape &input_x_shape, const T *input_x_data, const core::OMRuntimeShape &input_y_shape, const T *input_y_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus Sub (const core::ArithmeticQuantParams &params, const uint32_t flat_size, const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data)
 

Variables

constexpr int MAX_INDICES_ND = 5
 

Function Documentation

◆ Abs()

template<typename T >
OMStatus onert_micro::execute::pal::Abs ( const core::OMRuntimeShape &  shape,
const T *  input_data,
T *  output_data 
)
inline

Definition at line 33 of file PALAbs.h.

34{
35 const uint32_t flat_size = shape.flatSize();
36
37 if (flat_size == -1)
38 return UnknownError;
39
40 assert(input_data != nullptr);
41 assert(output_data != nullptr);
42
43 for (uint32_t i = 0; i < flat_size; ++i)
44 {
45 output_data[i] = std::abs(input_data[i]);
46 }
47
48 return Ok;
49}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleAbs().

◆ activationFunctionWithMinMax()

template<typename T >
T onert_micro::execute::pal::activationFunctionWithMinMax ( T  x,
T  output_activation_min,
T  output_activation_max 
)
inline

Definition at line 231 of file PALUtils.h.

232{
233 using std::max;
234 using std::min;
235 return min(max(x, output_activation_min), output_activation_max);
236}

Referenced by DepthwiseConv2D< float >(), L2Pool(), and TransposeConv< float >().

◆ Add() [1/2]

OMStatus onert_micro::execute::pal::Add ( const core::ArithmeticQuantParams params,
const uint32_t  flat_size,
const int8_t *  input1_data,
const int8_t *  input2_data,
int8_t *  output_data 
)

Definition at line 33 of file PALAdd.h.

35{
36 auto status = arm_elementwise_add_s8(
37 input1_data, input2_data, params.input1_offset, params.input1_multiplier, params.input1_shift,
38 params.input2_offset, params.input2_multiplier, params.input2_shift, params.left_shift,
39 output_data, params.output_offset, params.output_multiplier, params.output_shift,
40 params.quantized_activation_min, params.quantized_activation_max, flat_size);
41
42 assert(status == ARM_CMSIS_NN_SUCCESS);
43 if (status != ARM_CMSIS_NN_SUCCESS)
44 return UnknownError;
45
46 return Ok;
47}

References onert_micro::core::ArithmeticQuantParams::input1_multiplier, onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input1_shift, onert_micro::core::ArithmeticQuantParams::input2_multiplier, onert_micro::core::ArithmeticQuantParams::input2_offset, onert_micro::core::ArithmeticQuantParams::input2_shift, onert_micro::core::ArithmeticQuantParams::left_shift, onert_micro::Ok, onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, onert_micro::core::ArithmeticQuantParams::quantized_activation_min, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleAdd().

◆ Add() [2/2]

template<typename T >
OMStatus onert_micro::execute::pal::Add ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 50 of file PALAddCommon.h.

52{
53 ArithmeticOp<T, AddFn<T>>(params, flat_size, input1_data, input2_data, output_data);
54 return Ok;
55}

References onert_micro::Ok.

◆ AddFunc()

int8_t onert_micro::execute::pal::AddFunc ( int8_t  x,
int8_t  y,
const core::ArithmeticQuantParams params 
)

Definition at line 30 of file PALAddCommon.h.

31{
32 const int32_t input1_val = params.input1_offset + x;
33 const int32_t input2_val = params.input2_offset + y;
34 const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
35 const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
36 const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
37 shifted_input1_val, params.input1_multiplier, params.input1_shift);
38 const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
39 shifted_input2_val, params.input2_multiplier, params.input2_shift);
40 const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
41 const int32_t raw_output = multiplyByQuantizedMultiplierSmallerThanOneExp(
42 raw_sum, params.output_multiplier, params.output_shift) +
43 params.output_offset;
44 const int32_t clamped_output = std::min(params.quantized_activation_max,
45 std::max(params.quantized_activation_min, raw_output));
46 return static_cast<int8_t>(clamped_output);
47}

References onert_micro::core::ArithmeticQuantParams::input1_multiplier, onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input1_shift, onert_micro::core::ArithmeticQuantParams::input2_multiplier, onert_micro::core::ArithmeticQuantParams::input2_offset, onert_micro::core::ArithmeticQuantParams::input2_shift, onert_micro::core::ArithmeticQuantParams::left_shift, multiplyByQuantizedMultiplierSmallerThanOneExp(), onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, and onert_micro::core::ArithmeticQuantParams::quantized_activation_min.

Referenced by BroadcastAdd4DSlow().

◆ AddN()

template<typename T >
OMStatus onert_micro::execute::pal::AddN ( const size_t  flat_size,
const size_t  num_inputs,
const T *const *  input_data,
T *  output_data 
)

Definition at line 32 of file PALAddNCommon.h.

34{
35 // All inputs and output should have the same shape, this is checked during
36 // Prepare stage.
37 for (size_t i = 0; i < flat_size; ++i)
38 {
39 T x = 0;
40 for (size_t j = 0; j < num_inputs; ++j)
41 {
42 x += input_data[j][i];
43 }
44 output_data[i] = x;
45 }
46 return Ok;
47}

References onert_micro::Ok.

◆ AddN< int16_t >()

template<>
OMStatus onert_micro::execute::pal::AddN< int16_t > ( const size_t  flat_size,
const size_t  num_inputs,
const int16_t *const *  input_data,
int16_t *  output_data 
)

Definition at line 38 of file PALAddN.h.

40{
41 assert(false && "Not IMPL yet");
42 return UnsupportedOp;
43}
@ UnsupportedOp
Definition OMStatus.h:29

References onert_micro::UnsupportedOp.

◆ AddN< int8_t >()

template<>
OMStatus onert_micro::execute::pal::AddN< int8_t > ( const size_t  flat_size,
const size_t  num_inputs,
const int8_t *const *  input_data,
int8_t *  output_data 
)

Definition at line 30 of file PALAddN.h.

32{
33 assert(false && "Not IMPL yet");
34 return UnsupportedOp;
35}

References onert_micro::UnsupportedOp.

◆ ArgMax()

template<typename T1 , typename T2 , typename T3 >
OMStatus onert_micro::execute::pal::ArgMax ( const core::OMRuntimeShape input1_shape,
const T1 *  input1_data,
const T3 *  input2_data,
const core::OMRuntimeShape output_shape,
T2 *  output_data 
)

Definition at line 30 of file PALArgMax.h.

32{
33 return ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data,
34 std::greater<T1>());
35}
const luci_interpreter::RuntimeShape output_shape

References ArgMinMax(), and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleArgMax().

◆ ArgMin()

template<typename T1 , typename T2 , typename T3 >
OMStatus onert_micro::execute::pal::ArgMin ( const core::OMRuntimeShape input1_shape,
const T1 *  input1_data,
const T3 *  input2_data,
const core::OMRuntimeShape output_shape,
T2 *  output_data 
)

Definition at line 30 of file PALArgMin.h.

32{
33 return ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data,
34 std::less<T1>());
35}

References ArgMinMax(), and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleArgMin().

◆ ArgMinMax()

template<typename T1 , typename T2 , typename T3 , typename Cmp >
OMStatus onert_micro::execute::pal::ArgMinMax ( const core::OMRuntimeShape input1_shape,
const T1 *  input1_data,
const T3 *  input2_data,
const core::OMRuntimeShape output_shape,
T2 *  output_data,
const Cmp &  cmp 
)

Definition at line 32 of file PALArgMinMaxCommon.h.

35{
36 int axis = input2_data[0];
37 if (axis < 0)
38 {
39 axis += input1_shape.dimensionsCount();
40 }
41 const int axis_size = input1_shape.dims(axis);
42
43 int outer_size = 1;
44 for (int i = 0; i < axis; ++i)
45 {
46 outer_size *= input1_shape.dims(i);
47 }
48
49 int inner_size = 1;
50 const int dims_count = input1_shape.dimensionsCount();
51 for (int i = axis + 1; i < dims_count; ++i)
52 {
53 inner_size *= input1_shape.dims(i);
54 }
55 for (int outer = 0; outer < outer_size; ++outer)
56 {
57 for (int inner = 0; inner < inner_size; ++inner)
58 {
59 auto min_max_value = input1_data[outer * axis_size * inner_size + inner];
60 T2 min_max_index = 0;
61 for (int i = 1; i < axis_size; ++i)
62 {
63 const auto &curr_value = input1_data[(outer * axis_size + i) * inner_size + inner];
64 if (cmp(curr_value, min_max_value))
65 {
66 min_max_value = curr_value;
67 min_max_index = static_cast<T2>(i);
68 }
69 }
70 output_data[outer * inner_size + inner] = min_max_index;
71 }
72 }
73 return Ok;
74}
size_t dimensionsCount() const noexcept

References onert_micro::core::OMRuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dims(), and onert_micro::Ok.

Referenced by ArgMax(), and ArgMin().

◆ ArithmeticOp()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::ArithmeticOp ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 54 of file PALArithmeticOpCommon.h.

56{
57 T activation_min, activation_max;
58 getActivationParams(params, &activation_min, &activation_max);
59
60 Fn func;
61 for (int i = 0; i < flat_size; ++i)
62 output_data[i] =
63 std::min(std::max(func(input1_data[i], input2_data[i]), activation_min), activation_max);
64
65 return Ok;
66}

References getActivationParams(), and onert_micro::Ok.

◆ ArithmeticOpScalar()

template<typename T , typename Fn >
void onert_micro::execute::pal::ArithmeticOpScalar ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input_data,
const T  scalar_value,
T *  output_data 
)
inline

Definition at line 111 of file PALArithmeticOpCommon.h.

114{
115 T activation_min, activation_max;
116 getActivationParams(params, &activation_min, &activation_max);
117
118 for (int i = 0; i < flat_size; ++i)
119 output_data[i] =
120 std::min(std::max(func(input_data[i], scalar_value), activation_min), activation_max);
121}

References getActivationParams().

◆ AveragePool() [1/2]

OMStatus onert_micro::execute::pal::AveragePool ( const core::Pool2DParams params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)

Definition at line 33 of file PALAveragePool2DCommon.h.

36{
37 const int32_t batches = input_shape.dims(0);
38 const int32_t depth = output_shape.dims(3);
39 const int32_t input_height = input_shape.dims(1);
40 const int32_t input_width = input_shape.dims(2);
41 const int32_t output_height = output_shape.dims(1);
42 const int32_t output_width = output_shape.dims(2);
43 const int32_t stride_height = params.stride_h;
44 const int32_t stride_width = params.stride_w;
45 for (int batch = 0; batch < batches; ++batch)
46 {
47 for (int out_y = 0; out_y < output_height; ++out_y)
48 {
49 for (int out_x = 0; out_x < output_width; ++out_x)
50 {
51 for (int channel = 0; channel < depth; ++channel)
52 {
53 const int in_x_origin = (out_x * stride_width) - params.pad_w;
54 const int in_y_origin = (out_y * stride_height) - params.pad_h;
55 // Compute the boundaries of the filter region clamped so as to
56 // ensure that the filter window fits in the input array.
57 const int filter_x_start = std::max(0, -in_x_origin);
58 const int filter_x_end = std::min(static_cast<int32_t>(params.filter_w),
59 static_cast<int32_t>(input_width - in_x_origin));
60 const int filter_y_start = std::max(0, -in_y_origin);
61 const int filter_y_end = std::min(static_cast<int32_t>(params.filter_h),
62 static_cast<int32_t>(input_height - in_y_origin));
63
64 float total = 0.f;
65 float filter_count = 0;
66
67 for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
68 {
69 for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
70 {
71 const int in_x = in_x_origin + filter_x;
72 const int in_y = in_y_origin + filter_y;
73
74 const int input_data_offset =
75 ((batch * input_shape.dims(1) + in_y) * input_shape.dims(2) + in_x) *
76 input_shape.dims(3) +
77 channel;
78
79 total += input_data[input_data_offset];
80 filter_count++;
81 }
82 }
 83 const int output_data_offset =
 84 ((batch * output_shape.dims(1) + out_y) * output_shape.dims(2) + out_x) *
 85 output_shape.dims(3) +
 86 channel;
87
88 assert(filter_count != 0);
89 const float average = total / filter_count;
90
91 output_data[output_data_offset] =
92 std::min(std::max(average, params.activation_min), params.activation_max);
93 }
94 }
95 }
96 }
97 return Ok;
98}
int32_t dims(int i) const
Definition Tensor.h:108

References onert_micro::core::Pool2DParams::activation_max, onert_micro::core::Pool2DParams::activation_min, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.

◆ AveragePool() [2/2]

OMStatus onert_micro::execute::pal::AveragePool ( const core::Pool2DParams params,
const core::OMRuntimeShape input_shape,
const int8_t *  input_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 32 of file PALAveragePool2D.h.

35{
36 cmsis_nn_dims input_dims;
37 cmsis_nn_dims output_dims;
38 cmsis_nn_pool_params pool_params;
39 cmsis_nn_dims filter_dims;
40 cmsis_nn_context ctx;
41
42 const int depth = input_shape.dims(3);
43 const int output_width = output_shape.dims(2);
44
45 input_dims.n = 1;
46 input_dims.h = input_shape.dims(1);
47 input_dims.w = input_shape.dims(2);
48 input_dims.c = depth;
49
50 output_dims.n = 1;
51 output_dims.h = output_shape.dims(1);
52 output_dims.w = output_width;
53 output_dims.c = depth;
54
55 pool_params.stride.h = params.stride_h;
56 pool_params.stride.w = params.stride_w;
57 pool_params.padding.h = params.pad_h;
58 pool_params.padding.w = params.pad_w;
59 pool_params.activation.min = params.quantized_activation_min;
60 pool_params.activation.max = params.quantized_activation_max;
61
62 filter_dims.n = 1;
63 filter_dims.h = params.filter_h;
64 filter_dims.w = params.filter_w;
65 filter_dims.c = 1;
66
67 const int32_t buffer_size = arm_avgpool_s8_get_buffer_size(output_width, depth);
68 int8_t *buffer = nullptr;
69 if (buffer_size > 0)
70 {
71 buffer = new int8_t[buffer_size];
72 }
73
74 ctx.buf = buffer;
75 ctx.size = buffer_size;
76
77 auto res = arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, &output_dims,
78 output_data);
79
80 if (buffer_size > 0)
81 delete[] buffer;
82
83 assert(res == ARM_CMSIS_NN_SUCCESS);
84 if (res != ARM_CMSIS_NN_SUCCESS)
85 return CmsisNNError;
86
87 return Ok;
88}

References onert_micro::CmsisNNError, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::quantized_activation_max, onert_micro::core::Pool2DParams::quantized_activation_min, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.

Referenced by onert_micro::execute::execute_kernel_CircleAveragePool2D().

◆ BatchMatMul()

template<typename T , typename AccumT >
OMStatus onert_micro::execute::pal::BatchMatMul ( const OMRuntimeShape lhs_shape,
const T *  lhs_data,
const OMRuntimeShape rhs_shape,
const T *  rhs_data,
const OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 39 of file PALBatchMatMul.h.

42{
43
44 const OMRuntimeShape extended_lhs_shape = OMRuntimeShape::extendedShape(5, lhs_shape);
45 const OMRuntimeShape extended_rhs_shape = OMRuntimeShape::extendedShape(5, rhs_shape);
46
47 const int batch_dim0 = extended_lhs_shape.dims(0);
48 const int batch_dim1 = extended_lhs_shape.dims(1);
49 const int batch_dim2 = extended_lhs_shape.dims(2);
50
51 const int lhs_ext0 = extended_lhs_shape.dims(0);
52 const int lhs_ext1 = extended_lhs_shape.dims(1);
53 const int lhs_ext2 = extended_lhs_shape.dims(2);
54 const int rhs_ext0 = extended_rhs_shape.dims(0);
55 const int rhs_ext1 = extended_rhs_shape.dims(1);
56 const int rhs_ext2 = extended_rhs_shape.dims(2);
57
58 // Set params for each matrix multiply.
59 const int lhs_rows = extended_lhs_shape.dims(3);
60 const int rhs_cols = extended_rhs_shape.dims(4);
61 const int accum_depth = extended_lhs_shape.dims(4);
62
63 // stride for each batch dim
64 const int lhs_stride_b0 =
65 extended_lhs_shape.dims(1) * extended_lhs_shape.dims(2) * lhs_rows * accum_depth;
66 const int lhs_stride_b1 = extended_lhs_shape.dims(2) * lhs_rows * accum_depth;
67 const int lhs_stride_b2 = lhs_rows * accum_depth;
68
69 const int rhs_stride_b0 =
70 extended_rhs_shape.dims(1) * extended_rhs_shape.dims(2) * accum_depth * rhs_cols;
71 const int rhs_stride_b1 = extended_rhs_shape.dims(2) * accum_depth * rhs_cols;
72 const int rhs_stride_b2 = accum_depth * rhs_cols;
73
74 for (int b0 = 0; b0 < batch_dim0; ++b0)
75 {
76 const T *lhs_ptr0 = lhs_data + (b0 * lhs_stride_b0);
77 const T *rhs_ptr0 = rhs_data + (b0 * rhs_stride_b0);
78 for (int b1 = 0; b1 < batch_dim1; ++b1)
79 {
80 const T *lhs_ptr1 = lhs_ptr0 + b1 * lhs_stride_b1;
81 const T *rhs_ptr1 = rhs_ptr0 + b1 * rhs_stride_b1;
82 for (int b2 = 0; b2 < batch_dim2; ++b2)
83 {
84 const T *lhs_ptr2 = lhs_ptr1 + b2 * lhs_stride_b2;
85 const T *rhs_ptr2 = rhs_ptr1 + b2 * rhs_stride_b2;
86 T *out_ptr = output_data +
87 ((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) * lhs_rows * rhs_cols;
88
89 for (int j = 0; j < rhs_cols; ++j)
90 {
91 for (int i = 0; i < lhs_rows; ++i)
92 {
93 AccumT total = 0;
94 for (int k = 0; k < accum_depth; ++k)
95 {
96 AccumT lhs_val = lhs_ptr2[accum_depth * i + k];
97 AccumT rhs_val = rhs_ptr2[j + k * rhs_cols];
98 total += (lhs_val) * (rhs_val);
99 }
100 const int idx = j + i * rhs_cols;
101 out_ptr[idx] = static_cast<T>(total);
102 }
103 }
104 }
105 }
106 }
107
108 return Ok;
109}
static OMRuntimeShape extendedShape(size_t new_shape_size, const OMRuntimeShape &shape)

References onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), and onert_micro::Ok.

◆ BatchToSpaceND()

template<typename T >
OMStatus onert_micro::execute::pal::BatchToSpaceND ( const core::OMRuntimeShape unextended_input1_shape,
const T *  input1_data,
const core::OMRuntimeShape unextended_input2_shape,
const int32_t *  block_shape_data,
const core::OMRuntimeShape unextended_input3_shape,
const int32_t *  crops_data,
const core::OMRuntimeShape unextended_output_shape,
T *  output_data 
)
inline

Definition at line 51 of file PALBatchToSpaceNDCommon.h.

55{
56 const core::OMRuntimeShape input1_shape = extendShapeBatchToSpace(unextended_input1_shape);
57 const core::OMRuntimeShape output_shape = extendShapeBatchToSpace(unextended_output_shape);
58
59 const int output_width = output_shape.dims(2);
60 const int output_height = output_shape.dims(1);
61 const int output_batch_size = output_shape.dims(0);
62
63 const int depth = input1_shape.dims(3);
64 const int input_width = input1_shape.dims(2);
65 const int input_height = input1_shape.dims(1);
66 const int input_batch_size = input1_shape.dims(0);
67
68 const int block_shape_height = block_shape_data[0];
69 const int block_shape_width =
70 unextended_input1_shape.dimensionsCount() == 4 ? block_shape_data[1] : 1;
71 const int crops_top = crops_data[0];
72 const int crops_left = unextended_input1_shape.dimensionsCount() == 4 ? crops_data[2] : 0;
73 for (int in_batch = 0; in_batch < input_batch_size; ++in_batch)
74 {
75 const int out_batch = in_batch % output_batch_size;
76 const int spatial_offset = in_batch / output_batch_size;
77 for (int in_h = 0; in_h < input_height; ++in_h)
78 {
79 const int out_h = in_h * block_shape_height + spatial_offset / block_shape_width - crops_top;
80 if (out_h < 0 || out_h >= output_height)
81 {
82 continue;
83 }
84 for (int in_w = 0; in_w < input_width; ++in_w)
85 {
86 const int out_w =
87 in_w * block_shape_width + spatial_offset % block_shape_width - crops_left;
88
89 if (out_w < 0 || out_w >= output_width)
90 {
91 continue;
92 }
93 T *out = output_data + offset(output_shape.dimsData(), out_batch, out_h, out_w, 0);
94 const T *in = input1_data + offset(input1_shape.dimsData(), in_batch, in_h, in_w, 0);
95 memcpy(out, in, depth * sizeof(T));
96 }
97 }
98 }
99 return Ok;
100}
__global uchar * offset(const Image *img, int x, int y)
Definition helpers.h:540

References onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), offset(), onert_micro::Ok, and output_shape.

◆ BinaryOp()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::BinaryOp ( const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)
inline

Definition at line 62 of file PALBinaryOpCommon.h.

64{
65 Fn func;
66 for (int i = 0; i < flat_size; ++i)
67 {
68 output_data[i] = func(input1_data[i], input2_data[i]);
69 }
70 return Ok;
71}

References onert_micro::Ok.

◆ BroadcastAdd4DSlow() [1/2]

OMStatus onert_micro::execute::pal::BroadcastAdd4DSlow ( const core::ArithmeticQuantParams params,
const core::OMRuntimeShape input1_shape,
const int8_t *  input1_data,
const core::OMRuntimeShape input2_shape,
const int8_t *  input2_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 68 of file PALAddCommon.h.

72{
73 BroadcastBinaryFunction6DSlow(params, input1_shape, input1_data, input2_shape, input2_data,
74 output_shape, output_data, AddFunc);
75 return Ok;
76}
void BroadcastBinaryFunction6DSlow(const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))

References AddFunc(), BroadcastBinaryFunction6DSlow(), onert_micro::Ok, and output_shape.

◆ BroadcastAdd4DSlow() [2/2]

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastAdd4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 58 of file PALAddCommon.h.

62{
63 BroadcastArithmeticOp4DSlow<T, AddFn<T>>(params, input1_shape, input1_data, input2_shape,
64 input2_data, output_shape, output_data);
65 return Ok;
66}

References onert_micro::Ok, and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleAdd().

◆ BroadcastArithmeticOp4DSlow()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::BroadcastArithmeticOp4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 124 of file PALArithmeticOpCommon.h.

 128{
 129 NdArrayDesc<4> desc1;
 130 NdArrayDesc<4> desc2;
 131 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
132 const core::OMRuntimeShape extended_output_shape =
133 core::OMRuntimeShape::extendedShape(4, output_shape);
134
135 T activation_min, activation_max;
136 getActivationParams(params, &activation_min, &activation_max);
137
138 // In Tensorflow, the dimensions are canonically named (batch_number, row,
139 // col, channel), with extents (batches, height, width, depth), with the
140 // trailing dimension changing most rapidly (channels has the smallest stride,
141 // typically 1 element).
142 //
143 // In generated C code, we store arrays with the dimensions reversed. The
144 // first dimension has smallest stride.
145 //
146 // We name our variables by their Tensorflow convention, but generate C code
147 // nesting loops such that the innermost loop has the smallest stride for the
148 // best cache behavior.
149 Fn func;
150 for (int b = 0; b < extended_output_shape.dims(0); ++b)
151 {
152 for (int y = 0; y < extended_output_shape.dims(1); ++y)
153 {
154 for (int x = 0; x < extended_output_shape.dims(2); ++x)
155 {
156 for (int c = 0; c < extended_output_shape.dims(3); ++c)
157 {
158 const int output_data_offset =
159 ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
160 extended_output_shape.dims(3) +
161 c;
162
163 output_data[output_data_offset] =
164 std::min(std::max(func(input1_data[subscriptToIndex(desc1, b, y, x, c)],
165 input2_data[subscriptToIndex(desc2, b, y, x, c)]),
166 activation_min),
167 activation_max);
168 }
169 }
170 }
171 }
172 return Ok;
173}
void NdArrayDescsForElementwiseBroadcast(const Dims< N > &input0_dims, const Dims< N > &input1_dims, NdArrayDesc< N > *desc0_out, NdArrayDesc< N > *desc1_out)
Definition NDArray.h:89
NdArrayDesc< 4 > desc1
NdArrayDesc< 4 > desc2

References desc1, desc2, onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), getActivationParams(), NdArrayDescsForElementwiseBroadcast(), onert_micro::Ok, output_shape, and subscriptToIndex().

◆ BroadcastBinaryFunction6DSlow()

template<typename T >
void onert_micro::execute::pal::BroadcastBinaryFunction6DSlow ( const core::ArithmeticQuantParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 320 of file PALArithmeticOpCommon.h.

325{
326 constexpr int kMaxBroadcastDim = 6;
327
328 // In Tensorflow, the dimensions are canonically named (batch_number, row,
329 // col, channel), with extents (batches, height, width, depth), with the
330 // trailing dimension changing most rapidly (channels has the smallest stride,
331 // typically 1 element).
332 //
333 // In generated C code, we store arrays with the dimensions reversed. The
334 // first dimension has smallest stride.
335 //
336 // We name our variables by their Tensorflow convention, but generate C code
337 // nesting loops such that the innermost loop has the smallest stride for the
338 // best cache behavior.
339 size_t compressed_input1_stride[kMaxBroadcastDim];
340 size_t compressed_input2_stride[kMaxBroadcastDim];
341 size_t compressed_output_shape[kMaxBroadcastDim];
342 bool broadcastable_shape = ReduceDimensionsForBroadcast<kMaxBroadcastDim>(
343 input1_shape, input2_shape, compressed_input1_stride, compressed_input2_stride,
344 compressed_output_shape);
345 // Skip broadcasting for degenerate shapes.
346 if (!broadcastable_shape)
347 {
348 return;
349 }
350
351 size_t input1_offset = 0;
352 size_t input2_offset = 0;
353 size_t output_offset = 0;
354 BroadcastRecursiveDimensions(params, kMaxBroadcastDim - 1, &input1_offset, &input2_offset,
355 &output_offset, compressed_input1_stride, compressed_input2_stride,
356 compressed_output_shape, input1_data, input2_data, output_data,
357 binary_func);
358}

References BroadcastRecursiveDimensions().

Referenced by BroadcastAdd4DSlow(), and BroadcastSub4DSlow().

◆ BroadcastBinaryOp4DSlow()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::BroadcastBinaryOp4DSlow ( const core::OMRuntimeShape input1_shape,
const float *  input1_data,
const core::OMRuntimeShape input2_shape,
const float *  input2_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 75 of file PALBinaryOpCommon.h.

 78{
 79 NdArrayDesc<4> desc1;
 80 NdArrayDesc<4> desc2;
 81 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
82
83 const core::OMRuntimeShape extended_output_shape =
84 core::OMRuntimeShape::extendedShape(4, output_shape);
85
86 // In Tensorflow, the dimensions are canonically named (batch_number, row,
87 // col, channel), with extents (batches, height, width, depth), with the
88 // trailing dimension changing most rapidly (channels has the smallest stride,
89 // typically 1 element).
90 //
91 // In generated C code, we store arrays with the dimensions reversed. The
92 // first dimension has smallest stride.
93 //
94 // We name our variables by their Tensorflow convention, but generate C code
95 // nesting loops such that the innermost loop has the smallest stride for the
96 // best cache behavior.
97
98 Fn func;
99 for (int b = 0; b < extended_output_shape.dims(0); ++b)
100 {
101 for (int y = 0; y < extended_output_shape.dims(1); ++y)
102 {
103 for (int x = 0; x < extended_output_shape.dims(2); ++x)
104 {
105 for (int c = 0; c < extended_output_shape.dims(3); ++c)
106 {
107 const int output_data_offset =
108 ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
109 extended_output_shape.dims(3) +
110 c;
111
112 output_data[output_data_offset] = func(input1_data[subscriptToIndex(desc1, b, y, x, c)],
113 input2_data[subscriptToIndex(desc2, b, y, x, c)]);
114 }
115 }
116 }
117 }
118 return Ok;
119}

References desc1, desc2, onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), NdArrayDescsForElementwiseBroadcast(), onert_micro::Ok, output_shape, and subscriptToIndex().

◆ BroadcastComparison4DSlowNoScaling()

template<typename T >
void onert_micro::execute::pal::BroadcastComparison4DSlowNoScaling ( const core::ComparisonParams op_params,
const core::OMRuntimeShape unextended_input1_shape,
const T *  input1_data,
const core::OMRuntimeShape unextended_input2_shape,
const T *  input2_data,
const core::OMRuntimeShape unextended_output_shape,
bool *  output_data,
bool   F(T, T) 
)
inline

Definition at line 149 of file PALComparisons.h.

153{
154 const BroadcastComparison4DSlowCommon dims = BroadcastComparison4DSlowPreprocess(
155 unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
156
157 for (int b = 0; b < dims.output_shape.dims(0); ++b)
158 {
159 for (int y = 0; y < dims.output_shape.dims(1); ++y)
160 {
161 for (int x = 0; x < dims.output_shape.dims(2); ++x)
162 {
163 for (int c = 0; c < dims.output_shape.dims(3); ++c)
164 {
165 const int output_data_offset =
166 ((b * dims.output_shape.dims(1) + y) * dims.output_shape.dims(2) + x) *
167 dims.output_shape.dims(3) +
168 c;
169 output_data[output_data_offset] =
170 F(input1_data[subscriptToIndex(dims.desc1, b, y, x, c)],
171 input2_data[subscriptToIndex(dims.desc2, b, y, x, c)]);
172 }
173 }
174 }
175 }
176}

References subscriptToIndex().

◆ BroadcastComparison4DSlowWithScaling()

template<typename T , typename AccType >
void onert_micro::execute::pal::BroadcastComparison4DSlowWithScaling ( const core::ComparisonParams op_params,
const core::OMRuntimeShape unextended_input1_shape,
const T *  input1_data,
const core::OMRuntimeShape unextended_input2_shape,
const T *  input2_data,
const core::OMRuntimeShape unextended_output_shape,
bool *  output_data,
bool   F(AccType, AccType) 
)
inline

Definition at line 75 of file PALComparisons.h.

79{
80 const BroadcastComparison4DSlowCommon dims = BroadcastComparison4DSlowPreprocess(
81 unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
82
83 int left_shift = op_params.left_shift;
84 int32_t input1_offset = op_params.input1_offset;
85 int32_t input1_multiplier = op_params.input1_multiplier;
86 int input1_shift = op_params.input1_shift;
87 int32_t input2_offset = op_params.input2_offset;
88 int32_t input2_multiplier = op_params.input2_multiplier;
89 int input2_shift = op_params.input2_shift;
90
91 for (int b = 0; b < dims.output_shape.dims(0); ++b)
92 {
93 for (int y = 0; y < dims.output_shape.dims(1); ++y)
94 {
95 for (int x = 0; x < dims.output_shape.dims(2); ++x)
96 {
97 for (int c = 0; c < dims.output_shape.dims(3); ++c)
98 {
99 const int32_t input1_val =
100 input1_offset + input1_data[subscriptToIndex(dims.desc1, b, y, x, c)];
101 const int32_t input2_val =
102 input2_offset + input2_data[subscriptToIndex(dims.desc2, b, y, x, c)];
103 const int32_t shifted_input1_val = input1_val * (1 << left_shift);
104 const int32_t shifted_input2_val = input2_val * (1 << left_shift);
105 const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
106 shifted_input1_val, input1_multiplier, input1_shift);
107 const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
108 shifted_input2_val, input2_multiplier, input2_shift);
109
110 const int output_data_offset =
111 ((b * dims.output_shape.dims(1) + y) * dims.output_shape.dims(2) + x) *
112 dims.output_shape.dims(3) +
113 c;
114 output_data[output_data_offset] = F(scaled_input1_val, scaled_input2_val);
115 }
116 }
117 }
118 }
119}

References onert_micro::core::ComparisonParams::input1_multiplier, onert_micro::core::ComparisonParams::input1_offset, onert_micro::core::ComparisonParams::input1_shift, onert_micro::core::ComparisonParams::input2_multiplier, onert_micro::core::ComparisonParams::input2_offset, onert_micro::core::ComparisonParams::input2_shift, onert_micro::core::ComparisonParams::left_shift, multiplyByQuantizedMultiplierSmallerThanOneExp(), and subscriptToIndex().

◆ BroadcastDiv4DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastDiv4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 39 of file PALDivCommon.h.

43{
44 BroadcastArithmeticOp4DSlow<T, DivFn<T>>(params, input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46 return Ok;
47}

References onert_micro::Ok, and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleDiv().

◆ BroadcastFloorDiv4DSlow()

void onert_micro::execute::pal::BroadcastFloorDiv4DSlow ( const core::OMRuntimeShape input1_shape,
const float *  input1_data,
const core::OMRuntimeShape input2_shape,
const float *  input2_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 40 of file PALFloorDivCommon.h.

43{
44 BroadcastBinaryOp4DSlow<float, FloorDivFn<float>>(input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46}

References output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleFloorDiv().

◆ BroadcastFloorMod4DSlow()

void onert_micro::execute::pal::BroadcastFloorMod4DSlow ( const core::OMRuntimeShape input1_shape,
const float *  input1_data,
const core::OMRuntimeShape input2_shape,
const float *  input2_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 40 of file PALFloorModCommon.h.

43{
44 BroadcastBinaryOp4DSlow<float, FloorModFn<float>>(input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46}

References output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleFloorMod().

◆ BroadcastInput1()

template<typename T >
void onert_micro::execute::pal::BroadcastInput1 ( int  size,
const core::ArithmeticQuantParams params,
const T *  input1_data,
const T *  input2_data,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 241 of file PALArithmeticOpCommon.h.

244{
245 for (int i = 0; i < size; ++i)
246 {
247 output_data[i] = binary_func(input1_data[0], input2_data[i], params);
248 }
249}
int32_t size[5]
Definition Slice.cpp:35

References size.

◆ BroadcastInput2()

template<typename T >
void onert_micro::execute::pal::BroadcastInput2 ( int  size,
const core::ArithmeticQuantParams params,
const T *  input1_data,
const T *  input2_data,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 252 of file PALArithmeticOpCommon.h.

255{
256 for (int i = 0; i < size; ++i)
257 {
258 output_data[i] = binary_func(input1_data[i], input2_data[0], params);
259 }
260}

References size.

◆ BroadcastMaximum4DSlow()

OMStatus onert_micro::execute::pal::BroadcastMaximum4DSlow ( const core::OMRuntimeShape input1_shape,
const float *  input1_data,
const core::OMRuntimeShape input2_shape,
const float *  input2_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 38 of file PALMaximumCommon.h.

43{
44 return BroadcastBinaryOp4DSlow<float, MaximumFn<float>>(input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46}

References output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleMaximum().

◆ BroadcastMinimum4DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastMinimum4DSlow ( const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 40 of file PALMinimumCommon.h.

43{
44 return BroadcastBinaryOp4DSlow<float, MinimumFn<float>>(input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46}

References output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleMinimum().

◆ BroadcastMul4DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastMul4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 44 of file PALMulCommon.h.

48{
49 BroadcastArithmeticOp4DSlow<T, MulFn<T>>(params, input1_shape, input1_data, input2_shape,
50 input2_data, output_shape, output_data);
51 return Ok;
52}

References onert_micro::Ok, and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleMul().

◆ BroadcastMul6DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastMul6DSlow ( const core::ArithmeticQuantParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 55 of file PALMulCommon.h.

59{
62 // The input shapes are extended as part of NdArrayDesc initialization.
63 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
64 const core::OMRuntimeShape extended_output_shape =
65 core::OMRuntimeShape::extendedShape(kMaxMulBroadcastDim, output_shape);
66 // Cache output shape dimensions.
67 int32_t extended_output_shape_dims[kMaxMulBroadcastDim];
68 std::memcpy(extended_output_shape_dims, extended_output_shape.dimsData(),
69 sizeof(extended_output_shape_dims));
70
71 size_t input1_offset_a = 0;
72 size_t input2_offset_a = 0;
73 size_t output_offset_a = 0;
74 for (int a = 0; a < extended_output_shape_dims[0]; ++a)
75 {
76 size_t input1_offset_d = input1_offset_a;
77 size_t input2_offset_d = input2_offset_a;
78 size_t output_offset_d = output_offset_a;
79 for (int d = 0; d < extended_output_shape_dims[1]; ++d)
80 {
81 size_t input1_offset_b = input1_offset_d;
82 size_t input2_offset_b = input2_offset_d;
83 size_t output_offset_b = output_offset_d;
84 for (int b = 0; b < extended_output_shape_dims[2]; ++b)
85 {
86 size_t input1_offset_y = input1_offset_b;
87 size_t input2_offset_y = input2_offset_b;
88 size_t output_offset_y = output_offset_b;
89 for (int y = 0; y < extended_output_shape_dims[3]; ++y)
90 {
91 size_t input1_offset_x = input1_offset_y;
92 size_t input2_offset_x = input2_offset_y;
93 size_t output_offset_x = output_offset_y;
94 for (int x = 0; x < extended_output_shape_dims[4]; ++x)
95 {
96 size_t input1_offset_c = input1_offset_x;
97 size_t input2_offset_c = input2_offset_x;
98 size_t output_offset_c = output_offset_x;
99 for (int c = 0; c < extended_output_shape_dims[5]; ++c)
100 {
101 const int32_t input1_val = params.input1_offset + input1_data[input1_offset_c];
102 const int32_t input2_val = params.input2_offset + input2_data[input2_offset_c];
103 const int32_t unclamped_result =
104 params.output_offset + multiplyByQuantizedMultiplier(input1_val * input2_val,
105 params.output_multiplier,
106 params.output_shift);
107 const int32_t clamped_output =
108 std::min(params.quantized_activation_max,
109 std::max(params.quantized_activation_min, unclamped_result));
110 output_data[output_offset_c] = static_cast<T>(clamped_output);
111 input1_offset_c += desc1.strides[5];
112 input2_offset_c += desc2.strides[5];
113 ++output_offset_c;
114 }
115 input1_offset_x += desc1.strides[4];
116 input2_offset_x += desc2.strides[4];
117 output_offset_x += extended_output_shape_dims[5];
118 }
119 input1_offset_y += desc1.strides[3];
120 input2_offset_y += desc2.strides[3];
121 output_offset_y += extended_output_shape_dims[4] * extended_output_shape_dims[5];
122 }
123 input1_offset_b += desc1.strides[2];
124 input2_offset_b += desc2.strides[2];
125 output_offset_b += extended_output_shape_dims[3] * extended_output_shape_dims[4] *
126 extended_output_shape_dims[5];
127 }
128 input1_offset_d += desc1.strides[1];
129 input2_offset_d += desc2.strides[1];
130 output_offset_d += extended_output_shape_dims[2] * extended_output_shape_dims[3] *
131 extended_output_shape_dims[4] * extended_output_shape_dims[5];
132 }
133 input1_offset_a += desc1.strides[0];
134 input2_offset_a += desc2.strides[0];
135 output_offset_a += extended_output_shape_dims[1] * extended_output_shape_dims[2] *
136 extended_output_shape_dims[3] * extended_output_shape_dims[4] *
137 extended_output_shape_dims[5];
138 }
139 return Ok;
140}

References desc1, desc2, onert_micro::core::OMRuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input2_offset, multiplyByQuantizedMultiplier(), NdArrayDescsForElementwiseBroadcast(), onert_micro::Ok, onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, output_shape, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, and onert_micro::core::ArithmeticQuantParams::quantized_activation_min.

Referenced by onert_micro::execute::execute_kernel_CircleMul().

◆ BroadcastRecursiveDimensions()

template<typename T >
void onert_micro::execute::pal::BroadcastRecursiveDimensions ( const core::ArithmeticQuantParams params,
int  dimension,
size_t *  input1_offset_p,
size_t *  input2_offset_p,
size_t *  output_offset,
size_t *  compressed_input1_stride,
size_t *  compressed_input2_stride,
size_t *  compressed_output_shape,
const T *  input1_data,
const T *  input2_data,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 263 of file PALArithmeticOpCommon.h.

269{
270 if (dimension > 0)
271 {
272 for (size_t c = 0; c < compressed_output_shape[dimension]; ++c)
273 {
274 size_t input1_offset_c = *input1_offset_p;
275 size_t input2_offset_c = *input2_offset_p;
276 BroadcastRecursiveDimensions(params, dimension - 1, &input1_offset_c, &input2_offset_c,
277 output_offset, compressed_input1_stride,
278 compressed_input2_stride, compressed_output_shape, input1_data,
279 input2_data, output_data, binary_func);
280 *input1_offset_p += compressed_input1_stride[dimension];
281 *input2_offset_p += compressed_input2_stride[dimension];
282 }
283 }
284 else
285 {
286 assert(dimension == 0);
287 bool input1_is_broadcast = compressed_input1_stride[dimension] == 0;
288 bool input2_is_broadcast = compressed_input2_stride[dimension] == 0;
289 assert(!(input1_is_broadcast && input2_is_broadcast));
290 const T *input1_data_ptr = input1_data + *input1_offset_p;
291 const T *input2_data_ptr = input2_data + *input2_offset_p;
292 T *output_data_ptr = output_data + *output_offset;
293 if (input1_is_broadcast)
294 {
295 // input1 is broadcast.
296 BroadcastInput1<T>(compressed_output_shape[dimension], params, input1_data_ptr,
297 input2_data_ptr, output_data_ptr, binary_func);
298 *input2_offset_p += compressed_output_shape[dimension];
299 }
300 else if (input2_is_broadcast)
301 {
302 // input2 is broadcast.
303 BroadcastInput2<T>(compressed_output_shape[dimension], params, input1_data_ptr,
304 input2_data_ptr, output_data_ptr, binary_func);
305 *input1_offset_p += compressed_output_shape[dimension];
306 }
307 else
308 {
309 // Add element-wise.
310 ElementWise<T>(compressed_output_shape[dimension], params, input1_data_ptr, input2_data_ptr,
311 output_data_ptr, binary_func);
312 *input1_offset_p += compressed_output_shape[dimension];
313 *input2_offset_p += compressed_output_shape[dimension];
314 }
315 *output_offset += compressed_output_shape[dimension];
316 }
317}
void BroadcastRecursiveDimensions(const core::ArithmeticQuantParams &params, int dimension, size_t *input1_offset_p, size_t *input2_offset_p, size_t *output_offset, size_t *compressed_input1_stride, size_t *compressed_input2_stride, size_t *compressed_output_shape, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))

References BroadcastRecursiveDimensions().

Referenced by BroadcastBinaryFunction6DSlow(), and BroadcastRecursiveDimensions().

◆ BroadcastSquaredDifference4DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastSquaredDifference4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 38 of file PALSquaredDifferenceCommon.h.

42{
43 BroadcastArithmeticOp4DSlow<T, SquaredDifferenceFn<T>>(
44 params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data);
45 return Ok;
46}

References onert_micro::Ok, and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleSquaredDifference().

◆ BroadcastSub4DSlow() [1/2]

OMStatus onert_micro::execute::pal::BroadcastSub4DSlow ( const core::ArithmeticQuantParams params,
const core::OMRuntimeShape input1_shape,
const int8_t *  input1_data,
const core::OMRuntimeShape input2_shape,
const int8_t *  input2_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 67 of file PALSubCommon.h.

71{
72 BroadcastBinaryFunction6DSlow(params, input1_shape, input1_data, input2_shape, input2_data,
73 output_shape, output_data, SubFunc);
74 return Ok;
75}

References BroadcastBinaryFunction6DSlow(), onert_micro::Ok, output_shape, and SubFunc().

◆ BroadcastSub4DSlow() [2/2]

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastSub4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 57 of file PALSubCommon.h.

61{
62 BroadcastArithmeticOp4DSlow<T, SubFn<T>>(params, input1_shape, input1_data, input2_shape,
63 input2_data, output_shape, output_data);
64 return Ok;
65}

References onert_micro::Ok, and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleSub().

◆ Cast()

template<typename FromT , typename ToT >
OMStatus onert_micro::execute::pal::Cast ( const core::OMRuntimeShape input_shape,
const FromT *  input_data,
const core::OMRuntimeShape output_shape,
ToT *  output_data 
)
inline

Definition at line 34 of file PALCast.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = static_cast<ToT>(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleCast().

◆ Ceil()

template<typename T >
OMStatus onert_micro::execute::pal::Ceil ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALCeil.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::ceil(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleCeil().

◆ ComparisonNoScaling()

template<typename T >
void onert_micro::execute::pal::ComparisonNoScaling ( const int64_t  flat_size,
const T *  input1_data,
const T *  input2_data,
bool *  output_data,
bool   F(T, T) 
)
inline

Definition at line 65 of file PALComparisons.h.

67{
68 for (int64_t i = 0; i < flat_size; ++i)
69 {
70 output_data[i] = F(input1_data[i], input2_data[i]);
71 }
72}

◆ ComparisonWithScaling()

template<typename T , typename AccType >
void onert_micro::execute::pal::ComparisonWithScaling ( const core::ComparisonParams op_params,
const int64_t  flat_size,
const T *  input1_data,
const T *  input2_data,
bool *  output_data,
bool   F(AccType, AccType) 
)
inline

Definition at line 122 of file PALComparisons.h.

125{
126 int left_shift = op_params.left_shift;
127 int32_t input1_offset = op_params.input1_offset;
128 int32_t input1_multiplier = op_params.input1_multiplier;
129 int input1_shift = op_params.input1_shift;
130 int32_t input2_offset = op_params.input2_offset;
131 int32_t input2_multiplier = op_params.input2_multiplier;
132 int input2_shift = op_params.input2_shift;
133
134 for (int64_t i = 0; i < flat_size; ++i)
135 {
136 const int32_t input1_val = input1_offset + input1_data[i];
137 const int32_t input2_val = input2_offset + input2_data[i];
138 const int32_t shifted_input1_val = input1_val * (1 << left_shift);
139 const int32_t shifted_input2_val = input2_val * (1 << left_shift);
140 const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
141 shifted_input1_val, input1_multiplier, input1_shift);
142 const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
143 shifted_input2_val, input2_multiplier, input2_shift);
144 output_data[i] = F(scaled_input1_val, scaled_input2_val);
145 }
146}

References onert_micro::core::ComparisonParams::input1_multiplier, onert_micro::core::ComparisonParams::input1_offset, onert_micro::core::ComparisonParams::input1_shift, onert_micro::core::ComparisonParams::input2_multiplier, onert_micro::core::ComparisonParams::input2_offset, onert_micro::core::ComparisonParams::input2_shift, onert_micro::core::ComparisonParams::left_shift, and multiplyByQuantizedMultiplierSmallerThanOneExp().

◆ Concatenation()

template<typename Scalar >
OMStatus onert_micro::execute::pal::Concatenation ( core::ConcatenationParams params,
std::vector< uint32_t > &  input_shapes,
std::vector< const Scalar * > &  input_data,
const core::OMRuntimeShape output_shape,
Scalar *  output_data 
)

Definition at line 30 of file PALConcatenation.h.

33{
34 int axis = params.axis;
35 int inputs_count = params.num_inputs;
36 const int concat_dimensions = output_shape.dimensionsCount();
37
38 int64_t concat_size = 0;
39 for (int i = 0; i < inputs_count; i++)
40 {
41 concat_size += input_shapes[i];
42 }
43 int64_t outer_size = 1;
44 for (int i = 0; i < axis; ++i)
45 {
46 outer_size *= output_shape.dims(i);
47 }
48 // For all input arrays, FlatSize() == outer_size * Dims(axis) * base_inner_size.
49 int64_t base_inner_size = 1;
50 for (int i = axis + 1; i < concat_dimensions; ++i)
51 {
52 base_inner_size *= output_shape.dims(i);
53 }
54
55 Scalar *output_ptr = output_data;
56 for (int k = 0; k < outer_size; k++)
57 {
58 for (int i = 0; i < inputs_count; ++i)
59 {
60 const int copy_size = input_shapes[i] * base_inner_size;
61 const Scalar *input_ptr = input_data[i] + k * copy_size;
62 memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar));
63 output_ptr += copy_size;
64 }
65 }
66
67 return Ok;
68}
int32_t dimensionsCount() const
Definition Tensor.h:106

References onert_micro::core::ConcatenationParams::axis, luci_interpreter::RuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::ConcatenationParams::num_inputs, onert_micro::Ok, and output_shape.

◆ ConvFloat()

OMStatus onert_micro::execute::pal::ConvFloat ( const core::FloatConv2D params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape filter_shape,
const float *  filter_data,
const float *  bias_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)

Definition at line 31 of file PALConv2DCommon.h.

35{
36 const int stride_width = params->stride_w;
37 const int stride_height = params->stride_h;
38 const int dilation_width_factor = params->dilation_width_factor;
39 const int dilation_height_factor = params->dilation_height_factor;
40 const int pad_width = params->pad_w;
41 const int pad_height = params->pad_h;
42 const float output_activation_min = params->activation_min;
43 const float output_activation_max = params->activation_max;
44
45 const auto batches = input_shape.dims(0);
46 const int input_height = input_shape.dims(1);
47 const int input_width = input_shape.dims(2);
48 const int input_depth = input_shape.dims(3);
49 const int output_depth = filter_shape.dims(0);
50 const int filter_height = filter_shape.dims(1);
51 const int filter_width = filter_shape.dims(2);
52 const int output_height = output_shape.dims(1);
53 const int output_width = output_shape.dims(2);
54 for (int batch = 0; batch < batches; ++batch)
55 {
56 for (int out_y = 0; out_y < output_height; ++out_y)
57 {
58 const int in_y_origin = (out_y * stride_height) - pad_height;
59 for (int out_x = 0; out_x < output_width; ++out_x)
60 {
61 const int in_x_origin = (out_x * stride_width) - pad_width;
62 for (int out_channel = 0; out_channel < output_depth; ++out_channel)
63 {
64 float total = 0.f;
65 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
66 {
67 const int in_y = in_y_origin + dilation_height_factor * filter_y;
68 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
69 {
70 const int in_x = in_x_origin + dilation_width_factor * filter_x;
71
72 // Zero padding by omitting the areas outside the image.
73 const bool is_point_inside_image =
74 (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
75
76 if (!is_point_inside_image)
77 {
78 continue;
79 }
80
81 for (int in_channel = 0; in_channel < input_depth; ++in_channel)
82 {
83 const int input_data_offset =
84 ((batch * input_height + in_y) * input_width + in_x) * input_depth + in_channel;
85
86 const int filter_data_offset =
87 ((out_channel * filter_height + filter_y) * filter_width + filter_x) *
88 input_depth +
89 in_channel;
90
91 const float input_value = input_data[input_data_offset];
92 const float filter_value = filter_data[filter_data_offset];
93 total += (input_value * filter_value);
94 }
95 }
96 }
97 // float bias_value = 0.0f;
98 if (bias_data)
99 {
100 total += bias_data[out_channel];
101 }
102
103 const int output_data_offset =
104 ((batch * output_height + out_y) * output_width + out_x) * output_depth + out_channel;
105
106 output_data[output_data_offset] =
107 std::min(std::max(total, output_activation_min), output_activation_max);
108 }
109 }
110 }
111 }
112 return Ok;
113}

References onert_micro::core::FloatConv2D::activation_max, onert_micro::core::FloatConv2D::activation_min, onert_micro::core::FloatConv2D::dilation_height_factor, onert_micro::core::FloatConv2D::dilation_width_factor, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::Ok, output_shape, onert_micro::core::FloatConv2D::pad_h, onert_micro::core::FloatConv2D::pad_w, onert_micro::core::FloatConv2D::stride_h, and onert_micro::core::FloatConv2D::stride_w.

Referenced by onert_micro::execute::execute_kernel_CircleConv2D().

◆ ConvPerChannel()

OMStatus onert_micro::execute::pal::ConvPerChannel ( const core::ConvQuant params,
const core::OMRuntimeShape input_shape,
const int8_t *  input_data,
const core::OMRuntimeShape filter_shape,
const int8_t *  filter_data,
const int32_t *  bias_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 36 of file PALConv2D.h.

40{
41 cmsis_nn_conv_params conv_params;
42 conv_params.dilation.h = params.dilation_height_factor;
43 conv_params.dilation.w = params.dilation_width_factor;
44
45 assert(conv_params.dilation.h == 1);
46 assert(conv_params.dilation.w == 1);
47
48 conv_params.input_offset = params.input_offset;
49 conv_params.output_offset = params.output_offset;
50 conv_params.stride.h = params.stride_height;
51 conv_params.stride.w = params.stride_width;
52 conv_params.padding.h = params.pad_h;
53 conv_params.padding.w = params.pad_w;
54 conv_params.activation.min = params.quantized_activation_min;
55 conv_params.activation.max = params.quantized_activation_max;
56
57 cmsis_nn_per_channel_quant_params quant_params;
58 quant_params.multiplier = const_cast<int32_t *>(params.per_channel_output_multiplier.data());
59 quant_params.shift = const_cast<int32_t *>(
60 reinterpret_cast<const int32_t *>(params.per_channel_output_shift.data()));
61
62 assert(conv_params.activation.min <= conv_params.activation.max);
63 const int batch_size = input_shape.dims(0);
64 const int input_depth = input_shape.dims(3);
65 const int output_depth = filter_shape.dims(0);
66
67 cmsis_nn_dims input_dims;
68 input_dims.n = batch_size;
69 input_dims.h = input_shape.dims(1);
70 input_dims.w = input_shape.dims(2);
71 input_dims.c = input_depth;
72
73 cmsis_nn_dims filter_dims;
74 filter_dims.n = output_depth;
75 filter_dims.h = filter_shape.dims(1);
76 filter_dims.w = filter_shape.dims(2);
77 filter_dims.c = input_depth;
78
79 cmsis_nn_dims bias_dims;
80 bias_dims.n = 1;
81 bias_dims.h = 1;
82 bias_dims.w = 1;
83 bias_dims.c = output_depth;
84
85 cmsis_nn_dims output_dims;
86 output_dims.n = batch_size;
87 output_dims.h = output_shape.dims(1);
88 output_dims.w = output_shape.dims(2);
89 output_dims.c = output_depth;
90
91 auto buf_size =
92 arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims, &filter_dims, &output_dims);
93
94 auto buffer = std::make_unique<int8_t[]>(buf_size);
95 assert(buffer != nullptr);
96
97 cmsis_nn_context ctx;
98 ctx.buf = buffer.get();
99 ctx.size = buf_size;
100
101 auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
102 &filter_dims, filter_data, &bias_dims, bias_data, &output_dims,
103 output_data);
104
105 assert(res == ARM_CMSIS_NN_SUCCESS);
106 if (res != ARM_CMSIS_NN_SUCCESS)
107 return CmsisNNError;
108 return Ok;
109}
std::vector< int > per_channel_output_shift
std::vector< int32_t > per_channel_output_multiplier

References onert_micro::CmsisNNError, onert_micro::core::ConvQuant::dilation_height_factor, onert_micro::core::ConvQuant::dilation_width_factor, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::ConvQuant::input_offset, onert_micro::Ok, onert_micro::core::ConvQuant::output_offset, output_shape, onert_micro::core::ConvQuant::pad_h, onert_micro::core::ConvQuant::pad_w, onert_micro::core::ConvQuant::per_channel_output_multiplier, onert_micro::core::ConvQuant::per_channel_output_shift, onert_micro::core::ConvQuant::quantized_activation_max, onert_micro::core::ConvQuant::quantized_activation_min, onert_micro::core::ConvQuant::stride_height, and onert_micro::core::ConvQuant::stride_width.

Referenced by onert_micro::execute::execute_kernel_CircleConv2D().

◆ copyDimsToDesc()

template<int N>
void onert_micro::execute::pal::copyDimsToDesc ( const core::OMRuntimeShape input_shape,
NdArrayDesc< N > *  desc_out 
)
inline

Definition at line 53 of file ProcessBroadcastShapes.h.

54{
55 int desc_stride = 1;
56 for (int i = N - 1; i >= 0; --i)
57 {
58 desc_out->extents[i] = input_shape.dims(i);
59 desc_out->strides[i] = desc_stride;
60 desc_stride *= input_shape.dims(i);
61 }
62}

References onert_micro::core::OMRuntimeShape::dims(), onert_micro::execute::pal::NdArrayDesc< N >::extents, and onert_micro::execute::pal::NdArrayDesc< N >::strides.

Referenced by TransposeImpl().

◆ Cos()

template<typename T >
OMStatus onert_micro::execute::pal::Cos ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALCosCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::cos(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleCos().

◆ DepthwiseConv2D()

template<typename T >
OMStatus onert_micro::execute::pal::DepthwiseConv2D ( const core::FloatConv2D params,
const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape filter_shape,
const T *  filter_data,
const T *  bias_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALDepthwiseConv2DCommon.h.

37{
38 assert(false && "Not IMPL yet");
39}

◆ DepthwiseConv2D< float >()

template<>
OMStatus onert_micro::execute::pal::DepthwiseConv2D< float > ( const core::FloatConv2D params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape filter_shape,
const float *  filter_data,
const float *  bias_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 41 of file PALDepthwiseConv2DCommon.h.

46{
47 const int stride_width = params->stride_w;
48 const int stride_height = params->stride_h;
49 const int dilation_width_factor = params->dilation_width_factor;
50 const int dilation_height_factor = params->dilation_height_factor;
51 const int pad_width = params->pad_w;
52 const int pad_height = params->pad_h;
53 const int depth_multiplier = params->depth_multiplier;
54 const float output_activation_min = params->activation_min;
55 const float output_activation_max = params->activation_max;
56
57 const auto batches = input_shape.dims(0);
58 const int input_height = input_shape.dims(1);
59 const int input_width = input_shape.dims(2);
60 const int input_depth = input_shape.dims(3);
61 const int output_depth = filter_shape.dims(0);
62 const int filter_height = filter_shape.dims(1);
63 const int filter_width = filter_shape.dims(2);
64 const int output_height = output_shape.dims(1);
65 const int output_width = output_shape.dims(2);
66 for (int b = 0; b < batches; ++b)
67 {
68 for (int out_y = 0; out_y < output_height; ++out_y)
69 {
70 for (int out_x = 0; out_x < output_width; ++out_x)
71 {
72 for (int ic = 0; ic < input_depth; ++ic)
73 {
74 for (int m = 0; m < depth_multiplier; m++)
75 {
76 const int oc = m + ic * depth_multiplier;
77 const int in_x_origin = (out_x * stride_width) - pad_width;
78 const int in_y_origin = (out_y * stride_height) - pad_height;
79 float total = 0.f;
80 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
81 {
82 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
83 {
84 const int in_x = in_x_origin + dilation_width_factor * filter_x;
85 const int in_y = in_y_origin + dilation_height_factor * filter_y;
86 // If the location is outside the bounds of the input image,
87 // use zero as a default value.
88 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
89 {
90 float input_value = input_data[offset(input_shape.dimsData(), b, in_y, in_x, ic)];
91 float filter_value =
92 filter_data[offset(filter_shape.dimsData(), 0, filter_y, filter_x, oc)];
93 total += (input_value * filter_value);
94 }
95 }
96 }
97 float bias_value = 0.0f;
98 if (bias_data)
99 {
100 bias_value = bias_data[oc];
101 }
102 output_data[offset(output_shape.dimsData(), b, out_y, out_x, oc)] =
103 activationFunctionWithMinMax(total + bias_value, output_activation_min,
104 output_activation_max);
105 }
106 }
107 }
108 }
109 }
110 return Ok;
111}
T activationFunctionWithMinMax(T x, T output_activation_min, T output_activation_max)
Definition PALUtils.h:204

References onert_micro::core::FloatConv2D::activation_max, onert_micro::core::FloatConv2D::activation_min, activationFunctionWithMinMax(), onert_micro::core::FloatConv2D::depth_multiplier, onert_micro::core::FloatConv2D::dilation_height_factor, onert_micro::core::FloatConv2D::dilation_width_factor, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), m, offset(), onert_micro::Ok, output_shape, onert_micro::core::FloatConv2D::pad_h, onert_micro::core::FloatConv2D::pad_w, onert_micro::core::FloatConv2D::stride_h, and onert_micro::core::FloatConv2D::stride_w.

Referenced by onert_micro::execute::execute_kernel_CircleDepthwiseConv2D().

◆ DepthwiseConvPerChannel()

OMStatus onert_micro::execute::pal::DepthwiseConvPerChannel ( const core::ConvQuant params,
const core::OMRuntimeShape input_shape,
const int8_t *  input_data,
const core::OMRuntimeShape filter_shape,
const int8_t *  filter_data,
const int32_t *  bias_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 36 of file PALDepthwiseConv2D.h.

41{
42 cmsis_nn_dw_conv_params conv_params;
43 conv_params.dilation.h = params.dilation_height_factor;
44 conv_params.dilation.w = params.dilation_width_factor;
45
46 assert(conv_params.dilation.h == 1);
47 assert(conv_params.dilation.w == 1);
48
49 conv_params.input_offset = params.input_offset;
50 conv_params.output_offset = params.output_offset;
51 conv_params.stride.h = params.stride_height;
52 conv_params.stride.w = params.stride_width;
53 conv_params.padding.h = params.pad_h;
54 conv_params.padding.w = params.pad_w;
55 conv_params.activation.min = params.quantized_activation_min;
56 conv_params.activation.max = params.quantized_activation_max;
57 conv_params.ch_mult = params.depth_multiplier;
58
59 cmsis_nn_per_channel_quant_params quant_params;
60 quant_params.multiplier = const_cast<int32_t *>(params.per_channel_output_multiplier.data());
61 quant_params.shift = const_cast<int32_t *>(
62 reinterpret_cast<const int32_t *>(params.per_channel_output_shift.data()));
63
64 assert(conv_params.activation.min <= conv_params.activation.max);
65 const int batch_size = input_shape.dims(0);
66 const int input_depth = input_shape.dims(3);
67 const int output_depth = filter_shape.dims(0);
68
69 cmsis_nn_dims input_dims;
70 input_dims.n = batch_size;
71 input_dims.h = input_shape.dims(1);
72 input_dims.w = input_shape.dims(2);
73 input_dims.c = input_depth;
74
75 cmsis_nn_dims filter_dims;
76 filter_dims.n = output_depth;
77 filter_dims.h = filter_shape.dims(1);
78 filter_dims.w = filter_shape.dims(2);
79 filter_dims.c = input_depth;
80
81 cmsis_nn_dims bias_dims;
82 bias_dims.n = 1;
83 bias_dims.h = 1;
84 bias_dims.w = 1;
85 bias_dims.c = output_depth;
86
87 cmsis_nn_dims output_dims;
88 output_dims.n = batch_size;
89 output_dims.h = output_shape.dims(1);
90 output_dims.w = output_shape.dims(2);
91 output_dims.c = output_depth;
92
93 auto buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
94 &filter_dims, &output_dims);
95
96 auto buffer = std::make_unique<int8_t[]>(buf_size);
97 assert(buffer != nullptr);
98
99 cmsis_nn_context ctx;
100 ctx.buf = buffer.get();
101 ctx.size = buf_size;
102
103 auto res = arm_depthwise_conv_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims,
104 input_data, &filter_dims, filter_data, &bias_dims,
105 bias_data, &output_dims, output_data);
106
107 assert(res == ARM_CMSIS_NN_SUCCESS);
108 if (res != ARM_CMSIS_NN_SUCCESS)
109 return CmsisNNError;
110 return Ok;
111}

References onert_micro::CmsisNNError, onert_micro::core::ConvQuant::depth_multiplier, onert_micro::core::ConvQuant::dilation_height_factor, onert_micro::core::ConvQuant::dilation_width_factor, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::ConvQuant::input_offset, onert_micro::Ok, onert_micro::core::ConvQuant::output_offset, output_shape, onert_micro::core::ConvQuant::pad_h, onert_micro::core::ConvQuant::pad_w, onert_micro::core::ConvQuant::per_channel_output_multiplier, onert_micro::core::ConvQuant::per_channel_output_shift, onert_micro::core::ConvQuant::quantized_activation_max, onert_micro::core::ConvQuant::quantized_activation_min, onert_micro::core::ConvQuant::stride_height, and onert_micro::core::ConvQuant::stride_width.

Referenced by onert_micro::execute::execute_kernel_CircleDepthwiseConv2D().

◆ Dequantize()

template<typename InputT , typename OutputT >
OMStatus onert_micro::execute::pal::Dequantize ( const core::QuantizationParams  op_params,
const uint32_t  flat_size,
const InputT *  input_data,
OutputT *  output_data 
)

Definition at line 35 of file PALDequantize.h.

37{
38 const int32_t zero_point = op_params.zero_point;
39 const double scale = op_params.scale;
40
41 for (uint32_t i = 0; i < flat_size; i++)
42 {
43 const int32_t val = input_data[i];
44 const auto result = static_cast<OutputT>(scale * (val - zero_point));
45 output_data[i] = result;
46 }
47 return Ok;
48}

References onert_micro::Ok, onert_micro::core::QuantizationParams::scale, and onert_micro::core::QuantizationParams::zero_point.

Referenced by onert_micro::execute::execute_kernel_CircleDequantize().

◆ Div()

template<typename T >
OMStatus onert_micro::execute::pal::Div ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 31 of file PALDivCommon.h.

33{
34 ArithmeticOp<T, DivFn<T>>(params, flat_size, input1_data, input2_data, output_data);
35 return Ok;
36}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_kernel_CircleDiv().

◆ ElementWise()

template<typename T >
void onert_micro::execute::pal::ElementWise ( const uint32_t  size,
const core::ArithmeticQuantParams params,
const T *  input1_data,
const T *  input2_data,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 100 of file PALArithmeticOpCommon.h.

103{
104 for (int i = 0; i < size; ++i)
105 {
106 output_data[i] = binary_func(input1_data[i], input2_data[i], params);
107 }
108}

References size.

Referenced by Sub().

◆ Elu()

OMStatus onert_micro::execute::pal::Elu ( const int  flat_size,
const float *  input_data,
float *  output_data 
)
inline

Definition at line 32 of file PALElu.h.

33{
34 for (int i = 0; i < flat_size; i++)
35 {
36 float val = input_data[i];
37 float result = val < 0.0f ? std::exp(val) - 1 : val;
38 output_data[i] = result;
39 }
40
41 return Ok;
42}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_kernel_CircleElu().

◆ EqualFn()

template<typename T >
bool onert_micro::execute::pal::EqualFn ( T  lhs,
T  rhs 
)
inline

Definition at line 59 of file PALComparisons.h.

59{ return lhs == rhs; }

Referenced by onert_micro::execute::execute_kernel_CircleEqual().

◆ Exp()

template<typename T >
OMStatus onert_micro::execute::pal::Exp ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALExpCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::exp(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleExp().

◆ Fill()

template<typename T >
OMStatus onert_micro::execute::pal::Fill ( const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 32 of file PALFill.h.

33{
34 const uint32_t flat_size = output_shape.flatSize();
35
36 if (flat_size == -1)
37 return UnknownError;
38
39 assert(input_data != nullptr);
40 assert(output_data != nullptr);
41
42 for (int i = 0; i < flat_size; ++i)
43 {
44 output_data[i] = input_data[0];
45 }
46
47 return Ok;
48}

References luci_interpreter::RuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleFill().

◆ flatSizeSkipDim()

int onert_micro::execute::pal::flatSizeSkipDim ( const int32_t *  dims_data,
int  skip_dim,
int  num_dims 
)
inline

Definition at line 210 of file PALUtils.h.

211{
212 int flat_size = 1;
213 for (int i = 0; i < num_dims; ++i)
214 {
215 flat_size *= (i == skip_dim) ? 1 : dims_data[i];
216 }
217 return flat_size;
218}

Referenced by onert_micro::execute::execute_kernel_CircleL2Normalize(), FullyConnected(), FullyConnected(), FullyConnected(), and FullyConnected< int8_t >().

◆ Floor()

template<typename T >
OMStatus onert_micro::execute::pal::Floor ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 33 of file PALFloorCommon.h.

35{
36 const uint32_t flat_size = input_shape.flatSize();
37
38 if (flat_size == -1)
39 return UnknownError;
40
41 assert(input_data != nullptr);
42 assert(output_data != nullptr);
43
44 // check that input and output dimensions are equal
45 int N = input_shape.dimensionsCount();
46 assert(N == output_shape.dimensionsCount());
47
48 // check that sizes of all dimensions are equal
49 for (int i = 0; i < N; ++i)
50 {
51 assert(input_shape.dims(i) == output_shape.dims(i));
52 }
53
54 for (int i = 0; i < flat_size; i++)
55 {
56 output_data[i] = std::floor(input_data[i]);
57 }
58
59 return Ok;
60}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleFloor().

◆ FloorDiv()

void onert_micro::execute::pal::FloorDiv ( const int  flat_size,
const float *  input1_data,
const float *  input2_data,
float *  output_data 
)
inline

Definition at line 32 of file PALFloorDivCommon.h.

34{
35 BinaryOp<float, FloorDivFn<float>>(flat_size, input1_data, input2_data, output_data);
36}

Referenced by onert_micro::execute::execute_kernel_CircleFloorDiv().

◆ FloorMod()

void onert_micro::execute::pal::FloorMod ( const int  flat_size,
const float *  input1_data,
const float *  input2_data,
float *  output_data 
)
inline

Definition at line 32 of file PALFloorModCommon.h.

34{
35 BinaryOp<float, FloorModFn<float>>(flat_size, input1_data, input2_data, output_data);
36}

Referenced by onert_micro::execute::execute_kernel_CircleFloorMod().

◆ FullyConnected() [1/3]

template<typename WeightType >
OMStatus onert_micro::execute::pal::FullyConnected ( const core::FullyConnectedParams params,
const float *  input_data,
const core::OMRuntimeShape filter_shape,
const WeightType *  filter_data,
const float *  bias_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 80 of file PALFullyConnectedCommon.h.

84{
85 const float output_activation_min = params.float_activation_min;
86 const float output_activation_max = params.float_activation_max;
87
 88  const int batches = flatSizeSkipDim(output_shape.dimsData(), output_shape.dimensionsCount() - 1,
 89                                      output_shape.dimensionsCount());
 90  const int output_depth = output_shape.dims(output_shape.dimensionsCount() - 1);
91 const int accum_depth = filter_shape.dims(filter_shape.dimensionsCount() - 1);
92
93 for (int b = 0; b < batches; ++b)
94 {
95 const float *weight_scale_ptr = params.weights_scales;
96 for (int out_c = 0; out_c < output_depth; ++out_c)
97 {
98 float total = 0.f;
99 for (int d = 0; d < accum_depth; ++d)
100 {
101 auto input_value = input_data[b * accum_depth + d];
102 if (std::is_same<WeightType, float>::value)
103 {
104 total += input_value * filter_data[out_c * accum_depth + d];
105 }
106 else
107 {
108 const float filter_scale = *weight_scale_ptr;
109 const float filter_value =
110 static_cast<float>(filter_data[out_c * accum_depth + d]) * filter_scale;
111 total += input_value * filter_value;
112 }
113 }
114 float bias_value = 0.0f;
115 if (bias_data)
116 {
117 bias_value = bias_data[out_c];
118 }
119 output_data[out_c + output_depth * b] =
120 std::min(std::max(total + bias_value, output_activation_min), output_activation_max);
121
122 if (std::is_same<WeightType, int8_t>::value)
123 {
124 if (params.is_channel_wise_quant)
125 weight_scale_ptr++;
126 }
127 }
128 }
129 return Ok;
130}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), flatSizeSkipDim(), onert_micro::core::FullyConnectedParams::float_activation_max, onert_micro::core::FullyConnectedParams::float_activation_min, onert_micro::core::FullyConnectedParams::is_channel_wise_quant, onert_micro::Ok, output_shape, and onert_micro::core::FullyConnectedParams::weights_scales.

◆ FullyConnected() [2/3]

template<typename InputType , typename WeightType , typename OutputType , typename BiasType >
OMStatus onert_micro::execute::pal::FullyConnected ( const core::FullyConnectedParams params,
const InputType *  input_data,
const core::OMRuntimeShape filter_shape,
const WeightType *  filter_data,
const BiasType *  bias_data,
const core::OMRuntimeShape output_shape,
OutputType *  output_data 
)

Definition at line 34 of file PALFullyConnectedCommon.h.

38{
39 const int32_t input_offset = params.input_offset;
40 const int32_t filter_offset = params.weights_offset;
41 const int32_t output_offset = params.output_offset;
42 const int32_t output_multiplier = params.output_multiplier;
43 const int output_shift = params.output_shift;
44 const int32_t output_activation_min = params.quantized_activation_min;
45 const int32_t output_activation_max = params.quantized_activation_max;
46
47 const int filter_dim_count = filter_shape.dimensionsCount();
48 const int output_dim_count = output_shape.dimensionsCount();
49 const int batches =
50 flatSizeSkipDim(output_shape.dimsData(), output_dim_count - 1, output_dim_count);
51 const int output_depth = output_shape.dims(output_dim_count - 1);
52
53 const int accum_depth = filter_shape.dims(filter_dim_count - 1);
54 for (int b = 0; b < batches; ++b)
55 {
56 for (int out_c = 0; out_c < output_depth; ++out_c)
57 {
58 BiasType acc = 0;
59 for (int d = 0; d < accum_depth; ++d)
60 {
61 int32_t input_val = input_data[b * accum_depth + d];
62 int32_t filter_val = filter_data[out_c * accum_depth + d];
63 acc += (filter_val + filter_offset) * (input_val + input_offset);
64 }
65 if (bias_data)
66 {
67 acc += bias_data[out_c];
68 }
69 int32_t acc_scaled = multiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
70 acc_scaled += output_offset;
71 acc_scaled = std::max(acc_scaled, output_activation_min);
72 acc_scaled = std::min(acc_scaled, output_activation_max);
73 output_data[out_c + output_depth * b] = static_cast<OutputType>(acc_scaled);
74 }
75 }
76 return Ok;
77}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), flatSizeSkipDim(), onert_micro::core::FullyConnectedParams::input_offset, multiplyByQuantizedMultiplier(), onert_micro::Ok, onert_micro::core::FullyConnectedParams::output_multiplier, onert_micro::core::FullyConnectedParams::output_offset, output_shape, onert_micro::core::FullyConnectedParams::output_shift, onert_micro::core::FullyConnectedParams::quantized_activation_max, onert_micro::core::FullyConnectedParams::quantized_activation_min, and onert_micro::core::FullyConnectedParams::weights_offset.

◆ FullyConnected() [3/3]

template<>
OMStatus onert_micro::execute::pal::FullyConnected ( const core::FullyConnectedParams params,
const int16_t *  input_data,
const core::OMRuntimeShape filter_shape,
const int8_t *  filter_data,
const int64_t *  bias_data,
const core::OMRuntimeShape output_shape,
int16_t *  output_data 
)

Definition at line 98 of file PALFullyConnected.h.

102{
103 const int filter_dim_count = filter_shape.dimensionsCount();
104 const int output_dim_count = output_shape.dimensionsCount();
105 const int batches =
106 flatSizeSkipDim(output_shape.dimsData(), output_dim_count - 1, output_dim_count);
107 const int output_depth = output_shape.dims(output_dim_count - 1);
108 const int accum_depth = filter_shape.dims(filter_dim_count - 1);
109
110 cmsis_nn_fc_params fc_params;
111 fc_params.input_offset = params.input_offset;
112 fc_params.output_offset = params.output_offset;
113 fc_params.filter_offset = params.weights_offset;
114 fc_params.activation.min = params.quantized_activation_min;
115 fc_params.activation.max = params.quantized_activation_max;
116
117 cmsis_nn_per_tensor_quant_params quant_params;
118 quant_params.multiplier = params.output_multiplier;
119 quant_params.shift = params.output_shift;
120
121 cmsis_nn_dims input_dims;
122 input_dims.n = batches;
123 input_dims.h = 1;
124 input_dims.w = 1;
125 input_dims.c = accum_depth;
126
127 cmsis_nn_dims filter_dims;
128 filter_dims.n = accum_depth;
129 filter_dims.h = 1;
130 filter_dims.w = 1;
131 filter_dims.c = output_depth;
132
133 cmsis_nn_dims bias_dims;
134 bias_dims.n = 1;
135 bias_dims.h = 1;
136 bias_dims.w = 1;
137 bias_dims.c = output_depth;
138
139 cmsis_nn_dims output_dims;
140 output_dims.n = batches;
141 output_dims.h = 1;
142 output_dims.w = 1;
143 output_dims.c = output_depth;
144
145 int32_t buf_size = arm_fully_connected_s16_get_buffer_size(&filter_dims);
146 auto buffer = std::make_unique<int8_t[]>(buf_size);
147 assert(buffer != nullptr);
148
149 cmsis_nn_context ctx;
150 ctx.buf = buffer.get();
151 ctx.size = buf_size;
152
153 auto res =
154 arm_fully_connected_s16(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
155 filter_data, &bias_dims, bias_data, &output_dims, output_data);
156 assert(res == ARM_CMSIS_NN_SUCCESS);
157
158 if (res != ARM_CMSIS_NN_SUCCESS)
159 return CmsisNNError;
160
161 return Ok;
162}

References onert_micro::CmsisNNError, luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), flatSizeSkipDim(), onert_micro::core::FullyConnectedParams::input_offset, onert_micro::Ok, onert_micro::core::FullyConnectedParams::output_multiplier, onert_micro::core::FullyConnectedParams::output_offset, output_shape, onert_micro::core::FullyConnectedParams::output_shift, onert_micro::core::FullyConnectedParams::quantized_activation_max, onert_micro::core::FullyConnectedParams::quantized_activation_min, and onert_micro::core::FullyConnectedParams::weights_offset.

Referenced by onert_micro::execute::execute_kernel_CircleFullyConnected().

◆ FullyConnected< int8_t >()

template<>
OMStatus onert_micro::execute::pal::FullyConnected< int8_t > ( const core::FullyConnectedParams params,
const int8_t *  input_data,
const core::OMRuntimeShape filter_shape,
const int8_t *  filter_data,
const int32_t *  bias_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 32 of file PALFullyConnected.h.

36{
37 const int filter_dim_count = filter_shape.dimensionsCount();
38 const int output_dim_count = output_shape.dimensionsCount();
39 const int batches =
40 flatSizeSkipDim(output_shape.dimsData(), output_dim_count - 1, output_dim_count);
41 const int output_depth = output_shape.dims(output_dim_count - 1);
42 const int accum_depth = filter_shape.dims(filter_dim_count - 1);
43
44 cmsis_nn_fc_params fc_params;
45 fc_params.input_offset = params.input_offset;
46 fc_params.output_offset = params.output_offset;
47 fc_params.filter_offset = params.weights_offset;
48 fc_params.activation.min = params.quantized_activation_min;
49 fc_params.activation.max = params.quantized_activation_max;
50
51 cmsis_nn_per_tensor_quant_params quant_params;
52 quant_params.multiplier = params.output_multiplier;
53 quant_params.shift = params.output_shift;
54
55 cmsis_nn_dims input_dims;
56 input_dims.n = batches;
57 input_dims.h = 1;
58 input_dims.w = 1;
59 input_dims.c = accum_depth;
60
61 cmsis_nn_dims filter_dims;
62 filter_dims.n = accum_depth;
63 filter_dims.h = 1;
64 filter_dims.w = 1;
65 filter_dims.c = output_depth;
66
67 cmsis_nn_dims bias_dims;
68 bias_dims.n = 1;
69 bias_dims.h = 1;
70 bias_dims.w = 1;
71 bias_dims.c = output_depth;
72
73 cmsis_nn_dims output_dims;
74 output_dims.n = batches;
75 output_dims.h = 1;
76 output_dims.w = 1;
77 output_dims.c = output_depth;
78
79 int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
80 auto buffer = std::make_unique<int8_t[]>(buf_size);
81 assert(buffer != nullptr);
82
83 cmsis_nn_context ctx;
84 ctx.buf = buffer.get();
85 ctx.size = buf_size;
86
87 auto res =
88 arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
89 filter_data, &bias_dims, bias_data, &output_dims, output_data);
90 assert(res == ARM_CMSIS_NN_SUCCESS);
91 if (res != ARM_CMSIS_NN_SUCCESS)
92 return CmsisNNError;
93
94 return Ok;
95}

References onert_micro::CmsisNNError, luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), flatSizeSkipDim(), onert_micro::core::FullyConnectedParams::input_offset, onert_micro::Ok, onert_micro::core::FullyConnectedParams::output_multiplier, onert_micro::core::FullyConnectedParams::output_offset, output_shape, onert_micro::core::FullyConnectedParams::output_shift, onert_micro::core::FullyConnectedParams::quantized_activation_max, onert_micro::core::FullyConnectedParams::quantized_activation_min, and onert_micro::core::FullyConnectedParams::weights_offset.

◆ GatherND()

template<typename ParamsT , typename IndicesT >
OMStatus onert_micro::execute::pal::GatherND ( core::OMRuntimeShape  params_shape,
const ParamsT *  param_data,
core::OMRuntimeShape  indices_shape,
const IndicesT *  index_data,
ParamsT *  output_data 
)
inline

Definition at line 35 of file PALGatherND.h.

38{
39 const int indices_dims = indices_shape.dimensionsCount();
40 const int indices_nd = indices_shape.dims(indices_dims - 1);
41 const int params_dims = params_shape.dimensionsCount();
42
43 int n_slices = 1;
44 for (int i = 0; i < indices_dims - 1; ++i)
45 {
46 n_slices *= indices_shape.dims(i);
47 }
48
49 // If indices[-1] == params.rank, fetch single elements.
50 // If indices[-1] < params.rank, fetch slices.
51 int slice_size = 1;
52 for (int i = indices_nd; i < params_dims; ++i)
53 {
54 slice_size *= params_shape.dims(i);
55 }
56
57 int params_flat_size = params_shape.flatSize();
58 int remain_flat_size = params_flat_size;
59
60 // Number of elements per dimension
61 int dims_to_count[MAX_INDICES_ND];
62 for (int i = 0; i < indices_nd; ++i)
63 {
64 dims_to_count[i] = remain_flat_size / params_shape.dims(i);
65 remain_flat_size = dims_to_count[i];
66 }
67
68 for (int i = 0; i < n_slices; ++i)
69 {
70 int from_pos = 0;
71 for (int j = 0; j < indices_nd; ++j)
72 {
73 int offset = i * indices_nd + j;
74 IndicesT index = index_data[offset];
75 from_pos += index * dims_to_count[j];
76 }
77 if (from_pos < 0 || from_pos + slice_size > params_flat_size)
78 {
79 assert(false && "GatherND error");
80 return UnknownError;
81 }
82 std::memcpy(output_data + i * slice_size, param_data + from_pos, sizeof(ParamsT) * slice_size);
83 }
84
85 return Ok;
86}

References onert_micro::core::OMRuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::flatSize(), MAX_INDICES_ND, offset(), onert_micro::Ok, and onert_micro::UnknownError.

◆ getActivationParams() [1/3]

template<typename P >
void onert_micro::execute::pal::getActivationParams ( const P &  params,
float *  min,
float *  max 
)
inline

Definition at line 126 of file PALUtils.h.

127{
128 *min = params.float_activation_min;
129 *max = params.float_activation_max;
130}

◆ getActivationParams() [2/3]

template<typename P >
void onert_micro::execute::pal::getActivationParams ( const P &  params,
int32_t *  min,
int32_t *  max 
)
inline

Definition at line 120 of file PALUtils.h.

121{
122 *min = params.int32_activation_min;
123 *max = params.int32_activation_max;
124}

Referenced by ArithmeticOp(), ArithmeticOpScalar(), BroadcastArithmeticOp4DSlow(), QuantizedArithmeticOp(), and QuantizedBroadcastArithmeticOp4DSlow().

◆ getActivationParams() [3/3]

template<typename P >
void onert_micro::execute::pal::getActivationParams ( const P &  params,
int64_t *  min,
int64_t *  max 
)
inline

Definition at line 132 of file PALUtils.h.

133{
134 *min = params.int64_activation_min;
135 *max = params.int64_activation_max;
136}

◆ getUpLowerWeightTensorDepth()

std::pair< uint32_t, uint32_t > onert_micro::execute::pal::getUpLowerWeightTensorDepth ( core::OpTrainableRankType  rank,
const uint32_t  output_depth 
)
inline

Definition at line 30 of file PALUtils.h.

32{
33 std::pair<uint32_t, uint32_t> result(0u, output_depth);
34
35 switch (rank)
36 {
37 case core::ALL:
38 break;
39 case core::UP_1_2_PART:
40 result.second = static_cast<uint32_t>(static_cast<float>(output_depth) / 2.f);
41 break;
42 case core::LOWER_1_2_PART:
43 result.first = static_cast<uint32_t>(static_cast<float>(output_depth) / 2.f);
44 break;
45 default:
46 assert("Unsupported type");
47 break;
48 }
49
50 return result;
51}

References onert_micro::core::ALL, onert_micro::core::LOWER_1_2_PART, and onert_micro::core::UP_1_2_PART.

Referenced by onert_micro::train::pal::Conv2DWeightGrad(), and onert_micro::train::pal::FullyConnectedWeightGrad().

◆ GreaterEqualFn()

template<typename T >
bool onert_micro::execute::pal::GreaterEqualFn ( T  lhs,
T  rhs 
)
inline

Definition at line 61 of file PALComparisons.h.

61{ return lhs >= rhs; }

Referenced by onert_micro::execute::execute_kernel_CircleGreaterEqual().

◆ GreaterFn()

template<typename T >
bool onert_micro::execute::pal::GreaterFn ( T  lhs,
T  rhs 
)
inline

Definition at line 60 of file PALComparisons.h.

60{ return lhs > rhs; }

Referenced by onert_micro::execute::execute_kernel_CircleGreater().

◆ GRU()

OMStatus onert_micro::execute::pal::GRU ( const float *  input_data,
const float *  weight_input_data,
const float *  weight_hidden_data,
const float *  bias_input_data,
const float *  bias_hidden_data,
const float *  hidden_state_data,
float *  output_data,
float *  output_input_data,
float *  output_hidden_data,
const core::OMRuntimeShape input_shape,
const core::OMRuntimeShape output_shape,
const core::OMRuntimeShape weight_input_shape,
const core::OMRuntimeShape weight_hidden_shape,
const size_t  intermediate_buffer_size,
float *  intermediate_buffer 
)

Definition at line 183 of file PALGRUCommon.h.

191{
192 const int32_t time = input_shape.dims(0);
193
194 core::OMRuntimeShape output_shape_fc(2);
195 output_shape_fc.setDim(0, 1);
196 output_shape_fc.setDim(1, weight_hidden_shape.dims(0));
197
198 std::memcpy(output_data, hidden_state_data, output_shape.flatSize() * sizeof(float));
199
200 for (int i = 0; i < time; ++i)
201 {
202 calculateGRU(input_data, weight_input_data, weight_hidden_data, bias_input_data,
203 bias_hidden_data, output_data, input_shape, output_shape, weight_input_shape,
204 weight_hidden_shape, output_input_data, output_hidden_data, output_shape_fc,
205 intermediate_buffer);
206 input_data += input_shape.dims(2);
207 if (intermediate_buffer_size != 0)
208 {
209 assert(intermediate_buffer != nullptr);
210 intermediate_buffer += intermediate_buffer_size;
211 }
212 }
213 return Ok;
214}

References onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::core::OMRuntimeShape::setDim().

Referenced by onert_micro::execute::execute_kernel_CircleGRU().

◆ L2Normalization()

OMStatus onert_micro::execute::pal::L2Normalization ( const core::L2NormalizationParams params,
const float *  input_data,
float *  output_data 
)
inline

Definition at line 33 of file PALL2Normalize.h.

35{
36
37 const int outer_size = params.num_rows;
38 const int depth = params.row_size;
39 const int epsilon = params.epsilon;
40
41 for (int i = 0; i < outer_size; ++i)
42 {
43 float squared_l2_norm = 0;
44 for (int c = 0; c < depth; ++c)
45 {
46 const float val = input_data[depth * i + c];
47 squared_l2_norm += val * val;
48 }
49 float l2_norm = std::sqrt(squared_l2_norm);
50 l2_norm = std::max(l2_norm, static_cast<float>(epsilon));
51 for (int c = 0; c < depth; ++c)
52 {
53 output_data[depth * i + c] = input_data[depth * i + c] / l2_norm;
54 }
55 }
56
57 return Ok;
58}

References onert_micro::core::L2NormalizationParams::epsilon, onert_micro::core::L2NormalizationParams::num_rows, onert_micro::Ok, and onert_micro::core::L2NormalizationParams::row_size.

Referenced by onert_micro::execute::execute_kernel_CircleL2Normalize().

◆ L2Pool()

OMStatus onert_micro::execute::pal::L2Pool ( const core::Pool2DParams params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)

Definition at line 34 of file PALL2Pool2DCommon.h.

37{
38 const int32_t batches = input_shape.dims(0);
39 const int32_t depth = output_shape.dims(3);
40 const int32_t input_height = input_shape.dims(1);
41 const int32_t input_width = input_shape.dims(2);
42 const int32_t output_height = output_shape.dims(1);
43 const int32_t output_width = output_shape.dims(2);
44 const int32_t stride_height = params.stride_h;
45 const int32_t stride_width = params.stride_w;
46 for (int batch = 0; batch < batches; ++batch)
47 {
48 for (int out_y = 0; out_y < output_height; ++out_y)
49 {
50 for (int out_x = 0; out_x < output_width; ++out_x)
51 {
52 for (int channel = 0; channel < depth; ++channel)
53 {
54 const int in_x_origin = (out_x * stride_width) - params.pad_w;
55 const int in_y_origin = (out_y * stride_height) - params.pad_h;
56 // Compute the boundaries of the filter region clamped so as to
57 // ensure that the filter window fits in the input array.
58 const int filter_x_start = std::max(0, -in_x_origin);
59 const int filter_x_end = std::min(params.filter_w, input_width - in_x_origin);
60 const int filter_y_start = std::max(0, -in_y_origin);
61 const int filter_y_end = std::min(params.filter_h, input_height - in_y_origin);
62 float sum_squares = 0.f;
63 int filter_count = 0;
64 for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
65 {
66 for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
67 {
68 const int in_x = in_x_origin + filter_x;
69 const int in_y = in_y_origin + filter_y;
70 const float val =
71 input_data[offset(input_shape.dimsData(), batch, in_y, in_x, channel)];
72 sum_squares += val * val;
73 filter_count++;
74 }
75 }
76 assert(filter_count != 0);
77 if (filter_count == 0)
78 {
 79            std::cerr << "filter_count is zero" << std::endl;
 80            return FailedCheckCondition;
 81          }
82 const float l2pool_result = std::sqrt(sum_squares / filter_count);
83 output_data[offset(output_shape.dimsData(), batch, out_y, out_x, channel)] =
84 activationFunctionWithMinMax(l2pool_result, params.activation_min,
85 params.activation_max);
86 }
87 }
88 }
89 }
90 return Ok;
91}
@ FailedCheckCondition
Definition OMStatus.h:32

References onert_micro::core::Pool2DParams::activation_max, onert_micro::core::Pool2DParams::activation_min, activationFunctionWithMinMax(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), onert_micro::FailedCheckCondition, onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, offset(), onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.

Referenced by onert_micro::execute::execute_kernel_CircleL2Pool2D().

◆ LessEqualFn()

template<typename T >
bool onert_micro::execute::pal::LessEqualFn ( T  lhs,
T  rhs 
)
inline

Definition at line 58 of file PALComparisons.h.

58{ return lhs <= rhs; }

Referenced by onert_micro::execute::execute_kernel_CircleLessEqual().

◆ LessFn()

template<typename T >
bool onert_micro::execute::pal::LessFn ( T  lhs,
T  rhs 
)
inline

Definition at line 57 of file PALComparisons.h.

57{ return lhs < rhs; }

Referenced by onert_micro::execute::execute_kernel_CircleLess().

◆ Log()

template<typename T >
OMStatus onert_micro::execute::pal::Log ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALLogCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::log(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleLog().

◆ LogicalCommon()

template<class Fn >
OMStatus onert_micro::execute::pal::LogicalCommon ( const int  flat_size,
const bool *  input1_data,
const bool *  input2_data,
bool *  output_data 
)

Definition at line 38 of file PALLogicalCommon.h.

40{
41 Fn func;
42
43 for (int i = 0; i < flat_size; ++i)
44 {
45 output_data[i] = func(input1_data[i], input2_data[i]);
46 }
47
48 return Ok;
49}

References onert_micro::Ok.

◆ LogicalNot()

OMStatus onert_micro::execute::pal::LogicalNot ( const int  flat_size,
const bool *  input_data,
bool *  output_data 
)

Definition at line 25 of file PALLogicalNotCommon.h.

26{
27 for (int i = 0; i < flat_size; ++i)
28 {
29 output_data[i] = !(input_data[i]);
30 }
31
32 return Ok;
33}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_kernel_CircleLogicalNot().

◆ Logistic() [1/2]

OMStatus onert_micro::execute::pal::Logistic ( const int  flat_size,
const float *  input_data,
float *  output_data 
)
inline

Definition at line 32 of file PALLogistic.h.

33{
34 const float cutoff_upper = 16.619047164916992188f;
35 const float cutoff_lower = -9.f;
36
37 // Rationale for using approximation in reference kernel.
38 // 0. This approximation gives enough precision for float.
39 // 1. This works around an issue on an embedded chipset where exp() does not
40 // return correctly as expected - exp(x) should return inf when overflown
41 // not 1.701417 IEEE 754 defines representation for inf.
42 // 2. This will speed up calculation and is matching the behavior in the
43 // optimized kernels. (check the definition of scalar_logistic_op<float>)
44
45 for (int i = 0; i < flat_size; i++)
46 {
47 float val = input_data[i];
48 float result;
49 if (val > cutoff_upper)
50 {
51 result = 1.0f;
52 }
53 else if (val < cutoff_lower)
54 {
55 result = std::exp(val);
56 }
57 else
58 {
59 result = 1.f / (1.f + std::exp(-val));
60 }
61 output_data[i] = result;
62 }
63 return Ok;
64}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_kernel_CircleLogistic().

◆ Logistic() [2/2]

OMStatus onert_micro::execute::pal::Logistic ( const int  flat_size,
const int8_t *  input_data,
float  input_scale,
int  input_zero_point,
int8_t *  output_data,
float  output_scale,
int  output_zero_point 
)
inline

Definition at line 66 of file PALLogistic.h.

69{
70 const float cutoff_upper = 16.619047164916992188f;
71 const float cutoff_lower = -9.f;
72
73 // Rationale for using approximation in reference kernel.
74 // 0. This approximation gives enough precision for float.
75 // 1. This works around an issue on an embedded chipset where exp() does not
76 // return correctly as expected - exp(x) should return inf when overflown
77 // not 1.701417 IEEE 754 defines representation for inf.
78 // 2. This will speed up calculation and is matching the behavior in the
79 // optimized kernels. (check the definition of scalar_logistic_op<float>)
80
81 for (int i = 0; i < flat_size; i++)
82 {
83 // Dequantize.
84 float val = static_cast<float>((input_data[i] - input_zero_point) * input_scale);
85 float result;
86 if (val > cutoff_upper)
87 {
88 result = 1.0f;
89 }
90 else if (val < cutoff_lower)
91 {
92 result = std::exp(val);
93 }
94 else
95 {
96 result = 1.f / (1.f + std::exp(-val));
97 }
98 // Requantize
99 int8_t output = static_cast<int8_t>(std::round(result / output_scale) + output_zero_point);
100 output_data[i] = output;
101 }
102 return Ok;
103}

References onert_micro::Ok.

◆ LogSoftmax()

OMStatus onert_micro::execute::pal::LogSoftmax ( const core::LogSoftmaxParams params,
const float *  input_data,
float *  output_data 
)
inline

Definition at line 32 of file PALLogSoftmax.h.

34{
35 const int outer_size = params.num_rows;
36 const int depth = params.row_size;
37
38 for (int i = 0; i < outer_size; ++i)
39 {
40 // Find max element value which we'll use to ensure numerical stability
41 // taking advantage of the following equality:
42 // log(exp(x[i])/sum(exp(x[i]))) == log(exp(x[i]+C)/sum(exp(x[i]+C)))
43 float max = std::numeric_limits<float>::lowest();
44 for (int c = 0; c < depth; ++c)
45 {
46 max = std::max(max, input_data[i * depth + c]);
47 }
48
49 // Compute sum.
50 float sum = 0.f;
51 for (int c = 0; c < depth; ++c)
52 {
53 sum += std::exp(input_data[i * depth + c] - max);
54 }
55
56 // Compute result.
57 const float log_sum = std::log(sum);
58 for (int c = 0; c < depth; ++c)
59 {
60 output_data[i * depth + c] = input_data[i * depth + c] - max - log_sum;
61 }
62 }
63
64 return Ok;
65}

References onert_micro::core::LogSoftmaxParams::num_rows, onert_micro::Ok, and onert_micro::core::LogSoftmaxParams::row_size.

Referenced by onert_micro::execute::execute_kernel_CircleLogSoftmax().

◆ MatchingDim()

int onert_micro::execute::pal::MatchingDim ( const core::OMRuntimeShape shape1,
int  index1,
const core::OMRuntimeShape shape2,
int  index2 
)
inline

Definition at line 200 of file PALUtils.h.

202{
203 assert(shape1.dims(index1) == shape2.dims(index2));
204 return shape1.dims(index1);
205}

References onert_micro::core::OMRuntimeShape::dims().

◆ Maximum()

OMStatus onert_micro::execute::pal::Maximum ( const int  flat_size,
const float *  input1_data,
const float *  input2_data,
float *  output_data 
)
inline

Definition at line 32 of file PALMaximumCommon.h.

34{
35 return BinaryOp<float, MaximumFn<float>>(flat_size, input1_data, input2_data, output_data);
36}

Referenced by onert_micro::execute::execute_kernel_CircleMaximum().

◆ MaxPool() [1/2]

OMStatus onert_micro::execute::pal::MaxPool ( const core::Pool2DParams params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)

Definition at line 32 of file PALMaxPool2DCommon.h.

35{
36 const int32_t batches = input_shape.dims(0);
37 const int32_t depth = output_shape.dims(3);
38 const int32_t input_height = input_shape.dims(1);
39 const int32_t input_width = input_shape.dims(2);
40 const int32_t output_height = output_shape.dims(1);
41 const int32_t output_width = output_shape.dims(2);
42 const int32_t stride_height = params.stride_h;
43 const int32_t stride_width = params.stride_w;
44 for (int batch = 0; batch < batches; ++batch)
45 {
46 for (int out_y = 0; out_y < output_height; ++out_y)
47 {
48 for (int out_x = 0; out_x < output_width; ++out_x)
49 {
50 for (int channel = 0; channel < depth; ++channel)
51 {
52 const int in_x_origin = (out_x * stride_width) - params.pad_w;
53 const int in_y_origin = (out_y * stride_height) - params.pad_h;
54 // Compute the boundaries of the filter region clamped so as to
55 // ensure that the filter window fits in the input array.
56 const int filter_x_start = std::max(0, -in_x_origin);
57 const int filter_x_end = std::min(params.filter_w, input_width - in_x_origin);
58 const int filter_y_start = std::max(0, -in_y_origin);
59 const int filter_y_end = std::min(params.filter_h, input_height - in_y_origin);
60 float max = std::numeric_limits<float>::lowest();
61 for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
62 {
63 for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
64 {
65 const int in_x = in_x_origin + filter_x;
66 const int in_y = in_y_origin + filter_y;
67
68 const int input_data_offset =
69 ((batch * input_shape.dims(1) + in_y) * input_shape.dims(2) + in_x) *
70 input_shape.dims(3) +
71 channel;
72
73 max = std::max(max, input_data[input_data_offset]);
74 }
75 }
76 const int output_data_offset =
77 ((batch * output_shape.dims(1) + out_y) * output_shape.dims(2) + out_x) *
78 output_shape.dims(3) +
79 channel;
80
81 output_data[output_data_offset] =
82 std::min(std::max(max, params.activation_min), params.activation_max);
83 }
84 }
85 }
86 }
87 return Ok;
88}

References onert_micro::core::Pool2DParams::activation_max, onert_micro::core::Pool2DParams::activation_min, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.

◆ MaxPool() [2/2]

OMStatus onert_micro::execute::pal::MaxPool ( const core::Pool2DParams params,
const core::OMRuntimeShape input_shape,
const int8_t *  input_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 32 of file PALMaxPool2D.h.

35{
36 cmsis_nn_dims input_dims;
37 cmsis_nn_dims output_dims;
38 cmsis_nn_pool_params pool_params;
39 cmsis_nn_dims filter_dims;
40 cmsis_nn_context ctx;
41
42 const int depth = input_shape.dims(3);
43 const int output_width = output_shape.dims(2);
44
45 input_dims.n = 1;
46 input_dims.h = input_shape.dims(1);
47 input_dims.w = input_shape.dims(2);
48 input_dims.c = depth;
49
50 output_dims.n = 1;
51 output_dims.h = output_shape.dims(1);
52 output_dims.w = output_width;
53 output_dims.c = depth;
54
55 pool_params.stride.h = params.stride_h;
56 pool_params.stride.w = params.stride_w;
57 pool_params.padding.h = params.pad_h;
58 pool_params.padding.w = params.pad_w;
59 pool_params.activation.min = params.quantized_activation_min;
60 pool_params.activation.max = params.quantized_activation_max;
61
62 filter_dims.n = 1;
63 filter_dims.h = params.filter_h;
64 filter_dims.w = params.filter_w;
65 filter_dims.c = 1;
66
67 auto res = arm_max_pool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims,
68 &output_dims, output_data);
69
70 assert(res == ARM_CMSIS_NN_SUCCESS);
71 if (res != ARM_CMSIS_NN_SUCCESS)
72 return CmsisNNError;
73
74 return Ok;
75}

References onert_micro::CmsisNNError, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::quantized_activation_max, onert_micro::core::Pool2DParams::quantized_activation_min, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.

Referenced by onert_micro::execute::execute_kernel_CircleMaxPool2D().

◆ MeanROWH()

template<typename T >
void onert_micro::execute::pal::MeanROWH ( const OMRuntimeShape unextended_input_shape,
const T *  input_data,
const OMRuntimeShape unextended_output_shape,
T *  output_data 
)
inline

Definition at line 194 of file PALReduceCommon.h.

196{
197 // Current implementation only supports dimension equals 4 and simultaneous
198 // reduction over width and height.
199 const OMRuntimeShape input_shape = OMRuntimeShape::extendedShape(4, unextended_input_shape);
200 const OMRuntimeShape output_shape = OMRuntimeShape::extendedShape(4, unextended_output_shape);
201
202 const int output_batch = output_shape.dims(0);
203 const int output_depth = output_shape.dims(3);
204
205 const int input_height = input_shape.dims(1);
206 const int input_width = input_shape.dims(2);
207
208 for (int out_b = 0; out_b < output_batch; ++out_b)
209 {
210 for (int out_d = 0; out_d < output_depth; ++out_d)
211 {
212 float value = 0;
213 for (int in_h = 0; in_h < input_height; ++in_h)
214 {
215 for (int in_w = 0; in_w < input_width; ++in_w)
216 {
217 value += static_cast<float>(
218 input_data[offset(input_shape.dimsData(), out_b, in_h, in_w, out_d)]);
219 }
220 }
221 float result = value / (input_width * input_height);
222 output_data[offset(output_shape.dimsData(), out_b, 0, 0, out_d)] = static_cast<T>(result);
223 }
224 }
225}

References luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::extendedShape(), offset(), and output_shape.

◆ Minimum()

OMStatus onert_micro::execute::pal::Minimum ( const int  flat_size,
const float *  input1_data,
const float *  input2_data,
float *  output_data 
)
inline

Definition at line 32 of file PALMinimumCommon.h.

34{
35 return BinaryOp<float, MinimumFn<float>>(flat_size, input1_data, input2_data, output_data);
36}

Referenced by onert_micro::execute::execute_kernel_CircleMinimum().

◆ Mul() [1/3]

OMStatus onert_micro::execute::pal::Mul ( const core::ArithmeticQuantParams params,
const uint32_t  flat_size,
const int8_t *  input1_data,
const int8_t *  input2_data,
int8_t *  output_data 
)

◆ Mul() [2/3]

template<typename InputType , typename OutputType >
OMStatus onert_micro::execute::pal::Mul ( const core::ArithmeticQuantParams params,
uint32_t  size,
const InputType *  input1_data,
const InputType *  input2_data,
OutputType *  output_data 
)

Definition at line 31 of file PALMul.h.

33{
34 for (int i = 0; i < size; ++i)
35 {
36 const int32_t input1_val = params.input1_offset + input1_data[i];
37 const int32_t input2_val = params.input2_offset + input2_data[i];
38 const int32_t unclamped_result =
39 params.output_offset + multiplyByQuantizedMultiplier(input1_val * input2_val,
40 params.output_multiplier,
41 params.output_shift);
42 const int32_t clamped_output = std::min(
43 params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
44 output_data[i] = static_cast<OutputType>(clamped_output);
45 }
46 return Ok;
47}

References onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input2_offset, multiplyByQuantizedMultiplier(), onert_micro::Ok, onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, onert_micro::core::ArithmeticQuantParams::quantized_activation_min, and size.

◆ Mul() [3/3]

template<typename T >
OMStatus onert_micro::execute::pal::Mul ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 36 of file PALMulCommon.h.

38{
39 ArithmeticOp<T, MulFn<T>>(params, flat_size, input1_data, input2_data, output_data);
40 return Ok;
41}

References onert_micro::Ok.

◆ multiplyByQuantizedMultiplier()

int32_t onert_micro::execute::pal::multiplyByQuantizedMultiplier ( int32_t  x,
int32_t  quantized_multiplier,
int  shift 
)
inline

Definition at line 104 of file PALUtils.h.

105{
106 int left_shift = shift > 0 ? shift : 0;
107 int right_shift = shift > 0 ? 0 : -shift;
108 return roundingDivideByPOT(
109 saturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
110}

References roundingDivideByPOT(), and saturatingRoundingDoublingHighMul().

Referenced by BroadcastMul6DSlow(), FullyConnected(), and Mul().

◆ multiplyByQuantizedMultiplierSmallerThanOneExp()

int32_t onert_micro::execute::pal::multiplyByQuantizedMultiplierSmallerThanOneExp ( int32_t  x,
int32_t  quantized_multiplier,
int  left_shift 
)
inline

Definition at line 112 of file PALUtils.h.

115{
116 return roundingDivideByPOT(saturatingRoundingDoublingHighMul(x, quantized_multiplier),
117 -left_shift);
118}

References roundingDivideByPOT(), and saturatingRoundingDoublingHighMul().

Referenced by AddFunc(), BroadcastComparison4DSlowWithScaling(), ComparisonWithScaling(), and SubFunc().

◆ NdArrayDescsForElementwiseBroadcast()

template<int N>
void onert_micro::execute::pal::NdArrayDescsForElementwiseBroadcast ( const core::OMRuntimeShape input0_shape,
const core::OMRuntimeShape input1_shape,
NdArrayDesc< N > *  desc0_out,
NdArrayDesc< N > *  desc1_out 
)
inline

Definition at line 94 of file ProcessBroadcastShapes.h.

98{
99
100 auto extended_input0_shape = core::OMRuntimeShape::extendedShape(N, input0_shape);
101 auto extended_input1_shape = core::OMRuntimeShape::extendedShape(N, input1_shape);
102
103 // Copy dims to desc, calculating strides.
104 copyDimsToDesc<N>(extended_input0_shape, desc0_out);
105 copyDimsToDesc<N>(extended_input1_shape, desc1_out);
106
107 // Walk over each dimension. If the extents are equal do nothing.
108 // Otherwise, set the desc with extent 1 to have extent equal to the other and
109 // stride 0.
110 for (int i = 0; i < N; ++i)
111 {
112 const int extent0 = extended_input0_shape.dims(i);
113 const int extent1 = extended_input1_shape.dims(i);
114 if (extent0 != extent1)
115 {
116 if (extent0 == 1)
117 {
118 desc0_out->strides[i] = 0;
119 desc0_out->extents[i] = extent1;
120 }
121 else
122 {
123 desc1_out->strides[i] = 0;
124 desc1_out->extents[i] = extent0;
125 }
126 }
127 }
128}

References onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::execute::pal::NdArrayDesc< N >::extents, and onert_micro::execute::pal::NdArrayDesc< N >::strides.

Referenced by BroadcastArithmeticOp4DSlow(), BroadcastBinaryOp4DSlow(), BroadcastMul6DSlow(), and QuantizedBroadcastArithmeticOp4DSlow().

◆ NDOpsHelper()

template<int N, typename Calc >
void onert_micro::execute::pal::NDOpsHelper ( const NdArrayDesc< N > &  output,
const Calc &  calc 
)
inline

Definition at line 87 of file ProcessBroadcastShapes.h.

88{
89 int indexes[N] = {0};
90 NDOpsHelperImpl<N, 0, Calc>(output, calc, indexes);
91}

◆ NDOpsHelperImpl() [1/2]

template<int N, int DIM, typename Calc >
std::enable_if< DIM==N-1, void >::type onert_micro::execute::pal::NDOpsHelperImpl ( const NdArrayDesc< N > &  output,
const Calc &  calc,
int  indexes[N] 
)

Definition at line 65 of file ProcessBroadcastShapes.h.

67{
68 for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM])
69 {
70 calc(indexes);
71 }
72}

◆ NDOpsHelperImpl() [2/2]

template<int N, int DIM, typename Calc >
std::enable_if< DIM!=N-1, void >::type onert_micro::execute::pal::NDOpsHelperImpl ( const NdArrayDesc< N > &  output,
const Calc &  calc,
int  indexes[N] 
)

Definition at line 75 of file ProcessBroadcastShapes.h.

77{
78 for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM])
79 {
80 NDOpsHelperImpl<N, DIM + 1, Calc>(output, calc, indexes);
81 }
82}

◆ Neg()

template<typename T >
OMStatus onert_micro::execute::pal::Neg ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 30 of file PALNegCommon.h.

32{
33 const uint32_t flat_size = input_shape.flatSize();
34
35 if (flat_size == -1)
36 return UnknownError;
37
38 assert(input_data != nullptr);
39 assert(output_data != nullptr);
40
41 assert(input_shape == output_shape);
42
43 for (int i = 0; i < flat_size; i++)
44 {
45 output_data[i] = -(input_data[i]);
46 }
47
48 return Ok;
49}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleNeg().

◆ nextIndex()

bool onert_micro::execute::pal::nextIndex ( const int32_t  num_dims,
const int32_t *  dims,
int32_t *  current 
)
inline

Definition at line 175 of file PALUtils.h.

176{
177 if (num_dims == 0)
178 {
179 return false;
180 }
181 int carry = 1;
182 for (int idx = num_dims - 1; idx >= 0; --idx)
183 {
184 int current_val = current[idx] + carry;
185 if (dims[idx] == current_val)
186 {
187 current[idx] = 0;
188 }
189 else
190 {
191 current[idx] = current_val;
192 carry = 0;
193 break;
194 }
195 }
196 return (carry == 0);
197}

Referenced by ReduceGeneric(), and ReduceGeneric().

◆ NotEqualFn()

template<typename T >
bool onert_micro::execute::pal::NotEqualFn ( T  lhs,
T  rhs 
)
inline

Definition at line 62 of file PALComparisons.h.

62{ return lhs != rhs; }

Referenced by onert_micro::execute::execute_kernel_CircleNotEqual().

◆ offset() [1/2]

int onert_micro::execute::pal::offset ( const int32_t *  dims_data,
int  i0,
int  i1,
int  i2,
int  i3 
)
inline

Definition at line 220 of file PALUtils.h.

221{
222 return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3;
223}

Referenced by BatchToSpaceND(), DepthwiseConv2D< float >(), GatherND(), L2Pool(), MeanROWH(), reducedOutputOffset(), SpaceToBatchND(), SpaceToDepth(), and TransposeConv< float >().

◆ offset() [2/2]

int onert_micro::execute::pal::offset ( const int32_t *  dims_data,
int  i0,
int  i1,
int  i2,
int  i3,
int  i4 
)
inline

Definition at line 225 of file PALUtils.h.

226{
227 return (((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3) * dims_data[4] + i4;
228}

◆ Pad()

OMStatus onert_micro::execute::pal::Pad ( const core::PadParams op_params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const float  pad_value,
const core::OMRuntimeShape output_shape,
float *  output_data 
)

Definition at line 35 of file PALPad.h.

38{
39 // Runtime calls are currently fixed at 5 dimensions. Copy inputs so we can
40 // pad them to 5 dims (yes, we are "padding the padding").
41 int left_padding_copy[padKernelMaxDimensionCount];
42 for (int &i : left_padding_copy)
43 {
44 i = 0;
45 }
46 for (int i = 0; i < op_params.left_padding_count; ++i)
47 {
48 left_padding_copy[i + padKernelMaxDimensionCount - op_params.left_padding_count] =
49 op_params.left_padding[i];
50 }
51 int right_padding_copy[padKernelMaxDimensionCount];
52 for (int &i : right_padding_copy)
53 {
54 i = 0;
55 }
56 for (int i = 0; i < op_params.right_padding_count; ++i)
57 {
58 right_padding_copy[i + padKernelMaxDimensionCount - op_params.right_padding_count] =
59 op_params.right_padding[i];
60 }
61 const auto extended_output =
62 core::OMRuntimeShape::extendedShape(padKernelMaxDimensionCount, output_shape);
63 const int output_batch = extended_output.dims(0);
64 const int output_plane = extended_output.dims(1);
65 const int output_height = extended_output.dims(2);
66 const int output_width = extended_output.dims(3);
67 const int output_depth = extended_output.dims(4);
68
69 const int left_b_padding = left_padding_copy[0];
70 const int left_p_padding = left_padding_copy[1];
71 const int left_h_padding = left_padding_copy[2];
72 const int left_w_padding = left_padding_copy[3];
73 const int left_d_padding = left_padding_copy[4];
74
75 const int right_b_padding = right_padding_copy[0];
76 const int right_p_padding = right_padding_copy[1];
77 const int right_h_padding = right_padding_copy[2];
78 const int right_w_padding = right_padding_copy[3];
79 const int right_d_padding = right_padding_copy[4];
80
81 const float *in_ptr = input_data;
82 float *out_ptr = output_data;
83 for (int out_b = 0; out_b < output_batch; ++out_b)
84 {
85 for (int out_p = 0; out_p < output_plane; ++out_p)
86 {
87 for (int out_h = 0; out_h < output_height; ++out_h)
88 {
89 for (int out_w = 0; out_w < output_width; ++out_w)
90 {
91 for (int out_d = 0; out_d < output_depth; ++out_d)
92 {
93 if (out_b < left_b_padding || out_b >= output_batch - right_b_padding ||
94 out_p < left_p_padding || out_p >= output_plane - right_p_padding ||
95 out_h < left_h_padding || out_h >= output_height - right_h_padding ||
96 out_w < left_w_padding || out_w >= output_width - right_w_padding ||
97 out_d < left_d_padding || out_d >= output_depth - right_d_padding)
98 {
99 *out_ptr++ = pad_value;
100 }
101 else
102 {
103 *out_ptr++ = *in_ptr++;
104 }
105 }
106 }
107 }
108 }
109 }
110
111 return Ok;
112}
list input_data
Definition infer.py:29

References onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::core::PadParams::left_padding, onert_micro::core::PadParams::left_padding_count, onert_micro::Ok, output_shape, onert_micro::core::PadParams::right_padding, and onert_micro::core::PadParams::right_padding_count.

Referenced by onert_micro::execute::execute_kernel_CirclePad().

◆ processBroadcastShapes()

bool onert_micro::execute::pal::processBroadcastShapes ( const core::OMRuntimeShape shape0,
const core::OMRuntimeShape shape1,
core::BinaryArithmeticBroadcastParams params 
)
inline

Definition at line 155 of file ProcessBroadcastShapes.h.

158{
159 const int dims_count = std::max(shape0.dimensionsCount(), shape1.dimensionsCount());
160
161 params->broadcast_category = core::BroadcastableOpCategory::kGenericBroadcast;
162
163 auto extended_shape0 = core::OMRuntimeShape::extendedShape(dims_count, shape0);
164 auto extended_shape1 = core::OMRuntimeShape::extendedShape(dims_count, shape1);
165
166 // Check for "exact" match, implicitly accepting any scalar shapes.
167 if (extended_shape0 == extended_shape1)
168 {
169 params->broadcast_category = core::BroadcastableOpCategory::kNonBroadcast;
170 return false;
171 }
172
173 if (shape0.flatSize() == 1)
174 {
175 params->broadcast_category = core::BroadcastableOpCategory::kScalarFirstBroadcast;
176 return true;
177 }
178 else if (shape1.flatSize() == 1)
179 {
180 params->broadcast_category = core::BroadcastableOpCategory::kScalarSecondBroadcast;
181 return true;
182 }
183
184 for (int i = dims_count - 1; i >= 0; --i)
185 {
186 if (extended_shape0.dims(i) == extended_shape1.dims(i))
187 {
188 continue;
189 }
190 else if (extended_shape0.dims(i) == 1)
191 {
192 params->broadcast_category = core::BroadcastableOpCategory::kFirstInputBroadcastsFast;
193 return true;
194 }
195 else if (extended_shape1.dims(i) == 1)
196 {
197 params->broadcast_category = core::BroadcastableOpCategory::kSecondInputBroadcastsFast;
198 return true;
199 }
200 else
201 {
202 // This case is erroneous: there is a dimension that does not match and
203 // is not a broadcast from one shape to the other.
204 params->broadcast_category = core::BroadcastableOpCategory::kGenericBroadcast;
205 return true;
206 }
207 }
208
209 return false;
210}

References onert_micro::core::BinaryArithmeticBroadcastParams::broadcast_category, onert_micro::core::OMRuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::core::kFirstInputBroadcastsFast, onert_micro::core::kGenericBroadcast, onert_micro::core::kNonBroadcast, onert_micro::core::kScalarFirstBroadcast, onert_micro::core::kScalarSecondBroadcast, and onert_micro::core::kSecondInputBroadcastsFast.

Referenced by onert_micro::execute::execute_kernel_CircleAdd(), onert_micro::execute::execute_kernel_CircleDiv(), onert_micro::execute::execute_kernel_CircleMul(), onert_micro::execute::execute_kernel_CircleSquaredDifference(), and onert_micro::execute::execute_kernel_CircleSub().

◆ Quantize()

template<typename InputT , typename OutputT >
OMStatus onert_micro::execute::pal::Quantize ( const core::QuantizationParams  op_params,
const uint32_t  flat_size,
const InputT *  input_data,
OutputT *  output_data 
)

Definition at line 35 of file PALQuantize.h.

37{
38 const int32_t zero_point = op_params.zero_point;
39 const double scale = op_params.scale;
40 static constexpr int32_t min_val = std::numeric_limits<OutputT>::min();
41 static constexpr int32_t max_val = std::numeric_limits<OutputT>::max();
42
43 for (int i = 0; i < flat_size; i++)
44 {
45 const InputT val = input_data[i];
46 int32_t unclamped =
47 static_cast<int32_t>(std::round(val / static_cast<float>(scale))) + zero_point;
48 int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
49 output_data[i] = clamped;
50 }
51
52 return Ok;
53}

References onert_micro::Ok, onert_micro::core::QuantizationParams::scale, and onert_micro::core::QuantizationParams::zero_point.

Referenced by onert_micro::execute::execute_kernel_CircleQuantize().

◆ QuantizedArithmeticOp()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::QuantizedArithmeticOp ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const onert_micro::core::QuantizationParams input1_qparams,
const T *  input1_data,
const onert_micro::core::QuantizationParams input2_qparams,
const T *  input2_data,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data 
)

Definition at line 69 of file PALArithmeticOpCommon.h.

74{
75 float activation_min, activation_max;
76 getActivationParams(params, &activation_min, &activation_max);
77
78 Fn func;
79 for (int i = 0; i < flat_size; ++i)
80 {
81 // Dequantize input1
82 float input1 = static_cast<float>((input1_data[i] - static_cast<T>(input1_qparams.zero_point)) *
83 input1_qparams.scale);
84 // Dequantize input2
85 float input2 = static_cast<float>((input2_data[i] - static_cast<T>(input2_qparams.zero_point)) *
86 input2_qparams.scale);
87 float result = std::min(std::max(func(input1, input2), activation_min), activation_max);
88
89 // Quantize result
90 result = result / output_qparams.scale + output_qparams.zero_point;
91 result = std::max<float>(std::numeric_limits<T>::min(), result);
92 result = std::min<float>(std::numeric_limits<T>::max(), result);
93 output_data[i] = static_cast<T>(result);
94 }
95
96 return Ok;
97}

References getActivationParams(), onert_micro::Ok, onert_micro::core::QuantizationParams::scale, and onert_micro::core::QuantizationParams::zero_point.

◆ QuantizedBroadcastArithmeticOp4DSlow()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::QuantizedBroadcastArithmeticOp4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const onert_micro::core::QuantizationParams input1_qparams,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const onert_micro::core::QuantizationParams input2_qparams,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data 
)

Definition at line 176 of file PALArithmeticOpCommon.h.

183{
184 NdArrayDesc<4> desc1;
185 NdArrayDesc<4> desc2;
186 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
187 const core::OMRuntimeShape extended_output_shape =
188 core::OMRuntimeShape::extendedShape(4, output_shape);
189
190 float activation_min, activation_max;
191 getActivationParams(params, &activation_min, &activation_max);
192
193 // In Tensorflow, the dimensions are canonically named (batch_number, row,
194 // col, channel), with extents (batches, height, width, depth), with the
195 // trailing dimension changing most rapidly (channels has the smallest stride,
196 // typically 1 element).
197 //
198 // In generated C code, we store arrays with the dimensions reversed. The
199 // first dimension has smallest stride.
200 //
201 // We name our variables by their Tensorflow convention, but generate C code
202 // nesting loops such that the innermost loop has the smallest stride for the
203 // best cache behavior.
204 Fn func;
205 for (int b = 0; b < extended_output_shape.dims(0); ++b)
206 {
207 for (int y = 0; y < extended_output_shape.dims(1); ++y)
208 {
209 for (int x = 0; x < extended_output_shape.dims(2); ++x)
210 {
211 for (int c = 0; c < extended_output_shape.dims(3); ++c)
212 {
213 // Dequantize input1
214 float input1 = static_cast<float>((input1_data[subscriptToIndex(desc1, b, y, x, c)] -
215 static_cast<T>(input1_qparams.zero_point)) *
216 input1_qparams.scale);
217 // Dequantize input2
218 float input2 = static_cast<float>((input2_data[subscriptToIndex(desc2, b, y, x, c)] -
219 static_cast<T>(input2_qparams.zero_point)) *
220 input2_qparams.scale);
221
222 float result = std::min(std::max(func(input1, input2), activation_min), activation_max);
223
224 // Quantize result
225 result = result / output_qparams.scale + output_qparams.zero_point;
226 result = std::max<float>(std::numeric_limits<T>::min(), result);
227 result = std::min<float>(std::numeric_limits<T>::max(), result);
228 const int output_data_offset =
229 ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
230 extended_output_shape.dims(3) +
231 c;
232 output_data[output_data_offset] = static_cast<T>(result);
233 }
234 }
235 }
236 }
237 return Ok;
238}

References desc1, desc2, onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), getActivationParams(), NdArrayDescsForElementwiseBroadcast(), onert_micro::Ok, output_shape, onert_micro::core::QuantizationParams::scale, subscriptToIndex(), and onert_micro::core::QuantizationParams::zero_point.

◆ QuantizedBroadcastSquaredDifference4DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::QuantizedBroadcastSquaredDifference4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const onert_micro::core::QuantizationParams input1_qparams,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const onert_micro::core::QuantizationParams input2_qparams,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data 
)

Definition at line 49 of file PALSquaredDifferenceCommon.h.

56{
57 QuantizedBroadcastArithmeticOp4DSlow<T, SquaredDifferenceFn<float>>(
58 params, input1_shape, input1_qparams, input1_data, input2_shape, input2_qparams, input2_data,
59 output_shape, output_qparams, output_data);
60 return Ok;
61}

References onert_micro::Ok, and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleSquaredDifference().

◆ QuantizedRsqrt()

template<typename T >
OMStatus onert_micro::execute::pal::QuantizedRsqrt ( const core::OMRuntimeShape input_shape,
const onert_micro::core::QuantizationParams input_qparams,
const T *  input_data,
const core::OMRuntimeShape output_shape,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data 
)
inline

Definition at line 42 of file PALRsqrtCommon.h.

47{
48 return SISOOperation<T>(input_shape, input_qparams, input_data, output_shape, output_qparams,
49 output_data, [](float arg) -> float { return 1.f / std::sqrt(arg); });
50}

References output_shape.

◆ QuantizedSquaredDifference()

template<typename T >
OMStatus onert_micro::execute::pal::QuantizedSquaredDifference ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const onert_micro::core::QuantizationParams input1_qparams,
const T *  input1_data,
const onert_micro::core::QuantizationParams input2_qparams,
const T *  input2_data,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data 
)

Definition at line 64 of file PALSquaredDifferenceCommon.h.

69{
70
71 QuantizedArithmeticOp<T, SquaredDifferenceFn<float>>(params, flat_size, input1_qparams,
72 input1_data, input2_qparams, input2_data,
73 output_qparams, output_data);
74
75 return Ok;
76}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_kernel_CircleSquaredDifference().

◆ QuantizedTanh()

template<typename T >
OMStatus onert_micro::execute::pal::QuantizedTanh ( const core::OMRuntimeShape input_shape,
const onert_micro::core::QuantizationParams input_qparams,
const T *  input_data,
const core::OMRuntimeShape output_shape,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data 
)
inline

Definition at line 53 of file PALTanhCommon.h.

58{
59 const uint32_t flat_size = input_shape.flatSize();
60 for (int i = 0; i < flat_size; i++)
61 {
62 // Dequantize.
63 float val = static_cast<float>((input_data[i] - static_cast<T>(input_qparams.zero_point)) *
64 input_qparams.scale);
65 // float result
66 float result = std::tanh(val);
67
68 // Quantize float to output type
69 result = result / output_qparams.scale + output_qparams.zero_point;
70 result = std::max<float>(std::numeric_limits<T>::min(), result);
71 result = std::min<float>(std::numeric_limits<T>::max(), result);
72
73 output_data[i] = static_cast<T>(result);
74 }
75
76 return Ok;
77}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, onert_micro::core::QuantizationParams::scale, and onert_micro::core::QuantizationParams::zero_point.

◆ QuantizedZeroPad()

template<typename T >
OMStatus onert_micro::execute::pal::QuantizedZeroPad ( const core::PadParams op_params,
const core::OMRuntimeShape input_shape,
const onert_micro::core::QuantizationParams input_qparams,
const T *  input_data,
const core::OMRuntimeShape output_shape,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data 
)

Definition at line 115 of file PALPad.h.

120{
121 // TODO reduce code duplication with Pad
122
123 // Runtime calls are currently fixed at 5 dimensions. Copy inputs so we can
124 // pad them to 5 dims (yes, we are "padding the padding").
125 int left_padding_copy[padKernelMaxDimensionCount];
126 for (int &i : left_padding_copy)
127 {
128 i = 0;
129 }
130 for (int i = 0; i < op_params.left_padding_count; ++i)
131 {
132 left_padding_copy[i + padKernelMaxDimensionCount - op_params.left_padding_count] =
133 op_params.left_padding[i];
134 }
135 int right_padding_copy[padKernelMaxDimensionCount];
136 for (int &i : right_padding_copy)
137 {
138 i = 0;
139 }
140 for (int i = 0; i < op_params.right_padding_count; ++i)
141 {
142 right_padding_copy[i + padKernelMaxDimensionCount - op_params.right_padding_count] =
143 op_params.right_padding[i];
144 }
145 const auto extended_output =
146 core::OMRuntimeShape::extendedShape(padKernelMaxDimensionCount, output_shape);
147 const int output_batch = extended_output.dims(0);
148 const int output_plane = extended_output.dims(1);
149 const int output_height = extended_output.dims(2);
150 const int output_width = extended_output.dims(3);
151 const int output_depth = extended_output.dims(4);
152
153 const int left_b_padding = left_padding_copy[0];
154 const int left_p_padding = left_padding_copy[1];
155 const int left_h_padding = left_padding_copy[2];
156 const int left_w_padding = left_padding_copy[3];
157 const int left_d_padding = left_padding_copy[4];
158
159 const int right_b_padding = right_padding_copy[0];
160 const int right_p_padding = right_padding_copy[1];
161 const int right_h_padding = right_padding_copy[2];
162 const int right_w_padding = right_padding_copy[3];
163 const int right_d_padding = right_padding_copy[4];
164
165 const T *in_ptr = input_data;
166 T *out_ptr = output_data;
167 T pad_value = output_qparams.zero_point;
168 for (int out_b = 0; out_b < output_batch; ++out_b)
169 {
170 for (int out_p = 0; out_p < output_plane; ++out_p)
171 {
172 for (int out_h = 0; out_h < output_height; ++out_h)
173 {
174 for (int out_w = 0; out_w < output_width; ++out_w)
175 {
176 for (int out_d = 0; out_d < output_depth; ++out_d)
177 {
178 if (out_b < left_b_padding || out_b >= output_batch - right_b_padding ||
179 out_p < left_p_padding || out_p >= output_plane - right_p_padding ||
180 out_h < left_h_padding || out_h >= output_height - right_h_padding ||
181 out_w < left_w_padding || out_w >= output_width - right_w_padding ||
182 out_d < left_d_padding || out_d >= output_depth - right_d_padding)
183 {
184 *out_ptr++ = pad_value;
185 }
186 else
187 {
188 float result = static_cast<float>(
189 (*in_ptr - static_cast<T>(input_qparams.zero_point)) * input_qparams.scale);
190
191 // Quantize result
192 result = result / output_qparams.scale + output_qparams.zero_point;
193 result = std::max<float>(std::numeric_limits<T>::min(), result);
194 result = std::min<float>(std::numeric_limits<T>::max(), result);
195
196 *out_ptr++ = static_cast<T>(result);
197 in_ptr++;
198 }
199 }
200 }
201 }
202 }
203 }
204
205 return Ok;
206}
result
Definition infer.py:103

References onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::core::PadParams::left_padding, onert_micro::core::PadParams::left_padding_count, onert_micro::Ok, output_shape, onert_micro::core::PadParams::right_padding, onert_micro::core::PadParams::right_padding_count, onert_micro::core::QuantizationParams::scale, and onert_micro::core::QuantizationParams::zero_point.

Referenced by onert_micro::execute::execute_kernel_CirclePad().

◆ Reduce()

template<typename T , template< typename > class ReduceFn>
bool onert_micro::execute::pal::Reduce ( OMReduceDataContext< T > &  ctx,
bool  mean = false 
)

Definition at line 291 of file PALReduceCommon.h.

292{
293 // Special case mean implementation exists for 4D mean across axes 1
294 // and 2
295 const int *axis_value = ctx.Axis().Data().Get();
296 bool special_case_4d_axes_1_and_2 =
297 ctx.Input().DimsCount() == 4 && ctx.Axis().ShapeFlatSize() == 2 &&
298 ((axis_value[0] == 1 && axis_value[1] == 2) || (axis_value[0] == 2 && axis_value[1] == 1));
299 if (special_case_4d_axes_1_and_2)
300 {
301 OMRuntimeShape input_shape(ctx.Input().Shape());
302 OMRuntimeShape output_shape(ctx.Output().Shape());
303 MeanROWH<T>(input_shape, ctx.Input().Data().Get(), output_shape, ctx.Output().Data().Get());
304 return true;
305 }
306
307 constexpr static T kInitValue = T(0);
308
309 if (!ReduceGeneric<T, ReduceFn>(ctx))
310 {
311 return false;
312 }
313
314 auto &input = ctx.Input();
315 auto input_dims = input.Dims();
316 auto input_num_dims = input.DimsCount();
317
318 auto &output = ctx.Output();
319 auto &output_data = output.Data();
320 auto num_outputs = output.ShapeFlatSize();
321
322 auto &axis = ctx.Axis().Data();
323 auto num_axis_dimensions = ctx.Axis().DimsCount();
324
325 // Resolve axis again for computing mean
326 int num_resolved_axis = 0;
327 int resolved_axis[2];
328
329 if (!resolveAxis(input_num_dims, axis.Get(), num_axis_dimensions, resolved_axis,
330 &num_resolved_axis))
331 {
332 return false;
333 }
334
335 // clang-format off
336
337 auto fnReduceOutput = [&](size_t divide_by = 1)
338 {
339 for (size_t idx = 0; idx < num_outputs; ++idx)
340 {
341 auto value = output_data.ValueAt(idx);
342 value /= static_cast<T>(divide_by);
343 output_data.SetAt(idx, value);
344 }
345 };
346
347 // clang-format on
348
349 if (!mean)
350 {
351 fnReduceOutput();
352 return true;
353 }
354
355 // Calculate mean by dividing output_data by num of aggregated element.
356 size_t num_elements_in_axis = 1;
357 for (int idx = 0; idx < num_resolved_axis; ++idx)
358 {
359 size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
360 // Overflow prevention.
361 if (current > (std::numeric_limits<size_t>::max() / num_elements_in_axis))
362 {
363 return false;
364 }
365 num_elements_in_axis *= current;
366 }
367
368 if (num_elements_in_axis > 0)
369 {
370 fnReduceOutput(num_elements_in_axis);
371 }
372
373 return true;
374}

References onert_micro::core::OMDataContext< T, Mixins >::Input(), onert_micro::core::OMDataContext< T, Mixins >::Output(), output_shape, and resolveAxis().

◆ ReduceDimensionsForBroadcast()

template<int MAX_DIM = 6>
bool onert_micro::execute::pal::ReduceDimensionsForBroadcast ( const core::OMRuntimeShape input1_shape,
const core::OMRuntimeShape input2_shape,
size_t *  compressed_input1_stride,
size_t *  compressed_input2_stride,
size_t *  compressed_output_shape 
)

Definition at line 242 of file PALUtils.h.

246{
247 size_t num_compressed_dims = 0;
248 size_t compressed_input1_shape[MAX_DIM];
249 size_t compressed_input2_shape[MAX_DIM];
250 std::fill(compressed_input1_shape, compressed_input1_shape + MAX_DIM, 1);
251 std::fill(compressed_input2_shape, compressed_input2_shape + MAX_DIM, 1);
252 std::fill(compressed_output_shape, compressed_output_shape + MAX_DIM, 1);
253 bool broadcast_input1 = false;
254 bool broadcast_input2 = false;
255 bool first_nonunit = true;
256
257 if (input1_shape.dimensionsCount() < 0 || input2_shape.dimensionsCount() < 0)
258 {
259 return false;
260 }
261 const size_t num_input1_dims = input1_shape.dimensionsCount();
262 const size_t num_input2_dims = input2_shape.dimensionsCount();
263 const int32_t *input1_dims = input1_shape.dimsData();
264 const int32_t *input2_dims = input2_shape.dimsData();
265 const size_t num_common_dims = std::min(num_input1_dims, num_input2_dims);
266 for (size_t i = 1; i <= num_common_dims; i++)
267 {
268 if (input1_dims[num_input1_dims - i] < 0 || input2_dims[num_input2_dims - i] < 0)
269 {
270 return false;
271 }
272 const size_t input1_dim = input1_dims[num_input1_dims - i];
273 const size_t input2_dim = input2_dims[num_input2_dims - i];
274 if (input1_dim == 0 || input2_dim == 0)
275 {
276 return false;
277 }
278 if (input1_dim == 1 && input2_dim == 1)
279 {
280 continue;
281 }
282 assert(!broadcast_input1 || !broadcast_input2);
283
284 if (input1_dim == 1)
285 {
286 if (!broadcast_input1)
287 {
288 broadcast_input1 = true;
289 broadcast_input2 = false;
290 num_compressed_dims++;
291 }
292 compressed_input2_shape[num_compressed_dims - 1] *= input2_dim;
293 compressed_output_shape[num_compressed_dims - 1] *= input2_dim;
294 }
295 else if (input2_dim == 1)
296 {
297 if (!broadcast_input2)
298 {
299 broadcast_input1 = false;
300 broadcast_input2 = true;
301 num_compressed_dims++;
302 }
303 compressed_input1_shape[num_compressed_dims - 1] *= input1_dim;
304 compressed_output_shape[num_compressed_dims - 1] *= input1_dim;
305 }
306 else
307 {
308 assert(input1_dim == input2_dim);
309 if (broadcast_input1 || broadcast_input2 || first_nonunit)
310 {
311 broadcast_input1 = false;
312 broadcast_input2 = false;
313 num_compressed_dims++;
314 }
315 compressed_input1_shape[num_compressed_dims - 1] *= input1_dim;
316 compressed_input2_shape[num_compressed_dims - 1] *= input1_dim;
317 compressed_output_shape[num_compressed_dims - 1] *= input1_dim;
318 }
319 first_nonunit = false;
320 }
321 if (num_input1_dims > num_input2_dims)
322 {
323 if (!broadcast_input2)
324 {
325 num_compressed_dims++;
326 }
327 for (size_t i = 0; i < num_input1_dims - num_input2_dims; i++)
328 {
329 if (input1_dims[i] < 0)
330 return false;
331 const size_t input1_dim = input1_dims[i];
332 if (input1_dim == 0)
333 {
334 return false;
335 }
336 compressed_input1_shape[num_compressed_dims - 1] *= input1_dim;
337 compressed_output_shape[num_compressed_dims - 1] *= input1_dim;
338 }
339 }
340 else if (num_input2_dims > num_input1_dims)
341 {
342 if (!broadcast_input1)
343 {
344 num_compressed_dims++;
345 }
346 for (size_t i = 0; i < num_input2_dims - num_input1_dims; i++)
347 {
348 if (input2_dims[i] < 0)
349 return false;
350 const size_t input2_dim = input2_dims[i];
351 if (input2_dim == 0)
352 {
353 return false;
354 }
355 compressed_input2_shape[num_compressed_dims - 1] *= input2_dim;
356 compressed_output_shape[num_compressed_dims - 1] *= input2_dim;
357 }
358 }
359 num_compressed_dims = (num_compressed_dims > 1) ? num_compressed_dims : 1;
360
361 int input1_stride = 1;
362 int input2_stride = 1;
363 for (int i = 0; i < MAX_DIM; ++i)
364 {
365 compressed_input1_stride[i] = input1_stride;
366 input1_stride *= compressed_input1_shape[i];
367 compressed_input2_stride[i] = input2_stride;
368 input2_stride *= compressed_input2_shape[i];
369 }
370 for (int i = 0; i < MAX_DIM; ++i)
371 {
372 if (compressed_input1_shape[i] != compressed_input2_shape[i])
373 {
374 if (compressed_input1_shape[i] == 1)
375 {
376 compressed_input1_stride[i] = 0;
377 }
378 else
379 {
380 assert(compressed_input2_shape[i] == 1);
381 compressed_input2_stride[i] = 0;
382 }
383 }
384 }
385 return true;
386}

References onert_micro::core::OMRuntimeShape::dimensionsCount(), and onert_micro::core::OMRuntimeShape::dimsData().

◆ reducedOutputOffset()

size_t onert_micro::execute::pal::reducedOutputOffset ( const int32_t  num_dims,
const int32_t *  dims,
const int32_t *  index,
const int32_t  num_axis,
const int32_t *  axis 
)
inline

Definition at line 143 of file PALUtils.h.

145{
146 if (num_dims == 0)
147 {
148 return 0;
149 }
150 size_t offset = 0;
151 for (int idx = 0; idx < num_dims; ++idx)
152 {
153 // if we need to skip this axis
154 bool is_axis = false;
155 if (axis != nullptr)
156 {
157 for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
158 {
159 if (idx == axis[axis_idx])
160 {
161 is_axis = true;
162 break;
163 }
164 }
165 }
166 if (!is_axis)
167 {
168 offset = offset * static_cast<size_t>(dims[idx]) + static_cast<size_t>(index[idx]);
169 }
170 }
171 return offset;
172}
std::vector< int > dims(const std::string &src)
Definition Utils.h:35

References offset().

Referenced by ReduceGeneric(), and ReduceGeneric().

◆ ReduceGeneric() [1/2]

template<typename T >
bool onert_micro::execute::pal::ReduceGeneric ( const T *  input_data,
const int *  input_dims,
const int  input_num_dims,
T *  output_data,
const int *  axis,
const int64_t  num_axis_dimensions,
init_value,
const int  output_flat_size,
T(*reducer)(const T current, const T in) 
)
inline

Definition at line 138 of file PALReduceCommon.h.

141{
142 // Return early when input shape has zero dim.
143 for (int i = 0; i < input_num_dims; ++i)
144 {
145 if (input_dims[i] == 0)
146 return false;
147 }
148
149 for (size_t idx = 0; idx < output_flat_size; ++idx)
150 {
151 output_data[idx] = init_value;
152 }
153
154 // Resolve axis.
155 int num_resolved_axis = 0;
156 int resolved_axis[2];
157
158 if (!resolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis, &num_resolved_axis))
159 {
160 return false;
161 }
162
163 int temp_index[5];
164 // Reset input iterator.
165 for (int idx = 0; idx < input_num_dims; ++idx)
166 {
167 temp_index[idx] = 0;
168 }
169 // Iterate through input_data.
170 do
171 {
172 size_t input_offset = reducedOutputOffset(input_num_dims, input_dims, temp_index, 0, nullptr);
173 size_t output_offset =
174 reducedOutputOffset(input_num_dims, input_dims, temp_index, num_resolved_axis, axis);
175 output_data[output_offset] = reducer(output_data[output_offset], input_data[input_offset]);
176 } while (nextIndex(input_num_dims, input_dims, temp_index));
177
178 return true;
179}
bool nextIndex(const int num_dims, const int *dims, int *current)
Definition PALUtils.h:148
size_t reducedOutputOffset(const int num_dims, const int *dims, const int *index, const int num_axis, const int *axis)
Definition PALUtils.h:116

References nextIndex(), reducedOutputOffset(), and resolveAxis().

◆ ReduceGeneric() [2/2]

template<typename T , template< typename > class ReduceFn>
bool onert_micro::execute::pal::ReduceGeneric ( OMReduceDataContext< T > &  ctx)

Definition at line 228 of file PALReduceCommon.h.

229{
230 auto &input = ctx.Input();
231 auto &input_data = input.Data();
232 auto input_dims = input.Dims();
233 auto input_num_dims = input.DimsCount();
234
235 auto &output = ctx.Output();
236 auto &output_data = output.Data();
237 auto output_flat_size = output.ShapeFlatSize();
238
239 auto &axis_ctx = ctx.Axis();
240 auto &axis = axis_ctx.Data();
241 auto num_axis_dimensions = axis_ctx.DimsCount();
242
243 // Return early when input shape has zero dim.
244 for (size_t i = 0; i < input_num_dims; ++i)
245 {
246 if (input_dims[i] == 0)
247 return false;
248 }
249
250 for (size_t idx = 0; idx < output_flat_size; ++idx)
251 {
252 output_data.SetValueAt(idx, ReduceFn<T>::InitValue);
253 }
254
255 // Resolve axis.
256 int num_resolved_axis = 0;
257 int resolved_axis[2];
258
259 if (!resolveAxis(input_num_dims, axis.Get(), num_axis_dimensions, resolved_axis,
260 &num_resolved_axis))
261 {
262 return false;
263 }
264
265 int temp_index[5];
266 // Reset input iterator.
267 for (size_t idx = 0; idx < input_num_dims; ++idx)
268 {
269 temp_index[idx] = 0;
270 }
271
272 // Iterate through input_data.
273 do
274 {
275 size_t input_offset = reducedOutputOffset(input_num_dims, input_dims, temp_index, 0, nullptr);
276 size_t output_offset =
277 reducedOutputOffset(input_num_dims, input_dims, temp_index, num_resolved_axis, axis.Get());
278
279 ReduceFn<T> reducer;
280 auto value = reducer(output_data.ValueAt(output_offset), input_data.ValueAt(input_offset));
281 output_data.SetValueAt(output_offset, value);
282
283 } while (nextIndex(input_num_dims, input_dims, temp_index));
284
285 return true;
286}

References onert_micro::core::OMDataContext< T, Mixins >::Input(), nextIndex(), onert_micro::core::OMDataContext< T, Mixins >::Output(), reducedOutputOffset(), and resolveAxis().

◆ reduceSumImpl()

template<typename T >
bool onert_micro::execute::pal::reduceSumImpl ( const T *  input_data,
const int *  input_dims,
const int  input_num_dims,
T *  output_data,
const int *  axis,
const int  num_axis,
const int  num_outputs 
)
inline

Definition at line 183 of file PALReduceCommon.h.

186{
187 return ReduceGeneric<T>(input_data, input_dims, input_num_dims, output_data, axis, num_axis,
188 static_cast<T>(0), num_outputs,
189 [](const T current, const T in) -> T { return in + current; });
190}

◆ ReLUCommon()

template<typename Type >
OMStatus onert_micro::execute::pal::ReLUCommon ( const int  flat_size,
const Type *  input_data,
Type *  output_data,
const float  alpha,
const bool  is_relu_6 
)

Definition at line 33 of file PALReluCommon.h.

35{
36 const Type relu_6_value = 6.0f;
37 for (int i = 0; i < flat_size; i++)
38 {
39 const Type val = input_data[i];
40 Type result = val > 0 ? val : val * alpha;
41 result = is_relu_6 ? (result > relu_6_value ? relu_6_value : result) : result;
42 output_data[i] = result;
43 }
44
45 return Ok;
46}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_relu_common().

◆ ReLUCommon< int8_t >()

template<>
OMStatus onert_micro::execute::pal::ReLUCommon< int8_t > ( const int  flat_size,
const int8_t *  input_data,
int8_t *  output_data,
const float  alpha,
const bool  is_relu_6 
)

Definition at line 33 of file PALRelu.h.

35{
36 // 1. Relu
37 if (is_relu_6 == false && alpha == 0)
38 {
39 memcpy(output_data, input_data, flat_size);
40 arm_relu_q7(output_data, flat_size);
41 }
42 // 2. Relu6
43 else if (is_relu_6 && alpha == 0)
44 {
45 memcpy(output_data, input_data, flat_size);
46 arm_relu6_s8(output_data, flat_size);
47 }
48 // 3. Leaky_Relu not supported by cmsis_nn
49 else if (alpha != 0)
50 {
51 for (int i = 0; i < flat_size; i++)
52 {
53 const int8_t val = input_data[i];
54 int8_t result = val > 0 ? val : val * alpha;
55 output_data[i] = result;
56 }
57 }
58
59 return Ok;
60}

References onert_micro::Ok.

◆ resolveAxis()

bool onert_micro::execute::pal::resolveAxis ( const int  num_dims,
const int *  axis,
const int64_t  num_axis,
int *  out_axis,
int *  out_num_axis 
)
inline

Definition at line 90 of file PALReduceCommon.h.

92{
93 *out_num_axis = 0; // Just in case.
94 // Short-circuit axis resolution for scalars; the axis will go unused.
95 if (num_dims == 0)
96 {
97 return true;
98 }
 99 // O(n^2) is fine since out_num_axis should be really small, mostly <= 4
100 for (int64_t idx = 0; idx < num_axis; ++idx)
101 {
102 // Handle negative index. A positive index 'p_idx' can be represented as a
103 // negative index 'n_idx' as: n_idx = p_idx-num_dims
 104 // e.g.: For num_dims=3, [0, 1, 2] is the same as [-3, -2, -1]
105 int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
106 if (current < 0 || current >= num_dims)
107 {
108 return false;
109 }
110 bool is_dup = false;
111 for (int j = 0; j < *out_num_axis; ++j)
112 {
113 if (out_axis[j] == current)
114 {
115 is_dup = true;
116 break;
117 }
118 }
119 if (!is_dup)
120 {
121 if (*out_num_axis > 1)
122 {
123 return false;
124 }
125 out_axis[*out_num_axis] = current;
126 *out_num_axis += 1;
127 }
128 }
129 return true;
130}

Referenced by Reduce(), ReduceGeneric(), and ReduceGeneric().

◆ Round()

template<typename T >
OMStatus onert_micro::execute::pal::Round ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

◆ Round< float >()

template<>
OMStatus onert_micro::execute::pal::Round< float > ( const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 38 of file PALRoundCommon.h.

40{
41 const uint32_t flat_size = input_shape.flatSize();
42
43 if (flat_size == -1)
44 return UnknownError;
45
46 assert(input_data != nullptr);
47 assert(output_data != nullptr);
48
49 assert(input_shape == output_shape);
50
51 for (int i = 0; i < flat_size; i++)
52 {
 53 // Note that this implementation matches that of TensorFlow's tf.round
 54 // and corresponds to the banker's rounding (round-half-to-even) method.
55 auto floor_val = std::floor(input_data[i]);
56 auto diff = input_data[i] - floor_val;
57 if ((diff < 0.5f) || ((diff == 0.5f) && (static_cast<int>(floor_val) % 2 == 0)))
58 {
59 output_data[i] = floor_val;
60 }
61 else
62 {
63 output_data[i] = floor_val + 1.0f;
64 }
65 }
66
67 return Ok;
68}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ roundingDivideByPOT()

int32_t onert_micro::execute::pal::roundingDivideByPOT ( int32_t  x,
int32_t  exponent 
)
inline

Definition at line 92 of file PALUtils.h.

93{
94 assert(exponent >= 0);
95 assert(exponent <= 31);
96 const int32_t mask = int32_t((1ll << exponent) - 1);
97 const int32_t zero = int32_t(0);
98 const int32_t one = int32_t(1);
99 const int32_t remainder = x & mask;
100 const int32_t threshold = (mask >> 1) + ((x < zero ? one : zero) & one);
101 return (x >> exponent) + ((remainder > threshold ? one : zero) & one);
102}

Referenced by multiplyByQuantizedMultiplier(), and multiplyByQuantizedMultiplierSmallerThanOneExp().

◆ Rsqrt()

template<typename T >
OMStatus onert_micro::execute::pal::Rsqrt ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALRsqrtCommon.h.

36{
37 return SISOOperation<T>(input_shape, input_data, output_shape, output_data,
38 [](T arg) -> T { return 1.f / std::sqrt(arg); });
39}

References output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleRsqrt().

◆ saturatingRoundingDoublingHighMul()

std::int32_t onert_micro::execute::pal::saturatingRoundingDoublingHighMul ( std::int32_t  a,
std::int32_t  b 
)
inline

Definition at line 79 of file PALUtils.h.

80{
81 bool overflow = a == b && a == std::numeric_limits<std::int32_t>::min();
82 std::int64_t a_64(a);
83 std::int64_t b_64(b);
84 std::int64_t ab_64 = a_64 * b_64;
85 std::int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
86 std::int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
87 return overflow ? std::numeric_limits<std::int32_t>::max() : ab_x2_high32;
88}

Referenced by multiplyByQuantizedMultiplier(), and multiplyByQuantizedMultiplierSmallerThanOneExp().

◆ Select()

template<typename D , typename T >
void onert_micro::execute::pal::Select ( const core::OMRuntimeShape input_condition_shape,
const D *  input_condition_data,
const core::OMRuntimeShape input_x_shape,
const T *  input_x_data,
const core::OMRuntimeShape input_y_shape,
const T *  input_y_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 33 of file PALSelectV2.h.

37{
38 int64_t flatsize;
39 // Allow select operator executions on mixed scalar tensors and one element
40 // tensors.
41 if (input_condition_shape.flatSize() == 1 && input_x_shape.flatSize() == 1 &&
42 input_y_shape.flatSize() == 1 && output_shape.flatSize() == 1)
43 {
44 flatsize = 1;
45 }
46 else
47 {
48 flatsize = input_condition_shape.flatSize();
49 }
50 for (int64_t i = 0; i < flatsize; ++i)
51 {
52 output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i];
53 }
54}

References luci_interpreter::RuntimeShape::flatSize(), onert_micro::core::OMRuntimeShape::flatSize(), and output_shape.

◆ Sin()

template<typename T >
OMStatus onert_micro::execute::pal::Sin ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 35 of file PALSinCommon.h.

37{
38 const uint32_t flat_size = input_shape.flatSize();
39
40 if (flat_size == -1)
41 return UnknownError;
42
43 assert(input_data != nullptr);
44 assert(output_data != nullptr);
45
46 assert(input_shape == output_shape);
47
48 for (int i = 0; i < flat_size; i++)
49 {
50 output_data[i] = std::sin(input_data[i]);
51 }
52
53 return Ok;
54}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleSin().

◆ SISOOperation() [1/2]

template<typename T >
OMStatus onert_micro::execute::pal::SISOOperation ( const core::OMRuntimeShape input_shape,
const onert_micro::core::QuantizationParams input_qparams,
const T *  input_data,
const core::OMRuntimeShape output_shape,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data,
std::function< float(float)> const &  func 
)
inline

Definition at line 54 of file PALSISOOperation.h.

59{
60 const uint32_t flat_size = input_shape.flatSize();
61
62 if (flat_size == -1)
63 return UnknownError;
64
65 assert(input_data != nullptr);
66 assert(output_data != nullptr);
67
68 assert(input_shape == output_shape);
69
70 for (int i = 0; i < flat_size; i++)
71 {
72 // Dequantize input
73 float result = static_cast<float>((input_data[i] - static_cast<T>(input_qparams.zero_point)) *
74 input_qparams.scale);
75 // float result
76 result = func(result);
77
78 // Quantize result to output type
79 result = result / output_qparams.scale + output_qparams.zero_point;
80 result = std::max<float>(std::numeric_limits<T>::min(), result);
81 result = std::min<float>(std::numeric_limits<T>::max(), result);
82
83 output_data[i] = static_cast<T>(result);
84 }
85
86 return Ok;
87}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, onert_micro::core::QuantizationParams::scale, onert_micro::UnknownError, and onert_micro::core::QuantizationParams::zero_point.

◆ SISOOperation() [2/2]

template<typename T >
OMStatus onert_micro::execute::pal::SISOOperation ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data,
std::function< T(T)> const &  func 
)
inline

Definition at line 31 of file PALSISOOperation.h.

34{
35 const uint32_t flat_size = input_shape.flatSize();
36
37 if (flat_size == -1)
38 return UnknownError;
39
40 assert(input_data != nullptr);
41 assert(output_data != nullptr);
42
43 assert(input_shape == output_shape);
44
45 for (int i = 0; i < flat_size; i++)
46 {
47 output_data[i] = func(input_data[i]);
48 }
49
50 return Ok;
51}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ Slice()

template<typename T >
OMStatus onert_micro::execute::pal::Slice ( const core::SliceParams op_params,
const core::OMRuntimeShape input_shape,
const T *  input_data,
T *  output_data 
)

Definition at line 29 of file PALSlice.h.

31{
32 const core::OMRuntimeShape ext_shape = core::OMRuntimeShape::extendedShape(5, input_shape);
33 const int begin_count = op_params.begin_count;
34 const int size_count = op_params.size_count;
35 // We front-pad the begin and size vectors.
36 int start[5];
37 int stop[5];
38 for (int i = 0; i < 5; ++i)
39 {
40 int padded_i = 5 - i;
41 start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
42 stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
43 ? ext_shape.dims(i)
44 : start[i] + op_params.size[size_count - padded_i];
45 }
46
47 for (int i0 = start[0]; i0 < stop[0]; ++i0)
48 {
49 for (int i1 = start[1]; i1 < stop[1]; ++i1)
50 {
51 for (int i2 = start[2]; i2 < stop[2]; ++i2)
52 {
53 for (int i3 = start[3]; i3 < stop[3]; ++i3)
54 {
55 for (int i4 = start[4]; i4 < stop[4]; ++i4)
56 {
57 auto position =
58 (((i0 * ext_shape.dims(1) + i1) * ext_shape.dims(2) + i2) * ext_shape.dims(3) + i3) *
59 ext_shape.dims(4) +
60 i4;
61 *output_data++ = input_data[position];
62 }
63 }
64 }
65 }
66 }
67 return Ok;
68}
int8_t size_count
Definition Slice.cpp:34
int8_t begin_count
Definition Slice.cpp:32

References onert_micro::core::SliceParams::begin, begin_count, onert_micro::core::SliceParams::begin_count, onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::Ok, onert_micro::core::SliceParams::size, size_count, and onert_micro::core::SliceParams::size_count.

Referenced by onert_micro::execute::execute_kernel_CircleSlice().

◆ Softmax()

template<typename T , typename U >
OMStatus onert_micro::execute::pal::Softmax ( const core::SoftmaxParams params,
const T *  input_data,
U *  output_data 
)

Definition at line 33 of file PALSoftmaxCommon.h.

34{
35 const int outer_size = params.num_rows;
36 const int depth = params.row_size;
37 const double beta = params.beta;
38
39 const float input_scale = params.input_scale;
40 const float output_scale = params.output_scale;
41
42 const int input_zp = params.input_zp;
43 const int output_zp = params.output_zp;
44
45 for (int i = 0; i < outer_size; ++i)
46 {
47 // Find max element value which we'll use to ensure numerical stability
48 // taking advantage of the following equality:
49 // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
50 float max = std::numeric_limits<float>::lowest();
51 for (int c = 0; c < depth; ++c)
52 {
53 auto t = input_data[i * depth + c] - input_zp;
54 auto t_f = static_cast<float>(input_data[i * depth + c] - input_zp);
55 float cur_val = static_cast<float>(input_data[i * depth + c] - input_zp) * input_scale;
56 max = std::max(max, cur_val);
57 }
58
59 static constexpr int32_t min_val = std::numeric_limits<U>::min();
60 static constexpr int32_t max_val = std::numeric_limits<U>::max();
61 // Compute sum.
62 float sum = 0.f;
63 for (int c = 0; c < depth; ++c)
64 {
65 float cur_val = static_cast<float>(input_data[i * depth + c] - input_zp) * input_scale;
66 const auto exp_c = static_cast<float>(std::exp((cur_val - max) * beta));
67 sum += exp_c;
68 }
69
70 // Compute result.
71 for (int c = 0; c < depth; ++c)
72 {
73 float cur_val = static_cast<float>(input_data[i * depth + c] - input_zp) * input_scale;
74 const auto exp_c = static_cast<float>(std::exp((cur_val - max) * beta));
75 float softmax_val = exp_c / sum;
76 auto unclamped = static_cast<int32_t>(std::round(softmax_val / output_scale) +
77 static_cast<float>(output_zp));
78 int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
79 output_data[i * depth + c] = static_cast<U>(clamped);
80 }
81 }
82 return Ok;
83}

References onert_micro::core::SoftmaxParams::beta, onert_micro::core::SoftmaxParams::input_scale, onert_micro::core::SoftmaxParams::input_zp, onert_micro::core::SoftmaxParams::num_rows, onert_micro::Ok, onert_micro::core::SoftmaxParams::output_scale, onert_micro::core::SoftmaxParams::output_zp, and onert_micro::core::SoftmaxParams::row_size.

Referenced by onert_micro::execute::execute_kernel_CircleSoftmax().

◆ Softmax< float, float >()

template<>
OMStatus onert_micro::execute::pal::Softmax< float, float > ( const core::SoftmaxParams params,
const float *  input_data,
float *  output_data 
)

Definition at line 86 of file PALSoftmaxCommon.h.

88{
89 const int outer_size = params.num_rows;
90 const int depth = params.row_size;
91 const double beta = params.beta;
92
93 for (int i = 0; i < outer_size; ++i)
94 {
95 // Find max element value which we'll use to ensure numerical stability
96 // taking advantage of the following equality:
97 // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
98 float max = std::numeric_limits<float>::lowest();
99 for (int c = 0; c < depth; ++c)
100 {
101 max = std::max(max, input_data[i * depth + c]);
102 }
103
104 // Compute sum.
105 float sum = 0.f;
106 for (int c = 0; c < depth; ++c)
107 {
108 const float exp_c = std::exp((input_data[i * depth + c] - max) * static_cast<float>(beta));
109 output_data[i * depth + c] = exp_c;
110 sum += exp_c;
111 }
112
113 assert(sum != 0);
114
115 if (sum == 0)
116 return UnknownError;
117
118 // Compute result.
119 for (int c = 0; c < depth; ++c)
120 {
121 output_data[i * depth + c] = output_data[i * depth + c] / sum;
122 }
123 }
124 return Ok;
125}

References onert_micro::core::SoftmaxParams::beta, onert_micro::core::SoftmaxParams::num_rows, onert_micro::Ok, onert_micro::core::SoftmaxParams::row_size, and onert_micro::UnknownError.

◆ Softmax< int8_t, int8_t >()

◆ SpaceToBatchND()

template<typename T >
OMStatus onert_micro::execute::pal::SpaceToBatchND ( const core::OMRuntimeShape unextended_input1_shape,
const T *  input1_data,
const core::OMRuntimeShape unextended_input2_shape,
const int32_t *  block_shape_data,
const core::OMRuntimeShape unextended_input3_shape,
const int32_t *  paddings_data,
const core::OMRuntimeShape unextended_output_shape,
T *  output_data 
)
inline

Definition at line 51 of file PALSpaceToBatchNDCommon.h.

55{
56 // Extends the input/output shape from 3D to 4D if needed, NHC -> NH1C.
57 const core::OMRuntimeShape input1_shape = extendShapeSpaceToBatch(unextended_input1_shape);
58 const core::OMRuntimeShape output_shape = extendShapeSpaceToBatch(unextended_output_shape);
59
60 const int depth = input1_shape.dims(3);
61 const int input_width = input1_shape.dims(2);
62 const int input_height = input1_shape.dims(1);
63 const int input_batch_size = input1_shape.dims(0);
64
65 const int output_width = output_shape.dims(2);
66 const int output_height = output_shape.dims(1);
67 const int output_batch_size = output_shape.dims(0);
68
69 const int block_shape_height = block_shape_data[0];
70 const int block_shape_width =
71 unextended_input1_shape.dimensionsCount() == 4 ? block_shape_data[1] : 1;
72 const int padding_top = paddings_data[0];
73 const int padding_left = unextended_input1_shape.dimensionsCount() == 4 ? paddings_data[2] : 0;
74
75 const int32_t pad_value = 0;
76
77 for (int out_b = 0; out_b < output_batch_size; ++out_b)
78 {
79 int input_batch = out_b % input_batch_size;
80 int shift_w = (out_b / input_batch_size) % block_shape_width;
81 int shift_h = (out_b / input_batch_size) / block_shape_width;
82 for (int out_h = 0; out_h < output_height; ++out_h)
83 {
84 for (int out_w = 0; out_w < output_width; ++out_w)
85 {
86 T *out = output_data + offset(output_shape.dimsData(), out_b, out_h, out_w, 0);
87 if (out_h * block_shape_height + shift_h < padding_top ||
88 out_h * block_shape_height + shift_h >= padding_top + input_height ||
89 out_w * block_shape_width + shift_w < padding_left ||
90 out_w * block_shape_width + shift_w >= padding_left + input_width)
91 {
92 // This may not execute correctly when pad_value != 0 and T != uint8.
93 memset(out, pad_value, depth * sizeof(T));
94 }
95 else
96 {
97 const T *in =
98 input1_data + offset(input1_shape.dimsData(), input_batch,
99 (out_h * block_shape_height + shift_h) - padding_top,
100 (out_w * block_shape_width + shift_w) - padding_left, 0);
101 memcpy(out, in, depth * sizeof(T));
102 }
103 }
104 }
105 }
106 return Ok;
107}

References onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), offset(), onert_micro::Ok, and output_shape.

◆ SpaceToDepth()

template<typename T >
OMStatus onert_micro::execute::pal::SpaceToDepth ( const int32_t  block_size,
const core::OMRuntimeShape unextended_input_shape,
const T *  input_data,
const core::OMRuntimeShape unextended_output_shape,
T *  output_data 
)
inline

Definition at line 32 of file PALSpaceToDepthCommon.h.

36{
37 if (block_size == 0)
38 {
39 return FailedCheckCondition;
40 }
41
42 const core::OMRuntimeShape input_shape =
43 core::OMRuntimeShape::extendedShape(4, unextended_input_shape);
44 const core::OMRuntimeShape output_shape =
45 core::OMRuntimeShape::extendedShape(4, unextended_output_shape);
46
47 const int input_depth = input_shape.dims(3);
48 const int input_width = input_shape.dims(2);
49 const int input_height = input_shape.dims(1);
50 const int input_batch = input_shape.dims(0);
51
52 for (int in_b = 0; in_b < input_batch; ++in_b)
53 {
54 for (int in_h = 0; in_h < input_height; ++in_h)
55 {
56 for (int in_w = 0; in_w < input_width; ++in_w)
57 {
58 for (int in_d = 0; in_d < input_depth; ++in_d)
59 {
60 const int out_d =
61 in_d + ((in_h % block_size) * block_size + in_w % block_size) * input_depth;
62 const int out_w = in_w / block_size;
63 const int out_h = in_h / block_size;
64 const int out_b = in_b;
65
66 const int input_index = offset(input_shape.dimsData(), in_b, in_h, in_w, in_d);
67 const int output_index = offset(output_shape.dimsData(), out_b, out_h, out_w, out_d);
68
69 output_data[output_index] = input_data[input_index];
70 }
71 }
72 }
73 }
74 return Ok;
75}

References onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::FailedCheckCondition, offset(), onert_micro::Ok, and output_shape.

◆ Split()

template<typename T >
OMStatus onert_micro::execute::pal::Split ( const core::SplitParams params,
const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
int32_t  axis_value 
)

Definition at line 36 of file PALSplit.h.

38{
39 const auto output_count = params.num_outputs;
40
41 const auto split_dimensions = input_shape.dimensionsCount();
42
43 assert(axis_value < split_dimensions);
44 assert(output_shape.dimensionsCount() == split_dimensions);
45
46 int64_t outer_size = 1;
47 for (uint32_t i = 0; i < axis_value; ++i)
48 {
49 outer_size *= input_shape.dims(i);
50 }
51
52 int64_t base_inner_size = 1;
53 for (uint32_t i = axis_value + 1; i < split_dimensions; ++i)
54 {
55 base_inner_size *= input_shape.dims(i);
56 }
57
58 assert(input_data != nullptr);
59 for (int64_t k = 0; k < outer_size; ++k)
60 {
61 for (uint32_t i = 0; i < output_count; ++i)
62 {
63 T *output_data = core::utils::castOutputData<T>(params.output_data[i]);
64 assert(output_data != nullptr);
65 const auto copy_size = output_shape.dims(axis_value) * base_inner_size;
66 T *output_ptr = output_data + k * copy_size;
67 assert(output_ptr != nullptr);
68 for (int64_t j = 0; j < copy_size; ++j)
69 output_ptr[j] = input_data[j];
70 input_data += copy_size;
71 }
72 }
73 return Ok;
74}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::SplitParams::num_outputs, onert_micro::Ok, onert_micro::core::SplitParams::output_data, and output_shape.

◆ Sqrt()

template<typename T >
OMStatus onert_micro::execute::pal::Sqrt ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALSqrtCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::sqrt(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleSqrt().

◆ Square()

template<typename T >
OMStatus onert_micro::execute::pal::Square ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALSquareCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = (input_data[i]) * (input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleSquare().

◆ SquaredDifference()

template<typename T >
OMStatus onert_micro::execute::pal::SquaredDifference ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 29 of file PALSquaredDifferenceCommon.h.

31{
32 ArithmeticOp<T, SquaredDifferenceFn<T>>(params, flat_size, input1_data, input2_data, output_data);
33 return Ok;
34}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_kernel_CircleSquaredDifference().

◆ StridedSlice()

template<typename T >
OMStatus onert_micro::execute::pal::StridedSlice ( core::StridedSliceParams op_params,
const core::OMRuntimeShape unextended_input_shape,
const T *  input_data,
T *  output_data 
)

Definition at line 206 of file PALStridedSlice.h.

209{
210 const core::OMRuntimeShape input_shape =
211 core::OMRuntimeShape::extendedShape(5, unextended_input_shape);
212
213 // Reverse and pad to 5 dimensions because that is what the runtime code
214 // requires (ie. all shapes must be 5D and are given backwards).
215 stridedSlicePadIndices(&op_params, 5);
216
217 const int start_0 = startForAxis(op_params, input_shape, 0);
218 const int stop_0 = stopForAxis(op_params, input_shape, 0, start_0);
219 const int start_1 = startForAxis(op_params, input_shape, 1);
220 const int stop_1 = stopForAxis(op_params, input_shape, 1, start_1);
221 const int start_2 = startForAxis(op_params, input_shape, 2);
222 const int stop_2 = stopForAxis(op_params, input_shape, 2, start_2);
223 const int start_3 = startForAxis(op_params, input_shape, 3);
224 const int stop_3 = stopForAxis(op_params, input_shape, 3, start_3);
225 const int start_4 = startForAxis(op_params, input_shape, 4);
226 const int stop_4 = stopForAxis(op_params, input_shape, 4, start_4);
227
228 for (int offset_0 = start_0 * input_shape.dims(1), end_0 = stop_0 * input_shape.dims(1),
229 step_0 = op_params.strides[0] * input_shape.dims(1);
230 !loopCondition(offset_0, end_0, op_params.strides[0]); offset_0 += step_0)
231 {
232 for (int offset_1 = (offset_0 + start_1) * input_shape.dims(2),
233 end_1 = (offset_0 + stop_1) * input_shape.dims(2),
234 step_1 = op_params.strides[1] * input_shape.dims(2);
235 !loopCondition(offset_1, end_1, op_params.strides[1]); offset_1 += step_1)
236 {
237 for (int offset_2 = (offset_1 + start_2) * input_shape.dims(3),
238 end_2 = (offset_1 + stop_2) * input_shape.dims(3),
239 step_2 = op_params.strides[2] * input_shape.dims(3);
240 !loopCondition(offset_2, end_2, op_params.strides[2]); offset_2 += step_2)
241 {
242 for (int offset_3 = (offset_2 + start_3) * input_shape.dims(4),
243 end_3 = (offset_2 + stop_3) * input_shape.dims(4),
244 step_3 = op_params.strides[3] * input_shape.dims(4);
245 !loopCondition(offset_3, end_3, op_params.strides[3]); offset_3 += step_3)
246 {
247 for (int offset_4 = offset_3 + start_4, end_4 = offset_3 + stop_4;
248 !loopCondition(offset_4, end_4, op_params.strides[4]);
249 offset_4 += op_params.strides[4])
250 {
251 *output_data++ = input_data[offset_4];
252 }
253 }
254 }
255 }
256 }
257 return Ok;
258}

References onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::Ok, and onert_micro::core::StridedSliceParams::strides.

Referenced by onert_micro::execute::execute_kernel_CircleStridedSlice().

◆ Sub() [1/2]

OMStatus onert_micro::execute::pal::Sub ( const core::ArithmeticQuantParams params,
const uint32_t  flat_size,
const int8_t *  input1_data,
const int8_t *  input2_data,
int8_t *  output_data 
)

Definition at line 31 of file PALSub.h.

33{
34 ElementWise(flat_size, params, input1_data, input2_data, output_data, SubFunc);
35 return Ok;
36}
void ElementWise(const uint32_t size, const core::ArithmeticQuantParams &params, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))

References ElementWise(), onert_micro::Ok, and SubFunc().

◆ Sub() [2/2]

template<typename T >
OMStatus onert_micro::execute::pal::Sub ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 49 of file PALSubCommon.h.

51{
52 ArithmeticOp<T, SubFn<T>>(params, flat_size, input1_data, input2_data, output_data);
53 return Ok;
54}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_kernel_CircleSub().

◆ SubFunc()

int8_t onert_micro::execute::pal::SubFunc ( int8_t  x,
int8_t  y,
const core::ArithmeticQuantParams params 
)

Definition at line 29 of file PALSubCommon.h.

30{
31 const int32_t input1_val = params.input1_offset + x;
32 const int32_t input2_val = params.input2_offset + y;
33 const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
34 const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
35 const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
36 shifted_input1_val, params.input1_multiplier, params.input1_shift);
37 const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
38 shifted_input2_val, params.input2_multiplier, params.input2_shift);
39 const int32_t raw_sum = scaled_input1_val - scaled_input2_val;
40 const int32_t raw_output = multiplyByQuantizedMultiplierSmallerThanOneExp(
41 raw_sum, params.output_multiplier, params.output_shift) +
42 params.output_offset;
43 const int32_t clamped_output = std::min(params.quantized_activation_max,
44 std::max(params.quantized_activation_min, raw_output));
45 return static_cast<int8_t>(clamped_output);
46}

References onert_micro::core::ArithmeticQuantParams::input1_multiplier, onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input1_shift, onert_micro::core::ArithmeticQuantParams::input2_multiplier, onert_micro::core::ArithmeticQuantParams::input2_offset, onert_micro::core::ArithmeticQuantParams::input2_shift, onert_micro::core::ArithmeticQuantParams::left_shift, multiplyByQuantizedMultiplierSmallerThanOneExp(), onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, and onert_micro::core::ArithmeticQuantParams::quantized_activation_min.

Referenced by BroadcastSub4DSlow(), and Sub().

◆ subscriptToIndex() [1/2]

int onert_micro::execute::pal::subscriptToIndex ( const NdArrayDesc< 4 > &  desc,
int  i0,
int  i1,
int  i2,
int  i3 
)
inline

◆ subscriptToIndex() [2/2]

int onert_micro::execute::pal::subscriptToIndex ( const NdArrayDesc< 5 > &  desc,
int  indexes[5] 
)
inline

Definition at line 135 of file ProcessBroadcastShapes.h.

136{
137 return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
138 indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] + indexes[4] * desc.strides[4];
139}

References onert_micro::execute::pal::NdArrayDesc< N >::strides.

◆ SVDF() [1/2]

OMStatus onert_micro::execute::pal::SVDF ( const core::SVDFQuantParams params,
const int8_t *  input_data,
const int8_t *  weights_feature_data,
const int8_t *  weights_time_data,
const int32_t *  bias_data,
int8_t *  state_data,
int8_t *  output_data,
const core::OMRuntimeShape input_shape,
const core::OMRuntimeShape weights_feature_shape,
const core::OMRuntimeShape weights_time_shape,
const core::OMRuntimeShape bias_shape,
const core::OMRuntimeShape output_shape 
)

Definition at line 35 of file PALSVDF.h.

42{
43 cmsis_nn_dims input_dims;
44 input_dims.n = input_shape.dims(0);
45 input_dims.h = input_shape.dims(1);
46
47 cmsis_nn_dims weights_feature_dims;
48 weights_feature_dims.n = weights_feature_shape.dims(0);
49 weights_feature_dims.h = weights_feature_shape.dims(1);
50
51 cmsis_nn_dims weights_time_dims;
52 weights_time_dims.n = weights_time_shape.dims(0);
53 weights_time_dims.h = weights_time_shape.dims(1);
54
55 cmsis_nn_dims bias_dims;
56 bias_dims.n = bias_shape.dims(0);
57
58 cmsis_nn_dims state_dims;
59 state_dims.n = bias_shape.dims(0);
60 state_dims.h = bias_shape.dims(1);
61
62 cmsis_nn_dims output_dims;
63 output_dims.n = output_shape.dims(0);
64 output_dims.h = output_shape.dims(1);
65
66 cmsis_nn_svdf_params svdf_params;
67 svdf_params.rank = params.rank;
68 svdf_params.input_offset = params.input_zero_point;
69 svdf_params.output_offset = params.output_zero_point;
70
71 svdf_params.input_activation.min = INT16_MIN;
72 svdf_params.input_activation.max = INT16_MAX;
73
74 svdf_params.output_activation.min = INT8_MIN;
75 svdf_params.output_activation.max = INT8_MAX;
76
77 cmsis_nn_per_tensor_quant_params in_quant_params;
78 in_quant_params.multiplier = params.effective_scale_1_a;
79 in_quant_params.shift = params.effective_scale_1_b;
80
81 cmsis_nn_per_tensor_quant_params out_quant_params;
82 out_quant_params.multiplier = params.effective_scale_2_a;
83 out_quant_params.shift = params.effective_scale_2_b;
84
85 const int batch_size = input_shape.dims(0);
86 const int input_size = input_shape.dims(1);
87 const int num_filters = weights_feature_shape.dims(0);
88 const int num_units = num_filters / params.rank;
89
90 uint8_t *scratch_tensor_data;
91 OMStatus status = core::memory::OMMemoryManager::allocateMemory(
92 batch_size * num_filters * sizeof(int32_t), &scratch_tensor_data);
93 assert(status == Ok);
94 if (status != Ok)
95 return status;
96
97 uint8_t *scratch_output_tensor_data;
98 status = core::memory::OMMemoryManager::allocateMemory(batch_size * num_units * sizeof(int32_t),
99 &scratch_output_tensor_data);
100 assert(status == Ok);
101 if (status != Ok)
102 return status;
103
104 cmsis_nn_context scratch_ctx;
105 scratch_ctx.buf = reinterpret_cast<int32_t *>(scratch_tensor_data);
106
107 cmsis_nn_context scratch_output_ctx;
108 scratch_output_ctx.buf = reinterpret_cast<int32_t *>(scratch_output_tensor_data);
109
110 arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params,
111 &input_dims, input_data, &state_dims, state_data, &weights_feature_dims,
112 weights_feature_data, &weights_time_dims, weights_time_data, &bias_dims, bias_data,
113 &output_dims, output_data);
114
115 core::memory::OMMemoryManager::deallocateMemory(scratch_tensor_data);
116 core::memory::OMMemoryManager::deallocateMemory(scratch_output_tensor_data);
117
118 return Ok;
119}

References onert_micro::core::memory::OMMemoryManager::allocateMemory(), onert_micro::core::memory::OMMemoryManager::deallocateMemory(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::SVDFQuantParams::effective_scale_1_a, onert_micro::core::SVDFQuantParams::effective_scale_1_b, onert_micro::core::SVDFQuantParams::effective_scale_2_a, onert_micro::core::SVDFQuantParams::effective_scale_2_b, onert_micro::core::SVDFQuantParams::input_zero_point, onert_micro::Ok, output_shape, onert_micro::core::SVDFQuantParams::output_zero_point, and onert_micro::core::SVDFQuantParams::rank.

Referenced by onert_micro::execute::execute_kernel_CircleSVDF().

◆ SVDF() [2/2]

OMStatus onert_micro::execute::pal::SVDF ( const float *  input_data,
const float *  weights_feature_data,
const float *  weights_time_data,
const float *  bias_data,
float *  state_data,
float *  scratch_data,
float *  output_data,
const int  rank,
const int  input_size,
const int  batch_size,
const int  num_filters,
const int  num_units,
const int  memory_size,
const circle::ActivationFunctionType  activation 
)

Definition at line 138 of file PALSVDFCommon.h.

143{
144 // Left shift the activation_state.
145 {
146 float *new_state_start = state_data;
147 const float *old_state_start = state_data + 1;
148 const float *old_state_end = state_data + batch_size * num_filters * memory_size;
149 while (old_state_start != old_state_end)
150 {
151 *new_state_start++ = *old_state_start++;
152 }
153 }
154
155 // Note: no need to clear the latest activation, matmul is not accumulative.
156
157 // Compute conv1d(inputs, weights_feature).
158 // The activation_state's rightmost column is used to save current cycle
159 // activation. This is achieved by starting at state_ptr[memory_size - 1] and
160 // having the stride equal to memory_size.
161
162 // Perform batched matrix vector multiply operation:
163 {
164 const float *matrix = weights_feature_data;
165 const float *vector = input_data;
166 float *result = &state_data[memory_size - 1];
167 float *result_in_batch = result;
168 for (int i = 0; i < batch_size; ++i)
169 {
170 const float *matrix_ptr = matrix;
171 for (int j = 0; j < num_filters; ++j)
172 {
173 float dot_prod = 0.0f;
174 const float *vector_in_batch = vector + i * input_size;
175 for (int k = 0; k < input_size; ++k)
176 {
177 dot_prod += *matrix_ptr++ * *vector_in_batch++;
178 }
179 *result_in_batch = dot_prod;
180 result_in_batch += memory_size;
181 }
182 }
183 }
184
185 applyTimeWeightsBiasAndActivation(batch_size, memory_size, num_filters, num_units, rank,
186 weights_time_data, bias_data, activation, state_data,
187 scratch_data, output_data);
188 return Ok;
189}

References onert_micro::Ok.

◆ Tanh()

template<typename T >
OMStatus onert_micro::execute::pal::Tanh ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 31 of file PALTanhCommon.h.

33{
34 const uint32_t flat_size = input_shape.flatSize();
35
36 if (flat_size == -1)
37 return UnknownError;
38
39 assert(input_data != nullptr);
40 assert(output_data != nullptr);
41
42 assert(input_shape == output_shape);
43
44 for (int i = 0; i < flat_size; i++)
45 {
46 output_data[i] = std::tanh(input_data[i]);
47 }
48
49 return Ok;
50}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ Transpose()

template<typename T , int N = 5>
OMStatus onert_micro::execute::pal::Transpose ( const core::TransposeParams params,
const core::OMRuntimeShape unextended_input_shape,
const T *  input_data,
const core::OMRuntimeShape unextended_output_shape,
T *  output_data 
)
inline

Definition at line 78 of file PALTransposeCommon.h.

81{
82 // Transpose kernel only does rearranging values not numeric evaluations on
83 // each cell. It's safe to implement per size of scalar type and this trick
84 // keeps the total code size in a reasonable range.
85 OMStatus status;
86 switch (sizeof(T))
87 {
88 case 1:
89 status = TransposeImpl<int8_t, N>(
90 params, unextended_input_shape, reinterpret_cast<const int8_t *>(input_data),
91 unextended_output_shape, reinterpret_cast<int8_t *>(output_data));
92 break;
93 case 2:
94 status = TransposeImpl<int16_t, N>(
95 params, unextended_input_shape, reinterpret_cast<const int16_t *>(input_data),
96 unextended_output_shape, reinterpret_cast<int16_t *>(output_data));
97 break;
98
99 case 4:
100 status = TransposeImpl<int32_t, N>(
101 params, unextended_input_shape, reinterpret_cast<const int32_t *>(input_data),
102 unextended_output_shape, reinterpret_cast<int32_t *>(output_data));
103 break;
104 case 8:
105 status = TransposeImpl<int64_t, N>(
106 params, unextended_input_shape, reinterpret_cast<const int64_t *>(input_data),
107 unextended_output_shape, reinterpret_cast<int64_t *>(output_data));
108 break;
109 default:
110 status = UnknownError;
111 break;
112 }
113 return status;
114}

References onert_micro::UnknownError.

◆ TransposeConv()

template<typename T >
OMStatus onert_micro::execute::pal::TransposeConv ( const core::FloatConv2D params,
const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape filter_shape,
const T *  filter_data,
const T *  bias_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 35 of file PALTransposeConvCommon.h.

38{
39 assert(false && "Not IMPL yet");
40}

◆ TransposeConv< float >()

template<>
OMStatus onert_micro::execute::pal::TransposeConv< float > ( const core::FloatConv2D params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape filter_shape,
const float *  filter_data,
const float *  bias_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 42 of file PALTransposeConvCommon.h.

47{
48 const int stride_width = params->stride_w;
49 const int stride_height = params->stride_h;
50 const int pad_width = params->pad_w;
51 const int pad_height = params->pad_h;
52
53 const int batches = input_shape.dims(0);
54 const int input_depth = input_shape.dims(3);
55 const int output_depth = filter_shape.dims(0);
56 const int input_height = input_shape.dims(1);
57 const int input_width = input_shape.dims(2);
58 const int filter_height = filter_shape.dims(1);
59 const int filter_width = filter_shape.dims(2);
60 const int output_height = output_shape.dims(1);
61 const int output_width = output_shape.dims(2);
62 const float output_activation_min = params->activation_min;
63 const float output_activation_max = params->activation_max;
64
65 // Although transpose convolution simplifies to convolution with transposed
66 // weights for strides of 1, non-unitary striding complicates matters. To
67 // keep this reference implementation as clear as possible, we use a
68 // "scatter" access pattern, where we loop through all the input elements,
69 // computing their influence on the output, rather than looping through the
70 // output elements in the typical "gather" access pattern of a conv. We
71 // therefore must initialize the output array to zero.
72 const int num_elements = output_shape.flatSize();
73 for (int i = 0; i < num_elements; i++)
74 {
75 output_data[i] = 0.0f;
76 }
77
78 // Loop through input elements one at a time.
79 for (int batch = 0; batch < batches; ++batch)
80 {
81 for (int in_y = 0; in_y < input_height; ++in_y)
82 {
83 for (int in_x = 0; in_x < input_width; ++in_x)
84 {
85 for (int in_channel = 0; in_channel < input_depth; ++in_channel)
86 {
87 // Loop through the output elements it will influence
88 const int out_x_origin = (in_x * stride_width) - pad_width;
89 const int out_y_origin = (in_y * stride_height) - pad_height;
90 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
91 {
92 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
93 {
94 for (int out_channel = 0; out_channel < output_depth; ++out_channel)
95 {
96 // Compute output element location
97 const int out_x = out_x_origin + filter_x;
98 const int out_y = out_y_origin + filter_y;
99 // We cannot accumulate out of bounds
100 if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
101 (out_y < output_height))
102 {
103 float input_value =
104 input_data[offset(input_shape.dimsData(), batch, in_y, in_x, in_channel)];
105 float filter_value = filter_data[offset(filter_shape.dimsData(), out_channel,
106 filter_y, filter_x, in_channel)];
107 output_data[offset(output_shape.dimsData(), batch, out_y, out_x, out_channel)] +=
108 input_value * filter_value;
109 }
110 }
111 }
112 }
113 }
114 }
115 }
116 }
117
118 for (int batch = 0; batch < batches; ++batch)
119 {
120 for (int out_y = 0; out_y < output_height; ++out_y)
121 {
122 for (int out_x = 0; out_x < output_width; ++out_x)
123 {
124 for (int out_channel = 0; out_channel < output_depth; ++out_channel)
125 {
126 float acc =
127 output_data[offset(output_shape.dimsData(), batch, out_y, out_x, out_channel)];
128 if (bias_data)
129 acc += bias_data[out_channel];
130
131 output_data[offset(output_shape.dimsData(), batch, out_y, out_x, out_channel)] =
132 activationFunctionWithMinMax(acc, output_activation_min, output_activation_max);
133 }
134 }
135 }
136 }
137 return Ok;
138}

References onert_micro::core::FloatConv2D::activation_max, onert_micro::core::FloatConv2D::activation_min, activationFunctionWithMinMax(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), luci_interpreter::RuntimeShape::flatSize(), offset(), onert_micro::Ok, output_shape, onert_micro::core::FloatConv2D::pad_h, onert_micro::core::FloatConv2D::pad_w, onert_micro::core::FloatConv2D::stride_h, and onert_micro::core::FloatConv2D::stride_w.

Referenced by onert_micro::execute::execute_kernel_CircleTransposeConv().

◆ TransposeImpl()

template<typename T , int N>
OMStatus onert_micro::execute::pal::TransposeImpl ( const core::TransposeParams &  params,
const core::OMRuntimeShape &  unextended_input_shape,
const T *  input_data,
const core::OMRuntimeShape &  unextended_output_shape,
T *  output_data 
)
inline

Definition at line 36 of file PALTransposeCommon.h.

40{
41 const int unextended_input_size = unextended_input_shape.dimensionsCount();
42 const int unextended_output_size = unextended_output_shape.dimensionsCount();
43
44 const int input_ext_size = N - unextended_input_size;
45 const int output_ext_size = N - unextended_output_size;
46 NdArrayDesc<N> input_desc;
47 NdArrayDesc<N> output_desc;
48 copyDimsToDesc(core::OMRuntimeShape::extendedShape(N, unextended_input_shape), &input_desc);
49 copyDimsToDesc(core::OMRuntimeShape::extendedShape(N, unextended_output_shape), &output_desc);
50
51 // The perm data is extended to match the output, each index incremented by
52 // the amount of front padding of the input shape.
53 int extended_perm[N];
54 for (int i = 0; i < N; ++i)
55 {
56 extended_perm[i] = i < output_ext_size ? i : params.perm[i - output_ext_size] + input_ext_size;
57 }
58
59 // Permutes the input shape so we don't need to permute the indexes inside
60 // the loop. Check to make sure output_dims is matching input_dims.
61 NdArrayDesc<N> perm_input_desc;
62 for (int k = 0; k < N; ++k)
63 {
64 perm_input_desc.extents[k] = input_desc.extents[extended_perm[k]];
65 perm_input_desc.strides[k] = input_desc.strides[extended_perm[k]];
66 }
67
68 // Naive transpose loop (iterate on output index and compute input index).
69 auto tranpose_func = [&](int indexes[N]) {
70 output_data[subscriptToIndex(output_desc, indexes)] =
71 input_data[subscriptToIndex(perm_input_desc, indexes)];
72 };
73 NDOpsHelper<N>(output_desc, tranpose_func);
74 return Ok;
75}
int strides[N]
Definition NDArray.h:45
int extents[N]
Definition NDArray.h:41

References copyDimsToDesc(), onert_micro::core::OMRuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::execute::pal::NdArrayDesc< N >::extents, onert_micro::Ok, onert_micro::core::TransposeParams::perm, onert_micro::execute::pal::NdArrayDesc< N >::strides, and subscriptToIndex().

◆ UnaryOp()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::UnaryOp ( const core::OMRuntimeShape &  input_shape,
const T *  input_data,
const core::OMRuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 40 of file PALUnaryOpCommon.h.

42{
43 Fn func;
44 const uint32_t flat_size = input_shape.flatSize();
45
46 if (flat_size == -1)
47 return UnknownError;
48
49 assert(input_data != nullptr);
50 assert(output_data != nullptr);
51
52 assert(input_shape == output_shape);
53 for (int i = 0; i < flat_size; ++i)
54 {
55 output_data[i] = func(input_data[i]);
56 }
57 return Ok;
58}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

◆ Unpack()

template<typename T >
OMStatus onert_micro::execute::pal::Unpack ( const core::SplitParams &  params,
const core::OMRuntimeShape &  input_shape,
const T *  input_data,
const core::OMRuntimeShape &  output_shape,
int32_t  axis_value 
)

Definition at line 31 of file PALUnpack.h.

33{
34 const auto dimensions = input_shape.dimensionsCount();
35
36 if (axis_value < 0)
37 {
38 axis_value += dimensions;
39 }
40
41 int outer_size = 1;
42 for (int i = 0; i < axis_value; ++i)
43 {
44 outer_size *= input_shape.dims(i);
45 }
46 int copy_size = 1;
47 for (int i = axis_value + 1; i < dimensions; ++i)
48 {
49 copy_size *= input_shape.dims(i);
50 }
51 int output_size = 1;
52 for (int i = 0; i < output_shape.dimensionsCount(); ++i)
53 {
54 output_size *= output_shape.dims(i);
55 }
56
57 for (int i = 0; i < params.num_outputs; ++i)
58 {
59 T *output_data = core::utils::castOutputData<T>(params.output_data[i]);
60 assert(output_data != nullptr);
61 for (int k = 0; k < outer_size; ++k)
62 {
63 T *output_ptr = output_data + copy_size * k;
64 int loc = k * params.num_outputs * copy_size + i * copy_size;
65 const T *input_ptr = input_data + loc;
66 for (int j = 0; j < copy_size; ++j)
67 output_ptr[j] = input_ptr[j];
68 }
69 }
70 return Ok;
71}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::SplitParams::num_outputs, onert_micro::Ok, onert_micro::core::SplitParams::output_data, and output_shape.

Variable Documentation

◆ MAX_INDICES_ND

constexpr int onert_micro::execute::pal::MAX_INDICES_ND = 5
constexpr

Definition at line 32 of file PALGatherND.h.

Referenced by onert_micro::import::configure_kernel_CircleGatherND(), and GatherND().