ONE - On-device Neural Engine
onert_micro::execute::pal Namespace Reference

Data Structures

struct  AddFn
 
struct  DivFn
 
struct  FloorDivFn
 
struct  FloorModFn
 
struct  LogicalAndFn
 
struct  LogicalOrFn
 
struct  MaximumFn
 
struct  MinimumFn
 
struct  MulFn
 
struct  NdArrayDesc
 
struct  ReduceMaxFn
 
struct  ReduceProductFn
 
class  Reducer
 
struct  ReduceSumFn
 
struct  SquaredDifferenceFn
 
struct  SubFn
 
struct  TanhFunctor
 

Functions

OMStatus Add (const core::ArithmeticQuantParams &params, const uint32_t flat_size, const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data)
 
template<typename T1 , typename T2 , typename T3 >
OMStatus ArgMax (const core::OMRuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, const core::OMRuntimeShape &output_shape, T2 *output_data)
 
OMStatus AveragePool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
OMStatus ConvPerChannel (const core::ConvQuant &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int32_t *bias_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
OMStatus DepthwiseConvPerChannel (const core::ConvQuant &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int32_t *bias_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
template<>
OMStatus FullyConnected< int8_t > (const core::FullyConnectedParams &params, const int8_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int32_t *bias_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
template<>
OMStatus FullyConnected (const core::FullyConnectedParams &params, const int16_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int64_t *bias_data, const core::OMRuntimeShape &output_shape, int16_t *output_data)
 
OMStatus MaxPool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
OMStatus Mul (const core::ArithmeticQuantParams &params, const uint32_t flat_size, const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data)
 
template<>
OMStatus ReLUCommon< int8_t > (const int flat_size, const int8_t *input_data, int8_t *output_data, const float alpha, const bool is_relu_6)
 
template<>
OMStatus Softmax< int8_t, int8_t > (const core::SoftmaxParams &params, const int8_t *input_data, int8_t *output_data)
 
OMStatus SVDF (const core::SVDFQuantParams &params, const int8_t *input_data, const int8_t *weights_feature_data, const int8_t *weights_time_data, const int32_t *bias_data, int8_t *state_data, int8_t *output_data, const core::OMRuntimeShape &input_shape, const core::OMRuntimeShape &weights_feature_shape, const core::OMRuntimeShape &weights_time_shape, const core::OMRuntimeShape &bias_shape, const core::OMRuntimeShape &output_shape)
 
template<typename T >
OMStatus Abs (const core::OMRuntimeShape &shape, const T *input_data, T *output_data)
 
int8_t AddFunc (int8_t x, int8_t y, const core::ArithmeticQuantParams &params)
 
template<typename T >
OMStatus Add (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastAdd4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus BroadcastAdd4DSlow (const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const int8_t *input1_data, const core::OMRuntimeShape &input2_shape, const int8_t *input2_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
template<typename T >
OMStatus AddN (const size_t flat_size, const size_t num_inputs, const T *const *input_data, T *output_data)
 
template<typename T1 , typename T2 , typename T3 , typename Cmp >
OMStatus ArgMinMax (const core::OMRuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, const core::OMRuntimeShape &output_shape, T2 *output_data, const Cmp &cmp)
 
template<typename T , typename Fn >
OMStatus ArithmeticOp (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T , typename Fn >
OMStatus QuantizedArithmeticOp (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const onert_micro::core::QuantizationParams &input1_qparams, const T *input1_data, const onert_micro::core::QuantizationParams &input2_qparams, const T *input2_data, const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
 
template<typename T >
void ElementWise (const uint32_t size, const core::ArithmeticQuantParams &params, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
template<typename T , typename Fn >
void ArithmeticOpScalar (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input_data, const T scalar_value, T *output_data)
 
template<typename T , typename Fn >
OMStatus BroadcastArithmeticOp4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T , typename Fn >
OMStatus QuantizedBroadcastArithmeticOp4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const onert_micro::core::QuantizationParams &input1_qparams, const T *input1_data, const core::OMRuntimeShape &input2_shape, const onert_micro::core::QuantizationParams &input2_qparams, const T *input2_data, const core::OMRuntimeShape &output_shape, const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
 
template<typename T >
void BroadcastInput1 (int size, const core::ArithmeticQuantParams &params, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
template<typename T >
void BroadcastInput2 (int size, const core::ArithmeticQuantParams &params, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
template<typename T >
void BroadcastRecursiveDimensions (const core::ArithmeticQuantParams &params, int dimension, size_t *input1_offset_p, size_t *input2_offset_p, size_t *output_offset, size_t *compressed_input1_stride, size_t *compressed_input2_stride, size_t *compressed_output_shape, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
template<typename T >
void BroadcastBinaryFunction6DSlow (const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
 
OMStatus AveragePool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T >
OMStatus BatchToSpaceND (const core::OMRuntimeShape &unextended_input1_shape, const T *input1_data, const core::OMRuntimeShape &unextended_input2_shape, const int32_t *block_shape_data, const core::OMRuntimeShape &unextended_input3_shape, const int32_t *crops_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T , typename Fn >
OMStatus BinaryOp (const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T , typename Fn >
OMStatus BroadcastBinaryOp4DSlow (const core::OMRuntimeShape &input1_shape, const float *input1_data, const core::OMRuntimeShape &input2_shape, const float *input2_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename FromT , typename ToT >
OMStatus Cast (const core::OMRuntimeShape &input_shape, const FromT *input_data, const core::OMRuntimeShape &output_shape, ToT *output_data)
 
template<typename T >
OMStatus Ceil (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
bool LessFn (T lhs, T rhs)
 
template<typename T >
bool LessEqualFn (T lhs, T rhs)
 
template<typename T >
bool EqualFn (T lhs, T rhs)
 
template<typename T >
bool GreaterFn (T lhs, T rhs)
 
template<typename T >
bool GreaterEqualFn (T lhs, T rhs)
 
template<typename T >
bool NotEqualFn (T lhs, T rhs)
 
template<typename T >
void ComparisonNoScaling (const int64_t flat_size, const T *input1_data, const T *input2_data, bool *output_data, bool F(T, T))
 
template<typename T , typename AccType >
void BroadcastComparison4DSlowWithScaling (const core::ComparisonParams &op_params, const core::OMRuntimeShape &unextended_input1_shape, const T *input1_data, const core::OMRuntimeShape &unextended_input2_shape, const T *input2_data, const core::OMRuntimeShape &unextended_output_shape, bool *output_data, bool F(AccType, AccType))
 
template<typename T , typename AccType >
void ComparisonWithScaling (const core::ComparisonParams &op_params, const int64_t flat_size, const T *input1_data, const T *input2_data, bool *output_data, bool F(AccType, AccType))
 
template<typename T >
void BroadcastComparison4DSlowNoScaling (const core::ComparisonParams &op_params, const core::OMRuntimeShape &unextended_input1_shape, const T *input1_data, const core::OMRuntimeShape &unextended_input2_shape, const T *input2_data, const core::OMRuntimeShape &unextended_output_shape, bool *output_data, bool F(T, T))
 
template<typename Scalar >
OMStatus Concatenation (core::ConcatenationParams &params, std::vector< uint32_t > &input_shapes, std::vector< const Scalar * > &input_data, const core::OMRuntimeShape &output_shape, Scalar *output_data)
 
OMStatus ConvFloat (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &filter_shape, const float *filter_data, const float *bias_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T >
OMStatus Cos (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus DepthwiseConv2D (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &filter_shape, const T *filter_data, const T *bias_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<>
OMStatus DepthwiseConv2D< float > (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &filter_shape, const float *filter_data, const float *bias_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename InputT , typename OutputT >
OMStatus Dequantize (const core::QuantizationParams op_params, const uint32_t flat_size, const InputT *input_data, OutputT *output_data)
 
template<typename T >
OMStatus Div (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastDiv4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus Elu (const int flat_size, const float *input_data, float *output_data)
 
template<typename T >
OMStatus Exp (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Fill (const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Floor (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
void FloorDiv (const int flat_size, const float *input1_data, const float *input2_data, float *output_data)
 
void BroadcastFloorDiv4DSlow (const core::OMRuntimeShape &input1_shape, const float *input1_data, const core::OMRuntimeShape &input2_shape, const float *input2_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
void FloorMod (const int flat_size, const float *input1_data, const float *input2_data, float *output_data)
 
void BroadcastFloorMod4DSlow (const core::OMRuntimeShape &input1_shape, const float *input1_data, const core::OMRuntimeShape &input2_shape, const float *input2_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename InputType , typename WeightType , typename OutputType , typename BiasType >
OMStatus FullyConnected (const core::FullyConnectedParams &params, const InputType *input_data, const core::OMRuntimeShape &filter_shape, const WeightType *filter_data, const BiasType *bias_data, const core::OMRuntimeShape &output_shape, OutputType *output_data)
 
template<typename WeightType >
OMStatus FullyConnected (const core::FullyConnectedParams &params, const float *input_data, const core::OMRuntimeShape &filter_shape, const WeightType *filter_data, const float *bias_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename ParamsT , typename IndicesT >
OMStatus GatherND (core::OMRuntimeShape params_shape, const ParamsT *param_data, core::OMRuntimeShape indices_shape, const IndicesT *index_data, ParamsT *output_data)
 
OMStatus GRU (const float *input_data, const float *weight_input_data, const float *weight_hidden_data, const float *bias_input_data, const float *bias_hidden_data, const float *hidden_state_data, float *output_data, float *output_input_data, float *output_hidden_data, const core::OMRuntimeShape &input_shape, const core::OMRuntimeShape &output_shape, const core::OMRuntimeShape &weight_input_shape, const core::OMRuntimeShape &weight_hidden_shape, const size_t intermediate_buffer_size, float *intermediate_buffer)
 
OMStatus L2Normalization (const core::L2NormalizationParams &params, const float *input_data, float *output_data)
 
OMStatus L2Pool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T >
OMStatus Log (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<class Fn >
OMStatus LogicalCommon (const int flat_size, const bool *input1_data, const bool *input2_data, bool *output_data)
 
OMStatus LogicalNot (const int flat_size, const bool *input_data, bool *output_data)
 
OMStatus Logistic (const int flat_size, const float *input_data, float *output_data)
 
OMStatus Logistic (const int flat_size, const int8_t *input_data, float input_scale, int input_zero_point, int8_t *output_data, float output_scale, int output_zero_point)
 
OMStatus LogSoftmax (const core::LogSoftmaxParams &params, const float *input_data, float *output_data)
 
OMStatus Maximum (const int flat_size, const float *input1_data, const float *input2_data, float *output_data)
 
OMStatus BroadcastMaximum4DSlow (const core::OMRuntimeShape &input1_shape, const float *input1_data, const core::OMRuntimeShape &input2_shape, const float *input2_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
OMStatus MaxPool (const core::Pool2DParams &params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
OMStatus Minimum (const int flat_size, const float *input1_data, const float *input2_data, float *output_data)
 
template<typename T >
OMStatus BroadcastMinimum4DSlow (const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Mul (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastMul4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus BroadcastMul6DSlow (const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Neg (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus Pad (const core::PadParams &op_params, const core::OMRuntimeShape &input_shape, const float *input_data, const float pad_value, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T >
OMStatus QuantizedZeroPad (const core::PadParams &op_params, const core::OMRuntimeShape &input_shape, const onert_micro::core::QuantizationParams &input_qparams, const T *input_data, const core::OMRuntimeShape &output_shape, const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
 
template<typename InputT , typename OutputT >
OMStatus Quantize (const core::QuantizationParams op_params, const uint32_t flat_size, const InputT *input_data, OutputT *output_data)
 
template<typename Type >
OMStatus ReLUCommon (const int flat_size, const Type *input_data, Type *output_data, const float alpha, const bool is_relu_6)
 
template<typename T >
OMStatus Round (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<>
OMStatus Round< float > (const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T >
OMStatus Rsqrt (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus QuantizedRsqrt (const core::OMRuntimeShape &input_shape, const onert_micro::core::QuantizationParams &input_qparams, const T *input_data, const core::OMRuntimeShape &output_shape, const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
 
template<typename T >
OMStatus Sin (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus SISOOperation (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data, std::function< T(T)> const &func)
 
template<typename T >
OMStatus SISOOperation (const core::OMRuntimeShape &input_shape, const onert_micro::core::QuantizationParams &input_qparams, const T *input_data, const core::OMRuntimeShape &output_shape, const onert_micro::core::QuantizationParams &output_qparams, T *output_data, std::function< float(float)> const &func)
 
template<typename T >
OMStatus Slice (const core::SliceParams &op_params, const core::OMRuntimeShape &input_shape, const T *input_data, T *output_data)
 
template<typename T , typename U >
OMStatus Softmax (const core::SoftmaxParams &params, const T *input_data, U *output_data)
 
template<>
OMStatus Softmax< float, float > (const core::SoftmaxParams &params, const float *input_data, float *output_data)
 
template<typename T >
OMStatus SpaceToBatchND (const core::OMRuntimeShape &unextended_input1_shape, const T *input1_data, const core::OMRuntimeShape &unextended_input2_shape, const int32_t *block_shape_data, const core::OMRuntimeShape &unextended_input3_shape, const int32_t *paddings_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T >
OMStatus SpaceToDepth (const int32_t block_size, const core::OMRuntimeShape &unextended_input_shape, const T *input_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T >
OMStatus Split (const core::SplitParams &params, const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, int32_t axis_value)
 
template<typename T >
OMStatus Sqrt (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Square (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus SquaredDifference (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastSquaredDifference4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus QuantizedBroadcastSquaredDifference4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const onert_micro::core::QuantizationParams &input1_qparams, const T *input1_data, const core::OMRuntimeShape &input2_shape, const onert_micro::core::QuantizationParams &input2_qparams, const T *input2_data, const core::OMRuntimeShape &output_shape, const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
 
template<typename T >
OMStatus QuantizedSquaredDifference (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const onert_micro::core::QuantizationParams &input1_qparams, const T *input1_data, const onert_micro::core::QuantizationParams &input2_qparams, const T *input2_data, const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
 
template<typename T >
OMStatus StridedSlice (core::StridedSliceParams &op_params, const core::OMRuntimeShape &unextended_input_shape, const T *input_data, T *output_data)
 
int8_t SubFunc (int8_t x, int8_t y, const core::ArithmeticQuantParams &params)
 
template<typename T >
OMStatus Sub (const core::BinaryArithmeticBroadcastParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
 
template<typename T >
OMStatus BroadcastSub4DSlow (const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus BroadcastSub4DSlow (const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const int8_t *input1_data, const core::OMRuntimeShape &input2_shape, const int8_t *input2_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
 
OMStatus SVDF (const float *input_data, const float *weights_feature_data, const float *weights_time_data, const float *bias_data, float *state_data, float *scratch_data, float *output_data, const int rank, const int input_size, const int batch_size, const int num_filters, const int num_units, const int memory_size, const circle::ActivationFunctionType activation)
 
template<typename T >
OMStatus Tanh (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus QuantizedTanh (const core::OMRuntimeShape &input_shape, const onert_micro::core::QuantizationParams &input_qparams, const T *input_data, const core::OMRuntimeShape &output_shape, const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
 
template<typename T , int N>
OMStatus TransposeImpl (const core::TransposeParams &params, const core::OMRuntimeShape &unextended_input_shape, const T *input_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T , int N = 5>
OMStatus Transpose (const core::TransposeParams &params, const core::OMRuntimeShape &unextended_input_shape, const T *input_data, const core::OMRuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T >
OMStatus TransposeConv (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &filter_shape, const T *filter_data, const T *bias_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<>
OMStatus TransposeConv< float > (const core::FloatConv2D *params, const core::OMRuntimeShape &input_shape, const float *input_data, const core::OMRuntimeShape &filter_shape, const float *filter_data, const float *bias_data, const core::OMRuntimeShape &output_shape, float *output_data)
 
template<typename T , typename Fn >
OMStatus UnaryOp (const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
template<typename T >
OMStatus Unpack (const core::SplitParams &params, const core::OMRuntimeShape &input_shape, const T *input_data, const core::OMRuntimeShape &output_shape, int32_t axis_value)
 
std::pair< uint32_t, uint32_t > getUpLowerWeightTensorDepth (core::OpTrainableRankType rank, const uint32_t output_depth)
 
std::int32_t saturatingRoundingDoublingHighMul (std::int32_t a, std::int32_t b)
 
int32_t roundingDivideByPOT (int32_t x, int32_t exponent)
 
int32_t multiplyByQuantizedMultiplier (int32_t x, int32_t quantized_multiplier, int shift)
 
int32_t multiplyByQuantizedMultiplierSmallerThanOneExp (int32_t x, int32_t quantized_multiplier, int left_shift)
 
template<typename P >
void getActivationParams (const P &params, int32_t *min, int32_t *max)
 
template<typename P >
void getActivationParams (const P &params, float *min, float *max)
 
template<typename P >
void getActivationParams (const P &params, int64_t *min, int64_t *max)
 
int MatchingDim (const core::OMRuntimeShape &shape1, int index1, const core::OMRuntimeShape &shape2, int index2)
 
int flatSizeSkipDim (const int32_t *dims_data, int skip_dim, int num_dims)
 
int offset (const int32_t *dims_data, int i0, int i1, int i2, int i3)
 
int offset (const int32_t *dims_data, int i0, int i1, int i2, int i3, int i4)
 
template<typename T >
T activationFunctionWithMinMax (T x, T output_activation_min, T output_activation_max)
 
template<int MAX_DIM = 6>
bool ReduceDimensionsForBroadcast (const core::OMRuntimeShape &input1_shape, const core::OMRuntimeShape &input2_shape, size_t *compressed_input1_stride, size_t *compressed_input2_stride, size_t *compressed_output_shape)
 
template<int N>
void copyDimsToDesc (const core::OMRuntimeShape &input_shape, NdArrayDesc< N > *desc_out)
 
template<int N, int DIM, typename Calc >
std::enable_if< DIM==N-1, void >::type NDOpsHelperImpl (const NdArrayDesc< N > &output, const Calc &calc, int indexes[N])
 
template<int N, int DIM, typename Calc >
std::enable_if< DIM!=N-1, void >::type NDOpsHelperImpl (const NdArrayDesc< N > &output, const Calc &calc, int indexes[N])
 
template<int N, typename Calc >
void NDOpsHelper (const NdArrayDesc< N > &output, const Calc &calc)
 
template<int N>
void NdArrayDescsForElementwiseBroadcast (const core::OMRuntimeShape &input0_shape, const core::OMRuntimeShape &input1_shape, NdArrayDesc< N > *desc0_out, NdArrayDesc< N > *desc1_out)
 
int subscriptToIndex (const NdArrayDesc< 4 > &desc, int i0, int i1, int i2, int i3)
 
int subscriptToIndex (const NdArrayDesc< 5 > &desc, int indexes[5])
 
bool processBroadcastShapes (const core::OMRuntimeShape &shape0, const core::OMRuntimeShape &shape1, core::BinaryArithmeticBroadcastParams *params)
 
template<>
OMStatus AddN< int8_t > (const size_t flat_size, const size_t num_inputs, const int8_t *const *input_data, int8_t *output_data)
 
template<>
OMStatus AddN< int16_t > (const size_t flat_size, const size_t num_inputs, const int16_t *const *input_data, int16_t *output_data)
 
template<typename T1 , typename T2 , typename T3 >
OMStatus ArgMin (const core::OMRuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, const core::OMRuntimeShape &output_shape, T2 *output_data)
 
template<typename T , typename AccumT >
OMStatus BatchMatMul (const OMRuntimeShape &lhs_shape, const T *lhs_data, const OMRuntimeShape &rhs_shape, const T *rhs_data, const OMRuntimeShape &output_shape, T *output_data)
 
template<typename InputType , typename OutputType >
OMStatus Mul (const core::ArithmeticQuantParams &params, uint32_t size, const InputType *input1_data, const InputType *input2_data, OutputType *output_data)
 
template<typename D , typename T >
void Select (const core::OMRuntimeShape &input_condition_shape, const D *input_condition_data, const core::OMRuntimeShape &input_x_shape, const T *input_x_data, const core::OMRuntimeShape &input_y_shape, const T *input_y_data, const core::OMRuntimeShape &output_shape, T *output_data)
 
OMStatus Sub (const core::ArithmeticQuantParams &params, const uint32_t flat_size, const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data)
 

Variables

constexpr int MAX_INDICES_ND = 5
 

Function Documentation

◆ Abs()

template<typename T >
OMStatus onert_micro::execute::pal::Abs ( const core::OMRuntimeShape &  shape,
const T *  input_data,
T *  output_data 
)
inline

Definition at line 33 of file PALAbs.h.

34{
35 const uint32_t flat_size = shape.flatSize();
36
37 if (flat_size == -1)
38 return UnknownError;
39
40 assert(input_data != nullptr);
41 assert(output_data != nullptr);
42
43 for (uint32_t i = 0; i < flat_size; ++i)
44 {
45 output_data[i] = std::abs(input_data[i]);
46 }
47
48 return Ok;
49}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleAbs().

◆ activationFunctionWithMinMax()

template<typename T >
T onert_micro::execute::pal::activationFunctionWithMinMax ( T  x,
T  output_activation_min,
T  output_activation_max 
)
inline

Definition at line 170 of file PALUtils.h.

171{
172 using std::max;
173 using std::min;
174 return min(max(x, output_activation_min), output_activation_max);
175}

Referenced by DepthwiseConv2D< float >(), L2Pool(), and TransposeConv< float >().
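
Example (not part of the generated reference): a minimal standalone sketch of how this clamp is typically used to apply fused activation bounds. The [0, 6] range emulating a ReLU6 is an illustrative assumption, and the helper is copied inline so the snippet compiles on its own.

#include <algorithm>
#include <cassert>

// Local copy of the documented clamp so the example is self-contained.
template <typename T>
T activationClamp(T x, T output_activation_min, T output_activation_max)
{
  using std::max;
  using std::min;
  return min(max(x, output_activation_min), output_activation_max);
}

int main()
{
  // Bounds [0, 6] stand in for a fused ReLU6 (assumed values).
  assert(activationClamp(7.5f, 0.f, 6.f) == 6.f);  // clamped to the upper bound
  assert(activationClamp(-1.f, 0.f, 6.f) == 0.f);  // clamped to the lower bound
  assert(activationClamp(3.0f, 0.f, 6.f) == 3.f);  // passed through unchanged
  return 0;
}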

◆ Add() [1/2]

OMStatus onert_micro::execute::pal::Add ( const core::ArithmeticQuantParams &  params,
const uint32_t  flat_size,
const int8_t *  input1_data,
const int8_t *  input2_data,
int8_t *  output_data 
)

Definition at line 33 of file PALAdd.h.

35{
36 auto status = arm_elementwise_add_s8(
37 input1_data, input2_data, params.input1_offset, params.input1_multiplier, params.input1_shift,
38 params.input2_offset, params.input2_multiplier, params.input2_shift, params.left_shift,
39 output_data, params.output_offset, params.output_multiplier, params.output_shift,
40 params.quantized_activation_min, params.quantized_activation_max, flat_size);
41
42 assert(status == ARM_CMSIS_NN_SUCCESS);
43 if (status != ARM_CMSIS_NN_SUCCESS)
44 return UnknownError;
45
46 return Ok;
47}

References onert_micro::core::ArithmeticQuantParams::input1_multiplier, onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input1_shift, onert_micro::core::ArithmeticQuantParams::input2_multiplier, onert_micro::core::ArithmeticQuantParams::input2_offset, onert_micro::core::ArithmeticQuantParams::input2_shift, onert_micro::core::ArithmeticQuantParams::left_shift, onert_micro::Ok, onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, onert_micro::core::ArithmeticQuantParams::quantized_activation_min, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleAdd().

◆ Add() [2/2]

template<typename T >
OMStatus onert_micro::execute::pal::Add ( const core::BinaryArithmeticBroadcastParams &  params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 50 of file PALAddCommon.h.

52{
53 ArithmeticOp<T, AddFn<T>>(params, flat_size, input1_data, input2_data, output_data);
54 return Ok;
55}

References onert_micro::Ok.

◆ AddFunc()

int8_t onert_micro::execute::pal::AddFunc ( int8_t  x,
int8_t  y,
const core::ArithmeticQuantParams &  params 
)

Definition at line 30 of file PALAddCommon.h.

31{
32 const int32_t input1_val = params.input1_offset + x;
33 const int32_t input2_val = params.input2_offset + y;
34 const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
35 const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
36 const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
37 shifted_input1_val, params.input1_multiplier, params.input1_shift);
38 const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
39 shifted_input2_val, params.input2_multiplier, params.input2_shift);
40 const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
41 const int32_t raw_output = multiplyByQuantizedMultiplierSmallerThanOneExp(
42 raw_sum, params.output_multiplier, params.output_shift) +
43 params.output_offset;
44 const int32_t clamped_output = std::min(params.quantized_activation_max,
45 std::max(params.quantized_activation_min, raw_output));
46 return static_cast<int8_t>(clamped_output);
47}

References onert_micro::core::ArithmeticQuantParams::input1_multiplier, onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input1_shift, onert_micro::core::ArithmeticQuantParams::input2_multiplier, onert_micro::core::ArithmeticQuantParams::input2_offset, onert_micro::core::ArithmeticQuantParams::input2_shift, onert_micro::core::ArithmeticQuantParams::left_shift, multiplyByQuantizedMultiplierSmallerThanOneExp(), onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, and onert_micro::core::ArithmeticQuantParams::quantized_activation_min.

Referenced by BroadcastAdd4DSlow().
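
For intuition (not part of the generated reference): the fixed-point pipeline above approximates the float computation sketched below, which dequantizes both operands, adds them, requantizes to the output scale, and clamps to the activation range. All scales, zero points, and the [-128, 127] range are illustrative assumptions, not values taken from the library.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Hypothetical float reference for an int8 quantized add (illustrative params).
int8_t addFloatReference(int8_t x, int8_t y)
{
  const float input1_scale = 0.05f, input2_scale = 0.10f, output_scale = 0.10f;
  const int32_t input1_zero_point = 0, input2_zero_point = 0, output_zero_point = 0;

  const float real_x = input1_scale * (x - input1_zero_point);
  const float real_y = input2_scale * (y - input2_zero_point);
  const float real_sum = real_x + real_y;

  int32_t q = static_cast<int32_t>(std::round(real_sum / output_scale)) + output_zero_point;
  q = std::min<int32_t>(127, std::max<int32_t>(-128, q)); // quantized activation clamp
  return static_cast<int8_t>(q);
}

int main()
{
  // 40 * 0.05 = 2.0 and 10 * 0.10 = 1.0; the sum 3.0 requantizes to 3.0 / 0.10 = 30.
  std::printf("%d\n", addFloatReference(40, 10)); // prints 30
  return 0;
}

The library version performs the same rescaling with the integer-only helper multiplyByQuantizedMultiplierSmallerThanOneExp() and the offsets, multipliers, and shifts stored in core::ArithmeticQuantParams.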

◆ AddN()

template<typename T >
OMStatus onert_micro::execute::pal::AddN ( const size_t  flat_size,
const size_t  num_inputs,
const T *const *  input_data,
T *  output_data 
)

Definition at line 32 of file PALAddNCommon.h.

34{
35 // All inputs and output should have the same shape, this is checked during
36 // Prepare stage.
37 for (size_t i = 0; i < flat_size; ++i)
38 {
39 T x = 0;
40 for (size_t j = 0; j < num_inputs; ++j)
41 {
42 x += input_data[j][i];
43 }
44 output_data[i] = x;
45 }
46 return Ok;
47}

References onert_micro::Ok.

◆ AddN< int16_t >()

template<>
OMStatus onert_micro::execute::pal::AddN< int16_t > ( const size_t  flat_size,
const size_t  num_inputs,
const int16_t *const *  input_data,
int16_t *  output_data 
)

Definition at line 38 of file PALAddN.h.

40{
41 assert(false && "Not IMPL yet");
42 return UnsupportedOp;
43}

References onert_micro::UnsupportedOp.

◆ AddN< int8_t >()

template<>
OMStatus onert_micro::execute::pal::AddN< int8_t > ( const size_t  flat_size,
const size_t  num_inputs,
const int8_t *const *  input_data,
int8_t *  output_data 
)

Definition at line 30 of file PALAddN.h.

32{
33 assert(false && "Not IMPL yet");
34 return UnsupportedOp;
35}

References onert_micro::UnsupportedOp.

◆ ArgMax()

template<typename T1 , typename T2 , typename T3 >
OMStatus onert_micro::execute::pal::ArgMax ( const core::OMRuntimeShape &  input1_shape,
const T1 *  input1_data,
const T3 *  input2_data,
const core::OMRuntimeShape &  output_shape,
T2 *  output_data 
)

Definition at line 30 of file PALArgMax.h.

32{
33 return ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data,
34 std::greater<T1>());
35}

References ArgMinMax(), and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleArgMax().

◆ ArgMin()

template<typename T1 , typename T2 , typename T3 >
OMStatus onert_micro::execute::pal::ArgMin ( const core::OMRuntimeShape &  input1_shape,
const T1 *  input1_data,
const T3 *  input2_data,
const core::OMRuntimeShape &  output_shape,
T2 *  output_data 
)

Definition at line 30 of file PALArgMin.h.

32{
33 return ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data,
34 std::less<T1>());
35}

References ArgMinMax(), and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleArgMin().

◆ ArgMinMax()

template<typename T1 , typename T2 , typename T3 , typename Cmp >
OMStatus onert_micro::execute::pal::ArgMinMax ( const core::OMRuntimeShape &  input1_shape,
const T1 *  input1_data,
const T3 *  input2_data,
const core::OMRuntimeShape &  output_shape,
T2 *  output_data,
const Cmp &  cmp 
)

Definition at line 32 of file PALArgMinMaxCommon.h.

35{
36 int axis = input2_data[0];
37 if (axis < 0)
38 {
39 axis += input1_shape.dimensionsCount();
40 }
41 const int axis_size = input1_shape.dims(axis);
42
43 int outer_size = 1;
44 for (int i = 0; i < axis; ++i)
45 {
46 outer_size *= input1_shape.dims(i);
47 }
48
49 int inner_size = 1;
50 const int dims_count = input1_shape.dimensionsCount();
51 for (int i = axis + 1; i < dims_count; ++i)
52 {
53 inner_size *= input1_shape.dims(i);
54 }
55 for (int outer = 0; outer < outer_size; ++outer)
56 {
57 for (int inner = 0; inner < inner_size; ++inner)
58 {
59 auto min_max_value = input1_data[outer * axis_size * inner_size + inner];
60 T2 min_max_index = 0;
61 for (int i = 1; i < axis_size; ++i)
62 {
63 const auto &curr_value = input1_data[(outer * axis_size + i) * inner_size + inner];
64 if (cmp(curr_value, min_max_value))
65 {
66 min_max_value = curr_value;
67 min_max_index = static_cast<T2>(i);
68 }
69 }
70 output_data[outer * inner_size + inner] = min_max_index;
71 }
72 }
73 return Ok;
74}

References onert_micro::core::OMRuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dims(), and onert_micro::Ok.

Referenced by ArgMax(), and ArgMin().
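
Worked example (not part of the generated reference): for an input of shape [2, 3] reduced along axis 1, the loop above sees outer_size = 2, inner_size = 1 and axis_size = 3. The standalone sketch below repeats that decomposition with plain arrays and std::greater (the ArgMax case); std::less would give ArgMin.

#include <cstdint>
#include <cstdio>
#include <functional>

int main()
{
  const float input[2 * 3] = {1.f, 5.f, 2.f,  // row 0: maximum at index 1
                              7.f, 0.f, 3.f}; // row 1: maximum at index 0
  const int outer_size = 2, inner_size = 1, axis_size = 3;
  std::greater<float> cmp;
  int32_t output[2];

  for (int outer = 0; outer < outer_size; ++outer)
    for (int inner = 0; inner < inner_size; ++inner)
    {
      float best = input[outer * axis_size * inner_size + inner];
      int32_t best_index = 0;
      for (int i = 1; i < axis_size; ++i)
      {
        const float curr = input[(outer * axis_size + i) * inner_size + inner];
        if (cmp(curr, best))
        {
          best = curr;
          best_index = i;
        }
      }
      output[outer * inner_size + inner] = best_index;
    }

  std::printf("%d %d\n", output[0], output[1]); // prints: 1 0
  return 0;
}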

◆ ArithmeticOp()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::ArithmeticOp ( const core::BinaryArithmeticBroadcastParams &  params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 54 of file PALArithmeticOpCommon.h.

56{
57 T activation_min, activation_max;
58 getActivationParams(params, &activation_min, &activation_max);
59
60 Fn func;
61 for (int i = 0; i < flat_size; ++i)
62 output_data[i] =
63 std::min(std::max(func(input1_data[i], input2_data[i]), activation_min), activation_max);
64
65 return Ok;
66}

References getActivationParams(), and onert_micro::Ok.

◆ ArithmeticOpScalar()

template<typename T , typename Fn >
void onert_micro::execute::pal::ArithmeticOpScalar ( const core::BinaryArithmeticBroadcastParams &  params,
const int  flat_size,
const T *  input_data,
const T  scalar_value,
T *  output_data 
)
inline

Definition at line 111 of file PALArithmeticOpCommon.h.

114{
115 T activation_min, activation_max;
116 getActivationParams(params, &activation_min, &activation_max);
117
118 for (int i = 0; i < flat_size; ++i)
119 output_data[i] =
120 std::min(std::max(func(input_data[i], scalar_value), activation_min), activation_max);
121}

References getActivationParams().

◆ AveragePool() [1/2]

OMStatus onert_micro::execute::pal::AveragePool ( const core::Pool2DParams &  params,
const core::OMRuntimeShape &  input_shape,
const float *  input_data,
const core::OMRuntimeShape &  output_shape,
float *  output_data 
)

Definition at line 33 of file PALAveragePool2DCommon.h.

36{
37 const int32_t batches = input_shape.dims(0);
38 const int32_t depth = output_shape.dims(3);
39 const int32_t input_height = input_shape.dims(1);
40 const int32_t input_width = input_shape.dims(2);
41 const int32_t output_height = output_shape.dims(1);
42 const int32_t output_width = output_shape.dims(2);
43 const int32_t stride_height = params.stride_h;
44 const int32_t stride_width = params.stride_w;
45 for (int batch = 0; batch < batches; ++batch)
46 {
47 for (int out_y = 0; out_y < output_height; ++out_y)
48 {
49 for (int out_x = 0; out_x < output_width; ++out_x)
50 {
51 for (int channel = 0; channel < depth; ++channel)
52 {
53 const int in_x_origin = (out_x * stride_width) - params.pad_w;
54 const int in_y_origin = (out_y * stride_height) - params.pad_h;
55 // Compute the boundaries of the filter region clamped so as to
56 // ensure that the filter window fits in the input array.
57 const int filter_x_start = std::max(0, -in_x_origin);
58 const int filter_x_end = std::min(static_cast<int32_t>(params.filter_w),
59 static_cast<int32_t>(input_width - in_x_origin));
60 const int filter_y_start = std::max(0, -in_y_origin);
61 const int filter_y_end = std::min(static_cast<int32_t>(params.filter_h),
62 static_cast<int32_t>(input_height - in_y_origin));
63
64 float total = 0.f;
65 float filter_count = 0;
66
67 for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
68 {
69 for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
70 {
71 const int in_x = in_x_origin + filter_x;
72 const int in_y = in_y_origin + filter_y;
73
74 const int input_data_offset =
75 ((batch * input_shape.dims(1) + in_y) * input_shape.dims(2) + in_x) *
76 input_shape.dims(3) +
77 channel;
78
79 total += input_data[input_data_offset];
80 filter_count++;
81 }
82 }
83 const int output_data_offset =
84 ((batch * output_shape.dims(1) + out_y) * output_shape.dims(2) + out_x) *
85 output_shape.dims(3) +
86 channel;
87
88 assert(filter_count != 0);
89 const float average = total / filter_count;
90
91 output_data[output_data_offset] =
92 std::min(std::max(average, params.activation_min), params.activation_max);
93 }
94 }
95 }
96 }
97 return Ok;
98}

References onert_micro::core::Pool2DParams::activation_max, onert_micro::core::Pool2DParams::activation_min, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.
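
Worked example (not part of the generated reference): the window clamping above means border outputs average fewer elements than filter_h * filter_w. The standalone sketch below computes the single top-left output of a 1x4x4x1 input with a 3x3 filter, stride 1 and pad_h = pad_w = 1 (all illustrative values): the window is clamped to a 2x2 block, so filter_count is 4 rather than 9.

#include <algorithm>
#include <cstdio>

int main()
{
  const float input[4][4] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}};
  const int filter_h = 3, filter_w = 3, pad_h = 1, pad_w = 1;
  const int out_y = 0, out_x = 0, stride = 1, input_h = 4, input_w = 4;

  const int in_y_origin = out_y * stride - pad_h; // -1
  const int in_x_origin = out_x * stride - pad_w; // -1
  const int filter_y_start = std::max(0, -in_y_origin);               // 1
  const int filter_y_end = std::min(filter_h, input_h - in_y_origin); // 3
  const int filter_x_start = std::max(0, -in_x_origin);               // 1
  const int filter_x_end = std::min(filter_w, input_w - in_x_origin); // 3

  float total = 0.f;
  int filter_count = 0;
  for (int fy = filter_y_start; fy < filter_y_end; ++fy)
    for (int fx = filter_x_start; fx < filter_x_end; ++fx)
    {
      total += input[in_y_origin + fy][in_x_origin + fx]; // visits (0,0) (0,1) (1,0) (1,1)
      ++filter_count;
    }

  std::printf("average = %g over %d elements\n", total / filter_count, filter_count);
  // prints: average = 3.5 over 4 elements, i.e. (1 + 2 + 5 + 6) / 4
  return 0;
}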

◆ AveragePool() [2/2]

OMStatus onert_micro::execute::pal::AveragePool ( const core::Pool2DParams &  params,
const core::OMRuntimeShape &  input_shape,
const int8_t *  input_data,
const core::OMRuntimeShape &  output_shape,
int8_t *  output_data 
)

Definition at line 32 of file PALAveragePool2D.h.

35{
36 cmsis_nn_dims input_dims;
37 cmsis_nn_dims output_dims;
38 cmsis_nn_pool_params pool_params;
39 cmsis_nn_dims filter_dims;
40 cmsis_nn_context ctx;
41
42 const int depth = input_shape.dims(3);
43 const int output_width = output_shape.dims(2);
44
45 input_dims.n = 1;
46 input_dims.h = input_shape.dims(1);
47 input_dims.w = input_shape.dims(2);
48 input_dims.c = depth;
49
50 output_dims.n = 1;
51 output_dims.h = output_shape.dims(1);
52 output_dims.w = output_width;
53 output_dims.c = depth;
54
55 pool_params.stride.h = params.stride_h;
56 pool_params.stride.w = params.stride_w;
57 pool_params.padding.h = params.pad_h;
58 pool_params.padding.w = params.pad_w;
59 pool_params.activation.min = params.quantized_activation_min;
60 pool_params.activation.max = params.quantized_activation_max;
61
62 filter_dims.n = 1;
63 filter_dims.h = params.filter_h;
64 filter_dims.w = params.filter_w;
65 filter_dims.c = 1;
66
67 const int32_t buffer_size = arm_avgpool_s8_get_buffer_size(output_width, depth);
68 int8_t *buffer = nullptr;
69 if (buffer_size > 0)
70 {
71 buffer = new int8_t[buffer_size];
72 }
73
74 ctx.buf = buffer;
75 ctx.size = buffer_size;
76
77 auto res = arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, &output_dims,
78 output_data);
79
80 if (buffer_size > 0)
81 delete[] buffer;
82
83 assert(res == ARM_CMSIS_NN_SUCCESS);
84 if (res != ARM_CMSIS_NN_SUCCESS)
85 return CmsisNNError;
86
87 return Ok;
88}

References onert_micro::CmsisNNError, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::quantized_activation_max, onert_micro::core::Pool2DParams::quantized_activation_min, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.

Referenced by onert_micro::execute::execute_kernel_CircleAveragePool2D().

◆ BatchMatMul()

template<typename T , typename AccumT >
OMStatus onert_micro::execute::pal::BatchMatMul ( const OMRuntimeShape &  lhs_shape,
const T *  lhs_data,
const OMRuntimeShape &  rhs_shape,
const T *  rhs_data,
const OMRuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 39 of file PALBatchMatMul.h.

42{
43
44 const OMRuntimeShape extended_lhs_shape = OMRuntimeShape::extendedShape(5, lhs_shape);
45 const OMRuntimeShape extended_rhs_shape = OMRuntimeShape::extendedShape(5, rhs_shape);
46
47 const int batch_dim0 = extended_lhs_shape.dims(0);
48 const int batch_dim1 = extended_lhs_shape.dims(1);
49 const int batch_dim2 = extended_lhs_shape.dims(2);
50
51 const int lhs_ext0 = extended_lhs_shape.dims(0);
52 const int lhs_ext1 = extended_lhs_shape.dims(1);
53 const int lhs_ext2 = extended_lhs_shape.dims(2);
54 const int rhs_ext0 = extended_rhs_shape.dims(0);
55 const int rhs_ext1 = extended_rhs_shape.dims(1);
56 const int rhs_ext2 = extended_rhs_shape.dims(2);
57
58 // Set params for each matrix multiply.
59 const int lhs_rows = extended_lhs_shape.dims(3);
60 const int rhs_cols = extended_rhs_shape.dims(4);
61 const int accum_depth = extended_lhs_shape.dims(4);
62
63 // stride for each batch dim
64 const int lhs_stride_b0 =
65 extended_lhs_shape.dims(1) * extended_lhs_shape.dims(2) * lhs_rows * accum_depth;
66 const int lhs_stride_b1 = extended_lhs_shape.dims(2) * lhs_rows * accum_depth;
67 const int lhs_stride_b2 = lhs_rows * accum_depth;
68
69 const int rhs_stride_b0 =
70 extended_rhs_shape.dims(1) * extended_rhs_shape.dims(2) * accum_depth * rhs_cols;
71 const int rhs_stride_b1 = extended_rhs_shape.dims(2) * accum_depth * rhs_cols;
72 const int rhs_stride_b2 = accum_depth * rhs_cols;
73
74 for (int b0 = 0; b0 < batch_dim0; ++b0)
75 {
76 const T *lhs_ptr0 = lhs_data + (b0 * lhs_stride_b0);
77 const T *rhs_ptr0 = rhs_data + (b0 * rhs_stride_b0);
78 for (int b1 = 0; b1 < batch_dim1; ++b1)
79 {
80 const T *lhs_ptr1 = lhs_ptr0 + b1 * lhs_stride_b1;
81 const T *rhs_ptr1 = rhs_ptr0 + b1 * rhs_stride_b1;
82 for (int b2 = 0; b2 < batch_dim2; ++b2)
83 {
84 const T *lhs_ptr2 = lhs_ptr1 + b2 * lhs_stride_b2;
85 const T *rhs_ptr2 = rhs_ptr1 + b2 * rhs_stride_b2;
86 T *out_ptr = output_data +
87 ((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) * lhs_rows * rhs_cols;
88
89 for (int j = 0; j < rhs_cols; ++j)
90 {
91 for (int i = 0; i < lhs_rows; ++i)
92 {
93 AccumT total = 0;
94 for (int k = 0; k < accum_depth; ++k)
95 {
96 AccumT lhs_val = lhs_ptr2[accum_depth * i + k];
97 AccumT rhs_val = rhs_ptr2[j + k * rhs_cols];
98 total += (lhs_val) * (rhs_val);
99 }
100 const int idx = j + i * rhs_cols;
101 out_ptr[idx] = static_cast<T>(total);
102 }
103 }
104 }
105 }
106 }
107
108 return Ok;
109}

References onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), and onert_micro::Ok.
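
Illustration (not part of the generated reference): once the batch strides above select a single lhs/rhs slice, the innermost three loops are an ordinary row-major matrix multiply of lhs_rows x accum_depth by accum_depth x rhs_cols. The standalone sketch below runs just that core for a 2x3 by 3x2 product; shape extension and batch striding are omitted.

#include <cstdio>

int main()
{
  const int lhs_rows = 2, rhs_cols = 2, accum_depth = 3;
  const float lhs[2 * 3] = {1, 2, 3, 4, 5, 6};    // row-major 2x3
  const float rhs[3 * 2] = {7, 8, 9, 10, 11, 12}; // row-major 3x2
  float out[2 * 2] = {};

  for (int j = 0; j < rhs_cols; ++j)
    for (int i = 0; i < lhs_rows; ++i)
    {
      float total = 0.f;
      for (int k = 0; k < accum_depth; ++k)
        total += lhs[accum_depth * i + k] * rhs[j + k * rhs_cols];
      out[j + i * rhs_cols] = total;
    }

  std::printf("%g %g\n%g %g\n", out[0], out[1], out[2], out[3]);
  // prints:
  // 58 64
  // 139 154
  return 0;
}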

◆ BatchToSpaceND()

template<typename T >
OMStatus onert_micro::execute::pal::BatchToSpaceND ( const core::OMRuntimeShape &  unextended_input1_shape,
const T *  input1_data,
const core::OMRuntimeShape &  unextended_input2_shape,
const int32_t *  block_shape_data,
const core::OMRuntimeShape &  unextended_input3_shape,
const int32_t *  crops_data,
const core::OMRuntimeShape &  unextended_output_shape,
T *  output_data 
)
inline

Definition at line 51 of file PALBatchToSpaceNDCommon.h.

55{
56 const core::OMRuntimeShape input1_shape = extendShapeBatchToSpace(unextended_input1_shape);
57 const core::OMRuntimeShape output_shape = extendShapeBatchToSpace(unextended_output_shape);
58
59 const int output_width = output_shape.dims(2);
60 const int output_height = output_shape.dims(1);
61 const int output_batch_size = output_shape.dims(0);
62
63 const int depth = input1_shape.dims(3);
64 const int input_width = input1_shape.dims(2);
65 const int input_height = input1_shape.dims(1);
66 const int input_batch_size = input1_shape.dims(0);
67
68 const int block_shape_height = block_shape_data[0];
69 const int block_shape_width =
70 unextended_input1_shape.dimensionsCount() == 4 ? block_shape_data[1] : 1;
71 const int crops_top = crops_data[0];
72 const int crops_left = unextended_input1_shape.dimensionsCount() == 4 ? crops_data[2] : 0;
73 for (int in_batch = 0; in_batch < input_batch_size; ++in_batch)
74 {
75 const int out_batch = in_batch % output_batch_size;
76 const int spatial_offset = in_batch / output_batch_size;
77 for (int in_h = 0; in_h < input_height; ++in_h)
78 {
79 const int out_h = in_h * block_shape_height + spatial_offset / block_shape_width - crops_top;
80 if (out_h < 0 || out_h >= output_height)
81 {
82 continue;
83 }
84 for (int in_w = 0; in_w < input_width; ++in_w)
85 {
86 const int out_w =
87 in_w * block_shape_width + spatial_offset % block_shape_width - crops_left;
88
89 if (out_w < 0 || out_w >= output_width)
90 {
91 continue;
92 }
93 T *out = output_data + offset(output_shape.dimsData(), out_batch, out_h, out_w, 0);
94 const T *in = input1_data + offset(input1_shape.dimsData(), in_batch, in_h, in_w, 0);
95 memcpy(out, in, depth * sizeof(T));
96 }
97 }
98 }
99 return Ok;
100}

References onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), offset(), onert_micro::Ok, and output_shape.

◆ BinaryOp()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::BinaryOp ( const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)
inline

Definition at line 62 of file PALBinaryOpCommon.h.

64{
65 Fn func;
66 for (int i = 0; i < flat_size; ++i)
67 {
68 output_data[i] = func(input1_data[i], input2_data[i]);
69 }
70 return Ok;
71}

References onert_micro::Ok.
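
Usage sketch (not part of the generated reference): Fn only needs to be a default-constructible functor with operator()(T, T), which is how the MaximumFn/MinimumFn-style structs in this namespace are used. The functor below is defined locally for the example rather than taken from the library, and the loop is copied inline so the snippet compiles on its own.

#include <algorithm>
#include <cstdio>

// Hypothetical functor standing in for one of the library's Fn structs.
struct MaxFn
{
  float operator()(float a, float b) const { return std::max(a, b); }
};

// Local copy of the documented BinaryOp loop so the example is self-contained.
template <typename T, typename Fn>
void binaryOpSketch(const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
{
  Fn func;
  for (int i = 0; i < flat_size; ++i)
    output_data[i] = func(input1_data[i], input2_data[i]);
}

int main()
{
  const float a[4] = {1.f, -2.f, 5.f, 0.f};
  const float b[4] = {3.f, -4.f, 2.f, 0.5f};
  float out[4];

  binaryOpSketch<float, MaxFn>(4, a, b, out);
  for (float v : out)
    std::printf("%g ", v); // prints: 3 -2 5 0.5
  std::printf("\n");
  return 0;
}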

◆ BroadcastAdd4DSlow() [1/2]

OMStatus onert_micro::execute::pal::BroadcastAdd4DSlow ( const core::ArithmeticQuantParams &  params,
const core::OMRuntimeShape &  input1_shape,
const int8_t *  input1_data,
const core::OMRuntimeShape &  input2_shape,
const int8_t *  input2_data,
const core::OMRuntimeShape &  output_shape,
int8_t *  output_data 
)

Definition at line 68 of file PALAddCommon.h.

72{
73 BroadcastBinaryFunction6DSlow(params, input1_shape, input1_data, input2_shape, input2_data,
74 output_shape, output_data, AddFunc);
75 return Ok;
76}

References AddFunc(), BroadcastBinaryFunction6DSlow(), onert_micro::Ok, and output_shape.

◆ BroadcastAdd4DSlow() [2/2]

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastAdd4DSlow ( const core::BinaryArithmeticBroadcastParams &  params,
const core::OMRuntimeShape &  input1_shape,
const T *  input1_data,
const core::OMRuntimeShape &  input2_shape,
const T *  input2_data,
const core::OMRuntimeShape &  output_shape,
T *  output_data 
)

Definition at line 58 of file PALAddCommon.h.

62{
63 BroadcastArithmeticOp4DSlow<T, AddFn<T>>(params, input1_shape, input1_data, input2_shape,
64 input2_data, output_shape, output_data);
65 return Ok;
66}

References onert_micro::Ok, and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleAdd().

◆ BroadcastArithmeticOp4DSlow()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::BroadcastArithmeticOp4DSlow ( const core::BinaryArithmeticBroadcastParams &  params,
const core::OMRuntimeShape &  input1_shape,
const T *  input1_data,
const core::OMRuntimeShape &  input2_shape,
const T *  input2_data,
const core::OMRuntimeShape &  output_shape,
T *  output_data 
)

Definition at line 124 of file PALArithmeticOpCommon.h.

128{
129 NdArrayDesc<4> desc1;
130 NdArrayDesc<4> desc2;
131 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
132 const core::OMRuntimeShape extended_output_shape =
133 core::OMRuntimeShape::extendedShape(4, output_shape);
134
135 T activation_min, activation_max;
136 getActivationParams(params, &activation_min, &activation_max);
137
138 // In Tensorflow, the dimensions are canonically named (batch_number, row,
139 // col, channel), with extents (batches, height, width, depth), with the
140 // trailing dimension changing most rapidly (channels has the smallest stride,
141 // typically 1 element).
142 //
143 // In generated C code, we store arrays with the dimensions reversed. The
144 // first dimension has smallest stride.
145 //
146 // We name our variables by their Tensorflow convention, but generate C code
147 // nesting loops such that the innermost loop has the smallest stride for the
148 // best cache behavior.
149 Fn func;
150 for (int b = 0; b < extended_output_shape.dims(0); ++b)
151 {
152 for (int y = 0; y < extended_output_shape.dims(1); ++y)
153 {
154 for (int x = 0; x < extended_output_shape.dims(2); ++x)
155 {
156 for (int c = 0; c < extended_output_shape.dims(3); ++c)
157 {
158 const int output_data_offset =
159 ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
160 extended_output_shape.dims(3) +
161 c;
162
163 output_data[output_data_offset] =
164 std::min(std::max(func(input1_data[subscriptToIndex(desc1, b, y, x, c)],
165 input2_data[subscriptToIndex(desc2, b, y, x, c)]),
166 activation_min),
167 activation_max);
168 }
169 }
170 }
171 }
172 return Ok;
173}

References desc1, desc2, onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), getActivationParams(), NdArrayDescsForElementwiseBroadcast(), onert_micro::Ok, output_shape, and subscriptToIndex().
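
Illustration (not part of the generated reference): the NdArrayDesc machinery boils down to walking every broadcast dimension with stride 0, so the single element along that axis is reused. The standalone sketch below shows the same idea reduced to 2D, adding a [2, 1] tensor to a [1, 3] tensor to get a [2, 3] result; the stride arrays are written out by hand for clarity.

#include <cstdio>

int main()
{
  const float a[2] = {10.f, 20.f};    // logical shape [2, 1]
  const float b[3] = {1.f, 2.f, 3.f}; // logical shape [1, 3]
  const int a_stride[2] = {1, 0};     // stride 0 along the broadcast (width) axis
  const int b_stride[2] = {0, 1};     // stride 0 along the broadcast (height) axis
  float out[2][3];

  for (int y = 0; y < 2; ++y)
    for (int x = 0; x < 3; ++x)
      out[y][x] = a[y * a_stride[0] + x * a_stride[1]] + b[y * b_stride[0] + x * b_stride[1]];

  for (int y = 0; y < 2; ++y)
    std::printf("%g %g %g\n", out[y][0], out[y][1], out[y][2]);
  // prints:
  // 11 12 13
  // 21 22 23
  return 0;
}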

◆ BroadcastBinaryFunction6DSlow()

template<typename T >
void onert_micro::execute::pal::BroadcastBinaryFunction6DSlow ( const core::ArithmeticQuantParams &  params,
const core::OMRuntimeShape &  input1_shape,
const T *  input1_data,
const core::OMRuntimeShape &  input2_shape,
const T *  input2_data,
const core::OMRuntimeShape &  output_shape,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 320 of file PALArithmeticOpCommon.h.

325{
326 constexpr int kMaxBroadcastDim = 6;
327
328 // In Tensorflow, the dimensions are canonically named (batch_number, row,
329 // col, channel), with extents (batches, height, width, depth), with the
330 // trailing dimension changing most rapidly (channels has the smallest stride,
331 // typically 1 element).
332 //
333 // In generated C code, we store arrays with the dimensions reversed. The
334 // first dimension has smallest stride.
335 //
336 // We name our variables by their Tensorflow convention, but generate C code
337 // nesting loops such that the innermost loop has the smallest stride for the
338 // best cache behavior.
339 size_t compressed_input1_stride[kMaxBroadcastDim];
340 size_t compressed_input2_stride[kMaxBroadcastDim];
341 size_t compressed_output_shape[kMaxBroadcastDim];
342 bool broadcastable_shape = ReduceDimensionsForBroadcast<kMaxBroadcastDim>(
343 input1_shape, input2_shape, compressed_input1_stride, compressed_input2_stride,
344 compressed_output_shape);
345 // Skip broadcasting for degenerate shapes.
346 if (!broadcastable_shape)
347 {
348 return;
349 }
350
351 size_t input1_offset = 0;
352 size_t input2_offset = 0;
353 size_t output_offset = 0;
354 BroadcastRecursiveDimensions(params, kMaxBroadcastDim - 1, &input1_offset, &input2_offset,
355 &output_offset, compressed_input1_stride, compressed_input2_stride,
356 compressed_output_shape, input1_data, input2_data, output_data,
357 binary_func);
358}

References BroadcastRecursiveDimensions().

Referenced by BroadcastAdd4DSlow(), and BroadcastSub4DSlow().

◆ BroadcastBinaryOp4DSlow()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::BroadcastBinaryOp4DSlow ( const core::OMRuntimeShape input1_shape,
const float *  input1_data,
const core::OMRuntimeShape input2_shape,
const float *  input2_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 75 of file PALBinaryOpCommon.h.

78{
79 NdArrayDesc<4> desc1;
80 NdArrayDesc<4> desc2;
81 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
82
83 const core::OMRuntimeShape extended_output_shape =
84 core::OMRuntimeShape::extendedShape(4, output_shape);
85
86 // In Tensorflow, the dimensions are canonically named (batch_number, row,
87 // col, channel), with extents (batches, height, width, depth), with the
88 // trailing dimension changing most rapidly (channels has the smallest stride,
89 // typically 1 element).
90 //
91 // In generated C code, we store arrays with the dimensions reversed. The
92 // first dimension has smallest stride.
93 //
94 // We name our variables by their Tensorflow convention, but generate C code
95 // nesting loops such that the innermost loop has the smallest stride for the
96 // best cache behavior.
97
98 Fn func;
99 for (int b = 0; b < extended_output_shape.dims(0); ++b)
100 {
101 for (int y = 0; y < extended_output_shape.dims(1); ++y)
102 {
103 for (int x = 0; x < extended_output_shape.dims(2); ++x)
104 {
105 for (int c = 0; c < extended_output_shape.dims(3); ++c)
106 {
107 const int output_data_offset =
108 ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
109 extended_output_shape.dims(3) +
110 c;
111
112 output_data[output_data_offset] = func(input1_data[subscriptToIndex(desc1, b, y, x, c)],
113 input2_data[subscriptToIndex(desc2, b, y, x, c)]);
114 }
115 }
116 }
117 }
118 return Ok;
119}

References desc1, desc2, onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), NdArrayDescsForElementwiseBroadcast(), onert_micro::Ok, output_shape, and subscriptToIndex().

◆ BroadcastComparison4DSlowNoScaling()

template<typename T >
void onert_micro::execute::pal::BroadcastComparison4DSlowNoScaling ( const core::ComparisonParams op_params,
const core::OMRuntimeShape unextended_input1_shape,
const T *  input1_data,
const core::OMRuntimeShape unextended_input2_shape,
const T *  input2_data,
const core::OMRuntimeShape unextended_output_shape,
bool *  output_data,
bool   F(T, T) 
)
inline

Definition at line 149 of file PALComparisons.h.

153{
154 const BroadcastComparison4DSlowCommon dims = BroadcastComparison4DSlowPreprocess(
155 unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
156
157 for (int b = 0; b < dims.output_shape.dims(0); ++b)
158 {
159 for (int y = 0; y < dims.output_shape.dims(1); ++y)
160 {
161 for (int x = 0; x < dims.output_shape.dims(2); ++x)
162 {
163 for (int c = 0; c < dims.output_shape.dims(3); ++c)
164 {
165 const int output_data_offset =
166 ((b * dims.output_shape.dims(1) + y) * dims.output_shape.dims(2) + x) *
167 dims.output_shape.dims(3) +
168 c;
169 output_data[output_data_offset] =
170 F(input1_data[subscriptToIndex(dims.desc1, b, y, x, c)],
171 input2_data[subscriptToIndex(dims.desc2, b, y, x, c)]);
172 }
173 }
174 }
175 }
176}

References subscriptToIndex().

◆ BroadcastComparison4DSlowWithScaling()

template<typename T , typename AccType >
void onert_micro::execute::pal::BroadcastComparison4DSlowWithScaling ( const core::ComparisonParams op_params,
const core::OMRuntimeShape unextended_input1_shape,
const T *  input1_data,
const core::OMRuntimeShape unextended_input2_shape,
const T *  input2_data,
const core::OMRuntimeShape unextended_output_shape,
bool *  output_data,
bool   F(AccType, AccType) 
)
inline

Definition at line 75 of file PALComparisons.h.

79{
80 const BroadcastComparison4DSlowCommon dims = BroadcastComparison4DSlowPreprocess(
81 unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
82
83 int left_shift = op_params.left_shift;
84 int32_t input1_offset = op_params.input1_offset;
85 int32_t input1_multiplier = op_params.input1_multiplier;
86 int input1_shift = op_params.input1_shift;
87 int32_t input2_offset = op_params.input2_offset;
88 int32_t input2_multiplier = op_params.input2_multiplier;
89 int input2_shift = op_params.input2_shift;
90
91 for (int b = 0; b < dims.output_shape.dims(0); ++b)
92 {
93 for (int y = 0; y < dims.output_shape.dims(1); ++y)
94 {
95 for (int x = 0; x < dims.output_shape.dims(2); ++x)
96 {
97 for (int c = 0; c < dims.output_shape.dims(3); ++c)
98 {
99 const int32_t input1_val =
100 input1_offset + input1_data[subscriptToIndex(dims.desc1, b, y, x, c)];
101 const int32_t input2_val =
102 input2_offset + input2_data[subscriptToIndex(dims.desc2, b, y, x, c)];
103 const int32_t shifted_input1_val = input1_val * (1 << left_shift);
104 const int32_t shifted_input2_val = input2_val * (1 << left_shift);
105 const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
106 shifted_input1_val, input1_multiplier, input1_shift);
107 const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
108 shifted_input2_val, input2_multiplier, input2_shift);
109
110 const int output_data_offset =
111 ((b * dims.output_shape.dims(1) + y) * dims.output_shape.dims(2) + x) *
112 dims.output_shape.dims(3) +
113 c;
114 output_data[output_data_offset] = F(scaled_input1_val, scaled_input2_val);
115 }
116 }
117 }
118 }
119}

References onert_micro::core::ComparisonParams::input1_multiplier, onert_micro::core::ComparisonParams::input1_offset, onert_micro::core::ComparisonParams::input1_shift, onert_micro::core::ComparisonParams::input2_multiplier, onert_micro::core::ComparisonParams::input2_offset, onert_micro::core::ComparisonParams::input2_shift, onert_micro::core::ComparisonParams::left_shift, multiplyByQuantizedMultiplierSmallerThanOneExp(), and subscriptToIndex().

◆ BroadcastDiv4DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastDiv4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 39 of file PALDivCommon.h.

43{
44 BroadcastArithmeticOp4DSlow<T, DivFn<T>>(params, input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46 return Ok;
47}

References onert_micro::Ok, and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleDiv().

◆ BroadcastFloorDiv4DSlow()

void onert_micro::execute::pal::BroadcastFloorDiv4DSlow ( const core::OMRuntimeShape input1_shape,
const float *  input1_data,
const core::OMRuntimeShape input2_shape,
const float *  input2_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 40 of file PALFloorDivCommon.h.

43{
44 BroadcastBinaryOp4DSlow<float, FloorDivFn<float>>(input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46}

References output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleFloorDiv().

◆ BroadcastFloorMod4DSlow()

void onert_micro::execute::pal::BroadcastFloorMod4DSlow ( const core::OMRuntimeShape input1_shape,
const float *  input1_data,
const core::OMRuntimeShape input2_shape,
const float *  input2_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 40 of file PALFloorModCommon.h.

43{
44 BroadcastBinaryOp4DSlow<float, FloorModFn<float>>(input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46}

References output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleFloorMod().

◆ BroadcastInput1()

template<typename T >
void onert_micro::execute::pal::BroadcastInput1 ( int  size,
const core::ArithmeticQuantParams params,
const T *  input1_data,
const T *  input2_data,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 241 of file PALArithmeticOpCommon.h.

244{
245 for (int i = 0; i < size; ++i)
246 {
247 output_data[i] = binary_func(input1_data[0], input2_data[i], params);
248 }
249}

References size.

◆ BroadcastInput2()

template<typename T >
void onert_micro::execute::pal::BroadcastInput2 ( int  size,
const core::ArithmeticQuantParams params,
const T *  input1_data,
const T *  input2_data,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 252 of file PALArithmeticOpCommon.h.

255{
256 for (int i = 0; i < size; ++i)
257 {
258 output_data[i] = binary_func(input1_data[i], input2_data[0], params);
259 }
260}

References size.

◆ BroadcastMaximum4DSlow()

OMStatus onert_micro::execute::pal::BroadcastMaximum4DSlow ( const core::OMRuntimeShape input1_shape,
const float *  input1_data,
const core::OMRuntimeShape input2_shape,
const float *  input2_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 38 of file PALMaximumCommon.h.

43{
44 return BroadcastBinaryOp4DSlow<float, MaximumFn<float>>(input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46}

References output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleMaximum().

◆ BroadcastMinimum4DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastMinimum4DSlow ( const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 40 of file PALMinimumCommon.h.

43{
44 return BroadcastBinaryOp4DSlow<float, MinimumFn<float>>(input1_shape, input1_data, input2_shape,
45 input2_data, output_shape, output_data);
46}

References output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleMinimum().

◆ BroadcastMul4DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastMul4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 44 of file PALMulCommon.h.

48{
49 BroadcastArithmeticOp4DSlow<T, MulFn<T>>(params, input1_shape, input1_data, input2_shape,
50 input2_data, output_shape, output_data);
51 return Ok;
52}

References onert_micro::Ok, and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleMul().

◆ BroadcastMul6DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastMul6DSlow ( const core::ArithmeticQuantParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 55 of file PALMulCommon.h.

59{
60 NdArrayDesc<kMaxMulBroadcastDim> desc1;
61 NdArrayDesc<kMaxMulBroadcastDim> desc2;
62 // The input shapes are extended as part of NdArrayDesc initialization.
63 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
64 const core::OMRuntimeShape extended_output_shape =
65 core::OMRuntimeShape::extendedShape(kMaxMulBroadcastDim, output_shape);
66 // Cache output shape dimensions.
67 int32_t extended_output_shape_dims[kMaxMulBroadcastDim];
68 std::memcpy(extended_output_shape_dims, extended_output_shape.dimsData(),
69 sizeof(extended_output_shape_dims));
70
71 size_t input1_offset_a = 0;
72 size_t input2_offset_a = 0;
73 size_t output_offset_a = 0;
74 for (int a = 0; a < extended_output_shape_dims[0]; ++a)
75 {
76 size_t input1_offset_d = input1_offset_a;
77 size_t input2_offset_d = input2_offset_a;
78 size_t output_offset_d = output_offset_a;
79 for (int d = 0; d < extended_output_shape_dims[1]; ++d)
80 {
81 size_t input1_offset_b = input1_offset_d;
82 size_t input2_offset_b = input2_offset_d;
83 size_t output_offset_b = output_offset_d;
84 for (int b = 0; b < extended_output_shape_dims[2]; ++b)
85 {
86 size_t input1_offset_y = input1_offset_b;
87 size_t input2_offset_y = input2_offset_b;
88 size_t output_offset_y = output_offset_b;
89 for (int y = 0; y < extended_output_shape_dims[3]; ++y)
90 {
91 size_t input1_offset_x = input1_offset_y;
92 size_t input2_offset_x = input2_offset_y;
93 size_t output_offset_x = output_offset_y;
94 for (int x = 0; x < extended_output_shape_dims[4]; ++x)
95 {
96 size_t input1_offset_c = input1_offset_x;
97 size_t input2_offset_c = input2_offset_x;
98 size_t output_offset_c = output_offset_x;
99 for (int c = 0; c < extended_output_shape_dims[5]; ++c)
100 {
101 const int32_t input1_val = params.input1_offset + input1_data[input1_offset_c];
102 const int32_t input2_val = params.input2_offset + input2_data[input2_offset_c];
103 const int32_t unclamped_result =
104 params.output_offset + multiplyByQuantizedMultiplier(input1_val * input2_val,
105 params.output_multiplier,
106 params.output_shift);
107 const int32_t clamped_output =
108 std::min(params.quantized_activation_max,
109 std::max(params.quantized_activation_min, unclamped_result));
110 output_data[output_offset_c] = static_cast<T>(clamped_output);
111 input1_offset_c += desc1.strides[5];
112 input2_offset_c += desc2.strides[5];
113 ++output_offset_c;
114 }
115 input1_offset_x += desc1.strides[4];
116 input2_offset_x += desc2.strides[4];
117 output_offset_x += extended_output_shape_dims[5];
118 }
119 input1_offset_y += desc1.strides[3];
120 input2_offset_y += desc2.strides[3];
121 output_offset_y += extended_output_shape_dims[4] * extended_output_shape_dims[5];
122 }
123 input1_offset_b += desc1.strides[2];
124 input2_offset_b += desc2.strides[2];
125 output_offset_b += extended_output_shape_dims[3] * extended_output_shape_dims[4] *
126 extended_output_shape_dims[5];
127 }
128 input1_offset_d += desc1.strides[1];
129 input2_offset_d += desc2.strides[1];
130 output_offset_d += extended_output_shape_dims[2] * extended_output_shape_dims[3] *
131 extended_output_shape_dims[4] * extended_output_shape_dims[5];
132 }
133 input1_offset_a += desc1.strides[0];
134 input2_offset_a += desc2.strides[0];
135 output_offset_a += extended_output_shape_dims[1] * extended_output_shape_dims[2] *
136 extended_output_shape_dims[3] * extended_output_shape_dims[4] *
137 extended_output_shape_dims[5];
138 }
139 return Ok;
140}

References desc1, desc2, onert_micro::core::OMRuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input2_offset, multiplyByQuantizedMultiplier(), NdArrayDescsForElementwiseBroadcast(), onert_micro::Ok, onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, output_shape, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, and onert_micro::core::ArithmeticQuantParams::quantized_activation_min.

Referenced by onert_micro::execute::execute_kernel_CircleMul().

◆ BroadcastRecursiveDimensions()

template<typename T >
void onert_micro::execute::pal::BroadcastRecursiveDimensions ( const core::ArithmeticQuantParams params,
int  dimension,
size_t *  input1_offset_p,
size_t *  input2_offset_p,
size_t *  output_offset,
size_t *  compressed_input1_stride,
size_t *  compressed_input2_stride,
size_t *  compressed_output_shape,
const T *  input1_data,
const T *  input2_data,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 263 of file PALArithmeticOpCommon.h.

269{
270 if (dimension > 0)
271 {
272 for (size_t c = 0; c < compressed_output_shape[dimension]; ++c)
273 {
274 size_t input1_offset_c = *input1_offset_p;
275 size_t input2_offset_c = *input2_offset_p;
276 BroadcastRecursiveDimensions(params, dimension - 1, &input1_offset_c, &input2_offset_c,
277 output_offset, compressed_input1_stride,
278 compressed_input2_stride, compressed_output_shape, input1_data,
279 input2_data, output_data, binary_func);
280 *input1_offset_p += compressed_input1_stride[dimension];
281 *input2_offset_p += compressed_input2_stride[dimension];
282 }
283 }
284 else
285 {
286 assert(dimension == 0);
287 bool input1_is_broadcast = compressed_input1_stride[dimension] == 0;
288 bool input2_is_broadcast = compressed_input2_stride[dimension] == 0;
289 assert(!(input1_is_broadcast && input2_is_broadcast));
290 const T *input1_data_ptr = input1_data + *input1_offset_p;
291 const T *input2_data_ptr = input2_data + *input2_offset_p;
292 T *output_data_ptr = output_data + *output_offset;
293 if (input1_is_broadcast)
294 {
295 // input1 is broadcast.
296 BroadcastInput1<T>(compressed_output_shape[dimension], params, input1_data_ptr,
297 input2_data_ptr, output_data_ptr, binary_func);
298 *input2_offset_p += compressed_output_shape[dimension];
299 }
300 else if (input2_is_broadcast)
301 {
302 // input2 is broadcast.
303 BroadcastInput2<T>(compressed_output_shape[dimension], params, input1_data_ptr,
304 input2_data_ptr, output_data_ptr, binary_func);
305 *input1_offset_p += compressed_output_shape[dimension];
306 }
307 else
308 {
309 // Add element-wise.
310 ElementWise<T>(compressed_output_shape[dimension], params, input1_data_ptr, input2_data_ptr,
311 output_data_ptr, binary_func);
312 *input1_offset_p += compressed_output_shape[dimension];
313 *input2_offset_p += compressed_output_shape[dimension];
314 }
315 *output_offset += compressed_output_shape[dimension];
316 }
317}

References BroadcastRecursiveDimensions().

Referenced by BroadcastBinaryFunction6DSlow(), and BroadcastRecursiveDimensions().
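
At dimension 0 the recursion bottoms out in one of the three inner loops documented above (ElementWise(), BroadcastInput1(), BroadcastInput2()), chosen by which operand has a compressed stride of 0. The standalone sketch below shows that dispatch in simplified form, with plain int addition standing in for binary_func and the quantization parameters omitted; it is illustrative only.

#include <cstdio>

void elementWise(int n, const int *a, const int *b, int *out)
{
  for (int i = 0; i < n; ++i)
    out[i] = a[i] + b[i]; // both operands advance
}

void broadcastInput1(int n, const int *a, const int *b, int *out)
{
  for (int i = 0; i < n; ++i)
    out[i] = a[0] + b[i]; // input1 is pinned (stride 0)
}

void broadcastInput2(int n, const int *a, const int *b, int *out)
{
  for (int i = 0; i < n; ++i)
    out[i] = a[i] + b[0]; // input2 is pinned (stride 0)
}

int main()
{
  const int a[3] = {1, 2, 3}, b[3] = {10, 20, 30};
  int out[3];

  const int stride1 = 0, stride2 = 1; // pretend input1 is broadcast at this dimension
  if (stride1 == 0)
    broadcastInput1(3, a, b, out);
  else if (stride2 == 0)
    broadcastInput2(3, a, b, out);
  else
    elementWise(3, a, b, out);

  std::printf("%d %d %d\n", out[0], out[1], out[2]); // prints: 11 21 31
  return 0;
}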

◆ BroadcastSquaredDifference4DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastSquaredDifference4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 38 of file PALSquaredDifferenceCommon.h.

42{
43 BroadcastArithmeticOp4DSlow<T, SquaredDifferenceFn<T>>(
44 params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data);
45 return Ok;
46}

References onert_micro::Ok, and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleSquaredDifference().

◆ BroadcastSub4DSlow() [1/2]

OMStatus onert_micro::execute::pal::BroadcastSub4DSlow ( const core::ArithmeticQuantParams params,
const core::OMRuntimeShape input1_shape,
const int8_t *  input1_data,
const core::OMRuntimeShape input2_shape,
const int8_t *  input2_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 67 of file PALSubCommon.h.

71{
72 BroadcastBinaryFunction6DSlow(params, input1_shape, input1_data, input2_shape, input2_data,
73 output_shape, output_data, SubFunc);
74 return Ok;
75}

References BroadcastBinaryFunction6DSlow(), onert_micro::Ok, output_shape, and SubFunc().

◆ BroadcastSub4DSlow() [2/2]

template<typename T >
OMStatus onert_micro::execute::pal::BroadcastSub4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 57 of file PALSubCommon.h.

61{
62 BroadcastArithmeticOp4DSlow<T, SubFn<T>>(params, input1_shape, input1_data, input2_shape,
63 input2_data, output_shape, output_data);
64 return Ok;
65}

References onert_micro::Ok, and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleSub().

◆ Cast()

template<typename FromT , typename ToT >
OMStatus onert_micro::execute::pal::Cast ( const core::OMRuntimeShape input_shape,
const FromT *  input_data,
const core::OMRuntimeShape output_shape,
ToT *  output_data 
)
inline

Definition at line 34 of file PALCast.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 return UnknownError;
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = static_cast<ToT>(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleCast().
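
Cast() is a plain element-wise static_cast, so float-to-integer conversions truncate toward zero rather than round. A tiny standalone illustration, with values chosen to stay inside the range of int8_t:

#include <cstdint>
#include <cstdio>

int main()
{
  const float input[3] = {3.7f, -1.2f, 120.9f};
  int8_t output[3];
  for (int i = 0; i < 3; ++i)
    output[i] = static_cast<int8_t>(input[i]); // truncates toward zero
  std::printf("%d %d %d\n", output[0], output[1], output[2]); // prints: 3 -1 120
  return 0;
}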

◆ Ceil()

template<typename T >
OMStatus onert_micro::execute::pal::Ceil ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALCeil.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::ceil(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleCeil().

◆ ComparisonNoScaling()

template<typename T >
void onert_micro::execute::pal::ComparisonNoScaling ( const int64_t  flat_size,
const T *  input1_data,
const T *  input2_data,
bool *  output_data,
bool   F(T, T) 
)
inline

Definition at line 65 of file PALComparisons.h.

67{
68 for (int64_t i = 0; i < flat_size; ++i)
69 {
70 output_data[i] = F(input1_data[i], input2_data[i]);
71 }
72}

◆ ComparisonWithScaling()

template<typename T , typename AccType >
void onert_micro::execute::pal::ComparisonWithScaling ( const core::ComparisonParams op_params,
const int64_t  flat_size,
const T *  input1_data,
const T *  input2_data,
bool *  output_data,
bool   F(AccType, AccType) 
)
inline

Definition at line 122 of file PALComparisons.h.

125{
126 int left_shift = op_params.left_shift;
127 int32_t input1_offset = op_params.input1_offset;
128 int32_t input1_multiplier = op_params.input1_multiplier;
129 int input1_shift = op_params.input1_shift;
130 int32_t input2_offset = op_params.input2_offset;
131 int32_t input2_multiplier = op_params.input2_multiplier;
132 int input2_shift = op_params.input2_shift;
133
134 for (int64_t i = 0; i < flat_size; ++i)
135 {
136 const int32_t input1_val = input1_offset + input1_data[i];
137 const int32_t input2_val = input2_offset + input2_data[i];
138 const int32_t shifted_input1_val = input1_val * (1 << left_shift);
139 const int32_t shifted_input2_val = input2_val * (1 << left_shift);
140 const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
141 shifted_input1_val, input1_multiplier, input1_shift);
142 const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
143 shifted_input2_val, input2_multiplier, input2_shift);
144 output_data[i] = F(scaled_input1_val, scaled_input2_val);
145 }
146}

References onert_micro::core::ComparisonParams::input1_multiplier, onert_micro::core::ComparisonParams::input1_offset, onert_micro::core::ComparisonParams::input1_shift, onert_micro::core::ComparisonParams::input2_multiplier, onert_micro::core::ComparisonParams::input2_offset, onert_micro::core::ComparisonParams::input2_shift, onert_micro::core::ComparisonParams::left_shift, and multiplyByQuantizedMultiplierSmallerThanOneExp().

◆ Concatenation()

template<typename Scalar >
OMStatus onert_micro::execute::pal::Concatenation ( core::ConcatenationParams params,
std::vector< uint32_t > &  input_shapes,
std::vector< const Scalar * > &  input_data,
const core::OMRuntimeShape output_shape,
Scalar *  output_data 
)

Definition at line 30 of file PALConcatenation.h.

33{
34 int axis = params.axis;
35 int inputs_count = params.num_inputs;
36 const int concat_dimensions = output_shape.dimensionsCount();
37
38 int64_t concat_size = 0;
39 for (int i = 0; i < inputs_count; i++)
40 {
41 concat_size += input_shapes[i];
42 }
43 int64_t outer_size = 1;
44 for (int i = 0; i < axis; ++i)
45 {
46 outer_size *= output_shape.dims(i);
47 }
48 // For all input arrays,
49 int64_t base_inner_size = 1;
50 for (int i = axis + 1; i < concat_dimensions; ++i)
51 {
52 base_inner_size *= output_shape.dims(i);
53 }
54
55 Scalar *output_ptr = output_data;
56 for (int k = 0; k < outer_size; k++)
57 {
58 for (int i = 0; i < inputs_count; ++i)
59 {
60 const int copy_size = input_shapes[i] * base_inner_size;
61 const Scalar *input_ptr = input_data[i] + k * copy_size;
62 memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar));
63 output_ptr += copy_size;
64 }
65 }
66
67 return Ok;
68}

References onert_micro::core::ConcatenationParams::axis, luci_interpreter::RuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::ConcatenationParams::num_inputs, onert_micro::Ok, and output_shape.
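
The outer_size / base_inner_size decomposition is easier to follow with concrete numbers. Below, two inputs of shape [2,3,4] and [2,5,4] are concatenated along axis 1 into a [2,8,4] output: outer_size is 2, base_inner_size is 4, and each outer slice copies 3*4 and then 5*4 contiguous elements. The buffers and constants are made up for illustration; this is not the library API.

#include <cstdio>
#include <cstring>
#include <vector>

int main()
{
  const int num_inputs = 2;
  const int axis_sizes[num_inputs] = {3, 5}; // each input's extent along the concat axis
  const int outer_size = 2;                  // product of output dims before the axis
  const int base_inner_size = 4;             // product of output dims after the axis

  std::vector<float> input0(2 * 3 * 4, 1.f), input1(2 * 5 * 4, 2.f);
  const float *inputs[num_inputs] = {input0.data(), input1.data()};
  std::vector<float> output(2 * 8 * 4);

  float *out_ptr = output.data();
  for (int k = 0; k < outer_size; ++k)
  {
    for (int i = 0; i < num_inputs; ++i)
    {
      const int copy_size = axis_sizes[i] * base_inner_size; // 12, then 20 floats
      std::memcpy(out_ptr, inputs[i] + k * copy_size, copy_size * sizeof(float));
      out_ptr += copy_size;
    }
  }

  // The first 12 outputs come from input0, the next 20 from input1, and so on.
  std::printf("output[11]=%g output[12]=%g\n", output[11], output[12]); // prints: 1 2
  return 0;
}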

◆ ConvFloat()

OMStatus onert_micro::execute::pal::ConvFloat ( const core::FloatConv2D params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape filter_shape,
const float *  filter_data,
const float *  bias_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)

Definition at line 31 of file PALConv2DCommon.h.

35{
36 const int stride_width = params->stride_w;
37 const int stride_height = params->stride_h;
38 const int dilation_width_factor = params->dilation_width_factor;
39 const int dilation_height_factor = params->dilation_height_factor;
40 const int pad_width = params->pad_w;
41 const int pad_height = params->pad_h;
42 const float output_activation_min = params->activation_min;
43 const float output_activation_max = params->activation_max;
44
45 const auto batches = input_shape.dims(0);
46 const int input_height = input_shape.dims(1);
47 const int input_width = input_shape.dims(2);
48 const int input_depth = input_shape.dims(3);
49 const int output_depth = filter_shape.dims(0);
50 const int filter_height = filter_shape.dims(1);
51 const int filter_width = filter_shape.dims(2);
52 const int output_height = output_shape.dims(1);
53 const int output_width = output_shape.dims(2);
54 for (int batch = 0; batch < batches; ++batch)
55 {
56 for (int out_y = 0; out_y < output_height; ++out_y)
57 {
58 const int in_y_origin = (out_y * stride_height) - pad_height;
59 for (int out_x = 0; out_x < output_width; ++out_x)
60 {
61 const int in_x_origin = (out_x * stride_width) - pad_width;
62 for (int out_channel = 0; out_channel < output_depth; ++out_channel)
63 {
64 float total = 0.f;
65 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
66 {
67 const int in_y = in_y_origin + dilation_height_factor * filter_y;
68 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
69 {
70 const int in_x = in_x_origin + dilation_width_factor * filter_x;
71
72 // Zero padding by omitting the areas outside the image.
73 const bool is_point_inside_image =
74 (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
75
76 if (!is_point_inside_image)
77 {
78 continue;
79 }
80
81 for (int in_channel = 0; in_channel < input_depth; ++in_channel)
82 {
83 const int input_data_offset =
84 ((batch * input_height + in_y) * input_width + in_x) * input_depth + in_channel;
85
86 const int filter_data_offset =
87 ((out_channel * filter_height + filter_y) * filter_width + filter_x) *
88 input_depth +
89 in_channel;
90
91 const float input_value = input_data[input_data_offset];
92 const float filter_value = filter_data[filter_data_offset];
93 total += (input_value * filter_value);
94 }
95 }
96 }
97 // float bias_value = 0.0f;
98 if (bias_data)
99 {
100 total += bias_data[out_channel];
101 }
102
103 const int output_data_offset =
104 ((batch * output_height + out_y) * output_width + out_x) * output_depth + out_channel;
105
106 output_data[output_data_offset] =
107 std::min(std::max(total, output_activation_min), output_activation_max);
108 }
109 }
110 }
111 }
112 return Ok;
113}

References onert_micro::core::FloatConv2D::activation_max, onert_micro::core::FloatConv2D::activation_min, onert_micro::core::FloatConv2D::dilation_height_factor, onert_micro::core::FloatConv2D::dilation_width_factor, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::Ok, output_shape, onert_micro::core::FloatConv2D::pad_h, onert_micro::core::FloatConv2D::pad_w, onert_micro::core::FloatConv2D::stride_h, and onert_micro::core::FloatConv2D::stride_w.

Referenced by onert_micro::execute::execute_kernel_CircleConv2D().
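
The padding behaviour of ConvFloat() comes entirely from the coordinate arithmetic in_x = out_x * stride_w - pad_w + dilation_w * filter_x, with out-of-range taps skipped (implicit zero padding). A standalone check with made-up sizes:

#include <cstdio>

int main()
{
  const int input_width = 5, filter_width = 3;
  const int stride_w = 2, pad_w = 1, dilation_w = 1;
  const int output_width = 3;

  for (int out_x = 0; out_x < output_width; ++out_x)
  {
    const int in_x_origin = out_x * stride_w - pad_w;
    std::printf("out_x=%d taps:", out_x);
    for (int filter_x = 0; filter_x < filter_width; ++filter_x)
    {
      const int in_x = in_x_origin + dilation_w * filter_x;
      const bool inside = (in_x >= 0) && (in_x < input_width);
      std::printf(" %d%s", in_x, inside ? "" : "(skipped)");
    }
    std::printf("\n");
  }
  // out_x=0 taps: -1(skipped) 0 1
  // out_x=1 taps: 1 2 3
  // out_x=2 taps: 3 4 5(skipped)
  return 0;
}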

◆ ConvPerChannel()

OMStatus onert_micro::execute::pal::ConvPerChannel ( const core::ConvQuant params,
const core::OMRuntimeShape input_shape,
const int8_t *  input_data,
const core::OMRuntimeShape filter_shape,
const int8_t *  filter_data,
const int32_t *  bias_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 36 of file PALConv2D.h.

40{
41 cmsis_nn_conv_params conv_params;
42 conv_params.dilation.h = params.dilation_height_factor;
43 conv_params.dilation.w = params.dilation_width_factor;
44
45 assert(conv_params.dilation.h == 1);
46 assert(conv_params.dilation.w == 1);
47
48 conv_params.input_offset = params.input_offset;
49 conv_params.output_offset = params.output_offset;
50 conv_params.stride.h = params.stride_height;
51 conv_params.stride.w = params.stride_width;
52 conv_params.padding.h = params.pad_h;
53 conv_params.padding.w = params.pad_w;
54 conv_params.activation.min = params.quantized_activation_min;
55 conv_params.activation.max = params.quantized_activation_max;
56
57 cmsis_nn_per_channel_quant_params quant_params;
58 quant_params.multiplier = const_cast<int32_t *>(params.per_channel_output_multiplier.data());
59 quant_params.shift = const_cast<int32_t *>(
60 reinterpret_cast<const int32_t *>(params.per_channel_output_shift.data()));
61
62 assert(conv_params.activation.min <= conv_params.activation.max);
63 const int batch_size = input_shape.dims(0);
64 const int input_depth = input_shape.dims(3);
65 const int output_depth = filter_shape.dims(0);
66
67 cmsis_nn_dims input_dims;
68 input_dims.n = batch_size;
69 input_dims.h = input_shape.dims(1);
70 input_dims.w = input_shape.dims(2);
71 input_dims.c = input_depth;
72
73 cmsis_nn_dims filter_dims;
74 filter_dims.n = output_depth;
75 filter_dims.h = filter_shape.dims(1);
76 filter_dims.w = filter_shape.dims(2);
77 filter_dims.c = input_depth;
78
79 cmsis_nn_dims bias_dims;
80 bias_dims.n = 1;
81 bias_dims.h = 1;
82 bias_dims.w = 1;
83 bias_dims.c = output_depth;
84
85 cmsis_nn_dims output_dims;
86 output_dims.n = batch_size;
87 output_dims.h = output_shape.dims(1);
88 output_dims.w = output_shape.dims(2);
89 output_dims.c = output_depth;
90
91 auto buf_size =
92 arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims, &filter_dims, &output_dims);
93
94 auto buffer = std::make_unique<int8_t[]>(buf_size);
95 assert(buffer != nullptr);
96
97 cmsis_nn_context ctx;
98 ctx.buf = buffer.get();
99 ctx.size = buf_size;
100
101 auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
102 &filter_dims, filter_data, &bias_dims, bias_data, &output_dims,
103 output_data);
104
105 assert(res == ARM_CMSIS_NN_SUCCESS);
106 if (res != ARM_CMSIS_NN_SUCCESS)
107 return CmsisNNError;
108 return Ok;
109}

References onert_micro::CmsisNNError, onert_micro::core::ConvQuant::dilation_height_factor, onert_micro::core::ConvQuant::dilation_width_factor, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::ConvQuant::input_offset, onert_micro::Ok, onert_micro::core::ConvQuant::output_offset, output_shape, onert_micro::core::ConvQuant::pad_h, onert_micro::core::ConvQuant::pad_w, onert_micro::core::ConvQuant::per_channel_output_multiplier, onert_micro::core::ConvQuant::per_channel_output_shift, onert_micro::core::ConvQuant::quantized_activation_max, onert_micro::core::ConvQuant::quantized_activation_min, onert_micro::core::ConvQuant::stride_height, and onert_micro::core::ConvQuant::stride_width.

Referenced by onert_micro::execute::execute_kernel_CircleConv2D().

◆ copyDimsToDesc()

template<int N>
void onert_micro::execute::pal::copyDimsToDesc ( const core::OMRuntimeShape input_shape,
NdArrayDesc< N > *  desc_out 
)
inline

Definition at line 53 of file ProcessBroadcastShapes.h.

54{
55 int desc_stride = 1;
56 for (int i = N - 1; i >= 0; --i)
57 {
58 desc_out->extents[i] = input_shape.dims(i);
59 desc_out->strides[i] = desc_stride;
60 desc_stride *= input_shape.dims(i);
61 }
62}

References onert_micro::core::OMRuntimeShape::dims(), onert_micro::execute::pal::NdArrayDesc< N >::extents, and onert_micro::execute::pal::NdArrayDesc< N >::strides.

Referenced by TransposeImpl().
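
For a concrete shape, the descriptor filled in by copyDimsToDesc() is just the usual row-major strides, with the innermost dimension at stride 1. A standalone illustration using plain arrays instead of core::OMRuntimeShape and NdArrayDesc<N>:

#include <cstdio>

int main()
{
  const int N = 4;
  const int dims[N] = {1, 2, 3, 4}; // a [2,3,4] shape extended to rank 4
  int extents[N], strides[N];

  int desc_stride = 1;
  for (int i = N - 1; i >= 0; --i)
  {
    extents[i] = dims[i];
    strides[i] = desc_stride;
    desc_stride *= dims[i];
  }

  for (int i = 0; i < N; ++i)
    std::printf("dim %d: extent=%d stride=%d\n", i, extents[i], strides[i]);
  // strides are 24, 12, 4, 1: element (b, y, x, c) sits at b*24 + y*12 + x*4 + c.
  return 0;
}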

◆ Cos()

template<typename T >
OMStatus onert_micro::execute::pal::Cos ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALCosCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::cos(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleCos().

◆ DepthwiseConv2D()

template<typename T >
OMStatus onert_micro::execute::pal::DepthwiseConv2D ( const core::FloatConv2D params,
const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape filter_shape,
const T *  filter_data,
const T *  bias_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALDepthwiseConv2DCommon.h.

37{
38 assert(false && "Not IMPL yet");
39}

◆ DepthwiseConv2D< float >()

template<>
OMStatus onert_micro::execute::pal::DepthwiseConv2D< float > ( const core::FloatConv2D params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape filter_shape,
const float *  filter_data,
const float *  bias_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 41 of file PALDepthwiseConv2DCommon.h.

46{
47 const int stride_width = params->stride_w;
48 const int stride_height = params->stride_h;
49 const int dilation_width_factor = params->dilation_width_factor;
50 const int dilation_height_factor = params->dilation_height_factor;
51 const int pad_width = params->pad_w;
52 const int pad_height = params->pad_h;
53 const int depth_multiplier = params->depth_multiplier;
54 const float output_activation_min = params->activation_min;
55 const float output_activation_max = params->activation_max;
56
57 const auto batches = input_shape.dims(0);
58 const int input_height = input_shape.dims(1);
59 const int input_width = input_shape.dims(2);
60 const int input_depth = input_shape.dims(3);
61 const int output_depth = filter_shape.dims(0);
62 const int filter_height = filter_shape.dims(1);
63 const int filter_width = filter_shape.dims(2);
64 const int output_height = output_shape.dims(1);
65 const int output_width = output_shape.dims(2);
66 for (int b = 0; b < batches; ++b)
67 {
68 for (int out_y = 0; out_y < output_height; ++out_y)
69 {
70 for (int out_x = 0; out_x < output_width; ++out_x)
71 {
72 for (int ic = 0; ic < input_depth; ++ic)
73 {
74 for (int m = 0; m < depth_multiplier; m++)
75 {
76 const int oc = m + ic * depth_multiplier;
77 const int in_x_origin = (out_x * stride_width) - pad_width;
78 const int in_y_origin = (out_y * stride_height) - pad_height;
79 float total = 0.f;
80 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
81 {
82 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
83 {
84 const int in_x = in_x_origin + dilation_width_factor * filter_x;
85 const int in_y = in_y_origin + dilation_height_factor * filter_y;
86 // If the location is outside the bounds of the input image,
87 // use zero as a default value.
88 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
89 {
90 float input_value = input_data[offset(input_shape.dimsData(), b, in_y, in_x, ic)];
91 float filter_value =
92 filter_data[offset(filter_shape.dimsData(), 0, filter_y, filter_x, oc)];
93 total += (input_value * filter_value);
94 }
95 }
96 }
97 float bias_value = 0.0f;
98 if (bias_data)
99 {
100 bias_value = bias_data[oc];
101 }
102 output_data[offset(output_shape.dimsData(), b, out_y, out_x, oc)] =
103 activationFunctionWithMinMax(total + bias_value, output_activation_min,
104 output_activation_max);
105 }
106 }
107 }
108 }
109 }
110 return Ok;
111}

References onert_micro::core::FloatConv2D::activation_max, onert_micro::core::FloatConv2D::activation_min, activationFunctionWithMinMax(), onert_micro::core::FloatConv2D::depth_multiplier, onert_micro::core::FloatConv2D::dilation_height_factor, onert_micro::core::FloatConv2D::dilation_width_factor, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), m, offset(), onert_micro::Ok, output_shape, onert_micro::core::FloatConv2D::pad_h, onert_micro::core::FloatConv2D::pad_w, onert_micro::core::FloatConv2D::stride_h, and onert_micro::core::FloatConv2D::stride_w.

Referenced by onert_micro::execute::execute_kernel_CircleDepthwiseConv2D().
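
The only non-obvious index in the depthwise loop is the output channel: each input channel ic fans out into depth_multiplier consecutive output channels, oc = m + ic * depth_multiplier. A quick standalone check with made-up sizes:

#include <cstdio>

int main()
{
  const int input_depth = 3, depth_multiplier = 2; // output depth = 6
  for (int ic = 0; ic < input_depth; ++ic)
    for (int m = 0; m < depth_multiplier; ++m)
      std::printf("ic=%d m=%d -> oc=%d\n", ic, m, m + ic * depth_multiplier);
  // oc runs 0,1 for ic=0, then 2,3 for ic=1, then 4,5 for ic=2.
  return 0;
}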

◆ DepthwiseConvPerChannel()

OMStatus onert_micro::execute::pal::DepthwiseConvPerChannel ( const core::ConvQuant params,
const core::OMRuntimeShape input_shape,
const int8_t *  input_data,
const core::OMRuntimeShape filter_shape,
const int8_t *  filter_data,
const int32_t *  bias_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 36 of file PALDepthwiseConv2D.h.

41{
42 cmsis_nn_dw_conv_params conv_params;
43 conv_params.dilation.h = params.dilation_height_factor;
44 conv_params.dilation.w = params.dilation_width_factor;
45
46 assert(conv_params.dilation.h == 1);
47 assert(conv_params.dilation.w == 1);
48
49 conv_params.input_offset = params.input_offset;
50 conv_params.output_offset = params.output_offset;
51 conv_params.stride.h = params.stride_height;
52 conv_params.stride.w = params.stride_width;
53 conv_params.padding.h = params.pad_h;
54 conv_params.padding.w = params.pad_w;
55 conv_params.activation.min = params.quantized_activation_min;
56 conv_params.activation.max = params.quantized_activation_max;
57 conv_params.ch_mult = params.depth_multiplier;
58
59 cmsis_nn_per_channel_quant_params quant_params;
60 quant_params.multiplier = const_cast<int32_t *>(params.per_channel_output_multiplier.data());
61 quant_params.shift = const_cast<int32_t *>(
62 reinterpret_cast<const int32_t *>(params.per_channel_output_shift.data()));
63
64 assert(conv_params.activation.min <= conv_params.activation.max);
65 const int batch_size = input_shape.dims(0);
66 const int input_depth = input_shape.dims(3);
67 const int output_depth = filter_shape.dims(0);
68
69 cmsis_nn_dims input_dims;
70 input_dims.n = batch_size;
71 input_dims.h = input_shape.dims(1);
72 input_dims.w = input_shape.dims(2);
73 input_dims.c = input_depth;
74
75 cmsis_nn_dims filter_dims;
76 filter_dims.n = output_depth;
77 filter_dims.h = filter_shape.dims(1);
78 filter_dims.w = filter_shape.dims(2);
79 filter_dims.c = input_depth;
80
81 cmsis_nn_dims bias_dims;
82 bias_dims.n = 1;
83 bias_dims.h = 1;
84 bias_dims.w = 1;
85 bias_dims.c = output_depth;
86
87 cmsis_nn_dims output_dims;
88 output_dims.n = batch_size;
89 output_dims.h = output_shape.dims(1);
90 output_dims.w = output_shape.dims(2);
91 output_dims.c = output_depth;
92
93 auto buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
94 &filter_dims, &output_dims);
95
96 auto buffer = std::make_unique<int8_t[]>(buf_size);
97 assert(buffer != nullptr);
98
99 cmsis_nn_context ctx;
100 ctx.buf = buffer.get();
101 ctx.size = buf_size;
102
103 auto res = arm_depthwise_conv_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims,
104 input_data, &filter_dims, filter_data, &bias_dims,
105 bias_data, &output_dims, output_data);
106
107 assert(res == ARM_CMSIS_NN_SUCCESS);
108 if (res != ARM_CMSIS_NN_SUCCESS)
109 return CmsisNNError;
110 return Ok;
111}

References onert_micro::CmsisNNError, onert_micro::core::ConvQuant::depth_multiplier, onert_micro::core::ConvQuant::dilation_height_factor, onert_micro::core::ConvQuant::dilation_width_factor, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::ConvQuant::input_offset, onert_micro::Ok, onert_micro::core::ConvQuant::output_offset, output_shape, onert_micro::core::ConvQuant::pad_h, onert_micro::core::ConvQuant::pad_w, onert_micro::core::ConvQuant::per_channel_output_multiplier, onert_micro::core::ConvQuant::per_channel_output_shift, onert_micro::core::ConvQuant::quantized_activation_max, onert_micro::core::ConvQuant::quantized_activation_min, onert_micro::core::ConvQuant::stride_height, and onert_micro::core::ConvQuant::stride_width.

Referenced by onert_micro::execute::execute_kernel_CircleDepthwiseConv2D().

◆ Dequantize()

template<typename InputT , typename OutputT >
OMStatus onert_micro::execute::pal::Dequantize ( const core::QuantizationParams  op_params,
const uint32_t  flat_size,
const InputT *  input_data,
OutputT *  output_data 
)

Definition at line 35 of file PALDequantize.h.

37{
38 const int32_t zero_point = op_params.zero_point;
39 const double scale = op_params.scale;
40
41 for (uint32_t i = 0; i < flat_size; i++)
42 {
43 const int32_t val = input_data[i];
44 const auto result = static_cast<OutputT>(scale * (val - zero_point));
45 output_data[i] = result;
46 }
47 return Ok;
48}

References onert_micro::Ok, onert_micro::core::QuantizationParams::scale, and onert_micro::core::QuantizationParams::zero_point.

Referenced by onert_micro::execute::execute_kernel_CircleDequantize().
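
Dequantize() applies the usual affine rule real_value = scale * (quantized_value - zero_point). A worked example with made-up quantization parameters:

#include <cstdint>
#include <cstdio>

int main()
{
  const double scale = 0.5;
  const int32_t zero_point = -128;
  const int8_t input[4] = {-128, -127, 0, 127};
  float output[4];

  for (int i = 0; i < 4; ++i)
    output[i] = static_cast<float>(scale * (static_cast<int32_t>(input[i]) - zero_point));

  for (float v : output)
    std::printf("%g ", v); // prints: 0 0.5 64 127.5
  std::printf("\n");
  return 0;
}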

◆ Div()

template<typename T >
OMStatus onert_micro::execute::pal::Div ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 31 of file PALDivCommon.h.

33{
34 ArithmeticOp<T, DivFn<T>>(params, flat_size, input1_data, input2_data, output_data);
35 return Ok;
36}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_kernel_CircleDiv().

◆ ElementWise()

template<typename T >
void onert_micro::execute::pal::ElementWise ( const uint32_t  size,
const core::ArithmeticQuantParams params,
const T *  input1_data,
const T *  input2_data,
T *  output_data,
T(*)(T, T, const core::ArithmeticQuantParams &)  binary_func 
)

Definition at line 100 of file PALArithmeticOpCommon.h.

103{
104 for (int i = 0; i < size; ++i)
105 {
106 output_data[i] = binary_func(input1_data[i], input2_data[i], params);
107 }
108}

References size.

Referenced by Sub().

◆ Elu()

OMStatus onert_micro::execute::pal::Elu ( const int  flat_size,
const float *  input_data,
float *  output_data 
)
inline

Definition at line 32 of file PALElu.h.

33{
34 for (int i = 0; i < flat_size; i++)
35 {
36 float val = input_data[i];
37 float result = val < 0.0f ? std::exp(val) - 1 : val;
38 output_data[i] = result;
39 }
40
41 return Ok;
42}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_kernel_CircleElu().

◆ EqualFn()

template<typename T >
bool onert_micro::execute::pal::EqualFn ( T  lhs,
T  rhs 
)
inline

Definition at line 59 of file PALComparisons.h.

59{ return lhs == rhs; }

Referenced by onert_micro::execute::execute_kernel_CircleEqual().

◆ Exp()

template<typename T >
OMStatus onert_micro::execute::pal::Exp ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALExpCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::exp(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleExp().

◆ Fill()

template<typename T >
OMStatus onert_micro::execute::pal::Fill ( const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 32 of file PALFill.h.

33{
34 const uint32_t flat_size = output_shape.flatSize();
35
36 if (flat_size == -1)
37 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
38
39 assert(input_data != nullptr);
40 assert(output_data != nullptr);
41
42 for (int i = 0; i < flat_size; ++i)
43 {
44 output_data[i] = input_data[0];
45 }
46
47 return Ok;
48}

References luci_interpreter::RuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleFill().

◆ flatSizeSkipDim()

int onert_micro::execute::pal::flatSizeSkipDim ( const int32_t *  dims_data,
int  skip_dim,
int  num_dims 
)
inline

Definition at line 149 of file PALUtils.h.

150{
151 int flat_size = 1;
152 for (int i = 0; i < num_dims; ++i)
153 {
154 flat_size *= (i == skip_dim) ? 1 : dims_data[i];
155 }
156 return flat_size;
157}

Referenced by onert_micro::execute::execute_kernel_CircleL2Normalize(), FullyConnected(), FullyConnected(), FullyConnected(), and FullyConnected< int8_t >().
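
flatSizeSkipDim() is how the fully-connected kernels above derive the batch count: the product of all output dimensions except the last (depth) one. A standalone check that simply copies the documented body for illustration:

#include <cassert>
#include <cstdint>

static int flatSizeSkipDim(const int32_t *dims_data, int skip_dim, int num_dims)
{
  int flat_size = 1;
  for (int i = 0; i < num_dims; ++i)
    flat_size *= (i == skip_dim) ? 1 : dims_data[i];
  return flat_size;
}

int main()
{
  const int32_t dims[3] = {2, 5, 16}; // e.g. an output shape [2, 5, 16]
  assert(flatSizeSkipDim(dims, 2, 3) == 10); // batches = 2 * 5, skipping the depth dim
  assert(flatSizeSkipDim(dims, 0, 3) == 80); // skipping the first dim instead
  return 0;
}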

◆ Floor()

template<typename T >
OMStatus onert_micro::execute::pal::Floor ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 33 of file PALFloorCommon.h.

35{
36 const uint32_t flat_size = input_shape.flatSize();
37
38 if (flat_size == -1)
39 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
40
41 assert(input_data != nullptr);
42 assert(output_data != nullptr);
43
44 // check that input and output dimensions are equal
45 int N = input_shape.dimensionsCount();
46 assert(N == output_shape.dimensionsCount());
47
48 // check that sizes of all dimensions are equal
49 for (int i = 0; i < N; ++i)
50 {
51 assert(input_shape.dims(i) == output_shape.dims(i));
52 }
53
54 for (int i = 0; i < flat_size; i++)
55 {
56 output_data[i] = std::floor(input_data[i]);
57 }
58
59 return Ok;
60}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleFloor().

◆ FloorDiv()

void onert_micro::execute::pal::FloorDiv ( const int  flat_size,
const float *  input1_data,
const float *  input2_data,
float *  output_data 
)
inline

Definition at line 32 of file PALFloorDivCommon.h.

34{
35 BinaryOp<float, FloorDivFn<float>>(flat_size, input1_data, input2_data, output_data);
36}

Referenced by onert_micro::execute::execute_kernel_CircleFloorDiv().

◆ FloorMod()

void onert_micro::execute::pal::FloorMod ( const int  flat_size,
const float *  input1_data,
const float *  input2_data,
float *  output_data 
)
inline

Definition at line 32 of file PALFloorModCommon.h.

34{
35 BinaryOp<float, FloorModFn<float>>(flat_size, input1_data, input2_data, output_data);
36}

Referenced by onert_micro::execute::execute_kernel_CircleFloorMod().

◆ FullyConnected() [1/3]

template<typename WeightType >
OMStatus onert_micro::execute::pal::FullyConnected ( const core::FullyConnectedParams params,
const float *  input_data,
const core::OMRuntimeShape filter_shape,
const WeightType *  filter_data,
const float *  bias_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 80 of file PALFullyConnectedCommon.h.

84{
85 const float output_activation_min = params.float_activation_min;
86 const float output_activation_max = params.float_activation_max;
87
88 const int batches = flatSizeSkipDim(output_shape.dimsData(), output_shape.dimensionsCount() - 1,
89 output_shape.dimensionsCount());
90 const int output_depth = output_shape.dims(output_shape.dimensionsCount() - 1);
91 const int accum_depth = filter_shape.dims(filter_shape.dimensionsCount() - 1);
92
93 for (int b = 0; b < batches; ++b)
94 {
95 const float *weight_scale_ptr = params.weights_scales;
96 for (int out_c = 0; out_c < output_depth; ++out_c)
97 {
98 float total = 0.f;
99 for (int d = 0; d < accum_depth; ++d)
100 {
101 auto input_value = input_data[b * accum_depth + d];
102 if (std::is_same<WeightType, float>::value)
103 {
104 total += input_value * filter_data[out_c * accum_depth + d];
105 }
106 else
107 {
108 const float filter_scale = *weight_scale_ptr;
109 const float filter_value =
110 static_cast<float>(filter_data[out_c * accum_depth + d]) * filter_scale;
111 total += input_value * filter_value;
112 }
113 }
114 float bias_value = 0.0f;
115 if (bias_data)
116 {
117 bias_value = bias_data[out_c];
118 }
119 output_data[out_c + output_depth * b] =
120 std::min(std::max(total + bias_value, output_activation_min), output_activation_max);
121
122 if (std::is_same<WeightType, int8_t>::value)
123 {
124 if (params.is_channel_wise_quant)
125 weight_scale_ptr++;
126 }
127 }
128 }
129 return Ok;
130}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), flatSizeSkipDim(), onert_micro::core::FullyConnectedParams::float_activation_max, onert_micro::core::FullyConnectedParams::float_activation_min, onert_micro::core::FullyConnectedParams::is_channel_wise_quant, onert_micro::Ok, output_shape, and onert_micro::core::FullyConnectedParams::weights_scales.

◆ FullyConnected() [2/3]

template<typename InputType , typename WeightType , typename OutputType , typename BiasType >
OMStatus onert_micro::execute::pal::FullyConnected ( const core::FullyConnectedParams params,
const InputType *  input_data,
const core::OMRuntimeShape filter_shape,
const WeightType *  filter_data,
const BiasType *  bias_data,
const core::OMRuntimeShape output_shape,
OutputType *  output_data 
)

Definition at line 34 of file PALFullyConnectedCommon.h.

38{
39 const int32_t input_offset = params.input_offset;
40 const int32_t filter_offset = params.weights_offset;
41 const int32_t output_offset = params.output_offset;
42 const int32_t output_multiplier = params.output_multiplier;
43 const int output_shift = params.output_shift;
44 const int32_t output_activation_min = params.quantized_activation_min;
45 const int32_t output_activation_max = params.quantized_activation_max;
46
47 const int filter_dim_count = filter_shape.dimensionsCount();
48 const int output_dim_count = output_shape.dimensionsCount();
49 const int batches =
50 flatSizeSkipDim(output_shape.dimsData(), output_dim_count - 1, output_dim_count);
51 const int output_depth = output_shape.dims(output_dim_count - 1);
52
53 const int accum_depth = filter_shape.dims(filter_dim_count - 1);
54 for (int b = 0; b < batches; ++b)
55 {
56 for (int out_c = 0; out_c < output_depth; ++out_c)
57 {
58 BiasType acc = 0;
59 for (int d = 0; d < accum_depth; ++d)
60 {
61 int32_t input_val = input_data[b * accum_depth + d];
62 int32_t filter_val = filter_data[out_c * accum_depth + d];
63 acc += (filter_val + filter_offset) * (input_val + input_offset);
64 }
65 if (bias_data)
66 {
67 acc += bias_data[out_c];
68 }
69 int32_t acc_scaled = multiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
70 acc_scaled += output_offset;
71 acc_scaled = std::max(acc_scaled, output_activation_min);
72 acc_scaled = std::min(acc_scaled, output_activation_max);
73 output_data[out_c + output_depth * b] = static_cast<OutputType>(acc_scaled);
74 }
75 }
76 return Ok;
77}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), flatSizeSkipDim(), onert_micro::core::FullyConnectedParams::input_offset, multiplyByQuantizedMultiplier(), onert_micro::Ok, onert_micro::core::FullyConnectedParams::output_multiplier, onert_micro::core::FullyConnectedParams::output_offset, output_shape, onert_micro::core::FullyConnectedParams::output_shift, onert_micro::core::FullyConnectedParams::quantized_activation_max, onert_micro::core::FullyConnectedParams::quantized_activation_min, and onert_micro::core::FullyConnectedParams::weights_offset.
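
The requantization step acc_scaled = multiplyByQuantizedMultiplier(acc, output_multiplier, output_shift) follows the usual TFLite/gemmlowp fixed-point convention: a saturating rounding doubling high multiply by a Q31 multiplier, followed by a rounding shift. The sketch below is that reference formulation written out standalone for illustration; the actual onert-micro helper lives in PALUtils.h and is assumed, not guaranteed, to match it bit-for-bit.

#include <cstdint>
#include <cstdio>
#include <limits>

// Q31 multiply: returns the high 32 bits of 2*a*b with rounding and saturation.
int32_t saturatingRoundingDoublingHighMul(int32_t a, int32_t b)
{
  const bool overflow = (a == b) && (a == std::numeric_limits<int32_t>::min());
  const int64_t ab_64 = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  const int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
  const int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
  return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32;
}

// Divide by 2^exponent with a rounding right shift (half rounds away from zero).
int32_t roundingDivideByPOT(int32_t x, int exponent)
{
  const int32_t mask = static_cast<int32_t>((1ll << exponent) - 1);
  const int32_t remainder = x & mask;
  const int32_t threshold = (mask >> 1) + ((x < 0) ? 1 : 0);
  return (x >> exponent) + ((remainder > threshold) ? 1 : 0);
}

int32_t multiplyByQuantizedMultiplierRef(int32_t x, int32_t quantized_multiplier, int shift)
{
  const int left_shift = shift > 0 ? shift : 0;
  const int right_shift = shift > 0 ? 0 : -shift;
  return roundingDivideByPOT(
    saturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
}

int main()
{
  // Scale an accumulator by roughly 0.5 * 2^-3 = 1/16:
  // a real multiplier of 0.5 encoded in Q31 is 1 << 30, with shift = -3.
  const int32_t acc = 1000;
  std::printf("%d\n", multiplyByQuantizedMultiplierRef(acc, 1 << 30, -3)); // prints: 63
  return 0;
}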

◆ FullyConnected() [3/3]

template<>
OMStatus onert_micro::execute::pal::FullyConnected ( const core::FullyConnectedParams params,
const int16_t *  input_data,
const core::OMRuntimeShape filter_shape,
const int8_t *  filter_data,
const int64_t *  bias_data,
const core::OMRuntimeShape output_shape,
int16_t *  output_data 
)

Definition at line 98 of file PALFullyConnected.h.

102{
103 const int filter_dim_count = filter_shape.dimensionsCount();
104 const int output_dim_count = output_shape.dimensionsCount();
105 const int batches =
106 flatSizeSkipDim(output_shape.dimsData(), output_dim_count - 1, output_dim_count);
107 const int output_depth = output_shape.dims(output_dim_count - 1);
108 const int accum_depth = filter_shape.dims(filter_dim_count - 1);
109
110 cmsis_nn_fc_params fc_params;
111 fc_params.input_offset = params.input_offset;
112 fc_params.output_offset = params.output_offset;
113 fc_params.filter_offset = params.weights_offset;
114 fc_params.activation.min = params.quantized_activation_min;
115 fc_params.activation.max = params.quantized_activation_max;
116
117 cmsis_nn_per_tensor_quant_params quant_params;
118 quant_params.multiplier = params.output_multiplier;
119 quant_params.shift = params.output_shift;
120
121 cmsis_nn_dims input_dims;
122 input_dims.n = batches;
123 input_dims.h = 1;
124 input_dims.w = 1;
125 input_dims.c = accum_depth;
126
127 cmsis_nn_dims filter_dims;
128 filter_dims.n = accum_depth;
129 filter_dims.h = 1;
130 filter_dims.w = 1;
131 filter_dims.c = output_depth;
132
133 cmsis_nn_dims bias_dims;
134 bias_dims.n = 1;
135 bias_dims.h = 1;
136 bias_dims.w = 1;
137 bias_dims.c = output_depth;
138
139 cmsis_nn_dims output_dims;
140 output_dims.n = batches;
141 output_dims.h = 1;
142 output_dims.w = 1;
143 output_dims.c = output_depth;
144
145 int32_t buf_size = arm_fully_connected_s16_get_buffer_size(&filter_dims);
146 auto buffer = std::make_unique<int8_t[]>(buf_size);
147 assert(buffer != nullptr);
148
149 cmsis_nn_context ctx;
150 ctx.buf = buffer.get();
151 ctx.size = buf_size;
152
153 auto res =
154 arm_fully_connected_s16(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
155 filter_data, &bias_dims, bias_data, &output_dims, output_data);
156 assert(res == ARM_CMSIS_NN_SUCCESS);
157
158 if (res != ARM_CMSIS_NN_SUCCESS)
159 return CmsisNNError;
160
161 return Ok;
162}

References onert_micro::CmsisNNError, luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), flatSizeSkipDim(), onert_micro::core::FullyConnectedParams::input_offset, onert_micro::Ok, onert_micro::core::FullyConnectedParams::output_multiplier, onert_micro::core::FullyConnectedParams::output_offset, output_shape, onert_micro::core::FullyConnectedParams::output_shift, onert_micro::core::FullyConnectedParams::quantized_activation_max, onert_micro::core::FullyConnectedParams::quantized_activation_min, and onert_micro::core::FullyConnectedParams::weights_offset.

Referenced by onert_micro::execute::execute_kernel_CircleFullyConnected().

◆ FullyConnected< int8_t >()

template<>
OMStatus onert_micro::execute::pal::FullyConnected< int8_t > ( const core::FullyConnectedParams params,
const int8_t *  input_data,
const core::OMRuntimeShape filter_shape,
const int8_t *  filter_data,
const int32_t *  bias_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 32 of file PALFullyConnected.h.

36{
37 const int filter_dim_count = filter_shape.dimensionsCount();
38 const int output_dim_count = output_shape.dimensionsCount();
39 const int batches =
40 flatSizeSkipDim(output_shape.dimsData(), output_dim_count - 1, output_dim_count);
41 const int output_depth = output_shape.dims(output_dim_count - 1);
42 const int accum_depth = filter_shape.dims(filter_dim_count - 1);
43
44 cmsis_nn_fc_params fc_params;
45 fc_params.input_offset = params.input_offset;
46 fc_params.output_offset = params.output_offset;
47 fc_params.filter_offset = params.weights_offset;
48 fc_params.activation.min = params.quantized_activation_min;
49 fc_params.activation.max = params.quantized_activation_max;
50
51 cmsis_nn_per_tensor_quant_params quant_params;
52 quant_params.multiplier = params.output_multiplier;
53 quant_params.shift = params.output_shift;
54
55 cmsis_nn_dims input_dims;
56 input_dims.n = batches;
57 input_dims.h = 1;
58 input_dims.w = 1;
59 input_dims.c = accum_depth;
60
61 cmsis_nn_dims filter_dims;
62 filter_dims.n = accum_depth;
63 filter_dims.h = 1;
64 filter_dims.w = 1;
65 filter_dims.c = output_depth;
66
67 cmsis_nn_dims bias_dims;
68 bias_dims.n = 1;
69 bias_dims.h = 1;
70 bias_dims.w = 1;
71 bias_dims.c = output_depth;
72
73 cmsis_nn_dims output_dims;
74 output_dims.n = batches;
75 output_dims.h = 1;
76 output_dims.w = 1;
77 output_dims.c = output_depth;
78
79 int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
80 auto buffer = std::make_unique<int8_t[]>(buf_size);
81 assert(buffer != nullptr);
82
83 cmsis_nn_context ctx;
84 ctx.buf = buffer.get();
85 ctx.size = buf_size;
86
87 auto res =
88 arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
89 filter_data, &bias_dims, bias_data, &output_dims, output_data);
90 assert(res == ARM_CMSIS_NN_SUCCESS);
91 if (res != ARM_CMSIS_NN_SUCCESS)
92 return CmsisNNError;
93
94 return Ok;
95}

References onert_micro::CmsisNNError, luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), flatSizeSkipDim(), onert_micro::core::FullyConnectedParams::input_offset, onert_micro::Ok, onert_micro::core::FullyConnectedParams::output_multiplier, onert_micro::core::FullyConnectedParams::output_offset, output_shape, onert_micro::core::FullyConnectedParams::output_shift, onert_micro::core::FullyConnectedParams::quantized_activation_max, onert_micro::core::FullyConnectedParams::quantized_activation_min, and onert_micro::core::FullyConnectedParams::weights_offset.

◆ GatherND()

template<typename ParamsT , typename IndicesT >
OMStatus onert_micro::execute::pal::GatherND ( core::OMRuntimeShape  params_shape,
const ParamsT *  param_data,
core::OMRuntimeShape  indices_shape,
const IndicesT *  index_data,
ParamsT *  output_data 
)
inline

Definition at line 35 of file PALGatherND.h.

38{
39 const int indices_dims = indices_shape.dimensionsCount();
40 const int indices_nd = indices_shape.dims(indices_dims - 1);
41 const int params_dims = params_shape.dimensionsCount();
42
43 int n_slices = 1;
44 for (int i = 0; i < indices_dims - 1; ++i)
45 {
46 n_slices *= indices_shape.dims(i);
47 }
48
49 // If indices[-1] == params.rank, fetch single elements.
50 // If indices[-1] < params.rank, fetch slices.
51 int slice_size = 1;
52 for (int i = indices_nd; i < params_dims; ++i)
53 {
54 slice_size *= params_shape.dims(i);
55 }
56
57 int params_flat_size = params_shape.flatSize();
58 int remain_flat_size = params_flat_size;
59
60 // Number of elements per dimension
61 int dims_to_count[MAX_INDICES_ND];
62 for (int i = 0; i < indices_nd; ++i)
63 {
64 dims_to_count[i] = remain_flat_size / params_shape.dims(i);
65 remain_flat_size = dims_to_count[i];
66 }
67
68 for (int i = 0; i < n_slices; ++i)
69 {
70 int from_pos = 0;
71 for (int j = 0; j < indices_nd; ++j)
72 {
73 int offset = i * indices_nd + j;
74 IndicesT index = index_data[offset];
75 from_pos += index * dims_to_count[j];
76 }
77 if (from_pos < 0 || from_pos + slice_size > params_flat_size)
78 {
79 assert(false && "GatherND error");
80 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
81 }
82 std::memcpy(output_data + i * slice_size, param_data + from_pos, sizeof(ParamsT) * slice_size);
83 }
84
85 return Ok;
86}

References onert_micro::core::OMRuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::flatSize(), MAX_INDICES_ND, offset(), onert_micro::Ok, OM_LOG_AND_RETURN, and onert_micro::UnknownError.
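A minimal usage sketch, not taken from the onert-micro sources: it gathers two whole rows from a 3x2 params tensor using indices of shape [2, 1], so indices_nd == 1 and each copied slice has 2 elements. The shape-building calls follow the OMRuntimeShape API used elsewhere on this page; the include path is an assumption.

#include <cstdint>
#include "PALGatherND.h" // file name from the definition note above; the include path may differ

void gather_nd_example()
{
  using namespace onert_micro;

  core::OMRuntimeShape params_shape(2); // rank-2 params tensor: 3 rows of 2 values
  params_shape.setDim(0, 3);
  params_shape.setDim(1, 2);
  const float params[] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};

  core::OMRuntimeShape indices_shape(2); // [2, 1]: two indices, each addressing one row
  indices_shape.setDim(0, 2);
  indices_shape.setDim(1, 1);
  const int32_t indices[] = {2, 0};

  float output[4] = {};
  OMStatus status = execute::pal::GatherND(params_shape, params, indices_shape, indices, output);
  // status == Ok and output == {5, 6, 1, 2}: row 2 followed by row 0.
  (void)status;
}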

◆ getActivationParams() [1/3]

template<typename P >
void onert_micro::execute::pal::getActivationParams ( const P &  params,
float *  min,
float *  max 
)
inline

Definition at line 126 of file PALUtils.h.

127{
128 *min = params.float_activation_min;
129 *max = params.float_activation_max;
130}

◆ getActivationParams() [2/3]

template<typename P >
void onert_micro::execute::pal::getActivationParams ( const P &  params,
int32_t *  min,
int32_t *  max 
)
inline

Definition at line 120 of file PALUtils.h.

121{
122 *min = params.int32_activation_min;
123 *max = params.int32_activation_max;
124}

Referenced by ArithmeticOp(), ArithmeticOpScalar(), BroadcastArithmeticOp4DSlow(), QuantizedArithmeticOp(), and QuantizedBroadcastArithmeticOp4DSlow().

◆ getActivationParams() [3/3]

template<typename P >
void onert_micro::execute::pal::getActivationParams ( const P &  params,
int64_t *  min,
int64_t *  max 
)
inline

Definition at line 132 of file PALUtils.h.

133{
134 *min = params.int64_activation_min;
135 *max = params.int64_activation_max;
136}
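The three overloads are selected purely by the pointer type of the min/max outputs, so one call site works for float, int32 and int64 arithmetic params. A minimal sketch with a hypothetical stand-in struct that only provides the float fields:

// DemoParams is an illustration; any type with these field names satisfies the template.
struct DemoParams
{
  float float_activation_min = -1.f;
  float float_activation_max = 1.f;
};

void activation_params_example()
{
  DemoParams params;
  float act_min = 0.f;
  float act_max = 0.f;
  // The float* outputs pick the float overload above.
  onert_micro::execute::pal::getActivationParams(params, &act_min, &act_max);
  // act_min == -1.f, act_max == 1.f
}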

◆ getUpLowerWeightTensorDepth()

std::pair< uint32_t, uint32_t > onert_micro::execute::pal::getUpLowerWeightTensorDepth ( core::OpTrainableRankType  rank,
const uint32_t  output_depth 
)
inline

Definition at line 30 of file PALUtils.h.

32{
33 std::pair<uint32_t, uint32_t> result(0u, output_depth);
34
35 switch (rank)
36 {
37 case core::ALL:
38 break;
39 case core::LOWER_1_2_PART:
40 result.second = static_cast<uint32_t>(static_cast<float>(output_depth) / 2.f);
41 break;
42 case core::UP_1_2_PART:
43 result.first = static_cast<uint32_t>(static_cast<float>(output_depth) / 2.f);
44 break;
44 break;
45 default:
46 assert("Unsupported type");
47 break;
48 }
49
50 return result;
51}

References onert_micro::core::ALL, onert_micro::core::LOWER_1_2_PART, and onert_micro::core::UP_1_2_PART.

Referenced by onert_micro::train::pal::Conv2DWeightGrad(), and onert_micro::train::pal::FullyConnectedWeightGrad().
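For example, with output_depth == 8 the returned [first, second) row range is {0, 8} for core::ALL, {0, 4} for core::LOWER_1_2_PART and {4, 8} for core::UP_1_2_PART; the training kernels listed above then update only the weight rows inside that range. A minimal sketch (assuming the enumerator spelling shown in the references):

void trainable_rank_example()
{
  using namespace onert_micro;
  const uint32_t output_depth = 8;
  auto range = execute::pal::getUpLowerWeightTensorDepth(core::LOWER_1_2_PART, output_depth);
  // range.first == 0, range.second == 4: only the lower half of the weight rows is trainable.
}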

◆ GreaterEqualFn()

template<typename T >
bool onert_micro::execute::pal::GreaterEqualFn ( T  lhs,
T  rhs 
)
inline

Definition at line 61 of file PALComparisons.h.

61{ return lhs >= rhs; }

Referenced by onert_micro::execute::execute_kernel_CircleGreaterEqual().

◆ GreaterFn()

template<typename T >
bool onert_micro::execute::pal::GreaterFn ( T  lhs,
T  rhs 
)
inline

Definition at line 60 of file PALComparisons.h.

60{ return lhs > rhs; }

Referenced by onert_micro::execute::execute_kernel_CircleGreater().

◆ GRU()

OMStatus onert_micro::execute::pal::GRU ( const float *  input_data,
const float *  weight_input_data,
const float *  weight_hidden_data,
const float *  bias_input_data,
const float *  bias_hidden_data,
const float *  hidden_state_data,
float *  output_data,
float *  output_input_data,
float *  output_hidden_data,
const core::OMRuntimeShape input_shape,
const core::OMRuntimeShape output_shape,
const core::OMRuntimeShape weight_input_shape,
const core::OMRuntimeShape weight_hidden_shape,
const size_t  intermediate_buffer_size,
float *  intermediate_buffer 
)

Definition at line 183 of file PALGRUCommon.h.

191{
192 const int32_t time = input_shape.dims(0);
193
194 core::OMRuntimeShape output_shape_fc(2);
195 output_shape_fc.setDim(0, 1);
196 output_shape_fc.setDim(1, weight_hidden_shape.dims(0));
197
198 std::memcpy(output_data, hidden_state_data, output_shape.flatSize() * sizeof(float));
199
200 for (int i = 0; i < time; ++i)
201 {
202 calculateGRU(input_data, weight_input_data, weight_hidden_data, bias_input_data,
203 bias_hidden_data, output_data, input_shape, output_shape, weight_input_shape,
204 weight_hidden_shape, output_input_data, output_hidden_data, output_shape_fc,
205 intermediate_buffer);
206 input_data += input_shape.dims(2);
207 if (intermediate_buffer_size != 0)
208 {
209 assert(intermediate_buffer != nullptr);
210 intermediate_buffer += intermediate_buffer_size;
211 }
212 }
213 return Ok;
214}

References onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::flatSize(), onert_micro::Ok, output_shape, and onert_micro::core::OMRuntimeShape::setDim().

Referenced by onert_micro::execute::execute_kernel_CircleGRU().

◆ L2Normalization()

OMStatus onert_micro::execute::pal::L2Normalization ( const core::L2NormalizationParams params,
const float *  input_data,
float *  output_data 
)
inline

Definition at line 33 of file PALL2Normalize.h.

35{
36
37 const int outer_size = params.num_rows;
38 const int depth = params.row_size;
39 const int epsilon = params.epsilon;
40
41 for (int i = 0; i < outer_size; ++i)
42 {
43 float squared_l2_norm = 0;
44 for (int c = 0; c < depth; ++c)
45 {
46 const float val = input_data[depth * i + c];
47 squared_l2_norm += val * val;
48 }
49 float l2_norm = std::sqrt(squared_l2_norm);
50 l2_norm = std::max(l2_norm, static_cast<float>(epsilon));
51 for (int c = 0; c < depth; ++c)
52 {
53 output_data[depth * i + c] = input_data[depth * i + c] / l2_norm;
54 }
55 }
56
57 return Ok;
58}

References onert_micro::core::L2NormalizationParams::epsilon, onert_micro::core::L2NormalizationParams::num_rows, onert_micro::Ok, and onert_micro::core::L2NormalizationParams::row_size.

Referenced by onert_micro::execute::execute_kernel_CircleL2Normalize().
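A short numeric sketch, not from the onert-micro sources: a single row of three values is divided by its L2 norm, with epsilon acting only as a lower bound on the norm.

void l2_normalization_example()
{
  using namespace onert_micro;

  core::L2NormalizationParams params{};
  params.num_rows = 1; // outer_size
  params.row_size = 3; // depth
  params.epsilon = 1;  // lower bound on the norm; irrelevant here because the norm is 5

  const float input[] = {3.f, 0.f, 4.f}; // L2 norm == 5
  float output[3] = {};
  execute::pal::L2Normalization(params, input, output);
  // output == {0.6f, 0.0f, 0.8f}
}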

◆ L2Pool()

OMStatus onert_micro::execute::pal::L2Pool ( const core::Pool2DParams params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)

Definition at line 34 of file PALL2Pool2DCommon.h.

37{
38 const int32_t batches = input_shape.dims(0);
39 const int32_t depth = output_shape.dims(3);
40 const int32_t input_height = input_shape.dims(1);
41 const int32_t input_width = input_shape.dims(2);
42 const int32_t output_height = output_shape.dims(1);
43 const int32_t output_width = output_shape.dims(2);
44 const int32_t stride_height = params.stride_h;
45 const int32_t stride_width = params.stride_w;
46 for (int batch = 0; batch < batches; ++batch)
47 {
48 for (int out_y = 0; out_y < output_height; ++out_y)
49 {
50 for (int out_x = 0; out_x < output_width; ++out_x)
51 {
52 for (int channel = 0; channel < depth; ++channel)
53 {
54 const int in_x_origin = (out_x * stride_width) - params.pad_w;
55 const int in_y_origin = (out_y * stride_height) - params.pad_h;
56 // Compute the boundaries of the filter region clamped so as to
57 // ensure that the filter window fits in the input array.
58 const int filter_x_start = std::max(0, -in_x_origin);
59 const int filter_x_end = std::min(params.filter_w, input_width - in_x_origin);
60 const int filter_y_start = std::max(0, -in_y_origin);
61 const int filter_y_end = std::min(params.filter_h, input_height - in_y_origin);
62 float sum_squares = 0.f;
63 int filter_count = 0;
64 for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
65 {
66 for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
67 {
68 const int in_x = in_x_origin + filter_x;
69 const int in_y = in_y_origin + filter_y;
70 const float val =
71 input_data[offset(input_shape.dimsData(), batch, in_y, in_x, channel)];
72 sum_squares += val * val;
73 filter_count++;
74 }
75 }
76 assert(filter_count != 0);
77 if (filter_count == 0)
78 {
79 std::cerr << "filter_count is zero" << std::endl;
80 return FailedCheckCondition;
81 }
82 const float l2pool_result = std::sqrt(sum_squares / filter_count);
83 output_data[offset(output_shape.dimsData(), batch, out_y, out_x, channel)] =
84 activationFunctionWithMinMax(l2pool_result, params.activation_min,
85 params.activation_max);
86 }
87 }
88 }
89 }
90 return Ok;
91}

References onert_micro::core::Pool2DParams::activation_max, onert_micro::core::Pool2DParams::activation_min, activationFunctionWithMinMax(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), onert_micro::FailedCheckCondition, onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, offset(), onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.

Referenced by onert_micro::execute::execute_kernel_CircleL2Pool2D().

◆ LessEqualFn()

template<typename T >
bool onert_micro::execute::pal::LessEqualFn ( T  lhs,
T  rhs 
)
inline

Definition at line 58 of file PALComparisons.h.

58{ return lhs <= rhs; }

Referenced by onert_micro::execute::execute_kernel_CircleLessEqual().

◆ LessFn()

template<typename T >
bool onert_micro::execute::pal::LessFn ( T  lhs,
T  rhs 
)
inline

Definition at line 57 of file PALComparisons.h.

57{ return lhs < rhs; }

Referenced by onert_micro::execute::execute_kernel_CircleLess().

◆ Log()

template<typename T >
OMStatus onert_micro::execute::pal::Log ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALLogCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::log(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleLog().

◆ LogicalCommon()

template<class Fn >
OMStatus onert_micro::execute::pal::LogicalCommon ( const int  flat_size,
const bool *  input1_data,
const bool *  input2_data,
bool *  output_data 
)

Definition at line 38 of file PALLogicalCommon.h.

40{
41 Fn func;
42
43 for (int i = 0; i < flat_size; ++i)
44 {
45 output_data[i] = func(input1_data[i], input2_data[i]);
46 }
47
48 return Ok;
49}

References onert_micro::Ok.

◆ LogicalNot()

OMStatus onert_micro::execute::pal::LogicalNot ( const int  flat_size,
const bool *  input_data,
bool *  output_data 
)

Definition at line 25 of file PALLogicalNotCommon.h.

26{
27 for (int i = 0; i < flat_size; ++i)
28 {
29 output_data[i] = !(input_data[i]);
30 }
31
32 return Ok;
33}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_kernel_CircleLogicalNot().

◆ Logistic() [1/2]

OMStatus onert_micro::execute::pal::Logistic ( const int  flat_size,
const float *  input_data,
float *  output_data 
)
inline

Definition at line 32 of file PALLogistic.h.

33{
34 const float cutoff_upper = 16.619047164916992188f;
35 const float cutoff_lower = -9.f;
36
37 // Rationale for using approximation in reference kernel.
38 // 0. This approximation gives enough precision for float.
39 // 1. This works around an issue on an embedded chipset where exp() does not
40 // return correctly as expected - exp(x) should return inf when overflown
41 // not 1.701417 IEEE 754 defines representation for inf.
42 // 2. This will speed up calculation and is matching the behavior in the
43 // optimized kernels. (check the definition of scalar_logistic_op<float>)
44
45 for (int i = 0; i < flat_size; i++)
46 {
47 float val = input_data[i];
48 float result;
49 if (val > cutoff_upper)
50 {
51 result = 1.0f;
52 }
53 else if (val < cutoff_lower)
54 {
55 result = std::exp(val);
56 }
57 else
58 {
59 result = 1.f / (1.f + std::exp(-val));
60 }
61 output_data[i] = result;
62 }
63 return Ok;
64}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_kernel_CircleLogistic().

◆ Logistic() [2/2]

OMStatus onert_micro::execute::pal::Logistic ( const int  flat_size,
const int8_t *  input_data,
float  input_scale,
int  input_zero_point,
int8_t *  output_data,
float  output_scale,
int  output_zero_point 
)
inline

Definition at line 66 of file PALLogistic.h.

69{
70 const float cutoff_upper = 16.619047164916992188f;
71 const float cutoff_lower = -9.f;
72
73 // Rationale for using approximation in reference kernel.
74 // 0. This approximation gives enough precision for float.
75 // 1. This works around an issue on an embedded chipset where exp() does not
76 // return correctly as expected - exp(x) should return inf when overflown
77 // not 1.701417 IEEE 754 defines representation for inf.
78 // 2. This will speed up calculation and is matching the behavior in the
79 // optimized kernels. (check the definition of scalar_logistic_op<float>)
80
81 for (int i = 0; i < flat_size; i++)
82 {
83 // Dequantize.
84 float val = static_cast<float>((input_data[i] - input_zero_point) * input_scale);
85 float result;
86 if (val > cutoff_upper)
87 {
88 result = 1.0f;
89 }
90 else if (val < cutoff_lower)
91 {
92 result = std::exp(val);
93 }
94 else
95 {
96 result = 1.f / (1.f + std::exp(-val));
97 }
98 // Requantize
99 int8_t output = static_cast<int8_t>(std::round(result / output_scale) + output_zero_point);
100 output_data[i] = output;
101 }
102 return Ok;
103}

References onert_micro::Ok.
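A minimal sketch of the quantized overload: each int8 input is dequantized as (x - input_zero_point) * input_scale, passed through the sigmoid, then requantized as round(y / output_scale) + output_zero_point. The scales and zero points below are illustrative values, not taken from any model.

#include <cstdint>

void quantized_logistic_example()
{
  const int8_t input[] = {0, 64, -64};
  int8_t output[3] = {};

  onert_micro::execute::pal::Logistic(/*flat_size=*/3, input,
                                      /*input_scale=*/4.f / 128.f, /*input_zero_point=*/0, output,
                                      /*output_scale=*/1.f / 255.f, /*output_zero_point=*/-128);
  // input 0 dequantizes to 0.0, sigmoid gives 0.5, and requantization yields
  // round(0.5 * 255) - 128 == 0; the other two outputs land at 97 and -98 for sigmoid(+-2).
}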

◆ LogSoftmax()

OMStatus onert_micro::execute::pal::LogSoftmax ( const core::LogSoftmaxParams params,
const float *  input_data,
float *  output_data 
)
inline

Definition at line 32 of file PALLogSoftmax.h.

34{
35 const int outer_size = params.num_rows;
36 const int depth = params.row_size;
37
38 for (int i = 0; i < outer_size; ++i)
39 {
40 // Find max element value which we'll use to ensure numerical stability
41 // taking advantage of the following equality:
42 // log(exp(x[i])/sum(exp(x[i]))) == log(exp(x[i]+C)/sum(exp(x[i]+C)))
43 float max = std::numeric_limits<float>::lowest();
44 for (int c = 0; c < depth; ++c)
45 {
46 max = std::max(max, input_data[i * depth + c]);
47 }
48
49 // Compute sum.
50 float sum = 0.f;
51 for (int c = 0; c < depth; ++c)
52 {
53 sum += std::exp(input_data[i * depth + c] - max);
54 }
55
56 // Compute result.
57 const float log_sum = std::log(sum);
58 for (int c = 0; c < depth; ++c)
59 {
60 output_data[i * depth + c] = input_data[i * depth + c] - max - log_sum;
61 }
62 }
63
64 return Ok;
65}

References onert_micro::core::LogSoftmaxParams::num_rows, onert_micro::Ok, and onert_micro::core::LogSoftmaxParams::row_size.

Referenced by onert_micro::execute::execute_kernel_CircleLogSoftmax().
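A short numeric sketch: for a single row [1, 2, 3], the result is x - max - log(sum(exp(x - max))), roughly [-2.408, -1.408, -0.408], and exponentiating the outputs sums to 1.

void log_softmax_example()
{
  using namespace onert_micro;

  core::LogSoftmaxParams params{};
  params.num_rows = 1;
  params.row_size = 3;

  const float input[] = {1.f, 2.f, 3.f};
  float output[3] = {};
  execute::pal::LogSoftmax(params, input, output);
  // output ~= {-2.408f, -1.408f, -0.408f}
}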

◆ MatchingDim()

int onert_micro::execute::pal::MatchingDim ( const core::OMRuntimeShape shape1,
int  index1,
const core::OMRuntimeShape shape2,
int  index2 
)
inline

Definition at line 139 of file PALUtils.h.

141{
142 assert(shape1.dims(index1) == shape2.dims(index2));
143 return shape1.dims(index1);
144}

References onert_micro::core::OMRuntimeShape::dims().

◆ Maximum()

OMStatus onert_micro::execute::pal::Maximum ( const int  flat_size,
const float *  input1_data,
const float *  input2_data,
float *  output_data 
)
inline

Definition at line 32 of file PALMaximumCommon.h.

34{
35 return BinaryOp<float, MaximumFn<float>>(flat_size, input1_data, input2_data, output_data);
36}

Referenced by onert_micro::execute::execute_kernel_CircleMaximum().

◆ MaxPool() [1/2]

OMStatus onert_micro::execute::pal::MaxPool ( const core::Pool2DParams params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)

Definition at line 32 of file PALMaxPool2DCommon.h.

35{
36 const int32_t batches = input_shape.dims(0);
37 const int32_t depth = output_shape.dims(3);
38 const int32_t input_height = input_shape.dims(1);
39 const int32_t input_width = input_shape.dims(2);
40 const int32_t output_height = output_shape.dims(1);
41 const int32_t output_width = output_shape.dims(2);
42 const int32_t stride_height = params.stride_h;
43 const int32_t stride_width = params.stride_w;
44 for (int batch = 0; batch < batches; ++batch)
45 {
46 for (int out_y = 0; out_y < output_height; ++out_y)
47 {
48 for (int out_x = 0; out_x < output_width; ++out_x)
49 {
50 for (int channel = 0; channel < depth; ++channel)
51 {
52 const int in_x_origin = (out_x * stride_width) - params.pad_w;
53 const int in_y_origin = (out_y * stride_height) - params.pad_h;
54 // Compute the boundaries of the filter region clamped so as to
55 // ensure that the filter window fits in the input array.
56 const int filter_x_start = std::max(0, -in_x_origin);
57 const int filter_x_end = std::min(params.filter_w, input_width - in_x_origin);
58 const int filter_y_start = std::max(0, -in_y_origin);
59 const int filter_y_end = std::min(params.filter_h, input_height - in_y_origin);
60 float max = std::numeric_limits<float>::lowest();
61 for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
62 {
63 for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
64 {
65 const int in_x = in_x_origin + filter_x;
66 const int in_y = in_y_origin + filter_y;
67
68 const int input_data_offset =
69 ((batch * input_shape.dims(1) + in_y) * input_shape.dims(2) + in_x) *
70 input_shape.dims(3) +
71 channel;
72
73 max = std::max(max, input_data[input_data_offset]);
74 }
75 }
76 const int output_data_offset =
77 ((batch * output_shape.dims(1) + out_y) * output_shape.dims(2) + out_x) *
78 output_shape.dims(3) +
79 channel;
80
81 output_data[output_data_offset] =
82 std::min(std::max(max, params.activation_min), params.activation_max);
83 }
84 }
85 }
86 }
87 return Ok;
88}

References onert_micro::core::Pool2DParams::activation_max, onert_micro::core::Pool2DParams::activation_min, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.
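A minimal sketch of the float overload, with illustrative shapes and parameters: 2x2 max pooling, stride 2, no padding, over a 1x2x2x1 NHWC input.

#include <limits>

void max_pool_example()
{
  using namespace onert_micro;

  core::Pool2DParams params{};
  params.filter_h = 2;
  params.filter_w = 2;
  params.stride_h = 2;
  params.stride_w = 2;
  params.pad_h = 0;
  params.pad_w = 0;
  params.activation_min = std::numeric_limits<float>::lowest();
  params.activation_max = std::numeric_limits<float>::max();

  core::OMRuntimeShape input_shape(4); // 1x2x2x1 NHWC
  input_shape.setDim(0, 1);
  input_shape.setDim(1, 2);
  input_shape.setDim(2, 2);
  input_shape.setDim(3, 1);

  core::OMRuntimeShape output_shape(4); // 1x1x1x1
  output_shape.setDim(0, 1);
  output_shape.setDim(1, 1);
  output_shape.setDim(2, 1);
  output_shape.setDim(3, 1);

  const float input[] = {1.f, 4.f, 3.f, 2.f};
  float output[1] = {};
  execute::pal::MaxPool(params, input_shape, input, output_shape, output);
  // output[0] == 4.f
}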

◆ MaxPool() [2/2]

OMStatus onert_micro::execute::pal::MaxPool ( const core::Pool2DParams params,
const core::OMRuntimeShape input_shape,
const int8_t *  input_data,
const core::OMRuntimeShape output_shape,
int8_t *  output_data 
)

Definition at line 32 of file PALMaxPool2D.h.

35{
36 cmsis_nn_dims input_dims;
37 cmsis_nn_dims output_dims;
38 cmsis_nn_pool_params pool_params;
39 cmsis_nn_dims filter_dims;
40 cmsis_nn_context ctx;
41
42 const int depth = input_shape.dims(3);
43 const int output_width = output_shape.dims(2);
44
45 input_dims.n = 1;
46 input_dims.h = input_shape.dims(1);
47 input_dims.w = input_shape.dims(2);
48 input_dims.c = depth;
49
50 output_dims.n = 1;
51 output_dims.h = output_shape.dims(1);
52 output_dims.w = output_width;
53 output_dims.c = depth;
54
55 pool_params.stride.h = params.stride_h;
56 pool_params.stride.w = params.stride_w;
57 pool_params.padding.h = params.pad_h;
58 pool_params.padding.w = params.pad_w;
59 pool_params.activation.min = params.quantized_activation_min;
60 pool_params.activation.max = params.quantized_activation_max;
61
62 filter_dims.n = 1;
63 filter_dims.h = params.filter_h;
64 filter_dims.w = params.filter_w;
65 filter_dims.c = 1;
66
67 auto res = arm_max_pool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims,
68 &output_dims, output_data);
69
70 assert(res == ARM_CMSIS_NN_SUCCESS);
71 if (res != ARM_CMSIS_NN_SUCCESS)
72 return CmsisNNError;
73
74 return Ok;
75}

References onert_micro::CmsisNNError, luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::Pool2DParams::filter_h, onert_micro::core::Pool2DParams::filter_w, onert_micro::Ok, output_shape, onert_micro::core::Pool2DParams::pad_h, onert_micro::core::Pool2DParams::pad_w, onert_micro::core::Pool2DParams::quantized_activation_max, onert_micro::core::Pool2DParams::quantized_activation_min, onert_micro::core::Pool2DParams::stride_h, and onert_micro::core::Pool2DParams::stride_w.

Referenced by onert_micro::execute::execute_kernel_CircleMaxPool2D().

◆ Minimum()

OMStatus onert_micro::execute::pal::Minimum ( const int  flat_size,
const float *  input1_data,
const float *  input2_data,
float *  output_data 
)
inline

Definition at line 32 of file PALMinimumCommon.h.

34{
35 return BinaryOp<float, MinimumFn<float>>(flat_size, input1_data, input2_data, output_data);
36}

Referenced by onert_micro::execute::execute_kernel_CircleMinimum().

◆ Mul() [1/3]

OMStatus onert_micro::execute::pal::Mul ( const core::ArithmeticQuantParams params,
const uint32_t  flat_size,
const int8_t *  input1_data,
const int8_t *  input2_data,
int8_t *  output_data 
)

◆ Mul() [2/3]

template<typename InputType , typename OutputType >
OMStatus onert_micro::execute::pal::Mul ( const core::ArithmeticQuantParams params,
uint32_t  size,
const InputType *  input1_data,
const InputType *  input2_data,
OutputType *  output_data 
)

Definition at line 31 of file PALMul.h.

33{
34 for (int i = 0; i < size; ++i)
35 {
36 const int32_t input1_val = params.input1_offset + input1_data[i];
37 const int32_t input2_val = params.input2_offset + input2_data[i];
38 const int32_t unclamped_result =
39 params.output_offset + multiplyByQuantizedMultiplier(input1_val * input2_val,
40 params.output_multiplier,
41 params.output_shift);
42 const int32_t clamped_output = std::min(
43 params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
44 output_data[i] = static_cast<OutputType>(clamped_output);
45 }
46 return Ok;
47}

References onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input2_offset, multiplyByQuantizedMultiplier(), onert_micro::Ok, onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, onert_micro::core::ArithmeticQuantParams::quantized_activation_min, and size.

◆ Mul() [3/3]

template<typename T >
OMStatus onert_micro::execute::pal::Mul ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 36 of file PALMulCommon.h.

38{
39 ArithmeticOp<T, MulFn<T>>(params, flat_size, input1_data, input2_data, output_data);
40 return Ok;
41}

References onert_micro::Ok.

◆ multiplyByQuantizedMultiplier()

int32_t onert_micro::execute::pal::multiplyByQuantizedMultiplier ( int32_t  x,
int32_t  quantized_multiplier,
int  shift 
)
inline

Definition at line 104 of file PALUtils.h.

105{
106 int left_shift = shift > 0 ? shift : 0;
107 int right_shift = shift > 0 ? 0 : -shift;
108 return roundingDivideByPOT(
109 saturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
110}

References roundingDivideByPOT(), and saturatingRoundingDoublingHighMul().

Referenced by BroadcastMul6DSlow(), FullyConnected(), and Mul().
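A short numeric sketch: a real rescale factor of 0.5 can be encoded as quantized_multiplier = 1 << 30 (0.5 in Q0.31 fixed point) with shift = 0, so the call halves the accumulator with rounding. The values are illustrative, not taken from a real quantization.

#include <cstdint>

void requantize_example()
{
  const int32_t acc = 1000;
  const int32_t quantized_multiplier = 1 << 30; // 0.5 in Q0.31
  const int shift = 0;                          // no additional power-of-two scaling

  int32_t scaled =
    onert_micro::execute::pal::multiplyByQuantizedMultiplier(acc, quantized_multiplier, shift);
  // scaled == 500
}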

◆ multiplyByQuantizedMultiplierSmallerThanOneExp()

int32_t onert_micro::execute::pal::multiplyByQuantizedMultiplierSmallerThanOneExp ( int32_t  x,
int32_t  quantized_multiplier,
int  left_shift 
)
inline

Definition at line 112 of file PALUtils.h.

115{
116 return roundingDivideByPOT(saturatingRoundingDoublingHighMul(x, quantized_multiplier),
117 -left_shift);
118}

References roundingDivideByPOT(), and saturatingRoundingDoublingHighMul().

Referenced by AddFunc(), BroadcastComparison4DSlowWithScaling(), ComparisonWithScaling(), and SubFunc().

◆ NdArrayDescsForElementwiseBroadcast()

template<int N>
void onert_micro::execute::pal::NdArrayDescsForElementwiseBroadcast ( const core::OMRuntimeShape input0_shape,
const core::OMRuntimeShape input1_shape,
NdArrayDesc< N > *  desc0_out,
NdArrayDesc< N > *  desc1_out 
)
inline

Definition at line 94 of file ProcessBroadcastShapes.h.

98{
99
100 auto extended_input0_shape = core::OMRuntimeShape::extendedShape(N, input0_shape);
101 auto extended_input1_shape = core::OMRuntimeShape::extendedShape(N, input1_shape);
102
103 // Copy dims to desc, calculating strides.
104 copyDimsToDesc<N>(extended_input0_shape, desc0_out);
105 copyDimsToDesc<N>(extended_input1_shape, desc1_out);
106
107 // Walk over each dimension. If the extents are equal do nothing.
108 // Otherwise, set the desc with extent 1 to have extent equal to the other and
109 // stride 0.
110 for (int i = 0; i < N; ++i)
111 {
112 const int extent0 = extended_input0_shape.dims(i);
113 const int extent1 = extended_input1_shape.dims(i);
114 if (extent0 != extent1)
115 {
116 if (extent0 == 1)
117 {
118 desc0_out->strides[i] = 0;
119 desc0_out->extents[i] = extent1;
120 }
121 else
122 {
123 desc1_out->strides[i] = 0;
124 desc1_out->extents[i] = extent0;
125 }
126 }
127 }
128}

References onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::execute::pal::NdArrayDesc< N >::extents, and onert_micro::execute::pal::NdArrayDesc< N >::strides.

Referenced by BroadcastArithmeticOp4DSlow(), BroadcastBinaryOp4DSlow(), BroadcastMul6DSlow(), and QuantizedBroadcastArithmeticOp4DSlow().
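A minimal sketch: broadcasting a [1, 1, 1, 3] shape against a [1, 2, 1, 3] shape zeroes the stride of the first descriptor along the mismatched dimension, so iterating that axis re-reads the same elements.

void broadcast_desc_example()
{
  using namespace onert_micro;

  core::OMRuntimeShape shape0(4);
  shape0.setDim(0, 1);
  shape0.setDim(1, 1);
  shape0.setDim(2, 1);
  shape0.setDim(3, 3);

  core::OMRuntimeShape shape1(4);
  shape1.setDim(0, 1);
  shape1.setDim(1, 2);
  shape1.setDim(2, 1);
  shape1.setDim(3, 3);

  execute::pal::NdArrayDesc<4> desc0;
  execute::pal::NdArrayDesc<4> desc1;
  execute::pal::NdArrayDescsForElementwiseBroadcast(shape0, shape1, &desc0, &desc1);
  // desc0.strides[1] == 0 and desc0.extents[1] == 2: dimension 1 of the first input is broadcast.
}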

◆ NDOpsHelper()

template<int N, typename Calc >
void onert_micro::execute::pal::NDOpsHelper ( const NdArrayDesc< N > &  output,
const Calc &  calc 
)
inline

Definition at line 87 of file ProcessBroadcastShapes.h.

88{
89 int indexes[N] = {0};
90 NDOpsHelperImpl<N, 0, Calc>(output, calc, indexes);
91}

◆ NDOpsHelperImpl() [1/2]

template<int N, int DIM, typename Calc >
std::enable_if< DIM==N-1, void >::type onert_micro::execute::pal::NDOpsHelperImpl ( const NdArrayDesc< N > &  output,
const Calc &  calc,
int  indexes[N] 
)

Definition at line 65 of file ProcessBroadcastShapes.h.

67{
68 for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM])
69 {
70 calc(indexes);
71 }
72}

◆ NDOpsHelperImpl() [2/2]

template<int N, int DIM, typename Calc >
std::enable_if< DIM!=N-1, void >::type onert_micro::execute::pal::NDOpsHelperImpl ( const NdArrayDesc< N > &  output,
const Calc &  calc,
int  indexes[N] 
)

Definition at line 75 of file ProcessBroadcastShapes.h.

77{
78 for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM])
79 {
80 NDOpsHelperImpl<N, DIM + 1, Calc>(output, calc, indexes);
81 }
82}

◆ Neg()

template<typename T >
OMStatus onert_micro::execute::pal::Neg ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 30 of file PALNegCommon.h.

32{
33 const uint32_t flat_size = input_shape.flatSize();
34
35 if (flat_size == -1)
36 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
37
38 assert(input_data != nullptr);
39 assert(output_data != nullptr);
40
41 assert(input_shape == output_shape);
42
43 for (int i = 0; i < flat_size; i++)
44 {
45 output_data[i] = -(input_data[i]);
46 }
47
48 return Ok;
49}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleNeg().

◆ NotEqualFn()

template<typename T >
bool onert_micro::execute::pal::NotEqualFn ( T  lhs,
T  rhs 
)
inline

Definition at line 62 of file PALComparisons.h.

62{ return lhs != rhs; }

Referenced by onert_micro::execute::execute_kernel_CircleNotEqual().

◆ offset() [1/2]

int onert_micro::execute::pal::offset ( const int32_t *  dims_data,
int  i0,
int  i1,
int  i2,
int  i3 
)
inline

Definition at line 159 of file PALUtils.h.

160{
161 return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3;
162}

Referenced by BatchToSpaceND(), DepthwiseConv2D< float >(), GatherND(), L2Pool(), SpaceToBatchND(), SpaceToDepth(), and TransposeConv< float >().
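A short numeric sketch: for an NHWC tensor with dims_data == {2, 4, 5, 3}, the flat index of (batch 1, row 2, col 3, channel 1) is ((1*4 + 2)*5 + 3)*3 + 1 == 100.

#include <cstdint>

void offset_example()
{
  const int32_t dims[] = {2, 4, 5, 3}; // N, H, W, C
  const int flat = onert_micro::execute::pal::offset(dims, /*i0=*/1, /*i1=*/2, /*i2=*/3, /*i3=*/1);
  // flat == 100
  (void)flat;
}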

◆ offset() [2/2]

int onert_micro::execute::pal::offset ( const int32_t *  dims_data,
int  i0,
int  i1,
int  i2,
int  i3,
int  i4 
)
inline

Definition at line 164 of file PALUtils.h.

165{
166 return (((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3) * dims_data[4] + i4;
167}

◆ Pad()

OMStatus onert_micro::execute::pal::Pad ( const core::PadParams op_params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const float  pad_value,
const core::OMRuntimeShape output_shape,
float *  output_data 
)

Definition at line 35 of file PALPad.h.

38{
39 // Runtime calls are currently fixed at 5 dimensions. Copy inputs so we can
40 // pad them to 5 dims (yes, we are "padding the padding").
41 int left_padding_copy[padKernelMaxDimensionCount];
42 for (int &i : left_padding_copy)
43 {
44 i = 0;
45 }
46 for (int i = 0; i < op_params.left_padding_count; ++i)
47 {
48 left_padding_copy[i + padKernelMaxDimensionCount - op_params.left_padding_count] =
49 op_params.left_padding[i];
50 }
51 int right_padding_copy[padKernelMaxDimensionCount];
52 for (int &i : right_padding_copy)
53 {
54 i = 0;
55 }
56 for (int i = 0; i < op_params.right_padding_count; ++i)
57 {
58 right_padding_copy[i + padKernelMaxDimensionCount - op_params.right_padding_count] =
59 op_params.right_padding[i];
60 }
61 const auto extended_output =
62 core::OMRuntimeShape::extendedShape(padKernelMaxDimensionCount, output_shape);
63 const int output_batch = extended_output.dims(0);
64 const int output_plane = extended_output.dims(1);
65 const int output_height = extended_output.dims(2);
66 const int output_width = extended_output.dims(3);
67 const int output_depth = extended_output.dims(4);
68
69 const int left_b_padding = left_padding_copy[0];
70 const int left_p_padding = left_padding_copy[1];
71 const int left_h_padding = left_padding_copy[2];
72 const int left_w_padding = left_padding_copy[3];
73 const int left_d_padding = left_padding_copy[4];
74
75 const int right_b_padding = right_padding_copy[0];
76 const int right_p_padding = right_padding_copy[1];
77 const int right_h_padding = right_padding_copy[2];
78 const int right_w_padding = right_padding_copy[3];
79 const int right_d_padding = right_padding_copy[4];
80
81 const float *in_ptr = input_data;
82 float *out_ptr = output_data;
83 for (int out_b = 0; out_b < output_batch; ++out_b)
84 {
85 for (int out_p = 0; out_p < output_plane; ++out_p)
86 {
87 for (int out_h = 0; out_h < output_height; ++out_h)
88 {
89 for (int out_w = 0; out_w < output_width; ++out_w)
90 {
91 for (int out_d = 0; out_d < output_depth; ++out_d)
92 {
93 if (out_b < left_b_padding || out_b >= output_batch - right_b_padding ||
94 out_p < left_p_padding || out_p >= output_plane - right_p_padding ||
95 out_h < left_h_padding || out_h >= output_height - right_h_padding ||
96 out_w < left_w_padding || out_w >= output_width - right_w_padding ||
97 out_d < left_d_padding || out_d >= output_depth - right_d_padding)
98 {
99 *out_ptr++ = pad_value;
100 }
101 else
102 {
103 *out_ptr++ = *in_ptr++;
104 }
105 }
106 }
107 }
108 }
109 }
110
111 return Ok;
112}

References onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::core::PadParams::left_padding, onert_micro::core::PadParams::left_padding_count, onert_micro::Ok, output_shape, onert_micro::core::PadParams::right_padding, and onert_micro::core::PadParams::right_padding_count.

Referenced by onert_micro::execute::execute_kernel_CirclePad().

◆ processBroadcastShapes()

bool onert_micro::execute::pal::processBroadcastShapes ( const core::OMRuntimeShape shape0,
const core::OMRuntimeShape shape1,
core::BinaryArithmeticBroadcastParams params 
)
inline

Definition at line 155 of file ProcessBroadcastShapes.h.

158{
159 const int dims_count = std::max(shape0.dimensionsCount(), shape1.dimensionsCount());
160
161 params->broadcast_category = core::BroadcastableOpCategory::kGenericBroadcast;
162
163 auto extended_shape0 = core::OMRuntimeShape::extendedShape(dims_count, shape0);
164 auto extended_shape1 = core::OMRuntimeShape::extendedShape(dims_count, shape1);
165
166 // Check for "exact" match, implicitly accepting any scalar shapes.
167 if (extended_shape0 == extended_shape1)
168 {
169 params->broadcast_category = core::BroadcastableOpCategory::kNonBroadcast;
170 return false;
171 }
172
173 if (shape0.flatSize() == 1)
174 {
175 params->broadcast_category = core::BroadcastableOpCategory::kScalarFirstBroadcast;
176 return true;
177 }
178 else if (shape1.flatSize() == 1)
179 {
180 params->broadcast_category = core::BroadcastableOpCategory::kScalarSecondBroadcast;
181 return true;
182 }
183
184 for (int i = dims_count - 1; i >= 0; --i)
185 {
186 if (extended_shape0.dims(i) == extended_shape1.dims(i))
187 {
188 continue;
189 }
190 else if (extended_shape0.dims(i) == 1)
191 {
192 params->broadcast_category = core::BroadcastableOpCategory::kFirstInputBroadcastsFast;
193 return true;
194 }
195 else if (extended_shape1.dims(i) == 1)
196 {
197 params->broadcast_category = core::BroadcastableOpCategory::kSecondInputBroadcastsFast;
198 return true;
199 }
200 else
201 {
202 // This case is erroneous: there is a dimension that does not match and
203 // is not a broadcast from one shape to the other.
204 params->broadcast_category = core::BroadcastableOpCategory::kGenericBroadcast;
205 return true;
206 }
207 }
208
209 return false;
210}

References onert_micro::core::BinaryArithmeticBroadcastParams::broadcast_category, onert_micro::core::OMRuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::core::kFirstInputBroadcastsFast, onert_micro::core::kGenericBroadcast, onert_micro::core::kNonBroadcast, onert_micro::core::kScalarFirstBroadcast, onert_micro::core::kScalarSecondBroadcast, and onert_micro::core::kSecondInputBroadcastsFast.

Referenced by onert_micro::execute::execute_kernel_CircleAdd(), onert_micro::execute::execute_kernel_CircleDiv(), onert_micro::execute::execute_kernel_CircleMul(), onert_micro::execute::execute_kernel_CircleSquaredDifference(), and onert_micro::execute::execute_kernel_CircleSub().
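A minimal sketch: a scalar shape against a [2, 3] shape selects kScalarFirstBroadcast and returns true, while two identical shapes select kNonBroadcast and return false.

void broadcast_category_example()
{
  using namespace onert_micro;

  core::OMRuntimeShape scalar(1);
  scalar.setDim(0, 1);

  core::OMRuntimeShape matrix(2);
  matrix.setDim(0, 2);
  matrix.setDim(1, 3);

  core::BinaryArithmeticBroadcastParams params{};
  const bool needs_broadcast = execute::pal::processBroadcastShapes(scalar, matrix, &params);
  // needs_broadcast == true and
  // params.broadcast_category == core::BroadcastableOpCategory::kScalarFirstBroadcast
}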

◆ Quantize()

template<typename InputT , typename OutputT >
OMStatus onert_micro::execute::pal::Quantize ( const core::QuantizationParams  op_params,
const uint32_t  flat_size,
const InputT *  input_data,
OutputT *  output_data 
)

Definition at line 35 of file PALQuantize.h.

37{
38 const int32_t zero_point = op_params.zero_point;
39 const double scale = op_params.scale;
40 static constexpr int32_t min_val = std::numeric_limits<OutputT>::min();
41 static constexpr int32_t max_val = std::numeric_limits<OutputT>::max();
42
43 for (int i = 0; i < flat_size; i++)
44 {
45 const InputT val = input_data[i];
46 int32_t unclamped =
47 static_cast<int32_t>(std::round(val / static_cast<float>(scale))) + zero_point;
48 int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
49 output_data[i] = clamped;
50 }
51
52 return Ok;
53}

References onert_micro::Ok, onert_micro::core::QuantizationParams::scale, and onert_micro::core::QuantizationParams::zero_point.

Referenced by onert_micro::execute::execute_kernel_CircleQuantize().
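A short numeric sketch with illustrative quantization parameters: scale 0.5 and zero point -10 map 3.2f to round(3.2 / 0.5) - 10 == -4, and out-of-range results saturate to the output type.

#include <cstdint>

void quantize_example()
{
  using namespace onert_micro;

  core::QuantizationParams op_params{};
  op_params.scale = 0.5;
  op_params.zero_point = -10;

  const float input[] = {3.2f, 1000.f};
  int8_t output[2] = {};
  execute::pal::Quantize(op_params, /*flat_size=*/2, input, output);
  // output == {-4, 127}: the second value saturates at the int8 maximum.
}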

◆ QuantizedArithmeticOp()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::QuantizedArithmeticOp ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const onert_micro::core::QuantizationParams input1_qparams,
const T *  input1_data,
const onert_micro::core::QuantizationParams input2_qparams,
const T *  input2_data,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data 
)

Definition at line 69 of file PALArithmeticOpCommon.h.

74{
75 float activation_min, activation_max;
76 getActivationParams(params, &activation_min, &activation_max);
77
78 Fn func;
79 for (int i = 0; i < flat_size; ++i)
80 {
81 // Dequantize input1
82 float input1 = static_cast<float>((input1_data[i] - static_cast<T>(input1_qparams.zero_point)) *
83 input1_qparams.scale);
84 // Dequantize input2
85 float input2 = static_cast<float>((input2_data[i] - static_cast<T>(input2_qparams.zero_point)) *
86 input2_qparams.scale);
87 float result = std::min(std::max(func(input1, input2), activation_min), activation_max);
88
89 // Quantize result
90 result = result / output_qparams.scale + output_qparams.zero_point;
91 result = std::max<float>(std::numeric_limits<T>::min(), result);
92 result = std::min<float>(std::numeric_limits<T>::max(), result);
93 output_data[i] = static_cast<T>(result);
94 }
95
96 return Ok;
97}

References getActivationParams(), onert_micro::Ok, onert_micro::core::QuantizationParams::scale, and onert_micro::core::QuantizationParams::zero_point.

◆ QuantizedBroadcastArithmeticOp4DSlow()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::QuantizedBroadcastArithmeticOp4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const onert_micro::core::QuantizationParams input1_qparams,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const onert_micro::core::QuantizationParams input2_qparams,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data 
)

Definition at line 176 of file PALArithmeticOpCommon.h.

183{
184 NdArrayDesc<4> desc1;
185 NdArrayDesc<4> desc2;
186 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
187 const core::OMRuntimeShape extended_output_shape =
188 core::OMRuntimeShape::extendedShape(4, output_shape);
189
190 float activation_min, activation_max;
191 getActivationParams(params, &activation_min, &activation_max);
192
193 // In Tensorflow, the dimensions are canonically named (batch_number, row,
194 // col, channel), with extents (batches, height, width, depth), with the
195 // trailing dimension changing most rapidly (channels has the smallest stride,
196 // typically 1 element).
197 //
198 // In generated C code, we store arrays with the dimensions reversed. The
199 // first dimension has smallest stride.
200 //
201 // We name our variables by their Tensorflow convention, but generate C code
202 // nesting loops such that the innermost loop has the smallest stride for the
203 // best cache behavior.
204 Fn func;
205 for (int b = 0; b < extended_output_shape.dims(0); ++b)
206 {
207 for (int y = 0; y < extended_output_shape.dims(1); ++y)
208 {
209 for (int x = 0; x < extended_output_shape.dims(2); ++x)
210 {
211 for (int c = 0; c < extended_output_shape.dims(3); ++c)
212 {
213 // Dequantize input1
214 float input1 = static_cast<float>((input1_data[subscriptToIndex(desc1, b, y, x, c)] -
215 static_cast<T>(input1_qparams.zero_point)) *
216 input1_qparams.scale);
217 // Dequantize input2
218 float input2 = static_cast<float>((input2_data[subscriptToIndex(desc2, b, y, x, c)] -
219 static_cast<T>(input2_qparams.zero_point)) *
220 input2_qparams.scale);
221
222 float result = std::min(std::max(func(input1, input2), activation_min), activation_max);
223
224 // Quantize result
225 result = result / output_qparams.scale + output_qparams.zero_point;
226 result = std::max<float>(std::numeric_limits<T>::min(), result);
227 result = std::min<float>(std::numeric_limits<T>::max(), result);
228 const int output_data_offset =
229 ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
230 extended_output_shape.dims(3) +
231 c;
232 output_data[output_data_offset] = static_cast<T>(result);
233 }
234 }
235 }
236 }
237 return Ok;
238}

References desc1, desc2, onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), getActivationParams(), NdArrayDescsForElementwiseBroadcast(), onert_micro::Ok, output_shape, onert_micro::core::QuantizationParams::scale, subscriptToIndex(), and onert_micro::core::QuantizationParams::zero_point.

◆ QuantizedBroadcastSquaredDifference4DSlow()

template<typename T >
OMStatus onert_micro::execute::pal::QuantizedBroadcastSquaredDifference4DSlow ( const core::BinaryArithmeticBroadcastParams params,
const core::OMRuntimeShape input1_shape,
const onert_micro::core::QuantizationParams input1_qparams,
const T *  input1_data,
const core::OMRuntimeShape input2_shape,
const onert_micro::core::QuantizationParams input2_qparams,
const T *  input2_data,
const core::OMRuntimeShape output_shape,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data 
)

Definition at line 49 of file PALSquaredDifferenceCommon.h.

56{
57 QuantizedBroadcastArithmeticOp4DSlow<T, SquaredDifferenceFn<float>>(
58 params, input1_shape, input1_qparams, input1_data, input2_shape, input2_qparams, input2_data,
59 output_shape, output_qparams, output_data);
60 return Ok;
61}

References onert_micro::Ok, and output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleSquaredDifference().

◆ QuantizedRsqrt()

template<typename T >
OMStatus onert_micro::execute::pal::QuantizedRsqrt ( const core::OMRuntimeShape input_shape,
const onert_micro::core::QuantizationParams input_qparams,
const T *  input_data,
const core::OMRuntimeShape output_shape,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data 
)
inline

Definition at line 42 of file PALRsqrtCommon.h.

47{
48 return SISOOperation<T>(input_shape, input_qparams, input_data, output_shape, output_qparams,
49 output_data, [](float arg) -> float { return 1.f / std::sqrt(arg); });
50}

References output_shape.

◆ QuantizedSquaredDifference()

template<typename T >
OMStatus onert_micro::execute::pal::QuantizedSquaredDifference ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const onert_micro::core::QuantizationParams input1_qparams,
const T *  input1_data,
const onert_micro::core::QuantizationParams input2_qparams,
const T *  input2_data,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data 
)

Definition at line 64 of file PALSquaredDifferenceCommon.h.

69{
70
71 QuantizedArithmeticOp<T, SquaredDifferenceFn<float>>(params, flat_size, input1_qparams,
72 input1_data, input2_qparams, input2_data,
73 output_qparams, output_data);
74
75 return Ok;
76}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_kernel_CircleSquaredDifference().

◆ QuantizedTanh()

template<typename T >
OMStatus onert_micro::execute::pal::QuantizedTanh ( const core::OMRuntimeShape input_shape,
const onert_micro::core::QuantizationParams input_qparams,
const T *  input_data,
const core::OMRuntimeShape output_shape,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data 
)
inline

Definition at line 53 of file PALTanhCommon.h.

58{
59 const uint32_t flat_size = input_shape.flatSize();
60 for (int i = 0; i < flat_size; i++)
61 {
62 // Dequantize.
63 float val = static_cast<float>((input_data[i] - static_cast<T>(input_qparams.zero_point)) *
64 input_qparams.scale);
65 // float result
66 float result = std::tanh(val);
67
68 // Quantize float to output type
69 result = result / output_qparams.scale + output_qparams.zero_point;
70 result = std::max<float>(std::numeric_limits<T>::min(), result);
71 result = std::min<float>(std::numeric_limits<T>::max(), result);
72
73 output_data[i] = static_cast<T>(result);
74 }
75
76 return Ok;
77}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, onert_micro::core::QuantizationParams::scale, and onert_micro::core::QuantizationParams::zero_point.

◆ QuantizedZeroPad()

template<typename T >
OMStatus onert_micro::execute::pal::QuantizedZeroPad ( const core::PadParams op_params,
const core::OMRuntimeShape input_shape,
const onert_micro::core::QuantizationParams input_qparams,
const T *  input_data,
const core::OMRuntimeShape output_shape,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data 
)

Definition at line 115 of file PALPad.h.

120{
121 // TODO reduce code duplication with Pad
122
123 // Runtime calls are currently fixed at 5 dimensions. Copy inputs so we can
124 // pad them to 5 dims (yes, we are "padding the padding").
125 int left_padding_copy[padKernelMaxDimensionCount];
126 for (int &i : left_padding_copy)
127 {
128 i = 0;
129 }
130 for (int i = 0; i < op_params.left_padding_count; ++i)
131 {
132 left_padding_copy[i + padKernelMaxDimensionCount - op_params.left_padding_count] =
133 op_params.left_padding[i];
134 }
135 int right_padding_copy[padKernelMaxDimensionCount];
136 for (int &i : right_padding_copy)
137 {
138 i = 0;
139 }
140 for (int i = 0; i < op_params.right_padding_count; ++i)
141 {
142 right_padding_copy[i + padKernelMaxDimensionCount - op_params.right_padding_count] =
143 op_params.right_padding[i];
144 }
145 const auto extended_output =
146 core::OMRuntimeShape::extendedShape(padKernelMaxDimensionCount, output_shape);
147 const int output_batch = extended_output.dims(0);
148 const int output_plane = extended_output.dims(1);
149 const int output_height = extended_output.dims(2);
150 const int output_width = extended_output.dims(3);
151 const int output_depth = extended_output.dims(4);
152
153 const int left_b_padding = left_padding_copy[0];
154 const int left_p_padding = left_padding_copy[1];
155 const int left_h_padding = left_padding_copy[2];
156 const int left_w_padding = left_padding_copy[3];
157 const int left_d_padding = left_padding_copy[4];
158
159 const int right_b_padding = right_padding_copy[0];
160 const int right_p_padding = right_padding_copy[1];
161 const int right_h_padding = right_padding_copy[2];
162 const int right_w_padding = right_padding_copy[3];
163 const int right_d_padding = right_padding_copy[4];
164
165 const T *in_ptr = input_data;
166 T *out_ptr = output_data;
167 T pad_value = output_qparams.zero_point;
168 for (int out_b = 0; out_b < output_batch; ++out_b)
169 {
170 for (int out_p = 0; out_p < output_plane; ++out_p)
171 {
172 for (int out_h = 0; out_h < output_height; ++out_h)
173 {
174 for (int out_w = 0; out_w < output_width; ++out_w)
175 {
176 for (int out_d = 0; out_d < output_depth; ++out_d)
177 {
178 if (out_b < left_b_padding || out_b >= output_batch - right_b_padding ||
179 out_p < left_p_padding || out_p >= output_plane - right_p_padding ||
180 out_h < left_h_padding || out_h >= output_height - right_h_padding ||
181 out_w < left_w_padding || out_w >= output_width - right_w_padding ||
182 out_d < left_d_padding || out_d >= output_depth - right_d_padding)
183 {
184 *out_ptr++ = pad_value;
185 }
186 else
187 {
188 float result = static_cast<float>(
189 (*in_ptr - static_cast<T>(input_qparams.zero_point)) * input_qparams.scale);
190
191 // Quantize result
192 result = result / output_qparams.scale + output_qparams.zero_point;
193 result = std::max<float>(std::numeric_limits<T>::min(), result);
194 result = std::min<float>(std::numeric_limits<T>::max(), result);
195
196 *out_ptr++ = static_cast<T>(result);
197 in_ptr++;
198 }
199 }
200 }
201 }
202 }
203 }
204
205 return Ok;
206}

References onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::core::PadParams::left_padding, onert_micro::core::PadParams::left_padding_count, onert_micro::Ok, output_shape, onert_micro::core::PadParams::right_padding, onert_micro::core::PadParams::right_padding_count, onert_micro::core::QuantizationParams::scale, and onert_micro::core::QuantizationParams::zero_point.

Referenced by onert_micro::execute::execute_kernel_CirclePad().

◆ ReduceDimensionsForBroadcast()

template<int MAX_DIM = 6>
bool onert_micro::execute::pal::ReduceDimensionsForBroadcast ( const core::OMRuntimeShape input1_shape,
const core::OMRuntimeShape input2_shape,
size_t *  compressed_input1_stride,
size_t *  compressed_input2_stride,
size_t *  compressed_output_shape 
)

Definition at line 181 of file PALUtils.h.

185{
186 size_t num_compressed_dims = 0;
187 size_t compressed_input1_shape[MAX_DIM];
188 size_t compressed_input2_shape[MAX_DIM];
189 std::fill(compressed_input1_shape, compressed_input1_shape + MAX_DIM, 1);
190 std::fill(compressed_input2_shape, compressed_input2_shape + MAX_DIM, 1);
191 std::fill(compressed_output_shape, compressed_output_shape + MAX_DIM, 1);
192 bool broadcast_input1 = false;
193 bool broadcast_input2 = false;
194 bool first_nonunit = true;
195
196 if (input1_shape.dimensionsCount() < 0 || input2_shape.dimensionsCount() < 0)
197 {
198 return false;
199 }
200 const size_t num_input1_dims = input1_shape.dimensionsCount();
201 const size_t num_input2_dims = input2_shape.dimensionsCount();
202 const int32_t *input1_dims = input1_shape.dimsData();
203 const int32_t *input2_dims = input2_shape.dimsData();
204 const size_t num_common_dims = std::min(num_input1_dims, num_input2_dims);
205 for (size_t i = 1; i <= num_common_dims; i++)
206 {
207 if (input1_dims[num_input1_dims - i] < 0 || input2_dims[num_input2_dims - i] < 0)
208 {
209 return false;
210 }
211 const size_t input1_dim = input1_dims[num_input1_dims - i];
212 const size_t input2_dim = input2_dims[num_input2_dims - i];
213 if (input1_dim == 0 || input2_dim == 0)
214 {
215 return false;
216 }
217 if (input1_dim == 1 && input2_dim == 1)
218 {
219 continue;
220 }
221 assert(!broadcast_input1 || !broadcast_input2);
222
223 if (input1_dim == 1)
224 {
225 if (!broadcast_input1)
226 {
227 broadcast_input1 = true;
228 broadcast_input2 = false;
229 num_compressed_dims++;
230 }
231 compressed_input2_shape[num_compressed_dims - 1] *= input2_dim;
232 compressed_output_shape[num_compressed_dims - 1] *= input2_dim;
233 }
234 else if (input2_dim == 1)
235 {
236 if (!broadcast_input2)
237 {
238 broadcast_input1 = false;
239 broadcast_input2 = true;
240 num_compressed_dims++;
241 }
242 compressed_input1_shape[num_compressed_dims - 1] *= input1_dim;
243 compressed_output_shape[num_compressed_dims - 1] *= input1_dim;
244 }
245 else
246 {
247 assert(input1_dim == input2_dim);
248 if (broadcast_input1 || broadcast_input2 || first_nonunit)
249 {
250 broadcast_input1 = false;
251 broadcast_input2 = false;
252 num_compressed_dims++;
253 }
254 compressed_input1_shape[num_compressed_dims - 1] *= input1_dim;
255 compressed_input2_shape[num_compressed_dims - 1] *= input1_dim;
256 compressed_output_shape[num_compressed_dims - 1] *= input1_dim;
257 }
258 first_nonunit = false;
259 }
260 if (num_input1_dims > num_input2_dims)
261 {
262 if (!broadcast_input2)
263 {
264 num_compressed_dims++;
265 }
266 for (size_t i = 0; i < num_input1_dims - num_input2_dims; i++)
267 {
268 if (input1_dims[i] < 0)
269 return false;
270 const size_t input1_dim = input1_dims[i];
271 if (input1_dim == 0)
272 {
273 return false;
274 }
275 compressed_input1_shape[num_compressed_dims - 1] *= input1_dim;
276 compressed_output_shape[num_compressed_dims - 1] *= input1_dim;
277 }
278 }
279 else if (num_input2_dims > num_input1_dims)
280 {
281 if (!broadcast_input1)
282 {
283 num_compressed_dims++;
284 }
285 for (size_t i = 0; i < num_input2_dims - num_input1_dims; i++)
286 {
287 if (input2_dims[i] < 0)
288 return false;
289 const size_t input2_dim = input2_dims[i];
290 if (input2_dim == 0)
291 {
292 return false;
293 }
294 compressed_input2_shape[num_compressed_dims - 1] *= input2_dim;
295 compressed_output_shape[num_compressed_dims - 1] *= input2_dim;
296 }
297 }
298 num_compressed_dims = (num_compressed_dims > 1) ? num_compressed_dims : 1;
299
300 int input1_stride = 1;
301 int input2_stride = 1;
302 for (int i = 0; i < MAX_DIM; ++i)
303 {
304 compressed_input1_stride[i] = input1_stride;
305 input1_stride *= compressed_input1_shape[i];
306 compressed_input2_stride[i] = input2_stride;
307 input2_stride *= compressed_input2_shape[i];
308 }
309 for (int i = 0; i < MAX_DIM; ++i)
310 {
311 if (compressed_input1_shape[i] != compressed_input2_shape[i])
312 {
313 if (compressed_input1_shape[i] == 1)
314 {
315 compressed_input1_stride[i] = 0;
316 }
317 else
318 {
319 assert(compressed_input2_shape[i] == 1);
320 compressed_input2_stride[i] = 0;
321 }
322 }
323 }
324 return true;
325}

References onert_micro::core::OMRuntimeShape::dimensionsCount(), and onert_micro::core::OMRuntimeShape::dimsData().
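Example (illustrative sketch, not part of the generated sources): the compressed strides are meant to drive a flat broadcast loop, where a stride of 0 marks an axis whose single element is reused. The arrays below are hand-traced for a [2, 1] + [2, 3] broadcast add and should be read as assumptions, not captured library output.

#include <cstddef>
#include <cstdio>

int main()
{
  constexpr int MAX_DIM = 6;
  // Values of the kind ReduceDimensionsForBroadcast computes for [2,1] + [2,3]
  // (innermost axis first); illustrative only.
  const size_t out_shape[MAX_DIM] = {3, 2, 1, 1, 1, 1};
  const size_t in1_stride[MAX_DIM] = {0, 1, 2, 2, 2, 2}; // 0 => broadcast axis
  const size_t in2_stride[MAX_DIM] = {1, 3, 6, 6, 6, 6};

  const float in1[2] = {10.f, 20.f};       // logical shape [2, 1]
  const float in2[6] = {1, 2, 3, 4, 5, 6}; // logical shape [2, 3]
  float out[6] = {};

  size_t total = 1;
  for (int d = 0; d < MAX_DIM; ++d)
    total *= out_shape[d];

  size_t idx[MAX_DIM] = {};
  for (size_t flat = 0; flat < total; ++flat)
  {
    size_t off1 = 0, off2 = 0;
    for (int d = 0; d < MAX_DIM; ++d)
    {
      off1 += idx[d] * in1_stride[d];
      off2 += idx[d] * in2_stride[d];
    }
    out[flat] = in1[off1] + in2[off2];

    // Advance the multi-dimensional index, innermost axis first.
    for (int d = 0; d < MAX_DIM; ++d)
    {
      if (++idx[d] < out_shape[d])
        break;
      idx[d] = 0;
    }
  }

  for (float v : out)
    std::printf("%g ", v); // expected: 11 12 13 24 25 26
  std::printf("\n");
  return 0;
}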

◆ ReLUCommon()

template<typename Type >
OMStatus onert_micro::execute::pal::ReLUCommon ( const int  flat_size,
const Type *  input_data,
Type *  output_data,
const float  alpha,
const bool  is_relu_6 
)

Definition at line 33 of file PALReluCommon.h.

35{
36 const Type relu_6_value = 6.0f;
37 for (int i = 0; i < flat_size; i++)
38 {
39 const Type val = input_data[i];
40 Type result = val > 0 ? val : val * alpha;
41 result = is_relu_6 ? (result > relu_6_value ? relu_6_value : result) : result;
42 output_data[i] = result;
43 }
44
45 return Ok;
46}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_relu_common().
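Example (standalone sketch): the same loop body covers ReLU, ReLU6 and Leaky-ReLU depending on alpha and is_relu_6. The snippet below evaluates the three variants on a few sample values using plain floats, with no library types involved.

#include <cstdio>

static float relu_common(float val, float alpha, bool is_relu_6)
{
  const float relu_6_value = 6.0f;
  float result = val > 0 ? val : val * alpha;                 // ReLU / Leaky-ReLU
  return is_relu_6 ? (result > relu_6_value ? relu_6_value : result) : result;
}

int main()
{
  const float samples[] = {-2.0f, 0.5f, 8.0f};
  for (float v : samples)
  {
    std::printf("x=%g  relu=%g  relu6=%g  leaky(0.1)=%g\n", v,
                relu_common(v, 0.0f, false),  // -2 -> 0,    0.5 -> 0.5, 8 -> 8
                relu_common(v, 0.0f, true),   // -2 -> 0,    0.5 -> 0.5, 8 -> 6
                relu_common(v, 0.1f, false)); // -2 -> -0.2, 0.5 -> 0.5, 8 -> 8
  }
  return 0;
}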

◆ ReLUCommon< int8_t >()

template<>
OMStatus onert_micro::execute::pal::ReLUCommon< int8_t > ( const int  flat_size,
const int8_t *  input_data,
int8_t *  output_data,
const float  alpha,
const bool  is_relu_6 
)

Definition at line 33 of file PALRelu.h.

35{
36 // 1. Relu
37 if (is_relu_6 == false && alpha == 0)
38 {
39 memcpy(output_data, input_data, flat_size);
40 arm_relu_q7(output_data, flat_size);
41 }
42 // 2. Relu6
43 else if (is_relu_6 && alpha == 0)
44 {
45 memcpy(output_data, input_data, flat_size);
46 arm_relu6_s8(output_data, flat_size);
47 }
48 // 3. Leaky_Relu not supported by cmsis_nn
49 else if (alpha != 0)
50 {
51 for (int i = 0; i < flat_size; i++)
52 {
53 const int8_t val = input_data[i];
54 int8_t result = val > 0 ? val : val * alpha;
55 output_data[i] = result;
56 }
57 }
58
59 return Ok;
60}

References onert_micro::Ok.

◆ Round()

template<typename T >
OMStatus onert_micro::execute::pal::Round ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

◆ Round< float >()

template<>
OMStatus onert_micro::execute::pal::Round< float > ( const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 38 of file PALRoundCommon.h.

40{
41 const uint32_t flat_size = input_shape.flatSize();
42
43 if (flat_size == -1)
44 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
45
46 assert(input_data != nullptr);
47 assert(output_data != nullptr);
48
49 assert(input_shape == output_shape);
50
51 for (int i = 0; i < flat_size; i++)
52 {
53 // Note that this implementation matches that of tensorFlow tf.round
54 // and corresponds to the bankers rounding method.
55 auto floor_val = std::floor(input_data[i]);
56 auto diff = input_data[i] - floor_val;
57 if ((diff < 0.5f) || ((diff == 0.5f) && (static_cast<int>(floor_val) % 2 == 0)))
58 {
59 output_data[i] = floor_val;
60 }
61 else
62 {
63 output_data[i] = floor_val + 1.0f;
64 }
65 }
66
67 return Ok;
68}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, and onert_micro::UnknownError.
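Example (standalone sketch of the rounding rule only): the loop above implements round-half-to-even; the snippet below isolates that rule and checks a few representative values.

#include <cmath>
#include <cstdio>

static float round_half_to_even(float x)
{
  const float floor_val = std::floor(x);
  const float diff = x - floor_val;
  if ((diff < 0.5f) || ((diff == 0.5f) && (static_cast<int>(floor_val) % 2 == 0)))
    return floor_val;
  return floor_val + 1.0f;
}

int main()
{
  const float xs[] = {0.5f, 1.5f, 2.5f, -0.5f, 2.3f};
  for (float x : xs)
    std::printf("round(%g) = %g\n", x, round_half_to_even(x));
  // expected: 0, 2, 2, 0, 2
  return 0;
}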

◆ roundingDivideByPOT()

int32_t onert_micro::execute::pal::roundingDivideByPOT ( int32_t  x,
int32_t  exponent 
)
inline

Definition at line 92 of file PALUtils.h.

93{
94 assert(exponent >= 0);
95 assert(exponent <= 31);
96 const int32_t mask = int32_t((1ll << exponent) - 1);
97 const int32_t zero = int32_t(0);
98 const int32_t one = int32_t(1);
99 const int32_t remainder = x & mask;
100 const int32_t threshold = (mask >> 1) + ((x < zero ? one : zero) & one);
101 return (x >> exponent) + ((remainder > threshold ? one : zero) & one);
102}

Referenced by multiplyByQuantizedMultiplier(), and multiplyByQuantizedMultiplierSmallerThanOneExp().
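Example (standalone sketch): the mask/threshold arithmetic implements a rounding division by 2^exponent that rounds halves away from zero. The snippet below mirrors it and asserts a few values.

#include <cassert>
#include <cstdint>

static int32_t rounding_divide_by_pot(int32_t x, int32_t exponent)
{
  const int32_t mask = int32_t((1ll << exponent) - 1);
  const int32_t remainder = x & mask;
  const int32_t threshold = (mask >> 1) + ((x < 0 ? 1 : 0) & 1);
  return (x >> exponent) + ((remainder > threshold ? 1 : 0) & 1);
}

int main()
{
  assert(rounding_divide_by_pot(20, 2) == 5);  // 20 / 4 = 5 exactly
  assert(rounding_divide_by_pot(21, 2) == 5);  // 5.25 rounds down
  assert(rounding_divide_by_pot(22, 2) == 6);  // 5.5 rounds away from zero
  assert(rounding_divide_by_pot(-22, 2) == -6);
  return 0;
}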

◆ Rsqrt()

template<typename T >
OMStatus onert_micro::execute::pal::Rsqrt ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALRsqrtCommon.h.

36{
37 return SISOOperation<T>(input_shape, input_data, output_shape, output_data,
38 [](T arg) -> T { return 1.f / std::sqrt(arg); });
39}

References output_shape.

Referenced by onert_micro::execute::execute_kernel_CircleRsqrt().

◆ saturatingRoundingDoublingHighMul()

std::int32_t onert_micro::execute::pal::saturatingRoundingDoublingHighMul ( std::int32_t  a,
std::int32_t  b 
)
inline

Definition at line 79 of file PALUtils.h.

80{
81 bool overflow = a == b && a == std::numeric_limits<std::int32_t>::min();
82 std::int64_t a_64(a);
83 std::int64_t b_64(b);
84 std::int64_t ab_64 = a_64 * b_64;
85 std::int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
86 std::int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
87 return overflow ? std::numeric_limits<std::int32_t>::max() : ab_x2_high32;
88}

Referenced by multiplyByQuantizedMultiplier(), and multiplyByQuantizedMultiplierSmallerThanOneExp().
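Example (standalone sketch): the helper computes approximately round(a * b / 2^31) and saturates the single overflow case INT32_MIN * INT32_MIN. The snippet below mirrors the arithmetic and checks that corner case plus one exact value.

#include <cassert>
#include <cstdint>
#include <limits>

static int32_t sat_rounding_doubling_high_mul(int32_t a, int32_t b)
{
  const bool overflow = a == b && a == std::numeric_limits<int32_t>::min();
  const int64_t ab_64 = int64_t(a) * int64_t(b);
  const int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
  const int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
  return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32;
}

int main()
{
  const int32_t int32_min = std::numeric_limits<int32_t>::min();
  // Saturation corner case: (-2^31) * (-2^31) cannot be represented, so it clamps.
  assert(sat_rounding_doubling_high_mul(int32_min, int32_min) ==
         std::numeric_limits<int32_t>::max());
  // 2^30 * 2^30 / 2^31 = 2^29.
  assert(sat_rounding_doubling_high_mul(1 << 30, 1 << 30) == (1 << 29));
  return 0;
}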

◆ Select()

template<typename D , typename T >
void onert_micro::execute::pal::Select ( const core::OMRuntimeShape input_condition_shape,
const D *  input_condition_data,
const core::OMRuntimeShape input_x_shape,
const T *  input_x_data,
const core::OMRuntimeShape input_y_shape,
const T *  input_y_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)

Definition at line 33 of file PALSelectV2.h.

37{
38 int64_t flatsize;
39 // Allow select operator executions on mixed scalar tensors and one element
40 // tensors.
41 if (input_condition_shape.flatSize() == 1 && input_x_shape.flatSize() == 1 &&
42 input_y_shape.flatSize() == 1 && output_shape.flatSize() == 1)
43 {
44 flatsize = 1;
45 }
46 else
47 {
48 flatsize = input_condition_shape.flatSize();
49 }
50 for (int64_t i = 0; i < flatsize; ++i)
51 {
52 output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i];
53 }
54}

References luci_interpreter::RuntimeShape::flatSize(), onert_micro::core::OMRuntimeShape::flatSize(), and output_shape.
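Example (standalone sketch): stripped of shape handling, the kernel reduces to an elementwise conditional copy; when the condition, both inputs and the output are single-element tensors, flatsize is simply 1.

#include <cstdio>

int main()
{
  const bool cond[4] = {true, false, false, true};
  const float x[4] = {1.f, 2.f, 3.f, 4.f};
  const float y[4] = {10.f, 20.f, 30.f, 40.f};
  float out[4];

  for (int i = 0; i < 4; ++i)
    out[i] = cond[i] ? x[i] : y[i];

  for (float v : out)
    std::printf("%g ", v); // expected: 1 20 30 4
  std::printf("\n");
  return 0;
}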

◆ Sin()

template<typename T >
OMStatus onert_micro::execute::pal::Sin ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 35 of file PALSinCommon.h.

37{
38 const uint32_t flat_size = input_shape.flatSize();
39
40 if (flat_size == -1)
41 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
42
43 assert(input_data != nullptr);
44 assert(output_data != nullptr);
45
46 assert(input_shape == output_shape);
47
48 for (int i = 0; i < flat_size; i++)
49 {
50 output_data[i] = std::sin(input_data[i]);
51 }
52
53 return Ok;
54}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleSin().

◆ SISOOperation() [1/2]

template<typename T >
OMStatus onert_micro::execute::pal::SISOOperation ( const core::OMRuntimeShape input_shape,
const onert_micro::core::QuantizationParams input_qparams,
const T *  input_data,
const core::OMRuntimeShape output_shape,
const onert_micro::core::QuantizationParams output_qparams,
T *  output_data,
std::function< float(float)> const &  func 
)
inline

Definition at line 54 of file PALSISOOperation.h.

59{
60 const uint32_t flat_size = input_shape.flatSize();
61
62 if (flat_size == -1)
63 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
64
65 assert(input_data != nullptr);
66 assert(output_data != nullptr);
67
68 assert(input_shape == output_shape);
69
70 for (int i = 0; i < flat_size; i++)
71 {
72 // Dequantize input
73 float result = static_cast<float>((input_data[i] - static_cast<T>(input_qparams.zero_point)) *
74 input_qparams.scale);
75 // float result
76 result = func(result);
77
78 // Quantize result to output type
79 result = result / output_qparams.scale + output_qparams.zero_point;
80 result = std::max<float>(std::numeric_limits<T>::min(), result);
81 result = std::min<float>(std::numeric_limits<T>::max(), result);
82
83 output_data[i] = static_cast<T>(result);
84 }
85
86 return Ok;
87}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, onert_micro::core::QuantizationParams::scale, onert_micro::UnknownError, and onert_micro::core::QuantizationParams::zero_point.
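Example (standalone sketch): the quantized overload dequantizes with the input scale/zero-point, applies the float functor, then requantizes and clamps to the output type. The snippet below reproduces that round trip for int8 with sqrt as the functor; the quantization parameters are made-up values.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

int main()
{
  const float input_scale = 0.1f; // assumed quantization parameters
  const int input_zero_point = -20;
  const float output_scale = 0.05f;
  const int output_zero_point = -128;

  const int8_t input[3] = {-10, 20, 80}; // real values: 1.0, 4.0, 10.0
  int8_t output[3];

  for (int i = 0; i < 3; ++i)
  {
    // Dequantize, apply the float function, requantize and clamp to int8.
    float real = (input[i] - static_cast<int8_t>(input_zero_point)) * input_scale;
    real = std::sqrt(real);
    float q = real / output_scale + output_zero_point;
    q = std::max<float>(std::numeric_limits<int8_t>::min(), q);
    q = std::min<float>(std::numeric_limits<int8_t>::max(), q);
    output[i] = static_cast<int8_t>(q);
  }

  for (int8_t v : output)
    std::printf("%d ", int(v)); // roughly: -108 -88 -64 (sqrt of 1, 4, 10)
  std::printf("\n");
  return 0;
}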

◆ SISOOperation() [2/2]

template<typename T >
OMStatus onert_micro::execute::pal::SISOOperation ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data,
std::function< T(T)> const &  func 
)
inline

Definition at line 31 of file PALSISOOperation.h.

34{
35 const uint32_t flat_size = input_shape.flatSize();
36
37 if (flat_size == -1)
38 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
39
40 assert(input_data != nullptr);
41 assert(output_data != nullptr);
42
43 assert(input_shape == output_shape);
44
45 for (int i = 0; i < flat_size; i++)
46 {
47 output_data[i] = func(input_data[i]);
48 }
49
50 return Ok;
51}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, and onert_micro::UnknownError.

◆ Slice()

template<typename T >
OMStatus onert_micro::execute::pal::Slice ( const core::SliceParams op_params,
const core::OMRuntimeShape input_shape,
const T *  input_data,
T *  output_data 
)

Definition at line 29 of file PALSlice.h.

31{
32 const core::OMRuntimeShape ext_shape = core::OMRuntimeShape::extendedShape(5, input_shape);
33 const int begin_count = op_params.begin_count;
34 const int size_count = op_params.size_count;
35 // We front-pad the begin and size vectors.
36 int start[5];
37 int stop[5];
38 for (int i = 0; i < 5; ++i)
39 {
40 int padded_i = 5 - i;
41 start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
42 stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
43 ? ext_shape.dims(i)
44 : start[i] + op_params.size[size_count - padded_i];
45 }
46
47 for (int i0 = start[0]; i0 < stop[0]; ++i0)
48 {
49 for (int i1 = start[1]; i1 < stop[1]; ++i1)
50 {
51 for (int i2 = start[2]; i2 < stop[2]; ++i2)
52 {
53 for (int i3 = start[3]; i3 < stop[3]; ++i3)
54 {
55 for (int i4 = start[4]; i4 < stop[4]; ++i4)
56 {
57 auto position =
58 (((i0 * ext_shape.dims(1) + i1) * ext_shape.dims(2) + i2) * ext_shape.dims(3) + i3) *
59 ext_shape.dims(4) +
60 i4;
61 *output_data++ = input_data[position];
62 }
63 }
64 }
65 }
66 }
67 return Ok;
68}

References onert_micro::core::SliceParams::begin, begin_count, onert_micro::core::SliceParams::begin_count, onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::Ok, onert_micro::core::SliceParams::size, size_count, and onert_micro::core::SliceParams::size_count.

Referenced by onert_micro::execute::execute_kernel_CircleSlice().
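Example (standalone sketch): begin and size are front-padded to the 5-D frame, with missing leading axes taking the full extent and a size of -1 meaning "to the end". The snippet below computes the resulting start/stop ranges for a rank-2 slice; the shape and slice parameters are made up.

#include <cstdio>

int main()
{
  // Logical input shape [4, 6] extended to 5-D as [1, 1, 1, 4, 6].
  const int ext_dims[5] = {1, 1, 1, 4, 6};
  const int begin[2] = {1, 2}; // begin_count == 2
  const int size[2] = {2, -1}; // size_count == 2; -1 means "to the end"
  const int begin_count = 2, size_count = 2;

  int start[5], stop[5];
  for (int i = 0; i < 5; ++i)
  {
    const int padded_i = 5 - i;
    start[i] = begin_count < padded_i ? 0 : begin[begin_count - padded_i];
    stop[i] = (size_count < padded_i || size[size_count - padded_i] == -1)
                ? ext_dims[i]
                : start[i] + size[size_count - padded_i];
  }

  for (int i = 0; i < 5; ++i)
    std::printf("axis %d: [%d, %d)\n", i, start[i], stop[i]);
  // expected: axes 0-2 cover [0, 1), axis 3 covers [1, 3), axis 4 covers [2, 6)
  return 0;
}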

◆ Softmax()

template<typename T , typename U >
OMStatus onert_micro::execute::pal::Softmax ( const core::SoftmaxParams params,
const T *  input_data,
U *  output_data 
)

Definition at line 33 of file PALSoftmaxCommon.h.

34{
35 const int outer_size = params.num_rows;
36 const int depth = params.row_size;
37 const double beta = params.beta;
38
39 const float input_scale = params.input_scale;
40 const float output_scale = params.output_scale;
41
42 const int input_zp = params.input_zp;
43 const int output_zp = params.output_zp;
44
45 for (int i = 0; i < outer_size; ++i)
46 {
47 // Find max element value which we'll use to ensure numerical stability
48 // taking advantage of the following equality:
49 // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
50 float max = std::numeric_limits<float>::lowest();
51 for (int c = 0; c < depth; ++c)
52 {
53 auto t = input_data[i * depth + c] - input_zp;
54 auto t_f = static_cast<float>(input_data[i * depth + c] - input_zp);
55 float cur_val = static_cast<float>(input_data[i * depth + c] - input_zp) * input_scale;
56 max = std::max(max, cur_val);
57 }
58
59 static constexpr int32_t min_val = std::numeric_limits<U>::min();
60 static constexpr int32_t max_val = std::numeric_limits<U>::max();
61 // Compute sum.
62 float sum = 0.f;
63 for (int c = 0; c < depth; ++c)
64 {
65 float cur_val = static_cast<float>(input_data[i * depth + c] - input_zp) * input_scale;
66 const auto exp_c = static_cast<float>(std::exp((cur_val - max) * beta));
67 sum += exp_c;
68 }
69
70 // Compute result.
71 for (int c = 0; c < depth; ++c)
72 {
73 float cur_val = static_cast<float>(input_data[i * depth + c] - input_zp) * input_scale;
74 const auto exp_c = static_cast<float>(std::exp((cur_val - max) * beta));
75 float softmax_val = exp_c / sum;
76 auto unclamped = static_cast<int32_t>(std::round(softmax_val / output_scale) +
77 static_cast<float>(output_zp));
78 int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
79 output_data[i * depth + c] = static_cast<U>(clamped);
80 }
81 }
82 return Ok;
83}

References onert_micro::core::SoftmaxParams::beta, onert_micro::core::SoftmaxParams::input_scale, onert_micro::core::SoftmaxParams::input_zp, onert_micro::core::SoftmaxParams::num_rows, onert_micro::Ok, onert_micro::core::SoftmaxParams::output_scale, onert_micro::core::SoftmaxParams::output_zp, and onert_micro::core::SoftmaxParams::row_size.

Referenced by onert_micro::execute::execute_kernel_CircleSoftmax().

◆ Softmax< float, float >()

template<>
OMStatus onert_micro::execute::pal::Softmax< float, float > ( const core::SoftmaxParams params,
const float *  input_data,
float *  output_data 
)

Definition at line 86 of file PALSoftmaxCommon.h.

88{
89 const int outer_size = params.num_rows;
90 const int depth = params.row_size;
91 const double beta = params.beta;
92
93 for (int i = 0; i < outer_size; ++i)
94 {
95 // Find max element value which we'll use to ensure numerical stability
96 // taking advantage of the following equality:
97 // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
98 float max = std::numeric_limits<float>::lowest();
99 for (int c = 0; c < depth; ++c)
100 {
101 max = std::max(max, input_data[i * depth + c]);
102 }
103
104 // Compute sum.
105 float sum = 0.f;
106 for (int c = 0; c < depth; ++c)
107 {
108 const float exp_c = std::exp((input_data[i * depth + c] - max) * static_cast<float>(beta));
109 output_data[i * depth + c] = exp_c;
110 sum += exp_c;
111 }
112
113 assert(sum != 0);
114
115 if (sum == 0)
116 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
117
118 // Compute result.
119 for (int c = 0; c < depth; ++c)
120 {
121 output_data[i * depth + c] = output_data[i * depth + c] / sum;
122 }
123 }
124 return Ok;
125}

References onert_micro::core::SoftmaxParams::beta, onert_micro::core::SoftmaxParams::num_rows, onert_micro::Ok, OM_LOG_AND_RETURN, onert_micro::core::SoftmaxParams::row_size, and onert_micro::UnknownError.
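Example (standalone sketch): the float specialization is the standard numerically stable softmax, subtracting the row maximum before exponentiating. The snippet below applies it to one hand-picked row.

#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
  const float row[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  const float beta = 1.0f;
  float out[4];

  const float max = *std::max_element(row, row + 4);

  float sum = 0.f;
  for (int c = 0; c < 4; ++c)
  {
    out[c] = std::exp((row[c] - max) * beta);
    sum += out[c];
  }
  for (int c = 0; c < 4; ++c)
    out[c] /= sum;

  for (float v : out)
    std::printf("%.4f ", v); // roughly: 0.0321 0.0871 0.2369 0.6439
  std::printf("\n");
  return 0;
}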

◆ Softmax< int8_t, int8_t >()

◆ SpaceToBatchND()

template<typename T >
OMStatus onert_micro::execute::pal::SpaceToBatchND ( const core::OMRuntimeShape unextended_input1_shape,
const T *  input1_data,
const core::OMRuntimeShape unextended_input2_shape,
const int32_t *  block_shape_data,
const core::OMRuntimeShape unextended_input3_shape,
const int32_t *  paddings_data,
const core::OMRuntimeShape unextended_output_shape,
T *  output_data 
)
inline

Definition at line 51 of file PALSpaceToBatchNDCommon.h.

55{
56 // Extends the input/output shape from 3D to 4D if needed, NHC -> NH1C.
57 const core::OMRuntimeShape input1_shape = extendShapeSpaceToBatch(unextended_input1_shape);
58 const core::OMRuntimeShape output_shape = extendShapeSpaceToBatch(unextended_output_shape);
59
60 const int depth = input1_shape.dims(3);
61 const int input_width = input1_shape.dims(2);
62 const int input_height = input1_shape.dims(1);
63 const int input_batch_size = input1_shape.dims(0);
64
65 const int output_width = output_shape.dims(2);
66 const int output_height = output_shape.dims(1);
67 const int output_batch_size = output_shape.dims(0);
68
69 const int block_shape_height = block_shape_data[0];
70 const int block_shape_width =
71 unextended_input1_shape.dimensionsCount() == 4 ? block_shape_data[1] : 1;
72 const int padding_top = paddings_data[0];
73 const int padding_left = unextended_input1_shape.dimensionsCount() == 4 ? paddings_data[2] : 0;
74
75 const int32_t pad_value = 0;
76
77 for (int out_b = 0; out_b < output_batch_size; ++out_b)
78 {
79 int input_batch = out_b % input_batch_size;
80 int shift_w = (out_b / input_batch_size) % block_shape_width;
81 int shift_h = (out_b / input_batch_size) / block_shape_width;
82 for (int out_h = 0; out_h < output_height; ++out_h)
83 {
84 for (int out_w = 0; out_w < output_width; ++out_w)
85 {
86 T *out = output_data + offset(output_shape.dimsData(), out_b, out_h, out_w, 0);
87 if (out_h * block_shape_height + shift_h < padding_top ||
88 out_h * block_shape_height + shift_h >= padding_top + input_height ||
89 out_w * block_shape_width + shift_w < padding_left ||
90 out_w * block_shape_width + shift_w >= padding_left + input_width)
91 {
92 // This may not execute correctly when pad_value != 0 and T != uint8.
93 memset(out, pad_value, depth * sizeof(T));
94 }
95 else
96 {
97 const T *in =
98 input1_data + offset(input1_shape.dimsData(), input_batch,
99 (out_h * block_shape_height + shift_h) - padding_top,
100 (out_w * block_shape_width + shift_w) - padding_left, 0);
101 memcpy(out, in, depth * sizeof(T));
102 }
103 }
104 }
105 }
106 return Ok;
107}

References onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), offset(), onert_micro::Ok, and output_shape.

◆ SpaceToDepth()

template<typename T >
OMStatus onert_micro::execute::pal::SpaceToDepth ( const int32_t  block_size,
const core::OMRuntimeShape unextended_input_shape,
const T *  input_data,
const core::OMRuntimeShape unextended_output_shape,
T *  output_data 
)
inline

Definition at line 32 of file PALSpaceToDepthCommon.h.

36{
37 if (block_size == 0)
 38 {
 39 return FailedCheckCondition;
 40 }
 41
 42 const core::OMRuntimeShape input_shape =
 43 core::OMRuntimeShape::extendedShape(4, unextended_input_shape);
 44 const core::OMRuntimeShape output_shape =
 45 core::OMRuntimeShape::extendedShape(4, unextended_output_shape);
46
47 const int input_depth = input_shape.dims(3);
48 const int input_width = input_shape.dims(2);
49 const int input_height = input_shape.dims(1);
50 const int input_batch = input_shape.dims(0);
51
52 for (int in_b = 0; in_b < input_batch; ++in_b)
53 {
54 for (int in_h = 0; in_h < input_height; ++in_h)
55 {
56 for (int in_w = 0; in_w < input_width; ++in_w)
57 {
58 for (int in_d = 0; in_d < input_depth; ++in_d)
59 {
60 const int out_d =
61 in_d + ((in_h % block_size) * block_size + in_w % block_size) * input_depth;
62 const int out_w = in_w / block_size;
63 const int out_h = in_h / block_size;
64 const int out_b = in_b;
65
66 const int input_index = offset(input_shape.dimsData(), in_b, in_h, in_w, in_d);
67 const int output_index = offset(output_shape.dimsData(), out_b, out_h, out_w, out_d);
68
69 output_data[output_index] = input_data[input_index];
70 }
71 }
72 }
73 }
74 return Ok;
75}

References onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::FailedCheckCondition, offset(), onert_micro::Ok, and output_shape.
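Example (standalone sketch): the index arithmetic moves each block_size x block_size spatial block into the depth axis. The snippet below applies the same mapping to a 1x4x4x1 input with block_size = 2, so every output pixel holds one 2x2 input block.

#include <cstdio>

int main()
{
  const int block_size = 2;
  const int in_h = 4, in_w = 4, in_d = 1;
  const int out_w = in_w / block_size, out_d = in_d * block_size * block_size;

  int input[16];
  for (int i = 0; i < 16; ++i)
    input[i] = i; // input[h][w] = 4*h + w
  int output[16];

  for (int h = 0; h < in_h; ++h)
    for (int w = 0; w < in_w; ++w)
      for (int d = 0; d < in_d; ++d)
      {
        const int od = d + ((h % block_size) * block_size + w % block_size) * in_d;
        const int ow = w / block_size;
        const int oh = h / block_size;
        const int in_index = (h * in_w + w) * in_d + d;
        const int out_index = (oh * out_w + ow) * out_d + od;
        output[out_index] = input[in_index];
      }

  // Each output "pixel" now holds one 2x2 input block along the depth axis.
  for (int i = 0; i < 16; i += out_d)
    std::printf("[%d %d %d %d] ", output[i], output[i + 1], output[i + 2], output[i + 3]);
  std::printf("\n"); // expected: [0 1 4 5] [2 3 6 7] [8 9 12 13] [10 11 14 15]
  return 0;
}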

◆ Split()

template<typename T >
OMStatus onert_micro::execute::pal::Split ( const core::SplitParams params,
const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
int32_t  axis_value 
)

Definition at line 36 of file PALSplit.h.

38{
39 const auto output_count = params.num_outputs;
40
41 const auto split_dimensions = input_shape.dimensionsCount();
42
43 assert(axis_value < split_dimensions);
44 assert(output_shape.dimensionsCount() == split_dimensions);
45
46 int64_t outer_size = 1;
47 for (uint32_t i = 0; i < axis_value; ++i)
48 {
49 outer_size *= input_shape.dims(i);
50 }
51
52 int64_t base_inner_size = 1;
53 for (uint32_t i = axis_value + 1; i < split_dimensions; ++i)
54 {
55 base_inner_size *= input_shape.dims(i);
56 }
57
58 assert(input_data != nullptr);
59 for (int64_t k = 0; k < outer_size; ++k)
60 {
61 for (uint32_t i = 0; i < output_count; ++i)
62 {
63 T *output_data = core::utils::castOutputData<T>(params.output_data[i]);
64 assert(output_data != nullptr);
65 const auto copy_size = output_shape.dims(axis_value) * base_inner_size;
66 T *output_ptr = output_data + k * copy_size;
67 assert(output_ptr != nullptr);
68 for (int64_t j = 0; j < copy_size; ++j)
69 output_ptr[j] = input_data[j];
70 input_data += copy_size;
71 }
72 }
73 return Ok;
74}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::SplitParams::num_outputs, onert_micro::Ok, onert_micro::core::SplitParams::output_data, and output_shape.

◆ Sqrt()

template<typename T >
OMStatus onert_micro::execute::pal::Sqrt ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALSqrtCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = std::sqrt(input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleSqrt().

◆ Square()

template<typename T >
OMStatus onert_micro::execute::pal::Square ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 34 of file PALSquareCommon.h.

36{
37 const uint32_t flat_size = input_shape.flatSize();
38
39 if (flat_size == -1)
40 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
41
42 assert(input_data != nullptr);
43 assert(output_data != nullptr);
44
45 assert(input_shape == output_shape);
46
47 for (int i = 0; i < flat_size; i++)
48 {
49 output_data[i] = (input_data[i]) * (input_data[i]);
50 }
51
52 return Ok;
53}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, and onert_micro::UnknownError.

Referenced by onert_micro::execute::execute_kernel_CircleSquare().

◆ SquaredDifference()

template<typename T >
OMStatus onert_micro::execute::pal::SquaredDifference ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 29 of file PALSquaredDifferenceCommon.h.

31{
32 ArithmeticOp<T, SquaredDifferenceFn<T>>(params, flat_size, input1_data, input2_data, output_data);
33 return Ok;
34}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_kernel_CircleSquaredDifference().

◆ StridedSlice()

template<typename T >
OMStatus onert_micro::execute::pal::StridedSlice ( core::StridedSliceParams op_params,
const core::OMRuntimeShape unextended_input_shape,
const T *  input_data,
T *  output_data 
)

Definition at line 206 of file PALStridedSlice.h.

209{
210 const core::OMRuntimeShape input_shape =
211 core::OMRuntimeShape::extendedShape(5, unextended_input_shape);
212
213 // Reverse and pad to 5 dimensions because that is what the runtime code
214 // requires (ie. all shapes must be 5D and are given backwards).
215 stridedSlicePadIndices(&op_params, 5);
216
217 const int start_0 = startForAxis(op_params, input_shape, 0);
218 const int stop_0 = stopForAxis(op_params, input_shape, 0, start_0);
219 const int start_1 = startForAxis(op_params, input_shape, 1);
220 const int stop_1 = stopForAxis(op_params, input_shape, 1, start_1);
221 const int start_2 = startForAxis(op_params, input_shape, 2);
222 const int stop_2 = stopForAxis(op_params, input_shape, 2, start_2);
223 const int start_3 = startForAxis(op_params, input_shape, 3);
224 const int stop_3 = stopForAxis(op_params, input_shape, 3, start_3);
225 const int start_4 = startForAxis(op_params, input_shape, 4);
226 const int stop_4 = stopForAxis(op_params, input_shape, 4, start_4);
227
228 for (int offset_0 = start_0 * input_shape.dims(1), end_0 = stop_0 * input_shape.dims(1),
229 step_0 = op_params.strides[0] * input_shape.dims(1);
230 !loopCondition(offset_0, end_0, op_params.strides[0]); offset_0 += step_0)
231 {
232 for (int offset_1 = (offset_0 + start_1) * input_shape.dims(2),
233 end_1 = (offset_0 + stop_1) * input_shape.dims(2),
234 step_1 = op_params.strides[1] * input_shape.dims(2);
235 !loopCondition(offset_1, end_1, op_params.strides[1]); offset_1 += step_1)
236 {
237 for (int offset_2 = (offset_1 + start_2) * input_shape.dims(3),
238 end_2 = (offset_1 + stop_2) * input_shape.dims(3),
239 step_2 = op_params.strides[2] * input_shape.dims(3);
240 !loopCondition(offset_2, end_2, op_params.strides[2]); offset_2 += step_2)
241 {
242 for (int offset_3 = (offset_2 + start_3) * input_shape.dims(4),
243 end_3 = (offset_2 + stop_3) * input_shape.dims(4),
244 step_3 = op_params.strides[3] * input_shape.dims(4);
245 !loopCondition(offset_3, end_3, op_params.strides[3]); offset_3 += step_3)
246 {
247 for (int offset_4 = offset_3 + start_4, end_4 = offset_3 + stop_4;
248 !loopCondition(offset_4, end_4, op_params.strides[4]);
249 offset_4 += op_params.strides[4])
250 {
251 *output_data++ = input_data[offset_4];
252 }
253 }
254 }
255 }
256 }
257 return Ok;
258}

References onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::Ok, and onert_micro::core::StridedSliceParams::strides.

Referenced by onert_micro::execute::execute_kernel_CircleStridedSlice().

◆ Sub() [1/2]

OMStatus onert_micro::execute::pal::Sub ( const core::ArithmeticQuantParams params,
const uint32_t  flat_size,
const int8_t *  input1_data,
const int8_t *  input2_data,
int8_t *  output_data 
)

Definition at line 31 of file PALSub.h.

33{
34 ElementWise(flat_size, params, input1_data, input2_data, output_data, SubFunc);
35 return Ok;
36}

References ElementWise(), onert_micro::Ok, and SubFunc().

◆ Sub() [2/2]

template<typename T >
OMStatus onert_micro::execute::pal::Sub ( const core::BinaryArithmeticBroadcastParams params,
const int  flat_size,
const T *  input1_data,
const T *  input2_data,
T *  output_data 
)

Definition at line 49 of file PALSubCommon.h.

51{
52 ArithmeticOp<T, SubFn<T>>(params, flat_size, input1_data, input2_data, output_data);
53 return Ok;
54}

References onert_micro::Ok.

Referenced by onert_micro::execute::execute_kernel_CircleSub().

◆ SubFunc()

int8_t onert_micro::execute::pal::SubFunc ( int8_t  x,
int8_t  y,
const core::ArithmeticQuantParams params 
)

Definition at line 29 of file PALSubCommon.h.

30{
31 const int32_t input1_val = params.input1_offset + x;
32 const int32_t input2_val = params.input2_offset + y;
33 const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
34 const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
35 const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
36 shifted_input1_val, params.input1_multiplier, params.input1_shift);
37 const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
38 shifted_input2_val, params.input2_multiplier, params.input2_shift);
39 const int32_t raw_sum = scaled_input1_val - scaled_input2_val;
40 const int32_t raw_output = multiplyByQuantizedMultiplierSmallerThanOneExp(
41 raw_sum, params.output_multiplier, params.output_shift) +
42 params.output_offset;
43 const int32_t clamped_output = std::min(params.quantized_activation_max,
44 std::max(params.quantized_activation_min, raw_output));
45 return static_cast<int8_t>(clamped_output);
46}

References onert_micro::core::ArithmeticQuantParams::input1_multiplier, onert_micro::core::ArithmeticQuantParams::input1_offset, onert_micro::core::ArithmeticQuantParams::input1_shift, onert_micro::core::ArithmeticQuantParams::input2_multiplier, onert_micro::core::ArithmeticQuantParams::input2_offset, onert_micro::core::ArithmeticQuantParams::input2_shift, onert_micro::core::ArithmeticQuantParams::left_shift, multiplyByQuantizedMultiplierSmallerThanOneExp(), onert_micro::core::ArithmeticQuantParams::output_multiplier, onert_micro::core::ArithmeticQuantParams::output_offset, onert_micro::core::ArithmeticQuantParams::output_shift, onert_micro::core::ArithmeticQuantParams::quantized_activation_max, and onert_micro::core::ArithmeticQuantParams::quantized_activation_min.

Referenced by BroadcastSub4DSlow(), and Sub().

◆ subscriptToIndex() [1/2]

int onert_micro::execute::pal::subscriptToIndex ( const NdArrayDesc< 4 > &  desc,
int  i0,
int  i1,
int  i2,
int  i3 
)
inline

◆ subscriptToIndex() [2/2]

int onert_micro::execute::pal::subscriptToIndex ( const NdArrayDesc< 5 > &  desc,
int  indexes[5] 
)
inline

Definition at line 135 of file ProcessBroadcastShapes.h.

136{
137 return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
138 indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] + indexes[4] * desc.strides[4];
139}

References onert_micro::execute::pal::NdArrayDesc< N >::strides.

◆ SVDF() [1/2]

OMStatus onert_micro::execute::pal::SVDF ( const core::SVDFQuantParams params,
const int8_t *  input_data,
const int8_t *  weights_feature_data,
const int8_t *  weights_time_data,
const int32_t *  bias_data,
int8_t *  state_data,
int8_t *  output_data,
const core::OMRuntimeShape input_shape,
const core::OMRuntimeShape weights_feature_shape,
const core::OMRuntimeShape weights_time_shape,
const core::OMRuntimeShape bias_shape,
const core::OMRuntimeShape output_shape 
)

Definition at line 35 of file PALSVDF.h.

42{
43 cmsis_nn_dims input_dims;
44 input_dims.n = input_shape.dims(0);
45 input_dims.h = input_shape.dims(1);
46
47 cmsis_nn_dims weights_feature_dims;
48 weights_feature_dims.n = weights_feature_shape.dims(0);
49 weights_feature_dims.h = weights_feature_shape.dims(1);
50
51 cmsis_nn_dims weights_time_dims;
52 weights_time_dims.n = weights_time_shape.dims(0);
53 weights_time_dims.h = weights_time_shape.dims(1);
54
55 cmsis_nn_dims bias_dims;
56 bias_dims.n = bias_shape.dims(0);
57
58 cmsis_nn_dims state_dims;
59 state_dims.n = bias_shape.dims(0);
60 state_dims.h = bias_shape.dims(1);
61
62 cmsis_nn_dims output_dims;
63 output_dims.n = output_shape.dims(0);
64 output_dims.h = output_shape.dims(1);
65
66 cmsis_nn_svdf_params svdf_params;
67 svdf_params.rank = params.rank;
68 svdf_params.input_offset = params.input_zero_point;
69 svdf_params.output_offset = params.output_zero_point;
70
71 svdf_params.input_activation.min = INT16_MIN;
72 svdf_params.input_activation.max = INT16_MAX;
73
74 svdf_params.output_activation.min = INT8_MIN;
75 svdf_params.output_activation.max = INT8_MAX;
76
77 cmsis_nn_per_tensor_quant_params in_quant_params;
78 in_quant_params.multiplier = params.effective_scale_1_a;
79 in_quant_params.shift = params.effective_scale_1_b;
80
81 cmsis_nn_per_tensor_quant_params out_quant_params;
82 out_quant_params.multiplier = params.effective_scale_2_a;
83 out_quant_params.shift = params.effective_scale_2_b;
84
85 const int batch_size = input_shape.dims(0);
86 const int input_size = input_shape.dims(1);
87 const int num_filters = weights_feature_shape.dims(0);
88 const int num_units = num_filters / params.rank;
89
90 uint8_t *scratch_tensor_data;
91 OMStatus status = core::memory::OMMemoryManager::allocateMemory(
92 batch_size * num_filters * sizeof(int32_t), &scratch_tensor_data);
93 assert(status == Ok);
94 if (status != Ok)
95 return status;
96
97 uint8_t *scratch_output_tensor_data;
98 status = core::memory::OMMemoryManager::allocateMemory(batch_size * num_units * sizeof(int32_t),
99 &scratch_output_tensor_data);
100 assert(status == Ok);
101 if (status != Ok)
102 return status;
103
104 cmsis_nn_context scratch_ctx;
105 scratch_ctx.buf = reinterpret_cast<int32_t *>(scratch_tensor_data);
106
107 cmsis_nn_context scratch_output_ctx;
108 scratch_output_ctx.buf = reinterpret_cast<int32_t *>(scratch_output_tensor_data);
109
110 arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params,
111 &input_dims, input_data, &state_dims, state_data, &weights_feature_dims,
112 weights_feature_data, &weights_time_dims, weights_time_data, &bias_dims, bias_data,
113 &output_dims, output_data);
114
115 core::memory::OMMemoryManager::deallocateMemory(scratch_tensor_data);
116 core::memory::OMMemoryManager::deallocateMemory(scratch_output_tensor_data);
117
118 return Ok;
119}

References onert_micro::core::memory::OMMemoryManager::allocateMemory(), onert_micro::core::memory::OMMemoryManager::deallocateMemory(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::SVDFQuantParams::effective_scale_1_a, onert_micro::core::SVDFQuantParams::effective_scale_1_b, onert_micro::core::SVDFQuantParams::effective_scale_2_a, onert_micro::core::SVDFQuantParams::effective_scale_2_b, onert_micro::core::SVDFQuantParams::input_zero_point, onert_micro::Ok, output_shape, onert_micro::core::SVDFQuantParams::output_zero_point, and onert_micro::core::SVDFQuantParams::rank.

Referenced by onert_micro::execute::execute_kernel_CircleSVDF().

◆ SVDF() [2/2]

OMStatus onert_micro::execute::pal::SVDF ( const float *  input_data,
const float *  weights_feature_data,
const float *  weights_time_data,
const float *  bias_data,
float *  state_data,
float *  scratch_data,
float *  output_data,
const int  rank,
const int  input_size,
const int  batch_size,
const int  num_filters,
const int  num_units,
const int  memory_size,
const circle::ActivationFunctionType  activation 
)

Definition at line 138 of file PALSVDFCommon.h.

143{
144 // Left shift the activation_state.
145 {
146 float *new_state_start = state_data;
147 const float *old_state_start = state_data + 1;
148 const float *old_state_end = state_data + batch_size * num_filters * memory_size;
149 while (old_state_start != old_state_end)
150 {
151 *new_state_start++ = *old_state_start++;
152 }
153 }
154
155 // Note: no need to clear the latest activation, matmul is not accumulative.
156
157 // Compute conv1d(inputs, weights_feature).
158 // The activation_state's rightmost column is used to save current cycle
159 // activation. This is achieved by starting at state_ptr[memory_size - 1] and
160 // having the stride equal to memory_size.
161
162 // Perform batched matrix vector multiply operation:
163 {
164 const float *matrix = weights_feature_data;
165 const float *vector = input_data;
166 float *result = &state_data[memory_size - 1];
167 float *result_in_batch = result;
168 for (int i = 0; i < batch_size; ++i)
169 {
170 const float *matrix_ptr = matrix;
171 for (int j = 0; j < num_filters; ++j)
172 {
173 float dot_prod = 0.0f;
174 const float *vector_in_batch = vector + i * input_size;
175 for (int k = 0; k < input_size; ++k)
176 {
177 dot_prod += *matrix_ptr++ * *vector_in_batch++;
178 }
179 *result_in_batch = dot_prod;
180 result_in_batch += memory_size;
181 }
182 }
183 }
184
185 applyTimeWeightsBiasAndActivation(batch_size, memory_size, num_filters, num_units, rank,
186 weights_time_data, bias_data, activation, state_data,
187 scratch_data, output_data);
188 return Ok;
189}

References onert_micro::Ok.

◆ Tanh()

template<typename T >
OMStatus onert_micro::execute::pal::Tanh ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 31 of file PALTanhCommon.h.

33{
34 const uint32_t flat_size = input_shape.flatSize();
35
36 if (flat_size == -1)
37 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
38
39 assert(input_data != nullptr);
40 assert(output_data != nullptr);
41
42 assert(input_shape == output_shape);
43
44 for (int i = 0; i < flat_size; i++)
45 {
46 output_data[i] = std::tanh(input_data[i]);
47 }
48
49 return Ok;
50}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, and onert_micro::UnknownError.

◆ Transpose()

template<typename T , int N = 5>
OMStatus onert_micro::execute::pal::Transpose ( const core::TransposeParams params,
const core::OMRuntimeShape unextended_input_shape,
const T *  input_data,
const core::OMRuntimeShape unextended_output_shape,
T *  output_data 
)
inline

Definition at line 78 of file PALTransposeCommon.h.

81{
82 // Transpose kernel only does rearranging values not numeric evaluations on
83 // each cell. It's safe to implement per size of scalar type and this trick
84 // keeps the total code size in a reasonable range.
85 OMStatus status;
86 switch (sizeof(T))
87 {
88 case 1:
89 status = TransposeImpl<int8_t, N>(
90 params, unextended_input_shape, reinterpret_cast<const int8_t *>(input_data),
91 unextended_output_shape, reinterpret_cast<int8_t *>(output_data));
92 break;
93 case 2:
94 status = TransposeImpl<int16_t, N>(
95 params, unextended_input_shape, reinterpret_cast<const int16_t *>(input_data),
96 unextended_output_shape, reinterpret_cast<int16_t *>(output_data));
97 break;
98
99 case 4:
100 status = TransposeImpl<int32_t, N>(
101 params, unextended_input_shape, reinterpret_cast<const int32_t *>(input_data),
102 unextended_output_shape, reinterpret_cast<int32_t *>(output_data));
103 break;
104 case 8:
105 status = TransposeImpl<int64_t, N>(
106 params, unextended_input_shape, reinterpret_cast<const int64_t *>(input_data),
107 unextended_output_shape, reinterpret_cast<int64_t *>(output_data));
108 break;
109 default:
110 status = UnknownError;
111 break;
112 }
113 return status;
114}

References onert_micro::UnknownError.
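Example (standalone sketch of the dispatch idea): because transpose only rearranges values, data can be moved through an integer type of the same width, which is why a float tensor takes the 4-byte (int32_t) path above. The snippet mirrors that trick for a small 2x3 matrix.

#include <cstdint>
#include <cstdio>

int main()
{
  // 2x3 float matrix, row-major.
  const float input[6] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
  float output[6]; // will hold the 3x2 transpose

  const int32_t *in_bits = reinterpret_cast<const int32_t *>(input);
  int32_t *out_bits = reinterpret_cast<int32_t *>(output);

  for (int r = 0; r < 2; ++r)
    for (int c = 0; c < 3; ++c)
      out_bits[c * 2 + r] = in_bits[r * 3 + c]; // copy raw 4-byte patterns

  for (float v : output)
    std::printf("%g ", v); // expected: 1 4 2 5 3 6
  std::printf("\n");
  return 0;
}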

◆ TransposeConv()

template<typename T >
OMStatus onert_micro::execute::pal::TransposeConv ( const core::FloatConv2D params,
const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape filter_shape,
const T *  filter_data,
const T *  bias_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 35 of file PALTransposeConvCommon.h.

38{
39 assert(false && "Not IMPL yet");
40}

◆ TransposeConv< float >()

template<>
OMStatus onert_micro::execute::pal::TransposeConv< float > ( const core::FloatConv2D params,
const core::OMRuntimeShape input_shape,
const float *  input_data,
const core::OMRuntimeShape filter_shape,
const float *  filter_data,
const float *  bias_data,
const core::OMRuntimeShape output_shape,
float *  output_data 
)
inline

Definition at line 42 of file PALTransposeConvCommon.h.

47{
48 const int stride_width = params->stride_w;
49 const int stride_height = params->stride_h;
50 const int pad_width = params->pad_w;
51 const int pad_height = params->pad_h;
52
53 const int batches = input_shape.dims(0);
54 const int input_depth = input_shape.dims(3);
55 const int output_depth = filter_shape.dims(0);
56 const int input_height = input_shape.dims(1);
57 const int input_width = input_shape.dims(2);
58 const int filter_height = filter_shape.dims(1);
59 const int filter_width = filter_shape.dims(2);
60 const int output_height = output_shape.dims(1);
61 const int output_width = output_shape.dims(2);
62 const float output_activation_min = params->activation_min;
63 const float output_activation_max = params->activation_max;
64
65 // Although transpose convolution simplifies to convolution with transposed
66 // weights for strides of 1, non-unitary striding complicates matters. To
67 // keep this reference implementation as clear as possible, we use a
68 // "scatter" access pattern, where we loop through all the input elements,
69 // computing their influence on the output, rather than looping through the
70 // output elements in the typical "gather" access pattern of a conv. We
71 // therefore must initialize the output array to zero.
72 const int num_elements = output_shape.flatSize();
73 for (int i = 0; i < num_elements; i++)
74 {
75 output_data[i] = 0.0f;
76 }
77
78 // Loop through input elements one at a time.
79 for (int batch = 0; batch < batches; ++batch)
80 {
81 for (int in_y = 0; in_y < input_height; ++in_y)
82 {
83 for (int in_x = 0; in_x < input_width; ++in_x)
84 {
85 for (int in_channel = 0; in_channel < input_depth; ++in_channel)
86 {
87 // Loop through the output elements it will influence
88 const int out_x_origin = (in_x * stride_width) - pad_width;
89 const int out_y_origin = (in_y * stride_height) - pad_height;
90 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
91 {
92 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
93 {
94 for (int out_channel = 0; out_channel < output_depth; ++out_channel)
95 {
96 // Compute output element location
97 const int out_x = out_x_origin + filter_x;
98 const int out_y = out_y_origin + filter_y;
99 // We cannot accumulate out of bounds
100 if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
101 (out_y < output_height))
102 {
103 float input_value =
104 input_data[offset(input_shape.dimsData(), batch, in_y, in_x, in_channel)];
105 float filter_value = filter_data[offset(filter_shape.dimsData(), out_channel,
106 filter_y, filter_x, in_channel)];
107 output_data[offset(output_shape.dimsData(), batch, out_y, out_x, out_channel)] +=
108 input_value * filter_value;
109 }
110 }
111 }
112 }
113 }
114 }
115 }
116 }
117
118 for (int batch = 0; batch < batches; ++batch)
119 {
120 for (int out_y = 0; out_y < output_height; ++out_y)
121 {
122 for (int out_x = 0; out_x < output_width; ++out_x)
123 {
124 for (int out_channel = 0; out_channel < output_depth; ++out_channel)
125 {
126 float acc =
127 output_data[offset(output_shape.dimsData(), batch, out_y, out_x, out_channel)];
128 if (bias_data)
129 acc += bias_data[out_channel];
130
131 output_data[offset(output_shape.dimsData(), batch, out_y, out_x, out_channel)] =
132 activationFunctionWithMinMax(acc, output_activation_min, output_activation_max);
133 }
134 }
135 }
136 }
137 return Ok;
138}

References onert_micro::core::FloatConv2D::activation_max, onert_micro::core::FloatConv2D::activation_min, activationFunctionWithMinMax(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), luci_interpreter::RuntimeShape::dimsData(), onert_micro::core::OMRuntimeShape::dimsData(), luci_interpreter::RuntimeShape::flatSize(), offset(), onert_micro::Ok, output_shape, onert_micro::core::FloatConv2D::pad_h, onert_micro::core::FloatConv2D::pad_w, onert_micro::core::FloatConv2D::stride_h, and onert_micro::core::FloatConv2D::stride_w.

Referenced by onert_micro::execute::execute_kernel_CircleTransposeConv().
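Example (standalone sketch): the "scatter" pattern is easiest to see in one dimension, where each input element accumulates its contribution into the output positions it influences, which is why the output buffer is zeroed first. The stride, filter and sizes below are made up.

#include <cstdio>

int main()
{
  const int stride = 2, pad = 0;
  const float input[3] = {1.f, 2.f, 3.f};
  const float filter[3] = {1.f, 1.f, 1.f};
  const int output_len = (3 - 1) * stride + 3; // 7 output positions
  float output[7] = {};                        // scatter requires zero init

  for (int in_x = 0; in_x < 3; ++in_x)
  {
    const int out_x_origin = in_x * stride - pad;
    for (int k = 0; k < 3; ++k)
    {
      const int out_x = out_x_origin + k;
      if (out_x >= 0 && out_x < output_len)
        output[out_x] += input[in_x] * filter[k];
    }
  }

  for (float v : output)
    std::printf("%g ", v); // expected: 1 1 3 2 5 3 3
  std::printf("\n");
  return 0;
}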

◆ TransposeImpl()

template<typename T , int N>
OMStatus onert_micro::execute::pal::TransposeImpl ( const core::TransposeParams params,
const core::OMRuntimeShape unextended_input_shape,
const T *  input_data,
const core::OMRuntimeShape unextended_output_shape,
T *  output_data 
)
inline

Definition at line 36 of file PALTransposeCommon.h.

40{
41 const int unextended_input_size = unextended_input_shape.dimensionsCount();
42 const int unextended_output_size = unextended_output_shape.dimensionsCount();
43
44 const int input_ext_size = N - unextended_input_size;
45 const int output_ext_size = N - unextended_output_size;
46 NdArrayDesc<N> input_desc;
47 NdArrayDesc<N> output_desc;
48 copyDimsToDesc(core::OMRuntimeShape::extendedShape(N, unextended_input_shape), &input_desc);
49 copyDimsToDesc(core::OMRuntimeShape::extendedShape(N, unextended_output_shape), &output_desc);
50
51 // The perm data is extended to match the output, each index incremented by
52 // the amount of front padding of the input shape.
53 int extended_perm[N];
54 for (int i = 0; i < N; ++i)
55 {
56 extended_perm[i] = i < output_ext_size ? i : params.perm[i - output_ext_size] + input_ext_size;
57 }
58
59 // Permutes the input shape so we don't need to permute the indexes inside
60 // the loop. Check to make sure output_dims is matching input_dims.
61 NdArrayDesc<N> perm_input_desc;
62 for (int k = 0; k < N; ++k)
63 {
64 perm_input_desc.extents[k] = input_desc.extents[extended_perm[k]];
65 perm_input_desc.strides[k] = input_desc.strides[extended_perm[k]];
66 }
67
68 // Naive transpose loop (iterate on output index and compute input index).
69 auto tranpose_func = [&](int indexes[N]) {
70 output_data[subscriptToIndex(output_desc, indexes)] =
71 input_data[subscriptToIndex(perm_input_desc, indexes)];
72 };
73 NDOpsHelper<N>(output_desc, tranpose_func);
74 return Ok;
75}

References copyDimsToDesc(), onert_micro::core::OMRuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::extendedShape(), onert_micro::execute::pal::NdArrayDesc< N >::extents, onert_micro::Ok, onert_micro::core::TransposeParams::perm, onert_micro::execute::pal::NdArrayDesc< N >::strides, and subscriptToIndex().

◆ UnaryOp()

template<typename T , typename Fn >
OMStatus onert_micro::execute::pal::UnaryOp ( const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
T *  output_data 
)
inline

Definition at line 40 of file PALUnaryOpCommon.h.

42{
43 Fn func;
44 const uint32_t flat_size = input_shape.flatSize();
45
46 if (flat_size == -1)
47 OM_LOG_AND_RETURN(UnknownError, "Unknown error encountered");
48
49 assert(input_data != nullptr);
50 assert(output_data != nullptr);
51
52 assert(input_shape == output_shape);
53 for (int i = 0; i < flat_size; ++i)
54 {
55 output_data[i] = func(input_data[i]);
56 }
57 return Ok;
58}

References onert_micro::core::OMRuntimeShape::flatSize(), onert_micro::Ok, OM_LOG_AND_RETURN, output_shape, and onert_micro::UnknownError.

◆ Unpack()

template<typename T >
OMStatus onert_micro::execute::pal::Unpack ( const core::SplitParams params,
const core::OMRuntimeShape input_shape,
const T *  input_data,
const core::OMRuntimeShape output_shape,
int32_t  axis_value 
)

Definition at line 31 of file PALUnpack.h.

33{
34 const auto dimensions = input_shape.dimensionsCount();
35
36 if (axis_value < 0)
37 {
38 axis_value += dimensions;
39 }
40
41 int outer_size = 1;
42 for (int i = 0; i < axis_value; ++i)
43 {
44 outer_size *= input_shape.dims(i);
45 }
46 int copy_size = 1;
47 for (int i = axis_value + 1; i < dimensions; ++i)
48 {
49 copy_size *= input_shape.dims(i);
50 }
51 int output_size = 1;
52 for (int i = 0; i < output_shape.dimensionsCount(); ++i)
53 {
54 output_size *= output_shape.dims(i);
55 }
56
57 for (int i = 0; i < params.num_outputs; ++i)
58 {
59 T *output_data = core::utils::castOutputData<T>(params.output_data[i]);
60 assert(output_data != nullptr);
61 for (int k = 0; k < outer_size; ++k)
62 {
63 T *output_ptr = output_data + copy_size * k;
64 int loc = k * params.num_outputs * copy_size + i * copy_size;
65 const T *input_ptr = input_data + loc;
66 for (int j = 0; j < copy_size; ++j)
67 output_ptr[j] = input_ptr[j];
68 }
69 }
70 return Ok;
71}

References luci_interpreter::RuntimeShape::dimensionsCount(), onert_micro::core::OMRuntimeShape::dimensionsCount(), luci_interpreter::RuntimeShape::dims(), onert_micro::core::OMRuntimeShape::dims(), onert_micro::core::SplitParams::num_outputs, onert_micro::Ok, onert_micro::core::SplitParams::output_data, and output_shape.

Variable Documentation

◆ MAX_INDICES_ND

constexpr int onert_micro::execute::pal::MAX_INDICES_ND = 5
constexpr

Definition at line 32 of file PALGatherND.h.

Referenced by onert_micro::import::configure_kernel_CircleGatherND(), and GatherND().