Functions
void	BatchMatMul (const BatchMatMulParams &params, const float *lhs_data, const float *rhs_data, float *output_data)

template<typename T >
void	BinaryArithmeticOp (const BinaryArithmeticOpParam &params, const Shape &input1_shape, const T *input1_data, const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data, const std::function< T(const T &, const T &)> &fn)

template<>
void	BinaryArithmeticOp (const BinaryArithmeticOpParam &params, const Shape &input1_shape, const float *input1_data, const Shape &input2_shape, const float *input2_data, const Shape &output_shape, float *output_data, const std::function< float(const float &, const float &)> &fn)

template<>
void	BinaryArithmeticOp (const BinaryArithmeticOpParam &, const Shape &input1_shape, const bool *input1_data, const Shape &input2_shape, const bool *input2_data, const Shape &output_shape, bool *output_data, const std::function< bool(const bool &, const bool &)> &fn)

template<>
void	BinaryArithmeticOp (const BinaryArithmeticOpParam &params, const Shape &input1_shape, const int64_t *input1_data, const Shape &input2_shape, const int64_t *input2_data, const Shape &output_shape, int64_t *output_data, const std::function< int64_t(const int64_t &, const int64_t &)> &fn)

template<typename T >
std::enable_if_t< is_quant8< T >::value >	BroadcastBinaryArithmeticOpSlow (const BinaryArithmeticOpParam &params, const Shape &input1_shape, const T *input1_data, const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data, const std::function< T(const BinaryArithmeticOpParam &params, const T &, const T &)> &fn)

template<typename T >
void	BroadcastBinaryArithmeticOpSlow (const BinaryArithmeticOpParam &params, const Shape &input1_shape, const T *input1_data, const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data, const std::function< T(const T &, const T &)> &fn)

template<>
void	BroadcastBinaryArithmeticOpSlow (const BinaryArithmeticOpParam &params, const Shape &input1_shape, const float *input1_data, const Shape &input2_shape, const float *input2_data, const Shape &output_shape, float *output_data, const std::function< float(const float &, const float &)> &fn)

template<>
void	BroadcastBinaryArithmeticOpSlow (const BinaryArithmeticOpParam &, const Shape &input1_shape, const bool *input1_data, const Shape &input2_shape, const bool *input2_data, const Shape &output_shape, bool *output_data, const std::function< bool(const bool &, const bool &)> &fn)

template<>
void	BroadcastBinaryArithmeticOpSlow (const BinaryArithmeticOpParam &params, const Shape &input1_shape, const int64_t *input1_data, const Shape &input2_shape, const int64_t *input2_data, const Shape &output_shape, int64_t *output_data, const std::function< int64_t(const int64_t &, const int64_t &)> &fn)

void	Conv (const ConvParams &params, const Shape &input_shape, const float *input_data, const Shape &filter_shape, const float *filter_data, const Shape &bias_shape, const float *bias_data, const Shape &output_shape, float *output_data)

void	Conv (const ConvParams &params, const Shape &input_shape, const uint8_t *input_data, const Shape &filter_shape, const uint8_t *filter_data, const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)

template<typename T , bool is_asymmetric>
void	Conv (const ConvParams &params, const int32_t *output_multiplier, const int32_t *output_shift, const Shape &input_shape, const T *input_data, const Shape &filter_shape, const T *filter_data, const int32_t *filter_zeropoint, const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, T *output_data)

void	HybridConvPerChannel (const ConvParams &params, float *scaling_factors_ptr, const Shape &input_shape, const int8_t *input_data, const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape, const float *bias_data, const Shape &output_shape, float *output_data, const float *per_channel_scale, const int32_t *input_offset)

void	Softmax (const SoftmaxParams &params, const Shape &input_shape, const float *input_data, const Shape &output_shape, float *output_data)

template<typename T >
void	TransposeImpl (const TransposeParams &params, const Shape &unextended_input_shape, const T *input_data, const Shape &unextended_output_shape, T *output_data)

template<typename T >
void	Transpose (const TransposeParams &params, const Shape &unextended_input_shape, const T *input_data, const Shape &unextended_output_shape, T *output_data)

Function Documentation

◆ BatchMatMul()

void nnfw::cker::reference::BatchMatMul	(	const BatchMatMulParams &	params,
		const float *	lhs_data,
		const float *	rhs_data,
		float *	output_data
	)

inline

Definition at line 32 of file BatchMatMul.h.

{
  // Visits every (batch0, batch1, batch2) combination. Each combination selects
  // one lhs block, one rhs block and one lhs_rows * rhs_cols output block.
  for (int batch0 = 0; batch0 < params.batch_dim0; ++batch0)
  {
    const float *lhs_level0 = lhs_data + (batch0 * params.lhs_ext0);
    const float *rhs_level0 = rhs_data + (batch0 * params.rhs_ext0);
    for (int batch1 = 0; batch1 < params.batch_dim1; ++batch1)
    {
      const float *lhs_level1 = lhs_level0 + batch1 * params.lhs_ext1;
      const float *rhs_level1 = rhs_level0 + batch1 * params.rhs_ext1;
      for (int batch2 = 0; batch2 < params.batch_dim2; ++batch2)
      {
        const float *lhs_block = lhs_level1 + batch2 * params.lhs_ext2;
        const float *rhs_block = rhs_level1 + batch2 * params.rhs_ext2;

        // Flattened position of this batch combination in the output.
        const auto batch_index =
          (batch0 * params.batch_dim1 * params.batch_dim2) + batch1 * params.batch_dim2 + batch2;
        float *out_block = output_data + batch_index * params.lhs_rows * params.rhs_cols;

        for (int col = 0; col < params.rhs_cols; ++col)
        {
          // rhs is read as rhs_block[col * accum_depth + depth].
          const auto rhs_start = col * params.accum_depth;
          for (int row = 0; row < params.lhs_rows; ++row)
          {
            // lhs is read as lhs_block[accum_depth * row + depth].
            const auto lhs_start = params.accum_depth * row;
            float dot = 0.f;
            for (int depth = 0; depth < params.accum_depth; ++depth)
            {
              dot += lhs_block[lhs_start + depth] * rhs_block[rhs_start + depth];
            }
            // Results for one `col` occupy lhs_rows consecutive output slots.
            out_block[params.lhs_rows * col + row] = dot;
          }
        }
      }
    }
  }
}

Referenced by nnfw::cker::BatchMatMul::operator()().

◆ BinaryArithmeticOp() [1/4]

template<>

void nnfw::cker::reference::BinaryArithmeticOp	(	const BinaryArithmeticOpParam &	,
		const Shape &	input1_shape,
		const bool *	input1_data,
		const Shape &	input2_shape,
		const bool *	input2_data,
		const Shape &	output_shape,
		bool *	output_data,
		const std::function< bool(const bool &, const bool &)> &	fn
	)

inline

Definition at line 65 of file BinaryArithmeticOps.h.

{
  // Element count comes from MatchingElementsSize over the three shapes
  // (presumably it also checks that they agree - confirm in its definition).
  const int count = MatchingElementsSize(input1_shape, input2_shape, output_shape);

  // The bool overload applies no activation clamp: fn's result is stored directly.
  int idx = 0;
  while (idx < count)
  {
    output_data[idx] = fn(input1_data[idx], input2_data[idx]);
    ++idx;
  }
}

References nnfw::cker::MatchingElementsSize(), output_shape, and size.

◆ BinaryArithmeticOp() [2/4]

template<>

void nnfw::cker::reference::BinaryArithmeticOp	(	const BinaryArithmeticOpParam &	params,
		const Shape &	input1_shape,
		const float *	input1_data,
		const Shape &	input2_shape,
		const float *	input2_data,
		const Shape &	output_shape,
		float *	output_data,
		const std::function< float(const float &, const float &)> &	fn
	)

inline

Definition at line 50 of file BinaryArithmeticOps.h.

{
  // Number of elements to process, as reported by MatchingElementsSize.
  const int num_elements = MatchingElementsSize(input1_shape, input2_shape, output_shape);

  // Clamp bounds for the float path.
  const auto &clamp_lo = params.float_activation_min;
  const auto &clamp_hi = params.float_activation_max;

  for (int pos = 0; pos < num_elements; ++pos)
  {
    // Apply the binary functor first, then clamp its result.
    output_data[pos] =
      ActivationFunctionWithMinMax(fn(input1_data[pos], input2_data[pos]), clamp_lo, clamp_hi);
  }
}

References nnfw::cker::ActivationFunctionWithMinMax(), nnfw::cker::BinaryArithmeticOpParam::float_activation_max, nnfw::cker::BinaryArithmeticOpParam::float_activation_min, nnfw::cker::MatchingElementsSize(), output_shape, and size.

◆ BinaryArithmeticOp() [3/4]

template<>

void nnfw::cker::reference::BinaryArithmeticOp	(	const BinaryArithmeticOpParam &	params,
		const Shape &	input1_shape,
		const int64_t *	input1_data,
		const Shape &	input2_shape,
		const int64_t *	input2_data,
		const Shape &	output_shape,
		int64_t *	output_data,
		const std::function< int64_t(const int64_t &, const int64_t &)> &	fn
	)

inline

Definition at line 79 of file BinaryArithmeticOps.h.

{
  // Number of elements to process, as reported by MatchingElementsSize.
  const int total = MatchingElementsSize(input1_shape, input2_shape, output_shape);

  // Clamp bounds for the int64_t path.
  const auto &clamp_lo = params.int64_activation_min;
  const auto &clamp_hi = params.int64_activation_max;

  int pos = 0;
  while (pos < total)
  {
    // Apply the binary functor first, then clamp its result.
    output_data[pos] =
      ActivationFunctionWithMinMax(fn(input1_data[pos], input2_data[pos]), clamp_lo, clamp_hi);
    ++pos;
  }
}

References nnfw::cker::ActivationFunctionWithMinMax(), nnfw::cker::BinaryArithmeticOpParam::int64_activation_max, nnfw::cker::BinaryArithmeticOpParam::int64_activation_min, nnfw::cker::MatchingElementsSize(), and output_shape.

◆ BinaryArithmeticOp() [4/4]

template<typename T >

void nnfw::cker::reference::BinaryArithmeticOp	(	const BinaryArithmeticOpParam &	params,
		const Shape &	input1_shape,
		const T *	input1_data,
		const Shape &	input2_shape,
		const T *	input2_data,
		const Shape &	output_shape,
		T *	output_data,
		const std::function< T(const T &, const T &)> &	fn
	)

inline

Definition at line 35 of file BinaryArithmeticOps.h.

{
  // Number of elements to process, as reported by MatchingElementsSize.
  const int32_t element_count = MatchingElementsSize(input1_shape, input2_shape, output_shape);

  // The generic overload clamps with the quantized activation bounds.
  const auto &clamp_lo = params.quantized_activation_min;
  const auto &clamp_hi = params.quantized_activation_max;

  for (int pos = 0; pos < element_count; ++pos)
  {
    output_data[pos] =
      ActivationFunctionWithMinMax(fn(input1_data[pos], input2_data[pos]), clamp_lo, clamp_hi);
  }
}

References nnfw::cker::ActivationFunctionWithMinMax(), nnfw::cker::MatchingElementsSize(), output_shape, nnfw::cker::BinaryArithmeticOpParam::quantized_activation_max, and nnfw::cker::BinaryArithmeticOpParam::quantized_activation_min.

Referenced by nnfw::cker::BinaryArithmeticOp(), and nnfw::cker::optimized::Div().

◆ BroadcastBinaryArithmeticOpSlow() [1/5]

template<>

void nnfw::cker::reference::BroadcastBinaryArithmeticOpSlow	(	const BinaryArithmeticOpParam &	,
		const Shape &	input1_shape,
		const bool *	input1_data,
		const Shape &	input2_shape,
		const bool *	input2_data,
		const Shape &	output_shape,
		bool *	output_data,
		const std::function< bool(const bool &, const bool &)> &	fn
	)

inline

Definition at line 207 of file BinaryArithmeticOps.h.

{
  // One descriptor per input, filled in from the pair of input shapes.
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);

  // 4-D form of the output shape; it is walked below as (b, y, x, c).
  const Shape out4d = Shape::ExtendedShape(4, output_shape);
  const auto extent_b = out4d.Dims(0);
  const auto extent_y = out4d.Dims(1);
  const auto extent_x = out4d.Dims(2);
  const auto extent_c = out4d.Dims(3);

  for (int b = 0; b < extent_b; ++b)
  {
    for (int y = 0; y < extent_y; ++y)
    {
      for (int x = 0; x < extent_x; ++x)
      {
        for (int c = 0; c < extent_c; ++c)
        {
          // Each input is addressed through its own descriptor.
          const auto idx1 = SubscriptToIndex(desc1, b, y, x, c);
          const auto idx2 = SubscriptToIndex(desc2, b, y, x, c);
          // The bool overload stores fn's result without any clamp.
          output_data[Offset(out4d, b, y, x, c)] = fn(input1_data[idx1], input2_data[idx2]);
        }
      }
    }
  }
}

References desc1, desc2, nnfw::cker::Shape::Dims(), nnfw::cker::NdArrayDescsForElementwiseBroadcast(), nnfw::cker::Offset(), output_shape, and nnfw::cker::SubscriptToIndex().

◆ BroadcastBinaryArithmeticOpSlow() [2/5]

template<>

void nnfw::cker::reference::BroadcastBinaryArithmeticOpSlow	(	const BinaryArithmeticOpParam &	params,
		const Shape &	input1_shape,
		const float *	input1_data,
		const Shape &	input2_shape,
		const float *	input2_data,
		const Shape &	output_shape,
		float *	output_data,
		const std::function< float(const float &, const float &)> &	fn
	)

inline

Definition at line 178 of file BinaryArithmeticOps.h.

{
  // One descriptor per input, filled in from the pair of input shapes.
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);

  // 4-D form of the output shape; it is walked below as (b, y, x, c).
  const Shape out4d = Shape::ExtendedShape(4, output_shape);
  const auto extent_b = out4d.Dims(0);
  const auto extent_y = out4d.Dims(1);
  const auto extent_x = out4d.Dims(2);
  const auto extent_c = out4d.Dims(3);

  // Clamp bounds for the float path.
  const auto &clamp_lo = params.float_activation_min;
  const auto &clamp_hi = params.float_activation_max;

  for (int b = 0; b < extent_b; ++b)
  {
    for (int y = 0; y < extent_y; ++y)
    {
      for (int x = 0; x < extent_x; ++x)
      {
        for (int c = 0; c < extent_c; ++c)
        {
          // Each input is addressed through its own descriptor.
          const auto idx1 = SubscriptToIndex(desc1, b, y, x, c);
          const auto idx2 = SubscriptToIndex(desc2, b, y, x, c);
          output_data[Offset(out4d, b, y, x, c)] =
            ActivationFunctionWithMinMax(fn(input1_data[idx1], input2_data[idx2]), clamp_lo, clamp_hi);
        }
      }
    }
  }
}

References nnfw::cker::ActivationFunctionWithMinMax(), desc1, desc2, nnfw::cker::Shape::Dims(), nnfw::cker::BinaryArithmeticOpParam::float_activation_max, nnfw::cker::BinaryArithmeticOpParam::float_activation_min, nnfw::cker::NdArrayDescsForElementwiseBroadcast(), nnfw::cker::Offset(), output_shape, and nnfw::cker::SubscriptToIndex().

◆ BroadcastBinaryArithmeticOpSlow() [3/5]

template<>

void nnfw::cker::reference::BroadcastBinaryArithmeticOpSlow	(	const BinaryArithmeticOpParam &	params,
		const Shape &	input1_shape,
		const int64_t *	input1_data,
		const Shape &	input2_shape,
		const int64_t *	input2_data,
		const Shape &	output_shape,
		int64_t *	output_data,
		const std::function< int64_t(const int64_t &, const int64_t &)> &	fn
	)

inline

Definition at line 235 of file BinaryArithmeticOps.h.

{
  // One descriptor per input, filled in from the pair of input shapes.
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);

  // 4-D form of the output shape; it is walked below as (b, y, x, c).
  const Shape out4d = Shape::ExtendedShape(4, output_shape);
  const auto extent_b = out4d.Dims(0);
  const auto extent_y = out4d.Dims(1);
  const auto extent_x = out4d.Dims(2);
  const auto extent_c = out4d.Dims(3);

  // Clamp bounds for the int64_t path.
  const auto &clamp_lo = params.int64_activation_min;
  const auto &clamp_hi = params.int64_activation_max;

  for (int b = 0; b < extent_b; ++b)
  {
    for (int y = 0; y < extent_y; ++y)
    {
      for (int x = 0; x < extent_x; ++x)
      {
        for (int c = 0; c < extent_c; ++c)
        {
          // Each input is addressed through its own descriptor.
          const auto idx1 = SubscriptToIndex(desc1, b, y, x, c);
          const auto idx2 = SubscriptToIndex(desc2, b, y, x, c);
          output_data[Offset(out4d, b, y, x, c)] =
            ActivationFunctionWithMinMax(fn(input1_data[idx1], input2_data[idx2]), clamp_lo, clamp_hi);
        }
      }
    }
  }
}

References nnfw::cker::ActivationFunctionWithMinMax(), desc1, desc2, nnfw::cker::Shape::Dims(), nnfw::cker::BinaryArithmeticOpParam::int64_activation_max, nnfw::cker::BinaryArithmeticOpParam::int64_activation_min, nnfw::cker::NdArrayDescsForElementwiseBroadcast(), nnfw::cker::Offset(), output_shape, and nnfw::cker::SubscriptToIndex().

◆ BroadcastBinaryArithmeticOpSlow() [4/5]

template<typename T >

std::enable_if_t< is_quant8< T >::value > nnfw::cker::reference::BroadcastBinaryArithmeticOpSlow	(	const BinaryArithmeticOpParam &	params,
		const Shape &	input1_shape,
		const T *	input1_data,
		const Shape &	input2_shape,
		const T *	input2_data,
		const Shape &	output_shape,
		T *	output_data,
		const std::function< T(const BinaryArithmeticOpParam &params, const T &, const T &)> &	fn
	)

inline

Definition at line 94 of file BinaryArithmeticOps.h.

{
  // One descriptor per input, filled in from the pair of input shapes.
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);

  // 4-D form of the output shape; it is walked below as (b, y, x, c).
  const Shape out4d = Shape::ExtendedShape(4, output_shape);
  const auto extent_b = out4d.Dims(0);
  const auto extent_y = out4d.Dims(1);
  const auto extent_x = out4d.Dims(2);
  const auto extent_c = out4d.Dims(3);

  // Note carried over from TensorFlow Lite:
  //
  // TensorFlow names the dimensions (batch_number, row, col, channel) with
  // extents (batches, height, width, depth); the trailing dimension changes
  // most rapidly (channels has the smallest stride, typically 1 element).
  //
  // Generated C code stores arrays with the dimensions reversed, so the first
  // dimension has the smallest stride.
  //
  // Variables follow the TensorFlow naming, while the loops are nested so that
  // the innermost one has the smallest stride for the best cache behavior.
  for (int b = 0; b < extent_b; ++b)
  {
    for (int y = 0; y < extent_y; ++y)
    {
      for (int x = 0; x < extent_x; ++x)
      {
        for (int c = 0; c < extent_c; ++c)
        {
          // Each input is addressed through its own descriptor.
          const auto idx1 = SubscriptToIndex(desc1, b, y, x, c);
          const auto idx2 = SubscriptToIndex(desc2, b, y, x, c);
          // This overload's functor also receives params; its result is then
          // clamped to the quantized activation range.
          output_data[Offset(out4d, b, y, x, c)] = ActivationFunctionWithMinMax<T>(
            fn(params, input1_data[idx1], input2_data[idx2]), params.quantized_activation_min,
            params.quantized_activation_max);
        }
      }
    }
  }
}

References desc1, desc2, nnfw::cker::Shape::Dims(), nnfw::cker::NdArrayDescsForElementwiseBroadcast(), nnfw::cker::Offset(), output_shape, nnfw::cker::BinaryArithmeticOpParam::quantized_activation_max, nnfw::cker::BinaryArithmeticOpParam::quantized_activation_min, and nnfw::cker::SubscriptToIndex().

Referenced by nnfw::cker::optimized::BroadcastAddDispatch(), nnfw::cker::optimized::BroadcastAddDispatch(), nnfw::cker::BroadcastBinaryArithmeticOp(), nnfw::cker::optimized::BroadcastDivDispatch(), nnfw::cker::optimized::BroadcastMulDispatch(), nnfw::cker::optimized::BroadcastMulDispatch(), and nnfw::cker::optimized::BroadcastSubDispatch().

◆ BroadcastBinaryArithmeticOpSlow() [5/5]

template<typename T >

void nnfw::cker::reference::BroadcastBinaryArithmeticOpSlow	(	const BinaryArithmeticOpParam &	params,
		const Shape &	input1_shape,
		const T *	input1_data,
		const Shape &	input2_shape,
		const T *	input2_data,
		const Shape &	output_shape,
		T *	output_data,
		const std::function< T(const T &, const T &)> &	fn
	)

inline

Definition at line 135 of file BinaryArithmeticOps.h.

{
  // One descriptor per input, filled in from the pair of input shapes.
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);

  // 4-D form of the output shape; it is walked below as (b, y, x, c).
  const Shape out4d = Shape::ExtendedShape(4, output_shape);
  const auto extent_b = out4d.Dims(0);
  const auto extent_y = out4d.Dims(1);
  const auto extent_x = out4d.Dims(2);
  const auto extent_c = out4d.Dims(3);

  // Note carried over from TensorFlow Lite:
  //
  // TensorFlow names the dimensions (batch_number, row, col, channel) with
  // extents (batches, height, width, depth); the trailing dimension changes
  // most rapidly (channels has the smallest stride, typically 1 element).
  //
  // Generated C code stores arrays with the dimensions reversed, so the first
  // dimension has the smallest stride.
  //
  // Variables follow the TensorFlow naming, while the loops are nested so that
  // the innermost one has the smallest stride for the best cache behavior.
  for (int b = 0; b < extent_b; ++b)
  {
    for (int y = 0; y < extent_y; ++y)
    {
      for (int x = 0; x < extent_x; ++x)
      {
        for (int c = 0; c < extent_c; ++c)
        {
          // Each input is addressed through its own descriptor.
          const auto idx1 = SubscriptToIndex(desc1, b, y, x, c);
          const auto idx2 = SubscriptToIndex(desc2, b, y, x, c);
          // The generic overload clamps with the quantized activation bounds.
          output_data[Offset(out4d, b, y, x, c)] = ActivationFunctionWithMinMax<T>(
            fn(input1_data[idx1], input2_data[idx2]), params.quantized_activation_min,
            params.quantized_activation_max);
        }
      }
    }
  }
}

References desc1, desc2, nnfw::cker::Shape::Dims(), nnfw::cker::NdArrayDescsForElementwiseBroadcast(), nnfw::cker::Offset(), output_shape, nnfw::cker::BinaryArithmeticOpParam::quantized_activation_max, nnfw::cker::BinaryArithmeticOpParam::quantized_activation_min, and nnfw::cker::SubscriptToIndex().

◆ Conv() [1/3]

template<typename T , bool is_asymmetric>

void nnfw::cker::reference::Conv	(	const ConvParams &	params,
		const int32_t *	output_multiplier,
		const int32_t *	output_shift,
		const Shape &	input_shape,
		const T *	input_data,
		const Shape &	filter_shape,
		const T *	filter_data,
		const int32_t *	filter_zeropoint,
		const Shape &	bias_shape,
		const int32_t *	bias_data,
		const Shape &	output_shape,
		T *	output_data
	)

inline

Definition at line 195 of file Conv.h.

{
  // Quantized convolution over 4-D tensors with per-output-channel rescaling,
  // written as a direct loop nest.
  //
  // For every output element (batch, out_y, out_x, out_channel) this
  // accumulates filter/input products in an int32_t, adds the optional bias,
  // rescales with output_multiplier[out_channel] / output_shift[out_channel],
  // adds params.output_offset and clamps the result to
  // [params.quantized_activation_min, params.quantized_activation_max].
  //
  // output_multiplier, output_shift: indexed by out_channel.
  // filter_zeropoint: indexed by out_channel; only read when is_asymmetric.
  // bias_data: may be null, in which case no bias is added.

  // Get parameters.
  const int32_t input_offset = params.input_offset; // r = s(q - Z)
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const int32_t output_offset = params.output_offset;

  // Set min and max value of the output.
  const int32_t output_activation_min = params.quantized_activation_min;
  const int32_t output_activation_max = params.quantized_activation_max;

  // Consistency check.
  // A degenerate range (min == max) is accepted, matching the check used by the
  // uint8_t Conv overload; only an inverted range is rejected.
  assert(output_activation_min <= output_activation_max);
  assert(input_shape.DimensionsCount() == 4);
  assert(filter_shape.DimensionsCount() == 4);
  assert(output_shape.DimensionsCount() == 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data)
  {
    assert(bias_shape.FlatSize() == output_depth);
  }

  // Check dimensions of the tensors.
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  for (int batch = 0; batch < batches; ++batch)
  {
    for (int out_y = 0; out_y < output_height; ++out_y)
    {
      // Input row that lines up with filter row 0 for this output row.
      const int in_y_origin = (out_y * stride_height) - pad_height;
      for (int out_x = 0; out_x < output_width; ++out_x)
      {
        // Input column that lines up with filter column 0 for this output column.
        const int in_x_origin = (out_x * stride_width) - pad_width;
        for (int out_channel = 0; out_channel < output_depth; ++out_channel)
        {
          int32_t acc = 0;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y)
          {
            const int in_y = in_y_origin + dilation_height_factor * filter_y;
            for (int filter_x = 0; filter_x < filter_width; ++filter_x)
            {
              const int in_x = in_x_origin + dilation_width_factor * filter_x;

              // Zero padding by omitting the areas outside the image.
              const bool is_point_inside_image =
                (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);

              if (!is_point_inside_image)
              {
                continue;
              }

              for (int in_channel = 0; in_channel < input_depth; ++in_channel)
              {
                const T input_val = input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
                const T filter_val =
                  filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)];
                if (is_asymmetric)
                {
                  // Asymmetric filter: subtract this channel's zero point.
                  const int32_t filter_offset = -filter_zeropoint[out_channel];
                  acc += (filter_val + filter_offset) * (input_val + input_offset);
                }
                else
                {
                  // Accumulate with 32 bits accumulator.
                  // In the nudging process during model quantization, we force
                  // real value of 0.0 be represented by a quantized value. This
                  // guarantees that the input_offset is a int8_t, even though
                  // it is represented using int32_t. int32_t += int8_t *
                  // (int8_t - int8_t) so the highest value we can get from each
                  // accumulation is [-127, 127] * ([-128, 127] -
                  // [-128, 127]), which is [-32512, 32512]. log2(32512)
                  // = 14.98, which means we can accumulate at least 2^16
                  // multiplications without overflow. The accumulator is
                  // applied to a filter so the accumulation logic will hold as
                  // long as the filter size (filter_y * filter_x * in_channel)
                  // does not exceed 2^16, which is the case in all the models
                  // we have seen so far.
                  // TODO(jianlijianli): Add a check to make sure the
                  // accumulator depth is smaller than 2^16.
                  acc += filter_val * (input_val + input_offset);
                }
              }
            }
          }

          if (bias_data)
          {
            acc += bias_data[out_channel];
          }
          // Rescale with this channel's multiplier and shift, then add the
          // output offset and clamp to the activation range.
          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_channel],
                                              output_shift[out_channel]);
          acc += output_offset;
          acc = std::max(acc, output_activation_min);
          acc = std::min(acc, output_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = static_cast<T>(acc);
        }
      }
    }
  }
}

◆ Conv() [2/3]

void nnfw::cker::reference::Conv	(	const ConvParams &	params,
		const Shape &	input_shape,
		const float *	input_data,
		const Shape &	filter_shape,
		const float *	filter_data,
		const Shape &	bias_shape,
		const float *	bias_data,
		const Shape &	output_shape,
		float *	output_data
	)

inline

Definition at line 33 of file Conv.h.

{
  // Float convolution over 4-D tensors, written as a direct loop nest. Input
  // positions that fall outside the image contribute nothing to the sum.
  assert(input_shape.DimensionsCount() == 4);
  assert(filter_shape.DimensionsCount() == 4);
  assert(output_shape.DimensionsCount() == 4);

  // Stride, dilation and padding settings.
  const int stride_w = params.stride_width;
  const int stride_h = params.stride_height;
  const int dilation_w = params.dilation_width_factor;
  const int dilation_h = params.dilation_height_factor;
  const int pad_w = params.padding_values.width;
  const int pad_h = params.padding_values.height;

  // Clamp range applied to every output value.
  const float act_min = params.float_activation_min;
  const float act_max = params.float_activation_max;

  // Tensor extents.
  const int num_batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int in_depth = MatchingDim(input_shape, 3, filter_shape, 3);
  const int out_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  assert(!bias_data || bias_shape.FlatSize() == out_depth);
  const int in_h = input_shape.Dims(1);
  const int in_w = input_shape.Dims(2);
  const int filt_h = filter_shape.Dims(1);
  const int filt_w = filter_shape.Dims(2);
  const int out_h = output_shape.Dims(1);
  const int out_w = output_shape.Dims(2);

  for (int n = 0; n < num_batches; ++n)
  {
    for (int oy = 0; oy < out_h; ++oy)
    {
      // Input row that lines up with filter row 0.
      const int y_origin = (oy * stride_h) - pad_h;
      for (int ox = 0; ox < out_w; ++ox)
      {
        // Input column that lines up with filter column 0.
        const int x_origin = (ox * stride_w) - pad_w;
        for (int oc = 0; oc < out_depth; ++oc)
        {
          float sum = 0.f;
          for (int fy = 0; fy < filt_h; ++fy)
          {
            const int iy = y_origin + dilation_h * fy;
            // A filter row outside the image adds nothing (zero padding).
            if ((iy < 0) || (iy >= in_h))
            {
              continue;
            }
            for (int fx = 0; fx < filt_w; ++fx)
            {
              const int ix = x_origin + dilation_w * fx;
              // Same for a column outside the image.
              if ((ix < 0) || (ix >= in_w))
              {
                continue;
              }
              const int in_base = Offset(input_shape, n, iy, ix, 0);
              const int filt_base = Offset(filter_shape, oc, fy, fx, 0);
              for (int ic = 0; ic < in_depth; ++ic)
              {
                const float in_val = input_data[in_base + ic];
                const float filt_val = filter_data[filt_base + ic];
                sum += (in_val * filt_val);
              }
            }
          }
          // A missing bias tensor is treated as a bias of zero.
          const float bias = bias_data ? bias_data[oc] : 0.0f;
          output_data[Offset(output_shape, n, oy, ox, oc)] =
            ActivationFunctionWithMinMax(sum + bias, act_min, act_max);
        }
      }
    }
  }
}

References nnfw::cker::ActivationFunctionWithMinMax(), nnfw::cker::ConvParams::dilation_height_factor, nnfw::cker::ConvParams::dilation_width_factor, nnfw::cker::Shape::DimensionsCount(), nnfw::cker::Shape::Dims(), nnfw::cker::ConvParams::float_activation_max, nnfw::cker::ConvParams::float_activation_min, nnfw::cker::PaddingValues::height, nnfw::cker::MatchingDim(), nnfw::cker::Offset(), output_shape, nnfw::cker::ConvParams::padding_values, nnfw::cker::ConvParams::stride_height, nnfw::cker::ConvParams::stride_width, and nnfw::cker::PaddingValues::width.

Referenced by nnfw::cker::Conv::operator()().

◆ Conv() [3/3]

void nnfw::cker::reference::Conv	(	const ConvParams &	params,
		const Shape &	input_shape,
		const uint8_t *	input_data,
		const Shape &	filter_shape,
		const uint8_t *	filter_data,
		const Shape &	bias_shape,
		const int32_t *	bias_data,
		const Shape &	output_shape,
		uint8_t *	output_data
	)

inline

Definition at line 109 of file Conv.h.

{
  // uint8_t quantized convolution over 4-D tensors, using one multiplier/shift
  // pair from params for all output channels. Input positions that fall
  // outside the image contribute nothing to the accumulator.

  // Stride, dilation and padding settings.
  const int stride_w = params.stride_width;
  const int stride_h = params.stride_height;
  const int dilation_w = params.dilation_width_factor;
  const int dilation_h = params.dilation_height_factor;
  const int pad_w = params.padding_values.width;
  const int pad_h = params.padding_values.height;

  // Offsets added to input, filter and output values, and the rescale settings.
  const int32_t in_offset = params.input_offset;
  const int32_t weights_offset = params.weights_offset;
  const int32_t out_offset = params.output_offset;
  const int32_t multiplier = params.output_multiplier;
  const int shift = params.output_shift;

  // Clamp range applied to every output value.
  const int32_t act_min = params.quantized_activation_min;
  const int32_t act_max = params.quantized_activation_max;
  assert(act_min <= act_max);

  assert(input_shape.DimensionsCount() == 4);
  assert(filter_shape.DimensionsCount() == 4);
  assert(output_shape.DimensionsCount() == 4);

  // Tensor extents.
  const int num_batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int in_depth = MatchingDim(input_shape, 3, filter_shape, 3);
  const int out_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  assert(!bias_data || bias_shape.FlatSize() == out_depth);
  const int in_h = input_shape.Dims(1);
  const int in_w = input_shape.Dims(2);
  const int filt_h = filter_shape.Dims(1);
  const int filt_w = filter_shape.Dims(2);
  const int out_h = output_shape.Dims(1);
  const int out_w = output_shape.Dims(2);

  for (int n = 0; n < num_batches; ++n)
  {
    for (int oy = 0; oy < out_h; ++oy)
    {
      // Input row that lines up with filter row 0.
      const int y_origin = (oy * stride_h) - pad_h;
      for (int ox = 0; ox < out_w; ++ox)
      {
        // Input column that lines up with filter column 0.
        const int x_origin = (ox * stride_w) - pad_w;
        for (int oc = 0; oc < out_depth; ++oc)
        {
          int32_t acc = 0;
          for (int fy = 0; fy < filt_h; ++fy)
          {
            const int iy = y_origin + dilation_h * fy;
            // A filter row outside the image adds nothing to the accumulator.
            if ((iy < 0) || (iy >= in_h))
            {
              continue;
            }
            for (int fx = 0; fx < filt_w; ++fx)
            {
              const int ix = x_origin + dilation_w * fx;
              // Same for a column outside the image.
              if ((ix < 0) || (ix >= in_w))
              {
                continue;
              }
              const int in_start = Offset(input_shape, n, iy, ix, 0);
              const int filt_start = Offset(filter_shape, oc, fy, fx, 0);
              for (int ic = 0; ic < in_depth; ic++)
              {
                // Widen to int32_t before the offsets are added.
                const int32_t in_val = input_data[ic + in_start];
                const int32_t filt_val = filter_data[ic + filt_start];
                acc += (filt_val + weights_offset) * (in_val + in_offset);
              }
            }
          }
          if (bias_data)
          {
            acc += bias_data[oc];
          }
          // Rescale, add the output offset, then clamp to the activation range.
          acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
          acc += out_offset;
          acc = std::min(std::max(acc, act_min), act_max);
          output_data[Offset(output_shape, n, oy, ox, oc)] = static_cast<uint8_t>(acc);
        }
      }
    }
  }
}

◆ HybridConvPerChannel()

void nnfw::cker::reference::HybridConvPerChannel	(	const ConvParams &	params,
		float *	scaling_factors_ptr,
		const Shape &	input_shape,
		const int8_t *	input_data,
		const Shape &	filter_shape,
		const int8_t *	filter_data,
		const Shape &	bias_shape,
		const float *	bias_data,
		const Shape &	output_shape,
		float *	output_data,
		const float *	per_channel_scale,
		const int32_t *	input_offset
	)

inline

Definition at line 314 of file Conv.h.

{
  // Reference hybrid convolution: int8 input and int8 filter are multiplied
  // and accumulated in int32, then rescaled to float.
  //
  //   params             strides, dilation factors, padding and the float
  //                      activation bounds.
  //   scaling_factors_ptr  one float scale per batch (indexed by batch).
  //   input_shape/data   4-D, indexed as [batch, y, x, channel].
  //   filter_shape/data  4-D, indexed as [out_channel, filter_y, filter_x,
  //                      in_channel], where the last dimension is the number of
  //                      input channels seen by one filter (one group).
  //   bias_shape/data    optional (bias_data may be null); one float per output
  //                      channel, added after rescaling.
  //   output_shape/data  4-D, indexed as [batch, y, x, out_channel].
  //   per_channel_scale  one float scale per output channel.
  //   input_offset       one int32 offset per batch, subtracted from every
  //                      input value before the multiplication.
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const float output_activation_min = params.float_activation_min;
  const float output_activation_max = params.float_activation_max;
  assert(input_shape.DimensionsCount() == 4);
  assert(filter_shape.DimensionsCount() == 4);
  assert(output_shape.DimensionsCount() == 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = input_shape.Dims(3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data)
  {
    assert(bias_shape.FlatSize() == output_depth);
  }
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int filter_input_depth = filter_shape.Dims(3);
  // Validate the divisors *before* dividing: a zero divisor is undefined
  // behaviour, and an output depth that is not a multiple of the group count
  // would produce a group index past the last group of input channels.
  assert(filter_input_depth > 0);
  assert(input_depth % filter_input_depth == 0);
  const int groups = input_depth / filter_input_depth;
  assert(groups > 0);
  assert(output_depth % groups == 0);
  const int filters_per_group = output_depth / groups;
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  for (int batch = 0; batch < batches; ++batch)
  {
    // Per-batch quantization parameters do not change inside the batch.
    const int32_t batch_input_offset = input_offset[batch];
    const float batch_scale = scaling_factors_ptr[batch];
    for (int out_y = 0; out_y < output_height; ++out_y)
    {
      const int in_y_origin = (out_y * stride_height) - pad_height;
      for (int out_x = 0; out_x < output_width; ++out_x)
      {
        const int in_x_origin = (out_x * stride_width) - pad_width;
        for (int out_channel = 0; out_channel < output_depth; ++out_channel)
        {
          // First input channel of the group this filter belongs to.
          const int group = out_channel / filters_per_group;
          const int in_channel_begin = group * filter_input_depth;
          int32_t acc = 0;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y)
          {
            const int in_y = in_y_origin + dilation_height_factor * filter_y;
            // If the location is outside the bounds of the input image,
            // use zero as a default value (i.e. it adds nothing to acc).
            if ((in_y < 0) || (in_y >= input_height))
            {
              continue;
            }
            for (int filter_x = 0; filter_x < filter_width; ++filter_x)
            {
              const int in_x = in_x_origin + dilation_width_factor * filter_x;
              if ((in_x < 0) || (in_x >= input_width))
              {
                continue;
              }
              // The channel is the innermost index of both tensors, so the
              // channel loop walks consecutive elements from these bases.
              const int in_base = Offset(input_shape, batch, in_y, in_x, in_channel_begin);
              const int filter_base = Offset(filter_shape, out_channel, filter_y, filter_x, 0);
              for (int in_channel = 0; in_channel < filter_input_depth; ++in_channel)
              {
                const int32_t input_val = input_data[in_base + in_channel];
                const int32_t filter_val = filter_data[filter_base + in_channel];
                acc += filter_val * (input_val - batch_input_offset);
              }
            }
          }
          // Rescale the integer accumulator to float: per-channel filter scale
          // first, then the per-batch scale.
          float acc_float = acc * per_channel_scale[out_channel] * batch_scale;
          if (bias_data)
          {
            acc_float += bias_data[out_channel];
          }
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
            ActivationFunctionWithMinMax(acc_float, output_activation_min, output_activation_max);
        }
      }
    }
  }
}

References nnfw::cker::ActivationFunctionWithMinMax(), nnfw::cker::ConvParams::dilation_height_factor, nnfw::cker::ConvParams::dilation_width_factor, nnfw::cker::Shape::DimensionsCount(), nnfw::cker::Shape::Dims(), nnfw::cker::ConvParams::float_activation_max, nnfw::cker::ConvParams::float_activation_min, nnfw::cker::PaddingValues::height, nnfw::cker::MatchingDim(), nnfw::cker::Offset(), output_shape, nnfw::cker::ConvParams::padding_values, nnfw::cker::ConvParams::stride_height, nnfw::cker::ConvParams::stride_width, and nnfw::cker::PaddingValues::width.

◆ Softmax()

void nnfw::cker::reference::Softmax	(	const SoftmaxParams &	params,
		const Shape &	input_shape,
		const float *	input_data,
		const Shape &	output_shape,
		float *	output_data
	)

inline

Definition at line 43 of file SoftMax.h.

{
  // Reference float softmax along the last dimension of the input.
  //
  // The tensor is treated as `outer_size` contiguous rows of `depth` floats;
  // for every row, output[c] = exp((x[c] - max) * beta) / sum_c(exp(...)),
  // where beta comes from params.beta.
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
  const float beta = static_cast<float>(params.beta);

  for (int i = 0; i < outer_size; ++i)
  {
    const float *in_row = input_data + i * depth;
    float *out_row = output_data + i * depth;

    // Find max element value which we'll use to ensure numerical stability
    // taking advantage of the following equality:
    // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
    float max = std::numeric_limits<float>::lowest();
    for (int c = 0; c < depth; ++c)
    {
      max = std::max(max, in_row[c]);
    }

    // Compute each exponential once: keep it in the output row and add it to
    // the running sum, instead of evaluating std::exp a second time below.
    float sum = 0.f;
    for (int c = 0; c < depth; ++c)
    {
      const float exp_c = std::exp((in_row[c] - max) * beta);
      out_row[c] = exp_c;
      sum += exp_c;
    }

    // Normalise the stored exponentials in place.
    for (int c = 0; c < depth; ++c)
    {
      out_row[c] = out_row[c] / sum;
    }
  }
}

References nnfw::cker::SoftmaxParams::beta, nnfw::cker::Shape::DimensionsCount(), nnfw::cker::MatchingDim(), nnfw::cker::MatchingFlatSizeSkipDim(), and output_shape.

Referenced by onert::backend::cpu::ops::SoftMaxLayer::softmaxFloat32().

◆ Transpose()

template<typename T >

void nnfw::cker::reference::Transpose	(	const TransposeParams &	params,
		const Shape &	unextended_input_shape,
		const T *	input_data,
		const Shape &	unextended_output_shape,
		T *	output_data
	)

Definition at line 88 of file Transpose.h.

{
  // Transpose kernel only does rearranging values not numeric evaluations on
  // each cell. It's safe to implement per size of scalar type and this trick
  // keeps the total code size in a reasonable range: every 1-, 2-, 4- or
  // 8-byte element type is routed to the TransposeImpl instantiation for the
  // integer type of the same size.
  switch (sizeof(T))
  {
    case 1:
      TransposeImpl<int8_t>(params, unextended_input_shape,
                            reinterpret_cast<const int8_t *>(input_data), unextended_output_shape,
                            reinterpret_cast<int8_t *>(output_data));
      break;
    case 2:
      TransposeImpl<int16_t>(params, unextended_input_shape,
                             reinterpret_cast<const int16_t *>(input_data), unextended_output_shape,
                             reinterpret_cast<int16_t *>(output_data));
      break;

    case 4:
      TransposeImpl<int32_t>(params, unextended_input_shape,
                             reinterpret_cast<const int32_t *>(input_data), unextended_output_shape,
                             reinterpret_cast<int32_t *>(output_data));
      break;
    case 8:
      TransposeImpl<int64_t>(params, unextended_input_shape,
                             reinterpret_cast<const int64_t *>(input_data), unextended_output_shape,
                             reinterpret_cast<int64_t *>(output_data));
      break;
    default:
      // Any other element size has no same-sized integer stand-in above.
      // Transpose with T itself rather than silently leaving output_data
      // unwritten. sizeof(T) is a compile-time constant, so this branch is
      // only ever taken for such types.
      TransposeImpl<T>(params, unextended_input_shape, input_data, unextended_output_shape,
                       output_data);
      break;
  }
}

Referenced by nnfw::cker::TransposeImpl().

◆ TransposeImpl()

template<typename T >

void nnfw::cker::reference::TransposeImpl	(	const TransposeParams &	params,
		const Shape &	unextended_input_shape,
		const T *	input_data,
		const Shape &	unextended_output_shape,
		T *	output_data
	)

Definition at line 33 of file Transpose.h.

{
  // Naive reference transpose for tensors of up to four dimensions.
  //
  // Both shapes are first extended to exactly four dimensions and the
  // permutation in params.perm is extended to match. The output is then
  // visited element by element and, for each output index, the corresponding
  // input index is looked up through the permutation.
  const int in_rank = unextended_input_shape.DimensionsCount();
  const int out_rank = unextended_output_shape.DimensionsCount();
  assert(in_rank <= 4);
  assert(out_rank <= 4);
  assert(out_rank == params.perm_count);

  const Shape in_shape4 = Shape::ExtendedShape(4, unextended_input_shape);
  const Shape out_shape4 = Shape::ExtendedShape(4, unextended_output_shape);
  // Number of dimensions added in front of each shape by the extension.
  const int in_pad = 4 - in_rank;
  const int out_pad = 4 - out_rank;

  // Extended permutation: the added leading output dimensions map to
  // themselves; the remaining entries are the caller's permutation shifted by
  // the amount of front padding of the input shape.
  int perm4[4];
  for (int k = 0; k < 4; ++k)
  {
    perm4[k] = (k < out_pad) ? k : params.perm[k - out_pad] + in_pad;
  }

  // Extent of every output dimension. MatchingDim also checks that output
  // dimension k agrees with the input dimension it is taken from.
  int extent[4];
  for (int k = 0; k < 4; ++k)
  {
    extent[k] = MatchingDim(in_shape4, perm4[k], out_shape4, k);
  }

  // Output-centred copy: out_idx walks the output, in_idx is the matching
  // input position (in_idx[perm4[k]] == out_idx[k] for every k).
  int out_idx[4];
  int in_idx[4];
  for (int d3 = 0; d3 < extent[3]; ++d3)
  {
    out_idx[3] = d3;
    in_idx[perm4[3]] = d3;
    for (int d2 = 0; d2 < extent[2]; ++d2)
    {
      out_idx[2] = d2;
      in_idx[perm4[2]] = d2;
      for (int d1 = 0; d1 < extent[1]; ++d1)
      {
        out_idx[1] = d1;
        in_idx[perm4[1]] = d1;
        for (int d0 = 0; d0 < extent[0]; ++d0)
        {
          out_idx[0] = d0;
          in_idx[perm4[0]] = d0;
          const T value = input_data[Offset(in_shape4, in_idx)];
          output_data[Offset(out_shape4, out_idx)] = value;
        }
      }
    }
  }
}

References nnfw::cker::Shape::DimensionsCount(), nnfw::cker::MatchingDim(), nnfw::cker::Offset(), output_shape, nnfw::cker::TransposeParams::perm, and nnfw::cker::TransposeParams::perm_count.

Functions

Function Documentation

◆ BatchMatMul()

◆ BinaryArithmeticOp() [1/4]

◆ BinaryArithmeticOp() [2/4]

◆ BinaryArithmeticOp() [3/4]

◆ BinaryArithmeticOp() [4/4]

◆ BroadcastBinaryArithmeticOpSlow() [1/5]

◆ BroadcastBinaryArithmeticOpSlow() [2/5]

◆ BroadcastBinaryArithmeticOpSlow() [3/5]

◆ BroadcastBinaryArithmeticOpSlow() [4/5]

◆ BroadcastBinaryArithmeticOpSlow() [5/5]

◆ Conv() [1/3]

◆ Conv() [2/3]

◆ Conv() [3/3]

◆ HybridConvPerChannel()

◆ Softmax()

◆ Transpose()

◆ TransposeImpl()