ONE - On-device Neural Engine
tflite::reference_ops Namespace Reference

Data Structures

struct  GatherNdHelperResult
 

Functions

template<typename T >
void Relu (const RuntimeShape &input_shape, const T *input_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T >
void Relu1 (const RuntimeShape &input_shape, const T *input_data, const RuntimeShape &output_shape, T *output_data)
 
void Relu6 (const RuntimeShape &input_shape, const float *input_data, const RuntimeShape &output_shape, float *output_data)
 
template<typename T >
void ReluX (const tflite::ReluParams &params, const RuntimeShape &input_shape, const T *input_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T >
void ReluX (const tflite::ActivationParams &params, const RuntimeShape &input_shape, const T *input_data, const RuntimeShape &output_shape, T *output_data)
 
void BroadcastMulFivefold (const ArithmeticParams &unswitched_params, const RuntimeShape &unswitched_input1_shape, const uint8 *unswitched_input1_data, const RuntimeShape &unswitched_input2_shape, const uint8 *unswitched_input2_data, const RuntimeShape &output_shape, uint8 *output_data)
 
void Mul (const ArithmeticParams &params, const RuntimeShape &input1_shape, const int16 *input1_data, const RuntimeShape &input2_shape, const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data)
 
void Mul (const ArithmeticParams &params, const RuntimeShape &input1_shape, const int16 *input1_data, const RuntimeShape &input2_shape, const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data)
 
void Sub16 (const ArithmeticParams &params, const RuntimeShape &input1_shape, const int16_t *input1_data, const RuntimeShape &input2_shape, const int16_t *input2_data, const RuntimeShape &output_shape, int16_t *output_data)
 
template<typename Scalar >
void Pack (const PackParams &params, const RuntimeShape *const *input_shapes, const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data)
 
template<typename Scalar >
void Unpack (const UnpackParams &params, const RuntimeShape &input_shape, const Scalar *input_data, const RuntimeShape &output_shape, Scalar *const *output_datas)
 
template<typename Scalar >
void PackWithScaling (const PackParams &params, const RuntimeShape *const *input_shapes, const uint8 *const *input_data, const RuntimeShape &output_shape, uint8 *output_data)
 
template<typename Scalar >
void DepthConcatenation (const ConcatenationParams &params, const RuntimeShape *const *input_shapes, const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data)
 
void LstmCell (const LstmCellParams &params, const RuntimeShape &unextended_input_shape, const float *input_data, const RuntimeShape &unextended_prev_activ_shape, const float *prev_activ_data, const RuntimeShape &weights_shape, const float *weights_data, const RuntimeShape &unextended_bias_shape, const float *bias_data, const RuntimeShape &unextended_prev_state_shape, const float *prev_state_data, const RuntimeShape &unextended_output_state_shape, float *output_state_data, const RuntimeShape &unextended_output_activ_shape, float *output_activ_data, const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data, const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data)
 
template<int StateIntegerBits>
void LstmCell (const LstmCellParams &params, const RuntimeShape &unextended_input_shape, const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape, const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape, const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape, const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape, const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape, int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape, uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape, uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape, int16 *activ_temp_data_int16, void *gemmlowp_context)
 
template<typename Scalar >
void Split (const SplitParams &params, const RuntimeShape &input_shape, const Scalar *input_data, const RuntimeShape *const *output_shapes, Scalar *const *output_data)
 
int NodeOffset (int b, int h, int w, int height, int width)
 
void LocalResponseNormalization (const tflite::LocalResponseNormalizationParams &op_params, const RuntimeShape &input_shape, const float *input_data, const RuntimeShape &output_shape, float *output_data)
 
void Dequantize (const RuntimeShape &input_shape, const Eigen::half *input_data, const RuntimeShape &output_shape, float *output_data)
 
void FakeQuant (const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape, const float *input_data, const RuntimeShape &output_shape, float *output_data)
 
GatherNdHelperResult GatherNdHelper (const RuntimeShape &params_shape, const RuntimeShape &indices_shape)
 
template<typename ParamsT , typename IndicesT = int32>
void GatherNd (const RuntimeShape &params_shape, const ParamsT *params_data, const RuntimeShape &indices_shape, const IndicesT *indices_data, const RuntimeShape &output_shape, ParamsT *output_data)
 
template<typename IndicesT = int32>
void GatherNdString (const RuntimeShape &params_shape, const TfLiteTensor *params_data, const RuntimeShape &indices_shape, const IndicesT *indices_data, const RuntimeShape &output_shape, TfLiteTensor *output_data)
 
template<typename IndicesT , typename UpdatesT >
void ScatterNd (const RuntimeShape &indices_shape, const IndicesT *indices_data, const RuntimeShape &updates_shape, const UpdatesT *updates_data, const RuntimeShape &output_shape, UpdatesT *output_data)
 
template<typename T >
void Slice (const tflite::SliceParams &op_params, const RuntimeShape &input_shape, const RuntimeShape &output_shape, SequentialTensorWriter< T > *writer)
 
template<typename T >
void Slice (const tflite::SliceParams &op_params, const RuntimeShape &input_shape, const T *input_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T >
void Slice (const tflite::SliceParams &op_params, const RuntimeShape &input_shape, const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output)
 
template<typename T >
void Minimum (const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T >
void Minimum (const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &, const T *input2_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T >
void Maximum (const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T >
void Maximum (const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &, const T *input2_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T1 , typename T2 , typename T3 >
void ArgMax (const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, const RuntimeShape &output_shape, T2 *output_data)
 
template<typename T1 , typename T2 , typename T3 >
void ArgMax (const RuntimeShape &input1_shape, const T1 *input1_data, const RuntimeShape &input2_shape, const T3 *input2_data, const RuntimeShape &output_shape, T2 *output_data)
 
template<typename D , typename T >
void Select (const RuntimeShape &input_condition_shape, const D *input_condition_data, const RuntimeShape &input_x_shape, const T *input_x_data, const RuntimeShape &input_y_shape, const T *input_y_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename D , typename T >
void RankOneSelect (const RuntimeShape &input_condition_shape, const D *input_condition_data, const RuntimeShape &input_x_shape, const T *input_x_data, const RuntimeShape &input_y_shape, const T *input_y_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename D , typename T >
void BroadcastSelect4DSlow (const RuntimeShape &input_condition_shape, const D *input_condition_data, const RuntimeShape &input_x_shape, const T *input_x_data, const RuntimeShape &input_y_shape, const T *input_y_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename D , typename T >
void SelectTrueCoords (const RuntimeShape &input_condition_shape, const D *input_condition_data, T *output_data)
 
template<typename T , typename TI >
void SparseToDense (const std::vector< std::vector< TI > > &indices, const T *values, T default_value, bool value_is_scalar, const RuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T >
void Pow (const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &input2_shape, const T *input2_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T >
void BroadcastPow4DSlow (const RuntimeShape &unextended_input1_shape, const T *input1_data, const RuntimeShape &unextended_input2_shape, const T *input2_data, const RuntimeShape &unextended_output_shape, T *output_data)
 
template<typename Scalar >
void Reverse (int axis, const RuntimeShape &input_shape, const Scalar *input_data, const RuntimeShape &output_shape, Scalar *output_data)
 
template<typename Scalar , typename TS >
void ReverseSequence (const TS *seq_lengths, const int seq_dim, const int batch_dim, const RuntimeShape &input_shape, const Scalar *input_data, const RuntimeShape &output_shape, Scalar *output_data)
 
template<typename T >
void SegmentSum (const RuntimeShape &input_shape, const T *input_data, const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data, const RuntimeShape &output_shape, T *output_data)
 

Function Documentation

◆ ArgMax() [1/2]

template<typename T1 , typename T2 , typename T3 >
void tflite::reference_ops::ArgMax ( const RuntimeShape &  input1_shape,
const T1 *  input1_data,
const RuntimeShape &  input2_shape,
const T3 *  input2_data,
const RuntimeShape &  output_shape,
T2 *  output_data 
)
inline

Definition at line 1188 of file PALreference_ops.h.

1191{
1192 // Drop shape of second input: not needed.
1193 ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data);
1194}

References ArgMax(), and output_shape.

◆ ArgMax() [2/2]

template<typename T1 , typename T2 , typename T3 >
void tflite::reference_ops::ArgMax ( const RuntimeShape &  input1_shape,
const T1 *  input1_data,
const T3 *  input2_data,
const RuntimeShape &  output_shape,
T2 *  output_data 
)

Definition at line 1179 of file PALreference_ops.h.

1181{
1182 ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, std::greater<T1>());
1183}

References output_shape.

Referenced by ArgMax().
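
For intuition, here is a minimal standalone sketch of the reduction this performs over the innermost axis. It is illustrative only: it uses plain arrays instead of RuntimeShape, and NaiveArgMaxLastAxis is a hypothetical name, not part of this API.

#include <functional>

// Illustrative sketch: argmax over the innermost axis of a flattened
// [outer_size x axis_size] buffer, mirroring ArgMinMax with std::greater.
template <typename T, typename Out>
void NaiveArgMaxLastAxis(const T *input, int outer_size, int axis_size, Out *output)
{
  for (int i = 0; i < outer_size; ++i)
  {
    int best = 0;
    for (int j = 1; j < axis_size; ++j)
    {
      if (std::greater<T>()(input[i * axis_size + j], input[i * axis_size + best]))
        best = j;
    }
    output[i] = static_cast<Out>(best);
  }
}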

◆ BroadcastMulFivefold()

void tflite::reference_ops::BroadcastMulFivefold ( const ArithmeticParams &  unswitched_params,
const RuntimeShape &  unswitched_input1_shape,
const uint8 *  unswitched_input1_data,
const RuntimeShape &  unswitched_input2_shape,
const uint8 *  unswitched_input2_data,
const RuntimeShape &  output_shape,
uint8 *  output_data 
)
inline

Definition at line 191 of file PALreference_ops.h.

197{
198 ArithmeticParams switched_params = unswitched_params;
199 switched_params.input1_offset = unswitched_params.input2_offset;
200 switched_params.input2_offset = unswitched_params.input1_offset;
201
202 const bool use_unswitched = unswitched_params.broadcast_category ==
203 tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
204
205 const ArithmeticParams &params = use_unswitched ? unswitched_params : switched_params;
206 const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
207 const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
208
209 // Fivefold nested loops. The second input resets its position for each
210 // iteration of the second loop. The first input resets its position at the
211 // beginning of the fourth loop. The innermost loop is an elementwise Mul of
212 // sections of the arrays.
213 uint8 *output_data_ptr = output_data;
214 const uint8 *input1_data_ptr = input1_data;
215 const uint8 *input2_data_reset = input2_data;
216 int y0 = params.broadcast_shape[0];
217 int y1 = params.broadcast_shape[1];
218 int y2 = params.broadcast_shape[2];
219 int y3 = params.broadcast_shape[3];
220 int y4 = params.broadcast_shape[4];
221 for (int i0 = 0; i0 < y0; ++i0)
222 {
223 const uint8 *input2_data_ptr;
224 for (int i1 = 0; i1 < y1; ++i1)
225 {
226 input2_data_ptr = input2_data_reset;
227 for (int i2 = 0; i2 < y2; ++i2)
228 {
229 for (int i3 = 0; i3 < y3; ++i3)
230 {
231 MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
232 input2_data_ptr += y4;
233 output_data_ptr += y4;
234 }
235 input1_data_ptr += y4;
236 }
237 }
238 input2_data_reset = input2_data_ptr;
239 }
240}

◆ BroadcastPow4DSlow()

template<typename T >
void tflite::reference_ops::BroadcastPow4DSlow ( const RuntimeShape &  unextended_input1_shape,
const T *  input1_data,
const RuntimeShape &  unextended_input2_shape,
const T *  input2_data,
const RuntimeShape &  unextended_output_shape,
T *  output_data 
)
inline

Definition at line 1388 of file PALreference_ops.h.

1391{
1392 TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
1393 TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
1394 TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
1395 const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
1396
1397 NdArrayDesc<4> desc1;
1398 NdArrayDesc<4> desc2;
1399 NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
1400 &desc2);
1401
1402 for (int b = 0; b < output_shape.Dims(0); ++b)
1403 {
1404 for (int y = 0; y < output_shape.Dims(1); ++y)
1405 {
1406 for (int x = 0; x < output_shape.Dims(2); ++x)
1407 {
1408 for (int c = 0; c < output_shape.Dims(3); ++c)
1409 {
1410 auto out_idx = Offset(output_shape, b, y, x, c);
1411 auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
1412 auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
1413 auto in1_val = input1_data[in1_idx];
1414 auto in2_val = input2_data[in2_idx];
1415 output_data[out_idx] = std::pow(in1_val, in2_val);
1416 }
1417 }
1418 }
1419 }
1420}

References desc1, desc2, NdArrayDescsForElementwiseBroadcast(), Offset(), output_shape, and SubscriptToIndex().
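
Worked example (illustrative values): with input1 of extended shape [1, 1, 1, 3] holding {2, 3, 4} and input2 of shape [1, 1, 1, 1] holding {2}, the broadcast descriptor for input2 gets stride 0 on every dimension, so every output element reads input2_data[0]; the result is {2^2, 3^2, 4^2} = {4, 9, 16}.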

◆ BroadcastSelect4DSlow()

template<typename D , typename T >
void tflite::reference_ops::BroadcastSelect4DSlow ( const RuntimeShape &  input_condition_shape,
const D *  input_condition_data,
const RuntimeShape &  input_x_shape,
const T *  input_x_data,
const RuntimeShape &  input_y_shape,
const T *  input_y_data,
const RuntimeShape &  output_shape,
T *  output_data 
)

Definition at line 1248 of file PALreference_ops.h.

1252{
1253 TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4);
1254 TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4);
1255 TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4);
1256 TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
1257
1258 const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape);
1259
1260 NdArrayDesc<4> desc_condition;
1261 NdArrayDesc<4> desc_x;
1262 NdArrayDesc<4> desc_y;
1263 NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape,
1264 &desc_condition, &desc_x, &desc_y);
1265
1266 // In Tensorflow, the dimensions are canonically named (batch_number, row,
1267 // col, channel), with extents (batches, height, width, depth), with the
1268 // trailing dimension changing most rapidly (channels has the smallest
1269 // stride, typically 1 element).
1270 //
1271 // In generated C code, we store arrays with the dimensions reversed. The
1272 // first dimension has smallest stride.
1273 //
1274 // We name our variables by their Tensorflow convention, but generate C code
1275 // nesting loops such that the innermost loop has the smallest stride for
1276 // the best cache behavior.
1277 for (int b = 0; b < extended_output_shape.Dims(0); ++b)
1278 {
1279 for (int y = 0; y < extended_output_shape.Dims(1); ++y)
1280 {
1281 for (int x = 0; x < extended_output_shape.Dims(2); ++x)
1282 {
1283 for (int c = 0; c < extended_output_shape.Dims(3); ++c)
1284 {
1285 const int condition_index = SubscriptToIndex(desc_condition, b, y, x, c);
1286 const int x_index = SubscriptToIndex(desc_x, b, y, x, c);
1287 const int y_index = SubscriptToIndex(desc_y, b, y, x, c);
1288 output_data[Offset(extended_output_shape, b, y, x, c)] =
1289 input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index];
1290 }
1291 }
1292 }
1293 }
1294}

References NdArrayDescsForElementwiseBroadcast(), Offset(), output_shape, and SubscriptToIndex().

◆ DepthConcatenation()

template<typename Scalar >
void tflite::reference_ops::DepthConcatenation ( const ConcatenationParams &  params,
const RuntimeShape *const *  input_shapes,
const Scalar *const *  input_data,
const RuntimeShape &  output_shape,
Scalar *  output_data 
)

Definition at line 462 of file PALreference_ops.h.

465{
466 ruy::profiler::ScopeLabel label("DepthConcatenation");
467 auto params_copy = params;
468 params_copy.axis = 3;
469 Concatenation(params_copy, input_shapes, input_data, output_shape, output_data);
470}

References Concatenation(), and output_shape.

◆ Dequantize()

void tflite::reference_ops::Dequantize ( const RuntimeShape &  input_shape,
const Eigen::half *  input_data,
const RuntimeShape &  output_shape,
float *  output_data 
)
inline

Definition at line 913 of file PALreference_ops.h.

915{
916 const int flat_size = MatchingFlatSize(input_shape, output_shape);
917 for (int i = 0; i < flat_size; i++)
918 {
919 output_data[i] = static_cast<float>(input_data[i]);
920 }
921}

References MatchingFlatSize(), and output_shape.

◆ FakeQuant()

void tflite::reference_ops::FakeQuant ( const tflite::FakeQuantParams &  op_params,
const RuntimeShape &  input_shape,
const float *  input_data,
const RuntimeShape &  output_shape,
float *  output_data 
)
inline

Definition at line 923 of file PALreference_ops.h.

925{
926 ruy::profiler::ScopeLabel label("FakeQuant");
927 float rmin = op_params.minmax.min;
928 float rmax = op_params.minmax.max;
929 int num_bits = op_params.num_bits;
930 // 0 should always be a representable value. Let's assume that the initial
931 // min,max range contains 0.
932 TFLITE_DCHECK_LE(rmin, 0.0f);
933 TFLITE_DCHECK_GE(rmax, 0.0f);
934 TFLITE_DCHECK_LT(rmin, rmax);
935
936 // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor.
937 int quant_min = 0;
938 int quant_max = (1 << num_bits) - 1;
939 float nudged_min, nudged_max, nudged_scale;
940 NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale);
941 const int flat_size = MatchingFlatSize(input_shape, output_shape);
942 FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, flat_size);
943}

References MatchingFlatSize(), and output_shape.
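
In other words, the quantized grid runs from quant_min = 0 to quant_max = 2^num_bits - 1 (255 for num_bits = 8), and NudgeQuantizationRange() adjusts [rmin, rmax] to the nearest range in which zero is exactly representable before FakeQuantizeArray() rounds each input onto that grid.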

◆ GatherNd()

template<typename ParamsT , typename IndicesT = int32>
void tflite::reference_ops::GatherNd ( const RuntimeShape &  params_shape,
const ParamsT *  params_data,
const RuntimeShape &  indices_shape,
const IndicesT *  indices_data,
const RuntimeShape &  output_shape,
ParamsT *  output_data 
)
inline

Definition at line 985 of file PALreference_ops.h.

988{
989 ruy::profiler::ScopeLabel label("GatherNd");
990
991 const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
992 for (int i = 0; i < res.n_slices; ++i)
993 {
994 int from_pos = 0;
995 for (int j = 0; j < res.indices_nd; ++j)
996 {
997 from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
998 }
999 std::memcpy(output_data + i * res.slice_size, params_data + from_pos,
1000 sizeof(ParamsT) * res.slice_size);
1001 }
1002}

References tflite::reference_ops::GatherNdHelperResult::dims_to_count, GatherNdHelper(), tflite::reference_ops::GatherNdHelperResult::indices_nd, tflite::reference_ops::GatherNdHelperResult::n_slices, and tflite::reference_ops::GatherNdHelperResult::slice_size.
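
Example (illustrative values): with params of shape [3, 3] and indices of shape [2, 2] holding {0, 0, 2, 2}, the helper yields n_slices = 2, indices_nd = 2, slice_size = 1, and dims_to_count = {3, 1}, so the two copied slices are params[0][0] and params[2][2].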

◆ GatherNdHelper()

GatherNdHelperResult tflite::reference_ops::GatherNdHelper ( const RuntimeShape &  params_shape,
const RuntimeShape &  indices_shape 
)
inline

Definition at line 955 of file PALreference_ops.h.

957{
958 GatherNdHelperResult ret;
959 ret.n_slices = 1;
960 ret.slice_size = 1;
961 const int indices_dims = indices_shape.DimensionsCount();
962 ret.indices_nd = indices_shape.Dims(indices_dims - 1);
963 const int params_dims = params_shape.DimensionsCount();
964 for (int i = 0; i < indices_dims - 1; ++i)
965 {
966 ret.n_slices *= indices_shape.Dims(i);
967 }
968 for (int i = ret.indices_nd; i < params_dims; ++i)
969 {
970 ret.slice_size *= params_shape.Dims(i);
971 }
972
973 int remain_flat_size = params_shape.FlatSize();
974 ret.dims_to_count = std::vector<int>(ret.indices_nd, 0);
975 for (int i = 0; i < ret.indices_nd; ++i)
976 {
977 ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i);
978 remain_flat_size = ret.dims_to_count[i];
979 }
980
981 return ret;
982}

References tflite::reference_ops::GatherNdHelperResult::dims_to_count, tflite::reference_ops::GatherNdHelperResult::indices_nd, tflite::reference_ops::GatherNdHelperResult::n_slices, and tflite::reference_ops::GatherNdHelperResult::slice_size.

Referenced by GatherNd(), and GatherNdString().
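
Worked example (illustrative values): for params_shape = [3, 4, 5] and indices_shape = [2, 2], this computes indices_nd = 2, n_slices = 2, slice_size = 5 (the trailing params dimension not addressed by the indices), and dims_to_count = {20, 5} (i.e. 60/3, then 20/4), so an index pair (i, j) maps to flat offset 20*i + 5*j.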

◆ GatherNdString()

template<typename IndicesT = int32>
void tflite::reference_ops::GatherNdString ( const RuntimeShape &  params_shape,
const TfLiteTensor *  params_data,
const RuntimeShape &  indices_shape,
const IndicesT *  indices_data,
const RuntimeShape &  output_shape,
TfLiteTensor *  output_data 
)
inline

Definition at line 1006 of file PALreference_ops.h.

1009{
1010 ruy::profiler::ScopeLabel label("GatherNdString");
1011
1012 const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
1013 DynamicBuffer buffer;
1014 for (int i = 0; i < res.n_slices; ++i)
1015 {
1016 int from_pos = 0;
1017 for (int j = 0; j < res.indices_nd; ++j)
1018 {
1019 from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
1020 }
1021 for (int j = 0; j < res.slice_size; ++j)
1022 {
1023 buffer.AddString(GetString(params_data, from_pos + j));
1024 }
1025 }
1026 buffer.WriteToTensor(output_data, /*new_shape=*/nullptr);
1027}

References tflite::reference_ops::GatherNdHelperResult::dims_to_count, GatherNdHelper(), tflite::reference_ops::GatherNdHelperResult::indices_nd, tflite::reference_ops::GatherNdHelperResult::n_slices, and tflite::reference_ops::GatherNdHelperResult::slice_size.

◆ LocalResponseNormalization()

void tflite::reference_ops::LocalResponseNormalization ( const tflite::LocalResponseNormalizationParams &  op_params,
const RuntimeShape &  input_shape,
const float *  input_data,
const RuntimeShape &  output_shape,
float *  output_data 
)
inline

Definition at line 887 of file PALreference_ops.h.

890{
891 const int trailing_dim = input_shape.DimensionsCount() - 1;
892 const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
893 const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
894
895 for (int i = 0; i < outer_size; ++i)
896 {
897 for (int c = 0; c < depth; ++c)
898 {
899 const int begin_input_c = std::max(0, static_cast<int>(c - op_params.range));
900 const int end_input_c = std::min(depth, static_cast<int>(c + op_params.range));
901 float accum = 0.f;
902 for (int input_c = begin_input_c; input_c < end_input_c; ++input_c)
903 {
904 const float input_val = input_data[i * depth + input_c];
905 accum += input_val * input_val;
906 }
907 const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta);
908 output_data[i * depth + c] = input_data[i * depth + c] * multiplier;
909 }
910 }
911}

References output_shape.
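
In formula form, matching the loops above, each element is scaled by a power of an accumulated sum of squares over a window of neighboring channels:

output[i, c] = input[i, c] * (bias + alpha * sum over c' in [max(0, c - range), min(depth, c + range)) of input[i, c']^2)^(-beta)

Note that the upper bound end_input_c is exclusive, so the window extends only to channel c + range - 1.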

◆ LstmCell() [1/2]

void tflite::reference_ops::LstmCell ( const LstmCellParams &  params,
const RuntimeShape &  unextended_input_shape,
const float *  input_data,
const RuntimeShape &  unextended_prev_activ_shape,
const float *  prev_activ_data,
const RuntimeShape &  weights_shape,
const float *  weights_data,
const RuntimeShape &  unextended_bias_shape,
const float *  bias_data,
const RuntimeShape &  unextended_prev_state_shape,
const float *  prev_state_data,
const RuntimeShape &  unextended_output_state_shape,
float *  output_state_data,
const RuntimeShape &  unextended_output_activ_shape,
float *  output_activ_data,
const RuntimeShape &  unextended_concat_temp_shape,
float *  concat_temp_data,
const RuntimeShape &  unextended_activ_temp_shape,
float *  activ_temp_data 
)
inline

Definition at line 472 of file PALreference_ops.h.

482{
483 TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
484 TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
485 TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
486 TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
487 TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
488 TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
489 TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
490 TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
491 const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
492 const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
493 const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
494 const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
495 const RuntimeShape output_state_shape =
496 RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
497 const RuntimeShape output_activ_shape =
498 RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
499 const RuntimeShape concat_temp_shape =
500 RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
501 const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
502 TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
503
504 const int weights_dim_count = weights_shape.DimensionsCount();
505 const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
506 output_state_shape, 0, output_activ_shape, 0);
507 const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
508 output_state_shape, 1, output_activ_shape, 1);
509 const int width = MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
510 output_state_shape, 2, output_activ_shape, 2);
511 const int input_depth = input_shape.Dims(3);
512 const int prev_activ_depth = prev_activ_shape.Dims(3);
513 const int total_input_depth = prev_activ_depth + input_depth;
514 TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
515 TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
516 const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
517 TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
518 TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
519 const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
520 3, output_activ_shape, 3);
521 TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
522
523 // Concatenate prev_activ and input data together
524 std::vector<float const *> concat_input_arrays_data;
525 std::vector<RuntimeShape const *> concat_input_arrays_shapes;
526 concat_input_arrays_data.push_back(input_data);
527 concat_input_arrays_data.push_back(prev_activ_data);
528 concat_input_arrays_shapes.push_back(&input_shape);
529 concat_input_arrays_shapes.push_back(&prev_activ_shape);
530 tflite::ConcatenationParams concat_params;
531 concat_params.axis = 3;
532 concat_params.inputs_count = concat_input_arrays_data.size();
533 Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]),
534 concat_temp_shape, concat_temp_data);
535
536 // Fully connected
537 tflite::FullyConnectedParams fc_params;
538 fc_params.float_activation_min = std::numeric_limits<float>::lowest();
539 fc_params.float_activation_max = std::numeric_limits<float>::max();
540 FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data,
541 bias_shape, bias_data, activ_temp_shape, activ_temp_data);
542
543 // Memory state update (the LSTM "guts")
544 for (int b = 0; b < batches; ++b)
545 {
546 for (int w = 0; w < width; ++w)
547 {
548 for (int h = 0; h < height; ++h)
549 {
550 for (int c = 0; c < output_depth; ++c)
551 {
552 const float input_gate =
553 1.f /
554 (1.f +
555 std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)]));
556 const float new_input =
557 std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]);
558 const float forget_gate =
559 1.f /
560 (1.f +
561 std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)]));
562 const float output_gate =
563 1.f /
564 (1.f +
565 std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)]));
566 const float new_state =
567 input_gate * new_input +
568 forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)];
569 output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
570 output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
571 output_gate * std::tanh(new_state);
572 }
573 }
574 }
575 }
576}

References Concatenation(), FullyConnected(), and Offset().
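
The innermost loop is the standard LSTM cell update. Writing a0..a3 for the four output_depth-wide chunks of activ_temp along the channel axis:

input gate i = sigmoid(a0), new input g = tanh(a1), forget gate f = sigmoid(a2), output gate o = sigmoid(a3)
new_state = i * g + f * prev_state
output_activ = o * tanh(new_state)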

◆ LstmCell() [2/2]

template<int StateIntegerBits>
void tflite::reference_ops::LstmCell ( const LstmCellParams &  params,
const RuntimeShape &  unextended_input_shape,
const uint8 *  input_data_uint8,
const RuntimeShape &  unextended_prev_activ_shape,
const uint8 *  prev_activ_data_uint8,
const RuntimeShape &  weights_shape,
const uint8 *  weights_data_uint8,
const RuntimeShape &  unextended_bias_shape,
const int32 *  bias_data_int32,
const RuntimeShape &  unextended_prev_state_shape,
const int16 *  prev_state_data_int16,
const RuntimeShape &  unextended_output_state_shape,
int16 *  output_state_data_int16,
const RuntimeShape &  unextended_output_activ_shape,
uint8 *  output_activ_data_uint8,
const RuntimeShape &  unextended_concat_temp_shape,
uint8 *  concat_temp_data_uint8,
const RuntimeShape &  unextended_activ_temp_shape,
int16 *  activ_temp_data_int16,
void *  gemmlowp_context 
)
inline

Definition at line 664 of file PALreference_ops.h.

674{
675 (void)gemmlowp_context; // only used in optimized code.
676 int32 weights_zero_point = params.weights_zero_point;
677 int32 accum_multiplier = params.accum_multiplier;
678 int accum_shift = params.accum_shift;
679 TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
680 TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
681 TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
682 TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
683 TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
684 TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
685 TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
686 TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
687 const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
688 const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
689 const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
690 const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
691 const RuntimeShape output_state_shape =
692 RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
693 const RuntimeShape output_activ_shape =
694 RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
695 const RuntimeShape concat_temp_shape =
696 RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
697 const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
698 TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
699
700 // Gather dimensions information, and perform consistency checks.
701 const int weights_dim_count = weights_shape.DimensionsCount();
702 const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape,
703 output_state_shape, output_activ_shape);
704 const int input_depth = input_shape.Dims(3);
705 const int prev_activ_depth = prev_activ_shape.Dims(3);
706 const int total_input_depth = prev_activ_depth + input_depth;
707 TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
708 const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
709 TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
710 TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
711 TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
712 const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
713 3, output_activ_shape, 3);
714 TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
715 const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
716 const int fc_output_depth =
717 MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
718 const int fc_accum_depth = total_input_depth;
719 TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
720
721 // Depth-concatenate prev_activ and input data together.
722 uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8};
723 const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape};
724 tflite::ConcatenationParams concat_params;
725 concat_params.axis = 3;
726 concat_params.inputs_count = 2;
727 Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data,
728 concat_temp_shape, concat_temp_data_uint8);
729
730 // Implementation of the fully connected node inside the LSTM cell.
731 // The operands are 8-bit integers, the accumulators are internally 32bit
732 // integers, and the output is 16-bit fixed-point with 3 integer bits so
733 // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
734 // is explained in the function comment above.
735 for (int b = 0; b < fc_batches; ++b)
736 {
737 for (int out_c = 0; out_c < fc_output_depth; ++out_c)
738 {
739 // Internal accumulation.
740 // Initialize accumulator with the bias-value.
741 int32 accum = bias_data_int32[out_c];
742 // Accumulation loop.
743 for (int d = 0; d < fc_accum_depth; ++d)
744 {
745 int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
746 int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
747 accum += input_val * weights_val;
748 }
749 // Down-scale the final int32 accumulator to the scale used by our
750 // (16-bit, using 3 integer bits) fixed-point format. The quantized
751 // multiplier and shift here have been pre-computed offline
752 // (e.g. by toco).
753 accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
754 // Saturate, cast to int16, and store to the temporary activations array.
755 accum = std::max(-32768, std::min(32767, static_cast<int>(accum)));
756 activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
757 }
758 }
759
760 // Rest of the LSTM cell: tanh and logistic math functions, and some adds
761 // and muls, all done in 16-bit fixed-point.
762 for (int b = 0; b < outer_size; ++b)
763 {
764 for (int c = 0; c < output_depth; ++c)
765 {
766 // Define the fixed-point data types that we will use here. All use
767 // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
768 // They only differ by the number of integral vs. fractional bits,
769 // determining the range of values that they can represent.
770 //
771 // F0 uses 0 integer bits, range [-1, 1].
772 // This is the return type of math functions such as tanh, logistic,
773 // whose range is in [-1, 1].
774 using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
775 // F3 uses 3 integer bits, range [-8, 8].
776 // This is the range of the previous fully-connected node's output,
777 // which is our input here.
778 using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
779 // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
780 // 2^StateIntegerBits]. It's used to represent the internal state, whose
781 // number of integer bits is currently dictated by the model. See comment
782 // on the StateIntegerBits template parameter above.
783 using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
784 // Implementation of input gate, using fixed-point logistic function.
785 F3 input_gate_input =
786 F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
787 F0 input_gate_output = gemmlowp::logistic(input_gate_input);
788 // Implementation of input modulation gate, using fixed-point tanh
789 // function.
790 F3 input_modulation_gate_input =
791 F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
792 F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input);
793 // Implementation of forget gate, using fixed-point logistic function.
794 F3 forget_gate_input =
795 F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
796 F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
797 // Implementation of output gate, using fixed-point logistic function.
798 F3 output_gate_input =
799 F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
800 F0 output_gate_output = gemmlowp::logistic(output_gate_input);
801 // Implementation of internal multiplication nodes, still in fixed-point.
802 F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output;
803 FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
804 FS prev_state_times_forget_state = forget_gate_output * prev_state;
805 // Implementation of internal addition node, saturating.
806 FS new_state =
807 gemmlowp::SaturatingAdd(gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
808 prev_state_times_forget_state);
809 // Implementation of last internal Tanh node, still in fixed-point.
810 // Since a Tanh fixed-point implementation is specialized for a given
811 // number or integer bits, and each specialization can have a substantial
812 // code size, and we already used above a Tanh on an input with 3 integer
813 // bits, and per the table in the above function comment there is no
814 // significant accuracy to be lost by clamping to [-8, +8] for a
815 // 3-integer-bits representation, let us just do that. This helps people
816 // porting this to targets where code footprint must be minimized.
817 F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
818 F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
819 // Store the new internal state back to memory, as 16-bit integers.
820 // Note: here we store the original value with StateIntegerBits, not
821 // the rescaled 3-integer-bits value fed to tanh.
822 output_state_data_int16[b * output_depth + c] = new_state.raw();
823 // Down-scale the output activations to 8-bit integers, saturating,
824 // and store back to memory.
825 int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
826 int16 clamped_output_activ =
827 std::max<int16>(-128, std::min<int16>(127, rescaled_output_activ));
828 output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ;
829 }
830 }
831}

References Concatenation().

◆ Maximum() [1/2]

template<typename T >
void tflite::reference_ops::Maximum ( const RuntimeShape &  input1_shape,
const T *  input1_data,
const RuntimeShape &  ,
const T *  input2_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 1171 of file PALreference_ops.h.

1173{
1174 // Drop shape of second input: not needed.
1175 Maximum(input1_shape, input1_data, input2_data, output_shape, output_data);
1176}

References Maximum(), and output_shape.

◆ Maximum() [2/2]

template<typename T >
void tflite::reference_ops::Maximum ( const RuntimeShape &  input1_shape,
const T *  input1_data,
const T *  input2_data,
const RuntimeShape &  output_shape,
T *  output_data 
)

Definition at line 1156 of file PALreference_ops.h.

1158{
1159 const int flat_size = MatchingFlatSize(input1_shape, output_shape);
1160
1161 auto max_value = input2_data[0];
1162 for (int i = 0; i < flat_size; i++)
1163 {
1164 output_data[i] = input1_data[i] < max_value ? max_value : input1_data[i];
1165 }
1166}

References MatchingFlatSize(), and output_shape.

Referenced by Maximum().
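
Note that only input2_data[0] is read: this computes an elementwise maximum against a scalar second operand, not a general elementwise maximum of two equally shaped tensors. For example, input1 = {-1, 3, 7} with input2_data[0] = 2 yields {2, 3, 7}.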

◆ Minimum() [1/2]

template<typename T >
void tflite::reference_ops::Minimum ( const RuntimeShape &  input1_shape,
const T *  input1_data,
const RuntimeShape &  ,
const T *  input2_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 1148 of file PALreference_ops.h.

1150{
1151 // Drop shape of second input: not needed.
1152 Minimum(input1_shape, input1_data, input2_data, output_shape, output_data);
1153}

References Minimum(), and output_shape.

◆ Minimum() [2/2]

template<typename T >
void tflite::reference_ops::Minimum ( const RuntimeShape &  input1_shape,
const T *  input1_data,
const T *  input2_data,
const RuntimeShape &  output_shape,
T *  output_data 
)

Definition at line 1133 of file PALreference_ops.h.

1135{
1136 const int flat_size = MatchingFlatSize(input1_shape, output_shape);
1137
1138 auto min_value = input2_data[0];
1139 for (int i = 0; i < flat_size; i++)
1140 {
1141 output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i];
1142 }
1143}

References MatchingFlatSize(), and output_shape.

Referenced by Minimum().
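
As with Maximum() above, only input2_data[0] is read, so this clamps every element from above by a scalar: input1 = {-1, 3, 7} with input2_data[0] = 2 yields {-1, 2, 2}.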

◆ Mul() [1/2]

void tflite::reference_ops::Mul ( const ArithmeticParams &  params,
const RuntimeShape &  input1_shape,
const int16 *  input1_data,
const RuntimeShape &  input2_shape,
const int16 *  input2_data,
const RuntimeShape &  output_shape,
int16 *  output_data 
)
inline

Definition at line 242 of file PALreference_ops.h.

245{
246 ruy::profiler::ScopeLabel label("Mul/Int16");
247
248 const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
249
250 for (int i = 0; i < flat_size; i++)
251 {
252 // F0 uses 0 integer bits, range [-1, 1].
253 using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
254
255 F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
256 output_data[i] = unclamped_result.raw();
257 }
258}

References output_shape.
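
Inputs and output are interpreted as Q0.15 fixed-point values (F0: zero integer bits, range [-1, 1]), and the F0 product uses gemmlowp's saturating rounding doubling high multiply. Worked example: 0.5 * 0.5 corresponds to raw values 16384 * 16384, giving (2 * 2^28) >> 16 = 8192, which is 0.25 in Q0.15.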

◆ Mul() [2/2]

void tflite::reference_ops::Mul ( const ArithmeticParams &  params,
const RuntimeShape &  input1_shape,
const int16 *  input1_data,
const RuntimeShape &  input2_shape,
const int16 *  input2_data,
const RuntimeShape &  output_shape,
uint8 *  output_data 
)
inline

Definition at line 260 of file PALreference_ops.h.

263{
264 ruy::profiler::ScopeLabel label("Mul/Int16Uint8");
265 int32 output_offset = params.output_offset;
266 int32 output_activation_min = params.quantized_activation_min;
267 int32 output_activation_max = params.quantized_activation_max;
268 TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
269
270 const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
271
272 for (int i = 0; i < flat_size; i++)
273 {
274 // F0 uses 0 integer bits, range [-1, 1].
275 using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
276
277 F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
278 int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
279 int16 clamped_result = std::min<int16>(output_activation_max - output_offset, rescaled_result);
280 clamped_result = std::max<int16>(output_activation_min - output_offset, clamped_result);
281 output_data[i] = output_offset + clamped_result;
282 }
283}

References output_shape.
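
The Q0.15 product is narrowed for the uint8 output: RoundingDivideByPOT(raw, 8) rescales the 16-bit result into roughly [-128, 128], the value is clamped against the activation bounds expressed relative to output_offset, and output_offset is added back on store, which is why the bounds are offset-subtracted before the min/max.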

◆ NodeOffset()

int tflite::reference_ops::NodeOffset ( int  b,
int  h,
int  w,
int  height,
int  width 
)
inline

Definition at line 882 of file PALreference_ops.h.

883{
884 return (b * height + h) * width + w;
885}
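
This is the flat offset of node (b, h, w) in a [batches, height, width] layout; for example, NodeOffset(1, 2, 3, 4, 5) = (1 * 4 + 2) * 5 + 3 = 33.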

◆ Pack()

template<typename Scalar >
void tflite::reference_ops::Pack ( const PackParams &  params,
const RuntimeShape *const *  input_shapes,
const Scalar *const *  input_data,
const RuntimeShape &  output_shape,
Scalar *  output_data 
)

Definition at line 338 of file PALreference_ops.h.

340{
341 ruy::profiler::ScopeLabel label("Pack");
342 const int dimensions = output_shape.DimensionsCount();
343 int axis = params.axis;
344 int inputs_count = params.inputs_count;
345
346 int outer_size = 1;
347 for (int i = 0; i < axis; i++)
348 {
349 outer_size *= output_shape.Dims(i);
350 }
351 int copy_size = 1;
352 for (int i = params.axis + 1; i < dimensions; i++)
353 {
354 copy_size *= output_shape.Dims(i);
355 }
356 TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
357
358 for (int i = 0; i < inputs_count; ++i)
359 {
360 for (int k = 0; k < outer_size; k++)
361 {
362 const Scalar *input_ptr = input_data[i] + copy_size * k;
363 int loc = k * inputs_count * copy_size + i * copy_size;
364 memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
365 }
366 }
367}

References output_shape.
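
Example (illustrative values): packing inputs_count = 2 tensors of shape [2, 3] along axis = 1 produces an output of shape [2, 2, 3]. Here outer_size = 2 and copy_size = 3, so row k of input i (3 elements) is copied to output offset k * 2 * 3 + i * 3.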

◆ PackWithScaling()

template<typename Scalar >
void tflite::reference_ops::PackWithScaling ( const PackParams &  params,
const RuntimeShape *const *  input_shapes,
const uint8 *const *  input_data,
const RuntimeShape &  output_shape,
uint8 *  output_data 
)

Definition at line 408 of file PALreference_ops.h.

411{
412 ruy::profiler::ScopeLabel label("PackWithScaling");
413 const int dimensions = output_shape.DimensionsCount();
414 int axis = params.axis;
415 const int32 *input_zeropoint = params.input_zeropoint;
416 const float *input_scale = params.input_scale;
417 int inputs_count = params.inputs_count;
418 const int32 output_zeropoint = params.output_zeropoint;
419 const float output_scale = params.output_scale;
420
421 int outer_size = 1;
422 for (int i = 0; i < axis; i++)
423 {
424 outer_size *= output_shape.Dims(i);
425 }
426 int copy_size = 1;
427 for (int i = axis + 1; i < dimensions; i++)
428 {
429 copy_size *= output_shape.Dims(i);
430 }
431 TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
432
433 Scalar *output_ptr = output_data;
434 const float inverse_output_scale = 1.f / output_scale;
435 for (int k = 0; k < outer_size; k++)
436 {
437 for (int i = 0; i < inputs_count; ++i)
438 {
439 if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale)
440 {
441 memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar));
442 }
443 else
444 {
445 assert(false);
446 const float scale = input_scale[i] * inverse_output_scale;
447 const float bias = -input_zeropoint[i] * scale;
448 auto input_ptr = input_data[i];
449 for (int j = 0; j < copy_size; ++j)
450 {
451 const int value =
452 static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
453 output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
454 }
455 }
456 output_ptr += copy_size;
457 }
458 }
459}

References output_shape.
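
Note that the requantization branch (an input whose scale or zero point differs from the output's) starts with assert(false), so this PAL implementation effectively supports only the pass-through memcpy path where input and output quantization match; the rescaling code after the assert shows the intended general behavior.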

◆ Pow()

template<typename T >
void tflite::reference_ops::Pow ( const RuntimeShape &  input1_shape,
const T *  input1_data,
const RuntimeShape &  input2_shape,
const T *  input2_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 1376 of file PALreference_ops.h.

1379{
1380 const int flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
1381 for (int i = 0; i < flat_size; ++i)
1382 {
1383 output_data[i] = std::pow(input1_data[i], input2_data[i]);
1384 }
1385}

References MatchingFlatSize(), and output_shape.

◆ RankOneSelect()

template<typename D , typename T >
void tflite::reference_ops::RankOneSelect ( const RuntimeShape &  input_condition_shape,
const D *  input_condition_data,
const RuntimeShape &  input_x_shape,
const T *  input_x_data,
const RuntimeShape &  input_y_shape,
const T *  input_y_data,
const RuntimeShape &  output_shape,
T *  output_data 
)

Definition at line 1221 of file PALreference_ops.h.

1225{
1226 const int64_t outer_size = input_condition_shape.FlatSize();
1227 int64_t inner_size;
1228 if (input_condition_shape.DimensionsCount() == 0)
1229 {
1230 inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
1231 }
1232 else
1233 {
1234 TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size);
1235 inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
1236 }
1237
1238 int64_t offset = 0;
1239 for (int64_t i = 0; i < outer_size; i++)
1240 {
1241 const T *input_data = input_condition_data[i] ? input_x_data : input_y_data;
1242 memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
1243 offset += inner_size;
1244 }
1245}

References MatchingFlatSize(), offset(), and output_shape.
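
Example (illustrative values): with a rank-1 condition {1, 0} and x, y of shape [2, 3], outer_size = 2 and inner_size = 3, so row 0 is copied from x and row 1 from y. With a rank-0 (scalar) condition, the single condition value selects the entire x or y tensor in one copy.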

◆ Relu()

template<typename T >
void tflite::reference_ops::Relu ( const RuntimeShape &  input_shape,
const T *  input_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 109 of file PALreference_ops.h.

111{
112 const int flat_size = MatchingFlatSize(input_shape, output_shape);
113 for (int i = 0; i < flat_size; ++i)
114 {
115 const T val = input_data[i];
116 const T lower = 0;
117 const T clamped = val < lower ? lower : val;
118 output_data[i] = clamped;
119 }
120}

References MatchingFlatSize(), and output_shape.

◆ Relu1()

template<typename T >
void tflite::reference_ops::Relu1 ( const RuntimeShape &  input_shape,
const T *  input_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 123 of file PALreference_ops.h.

125{
126 ruy::profiler::ScopeLabel label("Relu1 (not fused)");
127 const int flat_size = MatchingFlatSize(input_shape, output_shape);
128 for (int i = 0; i < flat_size; ++i)
129 {
130 const T val = input_data[i];
131 const T upper = 1;
132 const T lower = -1;
133 const T clamped = val > upper ? upper : val < lower ? lower : val;
134 output_data[i] = clamped;
135 }
136}

References MatchingFlatSize(), and output_shape.

◆ Relu6()

void tflite::reference_ops::Relu6 ( const RuntimeShape &  input_shape,
const float *  input_data,
const RuntimeShape &  output_shape,
float *  output_data 
)
inline

Definition at line 138 of file PALreference_ops.h.

140{
141 ruy::profiler::ScopeLabel label("Relu6 (not fused)");
142 const int flat_size = MatchingFlatSize(input_shape, output_shape);
143 for (int i = 0; i < flat_size; ++i)
144 {
145 const float val = input_data[i];
146 const float upper = 6;
147 const float lower = 0;
148 const float clamped = val > upper ? upper : val < lower ? lower : val;
149 output_data[i] = clamped;
150 }
151}

References MatchingFlatSize(), and output_shape.

◆ ReluX() [1/2]

template<typename T >
void tflite::reference_ops::ReluX ( const tflite::ActivationParams &  params,
const RuntimeShape &  input_shape,
const T *  input_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 172 of file PALreference_ops.h.

174{
175 ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
176 const int flat_size = MatchingFlatSize(input_shape, output_shape);
177 const T max_value = params.quantized_activation_max;
178 const T min_value = params.quantized_activation_min;
179 for (int i = 0; i < flat_size; ++i)
180 {
181 const T val = input_data[i];
182 const T clamped = val > max_value ? max_value : val < min_value ? min_value : val;
183 output_data[i] = clamped;
184 }
185}

References MatchingFlatSize(), and output_shape.

◆ ReluX() [2/2]

template<typename T >
void tflite::reference_ops::ReluX ( const tflite::ReluParams &  params,
const RuntimeShape &  input_shape,
const T *  input_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 154 of file PALreference_ops.h.

156{
157 ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
158 const int flat_size = MatchingFlatSize(input_shape, output_shape);
159 for (int i = 0; i < flat_size; ++i)
160 {
161 const int32 val = static_cast<int32_t>(input_data[i]);
162 int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset,
163 params.output_multiplier,
164 params.output_shift);
165 clamped = std::max(params.quantized_activation_min, clamped);
166 clamped = std::min(params.quantized_activation_max, clamped);
167 output_data[i] = static_cast<T>(clamped);
168 }
169}

References MatchingFlatSize(), and output_shape.

◆ Reverse()

template<typename Scalar >
void tflite::reference_ops::Reverse ( int  axis,
const RuntimeShape &  input_shape,
const Scalar *  input_data,
const RuntimeShape &  output_shape,
Scalar *  output_data 
)

Definition at line 1423 of file PALreference_ops.h.

1425{
1426 ruy::profiler::ScopeLabel label("Reverse");
1427
1428 int outer_size = 1;
1429 for (int i = 0; i < axis; ++i)
1430 {
1431 outer_size *= input_shape.Dims(i);
1432 }
1433
1434 int copy_size = 1;
1435 for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i)
1436 {
1437 copy_size *= input_shape.Dims(i);
1438 }
1439
1440 const int dims_at_axis = input_shape.Dims(axis);
1441 for (int i = 0; i < outer_size; ++i)
1442 {
1443 for (int j = 0; j < dims_at_axis; ++j)
1444 {
1445 const int start_pos = (i * dims_at_axis + j) * copy_size;
1446 Scalar *output_ptr = output_data + start_pos;
1447 int loc = (i * dims_at_axis + dims_at_axis - j - 1) * copy_size;
1448 memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
1449 }
1450 }
1451}
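
Example (illustrative values): reversing axis 0 of a [3, 2] tensor {1, 2, 3, 4, 5, 6} yields {5, 6, 3, 4, 1, 2}; here outer_size = 1, copy_size = 2, and each 2-element output row j is filled from source row dims_at_axis - j - 1.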

◆ ReverseSequence()

template<typename Scalar , typename TS >
void tflite::reference_ops::ReverseSequence ( const TS *  seq_lengths,
const int  seq_dim,
const int  batch_dim,
const RuntimeShape &  input_shape,
const Scalar *  input_data,
const RuntimeShape &  output_shape,
Scalar *  output_data 
)

Definition at line 1454 of file PALreference_ops.h.

1457{
1458 ruy::profiler::ScopeLabel label("ReverseSequence");
1459
1460 int outer_size = 1;
1461 int outer_dim = std::min(batch_dim, seq_dim);
1462 int medium_dim = std::max(batch_dim, seq_dim);
1463 for (int i = 0; i < outer_dim; ++i)
1464 {
1465 outer_size *= input_shape.Dims(i);
1466 }
1467
1468 int medium_size = 1;
1469 for (int i = outer_dim + 1; i < medium_dim; ++i)
1470 {
1471 medium_size *= input_shape.Dims(i);
1472 }
1473
1474 int copy_size = 1;
1475 for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i)
1476 {
1477 copy_size *= input_shape.Dims(i);
1478 }
1479
1480 const int dims_at_outer_dim = input_shape.Dims(outer_dim);
1481 const int dims_at_medium_dim = input_shape.Dims(medium_dim);
1482
1483 Scalar *output_ptr;
1484 if (batch_dim > seq_dim)
1485 {
1486 for (int i = 0; i < outer_size; ++i)
1487 {
1488 for (int j = 0; j < dims_at_outer_dim; ++j)
1489 {
1490 const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
1491 for (int p = 0; p < medium_size; ++p)
1492 {
1493 for (int q = 0; q < dims_at_medium_dim; ++q)
1494 {
1495 const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
1496 const Scalar *in_ptr = input_data + in_pos;
1497 int sl = seq_lengths[q] - 1;
1498 if (j > sl)
1499 {
1500 output_ptr = output_data + in_pos;
1501 }
1502 else
1503 {
1504 const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size;
1505 const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size;
1506 output_ptr = output_data + out_pos;
1507 }
1508 memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
1509 }
1510 }
1511 }
1512 }
1513 }
1514 else if (batch_dim < seq_dim)
1515 {
1516 for (int i = 0; i < outer_size; ++i)
1517 {
1518 for (int j = 0; j < dims_at_outer_dim; ++j)
1519 {
1520 const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
1521 int sl = seq_lengths[j] - 1;
1522 const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size;
1523 for (int p = 0; p < medium_size; ++p)
1524 {
1525 for (int q = 0; q < dims_at_medium_dim; ++q)
1526 {
1527 const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
1528 const Scalar *in_ptr = input_data + in_pos;
1529 if (q > sl)
1530 {
1531 output_ptr = output_data + in_pos;
1532 }
1533 else
1534 {
1535 const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size;
1536 output_ptr = output_data + out_pos;
1537 }
1538 memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
1539 }
1540 }
1541 }
1542 }
1543 }
1544}
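
For each entry along batch_dim, the first seq_lengths[batch] elements along seq_dim are reversed and the remainder copied through unchanged. A minimal sketch on a 2x3 tensor with batch_dim 0 and seq_dim 1 (hypothetical buffers):

const tflite::RuntimeShape shape({2, 3});
const float input[] = {1, 2, 3, 4, 5, 6};
const int32_t seq_lengths[] = {2, 3}; // reverse 2 elements in row 0, all 3 in row 1
float output[6];
tflite::reference_ops::ReverseSequence(seq_lengths, /*seq_dim=*/1, /*batch_dim=*/0,
                                       shape, input, shape, output);
// output == {2, 1, 3, 6, 5, 4}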

◆ ScatterNd()

template<typename IndicesT , typename UpdatesT >
void tflite::reference_ops::ScatterNd ( const RuntimeShape &  indices_shape,
const IndicesT *  indices_data,
const RuntimeShape &  updates_shape,
const UpdatesT *  updates_data,
const RuntimeShape &  output_shape,
UpdatesT *  output_data 
)
inline

Definition at line 1031 of file PALreference_ops.h.

1034{
1035 ruy::profiler::ScopeLabel label("ScatterNd");
1036
1037 int n_slices = 1;
1038 int slice_size = 1;
1039 const int outer_dims = indices_shape.DimensionsCount() - 1;
1040 const int indices_nd = indices_shape.Dims(outer_dims);
1041 const int updates_dims = updates_shape.DimensionsCount();
1042 for (int i = 0; i < outer_dims; ++i)
1043 {
1044 n_slices *= indices_shape.Dims(i);
1045 }
1046 for (int i = outer_dims; i < updates_dims; ++i)
1047 {
1048 slice_size *= updates_shape.Dims(i);
1049 }
1050
1051 int output_flat_size = output_shape.FlatSize();
1052 int remain_flat_size = output_flat_size;
1053 std::vector<int> dims_to_count(indices_nd, 0);
1054 for (int i = 0; i < indices_nd; ++i)
1055 {
1056 dims_to_count[i] = remain_flat_size / output_shape.Dims(i);
1057 remain_flat_size = dims_to_count[i];
1058 }
1059
1060 memset(output_data, 0, sizeof(UpdatesT) * output_flat_size);
1061 for (int i = 0; i < n_slices; ++i)
1062 {
1063 int to_pos = 0;
1064 for (int j = 0; j < indices_nd; ++j)
1065 {
1066 IndicesT idx = indices_data[i * indices_nd + j];
1067 TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j));
1068 to_pos += idx * dims_to_count[j];
1069 }
1070 for (int j = 0; j < slice_size; j++)
1071 {
1072 output_data[to_pos + j] += updates_data[i * slice_size + j];
1073 }
1074 }
1075}

References output_shape.
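
ScatterNd zero-fills the output and then accumulates each updates slice at the offset named by the corresponding index vector. A minimal sketch scattering two scalars into a length-4 vector (hypothetical buffers):

const tflite::RuntimeShape indices_shape({2, 1}); // two rank-1 index vectors
const int32_t indices[] = {0, 2};
const tflite::RuntimeShape updates_shape({2});
const float updates[] = {10.f, 20.f};
const tflite::RuntimeShape output_shape({4});
float output[4];
tflite::reference_ops::ScatterNd(indices_shape, indices, updates_shape, updates,
                                 output_shape, output);
// output == {10, 0, 20, 0}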

◆ SegmentSum()

template<typename T >
void tflite::reference_ops::SegmentSum ( const RuntimeShape &  input_shape,
const T *  input_data,
const RuntimeShape &  segment_ids_shape,
const int32_t *  segment_ids_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 1547 of file PALreference_ops.h.

1550{
1551 const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape);
1552
1553 memset(output_data, 0, sizeof(T) * output_shape.FlatSize());
1554
1555 for (int i = 0; i < input_shape.Dims(0); i++)
1556 {
1557 int output_index = segment_ids_data[i];
1558 for (int j = 0; j < segment_flat_size; ++j)
1559 {
1560 output_data[output_index * segment_flat_size + j] += input_data[i * segment_flat_size + j];
1561 }
1562 }
1563}

References output_shape.
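
SegmentSum accumulates input rows that share a segment id; the output's first dimension must cover the largest id. A minimal sketch (hypothetical buffers):

const tflite::RuntimeShape input_shape({3, 2});
const float input[] = {1, 2, 3, 4, 5, 6};
const tflite::RuntimeShape ids_shape({3});
const int32_t segment_ids[] = {0, 0, 1}; // rows 0 and 1 go to segment 0
const tflite::RuntimeShape output_shape({2, 2});
float output[4];
tflite::reference_ops::SegmentSum(input_shape, input, ids_shape, segment_ids,
                                  output_shape, output);
// output == {4, 6, 5, 6}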

◆ Select()

template<typename D , typename T >
void tflite::reference_ops::Select ( const RuntimeShape &  input_condition_shape,
const D *  input_condition_data,
const RuntimeShape &  input_x_shape,
const T *  input_x_data,
const RuntimeShape &  input_y_shape,
const T *  input_y_data,
const RuntimeShape &  output_shape,
T *  output_data 
)

Definition at line 1197 of file PALreference_ops.h.

1201{
1202 int64_t flatsize;
1203 // Allow the select operator to execute on a mix of scalar and
1204 // one-element tensors.
1205 if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
1206 input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1)
1207 {
1208 flatsize = 1;
1209 }
1210 else
1211 {
1212 flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
1213 }
1214 for (int64_t i = 0; i < flatsize; ++i)
1215 {
1216 output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i];
1217 }
1218}

References MatchingFlatSize(), and output_shape.
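
A minimal element-wise sketch (hypothetical buffers; all shapes must match unless every tensor holds a single element):

const tflite::RuntimeShape shape({3});
const bool condition[] = {true, false, true};
const float x[] = {1, 2, 3};
const float y[] = {10, 20, 30};
float output[3];
tflite::reference_ops::Select(shape, condition, shape, x, shape, y, shape, output);
// output == {1, 20, 3}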

◆ SelectTrueCoords()

template<typename D , typename T >
void tflite::reference_ops::SelectTrueCoords ( const RuntimeShape &  input_condition_shape,
const D *  input_condition_data,
T *  output_data 
)

Definition at line 1297 of file PALreference_ops.h.

1299{
1300 const size_t size = input_condition_shape.FlatSize();
1301 if (size == 0)
1302 {
1303 // The condition tensor is empty, so there is nothing to output.
1304 return;
1305 }
1306 const size_t cond_rank = input_condition_shape.DimensionsCount();
1307
1308 std::vector<int> dims_to_count(cond_rank, 0);
1309 int cur_flat_size = size;
1310 for (int i = 0; i < cond_rank; ++i)
1311 {
1312 dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i);
1313 cur_flat_size = dims_to_count[i];
1314 }
1315
1316 int output_index = 0;
1317 for (int i = 0; i < size; ++i)
1318 {
1319 if (input_condition_data[i])
1320 {
1321 // Insert the coordinate of the current item (row major) into output.
1322 int flat_index = i;
1323 for (int j = 0; j < cond_rank; ++j)
1324 {
1325 int coord_j = flat_index / dims_to_count[j];
1326 output_data[output_index * cond_rank + j] = coord_j;
1327 flat_index %= dims_to_count[j];
1328 }
1329 output_index++;
1330 }
1331 }
1332}
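
SelectTrueCoords writes the row-major coordinates of every true element; the caller must provide room for num_true * rank entries. A minimal sketch (hypothetical buffers):

const tflite::RuntimeShape cond_shape({2, 2});
const bool condition[] = {true, false, false, true};
int32_t coords[4]; // 2 true elements x rank 2
tflite::reference_ops::SelectTrueCoords(cond_shape, condition, coords);
// coords == {0, 0, 1, 1}: elements (0,0) and (1,1) are true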

◆ Slice() [1/3]

template<typename T >
void tflite::reference_ops::Slice ( const tflite::SliceParams &  op_params,
const RuntimeShape &  input_shape,
const RuntimeShape &  output_shape,
SequentialTensorWriter< T > *  writer 
)
inline

Definition at line 1078 of file PALreference_ops.h.

1080{
1081 const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);
1082 TFLITE_DCHECK_LE(op_params.begin_count, 5);
1083 TFLITE_DCHECK_LE(op_params.size_count, 5);
1084 const int begin_count = op_params.begin_count;
1085 const int size_count = op_params.size_count;
1086 // We front-pad the begin and size vectors.
1087 std::array<int, 5> start;
1088 std::array<int, 5> stop;
1089 for (int i = 0; i < 5; ++i)
1090 {
1091 int padded_i = 5 - i;
1092 start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
1093 stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
1094 ? ext_shape.Dims(i)
1095 : start[i] + op_params.size[size_count - padded_i];
1096 }
1097
1098 for (int i0 = start[0]; i0 < stop[0]; ++i0)
1099 {
1100 for (int i1 = start[1]; i1 < stop[1]; ++i1)
1101 {
1102 for (int i2 = start[2]; i2 < stop[2]; ++i2)
1103 {
1104 for (int i3 = start[3]; i3 < stop[3]; ++i3)
1105 {
1106 for (int i4 = start[4]; i4 < stop[4]; ++i4)
1107 {
1108 writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4));
1109 }
1110 }
1111 }
1112 }
1113 }
1114}

References Offset().

Referenced by Slice(), and Slice().

◆ Slice() [2/3]

template<typename T >
void tflite::reference_ops::Slice ( const tflite::SliceParams &  op_params,
const RuntimeShape &  input_shape,
const T *  input_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 1117 of file PALreference_ops.h.

1119{
1120 SequentialTensorWriter<T> writer(input_data, output_data);
1121 return Slice(op_params, input_shape, output_shape, &writer);
1122}

References output_shape, and Slice().
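
begin and size are right-aligned against the shape after front-padding to five dimensions, and a size of -1 means "to the end of that axis". A minimal 1-D sketch (hypothetical values; assumes SliceParams carries begin/size arrays plus their counts, as in upstream TensorFlow Lite):

tflite::SliceParams params{};
params.begin_count = 1;
params.begin[0] = 1; // start at element 1
params.size_count = 1;
params.size[0] = 3;  // take three elements
const tflite::RuntimeShape input_shape({5});
const float input[] = {0, 1, 2, 3, 4};
const tflite::RuntimeShape output_shape({3});
float output[3];
tflite::reference_ops::Slice(params, input_shape, input, output_shape, output);
// output == {1, 2, 3}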

◆ Slice() [3/3]

template<typename T >
void tflite::reference_ops::Slice ( const tflite::SliceParams &  op_params,
const RuntimeShape &  input_shape,
const TfLiteTensor *  input,
const RuntimeShape &  output_shape,
TfLiteTensor *  output 
)
inline

Definition at line 1125 of file PALreference_ops.h.

1127{
1128 SequentialTensorWriter<T> writer(input, output);
1129 return Slice(op_params, input_shape, output_shape, &writer);
1130}

References output_shape, and Slice().

◆ SparseToDense()

template<typename T , typename TI >
void tflite::reference_ops::SparseToDense ( const std::vector< std::vector< TI > > &  indices,
const T *  values,
T  default_value,
bool  value_is_scalar,
const RuntimeShape &  unextended_output_shape,
T *  output_data 
)
inline

Definition at line 1336 of file PALreference_ops.h.

1339{
1340 TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
1341 const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
1342 const int value_count = indices.size();
1343
1344 // First fill the output_data with default value.
1345 const int num_elements = output_shape.FlatSize();
1346 for (int i = 0; i < num_elements; ++i)
1347 {
1348 output_data[i] = default_value;
1349 }
1350
1351 // Handle the scalar-value case specially to avoid checking the boolean
1352 // condition inside the loop every time.
1353 if (value_is_scalar)
1354 {
1355 for (int i = 0; i < value_count; ++i)
1356 {
1357 const std::vector<TI> &index = indices[i];
1358 TFLITE_DCHECK_EQ(index.size(), 4);
1359 const T value = *values; // just use the first value.
1360 output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
1361 }
1362 return;
1363 }
1364
1365 // Go through the values and indices to fill the sparse values.
1366 for (int i = 0; i < value_count; ++i)
1367 {
1368 const std::vector<TI> &index = indices[i];
1369 TFLITE_DCHECK_EQ(index.size(), 4);
1370 const T value = values[i];
1371 output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
1372 }
1373}

References Offset(), and output_shape.
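
Because the output shape is extended to rank 4, every index vector must have exactly four components. A minimal sketch filling a length-4 vector (hypothetical buffers):

const std::vector<std::vector<int32_t>> indices = {{0, 0, 0, 1}, {0, 0, 0, 3}};
const float values[] = {5.f, 7.f};
const tflite::RuntimeShape output_shape({4}); // extended internally to {1,1,1,4}
float output[4];
tflite::reference_ops::SparseToDense(indices, values, /*default_value=*/0.f,
                                     /*value_is_scalar=*/false, output_shape, output);
// output == {0, 5, 0, 7}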

◆ Split()

template<typename Scalar >
void tflite::reference_ops::Split ( const SplitParams &  params,
const RuntimeShape &  input_shape,
const Scalar *  input_data,
const RuntimeShape *const *  output_shapes,
Scalar *const *  output_data 
)

Definition at line 834 of file PALreference_ops.h.

836{
837 ruy::profiler::ScopeLabel label("Split");
838 const int split_dimensions = input_shape.DimensionsCount();
839 int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
840 int outputs_count = params.num_split;
841 TFLITE_DCHECK_LT(axis, split_dimensions);
842
843 int64_t split_size = 0;
844 for (int i = 0; i < outputs_count; i++)
845 {
846 TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions);
847 for (int j = 0; j < split_dimensions; j++)
848 {
849 if (j != axis)
850 {
851 MatchingDim(*output_shapes[i], j, input_shape, j);
852 }
853 }
854 split_size += output_shapes[i]->Dims(axis);
855 }
856 TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis));
857 int64_t outer_size = 1;
858 for (int i = 0; i < axis; ++i)
859 {
860 outer_size *= input_shape.Dims(i);
861 }
862 // For all output arrays,
863 // FlatSize() = outer_size * Dims(axis) * base_inner_size;
864 int64_t base_inner_size = 1;
865 for (int i = axis + 1; i < split_dimensions; ++i)
866 {
867 base_inner_size *= input_shape.Dims(i);
868 }
869
870 const Scalar *input_ptr = input_data;
871 for (int k = 0; k < outer_size; k++)
872 {
873 for (int i = 0; i < outputs_count; ++i)
874 {
875 const int copy_size = output_shapes[i]->Dims(axis) * base_inner_size;
876 memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
877 input_ptr += copy_size;
878 }
879 }
880}
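
A minimal sketch splitting a 2x4 tensor into two 2x2 halves along axis 1 (hypothetical buffers; assumes SplitParams exposes num_split and axis, as in upstream TensorFlow Lite):

tflite::SplitParams params{};
params.num_split = 2;
params.axis = 1;
const tflite::RuntimeShape input_shape({2, 4});
const float input[] = {1, 2, 3, 4, 5, 6, 7, 8};
const tflite::RuntimeShape out_shape({2, 2});
const tflite::RuntimeShape *output_shapes[] = {&out_shape, &out_shape};
float out0[4], out1[4];
float *output_data[] = {out0, out1};
tflite::reference_ops::Split(params, input_shape, input, output_shapes, output_data);
// out0 == {1, 2, 5, 6}, out1 == {3, 4, 7, 8}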

◆ Sub16()

void tflite::reference_ops::Sub16 ( const ArithmeticParams &  params,
const RuntimeShape &  input1_shape,
const int16_t *  input1_data,
const RuntimeShape &  input2_shape,
const int16_t *  input2_data,
const RuntimeShape &  output_shape,
int16_t *  output_data 
)
inline

Definition at line 285 of file PALreference_ops.h.

289{
290 ruy::profiler::ScopeLabel label("Sub/Int16");
291 const int input1_shift = params.input1_shift;
292 const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
293 const int16 output_activation_min = params.quantized_activation_min;
294 const int16 output_activation_max = params.quantized_activation_max;
295
296 TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
297 TFLITE_DCHECK_LE(input1_shift, 0);
298 TFLITE_DCHECK_LE(params.input2_shift, 0);
299 const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data;
300 const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data;
301 const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift;
302
303 if (input1_shift == 0)
304 {
305 // F0 uses 0 integer bits, range [-1, 1].
306 using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
307 for (int i = 0; i < flat_size; ++i)
308 {
309 F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
310 F0 scaled_input =
311 F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
312 F0 result = SaturatingSub(input_ready_scaled, scaled_input);
313 const int16 raw_output = result.raw();
314 const int16 clamped_output =
315 std::min(output_activation_max, std::max(output_activation_min, raw_output));
316 output_data[i] = clamped_output;
317 }
318 }
319 else
320 {
321 // F0 uses 0 integer bits, range [-1, 1].
322 using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
323 for (int i = 0; i < flat_size; ++i)
324 {
325 F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
326 F0 scaled_input =
327 F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
328 F0 result = SaturatingSub(scaled_input, input_ready_scaled);
329 const int16 raw_output = result.raw();
330 const int16 clamped_output =
331 std::min(output_activation_max, std::max(output_activation_min, raw_output));
332 output_data[i] = clamped_output;
333 }
334 }
335}

References output_shape.
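
Sub16 subtracts Q0.15 fixed-point tensors; one of the two input shifts must be zero and both must be non-positive. With both shifts zero it reduces to a plain saturating element-wise subtraction. A minimal sketch (hypothetical values; assumes the ArithmeticParams shift/activation fields used above):

tflite::ArithmeticParams params{};
params.input1_shift = 0;
params.input2_shift = 0;
params.quantized_activation_min = -32768;
params.quantized_activation_max = 32767;
const tflite::RuntimeShape shape({2});
const int16_t a[] = {16384, -8192}; // 0.5, -0.25 in Q0.15
const int16_t b[] = {8192, 8192};   // 0.25, 0.25
int16_t output[2];
tflite::reference_ops::Sub16(params, shape, a, shape, b, shape, output);
// output == {8192, -16384}, i.e. 0.25 and -0.5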

◆ Unpack()

template<typename Scalar >
void tflite::reference_ops::Unpack ( const UnpackParams &  params,
const RuntimeShape &  input_shape,
const Scalar *  input_data,
const RuntimeShape &  output_shape,
Scalar *const *  output_datas 
)

Definition at line 370 of file PALreference_ops.h.

372{
373 ruy::profiler::ScopeLabel label("Unpack");
374 const int dimensions = input_shape.DimensionsCount();
375 const int outputs_count = params.num_split;
376
377 int outer_size = 1;
378 int axis = params.axis;
379 if (axis < 0)
380 {
381 axis += dimensions;
382 }
383 TFLITE_DCHECK_GE(axis, 0);
384 TFLITE_DCHECK_LT(axis, dimensions);
385 for (int i = 0; i < axis; ++i)
386 {
387 outer_size *= input_shape.Dims(i);
388 }
389 int copy_size = 1;
390 for (int i = axis + 1; i < dimensions; ++i)
391 {
392 copy_size *= input_shape.Dims(i);
393 }
394 TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
395
396 for (int i = 0; i < outputs_count; ++i)
397 {
398 for (int k = 0; k < outer_size; k++)
399 {
400 Scalar *output_ptr = output_datas[i] + copy_size * k;
401 int loc = k * outputs_count * copy_size + i * copy_size;
402 memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
403 }
404 }
405}

References output_shape.
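
A minimal sketch unpacking a 2x2 tensor into its two rows (hypothetical buffers; assumes UnpackParams exposes num_split and axis, as in upstream TensorFlow Lite):

tflite::UnpackParams params{};
params.num_split = 2;
params.axis = 0;
const tflite::RuntimeShape input_shape({2, 2});
const float input[] = {1, 2, 3, 4};
const tflite::RuntimeShape output_shape({2});
float out0[2], out1[2];
float *output_datas[] = {out0, out1};
tflite::reference_ops::Unpack(params, input_shape, input, output_shape, output_datas);
// out0 == {1, 2}, out1 == {3, 4}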