ONE - On-device Neural Engine
luci_interpreter::kernels Namespace Reference

Namespaces

namespace  lstm
 
namespace  testing
 

Data Structures

class  Abs
 
class  Add
 
class  ArgMax
 
class  AveragePool2D
 
class  BatchMatMul
 
class  BatchToSpaceND
 
class  BroadcastableWrapper
 
class  BroadcastTo
 
class  Cast
 
struct  ChannelQuantMultipliers
 
class  Concatenation
 
class  Conv2D
 
class  Cos
 
class  CumSum
 
class  DepthToSpace
 
class  DepthwiseConv2D
 
class  Dequantize
 
class  Div
 
class  DownsamplingConv2DKernel
 
class  Elu
 
class  Equal
 
class  Exp
 
class  ExpandDims
 
class  Fill
 
class  Floor
 
class  FloorDiv
 
class  FloorMod
 
class  FullyConnected
 
class  Gather
 
class  Gelu
 
class  Greater
 
class  GreaterEqual
 
class  GRU
 
class  HardSwish
 
class  If
 
class  InstanceNorm
 
class  L2Normalize
 
class  L2Pool2D
 
class  LeakyRelu
 
class  Less
 
class  LessEqual
 
class  LocalResponseNormalization
 
class  Log
 
class  LogicalAnd
 
class  LogicalNot
 
class  LogicalOr
 
class  Logistic
 
class  LogSoftmax
 
class  Maximum
 
class  MaxPool2D
 
class  Mean
 
class  Minimum
 
class  MirrorPad
 
class  MISOKernel
 
class  Mul
 
class  Neg
 
class  NotEqual
 
class  OneHot
 
class  Pack
 
class  Pad
 
class  PadV2
 
class  Pow
 
class  PRelu
 
class  Quantize
 
class  ReduceMax
 
class  ReduceProd
 
class  Relu
 
class  Relu0To1
 
class  Relu6
 
class  Reshape
 
class  ResizeBilinear
 
class  ResizeNearestNeighbor
 
class  ReverseV2
 
class  RmsNorm
 
class  RoPE
 
class  Rsqrt
 
class  Select
 
class  SelectV2
 
class  ShapeKernel
 
class  Sin
 
class  SISOKernel
 
class  Slice
 
class  Softmax
 
class  SpaceToBatchND
 
class  SpaceToDepth
 
class  Split
 
class  SplitV
 
class  Sqrt
 
class  Square
 
class  SquaredDifference
 
class  Squeeze
 
class  StridedSlice
 
class  Sub
 
class  Sum
 
class  SVDF
 
class  Tanh
 
class  Tile
 
struct  TISOData
 
class  TISOKernel
 
class  Transpose
 
class  TransposeConv
 
class  UnidirectionalSequenceLSTM
 
class  Unpack
 
class  VectorOfQuantizedTensors
 
class  VectorOfTensors
 
class  While
 

Typedefs

using Activation = luci_interpreter::FusedActFunc
 

Functions

void TransposeRowsColumns (const Tensor *tensor_in, Tensor *tensor_out)
 
template<typename T , typename Op , int N = 5>
void BinaryOpBroadcastSlow (const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, const tflite::RuntimeShape &unextended_input2_shape, const T *input2_data, const tflite::RuntimeShape &unextended_output_shape, T *output_data, Op op)
 
template<typename T >
Shape calculateOutputShape (const Tensor *input, const Tensor *begin, const Tensor *size)
 
template<typename T >
void getBeginAndSizeVectors (int dimensions, const Tensor *begin, const Tensor *size, std::vector< int > *begins, std::vector< int > *sizes)
 
TfLiteFusedActivation getTfLiteActivation (Activation activation)
 
template<typename T >
void calculateActivationRange (Activation activation, T *activation_min, T *activation_max)
 
template void calculateActivationRange (Activation activation, float *activation_min, float *activation_max)
 
template void calculateActivationRange (Activation activation, int32_t *activation_min, int32_t *activation_max)
 
template void calculateActivationRange (Activation activation, int64_t *activation_min, int64_t *activation_max)
 
void calculateActivationRangeQuantized (Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
 
void quantizeMultiplier (double double_multiplier, int32_t *quantized_multiplier, int *shift)
 
void quantizeMultiplierSmallerThanOneExp (double double_multiplier, int32_t *quantized_multiplier, int *left_shift)
 
Shape calculateShapeForBroadcast (const Shape &input1_shape, const Shape &input2_shape)
 
int32_t computePadding (int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size, int32_t out_size)
 
int32_t computePaddingWithOffset (int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size, int32_t out_size, int32_t *offset)
 
int32_t computeOutputSize (Padding padding, int32_t image_size, int32_t filter_size, int32_t stride, int32_t dilation_rate=1)
 
int32_t calcOffset (const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
 
template<typename T >
constexpr bool one_of_types ()
 
template<typename T , typename U , typename... Other>
constexpr bool one_of_types ()
 
template<typename T >
void fillArithmeticActivationRange (tflite::ArithmeticParams &p, Activation act)
 
double getQuantizedConvolutionMultipler (float input_scale, float filter_scale, float output_scale)
 
std::vector< double > getQuantizedConvolutionMultiplers (float input_scale, const std::vector< float > &filter_scale, float output_scale)
 
std::vector< ChannelQuantMultipliers > quantizeMultipliers (const std::vector< double > &effective_scale)
 
tflite::RuntimeShape getTensorShape (const Tensor *tensor)
 
template<typename T >
const T * getTensorData (const Tensor *tensor)
 
template<typename T >
T * getTensorData (Tensor *tensor)
 
template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void evalTISOKernel (TISOFunc tiso_func, TISOBroadcastFunc tiso_broadcast_func, kernels::TISOKernel *kernel, kernels::TISOData *kernel_data, const Options *options, RuntimeShape &&input_shape_1, RuntimeShape &&input_shape_2, RuntimeShape &&output_shape)
 
template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void evalTISOInplaceKernel (TISOFunc tiso_func, TISOBroadcastFunc tiso_broadcast_func, kernels::TISOKernel *kernel, const Options *options, RuntimeShape &&input_shape_1, RuntimeShape &&input_shape_2, RuntimeShape &&output_shape)
 
void CheckBinaryOpDataTypesEqual (const kernels::TISOKernel &kernel)
 
template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void evalTISOQuantizedKernel (TISOFunc tiso_func, TISOBroadcastFunc tiso_broadcast_func, kernels::TISOKernel *kernel, kernels::TISOData *kernel_data, const Options *options)
 
template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void evalTISOInplaceQuantizedKernel (TISOFunc tiso_func, TISOBroadcastFunc tiso_broadcast_func, kernels::TISOKernel *kernel, const Options *options)
 
template<typename T >
void evalComparisonGeneric (const circle::Tensor *x, const circle::Tensor *y, const circle::Tensor *output, BaseRuntimeGraph *runtime_graph, bool F(T, T))
 
luci_interpreter::RuntimeShape getTensorRuntimeShape (const circle::Tensor *circle_tensor, BaseRuntimeGraph *runtime_graph)
 
void matrixScalarMultiplyAccumulate (const int8_t *matrix, int32_t scalar, int32_t n_row, int32_t n_col, int32_t *output)
 
bool areShapesEqual (const luci_interpreter::RuntimeShape &input_shape1, const luci_interpreter::RuntimeShape &input_shape2)
 
bool checkedLog2 (const float x, int *log2_result)
 
int calculateInputRadius (int input_integer_bits, int input_left_shift, int total_signed_bits)
 
void calculateActivationRangeQuantized (Activation activation, int32_t output_zero_point, float output_scale, DataType data_type, int32_t *activation_min, int32_t *activation_max)
 
void calculateActivationRangeQuantized (Activation activation, const circle::Tensor *output, int32_t *activation_min, int32_t *activation_max)
 
luci_interpreter::RuntimeShape calculateShapeForBroadcast (const circle::Tensor *input1, const circle::Tensor *input2)
 
int32_t calcOffset (const circle::Tensor *tensor, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
 
luci_interpreter::RuntimeShape getTensorShape (const circle::Tensor *tensor)
 
void getTensorDims (const circle::Tensor *tensor, BaseRuntimeGraph *runtime_graph, int32_t *dims)
 
template<typename T >
const T * getTensorData (const uint8_t *tensor_data)
 
template<typename T >
T * getTensorData (uint8_t *tensor_data)
 

Variables

const int max_dim = 4
 

Typedef Documentation

◆ Activation

using Activation = luci_interpreter::FusedActFunc

Function Documentation

◆ areShapesEqual()

bool luci_interpreter::kernels::areShapesEqual ( const luci_interpreter::RuntimeShape &  input_shape1,
const luci_interpreter::RuntimeShape &  input_shape2 
)

Definition at line 89 of file Utils.cpp.

91{
92 if (input_shape1.dimensionsCount() == input_shape2.dimensionsCount())
93 {
94 int N = input_shape1.dimensionsCount();
95 for (int i = 0; i < N; ++i)
96 {
97 if (input_shape1.dims(i) != input_shape2.dims(i))
98 return false;
99 }
100 return true;
101 }
102 return false;
103}
int32_t dimensionsCount() const
Definition Tensor.h:106
int32_t dims(int i) const
Definition Tensor.h:108

References luci_interpreter::RuntimeShape::dimensionsCount(), and luci_interpreter::RuntimeShape::dims().

Referenced by luci_interpreter::execute_kernel_CircleFloorDiv(), luci_interpreter::execute_kernel_CircleFloorMod(), luci_interpreter::execute_kernel_CircleMaximum(), and luci_interpreter::execute_kernel_CircleMinimum().

◆ BinaryOpBroadcastSlow()

template<typename T , typename Op , int N = 5>
void luci_interpreter::kernels::BinaryOpBroadcastSlow ( const tflite::RuntimeShape &  unextended_input1_shape,
const T *  input1_data,
const tflite::RuntimeShape &  unextended_input2_shape,
const T *  input2_data,
const tflite::RuntimeShape &  unextended_output_shape,
T *  output_data,
Op  op 
)

Definition at line 31 of file BinaryOpCommon.h.

37{
38 if (unextended_input1_shape == unextended_input2_shape)
39 {
40 const int flat_size = tflite::MatchingElementsSize(
41 unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
42 for (int i = 0; i < flat_size; ++i)
43 {
44 output_data[i] = op(input1_data[i], input2_data[i]);
45 }
46 }
47 else
48 {
49 assert(unextended_input1_shape.DimensionsCount() <= N);
50 assert(unextended_input2_shape.DimensionsCount() <= N);
51 assert(unextended_output_shape.DimensionsCount() <= N);
52
53 tflite::NdArrayDesc<N> desc1{};
54 tflite::NdArrayDesc<N> desc2{};
55 tflite::NdArrayDesc<N> output_desc{};
56 tflite::NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape,
57 &desc1, &desc2);
58 tflite::CopyDimsToDesc(tflite::RuntimeShape::ExtendedShape(N, unextended_output_shape),
59 &output_desc);
60
61 auto fn = [&](int indexes[N]) {
62 output_data[SubscriptToIndex(output_desc, indexes)] =
63 op(input1_data[SubscriptToIndex(desc1, indexes)],
64 input2_data[SubscriptToIndex(desc2, indexes)]);
65 };
66 tflite::NDOpsHelper<N>(output_desc, fn);
67 }
68}
int SubscriptToIndex(const NdArrayDesc< 4 > &desc, int i0, int i1, int i2, int i3)
Definition NDArray.h:54
NdArrayDesc< 4 > desc1
NdArrayDesc< 4 > desc2

References desc1, desc2, and SubscriptToIndex().

Referenced by luci_interpreter::kernels::LogicalOr::execute().
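
Conceptually, the helper applies op element-by-element: a flat loop when the two shapes already match, and an index-mapping walk (via the tflite NdArrayDesc machinery) when one input has to be broadcast. Below is a minimal standalone sketch of the broadcast idea for a single size-1 dimension; it does not use the tflite helpers and is only an illustration.

#include <cassert>
#include <functional>

int main()
{
  // A [2, 3] input combined with a [1, 3] input: the size-1 row dimension of the
  // second operand is reused for every row of the first, mirroring the slow path above.
  const int a[2][3] = {{1, 2, 3}, {4, 5, 6}};
  const int b[1][3] = {{10, 20, 30}};
  int out[2][3];

  auto op = std::plus<int>{};
  for (int r = 0; r < 2; ++r)
    for (int c = 0; c < 3; ++c)
      out[r][c] = op(a[r][c], b[0][c]);

  assert(out[0][0] == 11 && out[1][2] == 36);
  return 0;
}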

◆ calcOffset() [1/2]

int32_t luci_interpreter::kernels::calcOffset ( const circle::Tensor *  tensor,
int32_t  d0,
int32_t  d1,
int32_t  d2,
int32_t  d3 
)
inline

Definition at line 78 of file Utils.h.

80{
81
82 return ((d0 * Tensor::dim(tensor, 1) + d1) * Tensor::dim(tensor, 2) + d2) *
83 Tensor::dim(tensor, 3) +
84 d3;
85}
const loco::Dimension & dim(uint32_t axis) const
Definition Tensor.h:44

References circle_eval_diff::TensorShape::dim().

◆ calcOffset() [2/2]

int32_t luci_interpreter::kernels::calcOffset ( const Shape &  shape,
int32_t  d0,
int32_t  d1,
int32_t  d2,
int32_t  d3 
)
inline

Definition at line 75 of file Utils.h.

76{
77 return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3;
78}
int32_t dim(int i) const
Definition Tensor.h:41

References luci_interpreter::Shape::dim().
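
Both overloads compute the usual row-major flattening of a 4-D index. A standalone worked example of the same arithmetic (the dims array is a hypothetical stand-in for Shape::dim / Tensor::dim):

#include <cassert>
#include <cstdint>

int32_t rowMajorOffset(const int32_t dims[4], int32_t d0, int32_t d1, int32_t d2, int32_t d3)
{
  return ((d0 * dims[1] + d1) * dims[2] + d2) * dims[3] + d3;
}

int main()
{
  const int32_t dims[4] = {2, 3, 4, 5}; // e.g. a NHWC tensor with N=2, H=3, W=4, C=5
  // Element (1, 2, 3, 4) is flattened to ((1*3 + 2)*4 + 3)*5 + 4 = 119.
  assert(rowMajorOffset(dims, 1, 2, 3, 4) == 119);
  return 0;
}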

◆ calculateActivationRange() [1/4]

template void luci_interpreter::kernels::calculateActivationRange ( Activation  activation,
float *  activation_min,
float *  activation_max 
)

◆ calculateActivationRange() [2/4]

template void luci_interpreter::kernels::calculateActivationRange ( Activation  activation,
int32_t *  activation_min,
int32_t *  activation_max 
)

◆ calculateActivationRange() [3/4]

template void luci_interpreter::kernels::calculateActivationRange ( Activation  activation,
int64_t *  activation_min,
int64_t *  activation_max 
)

◆ calculateActivationRange() [4/4]

template<typename T >
void luci_interpreter::kernels::calculateActivationRange ( Activation  activation,
T *  activation_min,
T *  activation_max 
)

Definition at line 52 of file Utils.cpp.

53{
54 switch (activation)
55 {
56 case Activation::NONE:
57 *activation_min = std::numeric_limits<T>::lowest();
58 *activation_max = std::numeric_limits<T>::max();
59 break;
60 case Activation::RELU:
61 *activation_min = 0;
62 *activation_max = std::numeric_limits<T>::max();
63 break;
64 case Activation::RELU_N1_TO_1:
65 *activation_min = -1;
66 *activation_max = 1;
67 break;
68 case Activation::RELU6:
69 *activation_min = 0;
70 *activation_max = 6;
71 break;
72 default:
73 throw std::runtime_error("Unsupported activation.");
74 }
75}

Referenced by luci_interpreter::createConv2DParams(), luci_interpreter::createPoolParams(), luci_interpreter::kernels::L2Pool2D::execute(), and fillArithmeticActivationRange().
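
A short standalone example of how such an (activation_min, activation_max) pair is typically consumed: the kernel's raw outputs are clamped to the range. The values below assume the RELU6 case from the listing above.

#include <algorithm>
#include <cassert>

int main()
{
  // For RELU6 the listing above produces activation_min = 0 and activation_max = 6.
  const float activation_min = 0.0f;
  const float activation_max = 6.0f;

  const float raw_outputs[] = {-3.5f, 2.0f, 9.0f};
  float clamped[3];
  for (int i = 0; i < 3; ++i)
    clamped[i] = std::min(activation_max, std::max(activation_min, raw_outputs[i]));

  assert(clamped[0] == 0.0f && clamped[1] == 2.0f && clamped[2] == 6.0f);
  return 0;
}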

◆ calculateActivationRangeQuantized() [1/3]

void luci_interpreter::kernels::calculateActivationRangeQuantized ( Activation  activation,
const circle::Tensor *  output,
int32_t *  activation_min,
int32_t *  activation_max 
)

Definition at line 207 of file Utils.cpp.

209{
210 assert(Tensor::zero_points(output).size() == 1);
211 const float scale = Tensor::scale(output);
212 const int32_t zero_point = Tensor::zero_point(output);
213 calculateActivationRangeQuantized(activation, zero_point, scale, Tensor::element_type(output),
214 activation_min, activation_max);
215}
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
Definition Utils.cpp:119
int32_t size[5]
Definition Slice.cpp:35

References calculateActivationRangeQuantized(), and size.

◆ calculateActivationRangeQuantized() [2/3]

void luci_interpreter::kernels::calculateActivationRangeQuantized ( Activation  activation,
const Tensor *  output,
int32_t *  activation_min,
int32_t *  activation_max 
)

Definition at line 119 of file Utils.cpp.

121{
122 assert(output->zero_points().size() == 1);
123 int32_t qmin{};
124 int32_t qmax{};
125 switch (output->element_type())
126 {
127 case DataType::U4:
128 qmin = 0;
129 qmax = 15;
130 break;
131 case DataType::U8:
132 qmin = 0;
133 qmax = std::numeric_limits<uint8_t>::max();
134 break;
135 case DataType::S4:
136 qmin = -8;
137 qmax = 7;
138 break;
139 case DataType::S8:
140 qmin = -std::numeric_limits<int8_t>::max();
141 qmax = std::numeric_limits<int8_t>::max();
142 break;
143 case DataType::S16:
144 // For now, assume that signed int16 type implies signed symmetric quantization.
145 assert(output->zero_point() == 0);
146 qmin = -std::numeric_limits<int16_t>::max();
147 qmax = std::numeric_limits<int16_t>::max();
148 break;
149 default:
150 throw std::runtime_error("luci-intp (calculateActivationRangeQuantized) Unsupported type.");
151 }
152
153 calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, activation_min,
154 activation_max);
155}

Referenced by calculateActivationRangeQuantized(), luci_interpreter::createConv2DParams(), luci_interpreter::createPoolParams(), and evalTISOQuantizedKernel().

◆ calculateActivationRangeQuantized() [3/3]

void luci_interpreter::kernels::calculateActivationRangeQuantized ( Activation  activation,
int32_t  output_zero_point,
float  output_scale,
DataType  data_type,
int32_t *  activation_min,
int32_t *  activation_max 
)

Definition at line 177 of file Utils.cpp.

180{
181 int32_t qmin{};
182 int32_t qmax{};
183 switch (data_type)
184 {
185 case DataType::U8:
186 qmin = 0;
187 qmax = std::numeric_limits<uint8_t>::max();
188 break;
189 case DataType::S8:
190 qmin = -std::numeric_limits<int8_t>::max();
191 qmax = std::numeric_limits<int8_t>::max();
192 break;
193 case DataType::S16:
194 // For now, assume that signed int16 type implies signed symmetric quantization.
195 assert(output_zero_point == 0);
196 qmin = -std::numeric_limits<int16_t>::max();
197 qmax = std::numeric_limits<int16_t>::max();
198 break;
199 default:
200 assert(false && "Unsupported type.");
201 }
202
203 calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output_zero_point, output_scale,
204 activation_min, activation_max);
205}

◆ calculateInputRadius()

int luci_interpreter::kernels::calculateInputRadius ( int  input_integer_bits,
int  input_left_shift,
int  total_signed_bits 
)

Definition at line 123 of file Utils.cpp.

124{
125 const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
126 (1LL << (total_signed_bits - input_integer_bits)) /
127 (1LL << input_left_shift);
128 // Tighten bound using floor. Suppose that we could use the exact value.
129 // After scaling the difference, the result would be at the maximum. Thus we
130 // must ensure that our value has lower magnitude.
131 return static_cast<int>(std::floor(max_input_rescaled));
132}

◆ calculateOutputShape()

template<typename T >
Shape luci_interpreter::kernels::calculateOutputShape ( const Tensor *  input,
const Tensor *  begin,
const Tensor *  size 
)

Definition at line 37 of file Slice.cpp.

38{
39 Shape output_shape = Shape(input->shape().num_dims());
40 for (int idx = 0; idx < input->shape().num_dims(); idx++)
41 {
42 T size_value = getTensorData<T>(size)[idx];
43 if (size_value < 0)
44 {
45 if (size_value != -1)
46 {
47 throw std::runtime_error("Invalid size.");
48 }
49 size_value = input->shape().dim(idx) - getTensorData<T>(begin)[idx];
50 }
51 else
52 {
53 if (input->shape().dim(idx) < getTensorData<T>(begin)[idx] + size_value)
54 {
55 throw std::runtime_error("Invalid begin and size.");
56 }
57 }
58 output_shape.dim(idx) = static_cast<int>(size_value);
59 }
60 return output_shape;
61}
const luci_interpreter::RuntimeShape output_shape
int32_t begin[5]
Definition Slice.cpp:33

References begin, output_shape, and size.
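
A standalone sketch of the rule implemented above: a size entry of -1 means "up to the end of that dimension", and otherwise begin + size must fit inside the input dimension.

#include <cassert>
#include <stdexcept>
#include <vector>

std::vector<int> sliceOutputShape(const std::vector<int> &input_dims, const std::vector<int> &begin,
                                  const std::vector<int> &size)
{
  std::vector<int> out(input_dims.size());
  for (size_t i = 0; i < input_dims.size(); ++i)
  {
    int s = size[i];
    if (s < 0)
    {
      if (s != -1)
        throw std::runtime_error("Invalid size.");
      s = input_dims[i] - begin[i]; // -1 means "take everything from begin to the end"
    }
    else if (input_dims[i] < begin[i] + s)
    {
      throw std::runtime_error("Invalid begin and size.");
    }
    out[i] = s;
  }
  return out;
}

int main()
{
  // Input [4, 6], begin [1, 0], size [-1, 3]  ->  output [3, 3].
  assert(sliceOutputShape({4, 6}, {1, 0}, {-1, 3}) == (std::vector<int>{3, 3}));
  return 0;
}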

◆ calculateShapeForBroadcast() [1/2]

luci_interpreter::RuntimeShape luci_interpreter::kernels::calculateShapeForBroadcast ( const circle::Tensor *  input1,
const circle::Tensor *  input2 
)

Definition at line 265 of file Utils.cpp.

267{
268 const int num_input1_dims = Tensor::num_dims(input1);
269 const int num_input2_dims = Tensor::num_dims(input2);
270 const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
271 luci_interpreter::RuntimeShape output_shape(num_out_dims);
272
273 for (int i = 0; i < num_out_dims; ++i)
274 {
275 const int32_t input1_dim =
276 i < num_input1_dims ? Tensor::dim(input1, num_input1_dims - i - 1) : 1;
277 const int32_t input2_dim =
278 i < num_input2_dims ? Tensor::dim(input2, num_input2_dims - i - 1) : 1;
279
280 bool need_broadcast = input1_dim != input2_dim;
281 bool can_broadcast = input1_dim == 1 || input2_dim == 1;
282 LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
283
284 output_shape.setDim(num_out_dims - i - 1, std::max(input1_dim, input2_dim));
285 }
286
287 return output_shape;
288}
void setDim(int i, int32_t val)
Definition Tensor.h:114
#define LUCI_INTERPRETER_CHECK(cond)
Definition Utils.h:36

References circle_eval_diff::TensorShape::dim(), LUCI_INTERPRETER_CHECK, output_shape, and luci_interpreter::RuntimeShape::setDim().

◆ calculateShapeForBroadcast() [2/2]

Shape luci_interpreter::kernels::calculateShapeForBroadcast ( const Shape &  input1_shape,
const Shape &  input2_shape 
)

Definition at line 204 of file Utils.cpp.

205{
206 const int num_input1_dims = input1_shape.num_dims();
207 const int num_input2_dims = input2_shape.num_dims();
208 const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
209 Shape output_shape(num_out_dims);
210
211 for (int i = 0; i < num_out_dims; ++i)
212 {
213 const int32_t input1_dim = i < num_input1_dims ? input1_shape.dim(num_input1_dims - i - 1) : 1;
214 const int32_t input2_dim = i < num_input2_dims ? input2_shape.dim(num_input2_dims - i - 1) : 1;
215
216 bool need_broadcast = input1_dim != input2_dim;
217 bool can_broadcast = input1_dim == 1 || input2_dim == 1;
218 LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
219
220 output_shape.dim(num_out_dims - i - 1) = std::max(input1_dim, input2_dim);
221 }
222
223 return output_shape;
224}
int num_dims() const
Definition Tensor.h:39

References luci_interpreter::Shape::dim(), LUCI_INTERPRETER_CHECK, luci_interpreter::Shape::num_dims(), and output_shape.

Referenced by luci_interpreter::kernels::Add::configure(), luci_interpreter::kernels::Div::configure(), luci_interpreter::kernels::Equal::configure(), luci_interpreter::kernels::FloorDiv::configure(), luci_interpreter::kernels::FloorMod::configure(), luci_interpreter::kernels::Greater::configure(), luci_interpreter::kernels::GreaterEqual::configure(), luci_interpreter::kernels::Less::configure(), luci_interpreter::kernels::LessEqual::configure(), luci_interpreter::kernels::LogicalAnd::configure(), luci_interpreter::kernels::LogicalOr::configure(), luci_interpreter::kernels::Maximum::configure(), luci_interpreter::kernels::Minimum::configure(), luci_interpreter::kernels::Mul::configure(), luci_interpreter::kernels::NotEqual::configure(), luci_interpreter::kernels::Pow::configure(), luci_interpreter::kernels::PRelu::configure(), luci_interpreter::kernels::Select::configure(), luci_interpreter::kernels::SelectV2::configure(), luci_interpreter::kernels::SquaredDifference::configure(), and luci_interpreter::kernels::Sub::configure().
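
Both overloads implement right-aligned (NumPy/TFLite-style) broadcasting: dimensions are compared from the trailing axis, a dimension of size 1 stretches to match the other, and anything else must be equal. A standalone sketch of the same rule using std::vector shapes:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <stdexcept>
#include <vector>

std::vector<int32_t> broadcastShape(const std::vector<int32_t> &a, const std::vector<int32_t> &b)
{
  const int na = static_cast<int>(a.size());
  const int nb = static_cast<int>(b.size());
  const int nout = std::max(na, nb);
  std::vector<int32_t> out(nout);
  for (int i = 0; i < nout; ++i)
  {
    const int32_t da = i < na ? a[na - i - 1] : 1; // missing leading dims count as 1
    const int32_t db = i < nb ? b[nb - i - 1] : 1;
    if (da != db && da != 1 && db != 1)
      throw std::runtime_error("Shapes are not broadcastable.");
    out[nout - i - 1] = std::max(da, db);
  }
  return out;
}

int main()
{
  // [2, 1, 3] broadcast against [4, 1] yields [2, 4, 3].
  assert(broadcastShape({2, 1, 3}, {4, 1}) == (std::vector<int32_t>{2, 4, 3}));
  return 0;
}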

◆ CheckBinaryOpDataTypesEqual()

void luci_interpreter::kernels::CheckBinaryOpDataTypesEqual ( const kernels::TISOKernel &  kernel)
inline

Definition at line 116 of file BinaryOpCommon.h.

117{
118 LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
119 Tensor::element_type(kernel.input2()));
120 LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
121 Tensor::element_type(kernel.output()));
122}
const circle::Tensor * output() const
Definition TISOKernel.h:62
const circle::Tensor * input2() const
Definition TISOKernel.h:61
const circle::Tensor * input1() const
Definition TISOKernel.h:60

References luci_interpreter::kernels::TISOKernel::input1(), luci_interpreter::kernels::TISOKernel::input2(), LUCI_INTERPRETER_CHECK, and luci_interpreter::kernels::TISOKernel::output().

Referenced by luci_interpreter::configure_kernel_CircleMaximum(), and luci_interpreter::configure_kernel_CircleMinimum().

◆ checkedLog2()

bool luci_interpreter::kernels::checkedLog2 ( const float  x,
int *  log2_result 
)

Definition at line 113 of file Utils.cpp.

114{
115 const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
116 const float x_log2_rounded = std::round(x_log2);
117 const float x_log2_fracpart = x_log2 - x_log2_rounded;
118
119 *log2_result = static_cast<int>(x_log2_rounded);
120 return std::abs(x_log2_fracpart) < 1e-3f;
121}

◆ computeOutputSize()

int32_t luci_interpreter::kernels::computeOutputSize ( Padding  padding,
int32_t  image_size,
int32_t  filter_size,
int32_t  stride,
int32_t  dilation_rate = 1 
)
inline

Definition at line 59 of file Utils.h.

61{
62 const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
63 switch (padding)
64 {
65 case Padding::SAME:
66 return (image_size + stride - 1) / stride;
67 case Padding::VALID:
68 return (image_size + stride - effective_filter_size) / stride;
69 default:
70 assert(false);
71 return 0;
72 }
73}

Referenced by luci_interpreter::computeConvPadding(), luci_interpreter::kernels::AveragePool2D::configure(), luci_interpreter::kernels::Conv2D::configure(), luci_interpreter::kernels::DepthwiseConv2D::configure(), luci_interpreter::kernels::L2Pool2D::configure(), luci_interpreter::kernels::MaxPool2D::configure(), luci_interpreter::kernels::TransposeConv::configure(), and luci_interpreter::createPoolParams().

◆ computePadding()

int32_t luci_interpreter::kernels::computePadding ( int32_t  stride,
int32_t  dilation_rate,
int32_t  in_size,
int32_t  filter_size,
int32_t  out_size 
)
inline

Definition at line 41 of file Utils.h.

43{
44 const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
45 const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
46 return padding > 0 ? padding : 0;
47}

Referenced by luci_interpreter::computeConvPadding(), luci_interpreter::kernels::AveragePool2D::configure(), luci_interpreter::kernels::Conv2D::configure(), luci_interpreter::kernels::DepthwiseConv2D::configure(), luci_interpreter::kernels::L2Pool2D::configure(), luci_interpreter::kernels::MaxPool2D::configure(), luci_interpreter::kernels::TransposeConv::configure(), and luci_interpreter::createPoolParams().
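
A short worked example tying computeOutputSize and computePadding together for SAME padding. This is a standalone restatement of the two formulas above; the sizes are illustrative.

#include <cassert>
#include <cstdint>

int32_t outputSizeSame(int32_t image_size, int32_t stride)
{
  // SAME-padding branch of computeOutputSize: ceil(image_size / stride).
  return (image_size + stride - 1) / stride;
}

int32_t samePadding(int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size,
                    int32_t out_size)
{
  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
  return padding > 0 ? padding : 0;
}

int main()
{
  // 224x224 input, 3x3 filter, stride 1: output stays 224 and each side gets 1 pixel of padding.
  assert(outputSizeSame(224, 1) == 224);
  assert(samePadding(1, 1, 224, 3, 224) == 1);

  // Stride 2: output is ceil(224 / 2) = 112; total padding is 1, so the "before" side gets 0
  // and computePaddingWithOffset would report the leftover 1 through its offset argument.
  assert(outputSizeSame(224, 2) == 112);
  assert(samePadding(2, 1, 224, 3, 112) == 0);
  return 0;
}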

◆ computePaddingWithOffset()

int32_t luci_interpreter::kernels::computePaddingWithOffset ( int32_t  stride,
int32_t  dilation_rate,
int32_t  in_size,
int32_t  filter_size,
int32_t  out_size,
int32_t *  offset 
)
inline

Definition at line 49 of file Utils.h.

51{
52 int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
53 int32_t total_padding = ((out_size - 1) * stride + effective_filter_size - in_size);
54 total_padding = total_padding > 0 ? total_padding : 0;
55 *offset = total_padding % 2;
56 return total_padding / 2;
57}
__global uchar * offset(const Image *img, int x, int y)
Definition helpers.h:540

References offset().

◆ evalComparisonGeneric()

template<typename T >
void luci_interpreter::kernels::evalComparisonGeneric ( const circle::Tensor *  x,
const circle::Tensor *  y,
const circle::Tensor *  output,
BaseRuntimeGraph *  runtime_graph,
bool  F(T, T) 
)

Definition at line 31 of file ComparisonCommon.h.

34{
35 auto x_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(x));
36 if (x_data == nullptr)
37 x_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(x));
38
39 assert(x_data != nullptr);
40
41 auto y_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(y));
42 if (y_data == nullptr)
43 y_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(y));
44
45 assert(y_data != nullptr);
46
47 auto output_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(output));
48
49 luci_interpreter_pal::ComparisonParams op_params;
50 op_params.is_broadcast = Tensor::num_elements(x) != Tensor::num_elements(y);
51
52 if (op_params.is_broadcast)
53 {
54 luci_interpreter_pal::BroadcastComparison4DSlowNoScaling<T>(
55 op_params, kernels::getTensorShape(x), x_data, kernels::getTensorShape(y), y_data,
56 kernels::getTensorShape(output), output_data, F);
57 }
58 else
59 {
60 const int64_t flat_size = kernels::getTensorShape(x).flatSize();
61 luci_interpreter_pal::ComparisonNoScaling<T>(flat_size, x_data, y_data, output_data, F);
62 }
63}
uint8_t * getConstDataByTensor(const circle::Tensor *raw_tensor)
uint8_t * getDataByTensor(const circle::Tensor *raw_tensor)

References luci_interpreter::RuntimeGraph::getConstDataByTensor(), luci_interpreter::RuntimeGraph::getDataByTensor(), getTensorShape(), and luci_interpreter_pal::ComparisonParams::is_broadcast.

◆ evalTISOInplaceKernel()

template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void luci_interpreter::kernels::evalTISOInplaceKernel ( TISOFunc  tiso_func,
TISOBroadcastFunc  tiso_broadcast_func,
kernels::TISOKernel *  kernel,
const Options *  options,
RuntimeShape &&  input_shape_1,
RuntimeShape &&  input_shape_2,
RuntimeShape &&  output_shape 
)

Definition at line 89 of file BinaryOpCommon.h.

93{
94 uint8_t *inplace_data_ptr = nullptr;
95 circle::Tensor *input_inplace_tensor = nullptr;
96
97 kernels::TISOData kernel_data = kernel->readInplaceData(inplace_data_ptr, input_inplace_tensor);
98
99 evalTISOKernel<T, TISOFunc, TISOBroadcastFunc, Options>(
100 tiso_func, tiso_broadcast_func, kernel, &kernel_data, options, std::move(input_shape_1),
101 std::move(input_shape_2), std::move(output_shape));
102
103 BaseRuntimeGraph *runtime_graph = kernel->runtime_graph();
104
105 runtime_graph->makeInplaceOperation(input_inplace_tensor, kernel->output());
106 if (input_inplace_tensor == kernel->input1())
107 {
108 runtime_graph->makeInplaceOperation(kernel->input2(), nullptr);
109 }
110 else
111 {
112 runtime_graph->makeInplaceOperation(kernel->input1(), nullptr);
113 }
114}
void makeInplaceOperation(const circle::Tensor *src_tensor, const circle::Tensor *dst_tensor)
BaseRuntimeGraph * runtime_graph() const
Definition TISOKernel.h:64
TISOData readInplaceData(uint8_t *&inplace_data_ptr, circle::Tensor *&input_inplace_tensor)
Definition TISOKernel.h:84

References luci_interpreter::kernels::TISOKernel::input1(), luci_interpreter::kernels::TISOKernel::input2(), luci_interpreter::RuntimeGraph::makeInplaceOperation(), luci_interpreter::kernels::TISOKernel::output(), output_shape, luci_interpreter::kernels::TISOKernel::readInplaceData(), and luci_interpreter::kernels::TISOKernel::runtime_graph().

◆ evalTISOInplaceQuantizedKernel()

template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void luci_interpreter::kernels::evalTISOInplaceQuantizedKernel ( TISOFunc  tiso_func,
TISOBroadcastFunc  tiso_broadcast_func,
kernels::TISOKernel *  kernel,
const Options *  options 
)

Definition at line 195 of file BinaryOpCommon.h.

197{
198 uint8_t *inplace_data_ptr = nullptr;
199 circle::Tensor *input_inplace_tensor = nullptr;
200
201 kernels::TISOData kernel_data = kernel->readInplaceData(inplace_data_ptr, input_inplace_tensor);
202
203 evalTISOQuantizedKernel<T, TISOFunc, TISOBroadcastFunc, Options>(tiso_func, tiso_broadcast_func,
204 kernel, &kernel_data, options);
205
206 kernel->runtime_graph()->makeInplaceOperation(input_inplace_tensor, kernel->output());
207 if (input_inplace_tensor == kernel->input1())
208 {
209 kernel->runtime_graph()->makeInplaceOperation(kernel->input2(), nullptr);
210 }
211 else
212 {
213 kernel->runtime_graph()->makeInplaceOperation(kernel->input1(), nullptr);
214 }
215}

References luci_interpreter::kernels::TISOKernel::input1(), luci_interpreter::kernels::TISOKernel::input2(), luci_interpreter::RuntimeGraph::makeInplaceOperation(), luci_interpreter::kernels::TISOKernel::output(), luci_interpreter::kernels::TISOKernel::readInplaceData(), and luci_interpreter::kernels::TISOKernel::runtime_graph().

◆ evalTISOKernel()

template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void luci_interpreter::kernels::evalTISOKernel ( TISOFunc  tiso_func,
TISOBroadcastFunc  tiso_broadcast_func,
kernels::TISOKernel *  kernel,
kernels::TISOData *  kernel_data,
const Options *  options,
RuntimeShape &&  input_shape_1,
RuntimeShape &&  input_shape_2,
RuntimeShape &&  output_shape 
)

Definition at line 61 of file BinaryOpCommon.h.

65{
66 luci_interpreter_pal::ArithmeticParams params{};
67 fillArithmeticActivationRange<T>(params, luci_actfunc(options->fused_activation_function()));
68
69 const bool need_broadcast =
70 luci_interpreter_pal::ProcessBroadcastShapes(input_shape_1, input_shape_2, &params);
71
72 if (need_broadcast)
73 {
74 tiso_broadcast_func(params, input_shape_1, kernels::getTensorData<T>(kernel_data->input1_data),
75 input_shape_2, kernels::getTensorData<T>(kernel_data->input2_data),
76 output_shape, kernels::getTensorData<T>(kernel_data->output_data));
77 }
78 else
79 {
80 const int flat_size = input_shape_1.flatSize();
81 tiso_func(params, flat_size, kernels::getTensorData<T>(kernel_data->input1_data),
82 kernels::getTensorData<T>(kernel_data->input2_data),
83 kernels::getTensorData<T>(kernel_data->output_data));
84 }
85}
bool ProcessBroadcastShapes(const luci_interpreter::RuntimeShape &shape0, const luci_interpreter::RuntimeShape &shape1, luci_interpreter_pal::ArithmeticParams *params)
FusedActFunc luci_actfunc(const circle::ActivationFunctionType type)

References luci_interpreter::kernels::TISOData::input1_data, luci_interpreter::kernels::TISOData::input2_data, luci::luci_actfunc(), luci_interpreter::kernels::TISOData::output_data, output_shape, and luci_interpreter_pal::ProcessBroadcastShapes().

◆ evalTISOQuantizedKernel()

template<typename T , typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t, typename Options = nullptr_t>
void luci_interpreter::kernels::evalTISOQuantizedKernel ( TISOFunc  tiso_func,
TISOBroadcastFunc  tiso_broadcast_func,
kernels::TISOKernel *  kernel,
kernels::TISOData *  kernel_data,
const Options *  options 
)

Definition at line 127 of file BinaryOpCommon.h.

130{
131 const auto *input1 = kernel->input1();
132 const auto *input2 = kernel->input2();
133 const auto *output = kernel->output();
134
135 const auto input1_scale = static_cast<double>(Tensor::scale(input1));
136 const auto input2_scale = static_cast<double>(Tensor::scale(input2));
137 const auto output_scale = static_cast<double>(Tensor::scale(output));
138
139 const int left_shift = 20;
140 const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
141 const double real_input1_multiplier = input1_scale / twice_max_input_scale;
142 const double real_input2_multiplier = input2_scale / twice_max_input_scale;
143 const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
144
145 int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
146 int input1_shift{}, input2_shift{}, output_shift{};
147 kernels::quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier,
148 &input1_shift);
149 kernels::quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier,
150 &input2_shift);
151 kernels::quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier,
152 &output_shift);
153
154 int32_t activation_min{};
155 int32_t activation_max{};
156 kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
157 output, &activation_min, &activation_max);
158
159 luci_interpreter_pal::ArithmeticParams params{};
160 params.left_shift = left_shift;
161 // The kernel expects inputs' zero points to be negated.
162 params.input1_offset = -Tensor::zero_point(input1); // Note the '-'.
163 params.input1_multiplier = input1_multiplier;
164 params.input1_shift = input1_shift;
165 params.input2_offset = -Tensor::zero_point(input2); // Note the '-'.
166 params.input2_multiplier = input2_multiplier;
167 params.input2_shift = input2_shift;
168 params.output_offset = Tensor::zero_point(output);
169 params.output_multiplier = output_multiplier;
170 params.output_shift = output_shift;
171 params.quantized_activation_min = activation_min;
172 params.quantized_activation_max = activation_max;
173
174 const bool need_broadcast = luci_interpreter_pal::ProcessBroadcastShapes(
175 kernels::getTensorShape(input1), kernels::getTensorShape(input2), &params);
176
177 if (need_broadcast)
178 {
179 tiso_broadcast_func(
180 params, kernels::getTensorShape(input1), kernels::getTensorData<T>(kernel_data->input1_data),
181 kernels::getTensorShape(input2), kernels::getTensorData<T>(kernel_data->input2_data),
182 kernels::getTensorShape(output), kernels::getTensorData<T>(kernel_data->output_data));
183 }
184 else
185 {
186 tiso_func(params, kernels::getTensorShape(input1),
187 kernels::getTensorData<uint8_t>(kernel_data->input1_data),
188 kernels::getTensorShape(input2), kernels::getTensorData<T>(kernel_data->input2_data),
189 kernels::getTensorShape(output), kernels::getTensorData<T>(kernel_data->output_data));
190 }
191}

References calculateActivationRangeQuantized(), getTensorShape(), luci_interpreter::kernels::TISOKernel::input1(), luci_interpreter::kernels::TISOData::input1_data, luci_interpreter::kernels::TISOKernel::input2(), luci_interpreter::kernels::TISOData::input2_data, luci_interpreter_pal::ArithmeticParams::left_shift, luci::luci_actfunc(), luci_interpreter::kernels::TISOKernel::output(), luci_interpreter::kernels::TISOData::output_data, luci_interpreter_pal::ProcessBroadcastShapes(), and quantizeMultiplierSmallerThanOneExp().

◆ fillArithmeticActivationRange()

template<typename T >
void luci_interpreter::kernels::fillArithmeticActivationRange ( tflite::ArithmeticParams &  p,
Activation  act 
)

Fills the activation min and max parameters depending on the given data type and activation.

T is a template parameter, so after optimization only the branch required for that type remains in the compiled code.

Template Parameters
    T           data type of the arithmetic operation's output tensor
Parameters
    params      tflite params to fill
    activation  luci_interpreter::Activation of the arithmetic operation

Definition at line 106 of file Utils.h.

107{
108 static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
109
110 if (std::is_same<T, float>::value)
111 calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
112 if (std::is_same<T, int32_t>::value)
113 calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
114 else
115 calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
116}
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
Definition Utils.cpp:52

References calculateActivationRange().

◆ getBeginAndSizeVectors()

template<typename T >
void luci_interpreter::kernels::getBeginAndSizeVectors ( int  dimensions,
const Tensor *  begin,
const Tensor *  size,
std::vector< int > *  begins,
std::vector< int > *  sizes 
)

Definition at line 64 of file Slice.cpp.

66{
67 for (int idx = dimensions - 1; idx >= 0; --idx)
68 {
69 begins->push_back(getTensorData<T>(begin)[idx]);
70 sizes->push_back(getTensorData<T>(size)[idx]);
71 }
72}

References begin, and size.

◆ getQuantizedConvolutionMultipler()

double luci_interpreter::kernels::getQuantizedConvolutionMultipler ( float  input_scale,
float  filter_scale,
float  output_scale 
)
inline

Definition at line 137 of file Utils.h.

139{
140 const double input_product_scale = static_cast<double>(input_scale * filter_scale);
141 LUCI_INTERPRETER_CHECK(input_product_scale >= 0);
142 return input_product_scale / static_cast<double>(output_scale);
143}

References LUCI_INTERPRETER_CHECK.

Referenced by getQuantizedConvolutionMultiplers().

◆ getQuantizedConvolutionMultiplers()

std::vector< double > luci_interpreter::kernels::getQuantizedConvolutionMultiplers ( float  input_scale,
const std::vector< float > &  filter_scale,
float  output_scale 
)
inline

Definition at line 147 of file Utils.h.

150{
151 std::vector<double> effective_output_scales;
152 size_t n = filter_scale.size();
153 effective_output_scales.reserve(n);
154 for (size_t i = 0; i < n; ++i)
155 {
156 effective_output_scales.push_back(
157 getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
158 }
159 return effective_output_scales;
160}
double getQuantizedConvolutionMultipler(float input_scale, float filter_scale, float output_scale)
Definition Utils.h:137

References getQuantizedConvolutionMultipler().

Referenced by luci_interpreter::kernels::PRelu::configure(), luci_interpreter::kernels::TransposeConv::configure(), and luci_interpreter::createConv2DParams().
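
A standalone worked example of the per-channel effective scale computed above, effective_scale[i] = input_scale * filter_scale[i] / output_scale; in the kernels these values are then converted to fixed-point pairs by quantizeMultipliers. The scale values below are made up for illustration.

#include <cassert>
#include <cmath>
#include <vector>

int main()
{
  // Hypothetical quantization parameters for a per-channel quantized convolution.
  const float input_scale = 0.5f;
  const std::vector<float> filter_scales = {0.02f, 0.04f};
  const float output_scale = 0.1f;

  std::vector<double> effective;
  for (float fs : filter_scales)
    effective.push_back(static_cast<double>(input_scale * fs) / static_cast<double>(output_scale));

  // 0.5 * 0.02 / 0.1 = 0.1 and 0.5 * 0.04 / 0.1 = 0.2 (up to float rounding).
  assert(std::abs(effective[0] - 0.1) < 1e-6);
  assert(std::abs(effective[1] - 0.2) < 1e-6);
  return 0;
}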

◆ getTensorData() [1/4]

template<typename T >
const T * luci_interpreter::kernels::getTensorData ( const Tensor *  tensor)

Definition at line 208 of file Utils.h.

209{
210 return tensor != nullptr ? tensor->data<T>() : nullptr;
211}

◆ getTensorData() [2/4]

template<typename T >
const T * luci_interpreter::kernels::getTensorData ( const uint8_t *  tensor_data)

Definition at line 158 of file Utils.h.

159{
160 return tensor_data != nullptr ? reinterpret_cast<const T *>(tensor_data) : nullptr;
161}

◆ getTensorData() [3/4]

template<typename T >
T * luci_interpreter::kernels::getTensorData ( Tensor *  tensor)

Definition at line 213 of file Utils.h.

214{
215 return tensor != nullptr ? tensor->data<T>() : nullptr;
216}

◆ getTensorData() [4/4]

template<typename T >
T * luci_interpreter::kernels::getTensorData ( uint8_t *  tensor_data)
inline

Definition at line 163 of file Utils.h.

164{
165 return tensor_data != nullptr ? reinterpret_cast<T *>(tensor_data) : nullptr;
166}

◆ getTensorDims()

void luci_interpreter::kernels::getTensorDims ( const circle::Tensor *  tensor,
BaseRuntimeGraph *  runtime_graph,
int32_t *  dims 
)
inline

Definition at line 121 of file Utils.h.

123{
124 if (tensor == nullptr)
125 {
126 dims = nullptr;
127 return;
128 }
129
130#ifndef DIS_DYN_SHAPES
131 auto *dynamic_shape_vector = runtime_graph->getDynamicShapeTensor(tensor);
132 if (dynamic_shape_vector != nullptr)
133 {
134 for (int n = 0; n < dynamic_shape_vector->dimensionsCount(); ++n)
135 {
136 dims[n] = dynamic_shape_vector->dims(n);
137 }
138 }
139 else
140 {
141 auto const tensor_shape = Tensor::tensor_shape(tensor);
142 assert(tensor_shape.size() <= kMaxSmallSize);
143 for (int i = 0; i < tensor_shape.size(); ++i)
144 {
145 dims[i] = tensor_shape[i];
146 }
147 }
148#else
149 auto const tensor_shape = Tensor::tensor_shape(tensor);
150 assert(tensor_shape.size() <= kMaxSmallSize);
151 for (int i = 0; i < tensor_shape.size(); ++i)
152 {
153 dims[i] = tensor_shape[i];
154 }
155#endif // DIS_DYN_SHAPES
156}
luci_interpreter::RuntimeShape * getDynamicShapeTensor(const circle::Tensor *tensor)

References luci_interpreter::RuntimeShape::dims(), and luci_interpreter::RuntimeGraph::getDynamicShapeTensor().

Referenced by luci_interpreter_pal::lstm_internal::calculateLstmGate().

◆ getTensorRuntimeShape()

luci_interpreter::RuntimeShape luci_interpreter::kernels::getTensorRuntimeShape ( const circle::Tensor *  circle_tensor,
BaseRuntimeGraph *  runtime_graph 
)

Definition at line 29 of file Utils.cpp.

31{
32 luci_interpreter::RuntimeShape input_shape = getTensorShape(circle_tensor);
33
34#ifndef DIS_DYN_SHAPES
35 auto *dynamic_shape_vector = runtime_graph->getDynamicShapeTensor(circle_tensor);
36 if (dynamic_shape_vector != nullptr)
37 {
38 input_shape.resize(dynamic_shape_vector->dimensionsCount());
39
40 for (int n = 0; n < dynamic_shape_vector->dimensionsCount(); ++n)
41 {
42 input_shape.setDim(n, dynamic_shape_vector->dims(n));
43 }
44 }
45#endif // DIS_DYN_SHAPES
46 return input_shape;
47}
void resize(int dimensions_count)
Definition Tensor.h:121
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
Definition Utils.h:194

References luci_interpreter::RuntimeGraph::getDynamicShapeTensor(), getTensorShape(), luci_interpreter::RuntimeShape::resize(), and luci_interpreter::RuntimeShape::setDim().

Referenced by luci_interpreter::evalInteger(), luci_interpreter::execute_kernel_CircleAbs(), luci_interpreter::execute_kernel_CircleAdd(), luci_interpreter::execute_kernel_CircleArgMax(), luci_interpreter::execute_kernel_CircleArgMin(), luci_interpreter::execute_kernel_CircleBatchToSpaceND(), luci_interpreter::execute_kernel_CircleBroadcastTo(), luci_interpreter::execute_kernel_CircleCast(), luci_interpreter::execute_kernel_CircleCeil(), luci_interpreter::execute_kernel_CircleCos(), luci_interpreter::execute_kernel_CircleDepthToSpace(), luci_interpreter::execute_kernel_CircleDequantize(), luci_interpreter::execute_kernel_CircleDiv(), luci_interpreter::execute_kernel_CircleElu(), luci_interpreter::execute_kernel_CircleExp(), luci_interpreter::execute_kernel_CircleFloorDiv(), luci_interpreter::execute_kernel_CircleFloorMod(), luci_interpreter::execute_kernel_CircleGatherND(), luci_interpreter::execute_kernel_CircleL2Normalize(), luci_interpreter::execute_kernel_CircleLeakyRelu(), luci_interpreter::execute_kernel_CircleLog(), luci_interpreter::execute_kernel_CircleLogistic(), luci_interpreter::execute_kernel_CircleLogSoftmax(), luci_interpreter::execute_kernel_CircleMaximum(), luci_interpreter::execute_kernel_CircleMinimum(), luci_interpreter::execute_kernel_CircleMul(), luci_interpreter::execute_kernel_CirclePRelu(), luci_interpreter::execute_kernel_CircleQuantize(), luci_interpreter::execute_kernel_CircleRelu(), luci_interpreter::execute_kernel_CircleRelu6(), luci_interpreter::execute_kernel_CircleResizeNearestNeighbor(), luci_interpreter::execute_kernel_CircleRound(), luci_interpreter::execute_kernel_CircleRsqrt(), luci_interpreter::execute_kernel_CircleSin(), luci_interpreter::execute_kernel_CircleSpaceToBatchND(), luci_interpreter::execute_kernel_CircleSpaceToDepth(), luci_interpreter::execute_kernel_CircleSqrt(), luci_interpreter::execute_kernel_CircleSquare(), luci_interpreter::execute_kernel_CircleSquaredDifference(), luci_interpreter::execute_kernel_CircleSub(), luci_interpreter::execute_kernel_CircleTanh(), and luci_interpreter::execute_kernel_CircleZerosLike().

◆ getTensorShape() [1/2]

luci_interpreter::RuntimeShape luci_interpreter::kernels::getTensorShape ( const circle::Tensor *  tensor)
inline

Definition at line 106 of file Utils.h.

107{
108 if (tensor == nullptr)
109 return luci_interpreter::RuntimeShape();
110
111 auto const tensor_shape = Tensor::tensor_shape(tensor);
112
113 luci_interpreter::RuntimeShape runtime_shape(tensor_shape.size());
114 for (int i = 0; i < tensor_shape.size(); ++i)
115 {
116 runtime_shape.setDim(i, tensor_shape[i]);
117 }
118 return runtime_shape;
119}

References luci_interpreter::RuntimeShape::setDim().

◆ getTensorShape() [2/2]

tflite::RuntimeShape luci_interpreter::kernels::getTensorShape ( const Tensor *  tensor)
inline

Definition at line 194 of file Utils.h.

195{
196 if (tensor == nullptr)
197 return tflite::RuntimeShape();
198
199 const Shape &shape = tensor->shape();
200 tflite::RuntimeShape runtime_shape(shape.num_dims());
201 for (int i = 0; i < shape.num_dims(); ++i)
202 {
203 runtime_shape.SetDim(i, shape.dim(i));
204 }
205 return runtime_shape;
206}

References luci_interpreter::Shape::dim(), and luci_interpreter::Shape::num_dims().

Referenced by luci_interpreter::kernels::AveragePool2D::configure(), luci_interpreter::kernels::BatchMatMul::configure(), luci_interpreter::kernels::Conv2D::configure(), luci_interpreter::kernels::DepthwiseConv2D::configure(), luci_interpreter::kernels::GRU::configure(), luci_interpreter::kernels::StridedSlice::configure(), luci_interpreter::configure_kernel_CircleFloor(), luci_interpreter::configure_kernel_CircleLogicalNot(), luci_interpreter::configure_kernel_CircleResizeBilinear(), luci_interpreter::configure_kernel_CircleSquare(), luci_interpreter::configure_kernel_CircleSqueeze(), evalComparisonGeneric(), evalTISOQuantizedKernel(), luci_interpreter::kernels::BatchMatMul::execute(), luci_interpreter::kernels::BatchToSpaceND::execute(), luci_interpreter::kernels::CumSum::execute(), luci_interpreter::kernels::DepthToSpace::execute(), luci_interpreter::kernels::Dequantize::execute(), luci_interpreter::kernels::Elu::execute(), luci_interpreter::kernels::Fill::execute(), luci_interpreter::kernels::HardSwish::execute(), luci_interpreter::kernels::L2Pool2D::execute(), luci_interpreter::kernels::LocalResponseNormalization::execute(), luci_interpreter::kernels::LogicalOr::execute(), luci_interpreter::kernels::Pad::execute(), luci_interpreter::kernels::PadV2::execute(), luci_interpreter::kernels::Quantize::execute(), luci_interpreter::kernels::ResizeBilinear::execute(), luci_interpreter::kernels::ResizeNearestNeighbor::execute(), luci_interpreter::kernels::ReverseV2::execute(), luci_interpreter::kernels::Slice::execute(), luci_interpreter::kernels::SpaceToBatchND::execute(), luci_interpreter::kernels::SpaceToDepth::execute(), luci_interpreter::kernels::StridedSlice::execute(), luci_interpreter::kernels::Transpose::execute(), luci_interpreter::execute_kernel_CircleAveragePool2D(), luci_interpreter::execute_kernel_CircleFloor(), luci_interpreter::execute_kernel_CircleL2Pool2D(), luci_interpreter::execute_kernel_CircleLogicalAnd(), luci_interpreter::execute_kernel_CircleLogicalNot(), luci_interpreter::execute_kernel_CircleLogicalOr(), luci_interpreter::execute_kernel_CircleMaxPool2D(), luci_interpreter::execute_kernel_CircleMean(), luci_interpreter::execute_kernel_CircleNeg(), luci_interpreter::execute_kernel_CirclePadCommon(), luci_interpreter::execute_kernel_CircleResizeBilinear(), luci_interpreter::execute_kernel_CircleSlice(), luci_interpreter::execute_kernel_CircleStridedSlice(), luci_interpreter::execute_kernel_CircleTranspose(), getTensorRuntimeShape(), TransposeRowsColumns(), and luci_interpreter::kernels::VectorOfTensors< T, is_const >::VectorOfTensors().

◆ getTfLiteActivation()

TfLiteFusedActivation luci_interpreter::kernels::getTfLiteActivation ( Activation  activation)

Definition at line 30 of file Utils.cpp.

31{
32 switch (activation)
33 {
34 case Activation::RELU:
35 return kTfLiteActRelu;
36 case Activation::RELU6:
37 return kTfLiteActRelu6;
38 case Activation::RELU_N1_TO_1:
39 return kTfLiteActReluN1To1;
40 case Activation::TANH:
41 return kTfLiteActTanh;
42 case Activation::SIGN_BIT:
43 return kTfLiteActSignBit;
44 case Activation::NONE:
45 return kTfLiteActNone;
46 default:
47 throw std::runtime_error("Unsupported activation type");
48 }
49}

References luci::NONE, luci::RELU, luci::RELU6, luci::RELU_N1_TO_1, luci::SIGN_BIT, and luci::TANH.

◆ matrixScalarMultiplyAccumulate()

void luci_interpreter::kernels::matrixScalarMultiplyAccumulate ( const int8_t *  matrix,
int32_t  scalar,
int32_t  n_row,
int32_t  n_col,
int32_t *  output 
)

Definition at line 75 of file Utils.cpp.

77{
78 for (int i = 0; i < n_row; ++i)
79 {
80 int32_t row_sum = 0;
81 for (int j = 0; j < n_col; ++j)
82 {
83 row_sum += *matrix++;
84 }
85 output[i] += row_sum * scalar;
86 }
87}

◆ one_of_types() [1/2]

template<typename T >
constexpr bool luci_interpreter::kernels::one_of_types ( )
constexpr

Definition at line 88 of file Utils.h.

88{ return false; }

Referenced by one_of_types().

◆ one_of_types() [2/2]

template<typename T , typename U , typename... Other>
constexpr bool luci_interpreter::kernels::one_of_types ( )
constexpr

Definition at line 91 of file Utils.h.

92{
93 return std::is_same<T, U>::value || one_of_types<T, Other...>();
94}
constexpr bool one_of_types()
Definition Utils.h:88

References one_of_types().
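
A standalone, compilable restatement of the trait with static_assert usage examples; this is the guard fillArithmeticActivationRange uses to restrict its template parameter to float, int32_t and int64_t.

#include <cstdint>
#include <type_traits>

template <typename T> constexpr bool one_of_types() { return false; }

template <typename T, typename U, typename... Other> constexpr bool one_of_types()
{
  // True iff T is the same type as U or as any of the remaining Other types.
  return std::is_same<T, U>::value || one_of_types<T, Other...>();
}

static_assert(one_of_types<float, float, int32_t, int64_t>(), "float is in the list");
static_assert(!one_of_types<double, float, int32_t, int64_t>(), "double is not in the list");

int main() { return 0; }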

◆ quantizeMultiplier()

void luci_interpreter::kernels::quantizeMultiplier ( double  double_multiplier,
int32_t *  quantized_multiplier,
int *  shift 
)

Definition at line 157 of file Utils.cpp.

158{
159 if (double_multiplier == 0.0)
160 {
161 *quantized_multiplier = 0;
162 *shift = 0;
163 return;
164 }
165
166 const double q = std::frexp(double_multiplier, shift);
167 auto q_fixed = static_cast<int64_t>(std::round(q * (INT64_C(1) << 31)));
168
169 if (q_fixed == (INT64_C(1) << 31))
170 {
171 q_fixed /= 2;
172 ++*shift;
173 }
174 assert(q_fixed <= std::numeric_limits<int32_t>::max());
175 // A shift amount smaller than -31 would cause all bits to be shifted out
176 // and thus all results would be zero. We implement that instead with
177 // q_fixed==0, so as to avoid hitting issues with right-shift
178 // operations with shift amounts greater than 31. Note that this happens
179 // roughly when abs(double_multiplier) < 2^-31 and the present handling means
180 // that we're effectively flushing tiny double_multiplier's to zero.
181 // We could conceivably handle values in the range (roughly) [32, 63]
182 // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
183 // the present handling is just doing 'flush denormals to zero'. We could
184 // reconsider and actually generate nonzero denormals if a need arises.
185 if (*shift < -31)
186 {
187 *shift = 0;
188 q_fixed = 0;
189 }
190 *quantized_multiplier = static_cast<int32_t>(q_fixed);
191}

Referenced by luci_interpreter::kernels::LeakyRelu::configure(), luci_interpreter::kernels::PRelu::configure(), luci_interpreter::kernels::Relu::configure(), luci_interpreter::kernels::Relu0To1::configure(), luci_interpreter::kernels::Relu6::configure(), luci_interpreter::createConv2DParams(), quantizeMultipliers(), and quantizeMultiplierSmallerThanOneExp().
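
A standalone worked example of the decomposition above: a real multiplier m is represented as m ~= (quantized_multiplier / 2^31) * 2^shift, so integer kernels can apply it with a 32-bit multiply plus a shift. The helper below restates the listing for illustration only.

#include <cassert>
#include <cmath>
#include <cstdint>

void quantizeMultiplierSketch(double m, int32_t *quantized_multiplier, int *shift)
{
  if (m == 0.0)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(m, shift); // m == q * 2^shift with q in [0.5, 1)
  auto q_fixed = static_cast<int64_t>(std::round(q * (INT64_C(1) << 31)));
  if (q_fixed == (INT64_C(1) << 31))
  {
    q_fixed /= 2;
    ++*shift;
  }
  if (*shift < -31) // flush tiny multipliers to zero, as in the listing above
  {
    *shift = 0;
    q_fixed = 0;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

int main()
{
  int32_t qm = 0;
  int shift = 0;
  quantizeMultiplierSketch(0.375, &qm, &shift);
  // 0.375 = 0.75 * 2^-1, so qm is about 0.75 * 2^31 and shift is -1.
  assert(shift == -1);
  const double reconstructed = std::ldexp(static_cast<double>(qm) / (INT64_C(1) << 31), shift);
  assert(std::abs(reconstructed - 0.375) < 1e-9);
  return 0;
}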

◆ quantizeMultipliers()

std::vector< ChannelQuantMultipliers > luci_interpreter::kernels::quantizeMultipliers ( const std::vector< double > &  effective_scale)
inline

Definition at line 170 of file Utils.h.

171{
172 size_t n = effective_scale.size();
173 std::vector<ChannelQuantMultipliers> params(n);
174 for (size_t i = 0; i < n; ++i)
175 {
176 quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
177 }
178 return params;
179}
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
Definition Utils.cpp:157

References quantizeMultiplier().

Referenced by luci_interpreter::kernels::PRelu::configure(), and luci_interpreter::kernels::TransposeConv::configure().

◆ quantizeMultiplierSmallerThanOneExp()

void luci_interpreter::kernels::quantizeMultiplierSmallerThanOneExp ( double  double_multiplier,
int32_t *  quantized_multiplier,
int *  left_shift 
)

Definition at line 193 of file Utils.cpp.

195{
196 assert(double_multiplier < 1.0);
197 assert(double_multiplier > 0.0);
198 int shift;
199 quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
200 assert(shift <= 0);
201 *left_shift = shift;
202}

References quantizeMultiplier().

Referenced by luci_interpreter::kernels::Equal::configure(), luci_interpreter::kernels::Greater::configure(), luci_interpreter::kernels::GreaterEqual::configure(), luci_interpreter::kernels::Less::configure(), luci_interpreter::kernels::LessEqual::configure(), luci_interpreter::kernels::NotEqual::configure(), and evalTISOQuantizedKernel().

◆ TransposeRowsColumns()

void luci_interpreter::kernels::TransposeRowsColumns ( const Tensor *  tensor_in,
Tensor *  tensor_out 
)

Definition at line 124 of file BatchMatMul.cpp.

125{
126 tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in));
127 tflite::RuntimeShape shape(getTensorShape(tensor_in));
128 tflite::TransposeParams params;
129 int rank = shape.DimensionsCount();
130 params.perm_count = rank;
131 for (int i = 0; i < rank - 2; ++i)
132 {
133 params.perm[i] = i;
134 }
135 // Transpose the last two dimensions.
136 params.perm[rank - 2] = rank - 1;
137 params.perm[rank - 1] = rank - 2;
138 transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2));
139 transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1));
140 switch (tensor_in->element_type())
141 {
142 case DataType::FLOAT32:
143 tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in),
144 transposed_shape, getTensorData<float>(tensor_out));
145 break;
146 default:
147 throw std::runtime_error("Only suppport fp32 BatchMatMul for now.");
148 }
149}
DataType element_type() const
Definition Tensor.h:105

References luci_interpreter::Tensor::element_type(), and getTensorShape().

Referenced by luci_interpreter::kernels::BatchMatMul::execute().

Variable Documentation

◆ max_dim

const int luci_interpreter::kernels::max_dim = 4