Functions
void	EvalFloat (const Tensor *input, const Tensor *input_to_input_weights, const Tensor *input_to_forget_weights, const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights, const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights, const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights, const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights, const Tensor *cell_to_output_weights, const Tensor *input_layer_norm_coefficients, const Tensor *forget_layer_norm_coefficients, const Tensor *cell_layer_norm_coefficients, const Tensor *output_layer_norm_coefficients, const Tensor *aux_input, const Tensor *aux_input_to_input_weights, const Tensor *aux_input_to_forget_weights, const Tensor *aux_input_to_cell_weights, const Tensor *aux_input_to_output_weights, const Tensor *input_gate_bias, const Tensor *forget_gate_bias, const Tensor *cell_gate_bias, const Tensor *output_gate_bias, const Tensor *projection_weights, const Tensor *projection_bias, const TfLiteLSTMParams *params, bool forward_sequence, bool time_major, int output_offset, Tensor *scratch_buffer, Tensor *output_state, Tensor *cell_state, Tensor *output)

Function Documentation

◆ EvalFloat()

void luci_interpreter::kernels::lstm::EvalFloat	(	const Tensor *	input,
		const Tensor *	input_to_input_weights,
		const Tensor *	input_to_forget_weights,
		const Tensor *	input_to_cell_weights,
		const Tensor *	input_to_output_weights,
		const Tensor *	recurrent_to_input_weights,
		const Tensor *	recurrent_to_forget_weights,
		const Tensor *	recurrent_to_cell_weights,
		const Tensor *	recurrent_to_output_weights,
		const Tensor *	cell_to_input_weights,
		const Tensor *	cell_to_forget_weights,
		const Tensor *	cell_to_output_weights,
		const Tensor *	input_layer_norm_coefficients,
		const Tensor *	forget_layer_norm_coefficients,
		const Tensor *	cell_layer_norm_coefficients,
		const Tensor *	output_layer_norm_coefficients,
		const Tensor *	aux_input,
		const Tensor *	aux_input_to_input_weights,
		const Tensor *	aux_input_to_forget_weights,
		const Tensor *	aux_input_to_cell_weights,
		const Tensor *	aux_input_to_output_weights,
		const Tensor *	input_gate_bias,
		const Tensor *	forget_gate_bias,
		const Tensor *	cell_gate_bias,
		const Tensor *	output_gate_bias,
		const Tensor *	projection_weights,
		const Tensor *	projection_bias,
		const TfLiteLSTMParams *	params,
		bool	forward_sequence,
		bool	time_major,
		int	output_offset,
		Tensor *	scratch_buffer,
		Tensor *	output_state,
		Tensor *	cell_state,
		Tensor *	output
	)

Definition at line 241 of file UnidirectionalSequenceLSTM.cpp.

{
  // Runs the float LSTM over the whole input sequence by calling LstmStepFloat once per time
  // step (time-major layout) or once per (batch, time step) pair (batch-major layout).
  //
  // Layout handled here:
  //   - `input` must have rank 2 or 3 (asserted). For rank 3 the first two dimensions are
  //     (time, batch) when `time_major` is true and (batch, time) otherwise; for rank 2 the
  //     single leading dimension is the batch and the sequence length is taken as 1.
  //   - The innermost dimension of `input` is n_input; the innermost dimension of `aux_input`
  //     (when present) is aux_input_size.
  //   - `forward_sequence == false` visits the time steps in reverse order.
  //   - `output_offset` is added to the output pointer of every step.
  //   - `scratch_buffer` is carved into consecutive slices of n_cell * n_batch floats, one per
  //     gate (three slices when the input gate is absent, four otherwise).
  const Shape &input_shape = input->shape();
  assert(input_shape.num_dims() >= 2 && input_shape.num_dims() <= 3);
  int max_time = 1;
  int n_batch = 0;
  if (input_shape.num_dims() == 3)
  {
    max_time = (time_major) ? input_shape.dim(0) : input_shape.dim(1);
    n_batch = (time_major) ? input_shape.dim(1) : input_shape.dim(0);
  }
  else
  {
    // Rank-2 input: a single time step.
    n_batch = input_shape.dim(0);
  }
  const int n_input = input_shape.dim(input_shape.num_dims() - 1);

  // The auxiliary input is optional; its feature size is 0 when it is absent.
  int aux_input_temp = 0;
  if (aux_input)
  {
    const Shape &aux_input_shape = aux_input->shape();
    aux_input_temp = aux_input_shape.dim(aux_input_shape.num_dims() - 1);
  }
  const int aux_input_size = aux_input_temp;

  // n_cell and n_output will be the same size when there is no projection.
  const Shape &input_to_output_weights_shape = input_to_output_weights->shape();
  const Shape &recurrent_to_output_weights_shape = recurrent_to_output_weights->shape();
  const int n_cell = input_to_output_weights_shape.dim(0);
  const int n_output = recurrent_to_output_weights_shape.dim(1);

  // Since we have already checked that weights are all there or none, we can
  // check the existence of only one to get the condition.
  const bool use_cifg = (input_to_input_weights == nullptr);

  // Index the scratch buffers pointers to the global scratch buffer.
  // With CIFG there is no input gate, so its scratch pointer stays null and the
  // remaining three gates are packed from the start of the buffer.
  float *scratch_buffer_ptr = getTensorData<float>(scratch_buffer);
  float *input_gate_scratch = nullptr;
  float *cell_gate_scratch = nullptr;
  float *forget_gate_scratch = nullptr;
  float *output_gate_scratch = nullptr;
  if (use_cifg)
  {
    cell_gate_scratch = scratch_buffer_ptr;
    forget_gate_scratch = scratch_buffer_ptr + n_cell * n_batch;
    output_gate_scratch = scratch_buffer_ptr + 2 * n_cell * n_batch;
  }
  else
  {
    input_gate_scratch = scratch_buffer_ptr;
    cell_gate_scratch = scratch_buffer_ptr + n_cell * n_batch;
    forget_gate_scratch = scratch_buffer_ptr + 2 * n_cell * n_batch;
    output_gate_scratch = scratch_buffer_ptr + 3 * n_cell * n_batch;
  }

  const Shape &output_shape = output->shape();
  const int output_batch_leading_dim = output_shape.dim(output_shape.num_dims() - 1);
  if (time_major)
  {
    // Loop through the sequence. Each step processes all batches at once, so the
    // per-step strides cover n_batch rows.
    const int input_step = n_batch * n_input;
    // The auxiliary input has its own innermost size, so it must be stepped by that size;
    // stepping it by n_input would read the wrong rows whenever aux_input_size != n_input.
    const int aux_input_step = n_batch * aux_input_size;
    const int output_step = n_batch * output_batch_leading_dim;
    for (int t = 0; t < max_time; t++)
    {
      // If this is the forward_sequence, step forward, otherwise step
      // backwards.
      const int t_rel = forward_sequence ? t : max_time - t - 1;
      const float *input_ptr = getTensorData<float>(input) + t_rel * input_step;
      const float *aux_input_ptr = nullptr;
      if (aux_input)
      {
        aux_input_ptr = getTensorData<float>(aux_input) + t_rel * aux_input_step;
      }
      float *output_ptr = getTensorData<float>(output) + t_rel * output_step + output_offset;

      LstmStepFloat(
        input_ptr, getTensorData<float>(input_to_input_weights),
        getTensorData<float>(input_to_forget_weights), getTensorData<float>(input_to_cell_weights),
        getTensorData<float>(input_to_output_weights), aux_input_ptr,
        getTensorData<float>(aux_input_to_input_weights),
        getTensorData<float>(aux_input_to_forget_weights),
        getTensorData<float>(aux_input_to_cell_weights),
        getTensorData<float>(aux_input_to_output_weights),
        getTensorData<float>(recurrent_to_input_weights),
        getTensorData<float>(recurrent_to_forget_weights),
        getTensorData<float>(recurrent_to_cell_weights),
        getTensorData<float>(recurrent_to_output_weights),
        getTensorData<float>(cell_to_input_weights), getTensorData<float>(cell_to_forget_weights),
        getTensorData<float>(cell_to_output_weights),
        getTensorData<float>(input_layer_norm_coefficients),
        getTensorData<float>(forget_layer_norm_coefficients),
        getTensorData<float>(cell_layer_norm_coefficients),
        getTensorData<float>(output_layer_norm_coefficients), getTensorData<float>(input_gate_bias),
        getTensorData<float>(forget_gate_bias), getTensorData<float>(cell_gate_bias),
        getTensorData<float>(output_gate_bias), getTensorData<float>(projection_weights),
        getTensorData<float>(projection_bias), params, n_batch, n_cell, n_input, aux_input_size,
        n_output, output_batch_leading_dim, getTensorData<float>(output_state),
        getTensorData<float>(cell_state), input_gate_scratch, forget_gate_scratch,
        cell_gate_scratch, output_gate_scratch, output_ptr);
    }
  }
  else
  {
    // Batch-major: each (batch, time) pair is processed as a batch of one, so the
    // strides cover a single row.
    const int input_step = n_input;
    // See the time-major branch: the auxiliary input is stepped by its own row size.
    const int aux_input_step = aux_input_size;
    const int output_step = output_batch_leading_dim;
    for (int b = 0; b < n_batch; b++)
    {
      for (int t = 0; t < max_time; t++)
      {
        // If this is the forward_sequence, step forward, otherwise step
        // backwards.
        const int t_rel = forward_sequence ? t : max_time - t - 1;
        const int time_offset = b * max_time + t_rel;
        const float *input_ptr = getTensorData<float>(input) + time_offset * input_step;
        const float *aux_input_ptr = nullptr;
        if (aux_input)
        {
          aux_input_ptr = getTensorData<float>(aux_input) + time_offset * aux_input_step;
        }
        float *output_ptr =
          getTensorData<float>(output) + time_offset * output_step + output_offset;

        // Offset the {output,cell}_state pointers to the right batch.
        float *output_state_ptr = getTensorData<float>(output_state) + b * output_batch_leading_dim;
        float *cell_state_ptr = getTensorData<float>(cell_state) + b * n_cell;
        // Offset the scratch pointers to the right batch.
        // The input gate scratch is null under CIFG and must stay null.
        float *input_gate_scratch_ptr =
          input_gate_scratch ? input_gate_scratch + b * n_cell : nullptr;
        float *forget_gate_scratch_ptr = forget_gate_scratch + b * n_cell;
        float *cell_gate_scratch_ptr = cell_gate_scratch + b * n_cell;
        float *output_gate_scratch_ptr = output_gate_scratch + b * n_cell;

        LstmStepFloat(
          input_ptr, getTensorData<float>(input_to_input_weights),
          getTensorData<float>(input_to_forget_weights),
          getTensorData<float>(input_to_cell_weights),
          getTensorData<float>(input_to_output_weights), aux_input_ptr,
          getTensorData<float>(aux_input_to_input_weights),
          getTensorData<float>(aux_input_to_forget_weights),
          getTensorData<float>(aux_input_to_cell_weights),
          getTensorData<float>(aux_input_to_output_weights),
          getTensorData<float>(recurrent_to_input_weights),
          getTensorData<float>(recurrent_to_forget_weights),
          getTensorData<float>(recurrent_to_cell_weights),
          getTensorData<float>(recurrent_to_output_weights),
          getTensorData<float>(cell_to_input_weights), getTensorData<float>(cell_to_forget_weights),
          getTensorData<float>(cell_to_output_weights),
          getTensorData<float>(input_layer_norm_coefficients),
          getTensorData<float>(forget_layer_norm_coefficients),
          getTensorData<float>(cell_layer_norm_coefficients),
          getTensorData<float>(output_layer_norm_coefficients),
          getTensorData<float>(input_gate_bias), getTensorData<float>(forget_gate_bias),
          getTensorData<float>(cell_gate_bias), getTensorData<float>(output_gate_bias),
          getTensorData<float>(projection_weights), getTensorData<float>(projection_bias), params,
          /*n_batch=*/1, n_cell, n_input, aux_input_size, n_output, output_batch_leading_dim,
          output_state_ptr, cell_state_ptr, input_gate_scratch_ptr, forget_gate_scratch_ptr,
          cell_gate_scratch_ptr, output_gate_scratch_ptr, output_ptr);
      }
    }
  }
}

References luci_interpreter::Shape::dim(), luci_interpreter::Shape::num_dims(), output_shape, and luci_interpreter::Tensor::shape().

Functions

Function Documentation

◆ EvalFloat()