18#include "kernels/UnidirectionalSequenceLSTM.h"
19#include "kernels/Utils.h"
21#include <tensorflow/lite/kernels/internal/tensor_utils.h>
32using namespace tflite;
56 tensor_utils::CwiseClipping(cell_state,
n_batch *
n_cell, clip);
62 const float *projection_weights,
const float *projection_bias,
63 const float proj_clip,
float *output_state,
float *scratch)
65 tensor_utils::ApplyActivationToVector(cell_state,
n_batch *
n_cell, activation, scratch);
75 tensor_utils::VectorBatchVectorAssign(projection_bias,
n_output,
n_batch, output_state);
81 tensor_utils::MatrixBatchVectorMultiplyAccumulate(projection_weights,
n_output,
n_cell, scratch,
85 tensor_utils::CwiseClipping(output_state,
n_batch *
n_output, proj_clip);
96 const float *output_state,
153inline void LstmStepFloat(
206 CalculateLstmGateFloat(
229 for (
int b = 0; b <
n_batch; b++)
240 const Tensor *input_to_input_weights,
const Tensor *input_to_forget_weights,
241 const Tensor *input_to_cell_weights,
const Tensor *input_to_output_weights,
243 const Tensor *recurrent_to_input_weights,
const Tensor *recurrent_to_forget_weights,
244 const Tensor *recurrent_to_cell_weights,
const Tensor *recurrent_to_output_weights,
246 const Tensor *cell_to_input_weights,
const Tensor *cell_to_forget_weights,
247 const Tensor *cell_to_output_weights,
249 const Tensor *input_layer_norm_coefficients,
250 const Tensor *forget_layer_norm_coefficients,
251 const Tensor *cell_layer_norm_coefficients,
252 const Tensor *output_layer_norm_coefficients,
258 const Tensor *input_gate_bias,
const Tensor *forget_gate_bias,
259 const Tensor *cell_gate_bias,
const Tensor *output_gate_bias,
261 const Tensor *projection_weights,
const Tensor *projection_bias,
299 const bool use_cifg = (input_to_input_weights ==
nullptr);
369 for (
int b = 0; b <
n_batch; b++)
440 const Tensor *input_to_input_weights,
const Tensor *input_to_forget_weights,
441 const Tensor *input_to_cell_weights,
const Tensor *input_to_output_weights,
443 const Tensor *recurrent_to_input_weights,
const Tensor *recurrent_to_forget_weights,
444 const Tensor *recurrent_to_cell_weights,
const Tensor *recurrent_to_output_weights,
446 const Tensor *cell_to_input_weights,
const Tensor *cell_to_forget_weights,
447 const Tensor *cell_to_output_weights,
449 const Tensor *input_gate_bias,
const Tensor *forget_gate_bias,
const Tensor *cell_gate_bias,
450 const Tensor *output_gate_bias,
452 const Tensor *projection_weights,
const Tensor *projection_bias,
454 const Tensor *output_state,
const Tensor *cell_state,
const Tensor *input_layer_norm_coefficients,
455 const Tensor *forget_layer_norm_coefficients,
const Tensor *cell_layer_norm_coefficients,
456 const Tensor *output_layer_norm_coefficients,
497void UnidirectionalSequenceLSTM::check_input_tensor_dimensions(
int n_input,
int n_output,
696 loco::DataType::S16);
701 loco::DataType::FLOAT32);
711 loco::DataType::S16);
716 loco::DataType::FLOAT32);
729 loco::DataType::FLOAT32);
738 loco::DataType::S16);
743 loco::DataType::FLOAT32);
813 input()->element_type() == loco::DataType::FLOAT32)
815 throw std::runtime_error(
"Hybrid type is not currently supported");
823 switch (
input()->element_type())
825 case loco::DataType::FLOAT32:
829 throw std::runtime_error(
"Unsupported type");
833void UnidirectionalSequenceLSTM::evalFloat()
const
const std::vector< Tensor * > & getOutputTensors() const
const std::vector< const Tensor * > & getInputTensors() const
const UnidirectionalSequenceLSTMParams & params() const
void resize(const Shape &new_shape)
const Shape & shape() const
const Tensor * recurrent_to_input_weights() const
const Tensor * input() const
const Tensor * cell_to_forget_weights() const
const Tensor * forget_gate_bias() const
void execute() const override
const Tensor * cell_to_output_weights() const
const Tensor * recurrent_to_forget_weights() const
const Tensor * input_to_output_weights() const
const Tensor * cell_state() const
const Tensor * output_gate_bias() const
void configure() override
UnidirectionalSequenceLSTM(const Tensor *input, const Tensor *input_to_input_weights, const Tensor *input_to_forget_weights, const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights, const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights, const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights, const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights, const Tensor *cell_to_output_weights, const Tensor *input_gate_bias, const Tensor *forget_gate_bias, const Tensor *cell_gate_bias, const Tensor *output_gate_bias, const Tensor *projection_weights, const Tensor *projection_bias, const Tensor *output_state, const Tensor *cell_state, const Tensor *input_layer_norm_coefficients, const Tensor *forget_layer_norm_coefficients, const Tensor *cell_layer_norm_coefficients, const Tensor *output_layer_norm_coefficients, Tensor *output, Tensor *scratchpad_1, Tensor *scratchpad_2, Tensor *scratchpad_3, const UnidirectionalSequenceLSTMParams &params)
const Tensor * cell_gate_bias() const
const Tensor * projection_weights() const
const Tensor * input_gate_bias() const
const Tensor * recurrent_to_output_weights() const
const Tensor * input_layer_norm_coefficients() const
const Tensor * output_state() const
const Tensor * input_to_cell_weights() const
const Tensor * input_to_forget_weights() const
const Tensor * projection_bias() const
const Tensor * forget_layer_norm_coefficients() const
const Tensor * input_to_input_weights() const
const Tensor * output_layer_norm_coefficients() const
const Tensor * cell_to_input_weights() const
const Tensor * cell_layer_norm_coefficients() const
const Tensor * recurrent_to_cell_weights() const
#define LUCI_INTERPRETER_CHECK(cond)
const luci_interpreter::RuntimeShape output_shape
void EvalFloat(const Tensor *input, const Tensor *input_to_input_weights, const Tensor *input_to_forget_weights, const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights, const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights, const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights, const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights, const Tensor *cell_to_output_weights, const Tensor *input_layer_norm_coefficients, const Tensor *forget_layer_norm_coefficients, const Tensor *cell_layer_norm_coefficients, const Tensor *output_layer_norm_coefficients, const Tensor *aux_input, const Tensor *aux_input_to_input_weights, const Tensor *aux_input_to_forget_weights, const Tensor *aux_input_to_cell_weights, const Tensor *aux_input_to_output_weights, const Tensor *input_gate_bias, const Tensor *forget_gate_bias, const Tensor *cell_gate_bias, const Tensor *output_gate_bias, const Tensor *projection_weights, const Tensor *projection_bias, const TfLiteLSTMParams *params, bool forward_sequence, bool time_major, int output_offset, Tensor *scratch_buffer, Tensor *output_state, Tensor *cell_state, Tensor *output)
TfLiteFusedActivation getTfLiteActivation(Activation activation)
T must_cast(loco::Node *node)
bool asymmetric_quantize_inputs