19#include "kernels/Utils.h"
21#include "PALUnidirectionalSequenceLSTM.h"
35 const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
51 const DataType output_type,
const int output_zp)
54 if (output_type == DataType::S16)
57 op_params.quantized_activation_max = std::numeric_limits<int16_t>::max();
59 else if (output_type == DataType::S8)
61 op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
62 op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
72 auto output_shift =
static_cast<int>(
op_params.output_shift);
91 int32_t output_multiplier;
117 int32_t output_multiplier;
197 std::max(
static_cast<double>(cell_clip) /
static_cast<double>(
cell_state_scale), -32768.0),
212 const bool time_major =
lstm_struct.options->time_major();
213 const auto batch_size =
229 std::make_unique<int8_t[]>(Tensor::num_elements(
lstm_struct.output_state()));
234 std::make_unique<int16_t[]>(Tensor::num_elements(
lstm_struct.cell_state()));
242 kernels::getTensorData<int16_t>(
scratch_3_data.get()), runtime_graph);
306 const bool time_major =
lstm_struct.options->time_major();
307 const auto batch_size =
323 std::make_unique<float[]>(Tensor::num_elements(
lstm_struct.output_state()));
330 luci_interpreter_pal::evalLSTM<float, float, float, float>(
335 kernels::getTensorData<float>(
scratch_3_data.get()), runtime_graph);
348 const auto batch_size =
355 for (int32_t
i = 1;
i < 5;
i++)
361 for (int32_t
i = 5;
i < 9;
i++)
367 for (int32_t
i = 12;
i < 16;
i++)
377 batch_size * state_dimension);
379 batch_size * state_dimension);
398 Tensor::element_type(
lstm_struct.input()) == DataType::S8);
402 const bool time_major =
lstm_struct.options->time_major();
407 for (int32_t
i = 9;
i < 12; ++
i)
411 for (int32_t
i = 16;
i < 18; ++
i)
415 for (int32_t
i = 20;
i < 24; ++
i)
422 const auto input_index =
cur_op->inputs()->operator[](0);
423 assert(input_index != -1);
429 switch (Tensor::element_type(input))
432 case DataType::FLOAT32:
442 assert(
false &&
"Unsupported type.");
const circle::Tensor * getCircleTensorByIndex(int32_t index)
bool is_inplace_op(const circle::Operator *op)
#define LUCI_INTERPRETER_CHECK(cond)
DataType
"scalar" value type
bool checkedLog2(const float x, int *log2_result)
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
double getQuantizedConvolutionMultipler(float input_scale, float filter_scale, float output_scale)
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
void evalLSTM< int8_t, int8_t, int16_t, int32_t >(luci_interpreter::lstm::LSTMStruct *lstm_struct, luci_interpreter::lstm::LSTMParameters *lstm_params, luci_interpreter::lstm::CellStateInfo *cell_state_info, int8_t *output_state_data, int16_t *cell_state_data, int16_t *scratch0, int16_t *scratch1, int16_t *scratch2, int16_t *scratch3, luci_interpreter::BaseRuntimeGraph *runtime_graph)
RuntimeGraph BaseRuntimeGraph
void configure_kernel_CircleUnidirectionalSequenceLSTM(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
size_t getDataTypeSize(DataType data_type)
void execute_kernel_CircleUnidirectionalSequenceLSTM(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
T must_cast(loco::Node *node)
const loco::Dimension & dim(uint32_t axis) const
int32_t quantized_activation_min