18#include "kernels/Utils.h"
21#include "PALSoftmax.h"
// Float path of the Softmax kernel.
// NOTE(review): this view is an excerpt -- the remaining parameters, the
// braces, and the declarations of `flat_size` and `params` are not visible
// here, so the comments below describe only the statements that are shown.
30void evalFloat(
const circle::Tensor *input,
const circle::Tensor *output,
// Look up the buffers backing the input and output tensors in the runtime graph.
33 const auto *
input_data = runtime_graph->getDataByTensor(input);
34 auto *
output_data = runtime_graph->getDataByTensor(output);
// beta is read from the operator's Softmax options.
36 const float beta =
options->beta();
// Index of the last dimension of the input.
38 const auto trailing_dim = Tensor::num_dims(input) - 1;
// Multiply flat_size by every input dimension except the trailing one
// (the trailing dimension contributes a factor of 1).
41 for (
int i = 0; i < Tensor::num_dims(input); ++i)
43 flat_size *= (i == trailing_dim) ? 1 :
Tensor::dim(
input, i);
// Hand the float-typed views of both buffers to the PAL Softmax implementation.
// How `params` is populated is not visible in this excerpt -- TODO confirm.
51 luci_interpreter_pal::Softmax(params, kernels::getTensorData<float>(input_data),
52 kernels::getTensorData<float>(output_data));
// Derives fixed-point scaling parameters for quantized Softmax from beta,
// the input scale and the number of integer bits.
// NOTE(review): only the real-multiplier computation is visible in this
// excerpt; the code that fills *quantized_multiplier and *left_shift is not
// shown (presumably via quantizeMultiplier -- confirm against the full file).
57void preprocessSoftmaxScaling(
double beta,
double input_scale,
int input_integer_bits,
58 int32_t *quantized_multiplier,
int *left_shift)
// Upper bound for the real multiplier: 2^31 - 1, computed as a double.
60 const double max_real_multiplier = (1LL << 31) - 1.0;
// beta * input_scale * 2^(31 - input_integer_bits), clamped to the bound above.
// NOTE(review): `1 << (31 - input_integer_bits)` is an `int` shift; with
// input_integer_bits == 0 it would shift into the sign bit on platforms with
// 32-bit int. The call visible in this excerpt passes kScaledDiffIntegerBits (5).
61 const double input_beta_real_multiplier =
62 std::min<double>(beta * input_scale * (1 << (31 - input_integer_bits)), max_real_multiplier);
// Quantized path of the Softmax kernel.
// NOTE(review): this view is an excerpt -- the remaining parameters, the
// braces, the else-branches and the declarations of `flat_size` and `params`
// are not visible, so the comments below cover only the statements shown.
67void evalQuantize(
const circle::Tensor *input,
const circle::Tensor *output,
// Integer-bit count forwarded to preprocessSoftmaxScaling further down.
70 static const int kScaledDiffIntegerBits = 5;
// beta is read from the operator's Softmax options.
72 const float beta =
options->beta();
// Index of the last dimension of the input.
74 const auto trailing_dim = Tensor::num_dims(input) - 1;
// Multiply flat_size by every input dimension except the trailing one
// (the trailing dimension contributes a factor of 1).
77 for (
int i = 0; i < Tensor::num_dims(input); ++i)
79 flat_size *= (i == trailing_dim) ? 1 :
Tensor::dim(
input, i);
// S16 input: a rescale factor is built from the input scale and beta
// (the divisor of this expression is not visible in the excerpt), then the
// int16 -> int16 PAL Softmax is invoked.
87 if (Tensor::element_type(input) == DataType::S16)
90 double input_scale_beta_rescale =
91 static_cast<double>(Tensor::scale(input)) *
static_cast<double>(beta) /
96 luci_interpreter_pal::Softmax(
97 params, kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(input)),
98 kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(output)));
// Presumably the non-S16 branch (the `else` itself is not visible -- confirm):
// scaling parameters are derived from params.beta, the input scale and
// kScaledDiffIntegerBits; the remaining arguments of this call are elided.
103 preprocessSoftmaxScaling(
static_cast<double>(params.
beta),
104 static_cast<double>(Tensor::scale(input)), kScaledDiffIntegerBits,
// Dispatch on the OUTPUT element type; the input buffer is read as int8_t in
// both of the visible cases.
109 if (Tensor::element_type(output) == DataType::S8)
110 luci_interpreter_pal::Softmax(
111 params, kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(input)),
112 kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(output)));
113 else if (Tensor::element_type(output) == DataType::S16)
114 luci_interpreter_pal::Softmax(
115 params, kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(input)),
116 kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(output)));
128 Tensor::element_type(kernel.
output()));
132 if (Tensor::element_type(kernel.
input()) == DataType::U8 ||
133 Tensor::element_type(kernel.
input()) == DataType::S8)
136 Tensor::zero_point(kernel.
output()) == 0);
138 Tensor::zero_point(kernel.
output()) ==
139 std::numeric_limits<int8_t>::min());
148 const auto *options = cur_op->builtin_options_as_SoftmaxOptions();
149 const auto input_type = Tensor::element_type(kernel.
input());
153 case DataType::FLOAT32:
154 evalFloat(kernel.
input(), kernel.
output(), options, runtime_graph);
160 evalQuantize(kernel.
input(), kernel.
output(), options, runtime_graph);
164 assert(
false &&
"Unsupported type.");
const circle::Tensor * output() const
const circle::Tensor * input() const
#define LUCI_INTERPRETER_CHECK(cond)
int calculateInputRadius(int input_integer_bits, int input_left_shift, int total_signed_bits)
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
void configure_kernel_CircleSoftmax(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
RuntimeGraph BaseRuntimeGraph
void execute_kernel_CircleSoftmax(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
const loco::Dimension & dim(uint32_t axis) const