ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Softmax.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "Builders.h"
18#include "kernels/Utils.h"
19#include "SISOKernel.h"
20
21#include "PALSoftmax.h"
22
23namespace luci_interpreter
24{
25
26namespace
27{
28
29#ifndef DIS_FLOAT
30void evalFloat(const circle::Tensor *input, const circle::Tensor *output,
31 const circle::SoftmaxOptions *options, BaseRuntimeGraph *runtime_graph)
32{
33 const auto *input_data = runtime_graph->getDataByTensor(input);
34 auto *output_data = runtime_graph->getDataByTensor(output);
35
36 const float beta = options->beta();
37
38 const auto trailing_dim = Tensor::num_dims(input) - 1;
39
40 int flat_size = 1;
41 for (int i = 0; i < Tensor::num_dims(input); ++i)
42 {
43 flat_size *= (i == trailing_dim) ? 1 : Tensor::dim(input, i);
44 }
45
47 params.beta = beta;
48 params.num_rows = flat_size;
49 params.row_size = std::min(Tensor::dim(input, trailing_dim), Tensor::dim(output, trailing_dim));
50
51 luci_interpreter_pal::Softmax(params, kernels::getTensorData<float>(input_data),
52 kernels::getTensorData<float>(output_data));
53}
54#endif // DIS_FLOAT
55
56#ifndef DIS_QUANT
57void preprocessSoftmaxScaling(double beta, double input_scale, int input_integer_bits,
58 int32_t *quantized_multiplier, int *left_shift)
59{
60 const double max_real_multiplier = (1LL << 31) - 1.0;
61 const double input_beta_real_multiplier =
62 std::min<double>(beta * input_scale * (1 << (31 - input_integer_bits)), max_real_multiplier);
63
64 kernels::quantizeMultiplier(input_beta_real_multiplier, quantized_multiplier, left_shift);
65}
66
67void evalQuantize(const circle::Tensor *input, const circle::Tensor *output,
68 const circle::SoftmaxOptions *options, BaseRuntimeGraph *runtime_graph)
69{
70 static const int kScaledDiffIntegerBits = 5;
71
72 const float beta = options->beta();
73
74 const auto trailing_dim = Tensor::num_dims(input) - 1;
75
76 int flat_size = 1;
77 for (int i = 0; i < Tensor::num_dims(input); ++i)
78 {
79 flat_size *= (i == trailing_dim) ? 1 : Tensor::dim(input, i);
80 }
81
83 params.beta = beta;
84 params.num_rows = flat_size;
85 params.row_size = std::min(Tensor::dim(input, trailing_dim), Tensor::dim(output, trailing_dim));
86
87 if (Tensor::element_type(input) == DataType::S16)
88 {
89 int left_shift = 0;
90 double input_scale_beta_rescale =
91 static_cast<double>(Tensor::scale(input)) * static_cast<double>(beta) /
92 (10.0 / 65535.0); // scale the input_diff such that [-65535, 0]
93 // correspond to [-10.0, 0.0]
94 kernels::quantizeMultiplier(input_scale_beta_rescale, &params.input_multiplier, &left_shift);
95 params.input_left_shift = left_shift;
96 luci_interpreter_pal::Softmax(
97 params, kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(input)),
98 kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(output)));
99 }
100 else
101 {
102 int left_shift = 0;
103 preprocessSoftmaxScaling(static_cast<double>(params.beta),
104 static_cast<double>(Tensor::scale(input)), kScaledDiffIntegerBits,
105 &params.input_multiplier, &left_shift);
106 params.input_left_shift = left_shift;
107 params.diff_min =
108 -1.0 * kernels::calculateInputRadius(kScaledDiffIntegerBits, params.input_left_shift, 31);
109 if (Tensor::element_type(output) == DataType::S8)
110 luci_interpreter_pal::Softmax(
111 params, kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(input)),
112 kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(output)));
113 else if (Tensor::element_type(output) == DataType::S16)
114 luci_interpreter_pal::Softmax(
115 params, kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(input)),
116 kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(output)));
117 }
118}
119#endif // DIS_QUANT
120
121} // namespace
122
123void configure_kernel_CircleSoftmax(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
124{
125 kernels::SISOKernel kernel(cur_op, runtime_graph);
126
127 LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input()) ==
128 Tensor::element_type(kernel.output()));
129 LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input()) >= 1);
130
131#ifndef DIS_QUANT
132 if (Tensor::element_type(kernel.input()) == DataType::U8 ||
133 Tensor::element_type(kernel.input()) == DataType::S8)
134 {
135 LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input()) == DataType::S8 ||
136 Tensor::zero_point(kernel.output()) == 0);
137 LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input()) == DataType::U8 ||
138 Tensor::zero_point(kernel.output()) ==
139 std::numeric_limits<int8_t>::min());
140 }
141#endif
142}
143
144void execute_kernel_CircleSoftmax(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
145{
146 kernels::SISOKernel kernel(cur_op, runtime_graph);
147
148 const auto *options = cur_op->builtin_options_as_SoftmaxOptions();
149 const auto input_type = Tensor::element_type(kernel.input());
150 switch (input_type)
151 {
152#ifndef DIS_FLOAT
153 case DataType::FLOAT32:
154 evalFloat(kernel.input(), kernel.output(), options, runtime_graph);
155 break;
156#endif // DIS_FLOAT
157#ifndef DIS_QUANT
158 case DataType::S8:
159 case DataType::S16:
160 evalQuantize(kernel.input(), kernel.output(), options, runtime_graph);
161 break;
162#endif
163 default:
164 assert(false && "Unsupported type.");
165 }
166}
167
168} // namespace luci_interpreter
const circle::Tensor * output() const
Definition SISOKernel.h:47
const circle::Tensor * input() const
Definition SISOKernel.h:46
#define LUCI_INTERPRETER_CHECK(cond)
Definition Utils.h:36
list input_data
Definition infer.py:29
int calculateInputRadius(int input_integer_bits, int input_left_shift, int total_signed_bits)
Definition Utils.cpp:123
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
Definition Utils.cpp:157
void configure_kernel_CircleSoftmax(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
Definition Softmax.cpp:123
RuntimeGraph BaseRuntimeGraph
void execute_kernel_CircleSoftmax(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
Definition Softmax.cpp:144
const loco::Dimension & dim(uint32_t axis) const
Definition Tensor.h:44