ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Tanh.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "Builders.h"
18#include "kernels/Utils.h"
19#include "SISOKernel.h"
20
21#include "PALTanh.h"
22
23namespace luci_interpreter
24{
25
26#ifndef DIS_QUANT
27
28namespace
29{
// Fills in the fixed-point arithmetic parameters required by the integer
// tanh PAL kernel for the given input/output tensor pair.
//
// Out-parameters:
//   input_zero_point   - never written by this function (no path assigns it);
//                        the caller pre-initializes it to 0.
//   input_range_radius - written only by the disabled S8 path below; the S16
//                        path leaves it untouched.
//   input_multiplier   - rescaling multiplier for the input; 0 selects the
//                        power-of-two-scale fast path (presumably consumed by
//                        luci_interpreter_pal::Tanh - confirm in PALTanh.h).
//   input_left_shift   - left shift paired with input_multiplier.
void calculateArithmeticData(const circle::Tensor *input, const circle::Tensor *output,
                             int32_t &input_zero_point, int32_t &input_range_radius,
                             int32_t &input_multiplier, int &input_left_shift)
{
  const auto input_dtype = Tensor::element_type(input);
  switch (input_dtype)
  {
    // TODO: enable it
#if 0
    case DataType::S8:
    {
      // 4 integer bits leaves 27 fractional bits out of the 31 available.
      static constexpr int input_integer_bits = 4;
      const double input_real_multiplier = static_cast<double>(Tensor::scale(input)) *
                                           static_cast<double>(1 << (31 - input_integer_bits));

      // Split the real multiplier into a normalized mantissa (q) and an
      // exponent (input_left_shift), then quantize the mantissa to Q31.
      const double q = std::frexp(input_real_multiplier, &input_left_shift);
      input_multiplier = static_cast<int32_t>(std::round(q * (1ll << 31)));
      input_range_radius = kernels::calculateInputRadius(input_integer_bits, input_left_shift, 31);
    }
    break;
#endif
    case DataType::S16:
    {
      static constexpr int input_integer_bits = 3;
      static constexpr int output_fractional_bits = 15;

      // These operators are implemented in fixed-point arithmetic,
      // which intrinsically wants symmetric ranges (zero_point==0)
      // and power-of-two scales (power-of-two is abbreviated below as POT).
      // While more general support would be possible by means of rescaling,
      // that would add some overhead and some loss of accuracy and wouldn't
      // be used at the moment as current quantized LSTM applications are
      // happy with symmetric, power-of-two-scales quantization. So we just
      // implement that narrow case only for now.

      int input_scale_log2_rounded;
      bool param_scale_pot = kernels::checkedLog2(Tensor::scale(input), &input_scale_log2_rounded);

      input_left_shift = (15 - input_integer_bits) + input_scale_log2_rounded;
      param_scale_pot &= (input_left_shift == 0 || input_left_shift == 1);

      if (param_scale_pot)
      {
        // POT scale with a small shift: no rescaling multiplier is needed;
        // input_multiplier == 0 marks this fast path.
        input_multiplier = 0;
      }
      else
      {
        // Calculate multiplier to change input scale to 1/(3*4096)
        // as required by the table lookup.
        // The number 3.0 in the multiplier comes from here,
        // because the interval is [-10.7, 10.7] instead of [-8, 8].
        // So, in this scaling +/-2^17 represents +/-10.7.

        double multiplier = static_cast<double>(Tensor::scale(input)) * 4096.0 * 3.0;
        input_left_shift = 0;

        // Normalize the multiplier into (32767/2, 32767] by doubling it,
        // counting the doublings in input_left_shift (shift capped at 31).
        while (multiplier <= 32767.0 / 2.0 && input_left_shift <= 30)
        {
          input_left_shift++;
          multiplier = multiplier * 2.0;
        }

        input_multiplier = static_cast<int32_t>(multiplier);
      }

      // The output must be Q0.15 fixed point, i.e. scale == 2^-15.
      int output_scale_log2_rounded;
      kernels::checkedLog2(Tensor::scale(output), &output_scale_log2_rounded);
      assert(output_scale_log2_rounded == -output_fractional_bits);
    }
    break;
    default:
      assert(false && "Unsupported type");
  }
}
104
105} // namespace
106
107void evalInteger(const circle::Tensor *input, const circle::Tensor *output,
108 BaseRuntimeGraph *runtime_graph)
109{
110 int32_t input_zero_point = 0;
111 int32_t input_range_radius = 0;
112 int32_t input_multiplier = 0;
113 int input_left_shift = 0;
114
115 calculateArithmeticData(input, output, input_zero_point, input_range_radius, input_multiplier,
116 input_left_shift);
117
118 const auto *input_data = runtime_graph->getDataByTensor(input);
119 assert(input_data);
120
121 auto *output_data = runtime_graph->getDataByTensor(output);
122 assert(output_data);
123
124 const int flat_size = kernels::getTensorRuntimeShape(input, runtime_graph).flatSize();
125
126 const auto input_dtype = Tensor::element_type(input);
127 switch (input_dtype)
128 {
129 // TODO: enable it
130#if 0
131 case DataType::S8:
133 input_zero_point, input_range_radius, input_multiplier, input_left_shift,
134 flat_size, kernels::getTensorData<int8_t>(input_data), kernels::getTensorData<int8_t>(output_data));
135 break;
136#endif // 0
137 case DataType::S16:
138 luci_interpreter_pal::Tanh(input_multiplier, input_left_shift, flat_size,
139 kernels::getTensorData<int16_t>(input_data),
140 kernels::getTensorData<int16_t>(output_data));
141 break;
142 default:
143 assert(false && "Not support yet");
144 }
145}
146#endif // DIS_QUANT
147
148void configure_kernel_CircleTanh(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
149{
150 kernels::SISOKernel kernel(cur_op, runtime_graph);
151
152 LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input()) ==
153 Tensor::element_type(kernel.output()));
154}
155
// Executes the CircleTanh operator: dispatches on the input element type to
// the float PAL kernel, or to evalInteger() for quantized (S16) tensors.
void execute_kernel_CircleTanh(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
{
  kernels::SISOKernel kernel(cur_op, runtime_graph);

  const auto *input_data = runtime_graph->getDataByTensor(kernel.input());
  assert(input_data);

  // Not asserted here: for in-place execution the float path below falls back
  // to writing over the input buffer instead.
  auto *output_data = runtime_graph->getDataByTensor(kernel.output());

  bool is_inplace = runtime_graph->is_inplace_op(cur_op);

  switch (Tensor::element_type(kernel.input()))
  {
#ifndef DIS_FLOAT
    case DataType::FLOAT32:
    {
      const float *input_data_float = kernels::getTensorData<float>(input_data);
      float *output_data_float = kernels::getTensorData<float>(output_data);
      if (is_inplace)
      {
        // In-place: the result is written directly over the input buffer.
        output_data_float = const_cast<float *>(input_data_float);
      }

      assert(output_data_float);

      const int flat_size =
        kernels::getTensorRuntimeShape(kernel.input(), runtime_graph).flatSize();

      luci_interpreter_pal::Tanh(flat_size, input_data_float, output_data_float);
      break;
    }
#endif // DIS_FLOAT
#ifndef DIS_QUANT
    case DataType::S16:
      // TODO: enable it
#if 0
    case DataType::S8:
#endif
      // NOTE(review): unlike the float path above, evalInteger() does not
      // redirect the output pointer for in-place execution - confirm that
      // in-place quantized ops are supported (evalInteger asserts a non-null
      // output buffer).
      evalInteger(kernel.input(), kernel.output(), runtime_graph);
      break;
#endif // DIS_QUANT
    default:
      assert(false && "Unsupported type");
  }

  if (is_inplace)
    runtime_graph->makeInplaceOperation(kernel.input(), kernel.output());
}
204} // namespace luci_interpreter
void makeInplaceOperation(const circle::Tensor *src_tensor, const circle::Tensor *dst_tensor)
bool is_inplace_op(const circle::Operator *op)
uint8_t * getDataByTensor(const circle::Tensor *raw_tensor)
const circle::Tensor * output() const
Definition SISOKernel.h:47
const circle::Tensor * input() const
Definition SISOKernel.h:46
#define LUCI_INTERPRETER_CHECK(cond)
Definition Utils.h:36
bool checkedLog2(const float x, int *log2_result)
Definition Utils.cpp:113
int calculateInputRadius(int input_integer_bits, int input_left_shift, int total_signed_bits)
Definition Utils.cpp:123
luci_interpreter::RuntimeShape getTensorRuntimeShape(const circle::Tensor *circle_tensor, BaseRuntimeGraph *runtime_graph)
Definition Utils.cpp:29
void Tanh(const int flat_size, const float *input_data, float *output_data)
Definition PALTanh.h:26
void evalInteger(const circle::Tensor *input, const circle::Tensor *output, BaseRuntimeGraph *runtime_graph)
Definition Tanh.cpp:107
void execute_kernel_CircleTanh(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
Definition Tanh.cpp:156
void configure_kernel_CircleTanh(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
Definition Tanh.cpp:148