ONE - On-device Neural Engine
Loading...
Searching...
No Matches
PALLogistic.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef LUCI_INTERPRETER_PAL_LOGISTIC_H
19#define LUCI_INTERPRETER_PAL_LOGISTIC_H
20
21#include "Params.h"
22#include "PALUtils.h"
23
25{
26
// Element-wise logistic (sigmoid) over a flat float buffer.
//
// flat_size   - number of elements to process; nothing is written when it is <= 0
// input_data  - source buffer, read at indices [0, flat_size)
// output_data - destination buffer, written at indices [0, flat_size)
inline void Logistic(const int flat_size, const float *input_data, float *output_data)
{
  const float cutoff_upper = 16.619047164916992188f;
  const float cutoff_lower = -9.f;

  // Rationale for using an approximation in the reference kernel:
  // 0. The approximation gives enough precision for float.
  // 1. It works around an issue on an embedded chipset where exp() does not
  //    behave as expected on overflow: exp(x) should return inf (IEEE 754
  //    defines a representation for inf), not 1.701417.
  // 2. It speeds up the calculation and matches the behavior of the
  //    optimized kernels (see the definition of scalar_logistic_op<float>).

  for (int i = 0; i < flat_size; i++)
  {
    const float val = input_data[i];
    float result;
    if (val > cutoff_upper)
    {
      // Above the upper cutoff the output is fixed at 1.
      result = 1.0f;
    }
    else if (val < cutoff_lower)
    {
      // Below the lower cutoff exp(val) is used in place of the full formula.
      result = std::exp(val);
    }
    else
    {
      result = 1.f / (1.f + std::exp(-val));
    }
    // Store the computed value; without this the function has no observable effect.
    output_data[i] = result;
  }
}
59
// Element-wise logistic (sigmoid) over a flat int8 buffer.
//
// Every element is dequantized to float, passed through the same piecewise
// logistic evaluation as the float kernel, and requantized to int8.
//
// flat_size         - number of elements to process
// input_data        - quantized source buffer, read at indices [0, flat_size)
// input_scale       - scale used to dequantize the input
// input_zero_point  - zero point used to dequantize the input
// output_data       - quantized destination buffer, written at [0, flat_size)
// output_scale      - scale used to requantize the result
// output_zero_point - zero point used to requantize the result
inline void Logistic(const int flat_size, const int8_t *input_data, float input_scale,
                     int input_zero_point, int8_t *output_data, float output_scale,
                     int output_zero_point)
{
  const float cutoff_upper = 16.619047164916992188f;
  const float cutoff_lower = -9.f;

  // Representable range of the int8 output.
  const float quantized_min = -128.f;
  const float quantized_max = 127.f;

  // Rationale for using an approximation in the reference kernel:
  // 0. The approximation gives enough precision for float.
  // 1. It works around an issue on an embedded chipset where exp() does not
  //    behave as expected on overflow: exp(x) should return inf (IEEE 754
  //    defines a representation for inf), not 1.701417.
  // 2. It speeds up the calculation and matches the behavior of the
  //    optimized kernels (see the definition of scalar_logistic_op<float>).

  for (int i = 0; i < flat_size; i++)
  {
    // Dequantize.
    const float val = static_cast<float>((input_data[i] - input_zero_point) * input_scale);
    float result;
    if (val > cutoff_upper)
    {
      result = 1.0f;
    }
    else if (val < cutoff_lower)
    {
      result = std::exp(val);
    }
    else
    {
      result = 1.f / (1.f + std::exp(-val));
    }

    // Requantize: round to nearest, then saturate to the int8 range. A bare
    // cast truncates toward zero and is undefined once the value leaves
    // [-128, 127] (e.g. result == 1.0 with scale 1/256 and zero point -128
    // yields 128).
    float quantized = std::round(result / output_scale + output_zero_point);
    if (quantized < quantized_min)
    {
      quantized = quantized_min;
    }
    else if (quantized > quantized_max)
    {
      quantized = quantized_max;
    }
    output_data[i] = static_cast<int8_t>(quantized);
  }
}
97
98inline void Logistic(int32_t input_multiplier, int32_t input_left_shift, int32_t input_size,
99 const int16_t *ptr_input_data, int16_t *ptr_output_data)
100{
101 // We use the LUT for sigmoid and take into account, that
102 // tanh(x) = 2*sigmoid(2*x) - 1
103
104 // We scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
105 // In case of general parameter scale, multiplier 3 is taken into account
106 // in TanhPrepare function and it is included in
107 // input_multiplier already.
108 if (input_multiplier == 0)
109 { // power of two case
110 input_multiplier = 3 << input_left_shift;
111 input_left_shift = 0;
112 }
113
114 int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
115
116 for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++)
117 {
118 int32_t input_data = ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
119
120 // We do interpolation on unsigned values.
121 uint32_t abs_input_data = abs(input_data);
122
123 // We divide by 2 power of 9, because
124 // we need to divide by 2 in power of 7 for
125 // the input conversion + 1/4 from the scale above.
126
127 // Define uh as uint32_t type not to make this function overflow.
128 uint32_t uh = abs_input_data >> 9;
129 uint32_t result;
130
131 if (uh >= 255)
132 {
133 // Saturate to maximum.
134 result = 0x7FFF << 10;
135 }
136 else
137 {
138 uint32_t ua = sigmoid_table_uint16[uh];
139 uint32_t ub = sigmoid_table_uint16[uh + 1];
140 uint32_t ut = abs_input_data & 0x1ff;
141 // Interpolation is done using the fractional bit.
142 result = (ua << 9) + ut * (ub - ua);
143 }
144
145 result = (input_data >= 0) ? (result + (1 << 9)) : ((1 << (16 + 9)) - result + (1 << 9) - 1);
146
147 // Back to 16-bit.
148 result >>= 10;
149
150 *ptr_output_data = result;
151 }
152}
153
154} // namespace luci_interpreter_pal
155
156#endif // LUCI_INTERPRETER_PAL_LOGISTIC_H
result
Definition infer.py:103
list input_data
Definition infer.py:29
void Logistic(const int flat_size, const float *input_data, float *output_data)
Definition PALGRU.h:26