ONE - On-device Neural Engine
PALSoftmaxCommon.h
Documentation of this file.
1/*
2 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef ONERT_MICRO_EXECUTE_PAL_SOFTMAX_COMMON_H
19#define ONERT_MICRO_EXECUTE_PAL_SOFTMAX_COMMON_H
20
#include "core/OMKernelData.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>
25namespace onert_micro
26{
27namespace execute
28{
29namespace pal
30{
31
32template <typename T, typename U>
33OMStatus Softmax(const core::SoftmaxParams &params, const T *input_data, U *output_data)
34{
35 const int outer_size = params.num_rows;
36 const int depth = params.row_size;
37 const double beta = params.beta;
38
39 const float input_scale = params.input_scale;
40 const float output_scale = params.output_scale;
41
42 const int input_zp = params.input_zp;
43 const int output_zp = params.output_zp;
44
45 for (int i = 0; i < outer_size; ++i)
46 {
47 // Find max element value which we'll use to ensure numerical stability
48 // taking advantage of the following equality:
49 // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
50 float max = std::numeric_limits<float>::lowest();
51 for (int c = 0; c < depth; ++c)
52 {
53 auto t = input_data[i * depth + c] - input_zp;
54 auto t_f = static_cast<float>(input_data[i * depth + c] - input_zp);
55 float cur_val = static_cast<float>(input_data[i * depth + c] - input_zp) * input_scale;
56 max = std::max(max, cur_val);
57 }
58
59 static constexpr int32_t min_val = std::numeric_limits<U>::min();
60 static constexpr int32_t max_val = std::numeric_limits<U>::max();
61 // Compute sum.
62 float sum = 0.f;
63 for (int c = 0; c < depth; ++c)
64 {
65 float cur_val = static_cast<float>(input_data[i * depth + c] - input_zp) * input_scale;
66 const auto exp_c = static_cast<float>(std::exp((cur_val - max) * beta));
67 sum += exp_c;
68 }
69
70 // Compute result.
71 for (int c = 0; c < depth; ++c)
72 {
73 float cur_val = static_cast<float>(input_data[i * depth + c] - input_zp) * input_scale;
74 const auto exp_c = static_cast<float>(std::exp((cur_val - max) * beta));
75 float softmax_val = exp_c / sum;
76 auto unclamped = static_cast<int32_t>(std::round(softmax_val / output_scale) +
77 static_cast<float>(output_zp));
78 int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
79 output_data[i * depth + c] = static_cast<U>(clamped);
80 }
81 }
82 return Ok;
83}
84
85template <>
86OMStatus Softmax<float, float>(const core::SoftmaxParams &params, const float *input_data,
87 float *output_data)
88{
89 const int outer_size = params.num_rows;
90 const int depth = params.row_size;
91 const double beta = params.beta;
92
93 for (int i = 0; i < outer_size; ++i)
94 {
95 // Find max element value which we'll use to ensure numerical stability
96 // taking advantage of the following equality:
97 // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
98 float max = std::numeric_limits<float>::lowest();
99 for (int c = 0; c < depth; ++c)
100 {
101 max = std::max(max, input_data[i * depth + c]);
102 }
103
104 // Compute sum.
105 float sum = 0.f;
106 for (int c = 0; c < depth; ++c)
107 {
108 const float exp_c = std::exp((input_data[i * depth + c] - max) * static_cast<float>(beta));
109 output_data[i * depth + c] = exp_c;
110 sum += exp_c;
111 }
112
113 assert(sum != 0);
114
115 if (sum == 0)
116 return UnknownError;
117
118 // Compute result.
119 for (int c = 0; c < depth; ++c)
120 {
121 output_data[i * depth + c] = output_data[i * depth + c] / sum;
122 }
123 }
124 return Ok;
125}
126
127} // namespace pal
128} // namespace execute
129} // namespace onert_micro
130
131#endif // ONERT_MICRO_EXECUTE_PAL_SOFTMAX_COMMON_H
OMStatus Softmax< float, float >(const core::SoftmaxParams &params, const float *input_data, float *output_data)
OMStatus Softmax(const core::SoftmaxParams &params, const T *input_data, U *output_data)