ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Softmax.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "OMStatus.h"
18
19#include "core/OMUtils.h"
20
23
24#include "PALSoftmax.h"
25
26#include "execute/OMUtils.h"
27
28using namespace onert_micro;
29using namespace onert_micro::execute;
30
31namespace
32{
33
34constexpr uint32_t inputTensorIdx = 0;
35constexpr uint32_t outputTensorIdx = 0;
36
37static const int kScaledDiffIntegerBits = 5;
38void preprocessSoftmaxScaling(double beta, double input_scale, int input_integer_bits,
39 int32_t *quantized_multiplier, int *left_shift)
40{
41 const double max_real_multiplier = (1LL << 31) - 1.0;
42 const double input_beta_real_multiplier =
43 std::min<double>(beta * input_scale * (1 << (31 - input_integer_bits)), max_real_multiplier);
44
45 onert_micro::execute::quantizeMultiplier(input_beta_real_multiplier, quantized_multiplier,
46 left_shift);
47}
48
49} // namespace
50
51// NOTE: doesnt currently support dynamic shapes
52OMStatus onert_micro::execute::execute_kernel_CircleSoftmax(const OMExecuteArgs &execute_args)
53{
54 core::OMRuntimeContext &runtime_context = execute_args.runtime_context;
55 core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage;
56 uint16_t op_index = execute_args.kernel_index;
57
58 const circle::Tensor *input = nullptr;
59 const circle::Tensor *output = nullptr;
60
61 uint8_t *input_data = nullptr;
62 uint8_t *output_data = nullptr;
63
64 OMStatus status = Ok;
65
66 const circle::SoftmaxOptions *options;
67 {
68 OMRuntimeKernel runtime_kernel;
69 runtime_kernel.readKernel(op_index, runtime_context);
70
71 input = runtime_kernel.inputs[inputTensorIdx];
72 output = runtime_kernel.outputs[outputTensorIdx];
73
74 assert(input != nullptr);
75 assert(output != nullptr);
76
77 status = runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context);
78 if (status != Ok)
79 return status;
80
81 input_data = runtime_kernel.inputs_data[inputTensorIdx];
83
84 options = runtime_kernel.first_operator->builtin_options_as_SoftmaxOptions();
85 }
86
87 assert(input_data != nullptr);
88 assert(output_data != nullptr);
89
90 const float beta = options->beta();
91
92 core::OMRuntimeShape inputs_shape(input);
93 core::OMRuntimeShape outputs_shape(output);
94
95 const auto dim_count = inputs_shape.dimensionsCount();
96
97 const auto trailing_dim = dim_count - 1;
98
99 int flat_size = 1;
100 for (int i = 0; i < inputs_shape.dimensionsCount(); ++i)
101 {
102 flat_size *= (i == trailing_dim) ? 1 : inputs_shape.dims(i);
103 }
104
105 core::SoftmaxParams params{};
106 params.beta = beta;
107 params.num_rows = flat_size;
108 params.row_size = std::min(inputs_shape.dims(trailing_dim), outputs_shape.dims(trailing_dim));
109
110 switch (input->type())
111 {
112#ifndef DIS_FLOAT
113 case circle::TensorType_FLOAT32:
114 {
115
116 status = pal::Softmax(params, core::utils::castInputData<float>(input_data),
117 core::utils::castOutputData<float>(output_data));
118 }
119 break;
120#endif // DIS_FLOAT
121#ifndef DIS_QUANT
122 case circle::TensorType_INT8:
123 {
124 assert(output->type() == circle::TensorType_INT8);
125 if (output->type() != circle::TensorType_INT8)
126 return UnsupportedType;
127
128 assert(input->quantization() != nullptr and output->quantization() != nullptr);
129 assert(input->quantization()->scale() != nullptr and
130 output->quantization()->scale() != nullptr);
131 assert(input->quantization()->zero_point() != nullptr and
132 output->quantization()->zero_point() != nullptr);
133 assert(input->quantization()->scale()->size() == 1 and
134 output->quantization()->scale()->size() == 1);
135 assert(input->quantization()->zero_point()->size() == 1 and
136 output->quantization()->zero_point()->size() == 1);
137
138 params.output_scale = output->quantization()->scale()->operator[](0);
139 params.input_scale = input->quantization()->scale()->operator[](0);
140 params.output_zp = output->quantization()->zero_point()->operator[](0);
141 params.input_zp = input->quantization()->zero_point()->operator[](0);
142
143 int left_shift = 0;
144 preprocessSoftmaxScaling(static_cast<double>(params.beta),
145 static_cast<double>(params.input_scale), kScaledDiffIntegerBits,
146 &params.input_multiplier, &left_shift);
147 params.input_left_shift = left_shift;
148 params.diff_min = -1.0 * onert_micro::execute::calculateInputRadius(
149 kScaledDiffIntegerBits, params.input_left_shift, 31);
150
151 status = pal::Softmax(params, core::utils::castInputData<int8_t>(input_data),
152 core::utils::castOutputData<int8_t>(output_data));
153 }
154 break;
155#endif // DIS_QUANT
156 default:
157 {
158 status = UnsupportedType;
159 assert(false && "Unsupported type.");
160 }
161 }
162
163 return status;
164}
uint8_t * outputs_data[maxOutputSize]
const circle::Operator * first_operator
OMStatus getDataFromStorage(uint16_t op_index, core::OMRuntimeStorage &storage, core::OMRuntimeContext &context)
OMStatus readKernel(uint16_t op_index, core::OMRuntimeContext &runtime_context)
const circle::Tensor * outputs[maxOutputSize]
const circle::Tensor * inputs[maxInputSize]
constexpr uint32_t outputTensorIdx
list input_data
Definition infer.py:29
OMStatus Softmax(const core::SoftmaxParams &params, const T *input_data, U *output_data)
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
Definition OMUtils.cpp:23
int calculateInputRadius(int input_integer_bits, int input_left_shift, int total_signed_bits)
Definition OMUtils.h:170
@ UnsupportedType
Definition OMStatus.h:26
core::OMRuntimeContext & runtime_context
core::OMRuntimeStorage & runtime_storage