ONE - On-device Neural Engine
ComparisonCommon.h
/*
 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ONERT_MICRO_EXECUTE_KERNELS_COMPARISONCOMMON_H
#define ONERT_MICRO_EXECUTE_KERNELS_COMPARISONCOMMON_H

#include "OMStatus.h"

#include "core/OMUtils.h"
#include "execute/OMUtils.h"
#include "core/OMKernelData.h"
#include "execute/OMRuntimeKernel.h"

#include "PALComparisons.h"

namespace onert_micro
{
namespace execute
{

namespace
{

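// Positions of the operands in the kernel's input/output tensor lists.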
constexpr uint32_t input1TensorIdx = 0;
constexpr uint32_t input2TensorIdx = 1;
constexpr uint32_t outputTensorIdx = 0;

} // namespace

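// Reads the two inputs and single output of a comparison kernel: casts the raw
// input buffers to T and the output buffer to bool, and fills in the runtime shapes.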
template <typename T>
void readDataKernel(OMRuntimeKernel *runtime_kernel, const T *&cast_input_data1,
                    const T *&cast_input_data2, bool *&cast_output_data,
                    core::OMRuntimeShape &input1_shape_ref, core::OMRuntimeShape &input2_shape_ref,
                    core::OMRuntimeShape &output_shape_ref)
{
  const circle::Tensor *input1 = nullptr;
  const circle::Tensor *input2 = nullptr;
  const circle::Tensor *output = nullptr;

  uint8_t *input_data1 = nullptr;
  uint8_t *input_data2 = nullptr;
  uint8_t *output_data = nullptr;

  input1 = runtime_kernel->inputs[input1TensorIdx];
  input2 = runtime_kernel->inputs[input2TensorIdx];
  output = runtime_kernel->outputs[outputTensorIdx];

  assert(input1 != nullptr);
  assert(input2 != nullptr);
  assert(output != nullptr);

  input_data1 = runtime_kernel->inputs_data[input1TensorIdx];
  input_data2 = runtime_kernel->inputs_data[input2TensorIdx];
  output_data = runtime_kernel->outputs_data[outputTensorIdx];

  assert(input_data1 != nullptr);
  assert(input_data2 != nullptr);
  assert(output_data != nullptr);

  cast_input_data1 = core::utils::castInputData<T>(input_data1);
  cast_input_data2 = core::utils::castInputData<T>(input_data2);
  cast_output_data = core::utils::castOutputData<bool>(output_data);

  // The constructed temporaries are already rvalues, so plain assignment
  // suffices (the original std::move was redundant).
  input1_shape_ref = core::OMRuntimeShape(input1);
  input2_shape_ref = core::OMRuntimeShape(input2);
  output_shape_ref = core::OMRuntimeShape(output);
}

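// Applies the element-wise predicate F to two tensors of type T, dispatching to
// the broadcasting path when the input shapes differ in flat size.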
template <typename T> void evalComparisonGeneric(OMRuntimeKernel *runtime_kernel, bool F(T, T))
{
  const T *cast_input_data1 = nullptr;
  const T *cast_input_data2 = nullptr;
  bool *cast_output_data = nullptr;

  core::OMRuntimeShape input1_shape;
  core::OMRuntimeShape input2_shape;
  core::OMRuntimeShape output_shape;

  readDataKernel(runtime_kernel, cast_input_data1, cast_input_data2, cast_output_data, input1_shape,
                 input2_shape, output_shape);

  core::ComparisonParams op_params;
  op_params.is_broadcast = input1_shape.flatSize() != input2_shape.flatSize();

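  // A flat-size mismatch means the inputs must be broadcast against each other;
  // matching sizes take the straight element-wise path.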
  if (op_params.is_broadcast)
  {
    onert_micro::execute::pal::BroadcastComparison4DSlowNoScaling<T>(
      op_params, input1_shape, cast_input_data1, input2_shape, cast_input_data2, output_shape,
      cast_output_data, F);
  }
  else
  {
    const int64_t flat_size = input1_shape.flatSize();
    onert_micro::execute::pal::ComparisonNoScaling<T>(flat_size, cast_input_data1, cast_input_data2,
                                                      cast_output_data, F);
  }
}

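// Quantized variant: both inputs are rescaled into a common fixed-point domain
// (AccType, typically int32_t) before the predicate F is applied.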
template <typename T, typename AccType>
void evalQuantizedComparisonGeneric(OMRuntimeKernel *runtime_kernel, bool F(AccType, AccType))
{
  const circle::Tensor *input1 = nullptr;
  const circle::Tensor *input2 = nullptr;
  const circle::Tensor *output = nullptr;

  input1 = runtime_kernel->inputs[input1TensorIdx];
  input2 = runtime_kernel->inputs[input2TensorIdx];
  output = runtime_kernel->outputs[outputTensorIdx];

  assert(input1 != nullptr);
  assert(input2 != nullptr);
  assert(output != nullptr);

  const T *cast_input_data1 = nullptr;
  const T *cast_input_data2 = nullptr;
  bool *cast_output_data = nullptr;

  core::OMRuntimeShape input1_shape;
  core::OMRuntimeShape input2_shape;
  core::OMRuntimeShape output_shape;

  readDataKernel(runtime_kernel, cast_input_data1, cast_input_data2, cast_output_data, input1_shape,
                 input2_shape, output_shape);

  // Both inputs must carry per-tensor quantization: a single scale/zero-point pair each.
  assert(input1->quantization() != nullptr);
  assert(input1->quantization()->scale() != nullptr);
  assert(input1->quantization()->scale()->size() == 1);
  assert(input1->quantization()->zero_point() != nullptr);
  assert(input1->quantization()->zero_point()->size() == 1);
  assert(input2->quantization() != nullptr);
  assert(input2->quantization()->scale() != nullptr);
  assert(input2->quantization()->scale()->size() == 1);
  assert(input2->quantization()->zero_point() != nullptr);
  assert(input2->quantization()->zero_point()->size() == 1);

  auto input1_scale = *input1->quantization()->scale()->begin();
  auto input2_scale = *input2->quantization()->scale()->begin();

  auto input1_zero_point = *input1->quantization()->zero_point()->begin();
  auto input2_zero_point = *input2->quantization()->zero_point()->begin();

  int32_t x_multiplier;
  int x_shift;

  int32_t y_multiplier;
  int y_shift;

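  // Convert each input's float scale into an integer multiplier plus a shift so
  // the rescaling below can run in pure fixed-point arithmetic.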
  onert_micro::execute::quantizeMultiplierSmallerThanOneExp(input1_scale, &x_multiplier, &x_shift);
  onert_micro::execute::quantizeMultiplierSmallerThanOneExp(input2_scale, &y_multiplier, &y_shift);

  core::ComparisonParams op_params;
  op_params.left_shift = 8;
  op_params.input1_offset = -input1_zero_point; // Note the '-'
  op_params.input1_shift = x_shift;
  op_params.input1_multiplier = x_multiplier;
  op_params.input2_offset = -input2_zero_point; // Note the '-'
  op_params.input2_shift = y_shift;
  op_params.input2_multiplier = y_multiplier;
  op_params.is_broadcast = input1_shape.flatSize() != input2_shape.flatSize();

  if (op_params.is_broadcast)
  {
    onert_micro::execute::pal::BroadcastComparison4DSlowWithScaling<T>(
      op_params, input1_shape, cast_input_data1, input2_shape, cast_input_data2, output_shape,
      cast_output_data, F);
  }
  else
  {
    const int64_t flat_size = input1_shape.flatSize();
    onert_micro::execute::pal::ComparisonWithScaling<T>(op_params, flat_size, cast_input_data1,
                                                        cast_input_data2, cast_output_data, F);
  }
}

} // namespace execute
} // namespace onert_micro

#endif // ONERT_MICRO_EXECUTE_KERNELS_COMPARISONCOMMON_H
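
For context, a concrete comparison kernel wires these helpers up by switching on the element type of its first input. The following is a minimal sketch, not the repository's kernel source: the function name executeEqualSketch is hypothetical, the include path is inferred from the header guard, and the pal::EqualFn-style predicate is assumed to be provided by PALComparisons.h as in comparable runtimes.

// Hypothetical dispatch sketch for an Equal kernel; names marked above are
// assumptions, not the repository's exact code.
#include "execute/kernels/ComparisonCommon.h"

#include <cassert>

namespace onert_micro
{
namespace execute
{

OMStatus executeEqualSketch(OMRuntimeKernel &runtime_kernel)
{
  const circle::Tensor *input1 = runtime_kernel.inputs[input1TensorIdx];
  assert(input1 != nullptr);

  switch (input1->type())
  {
    case circle::TensorType_FLOAT32:
      evalComparisonGeneric<float>(&runtime_kernel, pal::EqualFn<float>);
      break;
    case circle::TensorType_INT64:
      evalComparisonGeneric<int64_t>(&runtime_kernel, pal::EqualFn<int64_t>);
      break;
    case circle::TensorType_UINT8:
      // Quantized inputs are compared in the widened int32_t domain after rescaling.
      evalQuantizedComparisonGeneric<uint8_t, int32_t>(&runtime_kernel, pal::EqualFn<int32_t>);
      break;
    default:
      return UnsupportedType;
  }
  return Ok;
}

} // namespace execute
} // namespace onert_micro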