ONE - On-device Neural Engine
OperationUtils.cc
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "OperationUtils.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <limits>
#include <stdexcept>
#include <vector>

namespace onert::backend::cpu::ops
{

uint32_t getNumberOfDimensions(const IPortableTensor *tensor)
{
  assert(tensor);
  return tensor->getShape().rank();
}

uint32_t getNumberOfElements(const IPortableTensor *tensor)
{
  assert(tensor);
  uint32_t count = 1;
  auto shape = tensor->getShape();
  for (int i = 0; i < shape.rank(); i++)
  {
    count *= shape.dim(i);
  }
  return count;
}

uint32_t getSizeOfDimension(const IPortableTensor *tensor, uint32_t dimensionIdx)
{
  assert(tensor);
  auto shape = tensor->getShape();
  if (dimensionIdx >= static_cast<uint32_t>(shape.rank()))
  {
    // TODO: log the error
    return 0;
  }
  return shape.dim(dimensionIdx);
}

void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
{
  if (double_multiplier == 0.)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(double_multiplier, shift);
  auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));

  assert(q_fixed <= (1ll << 31));
  if (q_fixed == (1ll << 31))
  {
    q_fixed /= 2;
    ++*shift;
  }
  assert(q_fixed <= std::numeric_limits<int32_t>::max());
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}
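
// Illustrative note: the (quantized_multiplier, shift) pair encodes the real
// multiplier M as a Q0.31 fixed-point value, M ~= quantized_multiplier *
// 2^(shift - 31). A worked example, assuming M = 0.75:
//
//   int32_t q;
//   int shift;
//   QuantizeMultiplier(0.75, &q, &shift);
//   // std::frexp(0.75, &shift) yields mantissa 0.75 and shift = 0, so
//   // q = round(0.75 * 2^31) = 1610612736, and indeed
//   // 1610612736 * 2^(0 - 31) == 0.75.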

void GetQuantizedConvolutionMultiplier(const IPortableTensor *input, const IPortableTensor *filter,
                                       const IPortableTensor *bias, const IPortableTensor *output,
                                       double *multiplier)
{
  const double input_product_scale = input->data_scale() * filter->data_scale();
  [[maybe_unused]] const double bias_scale =
    (bias != nullptr) ? bias->data_scale() : input_product_scale;
  const double output_scale = output->data_scale();
  // The following conditions must be guaranteed by the training pipeline.
  assert(std::abs(input_product_scale - bias_scale) <=
         1e-6 * std::min(input_product_scale, bias_scale));
  assert(input_product_scale >= 0);
  assert(input_product_scale < output_scale);
  *multiplier = input_product_scale / output_scale;
}
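
// Illustrative note: in a quantized convolution the int32 accumulator carries
// the scale S_input * S_filter (writing S_x for x->data_scale()), so rescaling
// it to the output needs the single effective multiplier
// M = S_input * S_filter / S_output computed above. With assumed scales
// S_input = 0.5, S_filter = 0.25 and S_output = 0.2, M = 0.625, which
// satisfies the asserted 0 <= M < 1 and can then be encoded with
// QuantizeMultiplier().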

void GetQuantizedConvolutionMultipliersAndShifts(
  float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size,
  int num_channels, std::vector<int32_t> &per_channel_output_multiplier,
  std::vector<int> &per_channel_output_shift)
{
  // Originates from tflite's PopulateConvolutionQuantizationParams()
  per_channel_output_multiplier.resize(num_channels);
  per_channel_output_shift.resize(num_channels);

  const bool is_per_channel = filter_scales_size > 1;
  auto per_channel_multiplier = per_channel_output_multiplier.data();
  auto per_channel_shift = per_channel_output_shift.data();
  for (int i = 0; i < num_channels; ++i)
  {
    // If per-tensor quantization parameter is specified, broadcast it along the
    // quantization dimension (channels_out).
    const float scale = is_per_channel ? filter_scales[i] : filter_scales[0];
    const double filter_scale = static_cast<double>(scale);
    const double effective_output_scale =
      static_cast<double>(input_scale) * filter_scale / static_cast<double>(output_scale);
    int32_t significand;
    int channel_shift;
    QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
    per_channel_multiplier[i] = significand;
    per_channel_shift[i] = channel_shift;
  }
}
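
// Illustrative note: per-channel quantization repeats the per-tensor
// computation once per output channel. Assuming input_scale = 0.5f,
// output_scale = 0.2f and filter_scales = {0.25f, 0.125f} (two channels),
// channel 0 gets the effective scale 0.5 * 0.25 / 0.2 = 0.625 and channel 1
// gets 0.3125; each value is then split into a Q0.31 significand and shift by
// QuantizeMultiplier() above.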

void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
                                      int *left_shift)
{
  assert(double_multiplier > 1.);
  const double q = std::frexp(double_multiplier, left_shift);
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
  assert(q_fixed <= (1ll << 31));
  if (q_fixed == (1ll << 31))
  {
    q_fixed /= 2;
    ++*left_shift;
  }
  assert(*left_shift >= 0);
  assert(q_fixed <= std::numeric_limits<int32_t>::max());
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}
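
// Illustrative note: unlike QuantizeMultiplier(), this variant requires M > 1,
// so the returned exponent is a non-negative left shift. Assuming M = 3.0:
// std::frexp(3.0) yields mantissa 0.75 and left_shift = 2, so
// quantized_multiplier = round(0.75 * 2^31) = 1610612736, and
// 1610612736 * 2^(2 - 31) == 3.0.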

void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output,
                                       int32_t *act_min, int32_t *act_max)
{
  int32_t qmin = 0;
  int32_t qmax = 0;

  switch (output->data_type())
  {
    case OperandType::QUANT_UINT8_ASYMM:
      qmin = std::numeric_limits<uint8_t>::min();
      qmax = std::numeric_limits<uint8_t>::max();
      break;
    case OperandType::QUANT_INT8_ASYMM:
    case OperandType::QUANT_INT8_SYMM:
      qmin = std::numeric_limits<int8_t>::min();
      qmax = std::numeric_limits<int8_t>::max();
      break;
    default:
      throw std::runtime_error("CalculateActivationRangeQuantized: Not supported operand type.");
  }

  const auto scale = output->data_scale();
  const auto zero_point = output->data_zero_point();
  auto quantize = [scale, zero_point](float f) {
    return zero_point + static_cast<int32_t>(std::round(f / scale));
  };
  if (activation == ir::Activation::RELU)
  {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = qmax;
  }
  else if (activation == ir::Activation::RELU6)
  {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = std::min(qmax, quantize(6.0));
  }
  else if (activation == ir::Activation::RELU1)
  {
    *act_min = std::max(qmin, quantize(-1.0));
    *act_max = std::min(qmax, quantize(1.0));
  }
  else if (activation == ir::Activation::SIGMOID)
  {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = std::min(qmax, quantize(1.0));
  }
  else if (activation == ir::Activation::NONE)
  {
    *act_min = qmin;
    *act_max = qmax;
  }
  else
  {
    throw std::runtime_error{"Unsupported fused activation function."};
  }
}
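
// Illustrative note: the clamp bounds are the fused activation's real-valued
// range mapped into the output's quantized domain. Assuming a
// QUANT_UINT8_ASYMM output with scale = 0.05f and zero_point = 0, RELU6 gives
//   act_min = max(0, 0 + round(0.0 / 0.05)) = 0
//   act_max = min(255, 0 + round(6.0 / 0.05)) = min(255, 120) = 120
// so the kernel clamps its requantized results to [0, 120].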

bool HaveSameShapes(const IPortableTensor *input1, const IPortableTensor *input2)
{
  if (input1 == input2)
    return true;
  if (input1 == NULL || input2 == NULL)
    return false;

  if (getNumberOfDimensions(input1) != getNumberOfDimensions(input2))
    return false;

  auto shape1 = input1->getShape();
  auto shape2 = input2->getShape();
  for (uint32_t i = 0; i < getNumberOfDimensions(input1); i++)
    if (shape1.dim(i) != shape2.dim(i))
      return false;

  return true;
}

int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift)
{
  const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
                                    (1ll << (31 - input_integer_bits)) / (1ll << input_left_shift);
  // Tighten bound using floor. Suppose that we could use the exact value.
  // After scaling the difference, the result would be at the maximum. Thus we
  // must ensure that our value has lower magnitude.
  return static_cast<int32_t>(std::floor(max_input_rescaled));
}
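
// Illustrative note: the radius bounds inputs to fixed-point kernels (such as
// the softmax/logistic implementations); beyond it the result saturates.
// Assuming input_integer_bits = 4 and input_left_shift = 0:
//   max_input_rescaled = (2^4 - 1) * 2^(31 - 4) / 2^0 = 15 * 2^27 = 2013265920
// which floor() leaves unchanged, comfortably inside the int32_t range.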

uint32_t sizeOfData(OperandType type, const std::vector<int32_t> &dimensions)
{
  uint32_t size = 4;

  switch (type)
  {
    case OperandType::FLOAT32:
    case OperandType::INT32:
    case OperandType::UINT32:
      size = 4;
      break;
    case OperandType::BOOL8:
    case OperandType::QUANT_UINT8_ASYMM:
    case OperandType::QUANT_INT8_SYMM:
      size = 1;
      break;
    case OperandType::INT64:
      size = 8;
      break;
    default:
      throw std::runtime_error("Not supported operand type.");
  }

  for (auto &&d : dimensions)
  {
    assert(d >= 0);
    size *= static_cast<uint32_t>(d);
  }

  return size;
}
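
// Illustrative usage: sizeOfData(OperandType::FLOAT32, {2, 3, 4}) returns
// 4 * 2 * 3 * 4 = 96 bytes, while an empty dimension list (a scalar) returns
// just the element size. A dimension of 0 is allowed and yields 0.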

nnfw::cker::PaddingType getPaddingType(ir::PaddingType ir_padding_type)
{
  switch (ir_padding_type)
  {
    case ir::PaddingType::EXPLICIT:
      return nnfw::cker::PaddingType::kNone;
    case ir::PaddingType::SAME:
      return nnfw::cker::PaddingType::kSame;
    case ir::PaddingType::VALID:
      return nnfw::cker::PaddingType::kValid;
    default:
      throw std::runtime_error("Wrong padding type.");
  }
}

std::vector<int32_t> getReducerAxes(const IPortableTensor *axes)
{
  std::vector<int32_t> ret;

  auto axes_vals = (axes->getShape().rank() == 0) ? 1 : axes->getShape().dim(0);
  assert(static_cast<size_t>(axes_vals) == axes->getShape().num_elements());
  switch (axes->data_type())
  {
    case ir::DataType::INT32:
    {
      for (int i = 0; i < axes_vals; ++i)
        ret.emplace_back(*(getBuffer<int32_t>(axes) + i));
      break;
    }
    case ir::DataType::INT64:
    {
      for (int i = 0; i < axes_vals; ++i)
        ret.emplace_back(*(getBuffer<int64_t>(axes) + i));
      break;
    }
    default:
      throw std::runtime_error("getReducerAxes: Not supported data type");
  }
  return ret;
}

nnfw::cker::RoPEMode getRoPEMode(ir::operation::RoPE::RoPEMode rope_mode)
{
  switch (rope_mode)
  {
    case ir::operation::RoPE::RoPEMode::GPT_NEOX:
      return nnfw::cker::RoPEMode::kGptNeox;
    case ir::operation::RoPE::RoPEMode::GPT_J:
      return nnfw::cker::RoPEMode::kGptJ;
    default:
      throw std::runtime_error("Wrong rope mode.");
  }
}

} // namespace onert::backend::cpu::ops