ONE - On-device Neural Engine
PortableTensorUtils.h
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NNFW_CKER_PORTABLE_TENSOR_UTILS_H__
#define __NNFW_CKER_PORTABLE_TENSOR_UTILS_H__

#include "cker/Types.h"
#include <ruy/context.h>

#include <cstring>
#include <cmath>
#include <algorithm> // std::minmax_element, std::min/std::max, std::fill_n
#include <cstdlib>   // exit

namespace nnfw
{
namespace cker
{

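// Scalar activation functor used by PortableApplyActivationToVector below: applies the
// activation selected by a FusedActivationFunctionType (identity, ReLU, ReLU6, tanh, or
// sigmoid; see cker/Types.h) to a single float value.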
class ActivationFunctor
{
public:
  explicit ActivationFunctor(FusedActivationFunctionType act) : act_(act) {}

  float operator()(float a) const
  {
    switch (act_)
    {
      case FusedActivationFunctionType::kNone:
        return a;
      case FusedActivationFunctionType::kRelu:
        return a < 0.f ? 0.f : a;
      case FusedActivationFunctionType::kRelu6:
        return std::max(0.f, std::min(a, 6.f));
      case FusedActivationFunctionType::kTanh:
        return std::tanh(a);
      case FusedActivationFunctionType::kSigmoid:
        return 1.0f / (1.0f + std::exp(-a));
      default:
        // TODO(aselle): More informative fatal error!
        exit(1);
    }
  }

private:
  FusedActivationFunctionType act_;
};

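// Clamps every element of `vector` (length v_size) to the range [-clipping_value, clipping_value].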
template <typename T>
void PortableCwiseClipping(T *vector, const int v_size, const T clipping_value)
{
  for (int i = 0; i < v_size; i++)
  {
    vector[i] = std::max(std::min(clipping_value, vector[i]), static_cast<T>(-clipping_value));
  }
}

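// Copies `vector` (length v_size) into each of the n_batch consecutive rows of `batch_vector`.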
inline void PortableVectorBatchVectorAssign(const float *vector, int v_size, int n_batch,
                                            float *batch_vector)
{
  for (int b = 0; b < n_batch; b++)
  {
    memcpy(batch_vector + b * v_size, vector, v_size * sizeof(float));
  }
}

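// Adds `vector` (length v_size) element-wise to each of the n_batch rows of `batch_vector`, in place.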
inline void PortableVectorBatchVectorAdd(const float *vector, int v_size, int n_batch,
                                         float *batch_vector)
{
  for (int b = 0; b < n_batch; b++)
  {
    for (int i = 0; i < v_size; ++i)
    {
      batch_vector[i] += vector[i];
    }
    batch_vector += v_size;
  }
}

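// Returns true if all v_size elements of `vector` are exactly 0.0f.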
inline bool PortableIsZeroVector(const float *vector, int v_size)
{
  for (int i = 0; i < v_size; ++i)
  {
    if (*vector++ != 0.0f)
      return false;
  }
  return true;
}

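// Applies the given fused activation to each of the v_size elements of `vector`, writing to
// `result` (the loop walks both pointers in lockstep, so in-place use is safe).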
inline void PortableApplyActivationToVector(const float *vector, int v_size,
                                            FusedActivationFunctionType activation, float *result)
{
  auto activation_func = ActivationFunctor(activation);
  for (int v = 0; v < v_size; v++)
  {
    *result++ = (activation_func)(*vector++);
  }
}

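// Computes result[i] = 1.0f - vector[i] for each of the v_size elements.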
inline void PortableSub1Vector(const float *vector, int v_size, float *result)
{
  for (int v = 0; v < v_size; v++)
  {
    *result++ = 1.0f - *vector++;
  }
}

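// Symmetric per-tensor quantization to int8: scaling_factor = max(|min|, |max|) / 127 and
// quantized_values[i] = round(values[i] / scaling_factor), clamped to [-127, 127].
// Example (not from the source): for values spanning [-2.54, 2.54], scaling_factor = 0.02 and an
// input of 1.0f quantizes to round(1.0 / 0.02) = 50. An all-zero input yields all zeros with
// scaling_factor = 1.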
inline void PortableSymmetricQuantizeFloats(const float *values, const int size,
                                            int8_t *quantized_values, float *min_value,
                                            float *max_value, float *scaling_factor)
{
  auto minmax = std::minmax_element(values, values + size);
  *min_value = *minmax.first;
  *max_value = *minmax.second;
  const int kScale = 127;
  const float range = std::max(std::abs(*min_value), std::abs(*max_value));
  if (range == 0)
  {
    memset(quantized_values, 0, size * sizeof(int8_t));
    *scaling_factor = 1;
    return;
  }
  *scaling_factor = range / kScale;
  const float scaling_factor_inv = kScale / range;
  for (int i = 0; i < size; ++i)
  {
    const int32_t quantized_value =
      static_cast<int32_t>(std::round(values[i] * scaling_factor_inv));
    // Clamp: just in case some odd numeric offset.
    quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value));
  }
}

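// Asymmetric per-tensor quantization to int8: the real range [min(0, rmin), max(0, rmax)] is
// mapped onto [-128, 127] with scale = (rmax - rmin) / 255 and a zero point ("offset") nudged
// into the int8 range so that 0.0f remains exactly representable.
// Example (not from the source): values spanning [0.0, 2.55] give scale = 0.01, offset = -128,
// and an input of 1.0f quantizes to round(-128 + 1.0 / 0.01) = -28.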
inline void PortableAsymmetricQuantizeFloats(const float *values, const int size,
                                             int8_t *quantized_values, float *scaling_factor,
                                             int32_t *offset)
{
  /* Copied from TensorFlow PortableAsymmetricQuantizeFloats */
  const int32_t kMinScale = -128;
  const int32_t kMaxScale = 127;
  const double qmin_double = kMinScale;
  const double qmax_double = kMaxScale;
  const auto minmax = std::minmax_element(values, values + size);
  const double rmin = static_cast<double>(std::min(0.0f, *minmax.first));
  const double rmax = static_cast<double>(std::max(0.0f, *minmax.second));
  if (rmin == rmax)
  {
    memset(quantized_values, 0, size * sizeof(int8_t));
    *scaling_factor = 1;
    *offset = 0;
    return;
  }
  else
  {
    double scale = (rmax - rmin) / (qmax_double - qmin_double);
    const double zero_point_from_min = qmin_double - rmin / scale;
    const double zero_point_from_max = qmax_double - rmax / scale;
    const double zero_point_from_min_error = std::abs(qmin_double) + std::abs(rmin / scale);
    const double zero_point_from_max_error = std::abs(qmax_double) + std::abs(rmax / scale);
    const double zero_point_double = zero_point_from_min_error < zero_point_from_max_error
                                       ? zero_point_from_min
                                       : zero_point_from_max;
    int8_t nudged_zero_point = 0;
    if (zero_point_double <= qmin_double)
    {
      nudged_zero_point = kMinScale;
    }
    else if (zero_point_double >= qmax_double)
    {
      nudged_zero_point = kMaxScale;
    }
    else
    {
      nudged_zero_point = static_cast<int8_t>(round(zero_point_double));
    }
    *scaling_factor = scale;
    *offset = nudged_zero_point;
  }
  const float scaling_factor_inv = 1.0f / *scaling_factor;
  for (int i = 0; i < size; ++i)
  {
    const int32_t quantized_value =
      static_cast<int32_t>(std::round(*offset + values[i] * scaling_factor_inv));
    quantized_values[i] = std::min(kMaxScale, std::max(kMinScale, quantized_value));
  }
}

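// Hybrid (int8 x int8 -> float) matrix * batched-vector multiply-accumulate: for each batch b and
// each row r of `matrix` (m_rows x m_cols, row-major),
//   result[(b * m_rows + r) * result_stride] += scaling_factors[b] * dot(matrix row r, vectors[b]),
// with the dot product accumulated in int32 before being rescaled to float.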
inline void PortableMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
                                                        const int m_rows, const int m_cols,
                                                        const int8_t *__restrict__ vectors,
                                                        const float *scaling_factors, int n_batch,
                                                        float *__restrict__ result,
                                                        int result_stride)
{
  int batch, row, col;
  for (batch = 0; batch < n_batch; ++batch, vectors += m_cols)
  {
    const float batch_scaling_factor = scaling_factors[batch];
    // Get the address of the first row.
    const int8_t *row_ptr = matrix;
    for (row = 0; row < m_rows; ++row, result += result_stride)
    {
      // Initialize the dot product sum for the row to 0.
      int32_t dotprod = 0;
#if defined(__GNUC__)
      // Prefetch the row to cache.
      __builtin_prefetch(row_ptr, 0 /* prefetch for read */, 3 /* temporal locality */);
#endif
      for (col = 0; col < m_cols; ++col, ++row_ptr)
      {
        dotprod += (*row_ptr) * (vectors[col]);
      } // for col
      *result += (dotprod * batch_scaling_factor);
    } // for row
  }   // for batch
}

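// Overload matching the signature shared with the ruy-based implementation; the int32 scratch
// pointer and ruy::Context are unused here, and the call simply forwards to the portable kernel
// above.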
inline void PortableMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
                                                        const int m_rows, const int m_cols,
                                                        const int8_t *__restrict__ vector,
                                                        const float *scaling_factors, int n_batch,
                                                        int32_t *, float *__restrict__ result,
                                                        int result_stride, ruy::Context *)
{
  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, scaling_factors,
                                              n_batch, result, result_stride);
}

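// Float matrix * batched-vector multiply-accumulate: for each batch b and row r,
//   result[(b * m_rows + r) * result_stride] += dot(matrix row r, vector[b * m_cols .. b * m_cols + m_cols)).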
inline void PortableMatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols,
                                                        const float *vector, int n_batch,
                                                        float *result, int result_stride)
{
  float *result_in_batch = result;
  for (int b = 0; b < n_batch; b++)
  {
    const float *matrix_ptr = matrix;
    for (int r = 0; r < m_rows; r++)
    {
      float dot_prod = 0.0f;
      const float *vector_in_batch = vector + b * m_cols;
      for (int c = 0; c < m_cols; c++)
      {
        dot_prod += *matrix_ptr++ * *vector_in_batch++;
      }
      *result_in_batch += dot_prod;
      result_in_batch += result_stride;
    }
  }
}

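// Per-row mean/standard-deviation normalization (e.g. for layer normalization): for each of the
// n_batch rows of length v_size,
//   output[i] = (input[i] - mean) / sqrt(variance + 1e-8),
// where mean and variance are computed over that row and 1e-8 guards against division by zero.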
inline void PortableMeanStddevNormalization(const float *input_vector, float *output_vector,
                                            int v_size, int n_batch)
{
  for (int batch = 0; batch < n_batch; ++batch)
  {
    float sum = 0.0f;
    for (int i = 0; i < v_size; ++i)
    {
      sum += input_vector[i];
    }
    const float mean = sum / v_size;
    float sum_diff_sq = 0.0f;
    for (int i = 0; i < v_size; ++i)
    {
      const float diff = input_vector[i] - mean;
      sum_diff_sq += diff * diff;
    }
    const float variance = sum_diff_sq / v_size;
    constexpr float kNormalizationConstant = 1e-8f;
    const float stddev_inv = 1.0f / std::sqrt(variance + kNormalizationConstant);
    for (int i = 0; i < v_size; ++i)
    {
      output_vector[i] = (input_vector[i] - mean) * stddev_inv;
    }
    input_vector += v_size;
    output_vector += v_size;
  }
}

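// Sets all v_size elements of `vector` to 0.0f.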
inline void PortableZeroVector(float *vector, int v_size) { std::fill_n(vector, v_size, 0); }

} // namespace cker
} // namespace nnfw

#endif // __NNFW_CKER_PORTABLE_TENSOR_UTILS_H__