ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Round.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef __NNFW_CKER_ROUND_H__
19#define __NNFW_CKER_ROUND_H__
20
21#include "cker/Shape.h"
22#include "cker/Utils.h"
23
24#include <cmath>
25
26namespace nnfw
27{
28namespace cker
29{
30
/**
 * @brief Round a float to the nearest integral value, resolving exact ties to
 *        the even neighbour (bankers rounding).
 *
 * The result is built from std::floor so that it does not depend on the
 * floating-point rounding mode of the environment.
 *
 * @param value Number to round
 * @return floor(value) when the fractional part is below 0.5, or exactly 0.5
 *         with an even floor; floor(value) + 1 otherwise. NaN and infinities
 *         are returned unchanged (as NaN / the same infinity).
 */
inline float RoundToNearest(float value)
{
  const float floor_val = std::floor(value);
  const float diff = value - floor_val;

  if (diff < 0.5f)
  {
    return floor_val;
  }

  // Exact tie: keep the floor only when it is even. The parity test stays in
  // floating point so no float-to-int conversion is involved.
  if ((diff == 0.5f) && (std::fmod(floor_val, 2.0f) == 0.0f))
  {
    return floor_val;
  }

  // Fraction above 0.5, a tie with an odd floor, or a NaN difference
  // (NaN / infinite input), where the addition simply propagates the input.
  return floor_val + 1.0f;
}
44
45#ifdef USE_NEON
46
47inline int32x4_t RoundToNearest(const float32x4_t input)
48{
49#if defined(__aarch64__) || defined(__SSSE3__)
50 // Note: vcvtnq_s32_f32 is not available in ARMv7
51 return vcvtnq_s32_f32(input);
52#else
53 static const float32x4_t zero_val_dup = vdupq_n_f32(0.0f);
54 static const float32x4_t point5_val_dup = vdupq_n_f32(0.5f);
55 static const float32x4_t minus_point5_val_dup = vdupq_n_f32(-0.5f);
56
57 const uint32x4_t mask = vcltq_f32(input, zero_val_dup);
58 const float32x4_t round = vbslq_f32(mask, minus_point5_val_dup, point5_val_dup);
59 return vcvtq_s32_f32(vaddq_f32(input, round));
60#endif // defined(__aarch64__) || defined(__SSSE3__)
61}
62
63#endif // NEON
64
65inline void Round(const Shape &input_shape, const float *input_data, const Shape &output_shape,
66 float *output_data)
67{
68 const int flat_size = MatchingFlatSize(input_shape, output_shape);
69 for (int i = 0; i < flat_size; ++i)
70 {
71 // Note that this implementation matches that of tensorFlow tf.round
72 // and corresponds to the bankers rounding method.
73 // cfenv (for fesetround) is not yet supported universally on Android, so
74 // using a work around.
75 output_data[i] = RoundToNearest(input_data[i]);
76 }
77}
78
79} // namespace cker
80} // namespace nnfw
81
82#endif // __NNFW_CKER_ROUND_H__
const luci_interpreter::RuntimeShape output_shape
void Round(const Shape &input_shape, const float *input_data, const Shape &output_shape, float *output_data)
Definition Round.h:65
float RoundToNearest(float value)
Definition Round.h:31
int MatchingFlatSize(const Shape &shape, Ts... check_shapes)
Definition Shape.h:297
Definition topk_v2.h:30