ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Dequantize.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef __NNFW_CKER_DEQUANTIZE_H__
18#define __NNFW_CKER_DEQUANTIZE_H__
19
20#include "cker/Shape.h"
21#include "cker/Types.h"
23
24namespace nnfw
25{
26namespace cker
27{
28
#ifdef USE_NEON
/**
 * @brief Lane-wise affine transform of four int32 values into four floats
 *
 * Computes, for every lane, float(input) * scale_dup + zero_times_scale_dup.
 * When __ARM_FEATURE_FMA is defined the multiply and add are issued as a single
 * fused multiply-add (vfmaq_f32); otherwise a separate multiply and add are used.
 *
 * @note This helper deliberately lives directly in nnfw::cker instead of an unnamed
 *       namespace. An unnamed namespace in a header gives every translation unit its
 *       own copy with internal linkage, and the external-linkage inline Dequantize
 *       overloads that call it would then have definitions that differ between
 *       translation units.
 *
 * @param[in]  input                 Four int32 lanes holding the quantized values
 * @param[in]  scale_dup             Multiplier applied to every lane
 * @param[in]  zero_times_scale_dup  Addend applied to every lane (callers in this file
 *                                   pass -zero_point * scale broadcast to all lanes)
 * @param[out] output                Receives the four resulting float lanes
 */
inline void ScaleWithNewZeroPoint(const int32x4_t input, const float32x4_t scale_dup,
                                  const float32x4_t zero_times_scale_dup, float32x4_t *output)
{
#ifdef __ARM_FEATURE_FMA
  // Accumulator comes first: result = zero_times_scale_dup + float(input) * scale_dup
  *output = vfmaq_f32(zero_times_scale_dup, vcvtq_f32_s32(input), scale_dup);
#else
  *output = vaddq_f32(vmulq_f32(vcvtq_f32_s32(input), scale_dup), zero_times_scale_dup);
#endif
}
#endif // USE_NEON
43
44inline void Dequantize(const Shape &input_shape, const uint8_t *input_data,
45 const Shape &output_shape, float *output_data, const float scale,
46 const int32_t zero_point)
47{
48 const int flat_size = MatchingFlatSize(input_shape, output_shape);
49
50 int i = 0;
51#ifdef USE_NEON
52 const float32x4_t scale_dup = vdupq_n_f32(static_cast<float>(scale));
53 const float32x4_t zero_times_scale_dup = vdupq_n_f32(static_cast<float>(-zero_point * scale));
54 for (; i <= flat_size - 8; i += 8)
55 {
56 const uint8x8_t input_u8 = vld1_u8(input_data + i);
57 const uint16x8_t input_u16 = vmovl_u8(input_u8);
58 const int16x8_t input_s16 = vreinterpretq_s16_u16(input_u16);
59 const int16x4_t input_s16_low = vget_low_s16(input_s16);
60 const int16x4_t input_s16_high = vget_high_s16(input_s16);
61 const int32x4_t val_low = vmovl_s16(input_s16_low);
62 const int32x4_t val_high = vmovl_s16(input_s16_high);
63
64 float32x4_t result_low, result_high;
65 ScaleWithNewZeroPoint(val_low, scale_dup, zero_times_scale_dup, &result_low);
66 ScaleWithNewZeroPoint(val_high, scale_dup, zero_times_scale_dup, &result_high);
67
68 vst1q_f32(output_data + i, result_low);
69 vst1q_f32(output_data + i + 4, result_high);
70 }
71#endif // NEON
72 for (; i < flat_size; ++i)
73 {
74 const int32_t val = input_data[i];
75 const float result = static_cast<float>(scale * (val - zero_point));
76 output_data[i] = result;
77 }
78}
79
80inline void Dequantize(const Shape &input_shape, const int8_t *input_data,
81 const Shape &output_shape, float *output_data, const float scale,
82 const int32_t zero_point)
83{
84 const int flat_size = MatchingFlatSize(input_shape, output_shape);
85
86 int i = 0;
87#ifdef USE_NEON
88 const float32x4_t scale_dup = vdupq_n_f32(static_cast<float>(scale));
89 const float32x4_t zero_times_scale_dup = vdupq_n_f32(static_cast<float>(-zero_point * scale));
90 for (; i <= flat_size - 8; i += 8)
91 {
92 const int8x8_t input_s8 = vld1_s8(input_data + i);
93 const int16x8_t input_s16 = vmovl_s8(input_s8);
94 const int16x4_t input_s16_low = vget_low_s16(input_s16);
95 const int16x4_t input_s16_high = vget_high_s16(input_s16);
96 const int32x4_t val_low = vmovl_s16(input_s16_low);
97 const int32x4_t val_high = vmovl_s16(input_s16_high);
98
99 float32x4_t result_low, result_high;
100 ScaleWithNewZeroPoint(val_low, scale_dup, zero_times_scale_dup, &result_low);
101 ScaleWithNewZeroPoint(val_high, scale_dup, zero_times_scale_dup, &result_high);
102
103 vst1q_f32(output_data + i, result_low);
104 vst1q_f32(output_data + i + 4, result_high);
105 }
106#endif // NEON
107 for (; i < flat_size; ++i)
108 {
109 const int32_t val = input_data[i];
110 const float result = static_cast<float>(scale * (val - zero_point));
111 output_data[i] = result;
112 }
113}
114
115inline void Dequantize(const Shape &input_shape, const int16_t *input_data,
116 const Shape &output_shape, float *output_data, const float scale,
117 const int32_t zero_point)
118{
119 const int flat_size = MatchingFlatSize(input_shape, output_shape);
120
121 int i = 0;
122#ifdef USE_NEON
123 const float32x4_t scale_dup = vdupq_n_f32(static_cast<float>(scale));
124 const float32x4_t zero_times_scale_dup = vdupq_n_f32(static_cast<float>(-zero_point * scale));
125 for (; i <= flat_size - 8; i += 8)
126 {
127 const int16x4_t input_s16_low = vld1_s16(input_data + i);
128 const int16x4_t input_s16_high = vld1_s16(input_data + i + 4);
129 const int32x4_t val_low = vmovl_s16(input_s16_low);
130 const int32x4_t val_high = vmovl_s16(input_s16_high);
131
132 float32x4_t result_low, result_high;
133 ScaleWithNewZeroPoint(val_low, scale_dup, zero_times_scale_dup, &result_low);
134 ScaleWithNewZeroPoint(val_high, scale_dup, zero_times_scale_dup, &result_high);
135
136 vst1q_f32(output_data + i, result_low);
137 vst1q_f32(output_data + i + 4, result_high);
138 }
139#endif // NEON
140 for (; i < flat_size; ++i)
141 {
142 const int32_t val = input_data[i];
143 const float result = static_cast<float>(scale * (val - zero_point));
144 output_data[i] = result;
145 }
146}
147
148} // namespace cker
149} // namespace nnfw
150
151#endif // __NNFW_CKER_DEQUANTIZE_H__
const luci_interpreter::RuntimeShape output_shape
int MatchingFlatSize(const Shape &shape, Ts... check_shapes)
Definition Shape.h:297
void Dequantize(const Shape &input_shape, const uint8_t *input_data, const Shape &output_shape, float *output_data, const float scale, const int32_t zero_point)
Definition Dequantize.h:44
Definition topk_v2.h:30