46 const int32_t zero_point)
52 const float32x4_t scale_dup = vdupq_n_f32(
static_cast<float>(scale));
53 const float32x4_t zero_times_scale_dup = vdupq_n_f32(
static_cast<float>(-zero_point * scale));
54 for (; i <= flat_size - 8; i += 8)
56 const uint8x8_t input_u8 = vld1_u8(input_data + i);
57 const uint16x8_t input_u16 = vmovl_u8(input_u8);
58 const int16x8_t input_s16 = vreinterpretq_s16_u16(input_u16);
59 const int16x4_t input_s16_low = vget_low_s16(input_s16);
60 const int16x4_t input_s16_high = vget_high_s16(input_s16);
61 const int32x4_t val_low = vmovl_s16(input_s16_low);
62 const int32x4_t val_high = vmovl_s16(input_s16_high);
64 float32x4_t result_low, result_high;
65 ScaleWithNewZeroPoint(val_low, scale_dup, zero_times_scale_dup, &result_low);
66 ScaleWithNewZeroPoint(val_high, scale_dup, zero_times_scale_dup, &result_high);
68 vst1q_f32(output_data + i, result_low);
69 vst1q_f32(output_data + i + 4, result_high);
72 for (; i < flat_size; ++i)
74 const int32_t val = input_data[i];
75 const float result =
static_cast<float>(scale * (val - zero_point));
76 output_data[i] = result;
// NOTE(review): the head of this definition (its name, leading parameters, braces and the
// declarations of `i` and `flat_size`) is not visible in this excerpt — confirm against the
// full file before relying on these notes.
// Visible behavior: reads signed 8-bit quantized values from `input_data` (vld1_s8) and writes
// their float equivalents to `output_data`, using `scale` and `zero_point`.
82 const int32_t zero_point)
// Broadcast `scale` into all four float lanes.
88 const float32x4_t scale_dup = vdupq_n_f32(
static_cast<float>(scale));
// Broadcast the loop-invariant term (-zero_point * scale) into all four float lanes.
89 const float32x4_t zero_times_scale_dup = vdupq_n_f32(
static_cast<float>(-zero_point * scale));
// Vector loop: runs while at least 8 elements remain, consuming 8 per iteration.
90 for (; i <= flat_size - 8; i += 8)
// Load 8 int8 values and sign-extend them to 16 bits.
92 const int8x8_t input_s8 = vld1_s8(input_data + i);
93 const int16x8_t input_s16 = vmovl_s8(input_s8);
// Split into low/high halves of 4 lanes each, then sign-extend each half to 32 bits.
94 const int16x4_t input_s16_low = vget_low_s16(input_s16);
95 const int16x4_t input_s16_high = vget_high_s16(input_s16);
96 const int32x4_t val_low = vmovl_s16(input_s16_low);
97 const int32x4_t val_high = vmovl_s16(input_s16_high);
// NOTE(review): ScaleWithNewZeroPoint is defined outside this excerpt; presumably it writes
// val * scale_dup + zero_times_scale_dup into the output vector — confirm.
99 float32x4_t result_low, result_high;
100 ScaleWithNewZeroPoint(val_low, scale_dup, zero_times_scale_dup, &result_low);
101 ScaleWithNewZeroPoint(val_high, scale_dup, zero_times_scale_dup, &result_high);
// Store the 8 float results: low half at offset i, high half at offset i + 4.
103 vst1q_f32(output_data + i, result_low);
104 vst1q_f32(output_data + i + 4, result_high);
// Scalar loop: handles every element the vector loop did not consume, one at a time.
107 for (; i < flat_size; ++i)
// Widen to int32 first so the zero-point subtraction is done in 32-bit integer arithmetic.
109 const int32_t val = input_data[i];
// Dequantized value: scale * (quantized - zero_point).
110 const float result =
static_cast<float>(scale * (val - zero_point));
111 output_data[i] = result;
// NOTE(review): the head of this definition (its name, leading parameters, braces and the
// declarations of `i` and `flat_size`) is not visible in this excerpt — confirm against the
// full file before relying on these notes.
// Visible behavior: reads signed 16-bit quantized values from `input_data` (vld1_s16) and writes
// their float equivalents to `output_data`, using `scale` and `zero_point`.
117 const int32_t zero_point)
// Broadcast `scale` into all four float lanes.
123 const float32x4_t scale_dup = vdupq_n_f32(
static_cast<float>(scale));
// Broadcast the loop-invariant term (-zero_point * scale) into all four float lanes.
124 const float32x4_t zero_times_scale_dup = vdupq_n_f32(
static_cast<float>(-zero_point * scale));
// Vector loop: runs while at least 8 elements remain, consuming 8 per iteration.
125 for (; i <= flat_size - 8; i += 8)
// Load two groups of 4 int16 values (offsets i and i + 4); no 8-bit widening step is needed here.
127 const int16x4_t input_s16_low = vld1_s16(input_data + i);
128 const int16x4_t input_s16_high = vld1_s16(input_data + i + 4);
// Sign-extend each group of 4 to 32-bit lanes.
129 const int32x4_t val_low = vmovl_s16(input_s16_low);
130 const int32x4_t val_high = vmovl_s16(input_s16_high);
// NOTE(review): ScaleWithNewZeroPoint is defined outside this excerpt; presumably it writes
// val * scale_dup + zero_times_scale_dup into the output vector — confirm.
132 float32x4_t result_low, result_high;
133 ScaleWithNewZeroPoint(val_low, scale_dup, zero_times_scale_dup, &result_low);
134 ScaleWithNewZeroPoint(val_high, scale_dup, zero_times_scale_dup, &result_high);
// Store the 8 float results: low group at offset i, high group at offset i + 4.
136 vst1q_f32(output_data + i, result_low);
137 vst1q_f32(output_data + i + 4, result_high);
// Scalar loop: handles every element the vector loop did not consume, one at a time.
140 for (; i < flat_size; ++i)
// Widen to int32 first so the zero-point subtraction is done in 32-bit integer arithmetic.
142 const int32_t val = input_data[i];
// Dequantized value: scale * (quantized - zero_point).
143 const float result =
static_cast<float>(scale * (val - zero_point));
144 output_data[i] = result;
void Dequantize(const Shape &input_shape, const uint8_t *input_data, const Shape &output_shape, float *output_data, const float scale, const int32_t zero_point)