ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Quantize.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2018 The TensorFlow Authors. All Rights Reserved.*
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef __NNFW_CKER_QUANTIZE_H__
19#define __NNFW_CKER_QUANTIZE_H__
20
22#include "cker/Shape.h"
23#include "cker/Types.h"
24#include "cker/Utils.h"
25#include <cassert>
26#include <iostream>
27#include <stdexcept>
28
29namespace nnfw
30{
31namespace cker
32{
33template <typename InputT, typename OutputT>
34inline void Quantize(const Shape &input_shape, const InputT *input_data, const Shape &output_shape,
35 OutputT *output_data, const float output_scale, const int32_t output_offset)
36{
37 const int flat_size = MatchingFlatSize(input_shape, output_shape);
38 int min_val = std::numeric_limits<OutputT>::min();
39 int max_val = std::numeric_limits<OutputT>::max();
40
41 for (int i = 0; i < flat_size; i++)
42 {
43 int32_t unclamped = static_cast<int32_t>(round(input_data[i] / output_scale)) + output_offset;
44 int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
45 output_data[i] = clamped;
46 }
47}
48
template <>
/**
 * @brief Quantize float values to int8: q = round(x / scale) + zero_point,
 *        clamped to [-128, 127].
 *
 * When USE_NEON is defined, 8 elements are processed per iteration; the
 * scalar loop below handles the tail (or everything on non-NEON builds).
 */
inline void Quantize(const Shape &input_shape, const float *input_data, const Shape &output_shape,
                     int8_t *output_data, const float scale, const int32_t zero_point)
{
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  static constexpr int32_t min_val = std::numeric_limits<int8_t>::min();
  static constexpr int32_t max_val = std::numeric_limits<int8_t>::max();

  int i = 0;
#ifdef USE_NEON
  // Multiply by the reciprocal instead of dividing; NOTE(review): this can
  // differ from the scalar tail's `val / scale` by one ulp before rounding.
  const float32x4_t reverse_scale_dup = vdupq_n_f32(1.0f / scale);
  const int32x4_t zero_point_dup = vdupq_n_s32(zero_point);
  const int32x4_t min_val_dup = vdupq_n_s32(min_val);
  const int32x4_t max_val_dup = vdupq_n_s32(max_val);

  for (; i <= flat_size - 8; i += 8)
  {
    const float *src_data_ptr = input_data + i;
    float32x4_t input_val_0 = vld1q_f32(src_data_ptr);
    float32x4_t input_val_1 = vld1q_f32(src_data_ptr + 4);

    // Scale: x * (1/scale).
    input_val_0 = vmulq_f32(input_val_0, reverse_scale_dup);
    input_val_1 = vmulq_f32(input_val_1, reverse_scale_dup);

    // Round to nearest integer and cast to int32 lanes.
    int32x4_t casted_val_0 = RoundToNearest(input_val_0);
    int32x4_t casted_val_1 = RoundToNearest(input_val_1);

    // Add the zero point.
    casted_val_0 = vaddq_s32(casted_val_0, zero_point_dup);
    casted_val_1 = vaddq_s32(casted_val_1, zero_point_dup);

    // Clamp the values to fit the target type's range.
    casted_val_0 = vmaxq_s32(casted_val_0, min_val_dup);
    casted_val_1 = vmaxq_s32(casted_val_1, min_val_dup);
    casted_val_0 = vminq_s32(casted_val_0, max_val_dup);
    casted_val_1 = vminq_s32(casted_val_1, max_val_dup);

    // Non-saturating narrows (vmovn) are safe here: values were already
    // clamped to the int8 range above.
    const int16x4_t narrowed_val_0 = vmovn_s32(casted_val_0);
    const int16x4_t narrowed_val_1 = vmovn_s32(casted_val_1);
    const int16x8_t combined_val = vcombine_s16(narrowed_val_0, narrowed_val_1);
    const int8x8_t combined_val_narrowed = vmovn_s16(combined_val);
    vst1_s8(output_data + i, combined_val_narrowed);
  }
#endif // NEON

  // Scalar path: handles the leftover elements (or all of them without NEON).
  for (; i < flat_size; ++i)
  {
    const float val = input_data[i];
    const int32_t unclamped = static_cast<int32_t>(round(val / scale)) + zero_point;
    const int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
    output_data[i] = clamped;
  }
}
101
template <>
/**
 * @brief Quantize float values to uint8: q = round(x / scale) + zero_point,
 *        clamped to [0, 255].
 *
 * When USE_NEON is defined, 8 elements are processed per iteration; the
 * scalar loop below handles the tail (or everything on non-NEON builds).
 */
inline void Quantize(const Shape &input_shape, const float *input_data, const Shape &output_shape,
                     uint8_t *output_data, const float scale, const int32_t zero_point)
{
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  static constexpr int32_t min_val = std::numeric_limits<uint8_t>::min();
  static constexpr int32_t max_val = std::numeric_limits<uint8_t>::max();

  int i = 0;
#ifdef USE_NEON
  // Multiply by the reciprocal instead of dividing; NOTE(review): this can
  // differ from the scalar tail's `val / scale` by one ulp before rounding.
  const float32x4_t reverse_scale_dup = vdupq_n_f32(1.0f / scale);
  const int32x4_t zero_point_dup = vdupq_n_s32(zero_point);
  const int32x4_t min_val_dup = vdupq_n_s32(min_val);
  const int32x4_t max_val_dup = vdupq_n_s32(max_val);

  for (; i <= flat_size - 8; i += 8)
  {
    const float *src_data_ptr = input_data + i;
    float32x4_t input_val_0 = vld1q_f32(src_data_ptr);
    float32x4_t input_val_1 = vld1q_f32(src_data_ptr + 4);

    // Scale: x * (1/scale).
    input_val_0 = vmulq_f32(input_val_0, reverse_scale_dup);
    input_val_1 = vmulq_f32(input_val_1, reverse_scale_dup);

    // Round to nearest integer and cast to int32 lanes.
    int32x4_t casted_val_0 = RoundToNearest(input_val_0);
    int32x4_t casted_val_1 = RoundToNearest(input_val_1);

    // Add the zero point.
    casted_val_0 = vaddq_s32(casted_val_0, zero_point_dup);
    casted_val_1 = vaddq_s32(casted_val_1, zero_point_dup);

    // Clamp the values to fit the target type's range.
    casted_val_0 = vmaxq_s32(casted_val_0, min_val_dup);
    casted_val_1 = vmaxq_s32(casted_val_1, min_val_dup);
    casted_val_0 = vminq_s32(casted_val_0, max_val_dup);
    casted_val_1 = vminq_s32(casted_val_1, max_val_dup);

    // vqmovun saturates signed->unsigned; the final vmovn is safe because the
    // values were already clamped to [0, 255] above.
    const uint16x4_t narrowed_val_0 = vqmovun_s32(casted_val_0);
    const uint16x4_t narrowed_val_1 = vqmovun_s32(casted_val_1);
    const uint16x8_t combined_val = vcombine_u16(narrowed_val_0, narrowed_val_1);
    const uint8x8_t combined_val_narrowed = vmovn_u16(combined_val);
    vst1_u8(output_data + i, combined_val_narrowed);
  }
#endif // NEON

  // Scalar path: handles the leftover elements (or all of them without NEON).
  for (; i < flat_size; ++i)
  {
    const float val = input_data[i];
    const int32_t unclamped = static_cast<int32_t>(round(val / scale)) + zero_point;
    const int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
    output_data[i] = clamped;
  }
}
154
template <>
/**
 * @brief Quantize float values to int16: q = round(x / scale) + zero_point,
 *        clamped to [-32768, 32767].
 *
 * When USE_NEON is defined, 8 elements are processed per iteration; the
 * scalar loop below handles the tail (or everything on non-NEON builds).
 */
inline void Quantize(const Shape &input_shape, const float *input_data, const Shape &output_shape,
                     int16_t *output_data, const float scale, const int32_t zero_point)
{
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  static constexpr int32_t min_val = std::numeric_limits<int16_t>::min();
  static constexpr int32_t max_val = std::numeric_limits<int16_t>::max();

  int i = 0;
#ifdef USE_NEON
  // Multiply by the reciprocal instead of dividing; NOTE(review): this can
  // differ from the scalar tail's `val / scale` by one ulp before rounding.
  const float32x4_t reverse_scale_dup = vdupq_n_f32(1.0f / scale);
  const int32x4_t zero_point_dup = vdupq_n_s32(zero_point);
  const int32x4_t min_val_dup = vdupq_n_s32(min_val);
  const int32x4_t max_val_dup = vdupq_n_s32(max_val);

  for (; i <= flat_size - 8; i += 8)
  {
    const float *src_data_ptr = input_data + i;
    float32x4_t input_val_0 = vld1q_f32(src_data_ptr);
    float32x4_t input_val_1 = vld1q_f32(src_data_ptr + 4);

    // Scale: x * (1/scale).
    input_val_0 = vmulq_f32(input_val_0, reverse_scale_dup);
    input_val_1 = vmulq_f32(input_val_1, reverse_scale_dup);

    // Round to nearest integer and cast to int32 lanes.
    int32x4_t casted_val_0 = RoundToNearest(input_val_0);
    int32x4_t casted_val_1 = RoundToNearest(input_val_1);

    // Add the zero point.
    casted_val_0 = vaddq_s32(casted_val_0, zero_point_dup);
    casted_val_1 = vaddq_s32(casted_val_1, zero_point_dup);

    // Clamp the values to fit the target type's range.
    casted_val_0 = vmaxq_s32(casted_val_0, min_val_dup);
    casted_val_1 = vmaxq_s32(casted_val_1, min_val_dup);
    casted_val_0 = vminq_s32(casted_val_0, max_val_dup);
    casted_val_1 = vminq_s32(casted_val_1, max_val_dup);

    // Non-saturating narrows (vmovn) are safe here: values were already
    // clamped to the int16 range above. Two 4-lane stores, no 8-bit narrow.
    const int16x4_t narrowed_val_0 = vmovn_s32(casted_val_0);
    const int16x4_t narrowed_val_1 = vmovn_s32(casted_val_1);
    vst1_s16(output_data + i, narrowed_val_0);
    vst1_s16(output_data + i + 4, narrowed_val_1);
  }
#endif // NEON

  // Scalar path: handles the leftover elements (or all of them without NEON).
  for (; i < flat_size; ++i)
  {
    const float val = input_data[i];
    const int32_t unclamped = static_cast<int32_t>(round(val / scale)) + zero_point;
    const int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
    output_data[i] = clamped;
  }
}
206
/**
 * @brief Per-channel requantization of raw int32 accumulators to int8.
 *
 * @param multiplier   Per-channel fixed-point output multipliers (channel_size entries).
 * @param shift        Per-channel shifts; positive = left shift, negative = right shift.
 * @param channel_size Number of output channels (innermost dimension).
 * @param total_size   Total number of accumulators; must be a multiple of channel_size.
 * @param output_zp    Output zero point added after scaling.
 * @param output_min   Lower activation clamp bound.
 * @param output_max   Upper activation clamp bound.
 * @param scratch      Raw int32 accumulators, laid out rows x channel_size (read).
 * @param output       Destination int8 buffer, same layout (written).
 */
inline void Quantize(const int32_t *multiplier, const int32_t *shift, int32_t channel_size,
                     int32_t total_size, int32_t output_zp, int32_t output_min, int32_t output_max,
                     int32_t *scratch, int8_t *output)
{
  // Here we're trying to quantize the raw accumulators:
  //         output_channels
  //       data data data data data
  // rows  data data data data data
  //       data data data data data
  //          ....
  //
  // In order to minimize the reload of the multipliers & shifts, once we load
  // the multipliers & shifts, we load & quantize the raw accumulators for every
  // row.
#ifdef USE_NEON
  const int32x4_t output_offset_vec = vdupq_n_s32(output_zp);
  const int32x4_t output_activation_min_vec = vdupq_n_s32(output_min);
  const int32x4_t output_activation_max_vec = vdupq_n_s32(output_max);
  const int32x4_t zeros = vdupq_n_s32(0);
#endif

  assert(total_size % channel_size == 0);
  const int32_t rows = total_size / channel_size;

  int c = 0;

#ifdef USE_NEON
  // NOTE(review): RoundingDivideByPOT is pulled in but the rounding right
  // shift below is done directly with vrshlq_s32 — confirm the using-decl is
  // still needed.
  using gemmlowp::RoundingDivideByPOT;
  // Process 8 channels at a time; the multiplier/shift vectors are loaded
  // once per channel block and reused across all rows.
  for (; c <= channel_size - 8; c += 8)
  {
    int32x4_t out_shift_1 = vld1q_s32(shift + c);
    int32x4_t out_shift_2 = vld1q_s32(shift + c + 4);
    // Split each shift into its non-negative (left) part...
    int32x4_t left_shift_1 = vmaxq_s32(out_shift_1, zeros);
    int32x4_t left_shift_2 = vmaxq_s32(out_shift_2, zeros);

    // Right shift will be performed as left shift with negative values.
    int32x4_t right_shift_1 = vminq_s32(out_shift_1, zeros);
    int32x4_t right_shift_2 = vminq_s32(out_shift_2, zeros);

    int32x4_t out_mul_1 = vld1q_s32(multiplier + c);
    int32x4_t out_mul_2 = vld1q_s32(multiplier + c + 4);
    for (int n = 0; n < rows; ++n)
    {
      int loc = n * channel_size + c;
      int32x4_t acc_1 = vld1q_s32(scratch + loc);
      int32x4_t acc_2 = vld1q_s32(scratch + loc + 4);

      // Saturating Rounding Doubling High Mul.
      acc_1 = vshlq_s32(acc_1, left_shift_1);
      acc_1 = vqrdmulhq_s32(acc_1, out_mul_1);
      acc_2 = vshlq_s32(acc_2, left_shift_2);
      acc_2 = vqrdmulhq_s32(acc_2, out_mul_2);

      // Rounding Dividing By POT.
      acc_1 = vrshlq_s32(acc_1, right_shift_1);
      acc_2 = vrshlq_s32(acc_2, right_shift_2);

      // Add the output offset.
      acc_1 = vaddq_s32(acc_1, output_offset_vec);
      acc_2 = vaddq_s32(acc_2, output_offset_vec);

      // Apply the activation function.
      acc_1 = vmaxq_s32(acc_1, output_activation_min_vec);
      acc_1 = vminq_s32(acc_1, output_activation_max_vec);
      acc_2 = vmaxq_s32(acc_2, output_activation_min_vec);
      acc_2 = vminq_s32(acc_2, output_activation_max_vec);

      // Saturating cast to int8 and store to destination.
      const int16x4_t acc_s16_1 = vqmovn_s32(acc_1);
      const int16x4_t acc_s16_2 = vqmovn_s32(acc_2);
      const int16x8_t res_s16 = vcombine_s16(acc_s16_1, acc_s16_2);
      const int8x8_t res_s8 = vqmovn_s16(res_s16);
      vst1_s8(output + loc, res_s8);
    }
  }

#endif // USE_NEON
  // Handle leftover values, one by one. This is very slow.
  for (; c < channel_size; c++)
  {
    for (int n = 0; n < rows; ++n)
    {
      int loc = n * channel_size + c;
      int32_t acc = scratch[loc];
      // Same scale -> offset -> clamp pipeline as the NEON path above.
      acc = MultiplyByQuantizedMultiplier(acc, multiplier[c], shift[c]);
      acc += output_zp;
      acc = std::max(acc, output_min);
      acc = std::min(acc, output_max);
      output[loc] = static_cast<int8_t>(acc);
    }
  }
}
299
template <typename input_type, typename output_type>
/**
 * @brief Primary Requantize template: unsupported type combination.
 *
 * Only the explicit specializations below (uint8<->int8) are implemented.
 * Reaching this overload is a programming error.
 *
 * @throws std::runtime_error always — in NDEBUG builds the assert compiles
 *         away, and failing loudly beats silently leaving output_data
 *         untouched.
 */
inline void Requantize(const input_type *input_data, int32_t size,
                       int32_t effective_scale_multiplier, int32_t effective_scale_shift,
                       int32_t input_zeropoint, int32_t output_zeropoint, output_type *output_data)
{
  assert(!"Requantize: not supported type. It shouldn't reach here.");
  UNUSED_ALL(input_data, size, effective_scale_multiplier, effective_scale_shift, input_zeropoint,
             output_zeropoint, output_data);
  throw std::runtime_error{"Requantize: not supported type"};
}
309
template <>
/**
 * @brief Requantize uint8 values to int8.
 *
 * Per element: out = clamp(MultiplyByQuantizedMultiplier(in - input_zeropoint,
 * multiplier, shift) + output_zeropoint, [-128, 127]). The NEON path handles
 * 16 elements per iteration; the scalar loop finishes the tail.
 */
inline void Requantize<uint8_t, int8_t>(const uint8_t *input_data, int32_t size,
                                        int32_t effective_scale_multiplier,
                                        int32_t effective_scale_shift, int32_t input_zeropoint,
                                        int32_t output_zeropoint, int8_t *output_data)
{
  static constexpr int32_t kMinOutput = std::numeric_limits<int8_t>::min();
  static constexpr int32_t kMaxOutput = std::numeric_limits<int8_t>::max();

  int i = 0;
#ifdef USE_NEON
  // Constants. The input zero point is negated so it can be applied with an
  // add below.
  const int32x4_t input_zero_point_dup = vdupq_n_s32(-input_zeropoint);
  const int32x4_t output_zero_point_dup = vdupq_n_s32(output_zeropoint);
  const int32x4_t min_val_dup = vdupq_n_s32(kMinOutput);
  const int32x4_t max_val_dup = vdupq_n_s32(kMaxOutput);

  for (; i <= size - 16; i += 16)
  {
    // Widen 16 x u8 -> 4 vectors of 4 x s32 (u8 -> u16 -> u32, reinterpreted
    // as s32; safe since u8 values fit in 31 bits).
    const uint8x16_t input_vec = vld1q_u8(input_data + i);
    const uint16x8_t first_half = vmovl_u8(vget_low_u8(input_vec));
    const uint16x8_t second_half = vmovl_u8(vget_high_u8(input_vec));
    int32x4x4_t input;
    input.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(first_half)));
    input.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(first_half)));
    input.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(second_half)));
    input.val[3] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(second_half)));
    // Subtract the input zero point (added as its negation).
    input.val[0] = vaddq_s32(input.val[0], input_zero_point_dup);
    input.val[1] = vaddq_s32(input.val[1], input_zero_point_dup);
    input.val[2] = vaddq_s32(input.val[2], input_zero_point_dup);
    input.val[3] = vaddq_s32(input.val[3], input_zero_point_dup);

    // Apply the effective scale to all four rows at once.
    int32x4x4_t result =
      MultiplyByQuantizedMultiplier4Rows(input, effective_scale_multiplier, effective_scale_shift);

    // Add the output zero point, then clamp into the int8 range.
    result.val[0] = vaddq_s32(result.val[0], output_zero_point_dup);
    result.val[1] = vaddq_s32(result.val[1], output_zero_point_dup);
    result.val[2] = vaddq_s32(result.val[2], output_zero_point_dup);
    result.val[3] = vaddq_s32(result.val[3], output_zero_point_dup);
    result.val[0] = vmaxq_s32(vminq_s32(result.val[0], max_val_dup), min_val_dup);
    result.val[1] = vmaxq_s32(vminq_s32(result.val[1], max_val_dup), min_val_dup);
    result.val[2] = vmaxq_s32(vminq_s32(result.val[2], max_val_dup), min_val_dup);
    result.val[3] = vmaxq_s32(vminq_s32(result.val[3], max_val_dup), min_val_dup);

    // Saturating narrow s32 -> s16 -> s8 and store 16 results.
    const int16x4_t narrowed_val_1 = vqmovn_s32(result.val[0]);
    const int16x4_t narrowed_val_2 = vqmovn_s32(result.val[1]);
    const int16x4_t narrowed_val_3 = vqmovn_s32(result.val[2]);
    const int16x4_t narrowed_val_4 = vqmovn_s32(result.val[3]);
    const int16x8_t output_first_half = vcombine_s16(narrowed_val_1, narrowed_val_2);
    const int16x8_t output_second_half = vcombine_s16(narrowed_val_3, narrowed_val_4);
    const int8x8_t narrowed_first_half = vqmovn_s16(output_first_half);
    const int8x8_t narrowed_second_half = vqmovn_s16(output_second_half);
    const int8x16_t narrowed_result = vcombine_s8(narrowed_first_half, narrowed_second_half);
    vst1q_s8(output_data + i, narrowed_result);
  }

#endif
  // Scalar path: handles the leftover elements (or all of them without NEON).
  for (; i < size; ++i)
  {
    const int32_t input = input_data[i] - input_zeropoint;
    const int32_t output =
      MultiplyByQuantizedMultiplier(input, effective_scale_multiplier, effective_scale_shift) +
      output_zeropoint;
    const int32_t clamped_output = std::max(std::min(output, kMaxOutput), kMinOutput);
    output_data[i] = static_cast<int8_t>(clamped_output);
  }
}
377
template <>
/**
 * @brief Requantize int8 values to uint8.
 *
 * Per element: out = clamp(MultiplyByQuantizedMultiplier(in - input_zeropoint,
 * multiplier, shift) + output_zeropoint, [0, 255]). The NEON path handles
 * 16 elements per iteration; the scalar loop finishes the tail.
 */
inline void Requantize<int8_t, uint8_t>(const int8_t *input_data, int32_t size,
                                        int32_t effective_scale_multiplier,
                                        int32_t effective_scale_shift, int32_t input_zeropoint,
                                        int32_t output_zeropoint, uint8_t *output_data)
{
  static constexpr int32_t kMinOutput = std::numeric_limits<uint8_t>::min();
  static constexpr int32_t kMaxOutput = std::numeric_limits<uint8_t>::max();

  int i = 0;
#ifdef USE_NEON
  // Constants. The input zero point is negated so it can be applied with an
  // add below.
  const int32x4_t input_zero_point_dup = vdupq_n_s32(-input_zeropoint);
  const int32x4_t output_zero_point_dup = vdupq_n_s32(output_zeropoint);
  const int32x4_t min_val_dup = vdupq_n_s32(kMinOutput);
  const int32x4_t max_val_dup = vdupq_n_s32(kMaxOutput);

  for (; i <= size - 16; i += 16)
  {
    // Sign-extend 16 x s8 -> 4 vectors of 4 x s32 (s8 -> s16 -> s32).
    const int8x16_t input_vec = vld1q_s8(input_data + i);
    const int16x8_t first_half = vmovl_s8(vget_low_s8(input_vec));
    const int16x8_t second_half = vmovl_s8(vget_high_s8(input_vec));
    int32x4x4_t input;
    input.val[0] = vmovl_s16(vget_low_s16(first_half));
    input.val[1] = vmovl_s16(vget_high_s16(first_half));
    input.val[2] = vmovl_s16(vget_low_s16(second_half));
    input.val[3] = vmovl_s16(vget_high_s16(second_half));
    // Subtract the input zero point (added as its negation).
    input.val[0] = vaddq_s32(input.val[0], input_zero_point_dup);
    input.val[1] = vaddq_s32(input.val[1], input_zero_point_dup);
    input.val[2] = vaddq_s32(input.val[2], input_zero_point_dup);
    input.val[3] = vaddq_s32(input.val[3], input_zero_point_dup);

    // Apply the effective scale to all four rows at once.
    int32x4x4_t result =
      MultiplyByQuantizedMultiplier4Rows(input, effective_scale_multiplier, effective_scale_shift);

    // Add the output zero point, then clamp into the uint8 range.
    result.val[0] = vaddq_s32(result.val[0], output_zero_point_dup);
    result.val[1] = vaddq_s32(result.val[1], output_zero_point_dup);
    result.val[2] = vaddq_s32(result.val[2], output_zero_point_dup);
    result.val[3] = vaddq_s32(result.val[3], output_zero_point_dup);
    result.val[0] = vmaxq_s32(vminq_s32(result.val[0], max_val_dup), min_val_dup);
    result.val[1] = vmaxq_s32(vminq_s32(result.val[1], max_val_dup), min_val_dup);
    result.val[2] = vmaxq_s32(vminq_s32(result.val[2], max_val_dup), min_val_dup);
    result.val[3] = vmaxq_s32(vminq_s32(result.val[3], max_val_dup), min_val_dup);

    // Reinterpret as unsigned (values are already clamped to [0, 255]) and
    // saturating-narrow u32 -> u16 -> u8, then store 16 results.
    const uint32x4_t result_val_1_unsigned = vreinterpretq_u32_s32(result.val[0]);
    const uint32x4_t result_val_2_unsigned = vreinterpretq_u32_s32(result.val[1]);
    const uint32x4_t result_val_3_unsigned = vreinterpretq_u32_s32(result.val[2]);
    const uint32x4_t result_val_4_unsigned = vreinterpretq_u32_s32(result.val[3]);

    const uint16x4_t narrowed_val_1 = vqmovn_u32(result_val_1_unsigned);
    const uint16x4_t narrowed_val_2 = vqmovn_u32(result_val_2_unsigned);
    const uint16x4_t narrowed_val_3 = vqmovn_u32(result_val_3_unsigned);
    const uint16x4_t narrowed_val_4 = vqmovn_u32(result_val_4_unsigned);
    const uint16x8_t output_first_half = vcombine_u16(narrowed_val_1, narrowed_val_2);
    const uint16x8_t output_second_half = vcombine_u16(narrowed_val_3, narrowed_val_4);
    const uint8x8_t narrowed_first_half = vqmovn_u16(output_first_half);
    const uint8x8_t narrowed_second_half = vqmovn_u16(output_second_half);
    const uint8x16_t narrowed_result = vcombine_u8(narrowed_first_half, narrowed_second_half);
    vst1q_u8(output_data + i, narrowed_result);
  }

#endif
  // Scalar path: handles the leftover elements (or all of them without NEON).
  for (; i < size; ++i)
  {
    const int32_t input = input_data[i] - input_zeropoint;
    const int32_t output =
      MultiplyByQuantizedMultiplier(input, effective_scale_multiplier, effective_scale_shift) +
      output_zeropoint;
    const int32_t clamped_output = std::max(std::min(output, kMaxOutput), kMinOutput);
    output_data[i] = static_cast<uint8_t>(clamped_output);
  }
}
450
451} // namespace cker
452} // namespace nnfw
453
454#endif // __NNFW_CKER_QUANTIZE_H__
const luci_interpreter::RuntimeShape output_shape
void Quantize(const Shape &input_shape, const InputT *input_data, const Shape &output_shape, OutputT *output_data, const float output_scale, const int32_t output_offset)
Definition Quantize.h:34
void Requantize(const input_type *input_data, int32_t size, int32_t effective_scale_multiplier, int32_t effective_scale_shift, int32_t input_zeropoint, int32_t output_zeropoint, output_type *output_data)
Definition Quantize.h:301
void Requantize< int8_t, uint8_t >(const int8_t *input_data, int32_t size, int32_t effective_scale_multiplier, int32_t effective_scale_shift, int32_t input_zeropoint, int32_t output_zeropoint, uint8_t *output_data)
Definition Quantize.h:379
float RoundToNearest(float value)
Definition Round.h:31
int MatchingFlatSize(const Shape &shape, Ts... check_shapes)
Definition Shape.h:297
int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
Definition Utils.h:96
void Requantize< uint8_t, int8_t >(const uint8_t *input_data, int32_t size, int32_t effective_scale_multiplier, int32_t effective_scale_shift, int32_t input_zeropoint, int32_t output_zeropoint, int8_t *output_data)
Definition Quantize.h:311
Definition topk_v2.h:30
int32_t size[5]
Definition Slice.cpp:35