int entry(int argc, char **argv)
{
  luci::CircleQuantizer quantizer;

  auto options = quantizer.options();
  auto settings = luci::UserSettings::settings();

  const std::string qdqw = "--quantize_dequantize_weights";
  const std::string qwmm = "--quantize_with_minmax";
  const std::string qofm = "--quantize_onnx_fq_model";
  const std::string rq = "--requantize";
  const std::string fq = "--force_quantparam";
  const std::string cq = "--copy_quantparam";
  const std::string fake_quant = "--fake_quantize";
  const std::string qw = "--quantize_weights";
  const std::string cfg = "--config";

  const std::string tf_maxpool = "--TF-style_maxpool";

  const std::string gpd = "--generate_profile_data";

  const std::string save_min_max = "--save_min_max";

  arser::Arser arser("circle-quantizer provides circle model quantization");

  arser::Helper::add_version(arser, print_version);
  arser::Helper::add_verbose(arser);

  arser.add_argument(qofm).nargs(0).default_value(false).help(
    "Quantize Onnx fake-quantized (with QDQ) model");

  arser.add_argument(qdqw)
    .nargs(3)
    .type(arser::DataType::STR_VEC)
    .help("Quantize-dequantize weight values (a required action before quantization). "
          "Three arguments required: input_model_dtype(float32) "
          "output_model_dtype(uint8) granularity(layer, channel)");

  arser.add_argument(qwmm)
    .nargs(3)
    .type(arser::DataType::STR_VEC)
    .help("Quantize with min/max values. "
          "Three arguments required: input_model_dtype(float32) "
          "output_model_dtype(uint8) granularity(layer, channel)");

  arser.add_argument(tf_maxpool)
    .nargs(0)
    .default_value(false)
    .help("Force MaxPool Op to have the same input/output quantparams. NOTE: This feature can "
          "degrade accuracy of some models");

  arser.add_argument(save_min_max)
    .nargs(0)
    .default_value(false)
    .help("Save recorded min/max values.");

  arser.add_argument(fake_quant)
    .nargs(0)
    .help("Convert a quantized model to a fake-quantized model. NOTE: This feature will "
          "generate an fp32 model.");

  arser.add_argument(rq)
    .nargs(2)
    .type(arser::DataType::STR_VEC)
    .help("Requantize a quantized model. "
          "Two arguments required: input_model_dtype(int8) "
          "output_model_dtype(uint8)");

  arser.add_argument(fq)
    .nargs(3)
    .type(arser::DataType::STR_VEC)
    .accumulated(true)
    .help("Write quantization parameters to the specified tensor. "
          "Three arguments required: tensor_name(string), "
          "scale(float), zero_point(int)");

  arser.add_argument(cq)
    .nargs(2)
    .type(arser::DataType::STR_VEC)
    .accumulated(true)
    .help("Copy quantization parameters from a tensor to another tensor. "
          "Two arguments required: source_tensor_name(string), "
          "destination_tensor_name(string)");

  arser.add_argument(qw)
    .nargs(3)
    .type(arser::DataType::STR_VEC)
    .help("Quantize weight values only. "
          "Three arguments required: input_model_dtype(float32) "
          "output_model_dtype(int8, int16) granularity(channel)");

  arser.add_argument("--input_type")
    .help("Input type of quantized model (uint8, int16, int32, int64, float32, or bool). For "
          "multiple inputs, use comma-separated values. e.g., uint8,int16");

  arser.add_argument("--output_type")
    .help("Output type of quantized model (uint8, int16, int32, int64, float32, or bool). For "
          "multiple outputs, use comma-separated values. e.g., uint8,int16");

  arser.add_argument(cfg).help("Path to the quantization configuration file");

  arser.add_argument("input").help("Input circle model");
  arser.add_argument("output").help("Output circle model");

  arser.add_argument(gpd).nargs(0).required(false).default_value(false).help(
    "This will turn on profiling data generation.");

  try
  {
    arser.parse(argc, argv);
  }
  catch (const std::runtime_error &err)
  {
    std::cerr << err.what() << std::endl;
    std::cout << arser;
    return 255;
  }

  {
    // Only one of the quantization action options may be used at once.
    int32_t opt_used = arser[qdqw] ? 1 : 0;
    opt_used += arser[qwmm] ? 1 : 0;
    opt_used += arser[rq] ? 1 : 0;
    opt_used += arser[fq] ? 1 : 0;
    opt_used += arser[cq] ? 1 : 0;
    opt_used += arser[fake_quant] ? 1 : 0;
    opt_used += arser[qw] ? 1 : 0;
    opt_used += arser.get<bool>(qofm) ? 1 : 0;
    if (opt_used != 1)
    {
      print_exclusive_options();
      return 255;
    }
  }
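
  // The count above makes the action options mutually exclusive. For example (hypothetical
  // files), "--requantize int8 uint8 in.circle out.circle" passes this check, while adding
  // "--fake_quantize" to the same command line would be rejected via print_exclusive_options().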

  if (arser.get<bool>("--verbose"))
  {
    // The last argument of setenv() is 0 (no overwrite), so an existing
    // LUCI_LOG value set by the user is kept.
    setenv("LUCI_LOG", "100", 0);
  }

  if (arser[qdqw])
  {
    auto values = arser.get<std::vector<std::string>>(qdqw);
    if (values.size() != 3)
    {
      std::cerr << arser;
      return 255;
    }
    options->enable(Algorithms::QuantizeDequantizeWeights);

    options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
    options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
    options->param(AlgorithmParameters::Quantize_granularity, values.at(2));

    if (arser[cfg])
    {
      auto filename = arser.get<std::string>(cfg);
      try
      {
        auto layer_params = read_layer_params(filename);

        options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);

        auto layer_params_set = read_layer_params_set(filename);

        options->layer_params_set(layer_params_set);
      }
      catch (const std::runtime_error &e)
      {
        std::cerr << e.what() << '\n';
        return 255;
      }
    }
  }
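
  // Sketch of a weight-only fake-quantization run handled by the block above (placeholder
  // file names; dtypes and granularity follow the --quantize_dequantize_weights help text):
  //   circle-quantizer --quantize_dequantize_weights float32 uint8 channel fp32.circle fakeq.circle
  // An optional "--config <file>" adds per-layer parameters via read_layer_params(_set).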

  if (arser[qwmm])
  {
    auto values = arser.get<std::vector<std::string>>(qwmm);
    if (values.size() != 3)
    {
      std::cerr << arser;
      return 255;
    }
    options->enable(Algorithms::QuantizeWithMinMax);

    options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
    options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
    options->param(AlgorithmParameters::Quantize_granularity, values.at(2));

    if (arser["--input_type"])
      options->param(AlgorithmParameters::Quantize_input_type,
                     arser.get<std::string>("--input_type"));

    if (arser["--output_type"])
      options->param(AlgorithmParameters::Quantize_output_type,
                     arser.get<std::string>("--output_type"));

    if (arser[tf_maxpool] and arser.get<bool>(tf_maxpool))
      options->param(AlgorithmParameters::Quantize_TF_style_maxpool, "True");

    if (arser[save_min_max] and arser.get<bool>(save_min_max))
      options->param(AlgorithmParameters::Quantize_save_min_max, "True");

    if (arser[cfg])
    {
      auto filename = arser.get<std::string>(cfg);
      try
      {
        auto layer_params = read_layer_params(filename);

        options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);

        auto layer_params_set = read_layer_params_set(filename);

        options->layer_params_set(layer_params_set);
      }
      catch (const std::runtime_error &e)
      {
        std::cerr << e.what() << '\n';
        return 255;
      }
    }
  }
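
  // Sketch of a full quantization run handled by the block above (placeholder file names):
  //   circle-quantizer --quantize_with_minmax float32 uint8 channel \
  //     --input_type uint8 --output_type uint8 recorded.circle q8.circle
  // --input_type/--output_type, --TF-style_maxpool, --save_min_max and --config are optional here.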

  if (arser.get<bool>(qofm))
  {
    options->enable(Algorithms::QuantizeOnnxFakeQuantizedModel);

    options->param(AlgorithmParameters::Quantize_input_model_dtype, "onnx_fake_quant");
  }
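
  // Sketch (placeholder file names): an ONNX-origin model that already carries QDQ nodes is
  // quantized with the flag alone, since --quantize_onnx_fq_model takes no value arguments:
  //   circle-quantizer --quantize_onnx_fq_model onnx_fq.circle quantized.circle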

  if (arser[rq])
  {
    auto values = arser.get<std::vector<std::string>>(rq);
    if (values.size() != 2)
    {
      std::cerr << arser;
      return 255;
    }
    options->enable(Algorithms::Requantize);

    options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
    options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
  }
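
  // Sketch (placeholder file names; dtypes from the --requantize help text):
  //   circle-quantizer --requantize int8 uint8 q8_signed.circle q8_unsigned.circle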

  if (arser[fq])
  {
    auto values = arser.get<std::vector<std::vector<std::string>>>(fq);

    std::vector<std::string> tensors;
    std::vector<std::string> scales;
    std::vector<std::string> zero_points;

    for (auto const &value : values)
    {
      if (value.size() != 3)
      {
        std::cerr << arser;
        return 255;
      }

      tensors.push_back(value[0]);
      scales.push_back(value[1]);
      zero_points.push_back(value[2]);
    }

    options->enable(Algorithms::ForceQuantParam);

    options->params(AlgorithmParameters::Quantize_tensor_names, tensors);
    options->params(AlgorithmParameters::Quantize_scales, scales);
    options->params(AlgorithmParameters::Quantize_zero_points, zero_points);
  }
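
  // Since --force_quantparam is accumulated with nargs(3), repeating it collects parallel
  // vectors. For example (hypothetical tensor names and values):
  //   --force_quantparam conv1_out 0.0039 0 --force_quantparam conv2_out 0.0078 128
  // yields tensors = {"conv1_out", "conv2_out"}, scales = {"0.0039", "0.0078"},
  // zero_points = {"0", "128"}.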

  if (arser[cq])
  {
    auto values = arser.get<std::vector<std::vector<std::string>>>(cq);

    std::vector<std::string> src;
    std::vector<std::string> dst;

    for (auto const &value : values)
    {
      if (value.size() != 2)
      {
        std::cerr << arser;
        return 255;
      }

      src.push_back(value[0]);
      dst.push_back(value[1]);
    }

    options->enable(Algorithms::CopyQuantParam);

    options->params(AlgorithmParameters::Quantize_src_tensor_names, src);
    options->params(AlgorithmParameters::Quantize_dst_tensor_names, dst);
  }
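
  // Likewise, --copy_quantparam may be repeated; e.g. (hypothetical tensor names)
  //   --copy_quantparam act_a act_b --copy_quantparam act_c act_d
  // yields src = {"act_a", "act_c"} and dst = {"act_b", "act_d"}.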

  if (arser[fake_quant])
    options->enable(Algorithms::ConvertToFakeQuantizedModel);
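
  // Sketch (placeholder file names): --fake_quantize takes no values and converts a quantized
  // model back to an fp32 model, per its help text:
  //   circle-quantizer --fake_quantize q8.circle fake_q_fp32.circle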

  if (arser[qw])
  {
    auto values = arser.get<std::vector<std::string>>(qw);
    if (values.size() != 3)
    {
      std::cerr << arser;
      return 255;
    }
    options->enable(Algorithms::QuantizeWeights);

    options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
    options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
    options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
  }
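
  // Sketch (placeholder file names; dtypes and granularity from the --quantize_weights help text):
  //   circle-quantizer --quantize_weights float32 int8 channel fp32.circle weight_q.circle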

  std::string input_path = arser.get<std::string>("input");
  std::string output_path = arser.get<std::string>("output");

  if (arser.get<bool>(gpd))
    settings->set(luci::UserSettings::Key::ProfilingDataGen, true);

  // Load and verify the input Circle model
  luci::ImporterEx importerex;
  auto module = importerex.importVerifyModule(input_path);
  if (module.get() == nullptr)
    return EXIT_FAILURE;

  for (size_t idx = 0; idx < module->size(); ++idx)
  {
    auto graph = module->graph(idx);

    // quantize the graph
    quantizer.quantize(graph);

    if (!luci::validate(graph))
    {
      std::cerr << "ERROR: Quantized graph is invalid" << std::endl;
      return 255;
    }
  }

  // Export to the output Circle file
  luci::CircleExporter exporter;

  luci::CircleFileExpContract contract(module.get(), output_path);

  if (!exporter.invoke(&contract))
  {
    std::cerr << "ERROR: Failed to export '" << output_path << "'" << std::endl;
    return 255;
  }

  return 0;
}