167 auto options = quantizer.
options();
170 const std::string qdqw =
"--quantize_dequantize_weights";
171 const std::string qwmm =
"--quantize_with_minmax";
172 const std::string qofm =
"--quantize_onnx_fq_model";
173 const std::string rq =
"--requantize";
174 const std::string fq =
"--force_quantparam";
175 const std::string cq =
"--copy_quantparam";
176 const std::string fake_quant =
"--fake_quantize";
177 const std::string qw =
"--quantize_weights";
178 const std::string cfg =
"--config";
180 const std::string tf_maxpool =
"--TF-style_maxpool";
182 const std::string gpd =
"--generate_profile_data";
184 const std::string save_min_max =
"--save_min_max";
191 arser.add_argument(qofm).nargs(0).default_value(
false).help(
192 "Quantize Onnx fake-quantized (with QDQ) model");
194 arser.add_argument(qdqw)
196 .type(arser::DataType::STR_VEC)
197 .help(
"Quantize-dequantize weight values required action before quantization. "
198 "Three arguments required: input_model_dtype(float32) "
199 "output_model_dtype(uint8) granularity(layer, channel)");
201 arser.add_argument(qwmm)
203 .type(arser::DataType::STR_VEC)
204 .help(
"Quantize with min/max values. "
205 "Three arguments required: input_model_dtype(float32) "
206 "output_model_dtype(uint8) granularity(layer, channel)");
208 arser.add_argument(tf_maxpool)
210 .default_value(
false)
211 .help(
"Force MaxPool Op to have the same input/output quantparams. NOTE: This feature can "
212 "degrade accuracy of some models");
214 arser.add_argument(save_min_max)
216 .default_value(
false)
217 .help(
"Save recorded min/max values.");
219 arser.add_argument(fake_quant)
221 .help(
"Convert a quantized model to a fake-quantized model. NOTE: This feature will "
222 "generate an fp32 model.");
224 arser.add_argument(rq)
226 .type(arser::DataType::STR_VEC)
227 .help(
"Requantize a quantized model. "
228 "Two arguments required: input_model_dtype(int8) "
229 "output_model_dtype(uint8)");
231 arser.add_argument(fq)
233 .type(arser::DataType::STR_VEC)
235 .help(
"Write quantization parameters to the specified tensor. "
236 "Three arguments required: tensor_name(string), "
237 "scale(float) zero_point(int)");
239 arser.add_argument(cq)
241 .type(arser::DataType::STR_VEC)
243 .help(
"Copy quantization parameter from a tensor to another tensor."
244 "Two arguments required: source_tensor_name(string), "
245 "destination_tensor_name(string)");
247 arser.add_argument(qw)
249 .type(arser::DataType::STR_VEC)
250 .help(
"Quantize weights values only"
251 "Three arguments required: input_model_dtype(float32) "
252 "output_model_dtype(int8, int16) granularity(channel)");
254 arser.add_argument(
"--input_type")
255 .help(
"Input type of quantized model (uint8, int16, int32, int64, float32, or bool). For "
257 "use comma-separated values. e.g., uint8,int16");
259 arser.add_argument(
"--output_type")
260 .help(
"Output type of quantized model (uint8, int16, int32, int64, float32, or bool). For "
262 "use comma-separated values. e.g., uint8,int16");
264 arser.add_argument(cfg).help(
"Path to the quantization configuration file");
266 arser.add_argument(
"input").help(
"Input circle model");
267 arser.add_argument(
"output").help(
"Output circle model");
269 arser.add_argument(gpd).nargs(0).required(
false).default_value(
false).help(
270 "This will turn on profiling data generation.");
274 arser.parse(argc, argv);
276 catch (
const std::runtime_error &err)
278 std::cerr << err.what() << std::endl;
285 int32_t opt_used =
arser[qdqw] ? 1 : 0;
286 opt_used +=
arser[qwmm] ? 1 : 0;
287 opt_used +=
arser[rq] ? 1 : 0;
288 opt_used +=
arser[fq] ? 1 : 0;
289 opt_used +=
arser[cq] ? 1 : 0;
290 opt_used +=
arser[fake_quant] ? 1 : 0;
291 opt_used +=
arser[qw] ? 1 : 0;
292 opt_used +=
arser.get<
bool>(qofm) ? 1 : 0;
300 if (
arser.get<
bool>(
"--verbose"))
304 setenv(
"LUCI_LOG",
"100", 0);
309 auto values =
arser.get<std::vector<std::string>>(qdqw);
310 if (values.size() != 3)
315 options->enable(Algorithms::QuantizeDequantizeWeights);
317 options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
318 options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
319 options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
323 auto filename =
arser.get<std::string>(cfg);
328 options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);
332 options->layer_params_set(layer_params_set);
334 catch (
const std::runtime_error &e)
336 std::cerr << e.what() <<
'\n';
344 auto values =
arser.get<std::vector<std::string>>(qwmm);
345 if (values.size() != 3)
350 options->enable(Algorithms::QuantizeWithMinMax);
352 options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
353 options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
354 options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
356 if (
arser[
"--input_type"])
357 options->param(AlgorithmParameters::Quantize_input_type,
358 arser.get<std::string>(
"--input_type"));
360 if (
arser[
"--output_type"])
361 options->param(AlgorithmParameters::Quantize_output_type,
362 arser.get<std::string>(
"--output_type"));
364 if (
arser[tf_maxpool] and
arser.get<
bool>(tf_maxpool))
365 options->param(AlgorithmParameters::Quantize_TF_style_maxpool,
"True");
367 if (
arser[save_min_max] and
arser.get<
bool>(save_min_max))
368 options->param(AlgorithmParameters::Quantize_save_min_max,
"True");
372 auto filename =
arser.get<std::string>(cfg);
377 options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);
381 options->layer_params_set(layer_params_set);
383 catch (
const std::runtime_error &e)
385 std::cerr << e.what() <<
'\n';
391 if (
arser.get<
bool>(qofm))
393 options->enable(Algorithms::QuantizeOnnxFakeQuantizedModel);
395 options->param(AlgorithmParameters::Quantize_input_model_dtype,
"onnx_fake_quant");
400 auto values =
arser.get<std::vector<std::string>>(rq);
401 if (values.size() != 2)
406 options->enable(Algorithms::Requantize);
408 options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
409 options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
414 auto values =
arser.get<std::vector<std::vector<std::string>>>(fq);
416 std::vector<std::string> tensors;
417 std::vector<std::string> scales;
418 std::vector<std::string> zero_points;
420 for (
auto const value : values)
422 if (value.size() != 3)
428 tensors.push_back(value[0]);
429 scales.push_back(value[1]);
430 zero_points.push_back(value[2]);
433 options->enable(Algorithms::ForceQuantParam);
435 options->params(AlgorithmParameters::Quantize_tensor_names, tensors);
436 options->params(AlgorithmParameters::Quantize_scales, scales);
437 options->params(AlgorithmParameters::Quantize_zero_points, zero_points);
442 auto values =
arser.get<std::vector<std::vector<std::string>>>(cq);
444 std::vector<std::string> src;
445 std::vector<std::string> dst;
447 for (
auto const value : values)
449 if (value.size() != 2)
455 src.push_back(value[0]);
456 dst.push_back(value[1]);
459 options->enable(Algorithms::CopyQuantParam);
461 options->params(AlgorithmParameters::Quantize_src_tensor_names, src);
462 options->params(AlgorithmParameters::Quantize_dst_tensor_names, dst);
465 if (
arser[fake_quant])
466 options->enable(Algorithms::ConvertToFakeQuantizedModel);
470 auto values =
arser.get<std::vector<std::string>>(qw);
471 if (values.size() != 3)
476 options->enable(Algorithms::QuantizeWeights);
478 options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
479 options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
480 options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
483 std::string input_path =
arser.get<std::string>(
"input");
484 std::string output_path =
arser.get<std::string>(
"output");
491 auto module = importerex.importVerifyModule(input_path);
492 if (module.get() ==
nullptr)
495 for (
size_t idx = 0; idx <
module->size(); ++idx)
497 auto graph =
module->graph(idx);
504 std::cerr <<
"ERROR: Quantized graph is invalid" << std::endl;
514 if (!exporter.
invoke(&contract))
516 std::cerr <<
"ERROR: Failed to export '" << output_path <<
"'" << std::endl;