ONE - On-device Neural Engine
Loading...
Searching...
No Matches
CircleQuantizer.cpp File Reference
#include <luci/ImporterEx.h>
#include <luci/CircleQuantizer.h>
#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>
#include <luci/UserSettings.h>
#include <oops/InternalExn.h>
#include <arser/arser.h>
#include <vconone/vconone.h>
#include <json.h>
#include <iostream>
#include <map>
#include <string>

Go to the source code of this file.

Data Structures

struct  QConfReader
 

Typedefs

using LayerParam = luci::CircleQuantizer::Options::LayerParam
 
using LayerParams = luci::CircleQuantizer::Options::LayerParams
 
using LayerParamsSet = luci::CircleQuantizer::Options::LayerParamsSet
 
using Algorithms = luci::CircleQuantizer::Options::Algorithm
 
using AlgorithmParameters = luci::CircleQuantizer::Options::AlgorithmParameters
 

Functions

LayerParams read_layer_params (std::string &filename)
 
LayerParamsSet read_layer_params_set (std::string &filename)
 
void print_exclusive_options (void)
 
void print_version (void)
 
int entry (int argc, char **argv)
 

Typedef Documentation

◆ AlgorithmParameters

◆ Algorithms

◆ LayerParam

◆ LayerParams

◆ LayerParamsSet

Function Documentation

◆ entry()

int entry ( int  argc,
char **  argv 
)

Definition at line 163 of file CircleQuantizer.cpp.

164{
165 luci::CircleQuantizer quantizer;
166
167 auto options = quantizer.options();
168 auto settings = luci::UserSettings::settings();
169
170 const std::string qdqw = "--quantize_dequantize_weights";
171 const std::string qwmm = "--quantize_with_minmax";
172 const std::string qofm = "--quantize_onnx_fq_model";
173 const std::string rq = "--requantize";
174 const std::string fq = "--force_quantparam";
175 const std::string cq = "--copy_quantparam";
176 const std::string fake_quant = "--fake_quantize";
177 const std::string qw = "--quantize_weights";
178 const std::string cfg = "--config";
179
180 const std::string tf_maxpool = "--TF-style_maxpool";
181
182 const std::string gpd = "--generate_profile_data";
183
184 const std::string save_min_max = "--save_min_max";
185
186 arser::Arser arser("circle-quantizer provides circle model quantization");
187
190
191 arser.add_argument(qofm).nargs(0).default_value(false).help(
192 "Quantize Onnx fake-quantized (with QDQ) model");
193
194 arser.add_argument(qdqw)
195 .nargs(3)
196 .type(arser::DataType::STR_VEC)
197 .help("Quantize-dequantize weight values required action before quantization. "
198 "Three arguments required: input_model_dtype(float32) "
199 "output_model_dtype(uint8) granularity(layer, channel)");
200
201 arser.add_argument(qwmm)
202 .nargs(3)
203 .type(arser::DataType::STR_VEC)
204 .help("Quantize with min/max values. "
205 "Three arguments required: input_model_dtype(float32) "
206 "output_model_dtype(uint8) granularity(layer, channel)");
207
208 arser.add_argument(tf_maxpool)
209 .nargs(0)
210 .default_value(false)
211 .help("Force MaxPool Op to have the same input/output quantparams. NOTE: This feature can "
212 "degrade accuracy of some models");
213
214 arser.add_argument(save_min_max)
215 .nargs(0)
216 .default_value(false)
217 .help("Save recorded min/max values.");
218
219 arser.add_argument(fake_quant)
220 .nargs(0)
221 .help("Convert a quantized model to a fake-quantized model. NOTE: This feature will "
222 "generate an fp32 model.");
223
224 arser.add_argument(rq)
225 .nargs(2)
226 .type(arser::DataType::STR_VEC)
227 .help("Requantize a quantized model. "
228 "Two arguments required: input_model_dtype(int8) "
229 "output_model_dtype(uint8)");
230
231 arser.add_argument(fq)
232 .nargs(3)
233 .type(arser::DataType::STR_VEC)
234 .accumulated(true)
235 .help("Write quantization parameters to the specified tensor. "
236 "Three arguments required: tensor_name(string), "
237 "scale(float) zero_point(int)");
238
239 arser.add_argument(cq)
240 .nargs(2)
241 .type(arser::DataType::STR_VEC)
242 .accumulated(true)
243 .help("Copy quantization parameter from a tensor to another tensor."
244 "Two arguments required: source_tensor_name(string), "
245 "destination_tensor_name(string)");
246
247 arser.add_argument(qw)
248 .nargs(3)
249 .type(arser::DataType::STR_VEC)
250 .help("Quantize weights values only"
251 "Three arguments required: input_model_dtype(float32) "
252 "output_model_dtype(int8, int16) granularity(channel)");
253
254 arser.add_argument("--input_type")
255 .help("Input type of quantized model (uint8, int16, int32, int64, float32, or bool). For "
256 "multiple inputs, "
257 "use comma-separated values. e.g., uint8,int16");
258
259 arser.add_argument("--output_type")
260 .help("Output type of quantized model (uint8, int16, int32, int64, float32, or bool). For "
261 "multiple outputs, "
262 "use comma-separated values. e.g., uint8,int16");
263
264 arser.add_argument(cfg).help("Path to the quantization configuration file");
265
266 arser.add_argument("input").help("Input circle model");
267 arser.add_argument("output").help("Output circle model");
268
269 arser.add_argument(gpd).nargs(0).required(false).default_value(false).help(
270 "This will turn on profiling data generation.");
271
272 try
273 {
274 arser.parse(argc, argv);
275 }
276 catch (const std::runtime_error &err)
277 {
278 std::cerr << err.what() << std::endl;
279 std::cout << arser;
280 return 255;
281 }
282
283 {
284 // only one of qdqw, qwmm, rq, fq, cq, fake_quant, qw, qofm option can be used
285 int32_t opt_used = arser[qdqw] ? 1 : 0;
286 opt_used += arser[qwmm] ? 1 : 0;
287 opt_used += arser[rq] ? 1 : 0;
288 opt_used += arser[fq] ? 1 : 0;
289 opt_used += arser[cq] ? 1 : 0;
290 opt_used += arser[fake_quant] ? 1 : 0;
291 opt_used += arser[qw] ? 1 : 0;
292 opt_used += arser.get<bool>(qofm) ? 1 : 0;
293 if (opt_used != 1)
294 {
296 return 255;
297 }
298 }
299
300 if (arser.get<bool>("--verbose"))
301 {
302 // The third parameter of setenv means REPLACE.
303 // If REPLACE is zero, it does not overwrite an existing value.
304 setenv("LUCI_LOG", "100", 0);
305 }
306
307 if (arser[qdqw])
308 {
309 auto values = arser.get<std::vector<std::string>>(qdqw);
310 if (values.size() != 3)
311 {
312 std::cerr << arser;
313 return 255;
314 }
315 options->enable(Algorithms::QuantizeDequantizeWeights);
316
317 options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
318 options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
319 options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
320
321 if (arser[cfg])
322 {
323 auto filename = arser.get<std::string>(cfg);
324 try
325 {
326 auto layer_params = read_layer_params(filename);
327
328 options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);
329
330 auto layer_params_set = read_layer_params_set(filename);
331
332 options->layer_params_set(layer_params_set);
333 }
334 catch (const std::runtime_error &e)
335 {
336 std::cerr << e.what() << '\n';
337 return 255;
338 }
339 }
340 }
341
342 if (arser[qwmm])
343 {
344 auto values = arser.get<std::vector<std::string>>(qwmm);
345 if (values.size() != 3)
346 {
347 std::cerr << arser;
348 return 255;
349 }
350 options->enable(Algorithms::QuantizeWithMinMax);
351
352 options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
353 options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
354 options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
355
356 if (arser["--input_type"])
357 options->param(AlgorithmParameters::Quantize_input_type,
358 arser.get<std::string>("--input_type"));
359
360 if (arser["--output_type"])
361 options->param(AlgorithmParameters::Quantize_output_type,
362 arser.get<std::string>("--output_type"));
363
364 if (arser[tf_maxpool] and arser.get<bool>(tf_maxpool))
365 options->param(AlgorithmParameters::Quantize_TF_style_maxpool, "True");
366
367 if (arser[save_min_max] and arser.get<bool>(save_min_max))
368 options->param(AlgorithmParameters::Quantize_save_min_max, "True");
369
370 if (arser[cfg])
371 {
372 auto filename = arser.get<std::string>(cfg);
373 try
374 {
375 auto layer_params = read_layer_params(filename);
376
377 options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);
378
379 auto layer_params_set = read_layer_params_set(filename);
380
381 options->layer_params_set(layer_params_set);
382 }
383 catch (const std::runtime_error &e)
384 {
385 std::cerr << e.what() << '\n';
386 return 255;
387 }
388 }
389 }
390
391 if (arser.get<bool>(qofm))
392 {
393 options->enable(Algorithms::QuantizeOnnxFakeQuantizedModel);
394
395 options->param(AlgorithmParameters::Quantize_input_model_dtype, "onnx_fake_quant");
396 }
397
398 if (arser[rq])
399 {
400 auto values = arser.get<std::vector<std::string>>(rq);
401 if (values.size() != 2)
402 {
403 std::cerr << arser;
404 return 255;
405 }
406 options->enable(Algorithms::Requantize);
407
408 options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
409 options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
410 }
411
412 if (arser[fq])
413 {
414 auto values = arser.get<std::vector<std::vector<std::string>>>(fq);
415
416 std::vector<std::string> tensors;
417 std::vector<std::string> scales;
418 std::vector<std::string> zero_points;
419
420 for (auto const value : values)
421 {
422 if (value.size() != 3)
423 {
424 std::cerr << arser;
425 return 255;
426 }
427
428 tensors.push_back(value[0]);
429 scales.push_back(value[1]);
430 zero_points.push_back(value[2]);
431 }
432
433 options->enable(Algorithms::ForceQuantParam);
434
435 options->params(AlgorithmParameters::Quantize_tensor_names, tensors);
436 options->params(AlgorithmParameters::Quantize_scales, scales);
437 options->params(AlgorithmParameters::Quantize_zero_points, zero_points);
438 }
439
440 if (arser[cq])
441 {
442 auto values = arser.get<std::vector<std::vector<std::string>>>(cq);
443
444 std::vector<std::string> src;
445 std::vector<std::string> dst;
446
447 for (auto const value : values)
448 {
449 if (value.size() != 2)
450 {
451 std::cerr << arser;
452 return 255;
453 }
454
455 src.push_back(value[0]);
456 dst.push_back(value[1]);
457 }
458
459 options->enable(Algorithms::CopyQuantParam);
460
461 options->params(AlgorithmParameters::Quantize_src_tensor_names, src);
462 options->params(AlgorithmParameters::Quantize_dst_tensor_names, dst);
463 }
464
465 if (arser[fake_quant])
466 options->enable(Algorithms::ConvertToFakeQuantizedModel);
467
468 if (arser[qw])
469 {
470 auto values = arser.get<std::vector<std::string>>(qw);
471 if (values.size() != 3)
472 {
473 std::cerr << arser;
474 return 255;
475 }
476 options->enable(Algorithms::QuantizeWeights);
477
478 options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
479 options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
480 options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
481 }
482
483 std::string input_path = arser.get<std::string>("input");
484 std::string output_path = arser.get<std::string>("output");
485
486 if (arser[gpd])
488
489 // Load model from the file
490 luci::ImporterEx importerex;
491 auto module = importerex.importVerifyModule(input_path);
492 if (module.get() == nullptr)
493 return EXIT_FAILURE;
494
495 for (size_t idx = 0; idx < module->size(); ++idx)
496 {
497 auto graph = module->graph(idx);
498
499 // quantize the graph
500 quantizer.quantize(graph);
501
502 if (!luci::validate(graph))
503 {
504 std::cerr << "ERROR: Quantized graph is invalid" << std::endl;
505 return 255;
506 }
507 }
508
509 // Export to output Circle file
510 luci::CircleExporter exporter;
511
512 luci::CircleFileExpContract contract(module.get(), output_path);
513
514 if (!exporter.invoke(&contract))
515 {
516 std::cerr << "ERROR: Failed to export '" << output_path << "'" << std::endl;
517 return 255;
518 }
519
520 return 0;
521}
void print_exclusive_options(void)
LayerParams read_layer_params(std::string &filename)
void print_version(void)
LayerParamsSet read_layer_params_set(std::string &filename)
static void add_version(Arser &arser, const std::function< void(void)> &func)
Definition arser.h:755
static void add_verbose(Arser &arser)
Definition arser.h:765
bool invoke(Contract *) const
void quantize(loco::Graph *) const
Definition arser.h:39
bool validate(luci::PartitionTable &partition)
static UserSettings * settings()

References arser::Helper::add_verbose(), arser::Helper::add_version(), luci::CircleExporter::invoke(), luci::CircleQuantizer::options(), print_exclusive_options(), print_version(), luci::UserSettings::ProfilingDataGen, luci::CircleQuantizer::quantize(), read_layer_params(), read_layer_params_set(), luci::UserSettings::settings(), and luci::validate().

◆ print_exclusive_options()

void print_exclusive_options ( void  )

Definition at line 146 of file CircleQuantizer.cpp.

/**
 * @brief Print the list of mutually exclusive quantization options to stdout.
 *
 * The list mirrors the options that entry() counts when it checks that exactly
 * one algorithm option was given.
 */
void print_exclusive_options(void)
{
  static const char *const exclusive_options[] = {
    "--quantize_dequantize_weights",
    "--quantize_with_minmax",
    "--requantize",
    "--force_quantparam",
    "--copy_quantparam",
    "--fake_quantize",
    "--quantize_weights",
    "--quantize_onnx_fq_model",
  };

  // No hard-coded count in the message, so it cannot drift from the list again.
  std::cout << "Use only one of the options below." << std::endl;
  for (const char *option : exclusive_options)
    std::cout << " " << option << std::endl;
}

Referenced by entry().

◆ print_version()

void print_version ( void  )

Definition at line 157 of file CircleQuantizer.cpp.

158{
159 std::cout << "circle-quantizer version " << vconone::get_string() << std::endl;
160 std::cout << vconone::get_copyright() << std::endl;
161}
std::string get_copyright(void)
get_copyright will return copyright string
Definition version.cpp:54
std::string get_string(void)
get_string will return string of major.minor.patch (without build)
Definition version.cpp:44

References vconone::get_copyright(), and vconone::get_string().

Referenced by entry().

◆ read_layer_params()

LayerParams read_layer_params ( std::string &  filename)

Definition at line 67 of file CircleQuantizer.cpp.

68{
69 QConfReader qcr;
70 qcr.init(filename);
71
72 auto layers = qcr.root()["layers"];
74 for (auto layer : layers)
75 {
76 if (layer.isMember("name"))
77 {
78 auto l = std::make_shared<LayerParam>();
79 {
80 l->name = layer["name"].asString();
81 l->dtype = layer["dtype"].asString();
82 l->granularity = layer["granularity"].asString();
83 }
84 p.emplace_back(l);
85 }
86
87 // Multiple names with the same dtype & granularity
88 if (layer.isMember("names"))
89 {
90 for (auto name : layer["names"])
91 {
92 auto l = std::make_shared<LayerParam>();
93 {
94 l->name = name.asString();
95 l->dtype = layer["dtype"].asString();
96 l->granularity = layer["granularity"].asString();
97 }
98 p.emplace_back(l);
99 }
100 }
101 }
102
103 return p;
104}
luci::CircleQuantizer::Options::LayerParams LayerParams
Json::Value & root(void)
void init(const std::string &filename)

References QConfReader::init(), and QConfReader::root().

Referenced by entry(), and read_layer_params_set().

◆ read_layer_params_set()

LayerParamsSet read_layer_params_set ( std::string &  filename)

Definition at line 106 of file CircleQuantizer.cpp.

107{
108 LayerParamsSet lpss;
109
110 // read default values
111 LayerParams lps = read_layer_params(filename);
112 lpss.emplace_back(lps);
113
114 QConfReader qcr;
115 qcr.init(filename);
116
117 auto layers = qcr.root()["layers"];
118 // alternate names
119 for (const auto &layer : layers)
120 {
121 const std::string key_alt_names = "alternate";
122 if (layer.isMember(key_alt_names))
123 {
124 auto alternate = layer[key_alt_names];
125 for (const auto &altkey : alternate.getMemberNames())
126 {
127 LayerParams lps;
128 for (const auto &altvalue : alternate[altkey])
129 {
130 auto l = std::make_shared<LayerParam>();
131 {
132 l->name = altvalue.asString();
133 l->dtype = layer["dtype"].asString();
134 l->granularity = layer["granularity"].asString();
135 }
136 lps.emplace_back(l);
137 }
138 lpss.emplace_back(lps);
139 }
140 }
141 }
142
143 return lpss;
144}

References luci::CircleQuantizer::Options::LayerParamsSet::emplace_back(), QConfReader::init(), read_layer_params(), and QConfReader::root().

Referenced by entry().