#include <unordered_set>
void canonicalize_input_type(loco::Graph *g, std::vector<loco::DataType> &input_type)
{
  if (input_type.empty())
    return;

  const auto inputs = g->inputs();

  // The number of given input dtypes must be 1 or match the number of graph inputs
  if (input_type.size() != 1 and input_type.size() != inputs->size())
  {
    throw std::runtime_error(
      "Invalid number of input dtype. The number of input dtype should be 1 or "
      "the same as the number of graph inputs.");
  }

  // If a single dtype is given, expand it: it applies to float32 inputs only,
  // while non-float32 inputs keep their original dtype
  if (input_type.size() == 1)
  {
    const auto user_given_dtype = input_type[0];
    input_type.clear();

    auto input_nodes = loco::input_nodes(g);
    for (uint32_t i = 0; i < input_nodes.size(); i++)
    {
      auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);

      if (input->dtype() == loco::DataType::FLOAT32)
        input_type.push_back(user_given_dtype);
      else
        input_type.push_back(input->dtype());
    }
  }

  // Validate each requested input dtype against the model's input dtype
  auto input_nodes = loco::input_nodes(g);
  for (uint32_t i = 0; i < input_nodes.size(); i++)
  {
    auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);
    assert(i == input->index());

    if (input->dtype() != loco::DataType::FLOAT32)
    {
      // Non-float32 inputs are not quantized; their dtype must stay unchanged
      if (input->dtype() != input_type[i])
        throw std::runtime_error(
          "Input dtype of " + input->name() +
          " is invalid. It has to be the same with the model's input dtype.");
    }
    else
    {
      // Float32 inputs may be quantized to uint8 or int16, or kept as float32
      if (input_type[i] != loco::DataType::FLOAT32 and input_type[i] != loco::DataType::U8 and
          input_type[i] != loco::DataType::S16)
        throw std::runtime_error(
          "Input dtype of " + input->name() +
          " is invalid. For float32 input, the input dtype after "
          "quantization must be one of uint8, int16, or float32.");
    }
  }
}
void canonicalize_output_type(loco::Graph *g, std::vector<loco::DataType> &output_type)
{
  if (output_type.empty())
    return;

  const auto outputs = g->outputs();

  if (output_type.size() != 1 and output_type.size() != outputs->size())
  {
    throw std::runtime_error(
      "Invalid number of output dtype. The number of output dtype should be 1 or "
      "the same as the number of graph outputs.");
  }

  // Expand a single user-given dtype to all float32 graph outputs
  if (output_type.size() == 1)
  {
    const auto user_given_dtype = output_type[0];
    output_type.clear();

    auto output_nodes = loco::output_nodes(g);
    for (uint32_t i = 0; i < output_nodes.size(); i++)
    {
      auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
      if (output->dtype() == loco::DataType::FLOAT32)
        output_type.push_back(user_given_dtype);
      else
        output_type.push_back(output->dtype());
    }
  }

  // Validate each requested output dtype against the model's output dtype
  auto output_nodes = loco::output_nodes(g);
  for (uint32_t i = 0; i < output_nodes.size(); i++)
  {
    auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
    assert(i == output->index());

    if (output->dtype() != loco::DataType::FLOAT32)
    {
      if (output->dtype() != output_type[i])
        throw std::runtime_error(
          "Output dtype of " + output->name() +
          " is invalid. It has to be the same with the model's output dtype.");
    }
    else
    {
      if (output_type[i] != loco::DataType::FLOAT32 and output_type[i] != loco::DataType::U8 and
          output_type[i] != loco::DataType::S16)
        throw std::runtime_error(
          "Output dtype of " + output->name() +
          " is invalid. For float32 output, the output dtype after "
          "quantization must be one of uint8, int16, or float32.");
    }
  }
}
template <typename T> T lexical_cast(const std::string &str)
{
  std::istringstream ss;
  ss.str(str);
  T data;
  ss >> data;
  return data;
}

template <typename T> std::vector<T> lexical_cast(std::vector<std::string> &sv)
{
  std::vector<T> result;
  std::transform(sv.begin(), sv.end(), std::back_inserter(result),
                 [](std::string str) -> T { return lexical_cast<T>(str); });
  return result;
}
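// Usage sketch (illustration only, not part of the original pass): the vector
// overload turns CLI-style string parameters into the numeric vectors expected by
// the quantization passes. The values below are invented.
static void lexical_cast_usage_sketch()
{
  std::vector<std::string> str_scales{"0.5", "0.25"};
  std::vector<std::string> str_zero_points{"0", "128"};

  std::vector<float> scales = lexical_cast<float>(str_scales);               // {0.5f, 0.25f}
  std::vector<int64_t> zero_points = lexical_cast<int64_t>(str_zero_points); // {0, 128}

  (void)scales;
  (void)zero_points;
}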
class QuantizeOptionsImpl final : public luci::CircleQuantizer::Options
{
public:
  void enable(Algorithm) final;
  void param(AlgorithmParameters, const std::string &) final;
  const std::string param(AlgorithmParameters) const final;
  void params(AlgorithmParameters, std::vector<std::string> &) final;
  std::vector<std::string> params(AlgorithmParameters) const final;
  void layer_params(AlgorithmParameters, LayerParams &) final;
  LayerParams layer_params(AlgorithmParameters) const final;
  void layer_params_set(LayerParamsSet &) final;
  LayerParamsSet layer_params_set(void) const final;
  bool query(Algorithm) final;

private:
  std::vector<Algorithm> _algorithms;
  std::map<AlgorithmParameters, const std::string> _algorithm_params;
  std::map<AlgorithmParameters, std::vector<std::string>> _multiple_params;
  std::map<AlgorithmParameters, LayerParams> _layer_params;
  LayerParamsSet _layer_params_set;
};
void QuantizeOptionsImpl::enable(Algorithm algo) { _algorithms.push_back(algo); }

void QuantizeOptionsImpl::param(AlgorithmParameters param, const std::string &str)
{
  _algorithm_params.insert(std::pair<AlgorithmParameters, const std::string>(param, str));
}

const std::string QuantizeOptionsImpl::param(AlgorithmParameters param) const
{
  auto param_str = _algorithm_params.find(param);
  if (param_str != _algorithm_params.end())
  {
    return param_str->second;
  }
  return std::string();
}

void QuantizeOptionsImpl::params(AlgorithmParameters param, std::vector<std::string> &vec)
{
  _multiple_params[param] = vec;
}

std::vector<std::string> QuantizeOptionsImpl::params(AlgorithmParameters param) const
{
  auto param_vec = _multiple_params.find(param);
  if (param_vec != _multiple_params.end())
  {
    return param_vec->second;
  }
  return std::vector<std::string>();
}

void QuantizeOptionsImpl::layer_params(AlgorithmParameters param, LayerParams &vec)
{
  _layer_params[param] = vec;
}

LayerParams QuantizeOptionsImpl::layer_params(AlgorithmParameters param) const
{
  auto param_vec = _layer_params.find(param);
  if (param_vec != _layer_params.end())
  {
    return param_vec->second;
  }
  return LayerParams();
}

void QuantizeOptionsImpl::layer_params_set(LayerParamsSet &vec) { _layer_params_set = vec; }

LayerParamsSet QuantizeOptionsImpl::layer_params_set(void) const { return _layer_params_set; }

bool QuantizeOptionsImpl::query(Algorithm algo)
{
  std::vector<Algorithm>::iterator it = std::find(_algorithms.begin(), _algorithms.end(), algo);
  if (it == _algorithms.end())
    return false;

  return true;
}
bool is_valid_params(loco::Graph *g, LayerParams &lps)
{
  // Layer names in the configuration must be unique
  std::unordered_set<std::string> us;
  for (const auto &lp : lps)
  {
    if (us.find(lp->name) != us.end())
      throw std::runtime_error("Duplicate name found in configuration: " + lp->name);
    us.emplace(lp->name);
  }

  // Every layer name must match a (non-output) node in the graph
  for (const auto &lp : lps)
  {
    auto &name = lp->name;
    bool found = false;
    for (auto node : loco::active_nodes(loco::output_nodes(g)))
    {
      auto cnode = loco::must_cast<luci::CircleNode *>(node);
      if (cnode->opcode() == luci::CircleOpcode::CIRCLEOUTPUT)
        continue;
      if (cnode->name() == name)
        found = true;
    }
    if (not found)
      return false;
  }
  return true;
}

LayerParams find_valid_params(loco::Graph *g, LayerParamsSet &lpss)
{
  // Select the single alternative whose layer names all exist in the graph
  LayerParams valid_params;
  uint32_t valid_count = 0;
  for (auto &lps : lpss)
  {
    if (is_valid_params(g, lps))
    {
      valid_count++;
      valid_params = lps;
    }
  }

  if (valid_count != 1)
    throw std::runtime_error(
      "Configuration file has layer names (and alternates) that can be mapped in multiple or no "
      "ways. Please update configuration file to have only one valid name mapping.");

  return valid_params;
}
CircleQuantizer::Options *CircleQuantizer::options(void)
{
  if (_options == nullptr)
    _options = std::make_unique<QuantizeOptionsImpl>();

  return _options.get();
}
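// Typical driver flow for the Options API above (sketch; the Algorithm value
// QuantizeWithMinMax and the parameter values are one plausible configuration,
// not mandated by this file).
static void quantize_driver_sketch(loco::Graph *g)
{
  luci::CircleQuantizer quantizer;
  auto options = quantizer.options();

  using Options = luci::CircleQuantizer::Options;
  options->enable(Options::Algorithm::QuantizeWithMinMax);
  options->param(Options::AlgorithmParameters::Quantize_input_model_dtype, "float32");
  options->param(Options::AlgorithmParameters::Quantize_output_model_dtype, "uint8");
  options->param(Options::AlgorithmParameters::Quantize_granularity, "channel");
  options->param(Options::AlgorithmParameters::Quantize_input_type, "uint8");
  options->param(Options::AlgorithmParameters::Quantize_output_type, "uint8");

  quantizer.quantize(g);
}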
void CircleQuantizer::quantize_dequantize_weight(loco::Graph *g) const
{
  if (!_options->query(Options::Algorithm::QuantizeDequantizeWeights))
    return;

  // Fake quantization of weights (quantize and immediately dequantize)
  static const std::vector<std::string> fakeq_supported_input_model_dtype{"float32"};
  static const std::vector<std::string> fakeq_supported_output_model_dtype{"uint8", "int16"};
  static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"};

  auto input_model_dtype =
    _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
  auto output_model_dtype =
    _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
  auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
  auto layer_params = _options->layer_params(Options::AlgorithmParameters::Quantize_layer_params);
  auto layer_params_set = _options->layer_params_set();

  if (!in_array(to_lower_case(input_model_dtype), fakeq_supported_input_model_dtype))
    throw std::runtime_error("Unsupported input type. List of supported input type: " +
                             to_string(fakeq_supported_input_model_dtype));

  if (!in_array(to_lower_case(output_model_dtype), fakeq_supported_output_model_dtype))
    throw std::runtime_error("Unsupported output type. List of supported output type: " +
                             to_string(fakeq_supported_output_model_dtype));

  if (!in_array(to_lower_case(granularity), fakeq_supported_granularity))
    throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
                             to_string(fakeq_supported_granularity));

  if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
      str_to_dtype(output_model_dtype) != loco::DataType::U8)
    throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");

  // When multiple alternates are given, pick the one that maps onto this graph
  if (layer_params_set.size() > 1u)
  {
    layer_params = find_valid_params(g, layer_params_set);
  }

  // Check dtype/granularity of layer params
  for (auto layer_param : layer_params)
  {
    const auto &name = layer_param->name;
    if (!in_array(to_lower_case(layer_param->dtype), fakeq_supported_output_model_dtype))
    {
      throw std::runtime_error("Unsupported dtype in " + name + ". List of supported dtype: " +
                               to_string(fakeq_supported_output_model_dtype));
    }
    if (!in_array(to_lower_case(layer_param->granularity), fakeq_supported_granularity))
    {
      throw std::runtime_error("Unsupported granularity in " + name +
                               ". List of supported granularity: " +
                               to_string(fakeq_supported_granularity));
    }
  }

  // Clear any existing quantparam before fake quantization
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    if (circle_node->quantparam() != nullptr)
      circle_node->quantparam(nullptr);
  }

  auto ctx = std::make_unique<luci::QuantizeDequantizeWeightsPass::Context>();
  {
    ctx->input_model_dtype = str_to_dtype(input_model_dtype);
    ctx->output_model_dtype = str_to_dtype(output_model_dtype);
    ctx->granularity = str_to_granularity(granularity);

    for (auto layer_param : layer_params)
    {
      LayerInfo info;
      {
        info.name = layer_param->name;
        info.dtype = str_to_dtype(layer_param->dtype);
        info.granularity = str_to_granularity(layer_param->granularity);
      }
      ctx->layers_info.emplace_back(info);
    }
  }

  luci::QuantizeDequantizeWeightsPass fake_quantizer(std::move(ctx));

  fake_quantizer.run(g);
}
void CircleQuantizer::quantize_with_min_max(loco::Graph *g) const
{
  if (!_options->query(Options::Algorithm::QuantizeWithMinMax))
    return;

  // Actual quantization of weights, bias, and activation
  static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
  static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
  static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
  static const std::vector<std::string> qwmm_supported_input_type{"uint8",   "int16",
                                                                  "int32",   "int64",
                                                                  "float32", "bool"};
  static const std::vector<std::string> qwmm_supported_output_type{"uint8",   "int16",
                                                                   "int32",   "int64",
                                                                   "float32", "bool"};

  auto input_model_dtype =
    _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
  auto output_model_dtype =
    _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
  auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
  auto input_type = _options->param(Options::AlgorithmParameters::Quantize_input_type);
  if (input_type.empty())
    input_type = output_model_dtype;
  auto output_type = _options->param(Options::AlgorithmParameters::Quantize_output_type);
  if (output_type.empty())
    output_type = output_model_dtype;

  auto input_type_vec = pepper::csv_to_vector<std::string>(input_type);
  auto output_type_vec = pepper::csv_to_vector<std::string>(output_type);

  bool TF_style_maxpool =
    _options->param(Options::AlgorithmParameters::Quantize_TF_style_maxpool) == "True";

  bool save_min_max =
    _options->param(Options::AlgorithmParameters::Quantize_save_min_max) == "True";

  auto layer_params = _options->layer_params(Options::AlgorithmParameters::Quantize_layer_params);
  auto layer_params_set = _options->layer_params_set();

  if (!in_array(to_lower_case(input_model_dtype), qwmm_supported_input_model_dtype))
    throw std::runtime_error("Unsupported input type. List of supported input types: " +
                             to_string(qwmm_supported_input_model_dtype));

  if (!in_array(to_lower_case(output_model_dtype), qwmm_supported_output_model_dtype))
    throw std::runtime_error("Unsupported output type. List of supported output types: " +
                             to_string(qwmm_supported_output_model_dtype));

  if (!in_array(to_lower_case(granularity), qwmm_supported_granularity))
    throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
                             to_string(qwmm_supported_granularity));

  for (const auto &dtype : input_type_vec)
  {
    if (!in_array(to_lower_case(dtype), qwmm_supported_input_type))
      throw std::runtime_error("Unsupported input type. List of supported input types: " +
                               to_string(qwmm_supported_input_type));
  }

  for (const auto &dtype : output_type_vec)
  {
    if (!in_array(to_lower_case(dtype), qwmm_supported_output_type))
      throw std::runtime_error("Unsupported output type. List of supported output types: " +
                               to_string(qwmm_supported_output_type));
  }

  if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
      str_to_dtype(output_model_dtype) != loco::DataType::U8)
    throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");

  // When multiple alternates are given, pick the one that maps onto this graph
  if (layer_params_set.size() > 1u)
  {
    layer_params = find_valid_params(g, layer_params_set);
  }

  // Check dtype/granularity of layer params
  for (auto layer_param : layer_params)
  {
    const auto &name = layer_param->name;
    if (!in_array(to_lower_case(layer_param->dtype), qwmm_supported_output_model_dtype))
    {
      throw std::runtime_error("Unsupported dtype in " + name + ". List of supported dtype: " +
                               to_string(qwmm_supported_output_model_dtype));
    }
    if (!in_array(to_lower_case(layer_param->granularity), qwmm_supported_granularity))
    {
      throw std::runtime_error("Unsupported granularity in " + name +
                               ". List of supported granularity: " +
                               to_string(qwmm_supported_granularity));
    }
  }

  // Canonicalize user-given input/output dtypes to one entry per graph input/output
  auto input_types = str_vec_to_dtype_vec(input_type_vec);
  auto output_types = str_vec_to_dtype_vec(output_type_vec);
  canonicalize_input_type(g, input_types);
  canonicalize_output_type(g, output_types);

  // Verify that the input model has a form acceptable by the quantizer
  luci::QuantizePreCheckerPass input_model_checker{};
  input_model_checker.run(g);

  auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
  {
    ctx->input_model_dtype = str_to_dtype(input_model_dtype);
    ctx->output_model_dtype = str_to_dtype(output_model_dtype);
    ctx->granularity = str_to_granularity(granularity);
    ctx->input_types = input_types;
    ctx->output_types = output_types;
    ctx->TF_style_maxpool = TF_style_maxpool;
    ctx->save_min_max = save_min_max;

    for (auto layer_param : layer_params)
    {
      LayerInfo info;
      {
        info.name = layer_param->name;
        info.dtype = str_to_dtype(layer_param->dtype);
        info.granularity = str_to_granularity(layer_param->granularity);
      }
      ctx->layers_info.emplace_back(info);
    }
  }

  luci::QuantizeWithMinMaxPass quantizer(std::move(ctx));
  quantizer.run(g);

  auto verify_ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
  {
    verify_ctx->output_model_dtype = str_to_dtype(output_model_dtype);
    verify_ctx->granularity = str_to_granularity(granularity);
    verify_ctx->input_types = input_types;
    verify_ctx->output_types = output_types;
    verify_ctx->TF_style_maxpool = TF_style_maxpool;

    for (auto layer_param : layer_params)
    {
      LayerInfo info;
      {
        info.name = layer_param->name;
        info.dtype = str_to_dtype(layer_param->dtype);
        info.granularity = str_to_granularity(layer_param->granularity);
      }
      verify_ctx->layers_info.emplace_back(info);
    }
  }

  // Verify the quantized model
  luci::QuantizedModelVerifier verifier(std::move(verify_ctx));
  verifier.verify(g);
}
void CircleQuantizer::quantize_weights(loco::Graph *g) const
{
  if (!_options->query(Options::Algorithm::QuantizeWeights))
    return;

  // Weights-only quantization
  static const std::vector<std::string> qw_supported_input_model_dtype{"float32"};
  static const std::vector<std::string> qw_supported_output_model_dtype{"int4", "int8", "int16"};
  static const std::vector<std::string> qw_supported_granularity{"channel"};

  auto input_model_dtype =
    _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
  auto output_model_dtype =
    _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
  auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);

  if (!in_array(to_lower_case(input_model_dtype), qw_supported_input_model_dtype))
    throw std::runtime_error("Unsupported input type. List of supported input type: " +
                             to_string(qw_supported_input_model_dtype));

  if (!in_array(to_lower_case(output_model_dtype), qw_supported_output_model_dtype))
    throw std::runtime_error("Unsupported output type. List of supported output type: " +
                             to_string(qw_supported_output_model_dtype));

  if (!in_array(to_lower_case(granularity), qw_supported_granularity))
    throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
                             to_string(qw_supported_granularity));

  auto ctx = std::make_unique<luci::QuantizeWeightsPass::Context>();
  {
    ctx->input_model_dtype = str_to_dtype(input_model_dtype);
    ctx->output_model_dtype = str_to_dtype(output_model_dtype);
    ctx->granularity = str_to_granularity(granularity);
  }

  luci::QuantizeWeightsPass weights_quantizer(std::move(ctx));
  weights_quantizer.run(g);
}
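// Sketch of a weights-only configuration routed to quantize_weights() above.
// Options::Algorithm::QuantizeWeights is assumed to be the matching enum value.
static void quantize_weights_sketch(luci::CircleQuantizer &quantizer, loco::Graph *g)
{
  using Options = luci::CircleQuantizer::Options;
  auto options = quantizer.options();

  options->enable(Options::Algorithm::QuantizeWeights);
  options->param(Options::AlgorithmParameters::Quantize_input_model_dtype, "float32");
  options->param(Options::AlgorithmParameters::Quantize_output_model_dtype, "int8");
  options->param(Options::AlgorithmParameters::Quantize_granularity, "channel");

  quantizer.quantize(g);
}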
void CircleQuantizer::quantize_onnx_fake_quantized_model(loco::Graph *g) const
{
  if (!_options->query(Options::Algorithm::QuantizeOnnxFakeQuantizedModel))
    return;

  auto ctx = std::make_unique<luci::QuantizeOnnxFakeQuantModelPass::Context>();
  {
    ctx->default_activation_dtype = loco::DataType::S16;
  }
  luci::QuantizeOnnxFakeQuantModelPass quantizer(std::move(ctx));
  quantizer.run(g);

  logo::Phase phase;

  // Default passes
  phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
  phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
  phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());

  ProgressReporter prog(g, logo::PhaseStrategy::Restart);
  logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
  phase_runner.attach(&prog);
  phase_runner.run(phase);
}
void CircleQuantizer::requantize(loco::Graph *g) const
{
  if (!_options->query(Options::Algorithm::Requantize))
    return;

  // Requantize a quantized model (e.g. int8 -> uint8)
  static const std::vector<std::string> rq_supported_input_model_dtype{"int8"};
  static const std::vector<std::string> rq_supported_output_model_dtype{"uint8"};

  auto input_model_dtype =
    _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
  auto output_model_dtype =
    _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);

  if (!in_array(to_lower_case(input_model_dtype), rq_supported_input_model_dtype))
    throw std::runtime_error("Unsupported input type. List of supported input types: " +
                             to_string(rq_supported_input_model_dtype));

  if (!in_array(to_lower_case(output_model_dtype), rq_supported_output_model_dtype))
    throw std::runtime_error("Unsupported output type. List of supported output types: " +
                             to_string(rq_supported_output_model_dtype));

  luci::RequantizePass requantizer(str_to_dtype(input_model_dtype),
                                   str_to_dtype(output_model_dtype));
  requantizer.run(g);
}
void CircleQuantizer::force_quant_param(loco::Graph *g) const
{
  if (!_options->query(Options::Algorithm::ForceQuantParam))
    return;

  ForceQuantParamPass::TensorVector tensors =
    _options->params(Options::AlgorithmParameters::Quantize_tensor_names);
  auto str_scales = _options->params(Options::AlgorithmParameters::Quantize_scales);
  auto str_zero_points = _options->params(Options::AlgorithmParameters::Quantize_zero_points);

  // Cast the string parameters to the types expected by the pass
  ForceQuantParamPass::ScaleVector scales = lexical_cast<float>(str_scales);
  ForceQuantParamPass::ZPVector zero_points = lexical_cast<int64_t>(str_zero_points);

  ForceQuantParamPass fq(tensors, scales, zero_points);
  fq.run(g);
}
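// Sketch for force_quant_param(): the three parameter vectors are parallel, i.e.
// tensor_names[i] receives scales[i] and zero_points[i]. The tensor names and values
// are invented; Options::Algorithm::ForceQuantParam is assumed to be the enum value.
static void force_quant_param_sketch(luci::CircleQuantizer &quantizer, loco::Graph *g)
{
  using Options = luci::CircleQuantizer::Options;
  auto options = quantizer.options();

  std::vector<std::string> names{"ifm", "ofm"};
  std::vector<std::string> scales{"0.5", "0.25"};
  std::vector<std::string> zero_points{"0", "128"};

  options->enable(Options::Algorithm::ForceQuantParam);
  options->params(Options::AlgorithmParameters::Quantize_tensor_names, names);
  options->params(Options::AlgorithmParameters::Quantize_scales, scales);
  options->params(Options::AlgorithmParameters::Quantize_zero_points, zero_points);

  quantizer.quantize(g);
}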
void CircleQuantizer::copy_quant_param(loco::Graph *g) const
{
  if (!_options->query(Options::Algorithm::CopyQuantParam))
    return;

  CopyQuantParamPass::TensorVector src_tensors =
    _options->params(Options::AlgorithmParameters::Quantize_src_tensor_names);
  CopyQuantParamPass::TensorVector dst_tensors =
    _options->params(Options::AlgorithmParameters::Quantize_dst_tensor_names);

  CopyQuantParamPass cq(src_tensors, dst_tensors);
  cq.run(g);
}
void CircleQuantizer::convert_to_fake_quantized_model(loco::Graph *g) const
{
  if (!_options->query(Options::Algorithm::ConvertToFakeQuantizedModel))
    return;

  luci::ConvertToFakeQuantizedModelPass fake_quantizer;
  fake_quantizer.run(g);

  logo::Phase phase;

  // Default passes
  phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
  phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
  phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());

  // Remove redundant Dequantize Ops generated during fake quantization
  phase.emplace_back(std::make_unique<luci::RemoveRedundantDequantizePass>());
  // Fold Dequantize Ops generated during fake quantization
  phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());

  ProgressReporter prog(g, logo::PhaseStrategy::Restart);
  logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
  phase_runner.attach(&prog);
  phase_runner.run(phase);
}
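// Sketch: turning an already-quantized model back into a fake-quantized fp32 model.
// Options::Algorithm::ConvertToFakeQuantizedModel routes to the handler above.
static void convert_to_fake_quantized_sketch(luci::CircleQuantizer &quantizer, loco::Graph *g)
{
  using Options = luci::CircleQuantizer::Options;
  quantizer.options()->enable(Options::Algorithm::ConvertToFakeQuantizedModel);
  quantizer.quantize(g);
}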
void CircleQuantizer::quantize(loco::Graph *g) const
{
  // Each handler is a no-op unless its algorithm was enabled via Options
  quantize_dequantize_weight(g);
  quantize_with_min_max(g);
  quantize_weights(g);
  quantize_onnx_fake_quantized_model(g);
  requantize(g);
  force_quant_param(g);
  copy_quant_param(g);
  convert_to_fake_quantized_model(g);

  // Do Shape/Type inference
  logo::Phase phase;
  phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
  phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());

  ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
  logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
  phase_runner.attach(&prog);
  phase_runner.run(phase);
}