50#include <unordered_set>
67 const auto inputs =
g->inputs();
72 if (input_type.size() != 1
and input_type.size() !=
inputs->size())
74 throw std::runtime_error(
75 "Invalid number of input dtype. The number of input dtype should be 1 or "
76 "the same as the number of graph inputs.");
80 if (input_type.size() == 1)
92 if (
input->dtype() == loco::DataType::FLOAT32)
95 input_type.push_back(
input->dtype());
108 assert(
i ==
input->index());
110 if (
input->dtype() != loco::DataType::FLOAT32)
113 if (
input->dtype() != input_type[
i])
114 throw std::runtime_error(
115 "Input dtype of " +
input->name() +
116 " is invalid. It has to be the same with the model's input dtype.");
121 if (input_type[
i] != loco::DataType::FLOAT32
and input_type[
i] != loco::DataType::U8
and
122 input_type[
i] != loco::DataType::S16)
124 throw std::runtime_error(
"Input dtype of " +
input->name() +
125 " is invalid. For float32 input, the input dtype after "
126 "quantization must be one of uint8, int16, or float32.");
142 const auto outputs =
g->outputs();
149 throw std::runtime_error(
150 "Invalid number of output dtype. The number of output dtype should be 1 or "
151 "the same as the number of graph outputs.");
168 if (
output->dtype() == loco::DataType::FLOAT32)
186 if (
output->dtype() != loco::DataType::FLOAT32)
189 if (
output->dtype() != output_type[
i])
190 throw std::runtime_error(
191 "Output dtype of " +
output->name() +
192 " is invalid. It has to be the same with the model's output dtype.");
197 if (output_type[
i] != loco::DataType::FLOAT32
and output_type[
i] != loco::DataType::U8
and
198 output_type[
i] != loco::DataType::S16)
200 throw std::runtime_error(
"Output dtype of " +
output->name() +
201 " is invalid. For float32 output, the output dtype after "
202 "quantization must be one of uint8, int16, or float32.");
208template <
typename T> T
lexical_cast(
const std::string &str)
210 std::istringstream
ss;
217template <
typename T> std::vector<T>
lexical_cast(std::vector<std::string> &
sv)
220 std::transform(
sv.begin(),
sv.end(), std::back_inserter(result),
221 [](std::string str) -> T { return lexical_cast<T>(str); });
228 void enable(Algorithm)
final;
237 bool query(Algorithm)
final;
240 std::vector<Algorithm> _algorithms;
241 std::map<AlgorithmParameters, const std::string> _algorithm_params;
242 std::map<AlgorithmParameters, std::vector<std::string>> _multiple_params;
243 std::map<AlgorithmParameters, LayerParams> _layer_params;
247void QuantizeOptionsImpl::enable(Algorithm
algo) { _algorithms.push_back(
algo); }
251 _algorithm_params.insert(std::pair<AlgorithmParameters, const std::string>(param, str));
256 auto param_str = _algorithm_params.find(param);
257 if (
param_str != _algorithm_params.end())
263 return std::string();
269 _multiple_params[param] = vec;
274 auto param_vec = _multiple_params.find(param);
281 return std::vector<std::string>();
287 _layer_params[param] = vec;
292 auto param_vec = _layer_params.find(param);
303void QuantizeOptionsImpl::layer_params_set(
LayerParamsSet &vec) { _layer_params_set = vec; }
305LayerParamsSet QuantizeOptionsImpl::layer_params_set(
void)
const {
return _layer_params_set; }
307bool QuantizeOptionsImpl::query(Algorithm
algo)
309 std::vector<Algorithm>::iterator
it = std::find(_algorithms.begin(), _algorithms.end(),
algo);
310 if (
it == _algorithms.end())
324 std::unordered_set<std::string>
us;
327 if (
us.find(
lp->name) !=
us.end())
328 throw std::runtime_error(
"Duplicate name found in configuration: " +
lp->name);
329 us.emplace(
lp->name);
340 if (
cnode->opcode() == luci::CircleOpcode::CIRCLEOUTPUT)
343 if (
cnode->name() == name)
369 throw std::runtime_error(
370 "Configuration file has layer names (and alternates) that can be mapped in multiple or no "
371 "ways. Please update configuration file to have only one valid name mapping.");
383 if (_options ==
nullptr)
385 _options = std::make_unique<QuantizeOptionsImpl>();
388 return _options.get();
391void CircleQuantizer::quantize_dequantize_weight(
loco::Graph *g)
const
400 auto input_model_dtype =
401 _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
402 auto output_model_dtype =
403 _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
404 auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
405 auto layer_params = _options->layer_params(Options::AlgorithmParameters::Quantize_layer_params);
406 auto layer_params_set = _options->layer_params_set();
409 throw std::runtime_error(
"Unsupported input type. List of supported input type: " +
413 throw std::runtime_error(
"Unsupported output type. List of supported output type: " +
417 throw std::runtime_error(
"Unsupported granularity. List of supported granularity: " +
422 throw std::runtime_error(
"Layer-wise quantization only supports uint8 dtype.");
424 if (layer_params_set.size() > 1u)
435 throw std::runtime_error(
"Unsupported dtype in " + name +
". List of supported dtype: " +
440 throw std::runtime_error(
441 "Unsupported granularity in " + name +
454 auto ctx = std::make_unique<luci::QuantizeDequantizeWeightsPass::Context>();
456 ctx->input_model_dtype =
str_to_dtype(input_model_dtype);
457 ctx->output_model_dtype =
str_to_dtype(output_model_dtype);
468 ctx->layers_info.emplace_back(
info);
478void CircleQuantizer::quantize_with_min_max(
loco::Graph *g)
const
487 "int64",
"float32",
"bool"};
489 "int64",
"float32",
"bool"};
491 auto input_model_dtype =
492 _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
493 auto output_model_dtype =
494 _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
495 auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
496 auto input_type = _options->param(Options::AlgorithmParameters::Quantize_input_type);
497 if (input_type.empty())
498 input_type = output_model_dtype;
499 auto output_type = _options->param(Options::AlgorithmParameters::Quantize_output_type);
503 auto input_type_vec = pepper::csv_to_vector<std::string>(input_type);
504 auto output_type_vec = pepper::csv_to_vector<std::string>(output_type);
506 bool TF_style_maxpool =
507 _options->param(Options::AlgorithmParameters::Quantize_TF_style_maxpool) ==
"True";
510 _options->param(Options::AlgorithmParameters::Quantize_save_min_max) ==
"True";
512 auto layer_params = _options->layer_params(Options::AlgorithmParameters::Quantize_layer_params);
513 auto layer_params_set = _options->layer_params_set();
516 throw std::runtime_error(
"Unsupported input type. List of supported input types: " +
520 throw std::runtime_error(
"Unsupported output type. List of supported output types: " +
524 throw std::runtime_error(
"Unsupported granularity. List of supported granularity: " +
530 throw std::runtime_error(
"Unsupported input type. List of supported input types: " +
537 throw std::runtime_error(
"Unsupported output type. List of supported output types: " +
543 throw std::runtime_error(
"Layer-wise quantization only supports uint8 dtype.");
545 if (layer_params_set.size() > 1u)
556 throw std::runtime_error(
"Unsupported dtype in " + name +
". List of supported dtype: " +
561 throw std::runtime_error(
562 "Unsupported granularity in " + name +
578 auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
580 ctx->input_model_dtype =
str_to_dtype(input_model_dtype);
581 ctx->output_model_dtype =
str_to_dtype(output_model_dtype);
583 ctx->input_types = input_types;
584 ctx->output_types = output_types;
585 ctx->TF_style_maxpool = TF_style_maxpool;
586 ctx->save_min_max = save_min_max;
596 ctx->layers_info.emplace_back(
info);
604 auto verify_ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
610 verify_ctx->TF_style_maxpool = TF_style_maxpool;
631void CircleQuantizer::quantize_weights(
loco::Graph *g)
const
639 auto input_model_dtype =
640 _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
641 auto output_model_dtype =
642 _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
643 auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
646 throw std::runtime_error(
"Unsupported input type. List of supported input type: " +
650 throw std::runtime_error(
"Unsupported output type. List of supported output type: " +
654 throw std::runtime_error(
"Unsupported granularity. List of supported granularity: " +
656 auto ctx = std::make_unique<luci::QuantizeWeightsPass::Context>();
658 ctx->input_model_dtype =
str_to_dtype(input_model_dtype);
659 ctx->output_model_dtype =
str_to_dtype(output_model_dtype);
668void CircleQuantizer::quantize_onnx_fake_quantized_model(
loco::Graph *g)
const
672 auto ctx = std::make_unique<luci::QuantizeOnnxFakeQuantModelPass::Context>();
674 ctx->default_activation_dtype = loco::DataType::S16;
684 phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
685 phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
686 phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
695void CircleQuantizer::requantize(
loco::Graph *g)
const
703 auto input_model_dtype =
704 _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
705 auto output_model_dtype =
706 _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
709 throw std::runtime_error(
"Unsupported input type. List of supported input types: " +
713 throw std::runtime_error(
"Unsupported output type. List of supported output types: " +
722void CircleQuantizer::force_quant_param(
loco::Graph *g)
const
729 _options->params(Options::AlgorithmParameters::Quantize_tensor_names);
730 auto str_scales = _options->params(Options::AlgorithmParameters::Quantize_scales);
731 auto str_zero_points = _options->params(Options::AlgorithmParameters::Quantize_zero_points);
742void CircleQuantizer::copy_quant_param(
loco::Graph *g)
const
748 _options->params(Options::AlgorithmParameters::Quantize_src_tensor_names);
750 _options->params(Options::AlgorithmParameters::Quantize_dst_tensor_names);
757void CircleQuantizer::convert_to_fake_quantized_model(
loco::Graph *g)
const
768 phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
769 phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
770 phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
773 phase.emplace_back(std::make_unique<luci::RemoveRedundantDequantizePass>());
775 phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
786 quantize_dequantize_weight(g);
787 quantize_with_min_max(g);
789 quantize_onnx_fake_quantized_model(g);
791 force_quant_param(g);
793 convert_to_fake_quantized_model(g);
798 phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
799 phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
luci::CircleQuantizer::Options::LayerParams LayerParams
void quantize(loco::Graph *) const
Pass to copy quantparam (scale, zerop) of a tensor to another tensor.
std::vector< std::string > TensorVector
Pass to write quantparam (scale, zerop) to the specified tensors.
std::vector< float > ScaleVector
std::vector< std::string > TensorVector
std::vector< int64_t > ZPVector
Pass to quantize weights.
Pass to create a quantized graph from a graph fake-quantized on onnx.
Pass to verify the input model has the form acceptable by quantizer.
bool run(loco::Graph *graph) final
Pass to quantize weights.
Pass to quantize activation, weights, and bias.
Pass to re-quantize graph (ex: int8 -> uint8)
volatile const char info[]
T lexical_cast(const std::string &str)
const T * data(const std::vector< T, Alloc > &v)
std::vector< Node * > input_nodes(const Graph *)
std::set< loco::Node * > active_nodes(const std::vector< loco::Node * > &roots)
Enumerate all the nodes required to compute "roots".
T must_cast(FeatureEncoder *node)
A helper dynamic_cast that throws when failed.
std::vector< Node * > output_nodes(Graph *)
std::vector< std::unique_ptr< Pass > > Phase
std::string to_string(const std::vector< std::string > &strings)
T must_cast(loco::Node *node)
std::vector< loco::DataType > str_vec_to_dtype_vec(std::vector< std::string > &vec)
std::string to_lower_case(std::string s)
QuantizationGranularity str_to_granularity(const std::string &str)
loco::DataType str_to_dtype(const std::string &str)
bool in_array(const std::string &str, const std::vector< std::string > &array)
virtual void enable(Algorithm)=0
virtual void layer_params(AlgorithmParameters, LayerParams &)=0
virtual void params(AlgorithmParameters, std::vector< std::string > &)=0
virtual void param(AlgorithmParameters, const std::string &)=0
std::vector< std::shared_ptr< LayerParam > > LayerParams
virtual LayerParamsSet layer_params_set(void) const =0
virtual bool query(Algorithm)=0
@ QuantizeDequantizeWeights
@ QuantizeOnnxFakeQuantizedModel
@ ConvertToFakeQuantizedModel
Class to convert a quantized model to a fake-quantized fp32 model.
bool run(loco::Graph *g) final
Run the pass.
Class to verify quantized model.