ONE - On-device Neural Engine
Loading...
Searching...
No Matches
CircleQuantizer.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
18
31
34
35// logo passes
37
38#include "ProgressReporter.h"
39#include "helpers/Strings.h"
40
42
43#include <luci/IR/CircleNode.h>
44#include <logo/Phase.h>
45#include <pepper/csv2vec.h>
46
47#include <map>
48#include <memory>
49#include <string>
50#include <unordered_set>
51
52namespace
53{
54
55using namespace luci;
59
60// This function updates user-given input_type to match with the input signature of graph
61// If user gives only one input_type, it will be expanded to the number of graph inputs
62void canonicalize_input_type(loco::Graph *g, std::vector<loco::DataType> &input_type)
63{
64 if (g == nullptr)
65 return;
66
67 const auto inputs = g->inputs();
68
69 assert(inputs); // FIX_CALLER_UNLESS
70
71 // Check validity of the number of input dtype given by a user
72 if (input_type.size() != 1 and input_type.size() != inputs->size())
73 {
74 throw std::runtime_error(
75 "Invalid number of input dtype. The number of input dtype should be 1 or "
76 "the same as the number of graph inputs.");
77 }
78
79 // Handle the case when a user gives only one input dtype
80 if (input_type.size() == 1)
81 {
82 const auto user_given_dtype = input_type[0];
83 input_type.clear();
84
85 // Expand input dtype to the number of graph inputs
86 // Since quantizer can only quantize float32, user_given_dtype is set only for float32 inputs
88 for (uint32_t i = 0; i < input_nodes.size(); i++)
89 {
90 auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);
91
92 if (input->dtype() == loco::DataType::FLOAT32)
93 input_type.push_back(user_given_dtype);
94 else
95 input_type.push_back(input->dtype());
96 }
97 }
98
99 // Finally, check validity of input_type
100 // input_type is valid if
101 // C1. for non-float32 model input, input_type == model's input dtype
102 // or
103 // C2. for float32 model input, input_type == uint8, int16, or float32
105 for (uint32_t i = 0; i < input_nodes.size(); i++)
106 {
107 auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);
108 assert(i == input->index()); // FIX_ME_UNLESS
109
110 if (input->dtype() != loco::DataType::FLOAT32)
111 {
112 // C1
113 if (input->dtype() != input_type[i])
114 throw std::runtime_error(
115 "Input dtype of " + input->name() +
116 " is invalid. It has to be the same with the model's input dtype.");
117 }
118 else
119 {
120 // C2
121 if (input_type[i] != loco::DataType::FLOAT32 and input_type[i] != loco::DataType::U8 and
122 input_type[i] != loco::DataType::S16)
123 {
124 throw std::runtime_error("Input dtype of " + input->name() +
125 " is invalid. For float32 input, the input dtype after "
126 "quantization must be one of uint8, int16, or float32.");
127 }
128 }
129 }
130}
131
132// This function updates user-given output_type to match with the output signature of graph
133// If user gives only one output_type, it will be expanded to the number of graph outputs
134// NOTE This function is almost same with canonicalize_input_type, but it is written as a
135// separate function for more precise error messaging.
136// TODO Find a way to reduce duplicate codes
137void canonicalize_output_type(loco::Graph *g, std::vector<loco::DataType> &output_type)
138{
139 if (g == nullptr)
140 return;
141
142 const auto outputs = g->outputs();
143
144 assert(outputs); // FIX_CALLER_UNLESS
145
146 // Check validity of the number of output dtype given by a user
147 if (output_type.size() != 1 and output_type.size() != outputs->size())
148 {
149 throw std::runtime_error(
150 "Invalid number of output dtype. The number of output dtype should be 1 or "
151 "the same as the number of graph outputs.");
152 }
153
154 // Handle the case when a user gives only one output dtype
155 if (output_type.size() == 1)
156 {
157 const auto user_given_dtype = output_type[0];
158 output_type.clear();
159
160 // Expand output dtype to the number of graph outputs
161 // If dtype of graph output is float32, it will be replaced with user_given_dtype
162 // Otherwise, it will not change
164 for (uint32_t i = 0; i < output_nodes.size(); i++)
165 {
166 auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
167
168 if (output->dtype() == loco::DataType::FLOAT32)
169 output_type.push_back(user_given_dtype);
170 else
171 output_type.push_back(output->dtype());
172 }
173 }
174
175 // Finally, check validity of output_type
176 // output_type is valid if
177 // C1. for non-float32 model output, output_type == model's output dtype
178 // or
179 // C2. for float32 model output, output_type == uint8, int16, or float32
181 for (uint32_t i = 0; i < output_nodes.size(); i++)
182 {
183 auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
184 assert(i == output->index()); // FIX_ME_UNLESS
185
186 if (output->dtype() != loco::DataType::FLOAT32)
187 {
188 // C1
189 if (output->dtype() != output_type[i])
190 throw std::runtime_error(
191 "Output dtype of " + output->name() +
192 " is invalid. It has to be the same with the model's output dtype.");
193 }
194 else
195 {
196 // C2
197 if (output_type[i] != loco::DataType::FLOAT32 and output_type[i] != loco::DataType::U8 and
198 output_type[i] != loco::DataType::S16)
199 {
200 throw std::runtime_error("Output dtype of " + output->name() +
201 " is invalid. For float32 output, the output dtype after "
202 "quantization must be one of uint8, int16, or float32.");
203 }
204 }
205 }
206}
207
// Parse 'str' as a value of type T using stream extraction (operator>>)
//
// The result is value-initialized first, so T{} is returned when nothing can be extracted
// (e.g. 'str' is empty or has only whitespace) instead of an indeterminate value.
template <typename T> T lexical_cast(const std::string &str)
{
  std::istringstream ss(str);
  T data{};
  ss >> data;
  return data;
}

// Parse every string of 'sv' with lexical_cast<T>, keeping the order of elements
template <typename T> std::vector<T> lexical_cast(std::vector<std::string> &sv)
{
  std::vector<T> result;
  result.reserve(sv.size());
  std::transform(sv.begin(), sv.end(), std::back_inserter(result),
                 [](const std::string &str) -> T { return lexical_cast<T>(str); });
  return result;
}
224
225class QuantizeOptionsImpl final : public luci::CircleQuantizer::Options
226{
227public:
228 void enable(Algorithm) final;
229 void param(AlgorithmParameters, const std::string &) final;
230 const std::string param(AlgorithmParameters) const final;
231 void params(AlgorithmParameters, std::vector<std::string> &) final;
232 std::vector<std::string> params(AlgorithmParameters) const final;
235 void layer_params_set(LayerParamsSet &) final;
236 LayerParamsSet layer_params_set(void) const final;
237 bool query(Algorithm) final;
238
239private:
240 std::vector<Algorithm> _algorithms;
241 std::map<AlgorithmParameters, const std::string> _algorithm_params;
242 std::map<AlgorithmParameters, std::vector<std::string>> _multiple_params;
243 std::map<AlgorithmParameters, LayerParams> _layer_params;
244 LayerParamsSet _layer_params_set;
245};
246
247void QuantizeOptionsImpl::enable(Algorithm algo) { _algorithms.push_back(algo); }
248
249void QuantizeOptionsImpl::param(AlgorithmParameters param, const std::string &str)
250{
251 _algorithm_params.insert(std::pair<AlgorithmParameters, const std::string>(param, str));
252}
253
254const std::string QuantizeOptionsImpl::param(AlgorithmParameters param) const
255{
256 auto param_str = _algorithm_params.find(param);
257 if (param_str != _algorithm_params.end())
258 {
259 return param_str->second;
260 }
261 else
262 {
263 return std::string();
264 }
265}
266
267void QuantizeOptionsImpl::params(AlgorithmParameters param, std::vector<std::string> &vec)
268{
269 _multiple_params[param] = vec;
270}
271
272std::vector<std::string> QuantizeOptionsImpl::params(AlgorithmParameters param) const
273{
274 auto param_vec = _multiple_params.find(param);
275 if (param_vec != _multiple_params.end())
276 {
277 return param_vec->second;
278 }
279 else
280 {
281 return std::vector<std::string>();
282 }
283}
284
285void QuantizeOptionsImpl::layer_params(AlgorithmParameters param, LayerParams &vec)
286{
287 _layer_params[param] = vec;
288}
289
290LayerParams QuantizeOptionsImpl::layer_params(AlgorithmParameters param) const
291{
292 auto param_vec = _layer_params.find(param);
293 if (param_vec != _layer_params.end())
294 {
295 return param_vec->second;
296 }
297 else
298 {
299 return LayerParams();
300 }
301}
302
303void QuantizeOptionsImpl::layer_params_set(LayerParamsSet &vec) { _layer_params_set = vec; }
304
305LayerParamsSet QuantizeOptionsImpl::layer_params_set(void) const { return _layer_params_set; }
306
307bool QuantizeOptionsImpl::query(Algorithm algo)
308{
309 std::vector<Algorithm>::iterator it = std::find(_algorithms.begin(), _algorithms.end(), algo);
310 if (it == _algorithms.end())
311 return false;
312
313 return true;
314}
315
316} // namespace
317
318namespace
319{
320
321bool is_valid_params(loco::Graph *g, LayerParams &lps)
322{
323 // no same name in lps
324 std::unordered_set<std::string> us;
325 for (auto &lp : lps)
326 {
327 if (us.find(lp->name) != us.end())
328 throw std::runtime_error("Duplicate name found in configuration: " + lp->name);
329 us.emplace(lp->name);
330 }
331
332 // all name should be found in graph
333 for (auto &lp : lps)
334 {
335 auto &name = lp->name;
336 bool found = false;
337 for (auto node : loco::active_nodes(loco::output_nodes(g)))
338 {
339 auto cnode = loco::must_cast<luci::CircleNode *>(node);
340 if (cnode->opcode() == luci::CircleOpcode::CIRCLEOUTPUT)
341 continue;
342
343 if (cnode->name() == name)
344 {
345 found = true;
346 break;
347 }
348 }
349 if (not found)
350 return false;
351 }
352 return true;
353}
354
355LayerParams find_valid_params(loco::Graph *g, LayerParamsSet &lpss)
356{
357 // valid condition: there should be only one LayerParams that is OK
358 uint32_t valid_count = 0;
359 LayerParams params;
360 for (auto &lps : lpss)
361 {
362 if (is_valid_params(g, lps))
363 {
364 valid_count++;
365 params = lps;
366 }
367 }
368 if (valid_count != 1)
369 throw std::runtime_error(
370 "Configuration file has layer names (and alternates) that can be mapped in multiple or no "
371 "ways. Please update configuration file to have only one valid name mapping.");
372
373 return params;
374}
375
376} // namespace
377
378namespace luci
379{
380
382{
383 if (_options == nullptr)
384 {
385 _options = std::make_unique<QuantizeOptionsImpl>();
386 }
387
388 return _options.get();
389}
390
391void CircleQuantizer::quantize_dequantize_weight(loco::Graph *g) const
392{
393 // Fake quantization of weights
395 {
396 static const std::vector<std::string> fakeq_supported_input_model_dtype{"float32"};
397 static const std::vector<std::string> fakeq_supported_output_model_dtype{"uint8", "int16"};
398 static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"};
399
400 auto input_model_dtype =
401 _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
402 auto output_model_dtype =
403 _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
404 auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
405 auto layer_params = _options->layer_params(Options::AlgorithmParameters::Quantize_layer_params);
406 auto layer_params_set = _options->layer_params_set();
407
408 if (!in_array(to_lower_case(input_model_dtype), fakeq_supported_input_model_dtype))
409 throw std::runtime_error("Unsupported input type. List of supported input type: " +
410 to_string(fakeq_supported_input_model_dtype));
411
412 if (!in_array(to_lower_case(output_model_dtype), fakeq_supported_output_model_dtype))
413 throw std::runtime_error("Unsupported output type. List of supported output type: " +
414 to_string(fakeq_supported_output_model_dtype));
415
416 if (!in_array(to_lower_case(granularity), fakeq_supported_granularity))
417 throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
418 to_string(fakeq_supported_granularity));
419
421 str_to_dtype(output_model_dtype) != loco::DataType::U8)
422 throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
423
424 if (layer_params_set.size() > 1u)
425 {
426 layer_params = find_valid_params(g, layer_params_set);
427 }
428
429 // Check dtype/granularity of layer params
430 for (auto layer_param : layer_params)
431 {
432 const auto &name = layer_param->name;
433 if (!in_array(to_lower_case(layer_param->dtype), fakeq_supported_output_model_dtype))
434 {
435 throw std::runtime_error("Unsupported dtype in " + name + ". List of supported dtype: " +
436 to_string(fakeq_supported_output_model_dtype));
437 }
438 if (!in_array(to_lower_case(layer_param->granularity), fakeq_supported_granularity))
439 {
440 throw std::runtime_error(
441 "Unsupported granularity in " + name +
442 ". List of supported granularity: " + to_string(fakeq_supported_granularity));
443 }
444 }
445
446 // Clear existing quantparams before doing fake quantization
447 for (auto &node : loco::active_nodes(loco::output_nodes(g)))
448 {
449 auto circle_node = loco::must_cast<luci::CircleNode *>(node);
450 if (circle_node->quantparam() != nullptr)
451 circle_node->quantparam(nullptr);
452 }
453
454 auto ctx = std::make_unique<luci::QuantizeDequantizeWeightsPass::Context>();
455 {
456 ctx->input_model_dtype = str_to_dtype(input_model_dtype);
457 ctx->output_model_dtype = str_to_dtype(output_model_dtype);
458 ctx->granularity = str_to_granularity(granularity);
459
460 for (auto layer_param : layer_params)
461 {
463 {
464 info.name = layer_param->name;
465 info.dtype = str_to_dtype(layer_param->dtype);
466 info.granularity = str_to_granularity(layer_param->granularity);
467 }
468 ctx->layers_info.emplace_back(info);
469 }
470 }
471
472 luci::QuantizeDequantizeWeightsPass fake_quantizer(std::move(ctx));
473
474 fake_quantizer.run(g);
475 }
476}
477
478void CircleQuantizer::quantize_with_min_max(loco::Graph *g) const
479{
480 // Actual quantization of weights, bias, and activation
481 if (_options->query(Options::Algorithm::QuantizeWithMinMax))
482 {
483 static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
484 static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
485 static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
486 static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16", "int32",
487 "int64", "float32", "bool"};
488 static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16", "int32",
489 "int64", "float32", "bool"};
490
491 auto input_model_dtype =
492 _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
493 auto output_model_dtype =
494 _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
495 auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
496 auto input_type = _options->param(Options::AlgorithmParameters::Quantize_input_type);
497 if (input_type.empty())
498 input_type = output_model_dtype;
499 auto output_type = _options->param(Options::AlgorithmParameters::Quantize_output_type);
500 if (output_type.empty())
501 output_type = output_model_dtype;
502
503 auto input_type_vec = pepper::csv_to_vector<std::string>(input_type);
504 auto output_type_vec = pepper::csv_to_vector<std::string>(output_type);
505
506 bool TF_style_maxpool =
507 _options->param(Options::AlgorithmParameters::Quantize_TF_style_maxpool) == "True";
508
509 bool save_min_max =
510 _options->param(Options::AlgorithmParameters::Quantize_save_min_max) == "True";
511
512 auto layer_params = _options->layer_params(Options::AlgorithmParameters::Quantize_layer_params);
513 auto layer_params_set = _options->layer_params_set();
514
515 if (!in_array(to_lower_case(input_model_dtype), qwmm_supported_input_model_dtype))
516 throw std::runtime_error("Unsupported input type. List of supported input types: " +
517 to_string(qwmm_supported_input_model_dtype));
518
519 if (!in_array(to_lower_case(output_model_dtype), qwmm_supported_output_model_dtype))
520 throw std::runtime_error("Unsupported output type. List of supported output types: " +
521 to_string(qwmm_supported_output_model_dtype));
522
523 if (!in_array(to_lower_case(granularity), qwmm_supported_granularity))
524 throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
525 to_string(qwmm_supported_granularity));
526
527 for (const auto &dtype : input_type_vec)
528 {
529 if (!in_array(to_lower_case(dtype), qwmm_supported_input_type))
530 throw std::runtime_error("Unsupported input type. List of supported input types: " +
531 to_string(qwmm_supported_input_type));
532 }
533
534 for (const auto &dtype : output_type_vec)
535 {
536 if (!in_array(to_lower_case(dtype), qwmm_supported_output_type))
537 throw std::runtime_error("Unsupported output type. List of supported output types: " +
538 to_string(qwmm_supported_output_type));
539 }
540
542 str_to_dtype(output_model_dtype) != loco::DataType::U8)
543 throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
544
545 if (layer_params_set.size() > 1u)
546 {
547 layer_params = find_valid_params(g, layer_params_set);
548 }
549
550 // Check dtype/granularity of layer params
551 for (auto layer_param : layer_params)
552 {
553 const auto &name = layer_param->name;
554 if (!in_array(to_lower_case(layer_param->dtype), qwmm_supported_output_model_dtype))
555 {
556 throw std::runtime_error("Unsupported dtype in " + name + ". List of supported dtype: " +
557 to_string(qwmm_supported_output_model_dtype));
558 }
559 if (!in_array(to_lower_case(layer_param->granularity), qwmm_supported_granularity))
560 {
561 throw std::runtime_error(
562 "Unsupported granularity in " + name +
563 ". List of supported granularity: " + to_string(qwmm_supported_granularity));
564 }
565 }
566
567 auto input_types = str_vec_to_dtype_vec(input_type_vec);
568 auto output_types = str_vec_to_dtype_vec(output_type_vec);
569
570 // Canonicalize user-given input/output_type (match with # of inputs/outputs)
571 canonicalize_input_type(g, input_types);
572 canonicalize_output_type(g, output_types);
573
574 // Input model checker for quantization
575 luci::QuantizePreCheckerPass input_model_checker{};
576 input_model_checker.run(g);
577
578 auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
579 {
580 ctx->input_model_dtype = str_to_dtype(input_model_dtype);
581 ctx->output_model_dtype = str_to_dtype(output_model_dtype);
582 ctx->granularity = str_to_granularity(granularity);
583 ctx->input_types = input_types;
584 ctx->output_types = output_types;
585 ctx->TF_style_maxpool = TF_style_maxpool;
586 ctx->save_min_max = save_min_max;
587
588 for (auto layer_param : layer_params)
589 {
591 {
592 info.name = layer_param->name;
593 info.dtype = str_to_dtype(layer_param->dtype);
594 info.granularity = str_to_granularity(layer_param->granularity);
595 }
596 ctx->layers_info.emplace_back(info);
597 }
598 }
599
600 luci::QuantizeWithMinMaxPass quantizer(std::move(ctx));
601
602 quantizer.run(g);
603
604 auto verify_ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
605 {
606 verify_ctx->output_model_dtype = str_to_dtype(output_model_dtype);
607 verify_ctx->granularity = str_to_granularity(granularity);
608 verify_ctx->input_types = input_types;
609 verify_ctx->output_types = output_types;
610 verify_ctx->TF_style_maxpool = TF_style_maxpool;
611
612 for (auto layer_param : layer_params)
613 {
615 {
616 info.name = layer_param->name;
617 info.dtype = str_to_dtype(layer_param->dtype);
618 info.granularity = str_to_granularity(layer_param->granularity);
619 }
620 verify_ctx->layers_info.emplace_back(info);
621 }
622 }
623
624 // Verify the type/granularity of the quantized model
625 luci::QuantizedModelVerifier verifier(std::move(verify_ctx));
626
627 verifier.verify(g);
628 }
629}
630
631void CircleQuantizer::quantize_weights(loco::Graph *g) const
632{
633 if (_options->query(Options::Algorithm::QuantizeWeights))
634 {
635 static const std::vector<std::string> qw_supported_input_model_dtype{"float32"};
636 static const std::vector<std::string> qw_supported_output_model_dtype{"int4", "int8", "int16"};
637 static const std::vector<std::string> qw_supported_granularity{"channel"};
638
639 auto input_model_dtype =
640 _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
641 auto output_model_dtype =
642 _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
643 auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
644
645 if (!in_array(to_lower_case(input_model_dtype), qw_supported_input_model_dtype))
646 throw std::runtime_error("Unsupported input type. List of supported input type: " +
647 to_string(qw_supported_input_model_dtype));
648
649 if (!in_array(to_lower_case(output_model_dtype), qw_supported_output_model_dtype))
650 throw std::runtime_error("Unsupported output type. List of supported output type: " +
651 to_string(qw_supported_output_model_dtype));
652
653 if (!in_array(to_lower_case(granularity), qw_supported_granularity))
654 throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
655 to_string(qw_supported_granularity));
656 auto ctx = std::make_unique<luci::QuantizeWeightsPass::Context>();
657 {
658 ctx->input_model_dtype = str_to_dtype(input_model_dtype);
659 ctx->output_model_dtype = str_to_dtype(output_model_dtype);
660 ctx->granularity = str_to_granularity(granularity);
661 }
662 luci::QuantizeWeightsPass weights_quantizer(std::move(ctx));
663
664 weights_quantizer.run(g);
665 }
666}
667
668void CircleQuantizer::quantize_onnx_fake_quantized_model(loco::Graph *g) const
669{
671 {
672 auto ctx = std::make_unique<luci::QuantizeOnnxFakeQuantModelPass::Context>();
673 {
674 ctx->default_activation_dtype = loco::DataType::S16;
675 }
676
677 luci::QuantizeOnnxFakeQuantModelPass quantizer(std::move(ctx));
678
679 quantizer.run(g);
680
681 logo::Phase phase;
682
683 // Default passes
684 phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
685 phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
686 phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
687
690 phase_runner.attach(&prog);
691 phase_runner.run(phase);
692 }
693}
694
695void CircleQuantizer::requantize(loco::Graph *g) const
696{
697 // Requantize
698 if (_options->query(Options::Algorithm::Requantize))
699 {
700 static const std::vector<std::string> rq_supported_input_model_dtype{"int8"};
701 static const std::vector<std::string> rq_supported_output_model_dtype{"uint8"};
702
703 auto input_model_dtype =
704 _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
705 auto output_model_dtype =
706 _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
707
708 if (!in_array(to_lower_case(input_model_dtype), rq_supported_input_model_dtype))
709 throw std::runtime_error("Unsupported input type. List of supported input types: " +
710 to_string(rq_supported_input_model_dtype));
711
712 if (!in_array(to_lower_case(output_model_dtype), rq_supported_output_model_dtype))
713 throw std::runtime_error("Unsupported output type. List of supported output types: " +
714 to_string(rq_supported_output_model_dtype));
715
716 luci::RequantizePass requantizer(str_to_dtype(input_model_dtype),
717 str_to_dtype(output_model_dtype));
718 requantizer.run(g);
719 }
720}
721
722void CircleQuantizer::force_quant_param(loco::Graph *g) const
723{
724 // Force to write quantparam to specified tensors
725 // NOTE Only per-tensor (not per-channel) qparam can be written
726 if (_options->query(Options::Algorithm::ForceQuantParam))
727 {
729 _options->params(Options::AlgorithmParameters::Quantize_tensor_names);
730 auto str_scales = _options->params(Options::AlgorithmParameters::Quantize_scales);
731 auto str_zero_points = _options->params(Options::AlgorithmParameters::Quantize_zero_points);
732
733 // Cast scales/zero_points to proper types
734 ForceQuantParamPass::ScaleVector scales = lexical_cast<float>(str_scales);
735 ForceQuantParamPass::ZPVector zero_points = lexical_cast<int64_t>(str_zero_points);
736
737 ForceQuantParamPass fq(tensors, scales, zero_points);
738 fq.run(g);
739 }
740}
741
742void CircleQuantizer::copy_quant_param(loco::Graph *g) const
743{
744 // Copy quantparam of a tensor to another tensor
745 if (_options->query(Options::Algorithm::CopyQuantParam))
746 {
748 _options->params(Options::AlgorithmParameters::Quantize_src_tensor_names);
750 _options->params(Options::AlgorithmParameters::Quantize_dst_tensor_names);
751
752 CopyQuantParamPass cq(src_tensors, dst_tensors);
753 cq.run(g);
754 }
755}
756
757void CircleQuantizer::convert_to_fake_quantized_model(loco::Graph *g) const
758{
759 // Convert quantized model to fake-quantized model
761 {
763 fake_quantizer.run(g);
764
765 logo::Phase phase;
766
767 // Default passes
768 phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
769 phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
770 phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
771
772 // Remove redundant Dequantize Ops generated during fake quantization
773 phase.emplace_back(std::make_unique<luci::RemoveRedundantDequantizePass>());
774 // Fold Dequantize Ops generated during fake quantization
775 phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
776
779 phase_runner.attach(&prog);
780 phase_runner.run(phase);
781 }
782}
783
785{
786 quantize_dequantize_weight(g);
787 quantize_with_min_max(g);
788 quantize_weights(g);
789 quantize_onnx_fake_quantized_model(g);
790 requantize(g);
791 force_quant_param(g);
792 copy_quant_param(g);
793 convert_to_fake_quantized_model(g);
794
795 logo::Phase phase;
796
797 // Do Shape/Type inference
798 phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
799 phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
800
803 phase_runner.attach(&prog);
804 phase_runner.run(phase);
805}
806
807} // namespace luci
luci::CircleQuantizer::Options::LayerParams LayerParams
A neural network graph.
Definition Graph.h:161
void quantize(loco::Graph *) const
Pass to copy quantparam (scale, zerop) of a tensor to another tensor.
std::vector< std::string > TensorVector
Pass to write quantparam (scale, zerop) to the specified tensors.
std::vector< float > ScaleVector
std::vector< std::string > TensorVector
std::vector< int64_t > ZPVector
Pass to create a quantized graph from a graph fake-quantized on onnx.
Pass to verify the input model has the form acceptable by quantizer.
bool run(loco::Graph *graph) final
Pass to quantize weights.
Pass to quantize activation, weights, and bias.
Pass to re-quantize graph (ex: int8 -> uint8)
volatile const char info[]
T lexical_cast(const std::string &str)
Definition arser.h:43
const char * str
Definition util.h:290
const T * data(const std::vector< T, Alloc > &v)
result
Definition infer.py:103
std::vector< Node * > input_nodes(const Graph *)
Definition Graph.cpp:71
std::set< loco::Node * > active_nodes(const std::vector< loco::Node * > &roots)
Enumerate all the nodes required to compute "roots".
std::vector< Node * > output_nodes(Graph *)
Definition Graph.cpp:101
std::vector< std::unique_ptr< Pass > > Phase
Definition Phase.h:31
std::string to_string(const std::vector< std::string > &strings)
Definition Strings.cpp:29
std::vector< loco::DataType > str_vec_to_dtype_vec(std::vector< std::string > &vec)
Definition Strings.cpp:85
std::string to_lower_case(std::string s)
Definition Strings.cpp:41
QuantizationGranularity str_to_granularity(const std::string &str)
Definition Strings.cpp:93
loco::DataType str_to_dtype(const std::string &str)
Definition Strings.cpp:47
bool in_array(const std::string &str, const std::vector< std::string > &array)
Definition Strings.cpp:24
virtual void enable(Algorithm)=0
virtual void layer_params(AlgorithmParameters, LayerParams &)=0
virtual void params(AlgorithmParameters, std::vector< std::string > &)=0
virtual void param(AlgorithmParameters, const std::string &)=0
std::vector< std::shared_ptr< LayerParam > > LayerParams
virtual LayerParamsSet layer_params_set(void) const =0
virtual bool query(Algorithm)=0
Class to convert a quantized model to a fake-quantized fp32 model.
bool run(loco::Graph *g) final
Run the pass.
Class to verify quantized model.