562{
564 INFO(l) <<
"QuantizeWithMinMaxPass Start" << std::endl;
565
567
569 auto iter = info_by_name.find(node->name());
570
571
572 if (iter != info_by_name.end())
573 return iter->second.dtype;
574
575
576 return _ctx->output_model_dtype;
577 };
578
580 auto iter = info_by_name.find(node->name());
581
582
583 if (iter != info_by_name.end())
584 return iter->second.granularity;
585
586
587 return _ctx->granularity;
588 };
589
590
591
592
593
595 {
596 auto circle_node = loco::must_cast<luci::CircleNode *>(node);
598 circle_node->accept(&qa);
599 }
600
601
603 {
604 auto circle_node = loco::must_cast<luci::CircleNode *>(node);
605 auto op_dtype = quantize_dtype(circle_node);
606 if (op_dtype != _ctx->output_model_dtype)
607 {
608 InsertQuantizeOp iqo(_ctx->output_model_dtype, op_dtype);
609 circle_node->accept(&iqo);
610 }
611 }
612
613
614 {
616
617 phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());
618
621 phase_runner.attach(&prog);
622 phase_runner.run(phase);
623 }
624
625
626 {
628 pqbp.run(g);
629 }
630
631
633 {
634 auto circle_node = loco::must_cast<luci::CircleNode *>(node);
636 circle_node->accept(&qcia);
637 }
638
639
641 {
642 auto circle_node = loco::must_cast<luci::CircleNode *>(node);
643
644
645
646
647 if (circle_node->quantparam() == nullptr)
648 continue;
649
651 circle_node->accept(&qsa);
652 }
653
654
656
657 phase.emplace_back(std::make_unique<luci::PropagateQParamForwardPass>(_ctx->TF_style_maxpool));
658
661 phase_runner.attach(&prog);
662 phase_runner.run(phase);
663
664
666 {
667 auto circle_node = loco::must_cast<luci::CircleNode *>(node);
668 QuantizeWeights qw(_ctx->input_model_dtype, quantize_dtype(circle_node),
669 quantize_granularity(circle_node));
670 circle_node->accept(&qw);
671 }
672
673
675 {
676 auto circle_node = loco::must_cast<luci::CircleNode *>(node);
677 QuantizeBias qb(_ctx->input_model_dtype, quantize_dtype(circle_node),
678 quantize_granularity(circle_node));
679 circle_node->accept(&qb);
680 }
681
682
683 auto graph_outputs =
g->outputs();
685 {
686 auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
687 if (
static_cast<luci::CircleNode *
>(circle_node->from())->dtype() == _ctx->output_model_dtype)
688 {
689 circle_node->dtype(_ctx->output_model_dtype);
690 auto graph_output = graph_outputs->at(circle_node->index());
691 graph_output->dtype(_ctx->output_model_dtype);
692 }
693 }
694
695
696 set_input_type(g);
697
698
699 set_output_type(g);
700
701
702 {
704
705 phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());
706
709 phase_runner.attach(&prog);
710 phase_runner.run(phase);
711 }
712
713 if (not _ctx->save_min_max)
714 {
715
717 {
718 auto circle_node = loco::must_cast<luci::CircleNode *>(node);
719 if (auto qparam = circle_node->quantparam())
720 {
722 qparam->min.clear();
723 qparam->max.clear();
724 }
725 }
726 }
727
728 INFO(l) <<
"QuantizeWithMinMaxPass End" << std::endl;
729 return false;
730}
std::set< Node * > all_nodes(Graph *)
Enumerate all the nodes in a given graph.
std::set< loco::Node * > active_nodes(const std::vector< loco::Node * > &roots)
Enumerate all the nodes required to compute "roots".
std::vector< Node * > output_nodes(Graph *)
std::vector< std::unique_ptr< Pass > > Phase
void warn_accuracy_with_range(luci::CircleNode *n)
LayerInfoMap layer_info_map(loco::Graph *g, std::vector< LayerInfo > &layers_info)
Class to propagate quantization parameters of an operator's output to input.
Quantize non-const activation using recorded min/max values.
QuantizeBias quantizes tensors for bias.
Quantize non-const activation using pre-defined scale/zp for special Ops.
QuantizeWeights quantizes tensors for weights.