Pass to create a quantized graph from a graph that was fake-quantized in ONNX. More...

#include <QuantizeOnnxFakeQuantModelPass.h>

Collaboration diagram for luci::QuantizeOnnxFakeQuantModelPass:

Data Structures
struct	Context

Public Member Functions
	QuantizeOnnxFakeQuantModelPass (std::unique_ptr< Context > &&ctx)

virtual const char *	name (void) const

bool	run (loco::Graph *graph)

Public Member Functions inherited from logo::Pass
virtual	~Pass ()=default

Detailed Description

Pass to create a quantized graph from a graph that was fake-quantized in ONNX.

Definition at line 32 of file QuantizeOnnxFakeQuantModelPass.h.

Constructor & Destructor Documentation

◆ QuantizeOnnxFakeQuantModelPass()

luci::QuantizeOnnxFakeQuantModelPass::QuantizeOnnxFakeQuantModelPass ( std::unique_ptr< Context > && ctx )

inline

Definition at line 41 of file QuantizeOnnxFakeQuantModelPass.h.

                                                               : _ctx{std::move(ctx)}
  {
    // Ownership of the context has already been moved into _ctx by the
    // initializer above, so the checks below inspect _ctx, not the parameter.
    // Both checks are plain assert()s: a null context or a falsy
    // default_activation_dtype is treated as a caller bug, not a runtime error.
    assert(_ctx);                           // FIX_CALLER_UNLESS
    assert(_ctx->default_activation_dtype); // FIX_CALLER_UNLESS
  }

Member Function Documentation

◆ name()

virtual const char * luci::QuantizeOnnxFakeQuantModelPass::name ( void ) const

inline virtual

Reimplemented from logo::Pass.

Definition at line 47 of file QuantizeOnnxFakeQuantModelPass.h.

47{ return "luci::QuantizeOnnxFakeQuantModelPass"; }

◆ run()

bool luci::QuantizeOnnxFakeQuantModelPass::run ( loco::Graph * g )

virtual

How does QuantizeOnnxFakeQuantModel work?

Activation is quantized as below.

Before

[node(fp32)] -> [OnnxQuantizeLinear] -> [OnnxDequantizeLinear]

After

[node(q)]

Weights (constants) are quantized as below.

Before

[Const(q w/o qparam)] -> [OnnxDequantizeLinear]

After

[Const(q)]

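After the two pattern rewrites above, the pass performs the following steps in order:
- Quantize constant activations
- Quantize with predecessors' qparams
- Propagate activation qparams backward
- Update qparams of special operators
- Insert Quantize Op if an Op's input dtype and output dtype mismatch
- Update the dtype of the graph outputs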

Implements logo::Pass.

Definition at line 65 of file QuantizeOnnxFakeQuantModelPass.cpp.

{
  // Converts the graph `g` in place through a fixed sequence of steps.
  // The steps are executed strictly in the order written below, and the
  // function always returns false (see the note at the return statement).
  LOGGER(l);
  INFO(l) << "QuantizeOnnxFakeQuantModelPass Start" << std::endl;
 
  // Quantize Onnx QuantizeLinear-DequantizeLinear pattern
  // (the boolean result of run() is intentionally not inspected here)
  {
    QuantizeOnnxQDQPass pass;
    pass.run(g);
  }
 
  // Quantize Onnx const-DequantizeLinear pattern
  // (again, the result of run() is discarded)
  {
    QuantizeOnnxDequantizeLinearPass pass;
    pass.run(g);
  }
 
  // Quantize const input activation
  // Visits every node that loco::active_nodes() yields for the graph outputs
  // and applies a fresh QuantizeConstInputActivation visitor, configured with
  // the context's default activation dtype, to each of them.
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
 
    QuantizeConstInputActivation qcia(_ctx->default_activation_dtype);
    circle_node->accept(&qcia);
  }
 
  // Quantize nodes using their predecessors' qparams
  // The phase holds a single QuantizeWithPredecessorPass and is driven by a
  // PhaseRunner using the Saturate strategy.
  // NOTE(review): Saturate presumably repeats the phase until no pass reports
  // a change - confirm against logo::PhaseRunner.
  {
    logo::Phase phase;
    phase.emplace_back(std::make_unique<QuantizeWithPredecessorPass>());
 
    logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
    phase_runner.run(phase);
  }
 
  // Backward propagation of activation qparam
  // Runs once, configured with the context's default activation dtype.
  {
    PropagateQParamBackwardPass pqbp(_ctx->default_activation_dtype);
    pqbp.run(g);
  }
 
  // Update qparam of output of special Ops
  // Only nodes for which is_quantized() is true are visited; the visitor is
  // constructed with that node's own dtype rather than the context default.
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
 
    if (is_quantized(circle_node))
    {
      QuantizeSpecialActivation qsa(circle_node->dtype());
      circle_node->accept(&qsa);
    }
  }
 
  // Insert QuantizeOp if input/output dtype does not match
  // A new InsertQuantizeOpOnDTypeMismatch visitor is applied to every node.
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
 
    InsertQuantizeOpOnDTypeMismatch iqoodm;
    circle_node->accept(&iqoodm);
  }
 
  // Update output dtype
  // For each output node: copy the dtype of the node feeding it (from()) onto
  // the CircleOutput, then mirror that dtype onto the graph-level output entry
  // found at the same index.
  auto graph_outputs = g->outputs();
  for (auto node : loco::output_nodes(g))
  {
    auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
    auto from = loco::must_cast<luci::CircleNode *>(circle_node->from());
    circle_node->dtype(from->dtype());
 
    auto graph_output = graph_outputs->at(circle_node->index());
    graph_output->dtype(circle_node->dtype());
  }
 
  INFO(l) << "QuantizeOnnxFakeQuantModelPass End" << std::endl;
  // Unconditionally false, even though the graph was modified above.
  // NOTE(review): presumably this keeps a pass driver from re-running the
  // pass - confirm against the logo::Pass::run contract.
  return false; // one time run
}

References loco::active_nodes(), INFO, luci::is_quantized(), LOGGER, loco::output_nodes(), luci::PropagateQParamBackwardPass::run(), luci::QuantizeOnnxDequantizeLinearPass::run(), and luci::QuantizeOnnxQDQPass::run().

The documentation for this class was generated from the following files:

compiler/luci/pass/include/luci/Pass/QuantizeOnnxFakeQuantModelPass.h
compiler/luci/pass/src/QuantizeOnnxFakeQuantModelPass.cpp