#include <BisectionSolver.h>

Collaboration diagram for mpqsolver::bisection::BisectionSolver:

Public Types
enum	Algorithm { Auto , ForceQ16Front , ForceQ16Back }
	Algorithm options for running bisection algorithm. More...

Public Member Functions
	BisectionSolver (const mpqsolver::core::Quantizer::Context &ctx, float qerror_ratio)
	Construct a new Bisection Solver object.

	BisectionSolver ()=delete

std::unique_ptr< luci::Module >	run (const std::string &module_path) override
	run bisection for recorded float module at module_path

void	setInputData (std::unique_ptr< mpqsolver::core::DataProvider > &&data)
	set data provider

void	algorithm (Algorithm algorithm)
	set used algorithm

void	setVisqPath (const std::string &visq_path)
	set visq_file path to be used in 'auto' mode

Public Member Functions inherited from mpqsolver::MPQSolver
	MPQSolver (const core::Quantizer::Context &ctx)

virtual	~MPQSolver ()=default

void	setSaveIntermediate (const std::string &save_path)
	set all intermediate artifacts to be saved

Additional Inherited Members
Protected Member Functions inherited from mpqsolver::MPQSolver
std::unique_ptr< luci::Module >	readModule (const std::string &path)

Protected Attributes inherited from mpqsolver::MPQSolver
std::string	_input_quantization

std::string	_output_quantization

std::unique_ptr< core::Quantizer >	_quantizer

std::unique_ptr< core::DumpingHooks >	_hooks

Detailed Description

Definition at line 33 of file BisectionSolver.h.

Member Enumeration Documentation

◆ Algorithm

enum mpqsolver::bisection::BisectionSolver::Algorithm

Algorithm options for running bisection algorithm.

Enumerator
Auto
ForceQ16Front
ForceQ16Back

Definition at line 39 of file BisectionSolver.h.

  {
    // run() decides at run time which side becomes Q16, via front_has_higher_error()
    Auto,
    // front part of the graph is quantized to Q16, the rear to Q8
    ForceQ16Front,
    // front part of the graph is quantized to Q8, the rear to Q16
    ForceQ16Back,
  };

Constructor & Destructor Documentation

◆ BisectionSolver() [1/2]

BisectionSolver::BisectionSolver	(	const mpqsolver::core::Quantizer::Context &	ctx,
		float	qerror_ratio
	)

Construct a new Bisection Solver object.

Parameters

ctx	- quantizer context
qerror_ratio	- target error ratio

Definition at line 78 of file BisectionSolver.cpp.

  // Forward the quantizer context to the MPQSolver base and remember the target error ratio.
  : MPQSolver(ctx), _qerror_ratio(qerror_ratio)
{
  // empty body: everything is done in the initializer list
}

◆ BisectionSolver() [2/2]

mpqsolver::bisection::BisectionSolver::BisectionSolver ( )

delete

Member Function Documentation

◆ algorithm()

void BisectionSolver::algorithm ( Algorithm algorithm )

set used algorithm

Definition at line 99 of file BisectionSolver.cpp.

{ _algorithm = algorithm; }

mpqsolver::bisection::BisectionSolver::algorithm

void algorithm(Algorithm algorithm)

set used algorithm

Definition BisectionSolver.cpp:99

References algorithm().

Referenced by algorithm(), and handleAutoAlgorithm().

◆ run()

std::unique_ptr< luci::Module > BisectionSolver::run ( const std::string & module_path )

override virtual

run bisection for recorded float module at module_path

Implements mpqsolver::MPQSolver.

Definition at line 108 of file BisectionSolver.cpp.

{
  // Overview:
  //  1. read the module and compute a depth for its nodes,
  //  2. measure the qerror of the fully-int16 and fully-uint8 quantized models,
  //  3. derive the target qerror from those two baselines and _qerror_ratio,
  //  4. bisect over node depth to find a mixed uint8/int16 configuration whose
  //     qerror is below the target, then quantize the module with it.
  // Returns the quantized module, or nullptr when depth computation or
  // quantization fails. Throws std::runtime_error when no input data was set,
  // when the int16 qerror exceeds the uint8 qerror, or when the module does not
  // contain exactly one graph.
  auto module = readModule(module_path);
  // NOTE(review): this is the only null check on the module; it is an assert, so
  // it disappears in NDEBUG builds - confirm readModule never returns nullptr.
  assert(module != nullptr);
 
  float min_depth = 0.f;
  float max_depth = 0.f;
  NodeDepthType nodes_depth;
  // compute_depth is expected to fill nodes_depth/min_depth/max_depth (all three
  // are consumed below); anything other than SUCCESS aborts the run.
  if (compute_depth(module.get(), nodes_depth, min_depth, max_depth) !=
      ParameterizerResult::SUCCESS)
  {
    std::cerr << "ERROR: Invalid graph for bisectioning" << std::endl;
    return nullptr;
  }
 
  SolverOutput::get() << "\n>> Computing baseline qerrors\n";
 
  std::unique_ptr<core::MAEMetric> metric = std::make_unique<core::MAEMetric>();
  if (!_input_data)
  {
    throw std::runtime_error("no input data");
  }
  core::DatasetEvaluator evaluator(module.get(), *_input_data.get(), *metric.get());
 
  // Baselines: empty layer_params means no per-layer overrides, i.e. the whole
  // model uses the default dtype passed to evaluate().
  core::LayerParams layer_params;
  float int16_qerror =
    evaluate(evaluator, module_path, "int16" /* default quant_dtype */, layer_params);
  SolverOutput::get() << "Full int16 model qerror: " << int16_qerror << "\n";
 
  float uint8_qerror =
    evaluate(evaluator, module_path, "uint8" /* default quant_dtype */, layer_params);
  SolverOutput::get() << "Full uint8 model qerror: " << uint8_qerror << "\n";
  // NOTE(review): the hook is attached to the quantizer only here, after both
  // baseline evaluations have run - presumably intentional; confirm.
  _quantizer->setHook(_hooks.get());
  if (_hooks)
  {
    _hooks->onBeginSolver(module_path, uint8_qerror, int16_qerror);
  }
 
  // The search below relies on int16 being at least as accurate as uint8.
  if (int16_qerror > uint8_qerror)
  {
    throw std::runtime_error("Q8 model's qerror is less than Q16 model's qerror.");
  }
 
  // Target error: a point between the int16 and uint8 errors, selected by
  // _qerror_ratio. After the check above the fabs argument is non-negative.
  _qerror = int16_qerror + _qerror_ratio * std::fabs(uint8_qerror - int16_qerror);
  SolverOutput::get() << "Target qerror: " << _qerror << "\n";
 
  // it is assumed that int16_qerror <= _qerror <= uint8_qerror,
  if (int16_qerror >= _qerror)
  {
    // return Q16 model (we can not make it more accurate)
    if (!_quantizer->quantize(module.get(), "int16", layer_params))
    {
      std::cerr << "ERROR: Failed to quantize model" << std::endl;
      return nullptr;
    }
 
    if (_hooks)
    {
      _hooks->onEndSolver(layer_params, "int16", int16_qerror);
    }
 
    SolverOutput::get() << "The best configuration is int16 configuration\n";
    return module;
  }
  else if (uint8_qerror <= _qerror)
  {
    // return Q8 model (we can not make it less accurate)
    if (!_quantizer->quantize(module.get(), "uint8", layer_params))
    {
      std::cerr << "ERROR: Failed to quantize model" << std::endl;
      return nullptr;
    }
 
    if (_hooks)
    {
      _hooks->onEndSolver(layer_params, "uint8", uint8_qerror);
    }
 
    SolverOutput::get() << "The best configuration is uint8 configuration\n";
    return module;
  }
 
  // search for optimal mixed precision quantization configuration
  int last_depth = -1;    // cut depth of the previous iteration (loop stop criterion)
  float best_depth = -1;  // -1 until some iteration gets below the target qerror
  float best_error = -1; // minimal error
  core::LayerParams best_params;
  // only single-graph modules are handled
  if (module->size() != 1)
  {
    throw std::runtime_error("Unsupported module");
  }
  auto graph = module->graph(0);
  auto active_nodes = loco::active_nodes(loco::output_nodes(graph));
  // input and output nodes are not valid for quantization, so let's remove them
  for (auto node : loco::input_nodes(graph))
  {
    active_nodes.erase(node);
  }
  for (auto node : loco::output_nodes(graph))
  {
    active_nodes.erase(node);
  }
 
  // let's decide whether nodes at input are more susceptible to be quantized into Q16, than at
  // output
  bool int16_front = true;
  switch (_algorithm)
  {
    case Algorithm::Auto:
      // decision is delegated to front_has_higher_error, which receives the node
      // depths, the visq data path and the middle depth of the graph
      int16_front =
        front_has_higher_error(nodes_depth, _visq_data_path, 0.5f * (max_depth + min_depth));
      break;
    case Algorithm::ForceQ16Front:
      SolverOutput::get() << "Front part will be Q16, while the rear will be Q8\n";
      int16_front = true;
      break;
    case Algorithm::ForceQ16Back:
      SolverOutput::get() << "Front part will be Q8, while the rear will be Q16\n";
      int16_front = false;
      break;
  }
 
  SolverOutput::get() << "\n";
 
  // Bisection over the depth segment [min_depth, max_depth].
  while (true)
  {
    // cut at the (floored) middle of the current segment
    int cut_depth = static_cast<int>(std::floor(0.5f * (min_depth + max_depth)));
 
    // the cut did not move since the previous iteration: the segment can not be
    // narrowed any further, so stop
    if (last_depth == cut_depth)
    {
      break;
    }
 
    if (_hooks)
    {
      _hooks->onBeginIteration();
    }
 
    SolverOutput::get() << "Looking for the optimal configuration in [" << min_depth << " , "
                        << max_depth << "] depth segment\n";
 
    last_depth = cut_depth;
 
    // per-iteration configuration (shadows the baseline layer_params above):
    // nodes on the Q16 side of the cut get an explicit int16 entry, all other
    // nodes fall back to the default dtype given to evaluate() ("uint8")
    core::LayerParams layer_params;
    for (auto &node : active_nodes)
    {
      auto cur_node = loco::must_cast<luci::CircleNode *>(node);
      auto iter = nodes_depth.find(cur_node);
      if (iter == nodes_depth.end())
      {
        continue; // to filter out nodes like weights
      }
 
      float depth = iter->second;
 
      // a node exactly at cut_depth belongs to the Q16 side in both modes
      if ((depth <= cut_depth && int16_front) || (depth >= cut_depth && !int16_front))
      {
        auto layer_param = std::make_shared<core::LayerParam>();
        {
          layer_param->name = cur_node->name();
          layer_param->dtype = "int16";
          layer_param->granularity = "channel";
        }
 
        layer_params.emplace_back(layer_param);
      }
    }
 
    float cur_error = evaluate(evaluator, module_path, "uint8", layer_params);
 
    if (_hooks)
    {
      _hooks->onEndIteration(layer_params, "uint8", cur_error);
    }
 
    if (cur_error < _qerror)
    {
      // target met: remember this configuration and move the segment bound so
      // that the next cut selects fewer nodes for int16
      SolverOutput::get() << "Qerror at depth " << cut_depth << " is " << cur_error
                          << " < target qerror (" << _qerror << ")\n";
      int16_front ? (max_depth = cut_depth) : (min_depth = cut_depth);
      best_params = layer_params;
      best_depth = cut_depth;
      best_error = cur_error;
    }
    else
    {
      // target missed: move the opposite bound so that the next cut selects
      // more nodes for int16
      SolverOutput::get() << "Qerror at depth " << cut_depth << " is " << cur_error
                          << (cur_error > _qerror ? " > " : " == ") << "target qerror (" << _qerror
                          << ")\n";
      int16_front ? (min_depth = cut_depth) : (max_depth = cut_depth);
    }
  }
 
  // NOTE(review): if no iteration got below the target, best_params is still
  // empty and best_depth/best_error are still -1; the code below then reports
  // depth -1 and quantizes with "uint8" and no per-layer overrides - confirm
  // this fallback is intended.
  if (_hooks)
  {
    _hooks->onEndSolver(best_params, "uint8", best_error);
  }
 
  SolverOutput::get() << "Found the best configuration at depth " << best_depth << "\n";
  if (!_quantizer->quantize(module.get(), "uint8", best_params))
  {
    std::cerr << "ERROR: Failed to quantize model" << std::endl;
    return nullptr;
  }
 
  return module;
}

References mpqsolver::MPQSolver::_hooks, mpqsolver::MPQSolver::_quantizer, loco::active_nodes(), Auto, mpqsolver::bisection::compute_depth(), ForceQ16Back, ForceQ16Front, SolverOutput::get(), loco::input_nodes(), loco::output_nodes(), and mpqsolver::bisection::SUCCESS.

◆ setInputData()

void BisectionSolver::setInputData ( std::unique_ptr< mpqsolver::core::DataProvider > && data )

set data provider

Definition at line 103 of file BisectionSolver.cpp.

{
  // Take ownership of the data provider; run() throws "no input data" when none is set.
  _input_data = std::move(data);
}

◆ setVisqPath()

void BisectionSolver::setVisqPath ( const std::string & visq_path )

set visq_file path to be used in 'auto' mode

This is used to decide which way of splitting the neural network into 8-bit and 16-bit parts will be best for accuracy.

Definition at line 101 of file BisectionSolver.cpp.

{ _visq_data_path = visq_path; }

Referenced by handleAutoAlgorithm().

The documentation for this class was generated from the following files:

compiler/circle-mpqsolver/src/bisection/BisectionSolver.h
compiler/circle-mpqsolver/src/bisection/BisectionSolver.cpp

Public Types

Public Member Functions

Additional Inherited Members