Pass to quantize weights with GPTQ algorithm. More...

#include <QuantizeDequantizeWeightsWithGPTQPass.h>

Collaboration diagram for luci::QuantizeDequantizeWeightsWithGPTQPass:

Data Structures
struct	Context

Public Member Functions
	QuantizeDequantizeWeightsWithGPTQPass (std::unique_ptr< Context > &&ctx, HessianMap *hessian_map)

virtual const char *	name (void) const

bool	run (loco::Graph *graph)
	Run the pass.

Public Member Functions inherited from logo::Pass
virtual	~Pass ()=default

Detailed Description

Pass to quantize weights with GPTQ algorithm.

Definition at line 36 of file QuantizeDequantizeWeightsWithGPTQPass.h.

Constructor & Destructor Documentation

◆ QuantizeDequantizeWeightsWithGPTQPass()

luci::QuantizeDequantizeWeightsWithGPTQPass::QuantizeDequantizeWeightsWithGPTQPass	(	std::unique_ptr< Context > &&	ctx,
		HessianMap *	hessian_map
	)

inline

Definition at line 48 of file QuantizeDequantizeWeightsWithGPTQPass.h.

    : _ctx{std::move(ctx)}, _hessian_map{hessian_map}
  {
    // DO NOTHING
  }

Member Function Documentation

◆ name()

virtual const char * luci::QuantizeDequantizeWeightsWithGPTQPass::name ( void ) const

inline virtual

Reimplemented from logo::Pass.

Definition at line 53 of file QuantizeDequantizeWeightsWithGPTQPass.h.

{ return "luci::QuantizeDequantizeWeightsWithGPTQPass"; }

◆ run()

bool luci::QuantizeDequantizeWeightsWithGPTQPass::run ( loco::Graph * graph )

virtual

Run the pass.

Returns: false if nothing was changed

Implements logo::Pass.

Definition at line 149 of file QuantizeDequantizeWeightsWithGPTQPass.cpp.

{
  // Body of QuantizeDequantizeWeightsWithGPTQPass::run.
  // Validates the configured dtypes, resolves per-layer quantization settings,
  // then visits every active node of the graph with a
  // QuantizeDequantizeWeightsWithGPTQ visitor.
  // NOTE(review): the graph is referred to as `g` here while the documented
  // declaration names the parameter `graph` - presumably the definition in the
  // .cpp file names it `g`; confirm.
  LOGGER(l);
  INFO(l) << "QuantizeDequantizeWeightsWithGPTQ Start" << std::endl;
 
  // Reject any input model dtype other than FLOAT32.
  if (_ctx->input_model_dtype != loco::DataType::FLOAT32)
    throw std::runtime_error("GPTQPass: Weights-only quantization supports float32 input only");
 
  // Only U8 and U4 are accepted as the default output dtype.
  if (_ctx->output_model_dtype != loco::DataType::U8 &&
      _ctx->output_model_dtype != loco::DataType::U4)
  {
    throw std::runtime_error("GPTQPass: GPTQ quantization supports uint4/uint8");
  }
 
  // Lookup table built from the context's layers_info; queried below by node name.
  auto info_by_name = layer_info_map(g, _ctx->layers_info);
 
  // Picks the dtype for a node: the per-layer entry if one exists for the
  // node's name, otherwise the context-wide output dtype.
  auto quantize_dtype = [&](const luci::CircleNode *node) {
    auto iter = info_by_name.find(node->name());
 
    // Return designated quantization dtype
    if (iter != info_by_name.end())
      return iter->second.dtype;
 
    // Return default quantization dtype
    return _ctx->output_model_dtype;
  };
 
  // Picks the granularity for a node with the same lookup-then-default rule
  // as quantize_dtype above.
  auto quantize_granularity = [&](const luci::CircleNode *node) {
    auto iter = info_by_name.find(node->name());
 
    // Return designated quantization granularity
    if (iter != info_by_name.end())
      return iter->second.granularity;
 
    // Return default quantization granularity
    return _ctx->granularity;
  };
 
  // Quantize weights
  // A fresh visitor is constructed per node because dtype and granularity
  // are resolved per node.
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    QuantizeDequantizeWeightsWithGPTQ qw(_ctx->input_model_dtype, quantize_dtype(circle_node),
                                         quantize_granularity(circle_node), _hessian_map);
    circle_node->accept(&qw);
  }
 
  INFO(l) << "QuantizeDequantizeWeightsWithGPTQ End" << std::endl;
  // Always returns false, whether or not any node was visited or modified.
  // NOTE(review): presumably this keeps the pass driver from re-running the
  // pass - confirm against the caller.
  return false; // one time run
}

References loco::active_nodes(), INFO, luci::layer_info_map(), LOGGER, luci::CircleNode::name(), and loco::output_nodes().

Referenced by package.infer.session::inference().

The documentation for this class was generated from the following files:

compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsWithGPTQPass.h
compiler/luci/pass/src/QuantizeDequantizeWeightsWithGPTQPass.cpp