ONE - On-device Neural Engine
Loading...
Searching...
No Matches
mpqsolver::bisection::BisectionSolver Class Reference (final)

#include <BisectionSolver.h>

Collaboration diagram for mpqsolver::bisection::BisectionSolver:

Public Types

enum  Algorithm { Auto , ForceQ16Front , ForceQ16Back }
 Algorithm options for running bisection algorithm. More...
 

Public Member Functions

 BisectionSolver (const mpqsolver::core::Quantizer::Context &ctx, float qerror_ratio)
 Construct a new Bisection Solver object.
 
 BisectionSolver ()=delete
 
std::unique_ptr< luci::Module > run (const std::string &module_path) override
 run bisection for recorded float module at module_path
 
void setInputData (std::unique_ptr< mpqsolver::core::DataProvider > &&data)
 set data provider
 
void algorithm (Algorithm algorithm)
 set used algorithm
 
void setVisqPath (const std::string &visq_path)
 set visq_file path to be used in 'auto' mode
 
- Public Member Functions inherited from mpqsolver::MPQSolver
 MPQSolver (const core::Quantizer::Context &ctx)
 
virtual ~MPQSolver ()=default
 
void setSaveIntermediate (const std::string &save_path)
 set all intermediate artifacts to be saved
 

Additional Inherited Members

- Protected Member Functions inherited from mpqsolver::MPQSolver
std::unique_ptr< luci::Module > readModule (const std::string &path)
 
- Protected Attributes inherited from mpqsolver::MPQSolver
std::string _input_quantization
 
std::string _output_quantization
 
std::unique_ptr< core::Quantizer > _quantizer
 
std::unique_ptr< core::DumpingHooks > _hooks
 

Detailed Description

Definition at line 33 of file BisectionSolver.h.

Member Enumeration Documentation

◆ Algorithm

Algorithm options for running bisection algorithm.

Enumerator
Auto 
ForceQ16Front 
ForceQ16Back 

Definition at line 39 of file BisectionSolver.h.

Constructor & Destructor Documentation

◆ BisectionSolver() [1/2]

BisectionSolver::BisectionSolver ( const mpqsolver::core::Quantizer::Context &  ctx,
float  qerror_ratio 
)

Construct a new Bisection Solver object.

Parameters
ctx - quantizer context
qerror_ratio - target error ratio

Definition at line 78 of file BisectionSolver.cpp.

79 : MPQSolver(ctx), _qerror_ratio(qerror_ratio)
80{
81}
MPQSolver(const core::Quantizer::Context &ctx)
Definition MPQSolver.cpp:24

◆ BisectionSolver() [2/2]

mpqsolver::bisection::BisectionSolver::BisectionSolver ( )
delete

Member Function Documentation

◆ algorithm()

void BisectionSolver::algorithm ( Algorithm  algorithm)

set used algorithm

Definition at line 99 of file BisectionSolver.cpp.

99{ _algorithm = algorithm; }
void algorithm(Algorithm algorithm)
set used algorithm

References algorithm().

Referenced by algorithm(), and handleAutoAlgorithm().

◆ run()

std::unique_ptr< luci::Module > BisectionSolver::run ( const std::string &  module_path)
override virtual

run bisection for recorded float module at module_path

Implements mpqsolver::MPQSolver.

Definition at line 108 of file BisectionSolver.cpp.

109{
110 auto module = readModule(module_path);
111 assert(module != nullptr);
112
113 float min_depth = 0.f;
114 float max_depth = 0.f;
115 NodeDepthType nodes_depth;
116 if (compute_depth(module.get(), nodes_depth, min_depth, max_depth) !=
117 ParameterizerResult::SUCCESS)
118 {
119 std::cerr << "ERROR: Invalid graph for bisectioning" << std::endl;
120 return nullptr;
121 }
122
123 SolverOutput::get() << "\n>> Computing baseline qerrors\n";
124
125 std::unique_ptr<core::MAEMetric> metric = std::make_unique<core::MAEMetric>();
126 if (!_input_data)
127 {
128 throw std::runtime_error("no input data");
129 }
130 core::DatasetEvaluator evaluator(module.get(), *_input_data.get(), *metric.get());
131
132 core::LayerParams layer_params;
133 float int16_qerror =
134 evaluate(evaluator, module_path, "int16" /* default quant_dtype */, layer_params);
135 SolverOutput::get() << "Full int16 model qerror: " << int16_qerror << "\n";
136
137 float uint8_qerror =
138 evaluate(evaluator, module_path, "uint8" /* default quant_dtype */, layer_params);
139 SolverOutput::get() << "Full uint8 model qerror: " << uint8_qerror << "\n";
140 _quantizer->setHook(_hooks.get());
141 if (_hooks)
142 {
143 _hooks->onBeginSolver(module_path, uint8_qerror, int16_qerror);
144 }
145
146 if (int16_qerror > uint8_qerror)
147 {
148 throw std::runtime_error("Q8 model's qerror is less than Q16 model's qerror.");
149 }
150
151 _qerror = int16_qerror + _qerror_ratio * std::fabs(uint8_qerror - int16_qerror);
152 SolverOutput::get() << "Target qerror: " << _qerror << "\n";
153
154 // it'is assumed that int16_qerror <= _qerror <= uint8_qerror,
155 if (int16_qerror >= _qerror)
156 {
157 // return Q16 model (we can not make it more accurate)
158 if (!_quantizer->quantize(module.get(), "int16", layer_params))
159 {
160 std::cerr << "ERROR: Failed to quantize model" << std::endl;
161 return nullptr;
162 }
163
164 if (_hooks)
165 {
166 _hooks->onEndSolver(layer_params, "int16", int16_qerror);
167 }
168
169 SolverOutput::get() << "The best configuration is int16 configuration\n";
170 return module;
171 }
172 else if (uint8_qerror <= _qerror)
173 {
174 // return Q8 model (we can not make it less accurate)
175 if (!_quantizer->quantize(module.get(), "uint8", layer_params))
176 {
177 std::cerr << "ERROR: Failed to quantize model" << std::endl;
178 return nullptr;
179 }
180
181 if (_hooks)
182 {
183 _hooks->onEndSolver(layer_params, "uint8", uint8_qerror);
184 }
185
186 SolverOutput::get() << "The best configuration is uint8 configuration\n";
187 return module;
188 }
189
190 // search for optimal mixed precision quantization configuration
191 int last_depth = -1;
192 float best_depth = -1;
193 float best_error = -1; // minimal error
194 core::LayerParams best_params;
195 if (module->size() != 1)
196 {
197 throw std::runtime_error("Unsupported module");
198 }
199 auto graph = module->graph(0);
200 auto active_nodes = loco::active_nodes(loco::output_nodes(graph));
201 // input and output nodes are not valid for quantization, so let's remove them
202 for (auto node : loco::input_nodes(graph))
203 {
204 active_nodes.erase(node);
205 }
206 for (auto node : loco::output_nodes(graph))
207 {
208 active_nodes.erase(node);
209 }
210
211 // let's decide whether nodes at input are more suspectible to be quantized into Q16, than at
212 // output
213 bool int16_front = true;
214 switch (_algorithm)
215 {
216 case Algorithm::Auto:
217 int16_front =
218 front_has_higher_error(nodes_depth, _visq_data_path, 0.5f * (max_depth + min_depth));
219 break;
220 case Algorithm::ForceQ16Front:
221 SolverOutput::get() << "Front part will be Q16, while the rear will be Q8\n";
222 int16_front = true;
223 break;
224 case Algorithm::ForceQ16Back:
225 SolverOutput::get() << "Front part will be Q8, while the rear will be Q16\n";
226 int16_front = false;
227 break;
228 }
229
230 SolverOutput::get() << "\n";
231
232 while (true)
233 {
234 int cut_depth = static_cast<int>(std::floor(0.5f * (min_depth + max_depth)));
235
236 if (last_depth == cut_depth)
237 {
238 break;
239 }
240
241 if (_hooks)
242 {
243 _hooks->onBeginIteration();
244 }
245
246 SolverOutput::get() << "Looking for the optimal configuration in [" << min_depth << " , "
247 << max_depth << "] depth segment\n";
248
249 last_depth = cut_depth;
250
251 core::LayerParams layer_params;
252 for (auto &node : active_nodes)
253 {
254 auto cur_node = loco::must_cast<luci::CircleNode *>(node);
255 auto iter = nodes_depth.find(cur_node);
256 if (iter == nodes_depth.end())
257 {
258 continue; // to filter out nodes like weights
259 }
260
261 float depth = iter->second;
262
263 if ((depth <= cut_depth && int16_front) || (depth >= cut_depth && !int16_front))
264 {
265 auto layer_param = std::make_shared<core::LayerParam>();
266 {
267 layer_param->name = cur_node->name();
268 layer_param->dtype = "int16";
269 layer_param->granularity = "channel";
270 }
271
272 layer_params.emplace_back(layer_param);
273 }
274 }
275
276 float cur_error = evaluate(evaluator, module_path, "uint8", layer_params);
277
278 if (_hooks)
279 {
280 _hooks->onEndIteration(layer_params, "uint8", cur_error);
281 }
282
283 if (cur_error < _qerror)
284 {
285 SolverOutput::get() << "Qerror at depth " << cut_depth << " is " << cur_error
286 << " < target qerror (" << _qerror << ")\n";
287 int16_front ? (max_depth = cut_depth) : (min_depth = cut_depth);
288 best_params = layer_params;
289 best_depth = cut_depth;
290 best_error = cur_error;
291 }
292 else
293 {
294 SolverOutput::get() << "Qerror at depth " << cut_depth << " is " << cur_error
295 << (cur_error > _qerror ? " > " : " == ") << "target qerror (" << _qerror
296 << ")\n";
297 int16_front ? (min_depth = cut_depth) : (max_depth = cut_depth);
298 }
299 }
300
301 if (_hooks)
302 {
303 _hooks->onEndSolver(best_params, "uint8", best_error);
304 }
305
306 SolverOutput::get() << "Found the best configuration at depth " << best_depth << "\n";
307 if (!_quantizer->quantize(module.get(), "uint8", best_params))
308 {
309 std::cerr << "ERROR: Failed to quantize model" << std::endl;
310 return nullptr;
311 }
312
313 return module;
314}
static SolverOutput & get(void)
get singleton object
std::unique_ptr< core::Quantizer > _quantizer
Definition MPQSolver.h:54
std::unique_ptr< core::DumpingHooks > _hooks
Definition MPQSolver.h:55
std::vector< Node * > input_nodes(const Graph *)
Definition Graph.cpp:71
std::set< loco::Node * > active_nodes(const std::vector< loco::Node * > &roots)
Enumerate all the nodes required to compute "roots".
std::vector< Node * > output_nodes(Graph *)
Definition Graph.cpp:101
std::map< luci::CircleNode *, float > NodeDepthType
ParameterizerResult compute_depth(const luci::Module *module, NodeDepthType &nodes_depth, float &min_depth, float &max_depth)
compute maximal distance from graph inputs to graph nodes along with min/max values of distance and r...
std::vector< std::shared_ptr< LayerParam > > LayerParams
Definition Dumper.h:31

References mpqsolver::MPQSolver::_hooks, mpqsolver::MPQSolver::_quantizer, loco::active_nodes(), Auto, mpqsolver::bisection::compute_depth(), ForceQ16Back, ForceQ16Front, SolverOutput::get(), loco::input_nodes(), loco::output_nodes(), and mpqsolver::bisection::SUCCESS.

Referenced by package.infer.session::inference().

◆ setInputData()

void BisectionSolver::setInputData ( std::unique_ptr< mpqsolver::core::DataProvider > &&  data)

set data provider

Definition at line 103 of file BisectionSolver.cpp.

104{
105 _input_data = std::move(data);
106}

◆ setVisqPath()

void BisectionSolver::setVisqPath ( const std::string &  visq_path)

set visq_file path to be used in 'auto' mode

This is used to decide which way of splitting the neural network (8-bit or 16-bit) will be the best for accuracy.

Definition at line 101 of file BisectionSolver.cpp.

101{ _visq_data_path = visq_path; }

Referenced by handleAutoAlgorithm().


The documentation for this class was generated from the following files: