ONE - On-device Neural Engine
onert::exec::MultiModelExecutors Class Reference

Class to gather executors.

#include <MultiModelExecutors.h>


Public Member Functions

 MultiModelExecutors (void)=delete
 
 MultiModelExecutors (std::unique_ptr< ir::ModelEdges > model_edges)
 
 MultiModelExecutors (const MultiModelExecutors &)=delete
 
 MultiModelExecutors (MultiModelExecutors &&)=default
 
 ~MultiModelExecutors ()=default
 
void emplace (const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index, std::unique_ptr< IExecutor > exec) override
 Insert an executor into the executor set.
 
IExecutor * at (const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index) const override
 Return the executor at the given index.
 
uint32_t inputSize () const override
 Return the executor set's number of inputs.
 
uint32_t outputSize () const override
 Return the executor set's number of outputs.
 
const ir::OperandInfo & inputInfo (const ir::IOIndex &index) const override
 Return NN package input tensor info.
 
const ir::OperandInfo & outputInfo (const ir::IOIndex &index) const override
 Return NN package output tensor info.
 
void execute (const ExecutionContext &ctx) override
 Execute the NN package's executor set.
 
- Public Member Functions inherited from onert::exec::IExecutors
virtual ~IExecutors ()=default
 Virtual IExecutors destructor.
 
IExecutor * entryExecutor () const
 

Detailed Description

Class to gather executors.

Definition at line 48 of file MultiModelExecutors.h.
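The intended usage is to construct the set from the package's ir::ModelEdges, register one executor per (model, subgraph) pair, and then run the whole package through execute(). The sketch below is illustrative only: the include path, the onert::ir namespace qualification, and the way the IExecutor instances and the ExecutionContext are produced are assumptions here, since they come from other parts of the runtime.

#include <MultiModelExecutors.h> // header named above; the exact path depends on the build setup
#include <memory>

// Illustrative sketch: exec0/exec1 and ctx are assumed to be created elsewhere
// by the compiler pipeline and the execution front-end.
void run_package(std::unique_ptr<onert::ir::ModelEdges> edges,
                 std::unique_ptr<onert::exec::IExecutor> exec0,
                 std::unique_ptr<onert::exec::IExecutor> exec1,
                 const onert::exec::ExecutionContext &ctx)
{
  onert::exec::MultiModelExecutors executors{std::move(edges)};

  // One executor per (model, subgraph) pair; the entry subgraph of each model is index 0.
  executors.emplace(onert::ir::ModelIndex{0}, onert::ir::SubgraphIndex{0}, std::move(exec0));
  executors.emplace(onert::ir::ModelIndex{1}, onert::ir::SubgraphIndex{0}, std::move(exec1));

  // Runs the models one after another, wiring edge tensors between them.
  executors.execute(ctx);
}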

Constructor & Destructor Documentation

◆ MultiModelExecutors() [1/4]

onert::exec::MultiModelExecutors::MultiModelExecutors ( void  )
delete

◆ MultiModelExecutors() [2/4]

onert::exec::MultiModelExecutors::MultiModelExecutors ( std::unique_ptr< ir::ModelEdges > model_edges)
inline

Definition at line 52 of file MultiModelExecutors.h.

53 : _executors{}, _model_edges{std::move(model_edges)}, _edge_quant_layers{},
54 _edge_quant_tensors{}, _edge_tensors{}, _is_created_edge_quant_layers{false},
55 _pkg_input_quant_layers{}, _pkg_output_quant_layers{}, _pkg_input_quant_tensors{},
56 _pkg_output_quant_tensors{}, _pkg_input_tensors{}, _pkg_output_tensors{}
57 {
58 for (const auto &edge : _model_edges->edges)
59 {
60 _edge_map[edge.from].emplace_back(edge.to);
61 }
62 }
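The loop above inverts the edge list into a producer-to-consumers map: every `from` IODesc is mapped to the list of `to` IODescs that consume it. The standalone sketch below is not onert code; plain ints stand in for ir::ModelIndex, ir::SubgraphIndex and ir::IOIndex so the resulting structure can be seen in isolation.

#include <map>
#include <tuple>
#include <utility>
#include <vector>

using IODesc = std::tuple<int /*model*/, int /*subgraph*/, int /*io*/>;

int main()
{
  // Output 0 of model 0 feeds input 0 of model 1 and input 1 of model 2.
  std::vector<std::pair<IODesc, IODesc>> edges = {
    {std::make_tuple(0, 0, 0), std::make_tuple(1, 0, 0)},
    {std::make_tuple(0, 0, 0), std::make_tuple(2, 0, 1)},
  };

  // Same inversion as the constructor: producer -> list of consumers.
  std::map<IODesc, std::vector<IODesc>> edge_map;
  for (const auto &edge : edges)
    edge_map[edge.first].emplace_back(edge.second);

  // edge_map[{0,0,0}] now lists both consumers; during execute() the buffer
  // produced at {0,0,0} is kept alive (via reference counting) until both
  // consuming executors have run.
  return edge_map.at(std::make_tuple(0, 0, 0)).size() == 2 ? 0 : 1;
}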

◆ MultiModelExecutors() [3/4]

onert::exec::MultiModelExecutors::MultiModelExecutors ( const MultiModelExecutors & )
delete

◆ MultiModelExecutors() [4/4]

onert::exec::MultiModelExecutors::MultiModelExecutors ( MultiModelExecutors &&  )
default

◆ ~MultiModelExecutors()

onert::exec::MultiModelExecutors::~MultiModelExecutors ( )
default

Member Function Documentation

◆ at()

IExecutor * onert::exec::MultiModelExecutors::at ( const ir::ModelIndex & model_index,
const ir::SubgraphIndex & subg_index 
) const
override virtual

Return the executor at the given index.

Parameters
    [in]  model_index  Model index
    [in]  subg_index   Subgraph index
Returns
    Executor

Implements onert::exec::IExecutors.

Definition at line 68 of file MultiModelExecutors.cc.

70{
71 return _executors.at(std::make_pair(model_index, subg_index)).get();
72}

Referenced by execute(), inputInfo(), and outputInfo().

◆ emplace()

void onert::exec::MultiModelExecutors::emplace ( const ir::ModelIndex & model_index,
const ir::SubgraphIndex & subg_index,
std::unique_ptr< IExecutor > exec 
)
override virtual

Insert an executor into the executor set.

Parameters
    [in]  model_index  Model index
    [in]  subg_index   Subgraph index
    [in]  exec         Executor to insert

Implements onert::exec::IExecutors.

Definition at line 61 of file MultiModelExecutors.cc.

64{
65 _executors.emplace(std::make_pair(model_index, subg_index), std::move(exec));
66}
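A short, hedged sketch of the emplace()/at() pairing: executors are stored under their (ModelIndex, SubgraphIndex) key, so equal subgraph indices in different models do not collide, and at() hands back a raw pointer while ownership stays inside the set. How the IExecutor itself is built is outside this class and is assumed here.

#include <MultiModelExecutors.h>
#include <memory>

void register_and_lookup(onert::exec::MultiModelExecutors &executors,
                         std::unique_ptr<onert::exec::IExecutor> exec)
{
  // Model 1, primary subgraph 0.
  executors.emplace(onert::ir::ModelIndex{1}, onert::ir::SubgraphIndex{0}, std::move(exec));

  // Non-owning pointer to the executor that was just inserted.
  onert::exec::IExecutor *same =
    executors.at(onert::ir::ModelIndex{1}, onert::ir::SubgraphIndex{0});
  (void)same;
}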

◆ execute()

void onert::exec::MultiModelExecutors::execute ( const ExecutionContext & ctx)
override virtual

Execute the NN package's executor set.

Parameters
    [in]  ctx  Execution context

Implements onert::exec::IExecutors.

Definition at line 374 of file MultiModelExecutors.cc.

375{
376 auto &desc = ctx.desc;
377
378 // Check supported multi model package
379 checkSupportedMultimodel();
380
381 // TODO Move creating type-aware quantization layers for edges in compilation stage
382 createEdgeQuantLayers();
383
384 // TODO Create IOTensors only once and recreate them only if nnpkg info changes
385 CreatePkgIOTensors(desc);
386
387 // TODO Create type-aware quantization layers only once and recreate them only if type changes
388 createPkgIOQuantLayers(desc);
389
390 // TODO Find better way to schedule order of executors
391 auto const model_count = modelCount();
392
393 auto find_from = [&](const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
394 const ir::IOIndex &io_index) {
395 for (const auto &edge : _model_edges->edges)
396 {
397 if ((std::get<ir::ModelIndex>(edge.to) == model_index) &&
398 (std::get<ir::SubgraphIndex>(edge.to) == subg_index) &&
399 (std::get<ir::IOIndex>(edge.to) == io_index))
400 return edge.from;
401 }
402
403 throw std::runtime_error{"Cannot find edge for model input"};
404 };
405
406 // Execute each model
407 // NOTE May be better to use vector instead of unordered_map for _executors
408 for (auto model_index = ir::ModelIndex{0}; model_index.value() < model_count; model_index++)
409 {
410 // Find executor
411 auto executor = at(model_index, ir::SubgraphIndex{0});
412
413 // Set IOTensors
414 // TODO Set internal IOTensors only once
415 std::vector<backend::IPortableTensor *> inputs_inter;
416 std::vector<backend::IPortableTensor *> outputs_inter;
417 auto const input_size = executor->inputSize();
418 auto const output_size = executor->outputSize();
419 inputs_inter.resize(input_size);
420 outputs_inter.resize(output_size);
421
422 // Set inputs of executor
423 // TODO Create layer to allocate/deallocate buffers of EdgeTensor for each executor
424 for (uint32_t i = 0; i < input_size; i++)
425 {
426 const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index,
427 ir::SubgraphIndex{0}, ir::IOIndex{i});
428 const auto input_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
429 if (input_pkg_index != -1)
430 {
431 // Allocate type-aware quantization tensors for nnpkg inputs and set internal tensors
432 if (_pkg_input_quant_tensors.find(input_io_desc) != _pkg_input_quant_tensors.end())
433 {
434 _pkg_input_quant_tensors[input_io_desc]->allocate_buffer();
435
436 inputs_inter[i] = _pkg_input_quant_tensors[input_io_desc].get();
437 }
438 else
439 {
440 inputs_inter[i] = _pkg_input_tensors[input_io_desc].get();
441 }
442 }
443 else
444 {
445 auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i});
446
447 // Only sequential execution of models is supported
448 assert(std::get<ir::ModelIndex>(from_iodesc).value() < model_index.value());
449 assert(std::get<ir::SubgraphIndex>(from_iodesc).value() == 0);
450 const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
451 if (_edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end())
452 {
453 inputs_inter[i] = _edge_tensors.at(from_iodesc).get();
454 }
455 else
456 {
457 inputs_inter[i] = _edge_quant_tensors.at(to_iodesc).get();
458 }
459 assert(inputs_inter[i]->buffer() != nullptr);
460 }
461 }
462
463 // Set outputs of executor
464 for (uint32_t i = 0; i < output_size; i++)
465 {
466 const auto output_pkg_index = find_output_index(_model_edges->pkg_outputs, model_index,
467 ir::SubgraphIndex{0}, ir::IOIndex{i});
468 const auto output_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
469 if (output_pkg_index != -1)
470 {
471 // Allocate type-aware quantization tensors for nnpkg outputs and set internal tensors
472 if (_pkg_output_quant_tensors.find(output_io_desc) != _pkg_output_quant_tensors.end())
473 {
474 _pkg_output_quant_tensors[output_io_desc]->allocate_buffer();
475
476 outputs_inter[i] = _pkg_output_quant_tensors[output_io_desc].get();
477 }
478 else
479 {
480 outputs_inter[i] = _pkg_output_tensors[output_io_desc].get();
481 }
482 }
483 else
484 {
485 // Allocate buffer of `from` tensors
486 const auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
487 _edge_tensors[from_iodesc]->allocate_buffer();
488 outputs_inter[i] = _edge_tensors[from_iodesc].get();
489
490 // Allocate buffer of tensors for type-aware quantization
491 for (const auto &to_iodesc : _edge_map[from_iodesc])
492 {
493 _edge_tensors[from_iodesc]->increase_ref();
494 if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
495 {
496 auto type_aware_quant_tensor = _edge_quant_tensors.at(to_iodesc).get();
497 type_aware_quant_tensor->allocate_buffer();
498
499 _edge_tensors[from_iodesc]->decrease_ref();
500 }
501 }
502 }
503 }
504
505 _pkg_input_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
506
507 executor->execute(inputs_inter, outputs_inter, ctx.options);
508
509 _edge_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
510 _pkg_output_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
511
512 // Release input buffers that are no longer needed
513 for (uint32_t i = 0; i < input_size; i++)
514 {
515 const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index,
516 ir::SubgraphIndex{0}, ir::IOIndex{i});
517
518 const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
519 if (input_pkg_index == -1)
520 {
521 if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
522 {
523 // Decrease reference count of the type-aware quantization tensor if the input tensor
524 // is that tensor
525 const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
526 if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
527 {
528 _edge_quant_tensors[to_iodesc]->decrease_ref();
529 }
530 }
531 else
532 {
533 // Decrease reference count of `from` tensor if input tensor is the `from` tensor
534 const auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i});
535 _edge_tensors[from_iodesc]->decrease_ref();
536
537 // Decrease reference count of nnpkg inputs
538 if (_pkg_input_quant_tensors.find(to_iodesc) != _pkg_input_quant_tensors.end())
539 {
540 _pkg_input_quant_tensors[to_iodesc]->decrease_ref();
541 }
542 }
543 }
544 }
545
546 // Release output buffers if those buffers are no longer used by other executors because of
547 // type-aware quantization
548 // FIXME if tensors for type-aware quantization unified for the same `from` tensor and same type
549 for (uint32_t i = 0; i < output_size; i++)
550 {
551 auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
552
553 // Check if other executors will use the buffer of edge tensor
554 const auto &to_list = _edge_map[from_iodesc];
555 if (to_list.size() == 0)
556 {
557 // This condition means `from_iodesc` tensor is an output of nnpkg
558 continue;
559 }
560
561 bool to_be_release =
562 !std::any_of(to_list.begin(), to_list.end(), [&](const ir::IODesc &to_iodesc) {
563 // This condition means another executor uses the buffer of edge tensor
564 return _edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end();
565 });
566
567 if (to_be_release)
568 {
569 // This edge tensor's buffer won't be used in other executors
570 // Tensors for type-aware quantization take over the role of this edge tensor instead
571 _edge_tensors[from_iodesc]->decrease_ref();
572 }
573
574 // Decrease reference count of nnpkg outputs
575 if (_pkg_output_quant_tensors.find(from_iodesc) != _pkg_output_quant_tensors.end())
576 {
577 _pkg_output_quant_tensors[from_iodesc]->decrease_ref();
578 }
579 }
580 }
581}

References at(), onert::exec::ExecutionContext::desc, onert::exec::IExecutor::inputSize(), onert::exec::ExecutionContext::options, and onert::util::Index< T, DummyTag >::value().
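The scheduling in execute() is strictly sequential by ascending model index; for every executor input that is not a package input, the find_from lambda scans the edge list for the producer-side IODesc that targets this exact (model, subgraph, io) slot. The standalone sketch below is not onert code (plain ints replace the ir index types); it isolates that lookup so the assertion in the listing that the producer's model index is smaller than the current one is easier to follow.

#include <stdexcept>
#include <tuple>
#include <vector>

using IODesc = std::tuple<int /*model*/, int /*subgraph*/, int /*io*/>;
struct Edge { IODesc from; IODesc to; };

// Find the producer of the given consumer slot, as execute()'s find_from lambda does.
IODesc find_from(const std::vector<Edge> &edges, int model, int subg, int io)
{
  for (const auto &e : edges)
    if (e.to == std::make_tuple(model, subg, io))
      return e.from;
  throw std::runtime_error{"Cannot find edge for model input"};
}

int main()
{
  // Output 0 of model 0 feeds input 0 of model 1.
  std::vector<Edge> edges = {{std::make_tuple(0, 0, 0), std::make_tuple(1, 0, 0)}};

  // Because models run in ascending model-index order, the producer's buffer
  // already exists when model 1 asks for its input.
  auto from = find_from(edges, /*model=*/1, /*subg=*/0, /*io=*/0);
  return std::get<0>(from) == 0 ? 0 : 1;
}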

◆ inputInfo()

const ir::OperandInfo & onert::exec::MultiModelExecutors::inputInfo ( const ir::IOIndex & index) const
override virtual

Return NN package input tensor info.

Parameters
    [in]  index  Input index
Returns
    Tensor info

Implements onert::exec::IExecutors.

Definition at line 78 of file MultiModelExecutors.cc.

79{
80 auto const desc = _model_edges->pkg_inputs[index.value()];
81 auto const model_index = std::get<0>(desc);
82 auto const subg_index = std::get<1>(desc);
83 auto const io_index = std::get<2>(desc);
84 auto const executor = at(model_index, subg_index);
85 return executor->inputInfo(io_index.value());
86}

References at(), and onert::util::Index< T, DummyTag >::value().
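A hedged usage sketch for the package-level queries: inputSize() reports how many pkg_inputs the package declares, and inputInfo(i) resolves the i-th of them through its IODesc to the owning executor, as the listing above shows. The ir::OperandInfo accessors themselves belong to the wider ir API and are not assumed here.

#include <MultiModelExecutors.h>
#include <cstdint>

void inspect_package_inputs(const onert::exec::MultiModelExecutors &executors)
{
  for (uint32_t i = 0; i < executors.inputSize(); ++i)
  {
    // Describes the tensor behind pkg_inputs[i] of the NN package.
    const onert::ir::OperandInfo &info = executors.inputInfo(onert::ir::IOIndex{i});
    (void)info; // shape/type can be read from info via the ir::OperandInfo API
  }
}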

◆ inputSize()

uint32_t onert::exec::MultiModelExecutors::inputSize ( ) const
override virtual

Return the executor set's number of inputs.

Returns
    Number of inputs

Implements onert::exec::IExecutors.

Definition at line 74 of file MultiModelExecutors.cc.

74{ return _model_edges->pkg_inputs.size(); }

◆ outputInfo()

const ir::OperandInfo & onert::exec::MultiModelExecutors::outputInfo ( const ir::IOIndex & index) const
override virtual

Return NN package output tensor info.

Parameters
    [in]  index  Output index
Returns
    Tensor info

Implements onert::exec::IExecutors.

Definition at line 88 of file MultiModelExecutors.cc.

89{
90 auto const desc = _model_edges->pkg_outputs[index.value()];
91 auto const model_index = std::get<0>(desc);
92 auto const subg_index = std::get<1>(desc);
93 auto const io_index = std::get<2>(desc);
94 auto const executor = at(model_index, subg_index);
95 return executor->outputInfo(io_index.value());
96}

References at(), and onert::util::Index< T, DummyTag >::value().

◆ outputSize()

uint32_t onert::exec::MultiModelExecutors::outputSize ( ) const
override virtual

Return the executor set's number of outputs.

Returns
    Number of outputs

Implements onert::exec::IExecutors.

Definition at line 76 of file MultiModelExecutors.cc.

76{ return _model_edges->pkg_outputs.size(); }

The documentation for this class was generated from the following files: