ONE - On-device Neural Engine
Loading...
Searching...
No Matches
onert::exec::ParallelExecutor Class Reference

Class to execute Graph in parallel. More...

#include <ParallelExecutor.h>

Collaboration diagram for onert::exec::ParallelExecutor:

Public Member Functions

 ParallelExecutor (std::unique_ptr< compiler::LoweredGraph > lowered_graph, backend::BackendContexts &&backend_contexts, const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, const util::TracingCtx *tracing_ctx)
 Constructs a ParallelExecutor object.
 
void executeImpl (const ExecutionObservee &subject) override
 
- Public Member Functions inherited from onert::exec::DataflowExecutor
 DataflowExecutor (std::unique_ptr< compiler::LoweredGraph > lowered_graph, backend::BackendContexts &&backend_contexts, const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, const util::TracingCtx *tracing_ctx)
 Constructs a DataflowExecutor object.
 
- Public Member Functions inherited from onert::exec::ExecutorBase
 ExecutorBase (std::unique_ptr< compiler::LoweredGraph > &&lowered_graph, backend::BackendContexts &&backend_contexts, const compiler::TensorRegistries &tensor_regs, const util::TracingCtx *tracing_ctx)
 Construct a new ExecutorBase object.
 
virtual ~ExecutorBase ()=default
 
const ir::Graph & graph () const final
 Returns graph object.
 
void execute (const std::vector< backend::IPortableTensor * > &inputs, const std::vector< backend::IPortableTensor * > &outputs, const ExecutionOptions &options) override
 Execute with given input/output tensors.
 
uint32_t inputSize () const override
 Get input size.
 
uint32_t outputSize () const override
 Get output size.
 
const ir::OperandInfo & inputInfo (uint32_t index) const override
 Get input info at index.
 
const ir::OperandInfo & outputInfo (uint32_t index) const override
 Get output info at index.
 
ir::Layout inputLayout (uint32_t index) const override
 Get input layout at index.
 
ir::Layout outputLayout (uint32_t index) const override
 Get output layout at index.
 
void setIndexedRanks (std::shared_ptr< ir::OperationIndexMap< int64_t > > ranks) final
 Set an ordering on operations.
 
void addObserver (std::unique_ptr< IExecutionObserver > ref)
 
backend::BackendContexts & getBackendContexts ()
 
const ExecutionOptions & currentOptions () const override
 Return current execution configuration.
 
- Public Member Functions inherited from onert::exec::IExecutor
 IExecutor ()=default
 Construct a new IExecutor object.
 
virtual ~IExecutor ()=default
 Destroy the IExecutor object.
 

Protected Member Functions

void notify (uint32_t finished_job_id) override
 
- Protected Member Functions inherited from onert::exec::DataflowExecutor
bool noWaitingJobs ()
 
int64_t calculateRank (const std::vector< ir::OperationIndex > &operations)
 
void emplaceToReadyJobs (const uint32_t &id)
 
- Protected Member Functions inherited from onert::exec::ExecutorBase
bool hasDynamicInput ()
 Returns true if any input tensor is dynamic; false if all are static tensors.
 

Additional Inherited Members

- Protected Attributes inherited from onert::exec::DataflowExecutor
compiler::CodeMap _code_map
 
std::vector< std::unique_ptr< Job > > _finished_jobs
 A vector of finished jobs for current execution After a run it has all the jobs of this execution for the next run.
 
std::vector< std::unique_ptr< Job > > _waiting_jobs
 A vector of waiting jobs for current execution All the jobs are moved from _finished_jobs to it when start a run.
 
std::vector< std::list< uint32_t > > _output_info
 Jobs' output info Used for notifying after finishing a job.
 
std::vector< uint32_t > _initial_input_info
 
std::vector< uint32_t > _input_info
 
std::multimap< int64_t, std::unique_ptr< Job >, std::greater< int64_t > > _ready_jobs
 A collection of jobs that are ready for execution Jobs in it are ready to be scheduled. Ordered by priority from _indexed_ranks
 
std::unordered_map< uint32_t, ir::OperationIndex > _job_to_op
 Which job runs which op and function.
 
- Protected Attributes inherited from onert::exec::ExecutorBase
ExecObservers _observers
 
std::shared_ptr< ir::OperationIndexMap< int64_t > > _indexed_ranks
 
std::unique_ptr< compiler::LoweredGraph > _lowered_graph
 
backend::BackendContexts _backend_contexts
 
const ir::Graph & _graph
 
std::vector< backend::builtin::IOTensor * > _input_tensors
 
std::vector< backend::builtin::IOTensor * > _output_tensors
 
std::mutex _mutex
 
const util::TracingCtx * _tracing_ctx
 
ExecutionOptions _current_options
 

Detailed Description

Class to execute Graph in parallel.

Definition at line 35 of file ParallelExecutor.h.

Constructor & Destructor Documentation

◆ ParallelExecutor()

onert::exec::ParallelExecutor::ParallelExecutor ( std::unique_ptr< compiler::LoweredGraph >  lowered_graph,
backend::BackendContexts &&  backend_contexts,
const compiler::TensorRegistries &  tensor_regs,
compiler::CodeMap &&  code_map,
const util::TracingCtx *  tracing_ctx 
)

Constructs a ParallelExecutor object.

Parameters
lowered_graph  LoweredGraph object
tensor_regs  Tensor registries that are currently used
code_map  ir::Operation and its code map

Definition at line 62 of file ParallelExecutor.cc.

67 : DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs,
68 std::move(code_map), tracing_ctx}
69{
70 VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
71}
DataflowExecutor(std::unique_ptr< compiler::LoweredGraph > lowered_graph, backend::BackendContexts &&backend_contexts, const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, const util::TracingCtx *tracing_ctx)
Constructs a DataflowExecutor object.
#define VERBOSE(name, lv)
Definition Log.h:71

References VERBOSE.

Member Function Documentation

◆ executeImpl()

void onert::exec::ParallelExecutor::executeImpl ( const ExecutionObservee &  subject)
overridevirtual

Reimplemented from onert::exec::DataflowExecutor.

Definition at line 73 of file ParallelExecutor.cc.

74{
75 bool dynamic_input_exists = hasDynamicInput();
76
77 // Init scheduler
77 // TODO Consider to have distinct backend set in GraphLowerInfo
78 util::BackendSet backends;
80 for (const auto &[idx, backend] : _lowered_graph->lower_info().operation)
81 backends.add(backend);
82
83 _scheduler = std::make_unique<ParallelScheduler>(backends);
84
85 assert(noWaitingJobs());
86
87 // Execution setup
88 _waiting_jobs.swap(_finished_jobs); // Move finished jobs to waiting jobs
89
90 for (uint32_t i = 0; i < _waiting_jobs.size(); ++i)
91 {
92 VERBOSE(ParallelExecutor) << i << ": " << _input_info[i] << std::endl;
93 if (_input_info[i] == 0)
94 {
95 emplaceToReadyJobs(i);
96 }
97 }
98 assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs
99
100 VERBOSE(ParallelExecutor) << "INITIAL JOBS : " << _ready_jobs.size() << std::endl;
101
102 auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
103
104 subject.notifySubgraphBegin(profiling_subg_index);
105
106 while (true)
107 {
108 std::unique_lock<std::mutex> lock{_mu_jobs};
109
110 if (_ready_jobs.empty())
111 {
112 _cv_jobs.wait(lock, [this] { return !_ready_jobs.empty() || noWaitingJobs(); });
113 // Check finish condition
114 if (_ready_jobs.empty() && noWaitingJobs())
115 {
116 break;
117 }
118 }
119
120 auto job = std::move(_ready_jobs.begin()->second);
121 _ready_jobs.erase(_ready_jobs.begin());
122
123 lock.unlock();
124
125 VERBOSE(ParallelExecutor) << "Assigning fn " << job->index() << std::endl;
126
127 auto job_index = job->index();
128 auto op_ind = _job_to_op[job_index];
129 const auto backend = _lowered_graph->lower_info().operation.at(op_ind);
130 auto setup = [&, op_ind, backend]() {
131 subject.notifyJobBegin(this, profiling_subg_index, op_ind, backend);
132 };
133 auto teardown = [&, job_index, op_ind, backend]() {
134 subject.notifyJobEnd(this, profiling_subg_index, op_ind, backend);
135 notify(job_index);
136 };
137
138 job->fn_seq()->initRunning();
139
140 // dynamic tensor setting
141 bool handle_dynamic_tensor =
142 _lowered_graph->getHasDynamicTensor(op_ind) || dynamic_input_exists;
143 job->fn_seq()->enableDynamicShapeInferer(handle_dynamic_tensor);
144
145 _scheduler->assign(std::make_unique<HookFunction>(job->fn_seq(), setup, teardown), backend);
146 _finished_jobs[job_index] = std::move(job);
147 }
148
149 assert(noWaitingJobs());
150
151 // Wait for all the jobs done
152 _scheduler->finish();
153 subject.notifySubgraphEnd(profiling_subg_index);
154
155 // Reset input info for the next execution
156 _input_info = _initial_input_info;
157}
std::multimap< int64_t, std::unique_ptr< Job >, std::greater< int64_t > > _ready_jobs
A collection of jobs that are ready for execution Jobs in it are ready to be scheduled....
std::vector< uint32_t > _input_info
std::vector< std::unique_ptr< Job > > _waiting_jobs
A vector of waiting jobs for current execution All the jobs are moved from _finished_jobs to it when ...
std::vector< std::unique_ptr< Job > > _finished_jobs
A vector of finished jobs for current execution After a run it has all the jobs of this execution for...
std::vector< uint32_t > _initial_input_info
void emplaceToReadyJobs(const uint32_t &id)
std::unordered_map< uint32_t, ir::OperationIndex > _job_to_op
Which job runs which op and function.
std::unique_ptr< compiler::LoweredGraph > _lowered_graph
bool hasDynamicInput()
Returns true if any input tensor is dynamic; false if all are static tensors.
const util::TracingCtx * _tracing_ctx
const ir::Graph & _graph
void notify(uint32_t finished_job_id) override
ir::SubgraphIndex getSubgraphIndex(const ir::Graph *g) const
Get subgraph index of a graph.
Definition TracingCtx.h:61
util::Set< const backend::Backend * > BackendSet
Definition BackendSet.h:35

References onert::exec::DataflowExecutor::_finished_jobs, onert::exec::ExecutorBase::_graph, onert::exec::DataflowExecutor::_initial_input_info, onert::exec::DataflowExecutor::_input_info, onert::exec::DataflowExecutor::_job_to_op, onert::exec::ExecutorBase::_lowered_graph, onert::exec::DataflowExecutor::_ready_jobs, onert::exec::ExecutorBase::_tracing_ctx, onert::exec::DataflowExecutor::_waiting_jobs, onert::exec::DataflowExecutor::emplaceToReadyJobs(), onert::util::TracingCtx::getSubgraphIndex(), onert::exec::ExecutorBase::hasDynamicInput(), notify(), onert::exec::ExecutionObservee::notifyJobBegin(), onert::exec::ExecutionObservee::notifyJobEnd(), onert::exec::ExecutionObservee::notifySubgraphBegin(), onert::exec::ExecutionObservee::notifySubgraphEnd(), onert::exec::DataflowExecutor::noWaitingJobs(), and VERBOSE.

◆ notify()

void onert::exec::ParallelExecutor::notify ( uint32_t  finished_job_id)
overrideprotectedvirtual

Reimplemented from onert::exec::DataflowExecutor.

Definition at line 52 of file ParallelExecutor.cc.

53{
54 std::unique_lock<std::mutex> lock{_mu_jobs};
55
56 DataflowExecutor::notify(finished_job_id);
57
58 lock.unlock();
59 _cv_jobs.notify_all();
60}
virtual void notify(uint32_t finished_job_id)

References onert::exec::DataflowExecutor::notify().

Referenced by executeImpl().


The documentation for this class was generated from the following files: