ONE - On-device Neural Engine
onert::backend::trix::DevContext Class Reference

NPU device context of trix backend.

#include <DevContext.h>

Public Member Functions

 DevContext ()
 Construct a new device context object.
 
 ~DevContext ()
 Destroy the device context object.
 
 DevContext (const DevContext &)=delete
 
DevContext & operator= (const DevContext &)=delete
 
ModelID registerModel (const std::string &model_file_path)
 Register a trix model for all NPU devices.
 
void unRegisterModel (ModelID model_id)
 Unregister a trix model.
 
void requestRun (ModelID model_id, input_buffers *input_bufs, tensors_data_info *in_info, output_buffers *output_bufs, tensors_data_info *out_info, size_t batch_size)
 Request a trix model to be run on NPU.
 

Detailed Description

NPU device context of trix backend.

Definition at line 36 of file DevContext.h.
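
A typical lifecycle registers a model once, issues one or more runs, and unregisters the model when done. A minimal usage sketch (runModelOnce is a hypothetical helper; filling the trix buffer/info structures is shown under requestRun() below):

#include "DevContext.h"

using namespace onert::backend::trix;

void runModelOnce(const std::string &model_path, input_buffers *in_bufs,
                  tensors_data_info *in_info, output_buffers *out_bufs,
                  tensors_data_info *out_info)
{
  DevContext ctx; // Throws std::runtime_error if no TRIX NPU device is found

  // Register the model on all NPU devices; the returned ID is the device-0 ID
  ModelID model_id = ctx.registerModel(model_path);

  // batch_size == 1 runs synchronously on device 0; batch_size > 1 splits the
  // buffers and dispatches one job per batch on the internal thread pool
  ctx.requestRun(model_id, in_bufs, in_info, out_bufs, out_info, /*batch_size=*/1);

  ctx.unRegisterModel(model_id);
}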

Constructor & Destructor Documentation

◆ DevContext() [1/2]

onert::backend::trix::DevContext::DevContext ( )

Construct a new device context object.

Definition at line 28 of file DevContext.cc.

  : _dev_handles{}, _model_ids{}, _meta_map{}
{
  auto dev_count = getnumNPUdeviceByType(NPUCOND_TRIV24_CONN_SOCIP);
  if (dev_count <= 0)
  {
    throw std::runtime_error("Unable to find TRIX NPU device");
  }

  // Get NPU device handles
  for (int i = 0; i < dev_count; ++i)
  {
    npudev_h handle;
    if (getNPUdeviceByType(&handle, NPUCOND_TRIV24_CONN_SOCIP, i) < 0)
    {
      throw std::runtime_error("Failed to get TRIX NPU device handle");
    }
    _dev_handles.emplace_back(handle);
  }

  // NOTE Do not change the number of threads as long as jobs in the threads call
  //      synchronous APIs such as submitNPU_request()
  _batch_thread_pool = std::make_unique<BatchThreadPool>(_dev_handles.size());
  // NOTE Be careful not to create multiple `BatchThreadPool`s. With multiple models, having
  //      multiple `BatchThreadPool`s may be a problem in the current implementation. If thread
  //      pool creation is moved to the NPU daemon, this problem should be solved cleanly.
}
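
Because the constructor throws when no device is found or a device handle cannot be acquired, callers are expected to guard construction. A minimal sketch, assuming <memory> and <stdexcept> are available:

try
{
  auto dev_context = std::make_unique<onert::backend::trix::DevContext>();
  // ... register models and request runs ...
}
catch (const std::runtime_error &e)
{
  // No TRIX NPU device is available, or a device handle could not be acquired
}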

◆ ~DevContext()

onert::backend::trix::DevContext::~DevContext ( )

Destroy the device context object.

Definition at line 55 of file DevContext.cc.

{
  // NOTE Must release _batch_thread_pool before releasing _dev_handles to wait for all threads
  //      to be terminated
  _batch_thread_pool.reset(nullptr);

  for (const auto &dev_handle : _dev_handles)
  {
    unregisterNPUmodel_all(dev_handle);
    putNPUdevice(dev_handle);
  }
}

◆ DevContext() [2/2]

onert::backend::trix::DevContext::DevContext ( const DevContext & )
delete

Member Function Documentation

◆ operator=()

DevContext & onert::backend::trix::DevContext::operator= ( const DevContext & )
delete

◆ registerModel()

ModelID onert::backend::trix::DevContext::registerModel ( const std::string &  model_file_path)

Register a trix model for all NPU devices.

Parameters
model_file_path    File path of a trix model
Returns
ModelID Internal ID of the trix model

Definition at line 68 of file DevContext.cc.

{
  if (_dev_handles.size() == 0)
  {
    throw std::runtime_error("No npu device is available");
  }

  std::unique_ptr<npubin_meta, decltype(&free)> meta(
    getNPUmodel_metadata(model_file_path.c_str(), false), free);

  if (meta == nullptr)
  {
    throw std::runtime_error("Unable to extract the model metadata");
  }

  generic_buffer file_info;
  file_info.type = BUFFER_FILE;
  file_info.filepath = model_file_path.c_str();
  file_info.size = meta->size;

  ModelID model_id = 0;

  for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
  {
    // Register the model on each device
    uint32_t model_id_at_device;
    if (registerNPUmodel(_dev_handles.at(dev_num), &file_info, &model_id_at_device) < 0)
    {
      throw std::runtime_error("Failed to register npu model");
    }

    if (dev_num == 0)
    {
      model_id = model_id_at_device;
      _meta_map[model_id_at_device] = std::shared_ptr<npubin_meta>(std::move(meta));
    }
    else
    {
      _meta_map[model_id_at_device] = _meta_map[model_id];
    }

    _model_ids[model_id].resize(dev_num + 1);
    _model_ids[model_id].at(dev_num) = model_id_at_device;
  }

  // Return the model ID for device 0 only
  return model_id;
}
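
The ID returned to the caller is the one assigned by device 0; _model_ids keeps the per-device IDs so later calls can resolve the right ID for each device, exactly as unRegisterModel() does below. A minimal sketch of that lookup, assuming a map-like container for _model_ids (the helper name is hypothetical):

#include <cstdint>
#include <unordered_map>
#include <vector>

// Resolve the device-local model ID, mirroring the pattern in unRegisterModel()
uint32_t modelIdAtDevice(const std::unordered_map<ModelID, std::vector<uint32_t>> &model_ids,
                         ModelID model_id, uint32_t dev_num)
{
  // model_ids.at(model_id) holds one device-local ID per registered device
  return model_ids.at(model_id).at(dev_num);
}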

◆ requestRun()

void onert::backend::trix::DevContext::requestRun ( ModelID  model_id,
input_buffers *  input_bufs,
tensors_data_info *  in_info,
output_buffers *  output_bufs,
tensors_data_info *  out_info,
size_t  batch_size 
)

Request a trix model to be run on NPU.

Parameters
model_id       Internal ID of a trix model
input_bufs     Buffer data of inputs
in_info        Data info of inputs
output_bufs    Buffer data of outputs
out_info       Data info of outputs
batch_size     Batch size

Definition at line 134 of file DevContext.cc.

{
  if (batch_size > 1)
  {
    if (in_info->num_info != 1)
    {
      throw std::runtime_error("Only a single batched input is supported now");
    }
    if (out_info->num_info != 1)
    {
      throw std::runtime_error("Only one output is supported now");
    }

    if (input_bufs->bufs[0].size % batch_size != 0)
    {
      throw std::runtime_error("Invalid batch size. batch size: " + std::to_string(batch_size) +
                               ", input buffer size: " + std::to_string(input_bufs->bufs[0].size));
    }

    if (output_bufs->bufs[0].size % batch_size != 0)
    {
      throw std::runtime_error(
        "Invalid batch size. batch size: " + std::to_string(batch_size) +
        ", output tensor size: " + std::to_string(output_bufs->bufs[0].size));
    }

    // Inputs/outputs for each batch
    std::vector<input_buffers> in_buffers_vec(batch_size);
    std::vector<output_buffers> out_buffers_vec(batch_size);

    // Run on the thread pool
    std::vector<std::future<int32_t>> batch_futures;
    for (uint32_t batch_num = 0; batch_num < batch_size; ++batch_num)
    {
      // Enqueue jobs
      // in_info and out_info are the same for every batch, so they are used as-is
      auto future = _batch_thread_pool->enqueueJob(
        [batch_size, in_info, out_info,
         this](uint32_t dev_num, ModelID model_id, const input_buffers *input_bufs,
               const output_buffers *output_bufs, uint32_t batch_num) -> int32_t {
          // Set buffers of inputs/outputs for each batch
          // TODO Support multiple inputs/outputs
          input_buffers in_batch_buffers;
          in_batch_buffers.num_buffers = input_bufs->num_buffers;
          const uint64_t in_batch_offset = input_bufs->bufs[0].size / batch_size;
          setBufferByBatch(input_bufs->bufs[0], batch_num, in_batch_offset,
                           &in_batch_buffers.bufs[0]);

          output_buffers out_batch_buffers;
          out_batch_buffers.num_buffers = output_bufs->num_buffers;
          const uint64_t out_batch_offset = output_bufs->bufs[0].size / batch_size;
          setBufferByBatch(output_bufs->bufs[0], batch_num, out_batch_offset,
                           &out_batch_buffers.bufs[0]);

          try
          {
            // dev_num is the same as the thread number in _batch_thread_pool
            this->runOneBatch(dev_num, model_id, &in_batch_buffers, in_info, &out_batch_buffers,
                              out_info);
          }
          catch (...)
          {
            _eptr = std::current_exception();
          }

          return batch_num;
        },
        model_id, input_bufs, output_bufs, batch_num);
      batch_futures.emplace_back(std::move(future));
    }

    for (auto &&future : batch_futures)
    {
      future.get();
    }

    if (_eptr)
    {
      std::exception_ptr eptr(nullptr);
      _eptr.swap(eptr);
      std::rethrow_exception(eptr);
    }
  }
  else
  {
    runOneBatch(0, model_id, input_bufs, in_info, output_bufs, out_info);
  }
}
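
A hedged usage sketch for a batched run, assuming a single contiguous input and output whose sizes divide evenly by the batch size. The generic_buffer size field follows the usage shown above; BUFFER_MAPPED and the addr field are assumptions about the trix host-library buffer layout:

// Assumed setup: input_data/output_data are host buffers holding all batches;
// ctx, model_id, and batch_size come from the lifecycle example above
input_buffers in_bufs;
in_bufs.num_buffers = 1;
in_bufs.bufs[0].type = BUFFER_MAPPED;      // assumption: host-mapped memory buffer
in_bufs.bufs[0].addr = input_data;
in_bufs.bufs[0].size = total_input_bytes;  // must be divisible by batch_size

output_buffers out_bufs;
out_bufs.num_buffers = 1;
out_bufs.bufs[0].type = BUFFER_MAPPED;
out_bufs.bufs[0].addr = output_data;
out_bufs.bufs[0].size = total_output_bytes; // must be divisible by batch_size

tensors_data_info in_info, out_info;
in_info.num_info = 1;  // batched runs currently support exactly one input
out_info.num_info = 1; // and exactly one output

ctx.requestRun(model_id, &in_bufs, &in_info, &out_bufs, &out_info, batch_size);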

◆ unRegisterModel()

void onert::backend::trix::DevContext::unRegisterModel ( ModelID  model_id)

Unregister a trix model.

Parameters
model_id    Internal ID of the trix model to be unregistered

Definition at line 117 of file DevContext.cc.

{
  for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
  {
    const auto model_id_at_device = _model_ids.at(model_id).at(dev_num);
    const auto &dev_handle = _dev_handles.at(dev_num);

    // Remove metadata
    _meta_map.erase(model_id_at_device);

    // Unregister the model on each device
    unregisterNPUmodel(dev_handle, model_id_at_device);
  }
  // Remove model IDs
  _model_ids.erase(model_id);
}

The documentation for this class was generated from the following files: DevContext.h, DevContext.cc