ONE - On-device Neural Engine
onert::backend::trix::DevContext Class Reference

NPU device context of trix backend.

#include <DevContext.h>

Public Member Functions

 DevContext ()
 Construct a new device context object.
 
 ~DevContext ()
 Destroy the device context object.
 
 DevContext (const DevContext &)=delete
 
DevContext & operator= (const DevContext &)=delete
 
ModelID registerModel (const std::string &model_file_path)
 Register a trix model for all NPU devices.
 
void unRegisterModel (ModelID model_id)
 Unregister a trix model.
 
void requestRun (ModelID model_id, input_buffers *input_bufs, tensors_data_info *in_info, output_buffers *output_bufs, tensors_data_info *out_info, size_t batch_size)
 Request a trix model to be run on NPU.
 

Detailed Description

NPU device context of trix backend.

Definition at line 36 of file DevContext.h.
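
A typical lifecycle registers a model once, issues one or more runs, and unregisters the model when done. A minimal usage sketch (runModelOnce is a hypothetical helper; filling the trix buffer/info structures is shown under requestRun() below):

#include "DevContext.h"

using namespace onert::backend::trix;

void runModelOnce(const std::string &model_path, input_buffers *in_bufs,
                  tensors_data_info *in_info, output_buffers *out_bufs,
                  tensors_data_info *out_info)
{
  DevContext ctx; // Throws std::runtime_error if no TRIX NPU device is found

  // Register the model on all NPU devices; the returned ID is the device-0 ID
  ModelID model_id = ctx.registerModel(model_path);

  // batch_size == 1 runs synchronously on device 0; batch_size > 1 splits the
  // buffers and dispatches one job per batch on the internal thread pool
  ctx.requestRun(model_id, in_bufs, in_info, out_bufs, out_info, /*batch_size=*/1);

  ctx.unRegisterModel(model_id);
}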

Constructor & Destructor Documentation

◆ DevContext() [1/2]

onert::backend::trix::DevContext::DevContext ( )

Construct a new device context object.

Definition at line 28 of file DevContext.cc.

  : _dev_handles{}, _model_ids{}, _meta_map{}
{
  auto dev_count = getnumNPUdeviceByType(NPUCOND_TRIV24_CONN_SOCIP);
  if (dev_count <= 0)
  {
    throw std::runtime_error("Unable to find TRIX NPU device");
  }

  // Get NPU device handles
  for (int i = 0; i < dev_count; ++i)
  {
    npudev_h handle;
    if (getNPUdeviceByType(&handle, NPUCOND_TRIV24_CONN_SOCIP, i) < 0)
    {
      throw std::runtime_error("Failed to get TRIX NPU device handle");
    }
    _dev_handles.emplace_back(handle);
  }

  // NOTE Do not change the number of threads as long as jobs in the threads call
  //      synchronous APIs such as submitNPU_request()
  _batch_thread_pool = std::make_unique<BatchThreadPool>(_dev_handles.size());
  // NOTE Be careful not to create multiple `BatchThreadPool`s. With multiple models, having
  //      multiple `BatchThreadPool`s may be a problem in the current implementation. If thread
  //      pool creation is moved to the NPU daemon, this problem should be solved cleanly.
}
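
Because the constructor throws when no device is found or a device handle cannot be acquired, callers are expected to guard construction. A minimal sketch, assuming <memory> and <stdexcept> are available:

try
{
  auto dev_context = std::make_unique<onert::backend::trix::DevContext>();
  // ... register models and request runs ...
}
catch (const std::runtime_error &e)
{
  // No TRIX NPU device is available, or a device handle could not be acquired
}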

◆ ~DevContext()

onert::backend::trix::DevContext::~DevContext ( )

Destroy the device context object.

Definition at line 55 of file DevContext.cc.

{
  // NOTE Must release _batch_thread_pool before releasing _dev_handles to wait for all threads
  //      to be terminated
  _batch_thread_pool.reset(nullptr);

  for (const auto &dev_handle : _dev_handles)
  {
    unregisterNPUmodel_all(dev_handle);
    putNPUdevice(dev_handle);
  }
}

◆ DevContext() [2/2]

onert::backend::trix::DevContext::DevContext ( const DevContext & )
delete

Member Function Documentation

◆ operator=()

DevContext & onert::backend::trix::DevContext::operator= ( const DevContext & )
delete

◆ registerModel()

ModelID onert::backend::trix::DevContext::registerModel ( const std::string &  model_file_path)

Register a trix model for all NPU devices.

Parameters
model_file_path    File path of a trix model
Returns
ModelID Internal ID of the trix model

Definition at line 68 of file DevContext.cc.

{
  if (_dev_handles.size() == 0)
  {
    throw std::runtime_error("No npu device is available");
  }

  std::unique_ptr<npubin_meta, decltype(&free)> meta(
    getNPUmodel_metadata(model_file_path.c_str(), false), free);

  if (meta == nullptr)
  {
    throw std::runtime_error("Unable to extract the model metadata");
  }

  generic_buffer file_info;
  file_info.type = BUFFER_FILE;
  file_info.filepath = model_file_path.c_str();
  file_info.size = meta->size;

  ModelID model_id = 0;

  for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
  {
    // Register the model on each device
    uint32_t model_id_at_device;
    if (registerNPUmodel(_dev_handles.at(dev_num), &file_info, &model_id_at_device) < 0)
    {
      throw std::runtime_error("Failed to register npu model");
    }

    if (dev_num == 0)
    {
      model_id = model_id_at_device;
      _meta_map[model_id_at_device] = std::shared_ptr<npubin_meta>(std::move(meta));
    }
    else
    {
      _meta_map[model_id_at_device] = _meta_map[model_id];
    }

    _model_ids[model_id].resize(dev_num + 1);
    _model_ids[model_id].at(dev_num) = model_id_at_device;
  }

  // Return the model ID for device 0 only
  return model_id;
}
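
The ID returned to the caller is the one assigned by device 0; _model_ids keeps the per-device IDs so later calls can resolve the right ID for each device, exactly as unRegisterModel() does below. A minimal sketch of that lookup, assuming a map-like container for _model_ids (the helper name is hypothetical):

#include <cstdint>
#include <unordered_map>
#include <vector>

// Resolve the device-local model ID, mirroring the pattern in unRegisterModel()
uint32_t modelIdAtDevice(const std::unordered_map<ModelID, std::vector<uint32_t>> &model_ids,
                         ModelID model_id, uint32_t dev_num)
{
  // model_ids.at(model_id) holds one device-local ID per registered device
  return model_ids.at(model_id).at(dev_num);
}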

◆ requestRun()

void onert::backend::trix::DevContext::requestRun ( ModelID  model_id,
input_buffers *  input_bufs,
tensors_data_info *  in_info,
output_buffers *  output_bufs,
tensors_data_info *  out_info,
size_t  batch_size 
)

Request a trix model to be run on NPU.

Parameters
model_id       Internal ID of a trix model
input_bufs     Buffer data of inputs
in_info        Data info of inputs
output_bufs    Buffer data of outputs
out_info       Data info of outputs
batch_size     Batch size

Definition at line 134 of file DevContext.cc.

{
  if (batch_size > 1)
  {
    if (in_info->num_info != 1)
    {
      throw std::runtime_error("Only a single batched input is supported now");
    }
    if (out_info->num_info != 1)
    {
      throw std::runtime_error("Only one output is supported now");
    }

    if (input_bufs->bufs[0].size % batch_size != 0)
    {
      throw std::runtime_error("Invalid batch size. batch size: " + std::to_string(batch_size) +
                               ", input buffer size: " + std::to_string(input_bufs->bufs[0].size));
    }

    if (output_bufs->bufs[0].size % batch_size != 0)
    {
      throw std::runtime_error(
        "Invalid batch size. batch size: " + std::to_string(batch_size) +
        ", output tensor size: " + std::to_string(output_bufs->bufs[0].size));
    }

    // Inputs/outputs for each batch
    std::vector<input_buffers> in_buffers_vec(batch_size);
    std::vector<output_buffers> out_buffers_vec(batch_size);

    // Run on the thread pool
    std::vector<std::future<int32_t>> batch_futures;
    for (uint32_t batch_num = 0; batch_num < batch_size; ++batch_num)
    {
      // Enqueue jobs
      // in_info and out_info are the same for every batch, so they are used as-is
      auto future = _batch_thread_pool->enqueueJob(
        [batch_size, in_info, out_info,
         this](uint32_t dev_num, ModelID model_id, const input_buffers *input_bufs,
               const output_buffers *output_bufs, uint32_t batch_num) -> int32_t {
          // Set buffers of inputs/outputs for each batch
          // TODO Support multiple inputs/outputs
          input_buffers in_batch_buffers;
          in_batch_buffers.num_buffers = input_bufs->num_buffers;
          const uint64_t in_batch_offset = input_bufs->bufs[0].size / batch_size;
          setBufferByBatch(input_bufs->bufs[0], batch_num, in_batch_offset,
                           &in_batch_buffers.bufs[0]);

          output_buffers out_batch_buffers;
          out_batch_buffers.num_buffers = output_bufs->num_buffers;
          const uint64_t out_batch_offset = output_bufs->bufs[0].size / batch_size;
          setBufferByBatch(output_bufs->bufs[0], batch_num, out_batch_offset,
                           &out_batch_buffers.bufs[0]);

          try
          {
            // dev_num is the same as the thread number in _batch_thread_pool
            this->runOneBatch(dev_num, model_id, &in_batch_buffers, in_info, &out_batch_buffers,
                              out_info);
          }
          catch (...)
          {
            _eptr = std::current_exception();
          }

          return batch_num;
        },
        model_id, input_bufs, output_bufs, batch_num);
      batch_futures.emplace_back(std::move(future));
    }

    for (auto &&future : batch_futures)
    {
      future.get();
    }

    if (_eptr)
    {
      std::exception_ptr eptr(nullptr);
      _eptr.swap(eptr);
      std::rethrow_exception(eptr);
    }
  }
  else
  {
    runOneBatch(0, model_id, input_bufs, in_info, output_bufs, out_info);
  }
}
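
A hedged usage sketch for a batched run, assuming a single contiguous input and output whose sizes divide evenly by the batch size. The generic_buffer size field follows the usage shown above; BUFFER_MAPPED and the addr field are assumptions about the trix host-library buffer layout:

// Assumed setup: input_data/output_data are host buffers holding all batches;
// ctx, model_id, and batch_size come from the lifecycle example above
input_buffers in_bufs;
in_bufs.num_buffers = 1;
in_bufs.bufs[0].type = BUFFER_MAPPED;      // assumption: host-mapped memory buffer
in_bufs.bufs[0].addr = input_data;
in_bufs.bufs[0].size = total_input_bytes;  // must be divisible by batch_size

output_buffers out_bufs;
out_bufs.num_buffers = 1;
out_bufs.bufs[0].type = BUFFER_MAPPED;
out_bufs.bufs[0].addr = output_data;
out_bufs.bufs[0].size = total_output_bytes; // must be divisible by batch_size

tensors_data_info in_info, out_info;
in_info.num_info = 1;  // batched runs currently support exactly one input
out_info.num_info = 1; // and exactly one output

ctx.requestRun(model_id, &in_bufs, &in_info, &out_bufs, &out_info, batch_size);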

◆ unRegisterModel()

void onert::backend::trix::DevContext::unRegisterModel ( ModelID  model_id)

Unregister a trix model.

Parameters
model_id    Internal ID of the trix model to be unregistered

Definition at line 117 of file DevContext.cc.

{
  for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
  {
    const auto model_id_at_device = _model_ids.at(model_id).at(dev_num);
    const auto &dev_handle = _dev_handles.at(dev_num);

    // Remove metadata
    _meta_map.erase(model_id_at_device);

    // Unregister the model on each device
    unregisterNPUmodel(dev_handle, model_id_at_device);
  }
  // Remove model IDs
  _model_ids.erase(model_id);
}

The documentation for this class was generated from the following files: DevContext.h, DevContext.cc