ONE - On-device Neural Engine
onert::backend::trix::DevContext Class Reference

NPU device context of trix backend. More...

#include <DevContext.h>

Public Member Functions

 DevContext ()
 Construct a new device context object.
 
 ~DevContext ()
 Destroy the device context object.
 
 DevContext (const DevContext &)=delete
 
DevContext & operator= (const DevContext &)=delete
 
ModelID registerModel (const std::string &model_file_path)
 Register a trix model for all NPU devices.
 
void unRegisterModel (ModelID model_id)
 Unregister a trix model.
 
void requestRun (ModelID model_id, input_buffers *input_bufs, tensors_data_info *in_info, output_buffers *output_bufs, tensors_data_info *out_info, size_t batch_size)
 Request a trix model to be run on NPU.
 

Detailed Description

NPU device context of trix backend.

Definition at line 36 of file DevContext.h.
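
A typical lifecycle of the class, as a minimal sketch (the caller-side helper runOnce is hypothetical; input_buffers, output_buffers, and tensors_data_info are libnpuhost types assumed to be filled in elsewhere):

#include "DevContext.h"
#include <string>

// Hypothetical caller sketch: construct a context, register a model,
// run one inference, and unregister the model again.
void runOnce(const std::string &model_path, input_buffers &in_bufs,
             tensors_data_info &in_info, output_buffers &out_bufs,
             tensors_data_info &out_info)
{
  onert::backend::trix::DevContext ctx; // throws if no TRIX NPU device is found
  auto model_id = ctx.registerModel(model_path);
  ctx.requestRun(model_id, &in_bufs, &in_info, &out_bufs, &out_info, /*batch_size=*/1);
  ctx.unRegisterModel(model_id);
}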

Constructor & Destructor Documentation

◆ DevContext() [1/2]

onert::backend::trix::DevContext::DevContext ( )

Construct a new device context object.

Definition at line 33 of file DevContext.cc.

: _dev_handles{}, _model_ids{}, _meta_map{}
{
  auto dev_count = getnumNPUdeviceByType(NPUCOND_TRIV24_CONN_SOCIP);
  if (dev_count <= 0)
  {
    throw std::runtime_error("Unable to find TRIX NPU device");
  }

  // Get NPU device handles
  for (int i = 0; i < dev_count; ++i)
  {
    npudev_h handle;
    if (getNPUdeviceByType(&handle, NPUCOND_TRIV24_CONN_SOCIP, i) < 0)
    {
      throw std::runtime_error("Failed to get TRIX NPU device handle");
    }
    _dev_handles.emplace_back(handle);
  }

  // NOTE Do not change the number of threads as long as jobs in the threads call
  //      synchronous APIs such as submitNPU_request()
  _batch_thread_pool = std::make_unique<BatchThreadPool>(_dev_handles.size());
  // We need to be careful not to create multiple `BatchThreadPool`s. With multiple models, having
  // multiple `BatchThreadPool`s may be a problem in the current implementation. But if thread-pool
  // creation is moved to the npu daemon, this problem should be solved smoothly.
}
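
Since construction throws when no TRIX NPU device is present, a caller that wants to fall back to another backend can probe availability with a try block. A minimal sketch (the probe is deliberately heavyweight, since constructing DevContext also creates the thread pool; real code may prefer a cheaper check):

#include "DevContext.h"
#include <stdexcept>

// Hypothetical availability probe; not part of the trix backend itself.
bool trixAvailable()
{
  try
  {
    onert::backend::trix::DevContext ctx; // acquires device handles and thread pool
    return true;
  }
  catch (const std::runtime_error &)
  {
    return false; // no TRIX NPU device, or a handle could not be acquired
  }
}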

◆ ~DevContext()

onert::backend::trix::DevContext::~DevContext ( )

Destroy the device context object.

Definition at line 60 of file DevContext.cc.

{
  // NOTE Must release _batch_thread_pool before releasing _dev_handles to wait for all threads to
  //      be terminated
  _batch_thread_pool.reset(nullptr);

  for (const auto &dev_handle : _dev_handles)
  {
    unregisterNPUmodel_all(dev_handle);
    putNPUdevice(dev_handle);
  }
}

◆ DevContext() [2/2]

onert::backend::trix::DevContext::DevContext ( const DevContext & )
delete

Member Function Documentation

◆ operator=()

DevContext & onert::backend::trix::DevContext::operator= ( const DevContext & )
delete

◆ registerModel()

ModelID onert::backend::trix::DevContext::registerModel ( const std::string &  model_file_path)

Register a trix model for all NPU devices.

Parameters
  model_file_path  File path of a trix model
Returns
ModelID Internal ID of the trix model

Definition at line 73 of file DevContext.cc.

{
  if (_dev_handles.size() == 0)
  {
    throw std::runtime_error("No npu device is available");
  }

  std::unique_ptr<npubin_meta, decltype(&free)> meta(
    getNPUmodel_metadata(model_file_path.c_str(), false), free);

  if (meta == nullptr)
  {
    throw std::runtime_error("Unable to extract the model metadata");
  }

  generic_buffer file_info;
  file_info.type = BUFFER_FILE;
  file_info.filepath = model_file_path.c_str();
  file_info.size = meta->size;

  ModelID model_id = 0;

  for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
  {
    // Register model for each device
    uint32_t model_id_at_device;
    if (registerNPUmodel(_dev_handles.at(dev_num), &file_info, &model_id_at_device) < 0)
    {
      throw std::runtime_error("Failed to register npu model");
    }

    if (dev_num == 0)
    {
      model_id = model_id_at_device;
      _meta_map[model_id_at_device] = std::shared_ptr<npubin_meta>(std::move(meta));
    }
    else
    {
      _meta_map[model_id_at_device] = _meta_map[model_id];
    }

    _model_ids[model_id].resize(dev_num + 1);
    _model_ids[model_id].at(dev_num) = model_id_at_device;
  }

  // Return the model id for device 0 only
  return model_id;
}
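
The metadata returned by getNPUmodel_metadata() is a C-style (malloc) allocation, which is why the code above holds it in a unique_ptr whose deleter is free. The same idiom in isolation:

#include <cstdlib>
#include <memory>

// Free-deleter idiom for C allocations, as used above for npubin_meta:
// the managed pointer is released with std::free instead of delete.
void freeDeleterExample()
{
  std::unique_ptr<char, decltype(&std::free)> buf(
    static_cast<char *>(std::malloc(64)), std::free);
} // buf goes out of scope here and its memory is released via std::free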

◆ requestRun()

void onert::backend::trix::DevContext::requestRun ( ModelID  model_id,
input_buffers *  input_bufs,
tensors_data_info *  in_info,
output_buffers *  output_bufs,
tensors_data_info *  out_info,
size_t  batch_size 
)

Request a trix model to be run on NPU.

Parameters
  model_id     Internal ID of a trix model
  input_bufs   Buffer data of inputs
  in_info      Data info of inputs
  output_bufs  Buffer data of outputs
  out_info     Data info of outputs
  batch_size   Batch size

Definition at line 139 of file DevContext.cc.

{
  if (batch_size > 1)
  {
    if (in_info->num_info != 1)
    {
      throw std::runtime_error("Supported only an input that has batch now");
    }
    if (out_info->num_info != 1)
    {
      throw std::runtime_error("Supported only one output now");
    }

    if (input_bufs->bufs[0].size % batch_size != 0)
    {
      throw std::runtime_error("Invalid batch size. batch size :" + std::to_string(batch_size) +
                               ", input buffer size : " + std::to_string(input_bufs->bufs[0].size));
    }

    if (output_bufs->bufs[0].size % batch_size != 0)
    {
      throw std::runtime_error(
        "Invalid batch size. batch size :" + std::to_string(batch_size) +
        ", output tensor size : " + std::to_string(output_bufs->bufs[0].size));
    }

    // inputs/outputs for each batch
    std::vector<input_buffers> in_buffers_vec(batch_size);
    std::vector<output_buffers> out_buffers_vec(batch_size);

    // Run on thread pool
    std::vector<std::future<int32_t>> batch_futures;
    for (uint32_t batch_num = 0; batch_num < batch_size; ++batch_num)
    {
      // Enqueue jobs
      // The in_info and out_info are always the same even if they are divided by batch,
      // so they are used as they are.
      auto future = _batch_thread_pool->enqueueJob(
        [batch_size, in_info, out_info,
         this](uint32_t dev_num, ModelID model_id, const input_buffers *input_bufs,
               const output_buffers *output_bufs, uint32_t batch_num) -> int32_t {
          // Set buffers of inputs/outputs for each batch
          // TODO Support multiple inputs/outputs
          input_buffers in_batch_buffers;
          in_batch_buffers.num_buffers = input_bufs->num_buffers;
          const uint64_t in_batch_offset = input_bufs->bufs[0].size / batch_size;
          setBufferByBatch(input_bufs->bufs[0], batch_num, in_batch_offset,
                           &in_batch_buffers.bufs[0]);

          output_buffers out_batch_buffers;
          out_batch_buffers.num_buffers = output_bufs->num_buffers;
          const uint64_t out_batch_offset = output_bufs->bufs[0].size / batch_size;
          setBufferByBatch(output_bufs->bufs[0], batch_num, out_batch_offset,
                           &out_batch_buffers.bufs[0]);

          try
          {
            // dev_num is the same as the thread number in _batch_thread_pool
            this->runOneBatch(dev_num, model_id, &in_batch_buffers, in_info, &out_batch_buffers,
                              out_info);
          }
          catch (...)
          {
            _eptr = std::current_exception();
          }

          return batch_num;
        },
        model_id, input_bufs, output_bufs, batch_num);
      batch_futures.emplace_back(std::move(future));
    }

    for (auto &&future : batch_futures)
    {
      future.get();
    }

    if (_eptr)
    {
      std::exception_ptr eptr(nullptr);
      _eptr.swap(eptr);
      std::rethrow_exception(eptr);
    }
  }
  else
  {
    runOneBatch(0, model_id, input_bufs, in_info, output_bufs, out_info);
  }
}
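
setBufferByBatch() is a private helper that is not documented on this page. Based on how it is called above, a plausible sketch of the slicing it performs, assuming the addr/size/type fields of libnpuhost's generic_buffer (illustrative only, not the actual implementation):

// Point the per-batch buffer at the batch_num-th slice of the original buffer.
void setBufferByBatchSketch(const generic_buffer &origin, uint32_t batch_num,
                            uint64_t batch_offset, generic_buffer *batch_buf)
{
  batch_buf->addr = reinterpret_cast<uint8_t *>(origin.addr) + batch_num * batch_offset;
  batch_buf->size = batch_offset;
  batch_buf->type = BUFFER_MAPPED;
}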

◆ unRegisterModel()

void onert::backend::trix::DevContext::unRegisterModel ( ModelID  model_id)

Unregister a trix model.

Parameters
  model_id  Internal ID of the trix model to be unregistered

Definition at line 122 of file DevContext.cc.

{
  for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
  {
    const auto model_id_at_device = _model_ids.at(model_id).at(dev_num);
    const auto &dev_handle = _dev_handles.at(dev_num);

    // Remove meta data
    _meta_map.erase(model_id_at_device);

    // Unregister Model for each device
    unregisterNPUmodel(dev_handle, model_id_at_device);
  }
  // Remove model IDs
  _model_ids.erase(model_id);
}
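
Because every successful registerModel() must eventually be matched by an unRegisterModel(), a caller may want to pair the two with RAII so the model is unregistered even when requestRun() throws. A hypothetical wrapper, assuming ModelID lives in the onert::backend::trix namespace:

#include "DevContext.h"
#include <string>

// Hypothetical RAII wrapper; not part of the trix backend itself.
class ScopedModel
{
public:
  ScopedModel(onert::backend::trix::DevContext &ctx, const std::string &path)
    : _ctx(ctx), _id(ctx.registerModel(path))
  {
  }
  ~ScopedModel() { _ctx.unRegisterModel(_id); }
  onert::backend::trix::ModelID id() const { return _id; }

private:
  onert::backend::trix::DevContext &_ctx;
  onert::backend::trix::ModelID _id;
};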

The documentation for this class was generated from the following files:

DevContext.h
DevContext.cc