ONE - On-device Neural Engine
onert_micro::import::OMExecutionPlanCreator Struct Reference

#include <OMExecutionPlanCreator.h>

Static Public Member Functions

static OMStatus createExecutionPlan (core::OMRuntimeStorage &runtime_storage, core::OMRuntimeContext &runtime_context, core::memory::OMRuntimeAllocator &allocator, const OMConfig &configs)
 
static OMStatus createForwardExecutionPlan (core::OMRuntimeStorage &runtime_storage, core::OMRuntimeContext &runtime_context, core::memory::OMRuntimeAllocator &allocator, const OMConfig &configs)
 
static OMStatus createBackwardExecutionPlan (core::OMRuntimeStorage &runtime_storage, core::OMRuntimeContext &runtime_context, core::memory::OMRuntimeAllocator &allocator, const OMConfig &configs)
 

Detailed Description

Builds the memory allocation/deallocation execution plans used by the runtime: createExecutionPlan() for plain inference, createForwardExecutionPlan() for the training forward pass, and createBackwardExecutionPlan() for the training backward pass. Each plan records, per kernel, which tensors to allocate before it runs and which to release after it finishes.

Definition at line 31 of file OMExecutionPlanCreator.h.
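
A minimal usage sketch, assuming the runtime storage, context and allocator have already been populated by the model import step. The buildPlan wrapper below is hypothetical and only illustrates which plan creator applies in which mode; the real callers are importModel() and importTrainModel(), listed in the member documentation below.

#include <OMExecutionPlanCreator.h>

// Hypothetical wrapper, not part of the onert-micro API.
onert_micro::OMStatus buildPlan(onert_micro::core::OMRuntimeStorage &runtime_storage,
                                onert_micro::core::OMRuntimeContext &runtime_context,
                                onert_micro::core::memory::OMRuntimeAllocator &allocator,
                                const onert_micro::OMConfig &configs)
{
  using onert_micro::import::OMExecutionPlanCreator;

  if (!configs.train_mode)
  {
    // Plain inference: each tensor lives from its producing kernel to its last consumer.
    return OMExecutionPlanCreator::createExecutionPlan(runtime_storage, runtime_context, allocator,
                                                       configs);
  }

  // Training forward pass: keeps tensors that the backward pass will read.
  onert_micro::OMStatus status = OMExecutionPlanCreator::createForwardExecutionPlan(
    runtime_storage, runtime_context, allocator, configs);
  if (status != onert_micro::Ok)
    return status;

  // Training backward pass: plans only the trainable tail of the graph.
  return OMExecutionPlanCreator::createBackwardExecutionPlan(runtime_storage, runtime_context,
                                                             allocator, configs);
}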

Member Function Documentation

◆ createBackwardExecutionPlan()

OMStatus OMExecutionPlanCreator::createBackwardExecutionPlan ( core::OMRuntimeStorage & runtime_storage,
core::OMRuntimeContext & runtime_context,
core::memory::OMRuntimeAllocator & allocator,
const OMConfig & configs 
)
static

Definition at line 336 of file OMExecutionPlanCreator.cpp.

339{
340 bool keep_input = configs.keep_input;
341 bool train_mode = configs.train_mode;
342 assert(train_mode);
343 if (train_mode == false)
344 return UnknownError;
345
346 std::vector<std::vector<uint16_t>> &alloc_plan = allocator.getAllocPlan();
347 std::vector<std::vector<uint16_t>> &dealloc_plan = allocator.getDeallocPlan();
348
349 // First remove prev plan (if it was created)
350 alloc_plan.clear();
351 dealloc_plan.clear();
352
353 using Lifetime = std::pair<int32_t, int32_t>;
354 std::map<uint16_t, Lifetime> lifetimes;
355
356 const reader::CircleOperators *operators = runtime_context.getCircleOperators();
357 const uint32_t num_kernels = operators->size();
358
359 uint32_t num_train_layers =
360 configs.training_context.num_of_train_layers == 0
361 ? num_kernels
362 : std::min(num_kernels, configs.training_context.num_of_train_layers);
363 auto graph_outputs = runtime_context.getCircleOutputs();
364
365 for (const auto output_ind : *graph_outputs)
366 {
367 assert(lifetimes.count(output_ind) == 0);
368 lifetimes[output_ind] = Lifetime(-1, 0);
369 }
370
371 std::unordered_map<uint16_t, uint8_t> trainable_ops_config =
372 runtime_context.getTrainableOpsIndexes();
373
374 // If the config file defines trainable operations in the context,
375 // then ignore configs.training_context.num_of_train_layers
376 // and use the minimum index from trainable_ops_indexes to define the earliest trainable op
377 uint16_t last_train_op_indx = num_kernels - num_train_layers;
378 if (!trainable_ops_config.empty())
379 {
380 last_train_op_indx = std::numeric_limits<uint16_t>::max();
381 // Find op trainable index with min value
382 for (auto &p : trainable_ops_config)
383 {
384 last_train_op_indx = std::min(p.first, last_train_op_indx);
385 }
386 num_train_layers = (num_kernels - last_train_op_indx);
387 }
388
389 const auto *op_codes = runtime_context.getCircleOpcodes();
390 for (int32_t index = 0; index < num_train_layers; ++index)
391 {
392 uint32_t cur_op_index = num_kernels - index - 1;
393 auto *cur_op = operators->operator[](cur_op_index);
394
395 uint32_t cur_opcode_index = cur_op->opcode_index();
396
397 assert(cur_opcode_index < op_codes->size());
398
399 const auto opcode = op_codes->operator[](cur_opcode_index);
400
401 const auto *op_inputs = cur_op->inputs();
402 const auto *op_outputs = cur_op->outputs();
403
404 bool is_trainable_ops =
405 trainable_ops_config.empty() == true
406 ? isTrainableWeights(opcode)
407 : trainable_ops_config.find(cur_op_index) != trainable_ops_config.end();
408
409 // Warning: this holds only for Conv2D and FullyConnected kernels
410 const int32_t bias_index = 2;
411
412 for (int32_t j = 0; j < op_inputs->size(); ++j)
413 {
414 const auto input_index = op_inputs->operator[](j);
415 const auto is_const = runtime_context.isConstTensor(input_index);
416 // Note: we don't need to allocate for the last node or for an empty tensor
417 if (input_index == -1 or (is_const and not is_trainable_ops))
418 {
419 continue;
420 }
421
422 if ((index == num_train_layers - 1) and !is_const)
423 {
424 lifetimes[input_index] = {-1, index};
425 }
426 else if (is_const and
427 trainable_ops_config.find(cur_op_index) != trainable_ops_config.end() and
428 trainable_ops_config[cur_op_index] == ONLY_BIAS and j != bias_index)
429 {
430 // Do nothing, since only the bias is updated
431 continue;
432 }
433 else
434 {
435 lifetimes[input_index] = {index, -1};
436 }
437 }
438
439 for (int32_t j = 0; j < op_outputs->size(); ++j)
440 {
441 const auto output_index = op_outputs->operator[](j);
442 if (lifetimes.count(output_index) > 0)
443 lifetimes.at(output_index).second = index;
444 }
445 }
446
447 alloc_plan.assign(num_train_layers, std::vector<uint16_t>());
448 dealloc_plan.assign(num_train_layers, std::vector<uint16_t>());
449
450 for (const auto &item : lifetimes)
451 {
452 if (item.second.first != -1)
453 alloc_plan[item.second.first].push_back(item.first);
454 if (item.second.second != -1)
455 dealloc_plan[item.second.second].push_back(item.first);
456 }
457
458 return Ok;
459}

References onert_micro::core::memory::OMRuntimeAllocator::getAllocPlan(), onert_micro::core::OMRuntimeContext::getCircleOpcodes(), onert_micro::core::OMRuntimeContext::getCircleOperators(), onert_micro::core::OMRuntimeContext::getCircleOutputs(), onert_micro::core::memory::OMRuntimeAllocator::getDeallocPlan(), onert_micro::core::OMRuntimeContext::getTrainableOpsIndexes(), onert_micro::core::OMRuntimeContext::isConstTensor(), onert_micro::OMConfig::keep_input, onert_micro::OMTrainingContext::num_of_train_layers, onert_micro::Ok, onert_micro::core::ONLY_BIAS, flatbuffers::Vector< T >::size(), size, onert_micro::OMConfig::train_mode, onert_micro::OMConfig::training_context, and onert_micro::UnknownError.

Referenced by onert_micro::core::OMTrainingRuntimeModule::importTrainModel().
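
To make the trainable-range computation above concrete, here is a standalone sketch with hypothetical values (10 operators, ops 7 and 9 marked trainable); it mirrors lines 377-387 of the listing and is not part of the API.

#include <algorithm>
#include <cstdint>
#include <limits>
#include <unordered_map>

int main()
{
  // Hypothetical graph with 10 operators; in the runtime these values come from
  // OMRuntimeContext::getTrainableOpsIndexes() and the operator count.
  const uint32_t num_kernels = 10;
  std::unordered_map<uint16_t, uint8_t> trainable_ops_config = {{7, 0}, {9, 0}};

  uint32_t num_train_layers = num_kernels; // num_of_train_layers == 0 means "train all layers"
  uint16_t last_train_op_indx = num_kernels - num_train_layers;
  if (!trainable_ops_config.empty())
  {
    last_train_op_indx = std::numeric_limits<uint16_t>::max();
    for (auto &p : trainable_ops_config)
      last_train_op_indx = std::min(p.first, last_train_op_indx);
    num_train_layers = num_kernels - last_train_op_indx;
  }
  // Result: last_train_op_indx == 7 and num_train_layers == 3, so the backward
  // plan only walks operators 9, 8 and 7 (from the end of the graph backwards).
  return 0;
}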

◆ createExecutionPlan()

OMStatus OMExecutionPlanCreator::createExecutionPlan ( core::OMRuntimeStorage & runtime_storage,
core::OMRuntimeContext & runtime_context,
core::memory::OMRuntimeAllocator & allocator,
const OMConfig & configs 
)
static

Definition at line 83 of file OMExecutionPlanCreator.cpp.

87{
88 // Check that this is non-training mode
89 assert(configs.train_mode != true);
90 if (configs.train_mode == true)
91 return UnknownError;
92
93 bool keep_input = configs.keep_input;
94
95 std::vector<std::vector<uint16_t>> &alloc_plan = allocator.getAllocPlan();
96 std::vector<std::vector<uint16_t>> &dealloc_plan = allocator.getDeallocPlan();
97
98 // First remove prev plan (if it was created)
99 alloc_plan.clear();
100 dealloc_plan.clear();
101
102 using Lifetime = std::pair<int32_t, int32_t>;
103
104 std::map<uint16_t, Lifetime> lifetimes;
105
106 const reader::CircleOperators *operators = runtime_context.getCircleOperators();
107
108 const size_t num_kernels = operators->size();
109
110 if (not keep_input)
111 {
112 auto graph_inputs = runtime_context.getCircleInputs();
113 for (const auto input_ind : *graph_inputs)
114 {
115 assert(lifetimes.count(input_ind) == 0);
116 lifetimes[input_ind] = Lifetime(-1, 0);
117 }
118 }
119
120 for (int32_t index = 0; index < num_kernels; ++index)
121 {
122 auto *cur_op = operators->operator[](index);
123
124 const auto *op_inputs = cur_op->inputs();
125 const auto *op_outputs = cur_op->outputs();
126 auto kernel_type = runtime_storage.getKernelType(index);
127 for (int32_t j = 0; j < op_inputs->size(); ++j)
128 {
129 const auto input_index = op_inputs->operator[](j);
130
131 if (input_index == -1)
132 continue;
133
134 // Pass constant tensors
135 if (runtime_context.isConstTensor(input_index))
136 continue;
137
138 if (lifetimes.count(input_index) > 0)
139 {
140 if (kernel_type == Inplace)
141 lifetimes.at(input_index).second = -1;
142 else
143 lifetimes.at(input_index).second = index;
144 }
145 }
146
147 for (int32_t j = 0; j < op_outputs->size(); ++j)
148 {
149 const auto output_index = op_outputs->operator[](j);
150
151 if (kernel_type == Inplace)
152 lifetimes[output_index] = Lifetime(-1, index);
153 else
154 lifetimes[output_index] = Lifetime(index, index);
155 }
156 }
157 auto graph_outputs = runtime_context.getCircleOutputs();
158 for (const auto output_ind : *graph_outputs)
159 {
160 if (lifetimes.count(output_ind) > 0)
161 lifetimes.at(output_ind).second = static_cast<int32_t>(num_kernels);
162 }
163
164 alloc_plan.assign(num_kernels, std::vector<uint16_t>());
165 dealloc_plan.assign(num_kernels + 1, std::vector<uint16_t>());
166
167 for (const auto &item : lifetimes)
168 {
169 if (item.second.first != -1)
170 alloc_plan[item.second.first].push_back(item.first);
171 if (item.second.second != -1)
172 dealloc_plan[item.second.second].push_back(item.first);
173 }
174
175 return Ok;
176}

References onert_micro::core::memory::OMRuntimeAllocator::getAllocPlan(), onert_micro::core::OMRuntimeContext::getCircleInputs(), onert_micro::core::OMRuntimeContext::getCircleOperators(), onert_micro::core::OMRuntimeContext::getCircleOutputs(), onert_micro::core::memory::OMRuntimeAllocator::getDeallocPlan(), onert_micro::core::OMRuntimeStorage::getKernelType(), onert_micro::core::Inplace, onert_micro::core::OMRuntimeContext::isConstTensor(), onert_micro::OMConfig::keep_input, onert_micro::Ok, flatbuffers::Vector< T >::size(), onert_micro::OMConfig::train_mode, and onert_micro::UnknownError.

Referenced by onert_micro::core::OMRuntimeModule::importModel().
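
The lifetime-to-plan conversion at the end of the listing is the pattern shared by all three plan creators: each tensor's Lifetime pair holds the index of the kernel that allocates it and the index of the kernel after which it can be freed, with -1 meaning "no such event". A standalone sketch with hypothetical lifetimes, for illustration only:

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

int main()
{
  using Lifetime = std::pair<int32_t, int32_t>; // {alloc kernel index, dealloc kernel index}

  // Hypothetical lifetimes for a 3-kernel graph: tensor 0 is a graph input
  // (no allocation event, freed after kernel 0), tensor 1 lives across kernels
  // 0..1, tensor 2 is the graph output freed after the last kernel.
  std::map<uint16_t, Lifetime> lifetimes = {{0, {-1, 0}}, {1, {0, 1}}, {2, {2, 3}}};

  const size_t num_kernels = 3;
  std::vector<std::vector<uint16_t>> alloc_plan(num_kernels);
  std::vector<std::vector<uint16_t>> dealloc_plan(num_kernels + 1);

  for (const auto &item : lifetimes)
  {
    if (item.second.first != -1)
      alloc_plan[item.second.first].push_back(item.first); // allocate when this kernel runs
    if (item.second.second != -1)
      dealloc_plan[item.second.second].push_back(item.first); // free once this kernel is done
  }
  // alloc_plan   == { {1}, {}, {2} }
  // dealloc_plan == { {0}, {1}, {}, {2} }
  return 0;
}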

◆ createForwardExecutionPlan()

OMStatus OMExecutionPlanCreator::createForwardExecutionPlan ( core::OMRuntimeStorage & runtime_storage,
core::OMRuntimeContext & runtime_context,
core::memory::OMRuntimeAllocator & allocator,
const OMConfig & configs 
)
static

Definition at line 182 of file OMExecutionPlanCreator.cpp.

185{
186 // Check that this is training mode
187 assert(configs.train_mode == true);
188 if (configs.train_mode != true)
189 return UnknownError;
190
191 bool keep_input = configs.keep_input;
192 std::vector<std::vector<uint16_t>> &alloc_plan = allocator.getAllocPlan();
193 std::vector<std::vector<uint16_t>> &dealloc_plan = allocator.getDeallocPlan();
194
195 // First remove prev plan (if it was created)
196 alloc_plan.clear();
197 dealloc_plan.clear();
198
199 using Lifetime = std::pair<int32_t, int32_t>;
200
201 std::map<uint16_t, Lifetime> lifetimes;
202
203 const reader::CircleOperators *operators = runtime_context.getCircleOperators();
204
205 const size_t num_kernels = operators->size();
206
207 uint32_t num_train_layers = configs.training_context.num_of_train_layers;
208 if (num_train_layers == 0)
209 num_train_layers = num_kernels;
210
211 std::unordered_map<uint16_t, uint8_t> trainable_ops_config =
212 runtime_context.getTrainableOpsIndexes();
213
214 // If the config file defines trainable operations in the context,
215 // then ignore configs.training_context.num_of_train_layers
216 // and use the minimum index from trainable_ops_indexes to define the earliest trainable op
217 uint16_t last_train_op_indx = num_kernels - num_train_layers;
218 if (!trainable_ops_config.empty())
219 {
220 last_train_op_indx = std::numeric_limits<uint16_t>::max();
221 // Find op trainable index with min value
222 for (auto &p : trainable_ops_config)
223 {
224 last_train_op_indx = std::min(p.first, last_train_op_indx);
225 }
226 num_train_layers = (num_kernels - last_train_op_indx);
227 }
228
229 if (not keep_input)
230 {
231 auto graph_inputs = runtime_context.getCircleInputs();
232 for (const auto input_ind : *graph_inputs)
233 {
234 assert(lifetimes.count(input_ind) == 0);
235 lifetimes[input_ind] = Lifetime(-1, 0);
236 }
237 }
238
239 const auto *op_codes = runtime_context.getCircleOpcodes();
240
241 for (int32_t index = 0; index < num_kernels; ++index)
242 {
243 auto *cur_op = operators->operator[](index);
244
245 const auto *op_inputs = cur_op->inputs();
246 const auto *op_outputs = cur_op->outputs();
247 auto kernel_type = runtime_storage.getKernelType(index);
248
249 uint32_t cur_opcode_index = cur_op->opcode_index();
250
251 assert(cur_opcode_index < op_codes->size());
252
253 const auto opcode = op_codes->operator[](cur_opcode_index);
254
255 // Flag to determine is current operation needed to save input data (is this op in training part
256 // of the graph)
257 bool need_to_save_input_data =
258 (index >= last_train_op_indx) and
259 ((trainable_ops_config.empty() or
260 trainable_ops_config.find(index) != trainable_ops_config.end() and
261 trainable_ops_config[index] != ONLY_BIAS) or
262 isOpNeedSaveInputData(opcode));
263
264 // Flag to determine whether the current operation needs to save its output data
265 // (i.e. whether this op is in the training part of the graph)
266 bool need_to_save_output_data =
267 (index >= last_train_op_indx) and
268 ((trainable_ops_config.find(index) != trainable_ops_config.end() and
269 trainable_ops_config[index] != ONLY_BIAS) or
270 isOpNeedSaveOutputData(opcode, cur_op));
271
272 for (int32_t j = 0; j < op_inputs->size(); ++j)
273 {
274 const auto input_index = op_inputs->operator[](j);
275
276 if (input_index == -1)
277 continue;
278
279 // Pass constant tensors
280 if (runtime_context.isConstTensor(input_index))
281 continue;
282
283 if (lifetimes.count(input_index) > 0)
284 {
285 // lifetimes.at(input_index).second == -2 means the data of the input_index tensor must be saved for the backward pass
286 if (kernel_type == Inplace or need_to_save_input_data or
287 (lifetimes.at(input_index).second == -2))
288 lifetimes.at(input_index).second = -1;
289 else
290 lifetimes.at(input_index).second = index;
291 }
292 }
293
294 for (int32_t j = 0; j < op_outputs->size(); ++j)
295 {
296 const auto output_index = op_outputs->operator[](j);
297
298 if (kernel_type == Inplace)
299 lifetimes[output_index] = Lifetime(-1, index);
300 else if (need_to_save_output_data)
301 lifetimes[output_index] = Lifetime(index, -2);
302 else
303 lifetimes[output_index] = Lifetime(index, index);
304 }
305 }
306 auto graph_outputs = runtime_context.getCircleOutputs();
307 for (const auto output_ind : *graph_outputs)
308 {
309 if (lifetimes.count(output_ind) > 0)
310 lifetimes.at(output_ind).second = static_cast<int32_t>(num_kernels);
311 }
312
313 alloc_plan.assign(num_kernels, std::vector<uint16_t>());
314 dealloc_plan.assign(num_kernels + 1, std::vector<uint16_t>());
315
316 for (const auto &item : lifetimes)
317 {
318 if (item.second.first >= 0)
319 alloc_plan[item.second.first].push_back(item.first);
320 if (item.second.second >= 0)
321 dealloc_plan[item.second.second].push_back(item.first);
322 }
323
324 return Ok;
325}

References onert_micro::core::memory::OMRuntimeAllocator::getAllocPlan(), onert_micro::core::OMRuntimeContext::getCircleInputs(), onert_micro::core::OMRuntimeContext::getCircleOpcodes(), onert_micro::core::OMRuntimeContext::getCircleOperators(), onert_micro::core::OMRuntimeContext::getCircleOutputs(), onert_micro::core::memory::OMRuntimeAllocator::getDeallocPlan(), onert_micro::core::OMRuntimeStorage::getKernelType(), onert_micro::core::OMRuntimeContext::getTrainableOpsIndexes(), onert_micro::core::Inplace, onert_micro::core::OMRuntimeContext::isConstTensor(), onert_micro::OMConfig::keep_input, onert_micro::OMTrainingContext::num_of_train_layers, onert_micro::Ok, onert_micro::core::ONLY_BIAS, flatbuffers::Vector< T >::size(), size, onert_micro::OMConfig::train_mode, onert_micro::OMConfig::training_context, and onert_micro::UnknownError.

Referenced by onert_micro::core::OMRuntimeModule::importModel().
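
Unlike createExecutionPlan(), the final loop here only emits plan entries for indexes >= 0, so the sentinel values written into the dealloc slot simply suppress deallocation. A purely illustrative helper decoding those sentinels (not part of the API):

#include <cstdint>
#include <string>

// Illustrative decoding of the dealloc slot of a Lifetime pair as used in
// createForwardExecutionPlan; the helper itself is hypothetical.
std::string describeDeallocSlot(int32_t dealloc)
{
  if (dealloc == -2)
    return "kept alive: this output must be saved for the backward pass";
  if (dealloc == -1)
    return "kept alive: inplace tensor or input data needed for training";
  return "freed after kernel #" + std::to_string(dealloc);
}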


The documentation for this struct was generated from the following files: