ONE - On-device Neural Engine
Loading...
Searching...
No Matches
RuntimeGraph.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "core/RuntimeGraph.h"
19
20#include <algorithm>
21#include <map>
22
23namespace luci_interpreter
24{
25
26// IBaseRuntimeGraph
// RuntimeGraph constructor (continuation — the opening signature line is not
// visible in this extraction). Stores the memory manager, circle reader,
// owning runtime module and subgraph index; the tensor-to-buffer map and the
// in-place operator set start empty.
28 RuntimeModule *runtime_module, uint32_t subgraph_index)
29 : _memory_manager(memory_manager),
30 _tensor_to_data(std::unordered_map<const circle::Tensor *, uint8_t *>{}),
31 _runtime_module(runtime_module), _reader(circle_reader),
32 _inplace_op_indexes(std::unordered_set<const circle::Operator *>{}),
33 _subgraph_index(subgraph_index)
34{
35}
36
// Destructor body (signature line not visible in this extraction): releases
// every buffer still registered in _tensor_to_data through the memory
// manager, so no runtime buffer outlives the graph.
38{
39 for (auto &idx_to_tensor : _tensor_to_data)
40 {
41 auto *data = idx_to_tensor.second;
42
43 _memory_manager->release_memory(data);
44 }
45}
46
47// TODO: modify this
48void RuntimeGraph::buildAllocDeallocPlan(bool dealloc_input)
49{
50 assert(_reader->get_current_subgraph_index() == _subgraph_index);
51 invalidate();
52 using Lifetime = std::pair<int32_t, int32_t>;
53 std::map<const circle::Tensor *, Lifetime> lifetimes;
54 const size_t num_kernels = _reader->operators().size();
55
56 if (dealloc_input)
57 {
58 for (const auto input_ind : _reader->inputs())
59 {
60 const auto raw_tensor = _reader->tensors()[input_ind];
61
62 assert(lifetimes.count(raw_tensor) == 0);
63 lifetimes[raw_tensor] = Lifetime(-1, 0);
64 }
65 }
66
67 for (int32_t index = 0; index < num_kernels; ++index)
68 {
69 const auto kernel = _reader->operators().at(index);
70 assert(kernel != nullptr);
71
72 for (int32_t j = 0; j < kernel->inputs()->size(); ++j)
73 {
74 const auto input_index = kernel->inputs()->operator[](j);
75
76 if (input_index == -1)
77 continue;
78
79 const auto raw_tensor = _reader->tensors()[input_index];
80
81 // Pass constant tensors
82 if (Tensor::is_constant_tensor(_reader, raw_tensor))
83 continue;
84
85 if (lifetimes.count(raw_tensor) > 0)
86 {
87 if (_inplace_op_indexes.find(kernel) != _inplace_op_indexes.end())
88 lifetimes.at(raw_tensor).second = -1;
89 else
90 lifetimes.at(raw_tensor).second = index;
91 }
92 }
93
94 for (int32_t j = 0; j < kernel->outputs()->size(); ++j)
95 {
96 const auto output_index = kernel->outputs()->operator[](j);
97 const auto raw_tensor = _reader->tensors()[output_index];
98
99 assert(lifetimes.count(raw_tensor) == 0);
100 if (_inplace_op_indexes.find(kernel) != _inplace_op_indexes.end())
101 lifetimes[raw_tensor] = Lifetime(-1, index);
102 else
103 lifetimes[raw_tensor] = Lifetime(index, index);
104 }
105 }
106
107 for (const auto output_ind : _reader->outputs())
108 {
109 const auto raw_tensor = _reader->tensors()[output_ind];
110
111 if (lifetimes.count(raw_tensor) > 0)
112 lifetimes.at(raw_tensor).second = num_kernels;
113 }
114
115 _alloc_plan.assign(num_kernels, std::vector<const circle::Tensor *>());
116 _dealloc_plan.assign(num_kernels + 1, std::vector<const circle::Tensor *>());
117 for (const auto &item : lifetimes)
118 {
119 if (item.second.first != -1)
120 _alloc_plan[item.second.first].push_back(item.first);
121 if (item.second.second != -1)
122 _dealloc_plan[item.second.second].push_back(item.first);
123 }
124 _is_valid = true;
125}
126
127void RuntimeGraph::allocate(size_t kernel_index)
128{
129 assert(_reader->get_current_subgraph_index() == _subgraph_index);
130 assert(_is_valid && kernel_index < _alloc_plan.size());
131 for (const circle::Tensor *tensor : _alloc_plan[kernel_index])
132 {
133 if (_tensor_to_data.find(tensor) != _tensor_to_data.end())
134 {
135 auto *data = _tensor_to_data.at(tensor);
136 _memory_manager->release_memory(data);
137 }
138 auto *data = _memory_manager->allocate_memory(tensor);
139 _tensor_to_data[tensor] = data;
140 }
141}
142
143#ifndef DIS_DYN_SHAPES
// Records the runtime-computed shape for `tensor` (overwriting any previous
// entry). The second parameter line is missing from this extraction; per the
// declaration it is `luci_interpreter::RuntimeShape &&shapes`, moved into the
// map below.
144void RuntimeGraph::addDynamicShapeTensor(const circle::Tensor *tensor,
146{
147 assert(_reader->get_current_subgraph_index() == _subgraph_index);
148 _dynamic_tensor_shapes[tensor] = std::move(shapes);
149}
150
152{
153 assert(_reader->get_current_subgraph_index() == _subgraph_index);
154 auto it = _dynamic_tensor_shapes.find(tensor);
155
156 return it == _dynamic_tensor_shapes.end() ? nullptr : &_dynamic_tensor_shapes[tensor];
157}
158
159void RuntimeGraph::removeDynamicShapeTensor(const circle::Tensor *tensor)
160{
161 assert(_reader->get_current_subgraph_index() == _subgraph_index);
162 auto it = _dynamic_tensor_shapes.find(tensor);
163
164 assert(it != _dynamic_tensor_shapes.end());
165
166 _dynamic_tensor_shapes.erase(it);
167}
168
169#endif // DIS_DYN_SHAPES
170
171void RuntimeGraph::deallocate(size_t kernel_index)
172{
173 assert(_reader->get_current_subgraph_index() == _subgraph_index);
174 assert(_is_valid && kernel_index < _dealloc_plan.size());
175
176#ifdef ENABLE_TRAINING
177 // const uint32_t number_of_trainable_last_layers = _number_of_last_trainable_layers;
178 const uint32_t last_layer = _number_of_last_trainable_layers > 0
179 ? _reader->operators().size() - _number_of_last_trainable_layers
180 : 0;
181#endif // ENABLE_TRAINING
182
183 for (const circle::Tensor *tensor : _dealloc_plan[kernel_index])
184 {
185 const auto it = _tensor_to_data.find(tensor);
186 assert(it != _tensor_to_data.end());
187
188 auto *data = _tensor_to_data.at(tensor);
189
190#ifdef ENABLE_TRAINING
191 if (_number_of_last_trainable_layers > 0 and kernel_index >= last_layer)
192 {
193 _gradient_calc_storage->saveDataToTensor(tensor, data);
194 }
195 else
196 {
197 _memory_manager->release_memory(data);
198 }
199#else
200 _memory_manager->release_memory(data);
201#endif // ENABLE_TRAINING
202
203 _tensor_to_data.erase(it);
204 }
205}
206
207void RuntimeGraph::resetTensorData(uint8_t *new_data, const circle::Tensor *tensor)
208{
209 assert(_reader->get_current_subgraph_index() == _subgraph_index);
210 auto tensor_it = _tensor_to_data.find(tensor);
211 if (tensor_it != _tensor_to_data.end())
212 {
213 auto *data = _tensor_to_data.at(tensor);
214 _memory_manager->release_memory(data);
215 }
216
217 _tensor_to_data[tensor] = new_data;
218}
219
// Body of an output-buffer-release routine (the signature line is not visible
// in this extraction). Releases the buffer registered for each graph output
// tensor, skipping when an output aliases a graph input.
221{
222 assert(_reader->get_current_subgraph_index() == _subgraph_index);
223 const auto graph_inputs = _reader->inputs();
224 for (int i = 0; i < _reader->outputs().size(); ++i)
225 {
226 const auto tensor_index = _reader->outputs()[i];
227 assert(tensor_index != -1);
228
// NOTE(review): this `return` exits the whole function as soon as ANY output
// tensor is also a graph input, leaving later outputs unprocessed —
// presumably `continue` was intended; confirm against callers.
229 if (std::find(graph_inputs.begin(), graph_inputs.end(), tensor_index) != graph_inputs.end())
230 return;
231
232 const auto tensor = _reader->tensors()[tensor_index];
233 assert(tensor != nullptr);
234
// Release and forget the output's buffer if one is registered.
235 auto tensor_it = _tensor_to_data.find(tensor);
236 if (tensor_it != _tensor_to_data.end())
237 {
238 auto *data = _tensor_to_data.at(tensor);
239 _memory_manager->release_memory(data);
240 _tensor_to_data.erase(tensor_it);
241 }
242 }
243}
244
245uint8_t *RuntimeGraph::configureGraphInput(int32_t input_index)
246{
247 assert(_reader->get_current_subgraph_index() == _subgraph_index);
248
249 const auto tensor_index = _reader->inputs()[input_index];
250 assert(tensor_index != -1);
251 const auto tensor = _reader->tensors()[tensor_index];
252 assert(tensor != nullptr);
253
254 auto *data = _memory_manager->allocate_memory(tensor);
255 configureGraphInput(input_index, data);
256
257 return data;
258}
259
260void RuntimeGraph::configureGraphInput(int32_t input_index, uint8_t *data)
261{
262 assert(_reader->get_current_subgraph_index() == _subgraph_index);
264
265 const auto tensor_index = _reader->inputs()[input_index];
266 assert(tensor_index != -1);
267 const auto tensor = _reader->tensors()[tensor_index];
268 assert(tensor != nullptr);
269
270 if (_tensor_to_data.find(tensor) != _tensor_to_data.end())
271 {
272 auto *data_prev = _tensor_to_data.at(tensor);
273 if (data_prev != data)
274 _memory_manager->release_memory(data_prev);
275 }
276 _tensor_to_data[tensor] = data;
277}
278
279int32_t RuntimeGraph::getInputDataSizeByIndex(int32_t input_index)
280{
281 assert(_reader->get_current_subgraph_index() == _subgraph_index);
282 const auto tensor_index = _reader->inputs()[input_index];
283 assert(tensor_index != -1);
284 const auto tensor = _reader->tensors()[tensor_index];
285 assert(tensor != nullptr);
286
287 return Tensor::num_elements(tensor) * size(Tensor::element_type(tensor));
288}
289
// Body of the graph-input-count accessor (signature line not visible in this
// extraction; presumably an int32_t/size_t-returning getter). Returns the
// number of input tensors of the current subgraph.
291{
292 assert(_reader->get_current_subgraph_index() == _subgraph_index);
293 return _reader->inputs().size();
294}
295
// Body of the graph-output-count accessor (signature line not visible in this
// extraction). Returns the number of output tensors of the current subgraph.
297{
298 assert(_reader->get_current_subgraph_index() == _subgraph_index);
299 return _reader->outputs().size();
300}
301
302const circle::Tensor *RuntimeGraph::getInputTensorByIndex(int32_t input_index)
303{
304 assert(_reader->get_current_subgraph_index() == _subgraph_index);
305
306 const auto tensor_index = _reader->inputs()[input_index];
307 const auto tensor = _reader->tensors()[tensor_index];
308 assert(tensor != nullptr);
309 return tensor;
310}
311
312const circle::Tensor *RuntimeGraph::getOutputTensorByIndex(int32_t input_index)
313{
314 assert(_reader->get_current_subgraph_index() == _subgraph_index);
315
316 const auto tensor_index = _reader->outputs()[input_index];
317 const auto tensor = _reader->tensors()[tensor_index];
318 assert(tensor != nullptr);
319 return tensor;
320}
321
322int32_t RuntimeGraph::getOutputDataSizeByIndex(int32_t output_index)
323{
324 assert(_reader->get_current_subgraph_index() == _subgraph_index);
325
326 const auto tensor_index = _reader->outputs()[output_index];
327 assert(tensor_index != -1);
328 const auto tensor = _reader->tensors()[tensor_index];
329 assert(tensor != nullptr);
330
331 uint32_t num_elements = Tensor::num_elements(tensor);
332 auto it = _dynamic_tensor_shapes.find(tensor);
333 if (it != _dynamic_tensor_shapes.end())
334 {
335 num_elements = it->second.flatSize();
336 }
337
338 return num_elements * size(Tensor::element_type(tensor));
339}
340
341uint8_t *RuntimeGraph::getOutputDataByIndex(int32_t output_index)
342{
343 assert(_reader->get_current_subgraph_index() == _subgraph_index);
344
345 const auto tensor_index = _reader->outputs()[output_index];
346 assert(tensor_index != -1);
347 const auto tensor = _reader->tensors()[tensor_index];
348 assert(tensor != nullptr);
349
350 assert(_tensor_to_data.find(tensor) != _tensor_to_data.end());
351
352 return _tensor_to_data[tensor];
353}
354
355uint8_t *RuntimeGraph::getDataByTensor(const circle::Tensor *raw_tensor)
356{
357 assert(_reader->get_current_subgraph_index() == _subgraph_index);
358
359 if (raw_tensor == nullptr)
360 return nullptr;
361
362 if (_tensor_to_data.find(raw_tensor) == _tensor_to_data.end())
363 {
364 return nullptr;
365 }
366
367 return _tensor_to_data.at(raw_tensor);
368}
369
370void RuntimeGraph::clearTensors() { _tensor_to_data.clear(); }
371
372void RuntimeGraph::makeInplaceOperation(const circle::Tensor *removing_tensor,
373 const circle::Tensor *dst_tensor)
374{
375 assert(_reader->get_current_subgraph_index() == _subgraph_index);
376 assert(removing_tensor != nullptr);
377
378 auto src_it = _tensor_to_data.find(removing_tensor);
379
380 if (src_it == _tensor_to_data.end())
381 return;
382
383 auto *data = _tensor_to_data[removing_tensor];
384
385 _tensor_to_data.erase(src_it);
386
387 if (dst_tensor == nullptr)
388 {
389 delete[] data;
390 return;
391 }
392
393 assert(_tensor_to_data.find(dst_tensor) == _tensor_to_data.end() &&
394 "Failed makeInplaceOperation");
395 _tensor_to_data[dst_tensor] = data;
396}
397
// Returns a pointer to the constant data of `raw_tensor` from the model's
// flatbuffer, or nullptr for a null tensor. With training enabled, trainable
// weights overridden in _storage take precedence over the model buffer.
398uint8_t *RuntimeGraph::getConstDataByTensor(const circle::Tensor *raw_tensor)
399{
400 assert(_reader->get_current_subgraph_index() == _subgraph_index);
401 if (raw_tensor == nullptr)
402 return nullptr;
403
404#ifdef ENABLE_TRAINING
// NOTE: `&&` binds tighter than `or`; since the string literal is always
// truthy this evaluates as (A == 0 or B != nullptr), which matches intent,
// but explicit parentheses would be clearer.
405 assert(_number_of_last_trainable_layers == 0 or
406 _storage != nullptr && "Storage should not be null here");
407
// Prefer a trained weight snapshot over the (read-only) model buffer.
408 if (_storage != nullptr)
409 {
410 uint8_t *result = nullptr;
411 _storage->getTrainWeightDataByTensor(raw_tensor, &result);
412
413 if (result != nullptr)
414 return result;
415 }
416
417#endif // ENABLE_TRAINING
418
// Fall back to the flatbuffer-owned constant data; the const_cast hands out
// a mutable pointer into the model buffer — callers must not write through
// it unless they own the model memory.
419 auto const &buffer = wrap(_reader->buffers()[raw_tensor->buffer()]->data());
420
421 return const_cast<uint8_t *>(buffer.data());
422}
423
424const circle::Tensor *RuntimeGraph::getCircleTensorByIndex(int32_t index)
425{
426 assert(_reader->get_current_subgraph_index() == _subgraph_index);
427 if (index < 0)
428 return nullptr;
429
430 const auto raw_tensor = _reader->tensors()[index];
431
432 return raw_tensor;
433}
434
// Configures every kernel in the subgraph and, if the alloc/dealloc plan is
// not yet valid, builds it. NOTE(review): original line 437 is missing from
// this extraction — presumably the usual subgraph-index assert; confirm
// against the repository.
435void RuntimeGraph::configure(bool dealloc_input)
436{
438
439 for (uint32_t i = 0; i < _reader->operators().size(); ++i)
440 {
441 const auto op = _reader->operators().at(i);
442 assert(op != nullptr);
443
444 const auto opcode = _reader->builtin_code(op);
445
// Per-kernel configuration (shape/type checks, in-place detection, ...).
446 kernel_configure.configure_kernel(op, opcode, this);
447 }
448
// Build the memory plan only when invalidated; configuration marks the graph
// valid afterwards either way.
449 if (not _is_valid)
450 buildAllocDeallocPlan(dealloc_input);
451
452 _is_valid = true;
453}
454
455void RuntimeGraph::setDataToTensor(const circle::Tensor *tensor, uint8_t *data)
456{
457 _tensor_to_data[tensor] = data;
458}
459
// Body of the graph-execution routine (the signature line is not visible in
// this extraction; presumably `void RuntimeGraph::execute()`). Configures the
// graph if needed, then for each kernel allocates its tensors, runs it, and
// deallocates tensors whose lifetime ends. NOTE(review): original line 462 is
// also missing — presumably the usual subgraph-index assert; confirm.
461{
463
464 if (not _is_valid)
465 configure(true);
466
467 const auto operators_size = _reader->operators().size();
468 const auto operators = _reader->operators();
469
470 for (uint32_t i = 0; i < operators_size; ++i)
471 {
472 const auto op = operators.at(i);
473 assert(op != nullptr);
474
475 const auto opcode = _reader->builtin_code(op);
476
// Plan-driven memory management wraps each kernel execution.
477 allocate(i);
478
479 kernel_executor.execute_kernel(op, opcode, this);
480
481 deallocate(i);
482 }
483
484#ifdef ENABLE_TRAINING
// With trainable layers configured, the (single) graph output activation is
// stashed for gradient computation rather than kept in _tensor_to_data.
485 if (_number_of_last_trainable_layers > 0)
486 {
487 const auto graph_output = _reader->outputs();
488
489 assert(graph_output.size() == 1);
490
491 const auto output_tensor = _reader->tensors()[graph_output[0]];
492 uint8_t *output_data = _tensor_to_data.at(output_tensor);
493 _gradient_calc_storage->saveDataToTensor(output_tensor, output_data);
494 _tensor_to_data.erase(output_tensor);
495 }
496#endif // ENABLE_TRAINING
497}
498
499} // namespace luci_interpreter
Loads Circle file and provides helpers to access attributes.
CircleOperators operators() const
CircleBuffers buffers() const
CircleTensors tensors() const
VectorWrapper< int32_t > outputs() const
circle::BuiltinOperator builtin_code(const circle::Operator *op) const
VectorWrapper< int32_t > inputs() const
uint32_t size() const
const T * data() const
value_type at(uint32_t i) const
virtual void release_memory(luci_interpreter::Tensor &tensor)=0
virtual void allocate_memory(luci_interpreter::Tensor &tensor)=0
void configure_kernel(const circle::Operator *cur_op, circle::BuiltinOperator opcode, BaseRuntimeGraph *runtime_graph) const
void execute_kernel(const circle::Operator *cur_op, circle::BuiltinOperator opcode, BaseRuntimeGraph *runtime_graph) const
void makeInplaceOperation(const circle::Tensor *src_tensor, const circle::Tensor *dst_tensor)
luci_interpreter::RuntimeShape * getDynamicShapeTensor(const circle::Tensor *tensor)
uint8_t * getConstDataByTensor(const circle::Tensor *raw_tensor)
void configure(bool dealloc_input)
const circle::Tensor * getCircleTensorByIndex(int32_t index)
void addDynamicShapeTensor(const circle::Tensor *tensor, luci_interpreter::RuntimeShape &&shapes)
void setDataToTensor(const circle::Tensor *tensor, uint8_t *data)
void resetTensorData(uint8_t *new_data, const circle::Tensor *tensor)
int32_t getInputDataSizeByIndex(int32_t input_index)
int32_t getOutputDataSizeByIndex(int32_t output_index)
uint8_t * getOutputDataByIndex(int32_t output_index)
uint8_t * getDataByTensor(const circle::Tensor *raw_tensor)
const circle::Tensor * getOutputTensorByIndex(int32_t input_index)
const circle::Tensor * getInputTensorByIndex(int32_t input_index)
void removeDynamicShapeTensor(const circle::Tensor *tensor)
uint8_t * configureGraphInput(int32_t input_index)
const T * data(const std::vector< T, Alloc > &v)
constexpr KernelConfigureRegistry kernel_configure
constexpr KernelExecuteRegistry kernel_executor
VectorWrapper< T > wrap(const flatbuffers::Vector< T > *vec)
loco::GraphInputIndex index(const TFPlaceholder *node)
Definition TFNode.cpp:54
int32_t size[5]
Definition Slice.cpp:35