ONE - On-device Neural Engine
Loading...
Searching...
No Matches
GraphLoader.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "loader/GraphLoader.h"
18
20
22#include <loco/IR/Algorithm.h>
23
24namespace luci_interpreter
25{
26namespace
27{
28
29template <typename NodeT> Shape getNodeShape(const NodeT *node)
30{
31 Shape shape(node->rank());
32 for (uint32_t i = 0; i < node->rank(); ++i)
33 {
34 shape.dim(i) = node->dim(i).value();
35 }
36 return shape;
37}
38
39template <DataType DT> const void *getNodeDataImpl(const luci::CircleConst *node, size_t *data_size)
40{
41 const size_t element_size = getDataTypeSize(DT);
42 const int64_t num_elements = node->size<DT>(); // Assuming size<DT>() uses large_num_elements()
43
44 // Check for integer overflow in size calculation
45 if (num_elements < 0 || static_cast<uint64_t>(num_elements) > SIZE_MAX / element_size)
46 {
47 throw std::runtime_error("Integer overflow in size calculation");
48 }
49
50 const int64_t total_size = num_elements * element_size;
51 *data_size = static_cast<size_t>(total_size);
52 if (*data_size > 0)
53 {
54 // FIXME There is no good way to get the pointer to the data currently.
55 return &node->at<DT>(0);
56 }
57 return nullptr;
58}
59
60const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
61{
62 switch (node->dtype())
63 {
64 case DataType::U4:
66 case DataType::U8:
68 case DataType::FLOAT32:
70 case DataType::S4:
72 case DataType::S8:
74 case DataType::S16:
76 case DataType::S32:
78 case DataType::S64:
80 case DataType::BOOL:
82 default:
83 throw std::runtime_error("luci-intp (getNodeData) Unsupported type.");
84 }
85}
86
87const void *getNodeData(const luci::CircleCustom *node, size_t *data_size)
88{
89 if (node->custom_code() != "CircleReferencingConst")
90 return nullptr;
91
92 // helper struct which describes data loaded to custom_options of CircleReferencingConst node
93 // TODO move this struct to header
94 struct ConstDataReference
95 {
96 const uint8_t *data = nullptr;
97 uint32_t size = 0;
98 };
99
100 const auto &custom_options = node->custom_options();
101 const auto &const_data_ref = *reinterpret_cast<const ConstDataReference *>(custom_options.data());
102
104 return const_data_ref.data;
105}
106
107bool isExecutableNode(const luci::CircleNode *node)
108{
109 switch (node->opcode())
110 {
111 // These nodes denote inputs / outputs of a graph.
112 case luci::CircleOpcode::CIRCLECONST:
113 case luci::CircleOpcode::CIRCLEINPUT:
114 case luci::CircleOpcode::CIRCLEOUTPUT:
115 case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE:
116 // The following nodes denote outputs of multiple-output nodes.
117 case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
118 case luci::CircleOpcode::CIRCLECUSTOMOUT:
119 case luci::CircleOpcode::CIRCLEIFOUT:
120 case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
121 case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
122 case luci::CircleOpcode::CIRCLESPLITOUT:
123 case luci::CircleOpcode::CIRCLESPLITVOUT:
124 case luci::CircleOpcode::CIRCLETOPKV2OUT:
125 case luci::CircleOpcode::CIRCLEUNIQUEOUT:
126 case luci::CircleOpcode::CIRCLEUNPACKOUT:
127 case luci::CircleOpcode::CIRCLEVARIABLE:
128 case luci::CircleOpcode::CIRCLEWHILEOUT:
129 return false;
130 // Custom nodes may be executable and non-executable
131 case luci::CircleOpcode::CUSTOM:
132 {
134
135 // TODO handle more non-executable Custom ops here
136 if (custom_node->custom_code() == "CircleReferencingConst")
137 return false;
138
139 return true;
140 }
141 default:
142 return true;
143 }
144}
145
147{
148 switch (node->opcode())
149 {
150 // Output nodes do not produce tensors.
151 case luci::CircleOpcode::CIRCLEOUTPUT:
152 // The following nodes are multiple-output nodes. They do not produce tensors, the tensors
153 // are produced by the corresponding *Out nodes instead.
154 case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
155 case luci::CircleOpcode::CUSTOM:
156 case luci::CircleOpcode::IF:
157 case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
158 case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
159 case luci::CircleOpcode::SPLIT:
160 case luci::CircleOpcode::SPLIT_V:
161 case luci::CircleOpcode::TOPK_V2:
162 case luci::CircleOpcode::UNIQUE:
163 case luci::CircleOpcode::UNPACK:
164 case luci::CircleOpcode::WHILE:
165 return false;
166 default:
167 return true;
168 }
169}
170
172{
174
175 // TODO handle more Custom ops here
176 if (custom_node->custom_code() == "CircleReferencingConst")
177 return true;
178
179 return false;
180}
181
182} // namespace
183
185 const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
186 const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
187 std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, IMemoryManager *memory_manager)
188 : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
189 _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor),
190 _memory_manager(memory_manager)
191{
192}
193
195{
196 for (uint32_t i = 0; i < _graph->nodes()->size(); ++i)
197 {
198 const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
199
200 if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
201 {
203 throw std::runtime_error("Unsupported Custom operator. " + cnode->custom_code() + " in " +
204 node->name());
205 }
206
207 if (!isTensorProducingNode(node))
208 continue;
209
210 // Only Input, Const, Custom and Variable nodes have shapes. Shapes of intermediate tensors will
211 // be inferred.
212 Shape shape{};
213 switch (node->opcode())
214 {
215 case luci::CircleOpcode::CIRCLECONST:
216 case luci::CircleOpcode::CIRCLECUSTOMOUT:
217 case luci::CircleOpcode::CIRCLEINPUT:
218 case luci::CircleOpcode::CIRCLEVARIABLE:
219 shape = getNodeShape(node);
220 break;
221 default:
222 break;
223 }
224
225 AffineQuantization quantization;
226 if (node->quantparam() != nullptr)
227 {
228 const luci::CircleQuantParam *params = node->quantparam();
229 assert(params->scale.size() == params->zerop.size());
230 quantization.scale.assign(params->scale.cbegin(), params->scale.cend());
231 quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend());
232 quantization.quantized_dimension = params->quantized_dimension;
233 }
234
235 auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization),
236 node->name());
237
238 // If node has execution plan then read memory offsets for nodes
239 // from the beginning of shared memory buffer. Used in Static Memory Manager.
240 if (luci::has_execution_plan(node))
241 {
243 assert(!execution_plan.offsets().empty());
244 tensor->set_offset(execution_plan.offsets().front());
245 }
246
247 if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
248 {
249 size_t data_size{};
250 const void *const_data = getNodeData(const_node, &data_size);
251 if (const_data != nullptr)
252 {
253 _memory_manager->allocate_memory(*tensor);
254 tensor->writeData(const_data, data_size);
255 }
256 }
257 else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node))
258 {
259 const auto *custom_node =
261
262 if (custom_node->custom_code() == "CircleReferencingConst")
263 {
264 size_t data_size{};
266 if (const_data != nullptr)
267 {
268 _memory_manager->allocate_memory(*tensor);
269 tensor->writeData(const_data, data_size);
270 }
271 }
272 }
273
274 _node_to_tensor.emplace(node, tensor.get());
275 _runtime_to_ir.tensor_to_node.emplace(tensor.get(), node);
276
277 _runtime_graph->addTensor(std::move(tensor));
278 }
279}
280
282{
283 auto input_nodes = loco::input_nodes(_graph);
284 std::vector<Tensor *> input_tensors(input_nodes.size());
285 for (size_t i = 0; i < input_nodes.size(); ++i)
286 {
287 input_tensors[i] = _node_to_tensor.at(input_nodes[i]);
288 _memory_manager->allocate_memory(*input_tensors[i]);
289 }
290 _runtime_graph->setInputTensors(input_tensors);
291
292 auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph));
293 std::vector<Tensor *> output_tensors(output_nodes.size());
294 for (size_t i = 0; i < output_nodes.size(); ++i)
295 {
296 const auto *node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
297 output_tensors[i] = _node_to_tensor.at(node->from());
298 }
299 _runtime_graph->setOutputTensors(output_tensors);
300}
301
303{
304 KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor);
305
306 // Create kernels for executable nodes. This has to be done in execution order.
307 auto graph = const_cast<loco::Graph *>(_graph);
308
309 auto const graph_nodes = loco::all_nodes(graph);
310
311 // Checking for execution plan in node annotations.
312 bool has_execution_annotation = true;
313 auto const checking_exec_plan = [&has_execution_annotation](auto const node) {
317 };
318 std::for_each(begin(graph_nodes), end(graph_nodes), checking_exec_plan);
319
321 {
322 // Build ordered_nodes vector that stores the order of execution of graph nodes.
323 std::vector<const luci::CircleNode *> ordered_nodes(graph_nodes.size());
324
325 auto const filler = [&ordered_nodes](auto const node) {
329 };
330 std::for_each(begin(graph_nodes), end(graph_nodes), filler);
331
332 for (auto node : ordered_nodes)
333 {
334 if (isExecutableNode(node))
335 {
336 std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
337 _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
338 _runtime_graph->addKernel(std::move(kernel));
339 }
340 }
341 }
342 else
343 {
344 // If it is impossible to build the execution order plan,
345 // then we use the default postorder_traversal approach.
347 {
349 if (isExecutableNode(node))
350 {
351 std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
352 _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
353 _runtime_graph->addKernel(std::move(kernel));
354 }
355 }
356 }
357}
358
359} // namespace luci_interpreter
A neural network graph.
Definition Graph.h:161
NodeContext * nodes(void)
Definition Graph.h:218
Logical unit of computation.
Definition Node.h:55
T * at(uint32_t n) const
Access N-th object.
Definition ObjectPool.h:42
Class to build tensor data.
Definition CircleConst.h:35
const loco::DataTypeImpl< DT >::Type & at(uint32_t n) const
uint32_t size(void) const
CUSTOM in Circle.
const std::vector< uint8_t > & custom_options(void) const
const std::string & custom_code(void) const
Virtual CIRCLECUSTOMOUT in Circle.
GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir, const std::unordered_map< const loco::Graph *, RuntimeGraph * > &graph_to_runtime_graph, std::unordered_map< const loco::Node *, Tensor * > &node_to_tensor, IMemoryManager *memory_manager)
virtual void allocate_memory(luci_interpreter::Tensor &tensor)=0
void addKernel(std::unique_ptr< Kernel > &&kernel)
Tensor * addTensor(std::unique_ptr< Tensor > &&tensor)
void setOutputTensors(const std::vector< Tensor * > &output_tensors)
void setInputTensors(const std::vector< Tensor * > &input_tensors)
std::vector< loco::Node * > postorder_traversal(const std::vector< loco::Node * > &roots)
Generate postorder traversal sequence starting from "roots".
Definition Algorithm.cpp:53
std::set< Node * > all_nodes(Graph *)
Enumerate all the nodes in a given graph.
Definition Graph.cpp:59
std::vector< Node * > input_nodes(const Graph *)
Definition Graph.cpp:71
T must_cast(FeatureEncoder *node)
A helper dynamic_cast that throws when failed.
std::vector< Node * > output_nodes(Graph *)
Definition Graph.cpp:101
size_t getDataTypeSize(DataType data_type)
Definition DataType.h:33
T must_cast(loco::Node *node)
bool has_execution_plan(const luci::CircleNode *circle_node)
luci::CircleNodeExecutionPlan get_execution_plan(const luci::CircleNode *circle_node)
int32_t size[5]
Definition Slice.cpp:35
int32_t begin[5]
Definition Slice.cpp:33
Definition Shape.h:28
CircleQuantParam * quantparam(void) const
NodeName name(void) const
virtual CircleOpcode opcode(void) const =0
std::vector< float > scale
std::vector< int64_t > zerop
std::vector< float > scale
Definition Tensor.h:95
std::vector< int32_t > zero_point
Definition Tensor.h:96
std::unordered_map< const Tensor *, const luci::CircleNode * > tensor_to_node
Definition RuntimeToIR.h:32
std::unordered_map< const Kernel *, const luci::CircleNode * > kernel_to_node
Definition RuntimeToIR.h:33