ONE - On-device Neural Engine
GraphLoader.cpp
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "loader/GraphLoader.h"

#include "loader/KernelBuilder.h"

#include <loco/IR/Algorithm.h>

namespace luci_interpreter
{
namespace
{

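// Builds a static Shape from the rank and dimension values recorded on the IR node.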
template <typename NodeT> Shape getNodeShape(const NodeT *node)
{
  Shape shape(node->rank());
  for (uint32_t i = 0; i < node->rank(); ++i)
  {
    shape.dim(i) = node->dim(i).value();
  }
  return shape;
}

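// Returns a pointer to the constant data of a CircleConst node for a fixed data type DT,
// reporting the total byte size through data_size.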
template <DataType DT> const void *getNodeDataImpl(const luci::CircleConst *node, size_t *data_size)
{
  const size_t element_size = getDataTypeSize(DT);
  const int32_t num_elements = node->size<DT>();

  *data_size = num_elements * element_size;
  if (*data_size > 0)
  {
    // FIXME There is no good way to get the pointer to the data currently.
    return &node->at<DT>(0);
  }
  return nullptr;
}

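// Dispatches to getNodeDataImpl based on the node's data type; throws for unsupported types.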
const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
{
  switch (node->dtype())
  {
    case DataType::U4:
      return getNodeDataImpl<DataType::U4>(node, data_size);
    case DataType::U8:
      return getNodeDataImpl<DataType::U8>(node, data_size);
    case DataType::FLOAT32:
      return getNodeDataImpl<DataType::FLOAT32>(node, data_size);
    case DataType::S4:
      return getNodeDataImpl<DataType::S4>(node, data_size);
    case DataType::S8:
      return getNodeDataImpl<DataType::S8>(node, data_size);
    case DataType::S16:
      return getNodeDataImpl<DataType::S16>(node, data_size);
    case DataType::S32:
      return getNodeDataImpl<DataType::S32>(node, data_size);
    case DataType::S64:
      return getNodeDataImpl<DataType::S64>(node, data_size);
    case DataType::BOOL:
      return getNodeDataImpl<DataType::BOOL>(node, data_size);
    default:
      throw std::runtime_error("luci-intp (getNodeData) Unsupported type.");
  }
}

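// Extracts the data pointer and size referenced by a CircleReferencingConst custom node,
// whose custom_options holds a { data pointer, size } record instead of inline data.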
const void *getNodeData(const luci::CircleCustom *node, size_t *data_size)
{
  if (node->custom_code() != "CircleReferencingConst")
    return nullptr;

  // Helper struct describing the data referenced by the custom_options of a
  // CircleReferencingConst node.
  // TODO move this struct to header
  struct ConstDataReference
  {
    const uint8_t *data = nullptr;
    uint32_t size = 0;
  };

  const auto &custom_options = node->custom_options();
  const auto &const_data_ref = *reinterpret_cast<const ConstDataReference *>(custom_options.data());

  *data_size = const_data_ref.size;
  return const_data_ref.data;
}

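// Returns false for nodes that are not executed at runtime: graph inputs/outputs, constants,
// variables, virtual *Out nodes of multiple-output operators, and non-executable Custom nodes.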
bool isExecutableNode(const luci::CircleNode *node)
{
  switch (node->opcode())
  {
    // These nodes denote inputs / outputs of a graph.
    case luci::CircleOpcode::CIRCLECONST:
    case luci::CircleOpcode::CIRCLEINPUT:
    case luci::CircleOpcode::CIRCLEOUTPUT:
    case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE:
    // The following nodes denote outputs of multiple-output nodes.
    case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
    case luci::CircleOpcode::CIRCLECUSTOMOUT:
    case luci::CircleOpcode::CIRCLEIFOUT:
    case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
    case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
    case luci::CircleOpcode::CIRCLESPLITOUT:
    case luci::CircleOpcode::CIRCLESPLITVOUT:
    case luci::CircleOpcode::CIRCLETOPKV2OUT:
    case luci::CircleOpcode::CIRCLEUNIQUEOUT:
    case luci::CircleOpcode::CIRCLEUNPACKOUT:
    case luci::CircleOpcode::CIRCLEVARIABLE:
    case luci::CircleOpcode::CIRCLEWHILEOUT:
      return false;
    // Custom nodes may be either executable or non-executable.
    case luci::CircleOpcode::CUSTOM:
    {
      auto const custom_node = loco::must_cast<const luci::CircleCustom *>(node);

      // TODO handle more non-executable Custom ops here
      if (custom_node->custom_code() == "CircleReferencingConst")
        return false;

      return true;
    }
    default:
      return true;
  }
}

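// Returns false for nodes whose outputs are materialized elsewhere: the graph output node and
// multiple-output operators whose tensors are produced by their virtual *Out nodes.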
bool isTensorProducingNode(const luci::CircleNode *node)
{
  switch (node->opcode())
  {
    // Output nodes do not produce tensors.
    case luci::CircleOpcode::CIRCLEOUTPUT:
    // The following nodes are multiple-output nodes. They do not produce tensors themselves;
    // the tensors are produced by the corresponding *Out nodes instead.
    case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
    case luci::CircleOpcode::CUSTOM:
    case luci::CircleOpcode::IF:
    case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
    case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
    case luci::CircleOpcode::SPLIT:
    case luci::CircleOpcode::SPLIT_V:
    case luci::CircleOpcode::TOPK_V2:
    case luci::CircleOpcode::UNIQUE:
    case luci::CircleOpcode::UNPACK:
    case luci::CircleOpcode::WHILE:
      return false;
    default:
      return true;
  }
}

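// Checks whether a Custom node is one of the Custom ops this loader knows how to handle.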
bool isSupportedCustomNode(const luci::CircleNode *node)
{
  const auto custom_node = loco::must_cast<const luci::CircleCustom *>(node);

  // TODO handle more Custom ops here
  if (custom_node->custom_code() == "CircleReferencingConst")
    return true;

  return false;
}

} // namespace

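// Stores references to the IR graph and the runtime objects that the load*() methods populate.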
GraphLoader::GraphLoader(
  const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
  const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
  std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, IMemoryManager *memory_manager)
  : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
    _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor),
    _memory_manager(memory_manager)
{
}

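// Creates a Tensor for every tensor-producing node in the graph, copies constant data backed by
// CircleConst / CircleReferencingConst nodes into their tensors, and registers the tensors with
// the runtime graph.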
void GraphLoader::loadTensors()
{
  for (uint32_t i = 0; i < _graph->nodes()->size(); ++i)
  {
    const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));

    if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
    {
      const auto *cnode = loco::must_cast<const luci::CircleCustom *>(node);
      throw std::runtime_error("Unsupported Custom operator. " + cnode->custom_code() + " in " +
                               node->name());
    }

    if (!isTensorProducingNode(node))
      continue;

    // Only Input, Const, Custom and Variable nodes have their shapes assigned here.
    // Shapes of intermediate tensors will be inferred.
    Shape shape{};
    switch (node->opcode())
    {
      case luci::CircleOpcode::CIRCLECONST:
      case luci::CircleOpcode::CIRCLECUSTOMOUT:
      case luci::CircleOpcode::CIRCLEINPUT:
      case luci::CircleOpcode::CIRCLEVARIABLE:
        shape = getNodeShape(node);
        break;
      default:
        break;
    }

    AffineQuantization quantization;
    if (node->quantparam() != nullptr)
    {
      const luci::CircleQuantParam *params = node->quantparam();
      assert(params->scale.size() == params->zerop.size());
      quantization.scale.assign(params->scale.cbegin(), params->scale.cend());
      quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend());
      quantization.quantized_dimension = params->quantized_dimension;
    }

    auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization),
                                           node->name());

    // If the node has an execution plan, read the memory offset of its tensor from the beginning
    // of the shared memory buffer. Used by the Static Memory Manager.
    if (luci::has_execution_plan(node))
    {
      auto execution_plan = luci::get_execution_plan(node);
      assert(!execution_plan.offsets().empty());
      tensor->set_offset(execution_plan.offsets().front());
    }

    if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
    {
      size_t data_size{};
      const void *const_data = getNodeData(const_node, &data_size);
      if (const_data != nullptr)
      {
        _memory_manager->allocate_memory(*tensor);
        tensor->writeData(const_data, data_size);
      }
    }
    else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node))
    {
      const auto *custom_node =
        loco::must_cast<const luci::CircleCustom *>(custom_out_node->input());

      if (custom_node->custom_code() == "CircleReferencingConst")
      {
        size_t data_size{};
        const void *const_data = getNodeData(custom_node, &data_size);
        if (const_data != nullptr)
        {
          _memory_manager->allocate_memory(*tensor);
          tensor->writeData(const_data, data_size);
        }
      }
    }

    _node_to_tensor.emplace(node, tensor.get());
    _runtime_to_ir.tensor_to_node.emplace(tensor.get(), node);

    _runtime_graph->addTensor(std::move(tensor));
  }
}

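// Binds the runtime graph's input and output tensor lists to the tensors created for the
// corresponding Input and Output nodes, and allocates memory for the input tensors.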
void GraphLoader::initInputOutputTensors() const
{
  auto input_nodes = loco::input_nodes(_graph);
  std::vector<Tensor *> input_tensors(input_nodes.size());
  for (size_t i = 0; i < input_nodes.size(); ++i)
  {
    input_tensors[i] = _node_to_tensor.at(input_nodes[i]);
    _memory_manager->allocate_memory(*input_tensors[i]);
  }
  _runtime_graph->setInputTensors(input_tensors);

  auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph));
  std::vector<Tensor *> output_tensors(output_nodes.size());
  for (size_t i = 0; i < output_nodes.size(); ++i)
  {
    const auto *node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
    output_tensors[i] = _node_to_tensor.at(node->from());
  }
  _runtime_graph->setOutputTensors(output_tensors);
}

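// Builds a kernel for every executable node and registers it with the runtime graph.
// Kernels are created in execution-plan order when every node carries a plan annotation,
// and in postorder-traversal order otherwise.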
void GraphLoader::loadOperators()
{
  KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor);

  // Create kernels for executable nodes. This has to be done in execution order.
  auto graph = const_cast<loco::Graph *>(_graph);

  auto const graph_nodes = loco::all_nodes(graph);

  // Check whether every node carries an execution plan annotation.
  bool has_execution_annotation = true;
  auto const checking_exec_plan = [&has_execution_annotation](auto const node) {
    const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
    if (!luci::has_execution_plan(circle_node))
      has_execution_annotation = false;
  };
  std::for_each(begin(graph_nodes), end(graph_nodes), checking_exec_plan);

  if (has_execution_annotation)
  {
    // Build an ordered_nodes vector that stores the execution order of the graph nodes.
    std::vector<const luci::CircleNode *> ordered_nodes(graph_nodes.size());

    auto const filler = [&ordered_nodes](auto const node) {
      const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
      auto const position = luci::get_execution_plan(circle_node).order_in_plan();
      ordered_nodes.at(position) = circle_node;
    };
    std::for_each(begin(graph_nodes), end(graph_nodes), filler);

    for (auto node : ordered_nodes)
    {
      if (isExecutableNode(node))
      {
        std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
        _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
        _runtime_graph->addKernel(std::move(kernel));
      }
    }
  }
  else
  {
    // If an execution plan is not available, fall back to the default postorder traversal.
    for (const loco::Node *loco_node : loco::postorder_traversal(loco::output_nodes(graph)))
    {
      const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node);
      if (isExecutableNode(node))
      {
        std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
        _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
        _runtime_graph->addKernel(std::move(kernel));
      }
    }
  }
}

} // namespace luci_interpreter
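
// Usage sketch (not part of the original file; the surrounding runtime object names below are
// assumptions for illustration). A module-level loader typically drives GraphLoader roughly
// like this:
//
//   RuntimeToIR runtime_to_ir;
//   std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph;
//   std::unordered_map<const loco::Node *, Tensor *> node_to_tensor;
//
//   GraphLoader loader(graph, runtime_graph, runtime_to_ir, graph_to_runtime_graph,
//                      node_to_tensor, memory_manager);
//   loader.loadTensors();            // create tensors and copy constant data
//   loader.initInputOutputTensors(); // bind and allocate graph input/output tensors
//   loader.loadOperators();          // build kernels in execution order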