ONE - On-device Neural Engine
CircleExporterImpl.cpp
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "CircleExporterImpl.h"
#include "CircleExportMetadata.h"
#include "CircleTensorExporter.h"
#include "CircleOperationExporter.h"
#include "CircleExporterUtils.h"
#include "ProgressReporter.h"

#include <luci/IR/CircleNodes.h>
#include <luci/Pass/CircleShapeInferencePass.h>
#include <luci/Pass/CircleTypeInferencePass.h>

#include <loco.h>
#include <logo/Phase.h>
#include <oops/InternalExn.h>
#include <mio/circle/schema_generated.h>
#include <flatbuffers/flatbuffers.h>

#include <cassert>
#include <unordered_map>
#include <string>
#include <vector>

namespace
{

void registerGraphInputTensors(loco::Graph *graph, luci::SubGraphContext &ctx)
{
  for (uint32_t n = 0; n < graph->inputs()->size(); ++n)
  {
    auto node = luci::input_node(graph, n);
    assert(node != nullptr);
    ctx._inputs.push_back(luci::get_tensor_index(node));
  }
}

void registerGraphOutputTensors(loco::Graph *graph, luci::SubGraphContext &ctx)
{
  for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
  {
    auto push = luci::output_node(graph, n);
    assert(push != nullptr);
    auto node = push->from();
    assert(node != nullptr);

    // Do not export CircleOutput when its input is CircleOutputExclude
    if (dynamic_cast<luci::CircleOutputExclude *>(push->from()) != nullptr)
    {
      continue;
    }

    ctx._outputs.push_back(luci::get_tensor_index(node));
  }
}

} // namespace

namespace
{

using namespace circle;
using namespace flatbuffers;

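// The circle schema (like TFLite's) carries two operator code fields: the legacy
// 'deprecated_builtin_code' (int8) and the newer 'builtin_code' (int32). Operators whose
// builtin code does not fit into int8 are recorded in the legacy field as the placeholder
// value 127 (BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES), while 'builtin_code'
// always holds the real value.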
Offset<Vector<Offset<OperatorCode>>>
encodeOperatorCodes(FlatBufferBuilder &builder, std::unordered_map<luci::OpCode, uint32_t> &opcodes)
{
  std::vector<Offset<OperatorCode>> operator_codes_vec(opcodes.size());
  for (const auto &it : opcodes)
  {
    uint32_t idx = it.second;
    int8_t dep_code = 127; // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES
    if (it.first.opcode < BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
      dep_code = static_cast<int8_t>(it.first.opcode);
    if (it.first.opcode != BuiltinOperator_CUSTOM)
    {
      operator_codes_vec[idx] =
        CreateOperatorCode(builder, dep_code, 0, it.first.version, it.first.opcode);
    }
    else
    {
      operator_codes_vec[idx] =
        CreateOperatorCode(builder, dep_code, builder.CreateString(it.first.custom_code),
                           it.first.version, it.first.opcode);
    }
  }

  return builder.CreateVector(operator_codes_vec);
}

} // namespace

namespace
{

void optimize(loco::Graph *g)
{
  logo::Phase phase;
  {
    // prepare type and shape before optimization
    phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
    phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());

    // TODO add more optimization passes (with a knob)
  }

  logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};

  luci::ProgressReporter prog(g, logo::PhaseStrategy::Restart);
  phase_runner.attach(&prog);
  phase_runner.run(phase);
}

} // namespace

namespace luci
{

using namespace circle;
using namespace flatbuffers;

CircleExporterImpl::CircleExporterImpl(Module *module) { exportModule(module); }

::flatbuffers::Offset<::circle::SubGraph>
CircleExporterImpl::exportSubgraph(SerializedGraphData &gd)
{
  auto tensors = _builder.CreateVector(gd._tensors);
  auto inputs = _builder.CreateVector(gd._inputs);
  auto outputs = _builder.CreateVector(gd._outputs);
  auto operators = _builder.CreateVector(gd._operators);
  auto name = _builder.CreateString(gd._name);
  auto subgraph = CreateSubGraph(_builder, tensors, inputs, outputs, operators, name);
  return subgraph;
}

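// Serialize the whole module. If exportModuleData() reports that the result would exceed the
// flatbuffer size limit (see check_size_limit), the per-graph export annotations are cleared
// and the export is re-run in extended-buffer mode, where tensor contents are appended after
// the flatbuffer area by finalizeWithExtendedBuffer() instead of being embedded in it.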
void CircleExporterImpl::exportModule(Module *module)
{
  assert(module->size() > 0);
  // do graph optimization

  SerializedModelData md;

  _builder.Clear();

  // prepare model data
  prepareModelData(_builder, md);

  // if source is extended buffer mode, force export to use extended buffer
  md._ext_buffer = module->ext_buffer();

  if (!exportModuleData(module, md) && md._require_ext_buffer)
  {
    assert(md._ext_buffer == false);

    // do some cleanups for re-run
    _builder.Clear();
    for (size_t g = 0; g < module->size(); ++g)
    {
      auto graph = module->graph(g);
      clearExportInfo(graph);
    }
    prepareModelData(_builder, md);

    // run again with ext_buffer mode
    md._ext_buffer = true;
    exportModuleData(module, md);
  }

  finalizeWithExtendedBuffer(md);
}

bool CircleExporterImpl::exportModuleData(Module *module, SerializedModelData &md)
{
  std::vector<flatbuffers::Offset<circle::SubGraph>> subgraph_vec;

  for (size_t g = 0; g < module->size(); ++g)
  {
    auto graph = module->graph(g);

    optimize(graph);

    SerializedGraphData gd;

    // set Subgraph name
    gd._name = graph->name();

    // parse graph into SerializedModelData structure
    exportOpDefinedTensors(graph, _builder, md, gd);

    // NOTE Invoke these register functions only after each node is annotated with its tensor_index
    registerGraphInputTensors(graph, gd);
    registerGraphOutputTensors(graph, gd);

    exportNodes(graph, _builder, md, gd);

    // Subgraphs
    Offset<SubGraph> subgraph = exportSubgraph(gd);
    subgraph_vec.push_back(subgraph);
  }

  auto subgraphs = _builder.CreateVector(std::vector<Offset<SubGraph>>{subgraph_vec});

  // encode operator codes
  auto operator_codes = encodeOperatorCodes(_builder, md._operator_codes);

  // Description
  std::string description_str = "ONE-luci/export";
  auto description = _builder.CreateString(description_str);

  // Metadata
  md._metadata.source_table(module->source_table());
  auto metadata_vec = createCircleMetadataVector(_builder, md);
  auto metadata = _builder.CreateVector(std::vector<Offset<Metadata>>(metadata_vec));

  // create array of buffers
  auto buffers = _builder.CreateVector(md._buffers);

  // check whether the current total size exceeds the limit
  if (check_size_limit(_builder, 0))
  {
    md._require_ext_buffer = true;
    return false;
  }

  // This version is taken from the comment in the schema (.fbs) file
  constexpr uint32_t version = 0;

  // Model
  auto model_offset = CreateModel(_builder, version, operator_codes, subgraphs, description,
                                  buffers, 0 /* metadata_buffer */, metadata);
  FinishModelBuffer(_builder, model_offset);

  return true;
}

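// Extended-buffer finalization: keep the flatbuffer produced by exportModuleData() as-is,
// then append each tensor's raw data after it, 16-byte aligned, and patch the corresponding
// circle::Buffer entries in place so that their offset/size fields point into the appended
// region. The combined bytes are kept in _fb_data_with_ext.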
void CircleExporterImpl::finalizeWithExtendedBuffer(SerializedModelData &md)
{
  _ext_buffer = md._ext_buffer;
  if (!_ext_buffer)
    return;

  _fb_data_with_ext.clear();

  auto align16 = [](size_t &v) {
    while (v % 16 != 0)
      v++;
  };

  // get total memory for flatbuffer + all buffer_data
  size_t result_size = _builder.GetSize();
  align16(result_size);
  for (auto &it : md._buffer_data_map)
  {
    auto &buffer_data = it.second;
    result_size += buffer_data.size();
    align16(result_size);
  }
  result_size += 16; // for safety

  std::string result;
  const char *buff_ptr = reinterpret_cast<const char *>(_builder.GetBufferPointer());

  auto padalign16 = [](std::string &str) {
    while (str.size() % 16 != 0)
      str += '\0';
  };

  result.reserve(result_size);
  result.append(buff_ptr, _builder.GetSize());

  auto mutable_model = circle::GetMutableModel(result.data());
  auto mutable_buffers = mutable_model->mutable_buffers();

  // pad to be 16 bytes aligned
  padalign16(result);
  for (auto &it : md._buffer_data_map)
  {
    int32_t buffer_index = it.first;
    auto &buffer_data = it.second;
    uint64_t offset = result.size();
    uint64_t size = buffer_data.size();

    circle::Buffer *mutable_buffer = mutable_buffers->GetMutableObject(buffer_index);
    mutable_buffer->mutate_offset(offset);
    mutable_buffer->mutate_size(size);

    result.append(buffer_data.begin(), buffer_data.end());
    padalign16(result);
  }
  padalign16(result);

  // use final result
  _fb_data_with_ext = result;
}

const char *CircleExporterImpl::getBufferPointer() const
{
  if (_ext_buffer)
    return reinterpret_cast<const char *>(_fb_data_with_ext.data());
  return reinterpret_cast<const char *>(_builder.GetBufferPointer());
}

size_t CircleExporterImpl::getBufferSize() const
{
  if (_ext_buffer)
    return _fb_data_with_ext.size();
  return _builder.GetSize();
}

} // namespace luci
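Usage sketch (an illustration, not part of the file above): how the exporter might be driven once a luci::Module is available, for example from an importer. The helper name write_circle_file and the include path are assumptions; getBufferPointer() and getBufferSize() are the accessors defined in this translation unit.

// Hypothetical helper, for illustration only: serialize a module with CircleExporterImpl
// and dump the resulting circle flatbuffer to a file.
#include "CircleExporterImpl.h" // assumed location of the private export header

#include <fstream>
#include <string>

bool write_circle_file(luci::Module *module, const std::string &path)
{
  // exportModule() runs inside the constructor, so the serialized buffer is ready right away
  luci::CircleExporterImpl exporter(module);

  std::ofstream file(path, std::ofstream::binary);
  if (!file.good())
    return false;

  file.write(exporter.getBufferPointer(), static_cast<std::streamsize>(exporter.getBufferSize()));
  return file.good();
}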