ONE - On-device Neural Engine
CircleExporterImpl.cpp
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "CircleExporterImpl.h"
#include "CircleExportMetadata.h"
#include "CircleTensorExporter.h"
#include "CircleOperationExporter.h"
#include "CircleExporterUtils.h"
#include "ProgressReporter.h"

#include <luci/IR/CircleNodes.h>
#include <luci/Pass/CircleShapeInferencePass.h>
#include <luci/Pass/CircleTypeInferencePass.h>

#include <loco.h>
#include <logo/Phase.h>
#include <oops/InternalExn.h>
#include <mio/circle/schema_generated.h>

#include <cassert>
#include <unordered_map>
#include <string>
#include <vector>

namespace
{

void registerGraphInputTensors(loco::Graph *graph, luci::SubGraphContext &ctx)
{
  for (uint32_t n = 0; n < graph->inputs()->size(); ++n)
  {
    auto node = luci::input_node(graph, n);
    assert(node != nullptr);
    ctx._inputs.push_back(luci::get_tensor_index(node));
  }
}

void registerGraphOutputTensors(loco::Graph *graph, luci::SubGraphContext &ctx)
{
  for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
  {
    auto push = luci::output_node(graph, n);
    assert(push != nullptr);
    auto node = push->from();
    assert(node != nullptr);

    // Do not export CircleOutput when its input is CircleOutputExclude
    if (dynamic_cast<luci::CircleOutputExclude *>(push->from()) != nullptr)
    {
      continue;
    }

    ctx._outputs.push_back(luci::get_tensor_index(node));
  }
}

} // namespace

namespace
{

using namespace circle;
using namespace flatbuffers;

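// Serialize the collected operator codes. The first argument of CreateOperatorCode is the
// deprecated int8 builtin code; opcodes that do not fit in it are recorded there as
// BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES (127) while the real opcode is kept in the
// extended builtin_code field.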
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>>
encodeOperatorCodes(FlatBufferBuilder &builder, std::unordered_map<luci::OpCode, uint32_t> &opcodes)
{
  std::vector<Offset<OperatorCode>> operator_codes_vec(opcodes.size());
  for (const auto &it : opcodes)
  {
    uint32_t idx = it.second;
    int8_t dep_code = 127; // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES
    if (it.first.opcode < BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
      dep_code = static_cast<int8_t>(it.first.opcode);
    if (it.first.opcode != BuiltinOperator_CUSTOM)
    {
      operator_codes_vec[idx] =
        CreateOperatorCode(builder, dep_code, 0, it.first.version, it.first.opcode);
    }
    else
    {
      operator_codes_vec[idx] =
        CreateOperatorCode(builder, dep_code, builder.CreateString(it.first.custom_code),
                           it.first.version, it.first.opcode);
    }
  }

  return builder.CreateVector(operator_codes_vec);
}

} // namespace

namespace
{

void optimize(loco::Graph *g)
{
  logo::Phase phase;
  {
    // prepare type and shape before optimization
    phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
    phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());

    // TODO add more optimization passes (with a knob)
  }

  logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};

  luci::ProgressReporter prog(g, logo::PhaseStrategy::Restart);
  phase_runner.attach(&prog);
  phase_runner.run(phase);
}

} // namespace

namespace luci
{

using namespace circle;
using namespace flatbuffers;

CircleExporterImpl::CircleExporterImpl(Module *module) { exportModule(module); }

flatbuffers::Offset<circle::SubGraph>
CircleExporterImpl::exportSubgraph(SerializedGraphData &gd)
{
  auto tensors = _builder.CreateVector(gd._tensors);
  auto inputs = _builder.CreateVector(gd._inputs);
  auto outputs = _builder.CreateVector(gd._outputs);
  auto operators = _builder.CreateVector(gd._operators);
  auto name = _builder.CreateString(gd._name);
  auto subgraph = CreateSubGraph(_builder, tensors, inputs, outputs, operators, name);
  return subgraph;
}

void CircleExporterImpl::exportModule(Module *module)
{
  assert(module->size() > 0);
  // do graph optimization

  SerializedModelData md;

  _builder.Clear();

  // prepare model data
  prepareModelData(_builder, md);

  // if source is extended buffer mode, force export to use extended buffer
  md._ext_buffer = module->ext_buffer();

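  // exportModuleData() sets _require_ext_buffer and returns false when the serialized model
  // would exceed the flatbuffer size limit; in that case the export is re-run with buffer
  // data placed outside the flatbuffer.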
  if (!exportModuleData(module, md) && md._require_ext_buffer)
  {
    assert(md._ext_buffer == false);

    // do some cleanups for re-run
    _builder.Clear();
    for (size_t g = 0; g < module->size(); ++g)
    {
      auto graph = module->graph(g);
      clearExportInfo(graph);
    }
    prepareModelData(_builder, md);

    // run again with ext_buffer mode
    md._ext_buffer = true;
    exportModuleData(module, md);
  }

  finalizeWithExtendedBuffer(md);
}

bool CircleExporterImpl::exportModuleData(Module *module, SerializedModelData &md)
{
  std::vector<flatbuffers::Offset<circle::SubGraph>> subgraph_vec;

  for (size_t g = 0; g < module->size(); ++g)
  {
    auto graph = module->graph(g);

    optimize(graph);

    SerializedGraphData gd;

    // set Subgraph name
    gd._name = graph->name();

    // parse graph into SerializedModelData structure
    exportOpDefinedTensors(graph, _builder, md, gd);

    // NOTE Invoke these register functions only after each node is annotated with its tensor_index
    registerGraphInputTensors(graph, gd);
    registerGraphOutputTensors(graph, gd);

    exportNodes(graph, _builder, md, gd);

    // Subgraphs
    Offset<SubGraph> subgraph = exportSubgraph(gd);
    subgraph_vec.push_back(subgraph);
  }

  auto subgraphs = _builder.CreateVector(std::vector<Offset<SubGraph>>{subgraph_vec});

  // encode operator codes
  auto operator_codes = encodeOperatorCodes(_builder, md._operator_codes);

  // Description
  std::string description_str = "ONE-luci/export";
  auto description = _builder.CreateString(description_str);

  // Metadata
  md._metadata.source_table(module->source_table());
  auto metadata_vec = createCircleMetadataVector(_builder, md);
  auto metadata = _builder.CreateVector(std::vector<Offset<Metadata>>(metadata_vec));

  // create array of buffers
  auto buffers = _builder.CreateVector(md._buffers);

  // check whether the current total size exceeds the limit
  if (check_size_limit(_builder, 0))
  {
    md._require_ext_buffer = true;
    return false;
  }

  // This version is taken from a comment in the fbs schema
  constexpr uint32_t version = 0;

  // Model
  auto model_offset = CreateModel(_builder, version, operator_codes, subgraphs, description,
                                  buffers, 0 /* metadata_buffer */, metadata);
  FinishModelBuffer(_builder, model_offset);

  return true;
}

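// Extended buffer layout: the flatbuffer itself comes first, then each buffer's raw data is
// appended with 16-byte alignment; the corresponding circle::Buffer entries are mutated in
// place so their offset/size fields point into the appended region.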
void CircleExporterImpl::finalizeWithExtendedBuffer(SerializedModelData &md)
{
  _ext_buffer = md._ext_buffer;
  if (!_ext_buffer)
    return;

  _fb_data_with_ext.clear();

  auto align16 = [](size_t &v) {
    while (v % 16 != 0)
      v++;
  };

  // get total memory for flatbuffer + all buffer_data
  size_t result_size = _builder.GetSize();
  align16(result_size);
  for (auto &it : md._buffer_data_map)
  {
    SerializedModelData::BufferData &buffer_data = it.second;
    result_size += buffer_data.size();
    align16(result_size);
  }
  align16(result_size);
  result_size += 16; // for safety

  std::string result;
  const char *buff_ptr = reinterpret_cast<const char *>(_builder.GetBufferPointer());

  auto padalign16 = [](std::string &str) {
    while (str.size() % 16 != 0)
      str += '\0';
  };

  result.reserve(result_size);
  result.append(buff_ptr, _builder.GetSize());

  auto mutable_model = circle::GetMutableModel(result.data());
  auto mutable_buffers = mutable_model->mutable_buffers();

  // pad to be 16 bytes aligned
  padalign16(result);
  for (auto &it : md._buffer_data_map)
  {
    int32_t buffer_index = it.first;
    SerializedModelData::BufferData &buffer_data = it.second;
    uint64_t offset = result.size();
    uint64_t size = buffer_data.size();

    circle::Buffer *mutable_buffer = mutable_buffers->GetMutableObject(buffer_index);
    mutable_buffer->mutate_offset(offset);
    mutable_buffer->mutate_size(size);

    result.append(buffer_data.begin(), buffer_data.end());
    padalign16(result);
  }
  padalign16(result);

  // use final result
  _fb_data_with_ext = result;
}

const char *CircleExporterImpl::getBufferPointer(void) const
{
  if (_ext_buffer)
    return reinterpret_cast<const char *>(_fb_data_with_ext.data());
  return reinterpret_cast<const char *>(_builder.GetBufferPointer());
}

size_t CircleExporterImpl::getBufferSize(void) const
{
  if (_ext_buffer)
    return _fb_data_with_ext.size();
  return _builder.GetSize();
}

} // namespace luci
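
For context, a minimal sketch of how this implementation can be driven: constructing CircleExporterImpl runs exportModule() immediately, after which the serialized model is available through getBufferPointer() and getBufferSize(). The write_circle helper below, its path parameter, and the include of the internal CircleExporterImpl.h header are illustrative assumptions, not part of this file.

#include "CircleExporterImpl.h"

#include <luci/IR/Module.h>

#include <fstream>
#include <string>

// Export a luci::Module (e.g. one built by the luci importer) and write the resulting
// .circle model to disk.
bool write_circle(luci::Module *module, const std::string &path)
{
  luci::CircleExporterImpl exporter(module); // the constructor performs the export

  std::ofstream file(path, std::ios::binary);
  if (!file.good())
    return false;

  // getBufferPointer()/getBufferSize() cover both the plain flatbuffer and the
  // extended-buffer artifact produced by finalizeWithExtendedBuffer().
  file.write(exporter.getBufferPointer(), static_cast<std::streamsize>(exporter.getBufferSize()));
  return file.good();
}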