Do compilation with the options, producing a CompilerArtifact that holds the generated trainable executors and the tracing context.
{
  if (!_options)
    throw std::runtime_error{"Empty compile option"};
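
  // Mode check: profiling is only supported together with the heterogeneous
  // scheduler and the "Dataflow" executor, so reject other configurations up front.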
  if (_options->he_profiling_mode)
  {
    if (!_options->he_scheduler)
      throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");

    if (_options->executor != "Dataflow")
      throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
  }

  _options->forceInternalOptions();
  _options->verboseOptions();

  auto custom_kernel_builder = _model->getKernelBuilder();
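
  // Run the mandatory passes and a light optimization pass on every subgraph of the model.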
  _model->iterate([&](const ir::SubgraphIndex &, ir::IGraph &graph) {
    auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph);

    // Mandatory passes
    compiler::pass::PassRunner{}
      .append(std::make_unique<compiler::pass::ConstantOutputPass>(subg))
      .append(std::make_unique<compiler::pass::OddOutputPass>(subg))
      .run();

    // Optimizations
    compiler::pass::PassRunner{}
      .append(std::make_unique<compiler::pass::UnusedOperandEliminationPass>(subg))
      .run();
  });

  std::unordered_map<ir::SubgraphIndex, std::shared_ptr<ir::train::TrainableGraph>>
    trainable_subgraphs;

  if (_model->hasOnly<ir::Graph>())
  {
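    // Convert each inference ir::Graph into an ir::train::TrainableGraph copy and
    // replace its operations with trainable counterparts.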
    _model->iterate([&](const ir::SubgraphIndex &subg_index, const ir::IGraph &graph) {
      const auto &subg = nnfw::misc::polymorphic_downcast<const ir::Graph &>(graph);

      auto trainable_subg = std::make_shared<ir::train::TrainableGraph>(subg);

      auto converter = TrainableOperationConverter{*trainable_subg, &_training_info};
      ir::OperationIndex min_trainable_op_idx;
      subg.operations().iterate(
        [&](const ir::OperationIndex &op_index, const ir::IOperation &op) {
          auto trainable_op = converter(op);
          // Operations listed in the training info get their weights updated during training.
          if (_training_info.getTrainableOps().find(op_index) !=
              std::end(_training_info.getTrainableOps()))
          {
            trainable_op->enableWeightsUpdate();
            if (op_index.value() < min_trainable_op_idx.value())
            {
              min_trainable_op_idx = op_index;
            }
          }
          [[maybe_unused]] auto gen_index =
            trainable_subg->replaceOperation(op_index, std::move(trainable_op));
          assert(gen_index == op_index);
        });
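
      // Backward computation is enabled from the lowest-indexed trainable operation onward.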
      for (auto idx = min_trainable_op_idx; idx.value() < trainable_subg->operations().size();
           idx++)
      {
        trainable_subg->enableBackward(idx);
      }

      trainable_subgraphs[subg_index] = std::move(trainable_subg);
    });
  }
  else
  {
    throw std::runtime_error("TrainingCompiler: Invalid model");
  }
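
  // The original model is no longer needed; only the trainable subgraphs are used from here on.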
  _model.reset();

  // Assumption: the dot dumper is constructed from the graph dump level option.
  auto dot_dumper = dumper::dot::DotDumper(_options->graph_dump_level);

  for (const auto &[subg_index, subg] : trainable_subgraphs)
  {
    dot_dumper.dump(*subg, nnfw::misc::str("before_loss_insertion-", subg_index.value()));
  }

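  // Insert a loss operation into each trainable subgraph according to the training info.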
  for (auto &&[subg_index, trainable_subg] : trainable_subgraphs)
  {
    compiler::pass::PassRunner{}
      .append(std::make_unique<train::pass::LossInsertionPass>(*trainable_subg, &_training_info,
                                                               subg_index))
      .run();
  }

  for (const auto &[subg_index, subg] : trainable_subgraphs)
  {
    dot_dumper.dump(*subg, nnfw::misc::str("after_loss_insertion-", subg_index.value()));
  }

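  // Recompute use-def dependencies for training and verify each trainable graph.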
  for (auto &&[subg_index, subg] : trainable_subgraphs)
  {
    subg->updateGraphDependency();
    subg->verify();

    dot_dumper.dump(*subg,
                    nnfw::misc::str("after_initializing_training_usedefs-", subg_index.value()));
  }

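  // Resize the batch dimension of every subgraph input to the training batch size.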
  for (auto &&pair : trainable_subgraphs)
  {
    auto trainable_subg = pair.second;

    for (const auto &ind : trainable_subg->getInputs())
    {
      auto &input = trainable_subg->operands().at(ind);
      auto new_shape = input.info().shape();

      if (new_shape.dim(0) != 1 && new_shape.dim(0) != ir::Shape::kUnspecifiedDim)
        throw std::runtime_error("the first dim is not 1. It is not supported yet.");
      new_shape.dim(0) = _training_info.batchSize();
      input.info().shape(new_shape);
    }
  }

  auto tracing_ctx = std::make_unique<util::TracingCtx>();

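  // Lower: assign a backend to each operation of every trainable subgraph.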
  std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::train::LoweredTrainableGraph>>
    lowered_subgs;
  {
    for (auto &&[subg_index, trainable_subg] : trainable_subgraphs)
    {
      lowered_subgs[subg_index] =
        std::make_unique<compiler::train::LoweredTrainableGraph>(*trainable_subg, *_options);

      // Register the lowered graph with the tracing context.
      tracing_ctx->setSubgraphIndex(&(lowered_subgs[subg_index]->graph()), subg_index.value());
    }
  }

  for (const auto &[subg_index, lowered_subg] : lowered_subgs)
  {
    dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_subg-", subg_index.value()));
  }

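  // Create a backward (gradient) operand for every non-constant operand of each lowered graph.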
  for (const auto &pair : lowered_subgs)
  {
    auto lowered_subg = pair.second.get();
    auto &tgraph = lowered_subg->trainable_graph();
    tgraph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &obj) {
      if (!obj.isConstant())
      {
        auto bwd_operand = std::make_unique<ir::Operand>(obj);
        [[maybe_unused]] const auto gen_index =
          tgraph.addBackwardOperand(index, std::move(bwd_operand));
        assert(gen_index == index);
      }
    });
  }

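  // Static shape inference: forward shapes first, then backward shapes for training.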
  {
    // Run shape inference for the primary subgraph; inferers of child subgraphs are invoked from it.
    std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
      createStaticShapeInferers(lowered_subgs);

    const auto primary_subg_idx = ir::SubgraphIndex{0};
    inferers.at(primary_subg_idx)->infer();

    for (const auto &pair_inferer : inferers)
    {
      const auto inferer = pair_inferer.second.get();
      inferer->dump();
    }
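
    // Infer backward shapes for each lowered subgraph.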
    for (auto &&pair : lowered_subgs)
    {
      auto &lowered_subg = pair.second;
      auto inferer = std::make_unique<StaticBackwardShapeInferer>(lowered_subg.get());
      inferer->infer();
      inferer->dump();
    }
  }

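  // Validate the inferred shapes of each lowered graph.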
  for (const auto &pair : lowered_subgs)
  {
    auto &lowered_subg = pair.second;
    compiler::ShapeValidator{lowered_subg->graph()}();
  }

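  // Executor generation: create a trainable executor for each lowered subgraph.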
  auto executors = std::make_shared<exec::train::TrainableExecutors>();
  for (auto &&[subg_index, lowered_subg] : lowered_subgs)
  {
    auto const model_index = ir::ModelIndex{0};
    auto const indexed_ranks = lowered_subg->indexed_ranks();

    ir::OperationDumper dumper("Executor generation of Subgraph " +
                               std::to_string(subg_index.value()));
    lowered_subg->graph().operations().iterate(
      [&](const ir::OperationIndex &, const ir::IOperation &op) { op.accept(dumper); });

    ExecutorFactoryArgs args;
    args.tracing_ctx = tracing_ctx.get();
    args.options = _options;
    args.model_index = model_index;
    args.custom_kernel_builder = custom_kernel_builder;

    // Assumption: the ExecutorFactory provides a training overload of create() that also
    // takes the training info.
    auto executor = std::unique_ptr<exec::IExecutor>{
      ExecutorFactory::get().create(std::move(lowered_subg), executors, args, _training_info)};
    executor->setIndexedRanks(indexed_ranks);
    executors->emplace(model_index, subg_index, std::move(executor));
  }

  return std::make_unique<CompilerArtifact>(executors, std::move(tracing_ctx));
}