ONE - On-device Neural Engine
BackendContext.cc
/*
 * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "BackendContext.h"

#include "TensorBuilder.h"
#include "TensorPlanner.h"
#include "KernelGenerator.h"
#include "ops/BackPropInitializer.h"

#include <misc/polymorphic_downcast.h>

#include <cassert>

namespace onert
{
namespace backend
{
namespace train
{

namespace
{
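// Creates the OperandInfo used to register a backward (back-propagated) tensor.
// For now it simply mirrors the forward operand's shape, type info, memory allocation
// type, and constness.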
ir::OperandInfo createBackwardTensorInfo(const ir::Operand &operand)
{
  // TODO Use different shape of back-propagated tensor if it exists
  return ir::OperandInfo{operand.shape(), operand.typeInfo(), operand.info().memAllocType(),
                         operand.isConstant()};
}

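// Appends an ops::BackPropInitializer to each operation's TrainableFnSequence so that the
// operation's back-prop tensors are initialized at the start of its backward sequence (the
// function appended last runs first during backwarding). Each non-constant, non-input
// operand is handed to exactly one operation: the first one that reaches it in essential
// backward order.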
void AddBackPropInitializers(const ir::train::TrainableGraph &tgraph, TensorRegistry &tensor_reg,
                             FunctionMap &fn_map)
{
  util::Set<ir::OperandIndex> unvisited;
  tgraph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &operand) {
    if (!tgraph.getInputs().contains(index) && !operand.isConstant())
      unvisited.add(index);
  });

  for (const auto &op_index : tgraph.essentialBackwardOrder())
  {
    assert(fn_map.find(op_index) != fn_map.end());

    auto &tn_seq = fn_map.at(op_index);

    // The function added latest is executed first in a sequence during backwarding.
    std::vector<BackPropTensor *> back_props;
    const auto &op = tgraph.operation(op_index);
    for (const auto &back_prop_index :
         op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
    {
      assert(op.isRequiredForBackward());
      if (unvisited.contains(back_prop_index))
      {
        auto back_prop_tensor = tensor_reg.getBackPropTensor(back_prop_index);
        assert(back_prop_tensor != nullptr);
        back_props.emplace_back(back_prop_tensor);
        unvisited.remove(back_prop_index);
      }
    }
    if (!back_props.empty())
    {
      auto initializer = std::make_unique<ops::BackPropInitializer>(back_props);
      tn_seq->append(std::move(initializer));
    }
  }
}

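// Collects the backward-pass operand indices that need back-prop tensors: the inputs of
// every operation in essential backward order, excluding operands owned by other backends
// (external_operands) and operands that have no training uses or defs.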
util::Set<ir::train::TrainingOperandIndex>
getBackwardTensorList(const ir::train::TrainableGraph &tgraph,
                      const util::Set<ir::OperandIndex> &external_operands)
{
  util::Set<ir::train::TrainingOperandIndex> ret;

  // TODO Reuse registered tensors when they are planned for memory optimization.
  auto border = tgraph.essentialBackwardOrder();
  for (const auto op_index : border)
  {
    const auto &trainable_op = tgraph.operation(op_index);
    assert(trainable_op.isRequiredForBackward());
    // This assumes that back-propagated tensors of loss outputs are not used
    for (const auto &ind :
         trainable_op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
    {
      if (external_operands.contains(ind))
        continue;

      const auto &operand_index = ir::train::TrainingOperandIndex{ind, false};

      const auto &training_usedefs = tgraph.trainingUseDefs();
      const auto &usedefs = training_usedefs.at(operand_index);
      const bool not_used = usedefs.getTrainingDefs().empty() && usedefs.getTrainingUses().empty();
      if (not_used)
        continue;

      ret.add(operand_index);
    }
  }

  return ret;
}

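// For each non-constant backward operand that is defined by more than one operation,
// collects a DisposableTensorIndex per (defining op, operand) pair so that a separate,
// disposable back-prop tensor can be planned for each defining operation.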
util::Set<DisposableTensorIndex>
getDisposableBackPropTensorList(const ir::train::TrainableGraph &tgraph,
                                const util::Set<ir::OperandIndex> &external_operands)
{
  util::Set<DisposableTensorIndex> ret;

  const auto candidates = getBackwardTensorList(tgraph, external_operands);
  for (const auto &backwarding_operand_index : candidates)
  {
    const auto &operand = tgraph.operands().at(backwarding_operand_index.index());
    const auto &training_usedefs = tgraph.trainingUseDefs();
    const auto &usedefs = training_usedefs.at(backwarding_operand_index);
    const bool is_multiple_defs = usedefs.getTrainingDefs().size() > 1;
    if (!operand.isConstant() && is_multiple_defs)
      for (const auto &def : usedefs.getTrainingDefs())
        ret.add(DisposableTensorIndex{def.index(), backwarding_operand_index.index()});
  }

  return ret;
}
} // namespace

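// Plans and allocates forward/backward tensors, generates the kernel function map, fills
// constant TrainableTensors with their operand data (then releases the original data),
// and finally plans and allocates layer-scope tensors before returning the function map.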
FunctionMap BackendContext::gen()
{
  planForwardTensors();
  planBackwardTensors();

  _tensor_builder->allocate();
  _tensor_builder->allocateBackward();

  auto fn_map = generateFunctionMap();

  // Initialize TrainableTensors
  trainable_graph()->operands().iterate(
    [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
      if (external_operands().contains(ind) || !operand.isConstant())
        return;

      auto tensor = tensor_registry()->getNativeITensor(ind);
      assert(tensor != nullptr);

      VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;

      auto data = operand.shareData();
      assert(data && data->base());
      auto trainable_tensor = dynamic_cast<TrainableTensor *>(tensor);

      if (trainable_tensor == nullptr)
        throw std::runtime_error{"This tensor is not a trainable tensor"};

      trainable_tensor->fillBuffer(data);
    });

  // NOTE For memory optimization, we want to free some operand data
  const_cast<ir::train::TrainableGraph &>(*_tdata->tgraph)
    .operands()
    .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });

  // TODO Enable
  // for (auto &&it : ret)
  // {
  //   auto &fn_seq = it.second;
  //   fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
  // }

  // NOTE Since LayerScopeTensors are defined in each kernel (layer),
  //      they should be planned and allocated after the kernels are generated.
  planLayerScopeTensors(fn_map);
  _tensor_builder->allocateLayerScope();

  return fn_map;
}

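// Registers tensor info for every non-external forward operand, then lets TensorPlanner
// plan the trainable and non-constant tensors.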
void BackendContext::planForwardTensors()
{
  const auto &tgraph = *trainable_graph();

  tgraph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &obj) {
    if (external_operands().contains(index))
      return;
    if (!index.valid())
      return;

    _tensor_builder->registerTensorInfo(index, obj.info());
  });

  const auto ctx_data = data();
  TensorPlanner tensor_planner{*ctx_data->tgraph.get(), ctx_data->external_operands};
  tensor_planner.planTrainableTensors(_tensor_builder.get());
  tensor_planner.planNonConstTensors(_tensor_builder.get());
}

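// Registers backward tensor info (gradient and back-prop tensors, plus disposable
// back-prop tensors for multi-def operands) and then plans them with TensorPlanner.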
void BackendContext::planBackwardTensors()
{
  const ir::train::TrainableGraph &tgraph = *trainable_graph();

  auto tensor_builder = _tensor_builder;

  const auto operand_indices = getBackwardTensorList(tgraph, external_operands());
  for (const auto &operand_index : operand_indices)
  {
    if (external_operands().contains(operand_index.index()))
      continue;

    assert(operand_index.valid());
    assert(!operand_index.is_forward());

    const auto &operand = tgraph.operands().at(operand_index.index());
    tensor_builder->registerBackwardTensorInfo(operand_index.index(),
                                               createBackwardTensorInfo(operand));
  }

  const auto disposable_indices = getDisposableBackPropTensorList(tgraph, external_operands());
  for (const auto &disposable_index : disposable_indices)
  {
    const auto &operand = tgraph.operands().at(disposable_index.operand_index());
    tensor_builder->registerDisposableBackwardTensorInfo(disposable_index,
                                                         createBackwardTensorInfo(operand));
  }

  // Plan tensors only in backwarding to reduce peak memory usage
  const auto ctx_data = data();
  TensorPlanner tensor_planner{*ctx_data->tgraph.get(), ctx_data->external_operands};
  tensor_planner.planGradientTensors(tensor_builder.get());
  tensor_planner.planBackPropTensors(tensor_builder.get());
  tensor_planner.planDisposableBackPropTensors(tensor_builder.get());
}

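// Generates a TrainableFnSequence for each operation in _tdata->op_order via the
// KernelGenerator, then adds BackPropInitializers so that each sequence starts by
// initializing its back-prop tensors during backwarding.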
FunctionMap BackendContext::generateFunctionMap()
{
  FunctionMap ret;

  for (const auto &op_ind : _tdata->op_order)
  {
    auto fn_seq = kernel_gen->generate(op_ind);
    ret.emplace(op_ind, std::move(fn_seq));
  }

  // NOTE Each BackPropInitializer should be called first in each op node during backwarding
  const auto &tgraph = *_tdata->tgraph;
  auto tensor_reg = nnfw::misc::polymorphic_downcast<TensorRegistry *>(_tensor_registry.get());
  AddBackPropInitializers(tgraph, *tensor_reg, ret);

  return ret;
}

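// Collects LayerScopeTensors from every generated kernel that is required for backward,
// registers them with the TensorBuilder, and plans them with TensorPlanner. This runs
// after kernel generation because layer-scope tensors are defined inside each kernel.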
void BackendContext::planLayerScopeTensors([[maybe_unused]] const FunctionMap &fn_map)
{
  const auto &ops = trainable_graph()->operations();

  auto register_tensors = [this](const ir::OperationIndex &op_idx,
                                 std::optional<LayerScopeTensors> &&tensors) {
    if (not tensors.has_value())
      return;

    auto ls_tensors = tensors.value();
    for (auto i = 0u; i < ls_tensors.size(); ++i)
    {
      LayerScopeTensorIndex tensor_idx(op_idx, i);
      _tensor_builder->registerLayerScopeTensor(tensor_idx, ls_tensors[i]);

      VERBOSE(BackendContext) << "(idx:" << tensor_idx << ") registered" << std::endl;
    }
  };

  for (auto &pair : fn_map)
  {
    const auto &op_idx = pair.first;
    auto &fn_seq = pair.second;

    const ir::IOperation *op = &ops.at(op_idx);
    const auto trainable_op = dynamic_cast<const ir::train::TrainableOperation *>(op);
    assert(trainable_op != nullptr);

    if (not trainable_op->isRequiredForBackward())
      continue;

    VERBOSE(BackendContext) << "register layerscope tensor for " << trainable_op->name()
                            << std::endl;

    fn_seq->iterate([&](exec::train::ITrainableFunction &fn) {
      register_tensors(op_idx, fn.registerLayerScopeTensors());
    });
  }

  const auto ctx_data = data();
  TensorPlanner tensor_planner{*ctx_data->tgraph.get(), ctx_data->external_operands};
  tensor_planner.planLayerScopeTensors(_tensor_builder.get());
}

} // namespace train
} // namespace backend
} // namespace onert