ONE - On-device Neural Engine
BackendContext.cc
/*
 * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "BackendContext.h"

#include "TensorBuilder.h"
#include "TensorPlanner.h"
#include "KernelGenerator.h"
#include "ops/BackPropInitializer.h"

#include <misc/polymorphic_downcast.h>

#include <cassert>

namespace onert::backend::train
{

namespace
{
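// Build the OperandInfo for a back-propagated tensor from its forward
// counterpart. For now it simply mirrors the forward operand's shape and type.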
ir::OperandInfo createBackwardTensorInfo(const ir::Operand &operand)
{
  // TODO Use different shape of back-propagated tensor if it exists
  return ir::OperandInfo{operand.shape(), operand.typeInfo(), operand.info().memAllocType(),
                         operand.isConstant()};
}

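// Append a BackPropInitializer to each op's function sequence so that every
// back-prop tensor is initialized exactly once, by the first op (in essential
// backward order) whose inputs include it.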
void AddBackPropInitializers(const ir::train::TrainableGraph &tgraph, TensorRegistry &tensor_reg,
                             FunctionMap &fn_map)
{
  util::Set<ir::OperandIndex> unvisited;
  tgraph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &operand) {
    if (!tgraph.getInputs().contains(index) && !operand.isConstant())
      unvisited.add(index);
  });

  for (const auto &op_index : tgraph.essentialBackwardOrder())
  {
    assert(fn_map.find(op_index) != fn_map.end());

    auto &tn_seq = fn_map.at(op_index);

    // The function appended last is executed first during backwarding.
    std::vector<BackPropTensor *> back_props;
    const auto &op = tgraph.operation(op_index);
    for (const auto &back_prop_index :
         op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
    {
      assert(op.isRequiredForBackward());
      if (unvisited.contains(back_prop_index))
      {
        auto back_prop_tensor = tensor_reg.getBackPropTensor(back_prop_index);
        assert(back_prop_tensor != nullptr);
        back_props.emplace_back(back_prop_tensor);
        unvisited.remove(back_prop_index);
      }
    }
    if (!back_props.empty())
    {
      auto initializer = std::make_unique<ops::BackPropInitializer>(back_props);
      tn_seq->append(std::move(initializer));
    }
  }
}

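// Collect the training-time (backward) operand indices that need back-prop
// tensors: the non-external inputs of every op on the essential backward path
// that are actually used or defined during training.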
util::Set<ir::train::TrainingOperandIndex>
getBackwardTensorList(const ir::train::TrainableGraph &tgraph,
                      const util::Set<ir::OperandIndex> &external_operands)
{
  util::Set<ir::train::TrainingOperandIndex> ret;

  // TODO Reuse registered tensors when they are planned for memory optimization.
  auto border = tgraph.essentialBackwardOrder();
  for (const auto op_index : border)
  {
    const auto &trainable_op = tgraph.operation(op_index);
    assert(trainable_op.isRequiredForBackward());
    // This assumes that back-propagated tensors of loss outputs are not used
    for (const auto &ind :
         trainable_op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
    {
      if (external_operands.contains(ind))
        continue;

      const auto &operand_index = ir::train::TrainingOperandIndex{ind, false};

      const auto &training_usedefs = tgraph.trainingUseDefs();
      const auto &usedefs = training_usedefs.at(operand_index);
      const bool not_used = usedefs.getTrainingDefs().empty() && usedefs.getTrainingUses().empty();
      if (not_used)
        continue;

      ret.add(operand_index);
    }
  }

  return ret;
}

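// Among the backward tensors above, a non-constant operand with multiple defs
// gets one DisposableTensorIndex per defining op, so that each partial
// back-prop result can be planned (and released) independently.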
util::Set<DisposableTensorIndex>
getDisposableBackPropTensorList(const ir::train::TrainableGraph &tgraph,
                                const util::Set<ir::OperandIndex> &external_operands)
{
  util::Set<DisposableTensorIndex> ret;

  const auto candidates = getBackwardTensorList(tgraph, external_operands);
  for (const auto &backwarding_operand_index : candidates)
  {
    const auto &operand = tgraph.operands().at(backwarding_operand_index.index());
    const auto &training_usedefs = tgraph.trainingUseDefs();
    const auto &usedefs = training_usedefs.at(backwarding_operand_index);
    const bool is_multiple_defs = usedefs.getTrainingDefs().size() > 1;
    if (!operand.isConstant() && is_multiple_defs)
      for (const auto &def : usedefs.getTrainingDefs())
        ret.add(DisposableTensorIndex{def.index(), backwarding_operand_index.index()});
  }

  return ret;
}
} // namespace

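// Entry point of training code generation: plan and allocate forward and
// backward tensors, generate the kernels, fill TrainableTensors with their
// initial data, and finally plan and allocate layer-scope tensors.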
FunctionMap BackendContext::gen()
{
  planForwardTensors();
  planBackwardTensors();

  _tensor_builder->allocate();
  _tensor_builder->allocateBackward();

  auto fn_map = generateFunctionMap();

  // Initialize TrainableTensors
  trainable_graph()->operands().iterate(
    [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
      if (external_operands().contains(ind) || !operand.isConstant())
        return;

      auto tensor = tensor_registry()->getNativeITensor(ind);
      assert(tensor != nullptr);

      VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;

      auto data = operand.shareData();
      assert(data && data->base());
      auto trainable_tensor = dynamic_cast<TrainableTensor *>(tensor);

      if (trainable_tensor == nullptr)
        throw std::runtime_error{"This tensor is not a trainable tensor"};

      trainable_tensor->fillBuffer(data);
    });

  // NOTE For memory optimization, we want to free some operand data
  const_cast<ir::train::TrainableGraph &>(*_tdata->tgraph)
    .operands()
    .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });

  // TODO Enable
  // for (auto &&it : fn_map)
  // {
  //   auto &fn_seq = it.second;
  //   fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
  // }

  // NOTE Since LayerScopeTensors are defined in each kernel (layer), they have
  //      to be planned and allocated after the kernels have been generated.
  planLayerScopeTensors(fn_map);
  _tensor_builder->allocateLayerScope();

  return fn_map;
}

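// Register tensor info for every non-external operand of the trainable graph,
// then let the TensorPlanner plan trainable (weight) and non-constant tensors.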
void BackendContext::planForwardTensors()
{
  const auto &tgraph = *trainable_graph();

  tgraph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &obj) {
    if (external_operands().contains(index))
      return;
    if (!index.valid())
      return;

    _tensor_builder->registerTensorInfo(index, obj.info());
  });

  const auto ctx_data = data();
  TensorPlanner tensor_planner{*ctx_data->tgraph.get(), ctx_data->external_operands};
  tensor_planner.planTrainableTensors(_tensor_builder.get());
  tensor_planner.planNonConstTensors(_tensor_builder.get());
}

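// Register gradient, back-prop, and disposable back-prop tensor info derived
// from the backward tensor lists, then plan them for the backward pass.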
void BackendContext::planBackwardTensors()
{
  const ir::train::TrainableGraph &tgraph = *trainable_graph();

  auto tensor_builder = _tensor_builder;

  const auto operand_indices = getBackwardTensorList(tgraph, external_operands());
  for (const auto &operand_index : operand_indices)
  {
    if (external_operands().contains(operand_index.index()))
      continue;

    assert(operand_index.valid());

    assert(!operand_index.is_forward());
    const auto &operand = tgraph.operands().at(operand_index.index());
    tensor_builder->registerBackwardTensorInfo(operand_index.index(),
                                               createBackwardTensorInfo(operand));
  }

  const auto disposable_indices = getDisposableBackPropTensorList(tgraph, external_operands());
  for (const auto &disposable_index : disposable_indices)
  {
    const auto &operand = tgraph.operands().at(disposable_index.operand_index());
    tensor_builder->registerDisposableBackwardTensorInfo(disposable_index,
                                                         createBackwardTensorInfo(operand));
  }

  // Plan tensors only in backwarding to reduce peak memory usage
  const auto ctx_data = data();
  TensorPlanner tensor_planner{*ctx_data->tgraph.get(), ctx_data->external_operands};
  tensor_planner.planGradientTensors(tensor_builder.get());
  tensor_planner.planBackPropTensors(tensor_builder.get());
  tensor_planner.planDisposableBackPropTensors(tensor_builder.get());
}

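// Generate a TrainableFnSequence per operation in execution order, then add
// the BackPropInitializers; being appended last, they run first during
// backwarding.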
FunctionMap BackendContext::generateFunctionMap()
{
  FunctionMap ret;

  for (const auto &op_ind : _tdata->op_order)
  {
    auto fn_seq = kernel_gen->generate(op_ind);
    ret.emplace(op_ind, std::move(fn_seq));
  }

  // NOTE Each BackPropInitializer should be called first in each op node during backwarding
  const auto &tgraph = *_tdata->tgraph;
  auto tensor_reg = nnfw::misc::polymorphic_downcast<TensorRegistry *>(_tensor_registry.get());
  AddBackPropInitializers(tgraph, *tensor_reg, ret);

  return ret;
}

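// Collect the LayerScopeTensors reported by each backward-enabled kernel,
// register them with the TensorBuilder, and plan them via the TensorPlanner.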
void BackendContext::planLayerScopeTensors([[maybe_unused]] const FunctionMap &fn_map)
{
  const auto &ops = trainable_graph()->operations();

  auto register_tensors = [this](const ir::OperationIndex &op_idx,
                                 std::optional<LayerScopeTensors> &&tensors) {
    if (not tensors.has_value())
      return;

    auto ls_tensors = tensors.value();
    for (auto i = 0u; i < ls_tensors.size(); ++i)
    {
      LayerScopeTensorIndex tensor_idx(op_idx, i);
      _tensor_builder->registerLayerScopeTensor(tensor_idx, ls_tensors[i]);

      VERBOSE(BackendContext) << "(idx:" << tensor_idx << ") registered" << std::endl;
    }
  };

  for (auto &pair : fn_map)
  {
    const auto &op_idx = pair.first;
    auto &fn_seq = pair.second;

    const ir::IOperation *op = &ops.at(op_idx);
    const auto trainable_op = dynamic_cast<const ir::train::TrainableOperation *>(op);
    assert(trainable_op != nullptr);

    if (not trainable_op->isRequiredForBackward())
      continue;

    VERBOSE(BackendContext) << "register layerscope tensor for " << trainable_op->name()
                            << std::endl;

    fn_seq->iterate([&](exec::train::ITrainableFunction &fn) {
      register_tensors(op_idx, fn.registerLayerScopeTensors());
    });
  }

  const auto ctx_data = data();
  TensorPlanner tensor_planner{*ctx_data->tgraph.get(), ctx_data->external_operands};
  tensor_planner.planLayerScopeTensors(_tensor_builder.get());
}

} // namespace onert::backend::train