ONE - On-device Neural Engine
BackendContext.cc
/*
 * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "BackendContext.h"

#include "TensorBuilder.h"
#include "TensorPlanner.h"
#include "KernelGenerator.h"
#include "ops/BackPropInitializer.h"

#include <misc/polymorphic_downcast.h>

#include <cassert>

namespace onert::backend::train
{

namespace
{
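// Creates tensor info for a back-propagation tensor from its forward operand's info.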
ir::OperandInfo createBackwardTensorInfo(const ir::Operand &operand)
{
  // TODO Use different shape of back-propagated tensor if it exists
  return ir::OperandInfo{operand.shape(), operand.typeInfo(), operand.info().memAllocType(),
                         operand.isConstant()};
}

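// Appends a BackPropInitializer to the function sequence of each backward op so that
// the back-prop tensors of its inputs are initialized the first time they are reached
// during backwarding.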
void AddBackPropInitializers(const ir::train::TrainableGraph &tgraph, TensorRegistry &tensor_reg,
                             FunctionMap &fn_map)
{
  util::Set<ir::OperandIndex> unvisited;
  tgraph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &operand) {
    if (!tgraph.getInputs().contains(index) && !operand.isConstant())
      unvisited.add(index);
  });

  for (const auto &op_index : tgraph.essentialBackwardOrder())
  {
    assert(fn_map.find(op_index) != fn_map.end());

    auto &tn_seq = fn_map.at(op_index);

    // The function added last is executed first in a sequence during backwarding.
    std::vector<BackPropTensor *> back_props;
    const auto &op = tgraph.operation(op_index);
    for (const auto &back_prop_index : op.getUsedInputSet())
    {
      assert(op.isRequiredForBackward());
      if (unvisited.contains(back_prop_index))
      {
        auto back_prop_tensor = tensor_reg.getBackPropTensor(back_prop_index);
        assert(back_prop_tensor != nullptr);
        back_props.emplace_back(back_prop_tensor);
        unvisited.remove(back_prop_index);
      }
    }
    if (back_props.size() != 0)
    {
      auto initializer = std::make_unique<ops::BackPropInitializer>(back_props);
      tn_seq->append(std::move(initializer));
    }
  }
}

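// Returns the training (backward) operand indices that need back-prop tensors:
// inputs used by ops required for backward, excluding external operands and
// operands without any training uses or defs.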
util::Set<ir::train::TrainingOperandIndex>
getBackwardTensorList(const ir::train::TrainableGraph &tgraph,
                      const util::Set<ir::OperandIndex> &external_operands)
{
  util::Set<ir::train::TrainingOperandIndex> ret;

  // TODO Reuse registered tensors when they are planned for memory optimization.
  auto border = tgraph.essentialBackwardOrder();
  for (const auto op_index : border)
  {
    const auto &trainable_op = tgraph.operation(op_index);
    assert(trainable_op.isRequiredForBackward());
    // This assumes that back-propagated tensors of loss outputs are not used
    for (const auto &ind : trainable_op.getUsedInputSet())
    {
      if (external_operands.contains(ind))
        continue;

      const auto &operand_index = ir::train::TrainingOperandIndex{ind, false};

      const auto &training_usedefs = tgraph.trainingUseDefs();
      const auto &usedefs = training_usedefs.at(ir::train::TrainingOperandIndex{ind, false});
      const bool not_used = usedefs.getTrainingDefs().empty() && usedefs.getTrainingUses().empty();
      if (not_used)
        continue;

      ret.add(operand_index);
    }
  }

  return ret;
}

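// Returns a DisposableTensorIndex for every (defining op, operand) pair of a
// non-constant back-prop tensor that has multiple defining operations, so each
// definition can be planned and disposed of separately.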
util::Set<DisposableTensorIndex>
getDisposableBackPropTensorList(const ir::train::TrainableGraph &tgraph,
                                const util::Set<ir::OperandIndex> &external_operands)
{
  util::Set<DisposableTensorIndex> ret;

  const auto candidates = getBackwardTensorList(tgraph, external_operands);
  for (const auto &backwarding_operand_index : candidates)
  {
    const auto &operand = tgraph.operands().at(backwarding_operand_index.index());
    const auto &training_usedefs = tgraph.trainingUseDefs();
    const auto &usedefs = training_usedefs.at(backwarding_operand_index);
    const bool is_multiple_defs = usedefs.getTrainingDefs().size() > 1;
    if (!operand.isConstant() && is_multiple_defs)
      for (const auto &def : usedefs.getTrainingDefs())
        ret.add(DisposableTensorIndex{def.index(), backwarding_operand_index.index()});
  }

  return ret;
}
} // namespace

// Plans and allocates forward/backward tensors, generates kernels for each operation,
// fills trainable tensors with their constant data, and returns the function map.
FunctionMap BackendContext::gen()
{
  planForwardTensors();
  planBackwardTensors();

  _tensor_builder->allocate();
  _tensor_builder->allocateBackward();

  auto fn_map = generateFunctionMap();

  // Initialize TrainableTensors
  trainable_graph()->operands().iterate(
    [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
      if (external_operands().contains(ind) || !operand.isConstant())
        return;

      auto tensor = tensor_registry()->getNativeITensor(ind);
      assert(tensor != nullptr);

      VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;

      auto data = operand.shareData();
      assert(data && data->base());
      auto trainable_tensor = dynamic_cast<TrainableTensor *>(tensor);

      if (trainable_tensor == nullptr)
        throw std::runtime_error{"This tensor is not trainable tensor"};

      trainable_tensor->fillBuffer(data);
    });

  // NOTE For memory optimization, we want to free some operand data
  const_cast<ir::train::TrainableGraph &>(*_tdata->tgraph)
    .operands()
    .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });

  // TODO Enable
  // for (auto &&it : ret)
  // {
  //   auto &fn_seq = it.second;
  //   fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
  // }

  // NOTE: Since LayerScopeTensors is defined in each kernel (layer),
  //       it should be planned and allocated after the kernels are generated.
  planLayerScopeTensors(fn_map);
  _tensor_builder->allocateLayerScope();

  return fn_map;
}

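// Registers forward tensors for all non-external operands and lets the planner
// decide lifetimes for trainable (constant) and non-constant tensors.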
void BackendContext::planForwardTensors()
{
  const auto &tgraph = *trainable_graph();

  tgraph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &obj) {
    if (external_operands().contains(index))
      return;
    if (!index.valid())
      return;

    _tensor_builder->registerTensorInfo(index, obj.info());
  });

  const auto ctx_data = data();
  TensorPlanner tensor_planner{*ctx_data->tgraph.get(), ctx_data->external_operands};
  tensor_planner.planTrainableTensors(_tensor_builder.get());
  tensor_planner.planNonConstTensors(_tensor_builder.get());
}

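// Registers gradient and back-prop tensor infos (including disposable ones) and
// plans them for the backward pass only, which reduces peak memory usage.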
void BackendContext::planBackwardTensors()
{
  const ir::train::TrainableGraph &tgraph = *trainable_graph();

  auto tensor_builder = _tensor_builder;

  const auto operand_indices = getBackwardTensorList(tgraph, external_operands());
  for (const auto &operand_index : operand_indices)
  {
    if (external_operands().contains(operand_index.index()))
      continue;

    assert(operand_index.valid());

    assert(!operand_index.is_forward());
    const auto &operand = tgraph.operands().at(operand_index.index());
    tensor_builder->registerBackwardTensorInfo(operand_index.index(),
                                               createBackwardTensorInfo(operand));
  }

  const auto disposable_indices = getDisposableBackPropTensorList(tgraph, external_operands());
  for (const auto &disposable_index : disposable_indices)
  {
    const auto &operand = tgraph.operands().at(disposable_index.operand_index());
    tensor_builder->registerDisposableBackwardTensorInfo(disposable_index,
                                                         createBackwardTensorInfo(operand));
  }

  // Plan tensors only in backwarding to reduce peak memory usage
  const auto ctx_data = data();
  TensorPlanner tensor_planner{*ctx_data->tgraph.get(), ctx_data->external_operands};
  tensor_planner.planGradientTensors(tensor_builder.get());
  tensor_planner.planBackPropTensors(tensor_builder.get());
  tensor_planner.planDisposableBackPropTensors(tensor_builder.get());
}

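// Runs the kernel generator for every operation in execution order, then makes
// sure each backward sequence starts by initializing its back-prop tensors.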
FunctionMap BackendContext::generateFunctionMap()
{
  FunctionMap ret;

  for (const auto &op_ind : _tdata->op_order)
  {
    auto fn_seq = kernel_gen->generate(op_ind);
    ret.emplace(op_ind, std::move(fn_seq));
  }

  // NOTE Each BackPropInitializer should be called first in each op node during backwarding
  const auto &tgraph = *_tdata->tgraph;
  auto tensor_reg = nnfw::misc::polymorphic_downcast<TensorRegistry *>(_tensor_registry.get());
  AddBackPropInitializers(tgraph, *tensor_reg, ret);

  return ret;
}

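// Collects LayerScopeTensors from each generated trainable function of ops that
// are required for backward, registers them with the tensor builder, and plans
// their memory.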
void BackendContext::planLayerScopeTensors([[maybe_unused]] const FunctionMap &fn_map)
{
  const auto &ops = trainable_graph()->operations();

  auto register_tensors = [this](const ir::OperationIndex &op_idx,
                                 std::optional<LayerScopeTensors> &&tensors) {
    if (not tensors.has_value())
      return;

    auto ls_tensors = tensors.value();
    for (auto i = 0u; i < ls_tensors.size(); ++i)
    {
      LayerScopeTensorIndex tensor_idx(op_idx, i);
      _tensor_builder->registerLayerScopeTensor(tensor_idx, ls_tensors[i]);

      VERBOSE(BackendContext) << "(idx:" << tensor_idx << ") registered" << std::endl;
    }
    return;
  };

  for (auto &pair : fn_map)
  {
    const auto &op_idx = pair.first;
    auto &fn_seq = pair.second;

    const ir::IOperation *op = &ops.at(op_idx);
    const auto trainable_op = dynamic_cast<const ir::train::TrainableOperation *>(op);
    assert(trainable_op != nullptr);

    if (not trainable_op->isRequiredForBackward())
      continue;

    VERBOSE(BackendContext) << "register layerscope tensor for " << trainable_op->name()
                            << std::endl;

    fn_seq->iterate([&](exec::train::ITrainableFunction &fn) {
      register_tensors(op_idx, (&fn)->registerLayerScopeTensors());
    });
  }

  const auto ctx_data = data();
  TensorPlanner tensor_planner{*ctx_data->tgraph.get(), ctx_data->external_operands};
  tensor_planner.planLayerScopeTensors(_tensor_builder.get());
  return;
}

} // namespace onert::backend::train