20#include "ops/BinaryArithmeticLayer.h"
21#include "ops/ConvolutionLayer.h"
22#include "ops/DepthwiseConvolutionLayer.h"
23#include "ops/ElementwiseActivationLayer.h"
24#include "ops/FullyConnectedLayer.h"
27#include "ops/MeanLayer.h"
29#include "ops/PadLayer.h"
30#include "ops/PoolLayer.h"
31#include "ops/ReshapeLayer.h"
32#include "ops/SoftMaxLayer.h"
47ops::ElementwiseActivationType
55 throw std::runtime_error(
"train KernelGenerator : Not supported operation yet");
69 throw std::runtime_error(
"train KernelGenerator : Not supported operation yet");
73std::unique_ptr<ops::BackPropAccumulator>
74generateBackPropAccumulator(
const IPortableTensor *disposable,
BackPropTensor *gradient)
76 auto update_fn = std::make_unique<ops::BackPropAccumulator>(disposable, gradient);
80void appendBackPropAccumulators(
const ir::train::ITrainableOperation &op,
82 exec::train::TrainableFnSequence *seq)
84 if (!op.isRequiredForBackward())
87 for (
const auto &input_index : (op.getInputs() | ir::
Remove::
UNDEFINED))
89 const auto disposable =
90 tensor_reg->getDisposableBackPropTensor(DisposableTensorIndex{op_index,
input_index});
91 if (disposable !=
nullptr)
93 auto back_prop = tensor_reg->getBackPropTensor(input_index);
95 seq->append(generateBackPropAccumulator(disposable, back_prop));
100std::unique_ptr<ops::GradientApplier>
101generateGradientApplier(
const exec::train::optimizer::Optimizer *
optimizer,
102 const IPortableTensor *gradient, ITrainableTensor *trainable)
104 auto update_fn = std::make_unique<ops::GradientApplier>();
105 update_fn->configure(
optimizer, gradient, trainable);
115 auto ret = std::make_unique<exec::train::TrainableFnSequence>();
121 appendBackPropAccumulators(op, idx, _tensor_reg.get(), ret.get());
127 for (
auto &&update_fn : _update_funcs)
128 ret->append(std::move(update_fn));
129 _update_funcs.clear();
133 auto tensor = _tensor_reg->getNonConstTensor(ind);
136 tensor->increase_ref();
143 const std::shared_ptr<TensorRegistry> &tensor_reg,
144 const std::shared_ptr<ExternalContext> &external_context,
147 _external_context(external_context), _optimizer{
optimizer}, _update_funcs{}, _node_to_idx{}
151 assert(_node_to_idx.find(&op) == _node_to_idx.end());
152 _node_to_idx[&op] = idx;
161 const auto lhs_index{node.
getInputs().
at(BinaryArithmetic::Input::LHS)};
162 const auto rhs_index{node.
getInputs().
at(BinaryArithmetic::Input::RHS)};
167 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
168 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
169 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
171 auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
172 fn->configure(lhs_tensor, rhs_tensor, output_tensor, activation,
177 auto back_prop_output_tensor = getBackPropOut(output_index);
178 auto back_prop_lhs_tensor = getBackPropIn(node, lhs_index);
179 auto back_prop_rhs_tensor = getBackPropIn(node, rhs_index);
181 fn->configureBackward(back_prop_lhs_tensor, back_prop_rhs_tensor, back_prop_output_tensor,
192 const auto in_index{node.
getInputs().
at(Conv2D::Input::INPUT)};
193 const auto ker_index{node.
getInputs().
at(Conv2D::Input::KERNEL)};
194 const auto bias_index{node.
getInputs().
at(Conv2D::Input::BIAS)};
196 auto out_tensor = _tensor_reg->getPortableTensor(out_index);
197 auto in_tensor = _tensor_reg->getPortableTensor(in_index);
198 auto ker_tensor = _tensor_reg->getTrainableTensor(ker_index);
199 auto bias_tensor = _tensor_reg->getTrainableTensor(bias_index);
206 auto fn = std::make_unique<ops::ConvolutionLayer>();
209 const auto ifm_shape = operands.
at(in_index).shape().asFeature();
210 const auto ofm_shape = operands.at(out_index).shape().asFeature();
212 const auto &ker_shape = operands.at(ker_index).shape();
213 const auto ker_height = ker_shape.dim(1);
214 const auto ker_width = ker_shape.dim(2);
218 dilation.width_factor, dilation.height_factor);
220 const bool is_cacheable_weights =
false;
222 padding.top, padding.bottom, stride.horizontal, stride.vertical,
223 dilation.width_factor, dilation.height_factor, activation, out_tensor,
224 is_cacheable_weights);
226 auto ker_grad_tensor = _tensor_reg->getGradientTensor(ker_index);
227 auto bias_grad_tensor = _tensor_reg->getGradientTensor(bias_index);
232 auto out_back_prop_tensor = getBackPropOut(out_index);
233 auto in_back_prop_tensor = getBackPropIn(node, in_index);
235 fn->configureBackward(
ker_tensor, in_back_prop_tensor, ker_grad_tensor, bias_grad_tensor,
236 out_back_prop_tensor, activation);
240 _update_funcs.emplace_back(
241 generateGradientApplier(_optimizer, bias_grad_tensor,
bias_tensor));
242 _update_funcs.emplace_back(generateGradientApplier(_optimizer, ker_grad_tensor,
ker_tensor));
253 const auto ifm_index{node.
getInputs().
at(DepthwiseConv2D::Input::INPUT)};
254 const auto ker_index{node.
getInputs().
at(DepthwiseConv2D::Input::KERNEL)};
255 const auto bias_index{node.
getInputs().
at(DepthwiseConv2D::Input::BIAS)};
257 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
258 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
259 auto ker_tensor = _tensor_reg->getTrainableTensor(ker_index);
260 auto bias_tensor = _tensor_reg->getTrainableTensor(bias_index);
264 const auto ofm_shape = operands.
at(ofm_index).shape().asFeature();
265 const auto ifm_shape = operands.at(ifm_index).shape().asFeature();
267 const auto &ker_shape = operands.at(ker_index).shape();
268 const auto ker_height = ker_shape.dim(1);
269 const auto ker_width = ker_shape.dim(2);
273 ker_width, ker_height, dilation_width, dilation_height);
277 auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
280 padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
281 dilation_height, activation, ofm_tensor, _external_context);
285 auto ker_grad_tensor = _tensor_reg->getGradientTensor(ker_index);
286 auto bias_grad_tensor = _tensor_reg->getGradientTensor(bias_index);
288 auto ofm_back_prop_tensor = getBackPropOut(ofm_index);
289 auto ifm_back_prop_tensor = getBackPropIn(node, ifm_index);
291 fn->configureBackward(ifm_back_prop_tensor, ker_grad_tensor, bias_grad_tensor,
292 ofm_back_prop_tensor, activation);
296 _update_funcs.emplace_back(
297 generateGradientApplier(_optimizer, bias_grad_tensor,
bias_tensor));
298 _update_funcs.emplace_back(generateGradientApplier(_optimizer, ker_grad_tensor,
ker_tensor));
309 const auto input_index{node.
getInputs().
at(ElementwiseActivation::Input::INPUT)};
311 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
312 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
314 auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
322 throw std::invalid_argument(
"Unsupported ElementwiseActivation::Type");
331 auto back_prop_input_tensor = getBackPropIn(node, input_index);
332 auto back_prop_output_tensor = getBackPropOut(output_index);
334 fn->configureBackward(input_tensor, back_prop_input_tensor, back_prop_output_tensor,
336 convertElementwiseActivationType(node.
param().
op_type));
347 const auto in_index{node.
getInputs().
at(FullyConnected::Input::INPUT)};
348 const auto weights_index{node.
getInputs().
at(FullyConnected::Input::WEIGHT)};
349 const auto bias_index{node.
getInputs().
at(FullyConnected::Input::BIAS)};
351 auto out_tensor = _tensor_reg->getPortableTensor(out_index);
352 auto in_tensor = _tensor_reg->getPortableTensor(in_index);
353 auto weights_tensor = _tensor_reg->getTrainableTensor(weights_index);
354 auto bias_tensor = _tensor_reg->getTrainableTensor(bias_index);
360 auto fn = std::make_unique<ops::FullyConnectedLayer>();
362 fn->configure(in_tensor, weights_tensor,
bias_tensor, activation, weights_format, out_tensor,
367 auto out_back_prop_tensor = getBackPropOut(out_index);
368 auto in_back_prop_tensor = getBackPropIn(node, in_index);
369 auto weights_grad_tensor = _tensor_reg->getGradientTensor(weights_index);
370 auto bias_grad_tensor = _tensor_reg->getGradientTensor(bias_index);
372 fn->configureBackward(in_tensor, weights_tensor, out_tensor, in_back_prop_tensor,
373 weights_grad_tensor, bias_grad_tensor, out_back_prop_tensor, activation,
378 _update_funcs.emplace_back(
379 generateGradientApplier(_optimizer, bias_grad_tensor,
bias_tensor));
380 _update_funcs.emplace_back(
381 generateGradientApplier(_optimizer, weights_grad_tensor, weights_tensor));
392 const auto y_pred_index{node.
getInputs().
at(Loss::Y_PRED)};
393 const auto y_true_index{node.
getInputs().
at(Loss::Y_TRUE)};
395 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
396 auto y_pred_tensor = _tensor_reg->getPortableTensor(y_pred_index);
397 auto y_true_tensor = _tensor_reg->getPortableTensor(y_true_index);
401 auto back_prop_y_pred_tensor = getBackPropIn(node, y_pred_index);
411 auto fn = std::make_unique<ops::LossMeanSquaredErrorLayer>();
412 fn->configure(y_pred_tensor, y_true_tensor, output_tensor, back_prop_y_pred_tensor,
420 bool is_normalization_required = (y_pred_op_code != ir::OpCode::Softmax);
421 const auto cce_params = std::get_if<ir::train::CategoricalCrossentropyParam>(&loss_param);
424 throw std::runtime_error(
"LossLayer: Expected loss_param to be "
425 "CategoricalCrossentropyParam but found a different type.");
427 auto fn = std::make_unique<ops::LossCategoricalCrossentropyLayer>();
428 fn->configure(y_pred_tensor, y_true_tensor, output_tensor, back_prop_y_pred_tensor,
429 reduction_type, cce_params->axis, cce_params->label_smoothing,
430 is_normalization_required);
435 throw std::runtime_error(
"LossLayer: unsupported loss type");
445 auto input = _tensor_reg->getPortableTensor(input_index);
446 auto pad = _tensor_reg->getPortableTensor(pad_index);
447 auto output = _tensor_reg->getPortableTensor(output_index);
449 auto fn = std::make_unique<ops::PadLayer>();
455 value = _tensor_reg->getPortableTensor(value_index);
458 fn->configure(input, pad, value, output);
461 auto out_back_prop_tensor = getBackPropOut(output_index);
462 auto in_back_prop_tensor = getBackPropIn(node, input_index);
463 fn->configureBackward(in_back_prop_tensor, out_back_prop_tensor);
476 const auto &ofm_shape = operands.
at(output_index).shape();
477 const auto &ifm_shape = operands.at(input_index).shape();
479 if (ifm_shape.rank() != 4)
481 throw std::runtime_error(node.
name() +
" only supports 4D tensor as input");
486 const auto kh = node.
param().
kh;
487 const auto kw = node.
param().
kw;
489 ofm_shape.asFeature(), stride, kw, kh);
491 auto out_tensor = _tensor_reg->getPortableTensor(output_index);
492 auto in_tensor = _tensor_reg->getPortableTensor(input_index);
495 const auto pool_type = convertPoolType(node.
param().
op_type);
497 auto fn = std::make_unique<ops::PoolLayer>();
507 throw std::runtime_error(
"PoolLayer: Unsupported pool type yet");
511 fn->configure(in_tensor, padding.left, padding.right, padding.top, padding.bottom,
512 stride.horizontal, stride.vertical, kw, kh, activation, out_tensor,
513 convertToInferPoolType(pool_type));
517 auto out_back_prop_tensor = getBackPropOut(output_index);
518 auto in_back_prop_tensor = getBackPropIn(node, input_index);
519 fn->configureBackward(padding.left, padding.right, padding.top, padding.bottom,
520 stride.horizontal, stride.vertical, kw, kh, activation, pool_type,
521 out_tensor, in_back_prop_tensor, out_back_prop_tensor);
532 const auto input_index{node.
getInputs().
at(Reduce::Input::INPUT)};
533 const auto axes_index{node.
getInputs().
at(Reduce::Input::AXES)};
537 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
538 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
539 auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);
543 auto fn = std::make_unique<ops::MeanLayer>();
544 fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);
547 auto back_prop_output_tensor = getBackPropOut(output_index);
548 auto back_prop_input_tensor = getBackPropIn(node, input_index);
549 fn->configureBackward(back_prop_input_tensor, back_prop_output_tensor);
555 throw std::runtime_error(
"ReduceLayer: unsupported reduce type");
566 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
567 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
575 shape_tensor = _tensor_reg->getPortableTensor(shape_index);
578 auto fn = std::make_unique<ops::ReshapeLayer>();
580 fn->configure(input_tensor, shape_tensor, output_tensor);
583 auto output_back_prop_tensor = getBackPropOut(output_index);
584 auto input_back_prop_tensor = getBackPropIn(node, input_index);
585 fn->configureBackward(input_back_prop_tensor, output_back_prop_tensor);
599 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
600 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
602 auto fn = std::make_unique<ops::SoftMaxLayer>();
604 fn->configure(input_tensor, beta, output_tensor);
608 auto output_back_prop_tensor = getBackPropOut(output_index);
609 auto input_back_prop_tensor = getBackPropIn(node, input_index);
610 fn->configureBackward(input_back_prop_tensor, output_back_prop_tensor);
618 const auto &op_index = _node_to_idx[&node];
621 const auto disposable_tensor =
622 _tensor_reg->getDisposableBackPropTensor(DisposableTensorIndex{op_index, operand_index});
623 if (disposable_tensor !=
nullptr)
625 [[maybe_unused]]
const auto &training_usedefs =
627 assert(std::count_if(training_usedefs.getTrainingDefs().begin(),
628 training_usedefs.getTrainingDefs().end(),
630 return _tgraph.operation(op_index.index()).isRequiredForBackward();
633 return disposable_tensor;
636 return _tensor_reg->getBackPropTensor(operand_index);
639IPortableTensor *KernelGenerator::getBackPropOut(
const ir::OperandIndex &output_index)
641 return _tensor_reg->getBackPropTensor(output_index);
A tensor class that is portable for other backends.
std::unique_ptr< exec::train::ITrainableFunction > _return_fn
const ir::train::TrainableGraph & _tgraph
std::unique_ptr< exec::train::TrainableFnSequence > generate(ir::OperationIndex op_ind) override
KernelGenerator(const ir::train::TrainableGraph &tgraph, const std::shared_ptr< TensorRegistry > &tensor_reg, const std::shared_ptr< ExternalContext > &external_context, const exec::train::optimizer::Optimizer *optimizer)
void visit(const ir::train::operation::BinaryArithmetic &) override
Base class for all optimizers.
const OperandIndex & at(IOIndex set_index) const
const OperandIndexSequence & getOutputs() const override
OperandIndexSequence & getInputs()
const Param & param() const
const Param & param() const
const Param & param() const
const Param & param() const
const Param & param() const
const Param & param() const
std::string name() const override
const Param & param() const
const Param & param() const
const ITrainableOperation & operation(OperationIndex index) const
const Operations & operations() const override
const Operands & operands() const override
const UseDefChains & trainingUseDefs() const
virtual bool isRequiredForBackward() const final
Class that provides index of tensor for training.
const LossInfo & param() const
ir::OpCode y_pred_op_code() const
void iterate(const std::function< void(const Index &, const Object &)> &fn) const
Iterate over the container with given function.
const Object & at(const Index &index) const
Get the object that is associated with the given index.
arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir)
@ CategoricalCrossentropy
const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape, const FeatureShape &ofm_shape, const Stride &stride, uint32_t kw, uint32_t kh, uint32_t dwf=1, uint32_t dhf=1)
::onert::util::Index< uint32_t, OperationIndexTag > OperationIndex
::onert::util::Index< uint32_t, OperandIndexTag > OperandIndex
ArithmeticType arithmetic_type
FullyConnectedWeightsFormat weights_format
LossReductionType reduction_type
std::variant< std::monostate, CategoricalCrossentropyParam > loss_param