ONE's runtime has three main modules: core, frontend and backend. This document provides some lightweight guidance about how to introduce a new operation into these modules to make onert support the operation.
Index
- How To Introduce a New Operation Into Runtime
- Index
- Core
- Frontend
- Loaders
- Base Loader
- TFLite Loader
- Circle Loader
- NNAPI
- Backend
- ShapeFixer
- KernelGenerator
- ConstantInitializer (in some cases)
- Samples (to be updated)
Core
This module holds the graph-based IR (intermediate representation). You have to add an IR node for the new operation.
- Add the name of the new operation to Operations.lst
- Create a class for the node of the new operation here
#include "ir/Operation.h"

namespace onert
{
namespace ir
{
namespace operation
{

/**
 * @brief IR node of the Select operation.
 *
 * The node is built from an input operand sequence and an output operand
 * sequence; the enums below name the position of each operand inside them.
 */
class Select : public Operation
{
public:
  /// Positions of the operands inside the input sequence.
  enum Input
  {
    COND = 0,
    INPUT1 = 1,
    INPUT2 = 2
  };

  /// Positions of the operands inside the output sequence.
  enum Output
  {
    OUTPUT = 0,
  };

public:
  Select(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);

public:
  void accept(OperationVisitor &v) const override;
  // NOTE(review): OpCode::Select is assumed to be generated from the
  // Operations.lst entry of this operation - confirm.
  OpCode opcode() const final { return OpCode::Select; }
};

} // namespace operation
} // namespace ir
} // namespace onert
Select(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
OpCode opcode() const final
void accept(OperationVisitor &v) const override
You can also define the members of the class in a separate source file, as shown below.
{
namespace ir
{
namespace operation
{
Select::Select(
const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
:
Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
- Add to the OperationValidator to check if the node is valid.
// Visitor overload for Select, declared inside the validator class.
void visit(const operation::Select &node) override;

/**
 * Checks a Select node in debug builds: the condition operand must be BOOL8,
 * and the condition and both inputs must have identical shapes.
 * Every local is passed to UNUSED_RELEASE because it is otherwise only
 * consumed by the assertions.
 */
void OperationValidator::visit(const ir::operation::Select &node)
{
  using Select = ir::operation::Select;

  // Operand indices of the node.
  const auto out_idx = node.getOutputs().at(Select::Output::OUTPUT);
  const auto cond_idx = node.getInputs().at(Select::Input::COND);
  const auto in1_idx = node.getInputs().at(Select::Input::INPUT1);
  const auto in2_idx = node.getInputs().at(Select::Input::INPUT2);
  UNUSED_RELEASE(out_idx);
  UNUSED_RELEASE(cond_idx);
  UNUSED_RELEASE(in1_idx);
  UNUSED_RELEASE(in2_idx);

  // Type information of every operand.
  const auto out_type = _ctx.at(out_idx).typeInfo();
  const auto cond_type = _ctx.at(cond_idx).typeInfo();
  const auto in1_type = _ctx.at(in1_idx).typeInfo();
  const auto in2_type = _ctx.at(in2_idx).typeInfo();
  UNUSED_RELEASE(out_type);
  UNUSED_RELEASE(cond_type);
  UNUSED_RELEASE(in1_type);
  UNUSED_RELEASE(in2_type);

  assert(cond_type.type() == ir::DataType::BOOL8);

  // Shapes of the condition and of the two inputs.
  const auto cond_shape = _ctx.at(cond_idx).shape();
  const auto in1_shape = _ctx.at(in1_idx).shape();
  const auto in2_shape = _ctx.at(in2_idx).shape();
  UNUSED_RELEASE(cond_shape);
  UNUSED_RELEASE(in1_shape);
  UNUSED_RELEASE(in2_shape);

  assert(cond_shape == in1_shape);
  assert(in2_shape == in1_shape);
}
const luci_interpreter::RuntimeShape output_shape
- Add to the Dumper to dump IR information of new operation.
// Logs the operand indices of a Select node: one header line, one line with
// the three inputs and one line with the output.
void Dumper::visit(const Select &node)
{
  VERBOSE(LIR) << "* Select" << std::endl;
  // Every operand is printed as Name(index); the opening parenthesis after
  // "Input1" and "Input2" keeps the output balanced.
  VERBOSE(LIR) << " - Inputs : Cond(" << node.getInputs().at(Select::Input::COND).value()
               << ") Input1(" << node.getInputs().at(Select::Input::INPUT1).value()
               << ") Input2(" << node.getInputs().at(Select::Input::INPUT2).value() << ")"
               << std::endl;
  VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(Select::Output::OUTPUT).value()
               << ")" << std::endl;
}
#define VERBOSE(name, lv)
- Add code for shape inference
- ONE runtime tries to calculate shapes and allocate memory during compilation time. For some calculations of output shapes that cannot be done during compilation time, ONE runtime will calculate shapes and allocate memory during execution time.
- Calculation of shapes during compilation time is called static shape inference and calculation of shapes during execution time is called dynamic shape inference.
StaticShapeInferer.h
// Visitor overload that performs static shape inference for a Select node.
void visit(const ir::operation::Select &op) override;
StaticShapeInferer.cc
// Static shape inference for Select: derives the output shape from the shapes
// of the condition and of the two inputs, then stores it on the output operand.
void StaticShapeInferer::visit(const ir::operation::Select &op)
{
  // The enumerator name matches the Input enum of the Select class (COND).
  const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::COND)};
  const auto &input_cond = _operands.at(input_cond_idx);

  // The lookups of the remaining operands are elided in this guide.
  const auto &input_true = ...
  const auto &input_false = ...
  ir::Operand &output = ...

  ir::Shape new_shape = shape_inference::inferSelectShape(
    input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
  output.info().shape(new_shape);
}
DynamicShapeInference.h
// Visitor overload that performs dynamic shape inference for a Select node.
void visit(const ir::operation::Select &op) override;
DynamicShapeInference.cc
// Dynamic shape inference for Select: when at least one of the three inputs is
// dynamic, recomputes the output shape from the current input shapes and
// applies it to the output tensor.
void DynamicShapeInferer::visit(const ir::operation::Select &op)
{
  // The enumerator name matches the Input enum of the Select class (COND).
  const auto input_cond_idx = op.getInputs().at(ir::operation::Select::Input::COND);
  const auto &input_cond = _tensor_registry->getITensor(input_cond_idx);

  // The lookups of the remaining tensors are elided in this guide.
  const auto &input_true = ...
  const auto &input_false = ...
  auto output = ...

  // Nothing to do when none of the inputs is dynamic.
  if ((!input_cond->is_dynamic()) && (!input_true->is_dynamic()) && (!input_false->is_dynamic()))
  {
    return;
  }

  auto input_cond_shape = input_cond->getShape();
  auto input_true_shape = input_true->getShape();
  auto input_false_shape = input_false->getShape();

  // Select output shape
  ir::Shape new_shape =
    shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);

  output->applyShape(new_shape);
}
Frontend
This module generates IR from a model. There are two kinds of frontend: Loader and NNAPI. A Loader loads a model file and generates IR from it. NNAPI generates IR from a model that is set via the Neural Networks API of Android.
Loaders
Base Loader
This is where the common parts of loaders are implemented.
- Add to base_loader to load new operation and to generate IR from it
// Route the SELECT builtin operator to loadSelect() and leave the dispatch.
case BuiltinOperator::BuiltinOperator_SELECT:
loadSelect(op);
return;
/**
 * Builds a Select IR node from a model operator and adds it to the graph.
 *
 * @param op  model operator whose inputs/outputs are read via loadOperationIO()
 */
template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op)
{
  ir::OperandIndexSequence inputs;
  ir::OperandIndexSequence outputs;

  loadOperationIO(op, inputs, outputs);

  // make_unique instead of a naked new: the node is owned from the moment it
  // is created, and ownership is then handed over to the graph.
  std::unique_ptr<ir::Operation> new_op =
    std::make_unique<ir::operation::Select>(inputs, outputs);
  _graph.addOperation(std::move(new_op));
}
TFLite Loader
This loads a tflite file. If you want the new operation to be loaded only by the TFLite Loader, you only need to implement loading of the operation here.
Circle Loader
This loads a circle file generated by the compiler. If you want the new operation to be loaded only by the Circle Loader, you only need to implement loading of the operation here.
NNAPI
- Add to the OperationFactory to generate IR of new operation
// Factory entry that creates a Select IR node for ANEURALNETWORKS_SELECT.
// The request must carry exactly three inputs and one output.
_map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) {
  assert(init_param.input_count == 3 && init_param.output_count == 1);

  OperandIndexSequence outputs{init_param.outputs[0]};

  // Append the input operand indices in the order they were passed in.
  OperandIndexSequence inputs;
  uint32_t pos = 0;
  while (pos < init_param.input_count)
  {
    inputs.append(OperandIndex{init_param.inputs[pos]});
    ++pos;
  }

  return new operation::Select{inputs, outputs};
};
- If you want NNAPI to support the new operation for a TFLite model, you need to update the code related to the operation in nnapi_delegate, as shown below
// Map the TFLite SELECT builtin to the NNAPI SELECT operation type and record
// the value 12 as nnapi_version for it.
case tflite::BuiltinOperator_SELECT:
nnapi_version = 12;
nn_op_type = ANEURALNETWORKS_SELECT;
break;
Backend
This module generates the kernels and tensors of a backend, such as ComputeLibrary, from the generated graph-based IR. The runtime handles much of this work internally, but that is not enough because some parts depend on the backend. Therefore, several components require additional implementation for each backend.
ShapeFixer
Even for tensors of the same operation, the shape required for each backend can be different. Therefore, this component modifies and fixes shape of tensors of the backend.
acl_cl
The ACL kernel for the Add operation requires both inputs to have the same rank in order to support broadcasting.
// Visitor overload for Add, declared inside the ShapeFixer class.
void visit(const ir::operation::Add &) override;

/**
 * When the two inputs of an Add node have different shapes, extends the rank
 * of both shapes to the larger of the two ranks.
 */
void ShapeFixer::visit(const ir::operation::Add &node)
{
  using Add = ir::operation::Add;

  const auto lhs_idx = node.getInputs().at(Add::Input::LHS);
  const auto rhs_idx = node.getInputs().at(Add::Input::RHS);

  const auto &lhs_shape = _ctx.at(lhs_idx).shape();
  const auto &rhs_shape = _ctx.at(rhs_idx).shape();

  // Identical shapes need no adjustment.
  if (lhs_shape == rhs_shape)
  {
    return;
  }

  // The target rank is computed before either shape is modified.
  const auto target_rank = std::max(lhs_shape.rank(), rhs_shape.rank());

  // The shapes are only reachable through const accessors, hence the const_cast.
  const_cast<ir::Shape &>(lhs_shape).extendRank(target_rank);
  const_cast<ir::Shape &>(rhs_shape).extendRank(target_rank);
}
acl_neon
Same implementation as acl_cl is required.
cpu
This backend doesn't usually require a change of shape.
// Visitor overload for Select, declared inside the ShapeFixer class.
void visit(const ir::operation::Select &) override;

// Intentionally empty: no shape is changed for Select on this backend.
void ShapeFixer::visit(const ir::operation::Select &)
{
  // DO NOTHING
}
KernelGenerator
This component generates kernels of backend. You have to generate kernel of new operation. And then append it to execution builder. You can obtain information of the node from IR and necessary tensors from tensor builder.
acl_cl
// Visitor overload for Select, declared inside the KernelGenerator class.
void visit(const ir::operation::Select &) override;

/**
 * Generates the kernel for a Select node: looks up the tensors of the node,
 * configures a CLSelect function with them and appends it to the execution
 * builder.
 */
void KernelGenerator::visit(const ir::operation::Select &node)
{
  const auto output_index{node.getOutputs().at(ir::operation::Select::Output::OUTPUT)};
  const auto cond_index{node.getInputs().at(ir::operation::Select::Input::COND)};
  const auto input1_index{node.getInputs().at(ir::operation::Select::Input::INPUT1)};
  const auto input2_index{node.getInputs().at(ir::operation::Select::Input::INPUT2)};

  auto output_alloc = _tensor_builder->at(output_index).get();
  auto cond_alloc = _tensor_builder->at(cond_index).get();
  auto input1_alloc = _tensor_builder->at(input1_index).get();
  auto input2_alloc = _tensor_builder->at(input2_index).get();

  auto fn = std::make_unique<::arm_compute::CLSelect>();

  fn->configure(cond_alloc->handle(), input1_alloc->handle(), input2_alloc->handle(),
                output_alloc->handle());

  // Wrap the configured function before handing it to the execution builder;
  // without this step acl_fn would be an undeclared name.
  auto acl_fn = asAclFunction(std::move(fn));

  _execution_builder->append(std::move(acl_fn));
}
std::unique_ptr< AclFunction > asAclFunction(std::unique_ptr<::arm_compute::IFunction > &&layer)
acl_neon
Similar implementation as acl_cl is required.
cpu
// Visitor overload for Select, declared inside the KernelGenerator class.
void visit(const ir::operation::Select &) override;

/**
 * Generates the kernel for a Select node: fetches the portable tensors of the
 * node, configures a SelectLayer with them and stores it in _return_fn.
 */
void KernelGenerator::visit(const ir::operation::Select &node)
{
  // Operand positions use the enumerators of the Select class
  // (COND / INPUT1 / INPUT2 and OUTPUT).
  const auto output_index{node.getOutputs().at(ir::operation::Select::Output::OUTPUT)};
  const auto condition_index{node.getInputs().at(ir::operation::Select::Input::COND)};
  const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT1)};
  const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT2)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
  auto true_tensor = _tensor_reg->getPortableTensor(true_index);
  auto false_tensor = _tensor_reg->getPortableTensor(false_index);

  auto fn = std::make_unique<ops::SelectLayer>();

  fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);

  _return_fn = std::move(fn);
}
ConstantInitializer (in some cases)
This component registers the functions that initialize constant tensors and initializes the constant tensor layer. Most tensors are registered automatically by the runtime, but there are some exceptions.
cpu
// Visitor overload for Conv2D, declared inside the ConstantInitializer class.
void visit(const ir::operation::Conv2D &) override;

/**
 * Registers a copy initializer for the kernel operand and then for the bias
 * operand of a Conv2D node.
 */
void ConstantInitializer::visit(const ir::operation::Conv2D &node)
{
  // Looks up the operand behind an index and registers its copy initializer.
  const auto register_copy = [this](const auto &operand_index) {
    const auto &operand = _operands.at(operand_index);
    registerCopyInitializer(operand_index, operand);
  };

  const auto &kernel_idx = node.getInputs().at(ir::operation::Conv2D::KERNEL);
  register_copy(kernel_idx);

  const auto &bias_idx = node.getInputs().at(ir::operation::Conv2D::BIAS);
  register_copy(bias_idx);
}
Samples (to be updated)