ONE - On-device Neural Engine
GraphLoader.cpp
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "loader/GraphLoader.h"

#include "loader/KernelBuilder.h"

#include <loco/IR/Algorithm.h>

namespace luci_interpreter
{
namespace
{

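// Builds a static Shape from the rank and dimension values recorded on the IR node.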
template <typename NodeT> Shape getNodeShape(const NodeT *node)
{
  Shape shape(node->rank());
  for (uint32_t i = 0; i < node->rank(); ++i)
  {
    shape.dim(i) = node->dim(i).value();
  }
  return shape;
}

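// Returns a pointer to the constant data of a CircleConst node for a fixed data type DT,
// reporting the total byte size through data_size.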
template <DataType DT> const void *getNodeDataImpl(const luci::CircleConst *node, size_t *data_size)
{
  const size_t element_size = getDataTypeSize(DT);
  const int32_t num_elements = node->size<DT>();

  *data_size = num_elements * element_size;
  if (*data_size > 0)
  {
    // FIXME There is no good way to get the pointer to the data currently.
    return &node->at<DT>(0);
  }
  return nullptr;
}

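// Dispatches to getNodeDataImpl based on the node's data type; throws for unsupported types.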
const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
{
  switch (node->dtype())
  {
    case DataType::U4:
      return getNodeDataImpl<DataType::U4>(node, data_size);
    case DataType::U8:
      return getNodeDataImpl<DataType::U8>(node, data_size);
    case DataType::FLOAT32:
      return getNodeDataImpl<DataType::FLOAT32>(node, data_size);
    case DataType::S4:
      return getNodeDataImpl<DataType::S4>(node, data_size);
    case DataType::S8:
      return getNodeDataImpl<DataType::S8>(node, data_size);
    case DataType::S16:
      return getNodeDataImpl<DataType::S16>(node, data_size);
    case DataType::S32:
      return getNodeDataImpl<DataType::S32>(node, data_size);
    case DataType::S64:
      return getNodeDataImpl<DataType::S64>(node, data_size);
    case DataType::BOOL:
      return getNodeDataImpl<DataType::BOOL>(node, data_size);
    default:
      throw std::runtime_error("luci-intp (getNodeData) Unsupported type.");
  }
}

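// Extracts the data pointer and size referenced by a CircleReferencingConst custom node,
// whose custom_options holds a { data pointer, size } record instead of inline data.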
const void *getNodeData(const luci::CircleCustom *node, size_t *data_size)
{
  if (node->custom_code() != "CircleReferencingConst")
    return nullptr;

  // Helper struct describing the data referenced by the custom_options of a
  // CircleReferencingConst node.
  // TODO move this struct to header
  struct ConstDataReference
  {
    const uint8_t *data = nullptr;
    uint32_t size = 0;
  };

  const auto &custom_options = node->custom_options();
  const auto &const_data_ref = *reinterpret_cast<const ConstDataReference *>(custom_options.data());

  *data_size = const_data_ref.size;
  return const_data_ref.data;
}

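// Returns false for nodes that are not executed at runtime: graph inputs/outputs, constants,
// variables, virtual *Out nodes of multiple-output operators, and non-executable Custom nodes.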
bool isExecutableNode(const luci::CircleNode *node)
{
  switch (node->opcode())
  {
    // These nodes denote inputs / outputs of a graph.
    case luci::CircleOpcode::CIRCLECONST:
    case luci::CircleOpcode::CIRCLEINPUT:
    case luci::CircleOpcode::CIRCLEOUTPUT:
    case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE:
    // The following nodes denote outputs of multiple-output nodes.
    case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
    case luci::CircleOpcode::CIRCLECUSTOMOUT:
    case luci::CircleOpcode::CIRCLEIFOUT:
    case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
    case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
    case luci::CircleOpcode::CIRCLESPLITOUT:
    case luci::CircleOpcode::CIRCLESPLITVOUT:
    case luci::CircleOpcode::CIRCLETOPKV2OUT:
    case luci::CircleOpcode::CIRCLEUNIQUEOUT:
    case luci::CircleOpcode::CIRCLEUNPACKOUT:
    case luci::CircleOpcode::CIRCLEVARIABLE:
    case luci::CircleOpcode::CIRCLEWHILEOUT:
      return false;
    // Custom nodes may be either executable or non-executable.
    case luci::CircleOpcode::CUSTOM:
    {
      auto const custom_node = loco::must_cast<const luci::CircleCustom *>(node);

      // TODO handle more non-executable Custom ops here
      if (custom_node->custom_code() == "CircleReferencingConst")
        return false;

      return true;
    }
    default:
      return true;
  }
}

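// Returns false for nodes whose outputs are materialized elsewhere: the graph output node and
// multiple-output operators whose tensors are produced by their virtual *Out nodes.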
bool isTensorProducingNode(const luci::CircleNode *node)
{
  switch (node->opcode())
  {
    // Output nodes do not produce tensors.
    case luci::CircleOpcode::CIRCLEOUTPUT:
    // The following nodes are multiple-output nodes. They do not produce tensors themselves;
    // the tensors are produced by the corresponding *Out nodes instead.
    case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
    case luci::CircleOpcode::CUSTOM:
    case luci::CircleOpcode::IF:
    case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
    case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
    case luci::CircleOpcode::SPLIT:
    case luci::CircleOpcode::SPLIT_V:
    case luci::CircleOpcode::TOPK_V2:
    case luci::CircleOpcode::UNIQUE:
    case luci::CircleOpcode::UNPACK:
    case luci::CircleOpcode::WHILE:
      return false;
    default:
      return true;
  }
}

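// Checks whether a Custom node is one of the Custom ops this loader knows how to handle.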
bool isSupportedCustomNode(const luci::CircleNode *node)
{
  const auto custom_node = loco::must_cast<const luci::CircleCustom *>(node);

  // TODO handle more Custom ops here
  if (custom_node->custom_code() == "CircleReferencingConst")
    return true;

  return false;
}

} // namespace

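// Stores references to the IR graph and the runtime objects that the load*() methods populate.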
GraphLoader::GraphLoader(
  const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
  const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
  std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, IMemoryManager *memory_manager)
  : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
    _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor),
    _memory_manager(memory_manager)
{
}

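// Creates a Tensor for every tensor-producing node in the graph, copies constant data backed by
// CircleConst / CircleReferencingConst nodes into their tensors, and registers the tensors with
// the runtime graph.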
void GraphLoader::loadTensors()
{
  for (uint32_t i = 0; i < _graph->nodes()->size(); ++i)
  {
    const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));

    if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
    {
      const auto *cnode = loco::must_cast<const luci::CircleCustom *>(node);
      throw std::runtime_error("Unsupported Custom operator. " + cnode->custom_code() + " in " +
                               node->name());
    }

    if (!isTensorProducingNode(node))
      continue;

    // Only Input, Const, Custom and Variable nodes have their shapes assigned here.
    // Shapes of intermediate tensors will be inferred.
    Shape shape{};
    switch (node->opcode())
    {
      case luci::CircleOpcode::CIRCLECONST:
      case luci::CircleOpcode::CIRCLECUSTOMOUT:
      case luci::CircleOpcode::CIRCLEINPUT:
      case luci::CircleOpcode::CIRCLEVARIABLE:
        shape = getNodeShape(node);
        break;
      default:
        break;
    }

    AffineQuantization quantization;
    if (node->quantparam() != nullptr)
    {
      const luci::CircleQuantParam *params = node->quantparam();
      assert(params->scale.size() == params->zerop.size());
      quantization.scale.assign(params->scale.cbegin(), params->scale.cend());
      quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend());
      quantization.quantized_dimension = params->quantized_dimension;
    }

    auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization),
                                           node->name());

    // If the node has an execution plan, read the memory offset of its tensor from the beginning
    // of the shared memory buffer. Used by the Static Memory Manager.
    if (luci::has_execution_plan(node))
    {
      auto execution_plan = luci::get_execution_plan(node);
      assert(!execution_plan.offsets().empty());
      tensor->set_offset(execution_plan.offsets().front());
    }

    if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
    {
      size_t data_size{};
      const void *const_data = getNodeData(const_node, &data_size);
      if (const_data != nullptr)
      {
        _memory_manager->allocate_memory(*tensor);
        tensor->writeData(const_data, data_size);
      }
    }
    else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node))
    {
      const auto *custom_node =
        loco::must_cast<const luci::CircleCustom *>(custom_out_node->input());

      if (custom_node->custom_code() == "CircleReferencingConst")
      {
        size_t data_size{};
        const void *const_data = getNodeData(custom_node, &data_size);
        if (const_data != nullptr)
        {
          _memory_manager->allocate_memory(*tensor);
          tensor->writeData(const_data, data_size);
        }
      }
    }

    _node_to_tensor.emplace(node, tensor.get());
    _runtime_to_ir.tensor_to_node.emplace(tensor.get(), node);

    _runtime_graph->addTensor(std::move(tensor));
  }
}

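// Binds the runtime graph's input and output tensor lists to the tensors created for the
// corresponding Input and Output nodes, and allocates memory for the input tensors.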
void GraphLoader::initInputOutputTensors() const
{
  auto input_nodes = loco::input_nodes(_graph);
  std::vector<Tensor *> input_tensors(input_nodes.size());
  for (size_t i = 0; i < input_nodes.size(); ++i)
  {
    input_tensors[i] = _node_to_tensor.at(input_nodes[i]);
    _memory_manager->allocate_memory(*input_tensors[i]);
  }
  _runtime_graph->setInputTensors(input_tensors);

  auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph));
  std::vector<Tensor *> output_tensors(output_nodes.size());
  for (size_t i = 0; i < output_nodes.size(); ++i)
  {
    const auto *node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
    output_tensors[i] = _node_to_tensor.at(node->from());
  }
  _runtime_graph->setOutputTensors(output_tensors);
}

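// Builds a kernel for every executable node and registers it with the runtime graph.
// Kernels are created in execution-plan order when every node carries a plan annotation,
// and in postorder-traversal order otherwise.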
void GraphLoader::loadOperators()
{
  KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor);

  // Create kernels for executable nodes. This has to be done in execution order.
  auto graph = const_cast<loco::Graph *>(_graph);

  auto const graph_nodes = loco::all_nodes(graph);

  // Check whether every node carries an execution plan annotation.
  bool has_execution_annotation = true;
  auto const checking_exec_plan = [&has_execution_annotation](auto const node) {
    const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
    if (!luci::has_execution_plan(circle_node))
      has_execution_annotation = false;
  };
  std::for_each(begin(graph_nodes), end(graph_nodes), checking_exec_plan);

  if (has_execution_annotation)
  {
    // Build an ordered_nodes vector that stores the execution order of the graph nodes.
    std::vector<const luci::CircleNode *> ordered_nodes(graph_nodes.size());

    auto const filler = [&ordered_nodes](auto const node) {
      const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
      auto const position = luci::get_execution_plan(circle_node).order_in_plan();
      ordered_nodes.at(position) = circle_node;
    };
    std::for_each(begin(graph_nodes), end(graph_nodes), filler);

    for (auto node : ordered_nodes)
    {
      if (isExecutableNode(node))
      {
        std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
        _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
        _runtime_graph->addKernel(std::move(kernel));
      }
    }
  }
  else
  {
    // If an execution plan is not available, fall back to the default postorder traversal.
    for (const loco::Node *loco_node : loco::postorder_traversal(loco::output_nodes(graph)))
    {
      const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node);
      if (isExecutableNode(node))
      {
        std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
        _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
        _runtime_graph->addKernel(std::move(kernel));
      }
    }
  }
}

} // namespace luci_interpreter
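
// Usage sketch (not part of the original file; the surrounding runtime object names below are
// assumptions for illustration). A module-level loader typically drives GraphLoader roughly
// like this:
//
//   RuntimeToIR runtime_to_ir;
//   std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph;
//   std::unordered_map<const loco::Node *, Tensor *> node_to_tensor;
//
//   GraphLoader loader(graph, runtime_graph, runtime_to_ir, graph_to_runtime_graph,
//                      node_to_tensor, memory_manager);
//   loader.loadTensors();            // create tensors and copy constant data
//   loader.initInputOutputTensors(); // bind and allocate graph input/output tensors
//   loader.loadOperators();          // build kernels in execution order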