ONE - On-device Neural Engine
Loading...
Searching...
No Matches
FullyConnectedLayer.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "FullyConnectedLayer.h"
18
19#include "GGMLHelper.h"
20#include "../KernelGenerator.h"
21#include "../Validator.h"
22
24{
25
26void Validator::visit(const ir::operation::FullyConnected &node)
27{
28 using ir::operation::FullyConnected;
29
30 const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
31 const auto weight_node = &_graph.operands().at(weight_index);
32
33 _supported = false;
34
35 if (weight_node->typeInfo().type() != ir::DataType::QUANT_GGML_Q4_0 &&
36 weight_node->typeInfo().type() != ir::DataType::QUANT_GGML_Q8_0)
37 return;
38
39 if (node.param().activation != ir::Activation::NONE)
40 return;
41
42 _supported = true;
43}
44
45void KernelGenerator::visit(const ir::operation::FullyConnected &node)
46{
47 using ir::operation::FullyConnected;
48
49 const auto output_index{node.getOutputs().at(0)};
50 const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
51 const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
52 const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
53 const auto activation = node.param().activation;
54 const auto weights_format = node.param().weights_format;
55 if (weights_format != ir::FullyConnectedWeightsFormat::Default)
56 throw std::runtime_error("Unsupported FullyConnected Weights Format");
57
58 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
59 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
60 auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
61 auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
62
63 auto fn = std::make_unique<ops::FullyConnectedLayer>();
64
65 fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor,
66 _external_context);
67
68 _return_fn = std::move(fn);
69}
70
71} // namespace onert::backend::ggml
72
74{
75
// Default state: all operand pointers and the external context are unset;
// the actual wiring is done later in configure().
  : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
    _activation(ir::Activation::NONE), _external_context(nullptr)
{
  // DO NOTHING
}
82
84
{
  // Executes output = weights x input through ggml's MUL_MAT operator.
  // Bias fusion is not implemented for the GGML-quantized weight path yet.
  if (_bias)
    throw std::runtime_error{"FullyConnected: GGML weights format does not support bias yet."};

  // convert tensor
  // Wrap each portable tensor as a ggml_tensor descriptor (see getGGMLTensor).
  auto input = getGGMLTensor(_input);
  auto weights = getGGMLTensor(_weights);
  auto output = getGGMLTensor(_output);
  {
    // Turn the output descriptor into a MUL_MAT node whose sources are the
    // wrapped weight and input operands.
    output.op = GGML_OP_MUL_MAT;
    output.src[0] = &weights;
    output.src[1] = &input;
  }
  auto *nodes = &output;

  // create graph
  // Build a single-node compute graph on the stack around that MUL_MAT node.
  struct ggml_cgraph graph;
  {
    memset(&graph, 0, sizeof(graph));
    graph.n_nodes = 1;
    graph.nodes = &nodes;
  }

  // get cplan
  // Ask ggml how much scratch memory the plan needs, then back it with a
  // temporary buffer that only lives for this invocation.
  auto cplan = ggml_graph_plan(&graph, _external_context->maxNumThreads());
  std::vector<uint8_t> buf(cplan.work_size);
  cplan.work_data = buf.data();

  // compute
  // Run the graph; the result is written through the wrapped output
  // descriptor (presumably aliasing _output's buffer -- wiring is inside
  // getGGMLTensor).
  ggml_graph_compute(&graph, &cplan);
}
117
                                   const IPortableTensor *bias, ir::Activation activation,
                                   IPortableTensor *output,
                                   const std::shared_ptr<ExternalContext> &external_context)
{
  // Simply stores the operand tensors and execution parameters; no
  // validation happens here -- the weight data-type check is done in run().
  _input = input;
  _weights = weights;
  _bias = bias;
  _activation = activation;
  _output = output;
  _external_context = external_context;
}
130
{
  // Dispatch on the weight data type: only GGML-quantized weights are
  // supported, matching what Validator::visit accepts.
  if (_weights->data_type() == ir::DataType::QUANT_GGML_Q4_0 ||
      _weights->data_type() == ir::DataType::QUANT_GGML_Q8_0)
  {
    // NOTE(review): this branch body is empty in this view; the original
    // statement here (likely a call such as fullyConnectedGGMLWeight())
    // appears to have been lost in extraction -- confirm against the
    // repository source before relying on this listing.
  }
  else
  {
    throw std::runtime_error{"FullyConnected: unsupported data type"};
  }
}
143
{
  // No ahead-of-time setup is needed; all work happens in run().
  // DO NOTHING
}
148
149} // namespace onert::backend::ggml::ops
A tensor class that is portable for other backends.
ir::DataType data_type() const override final
std::unique_ptr< exec::IFunction > _return_fn
std::shared_ptr< ExternalContext > _external_context
void configure(const IPortableTensor *input, const IPortableTensor *weights, const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output, const std::shared_ptr< ExternalContext > &external_context)
const Operands & operands() const override
Definition Graph.h:103
const Object & at(const Index &index) const
Get the object that is associated with the given index.
struct ggml_tensor getGGMLTensor(const IPortableTensor *tensor)
Definition GGMLHelper.cc:41
CLTensor bias_tensor