ONE - On-device Neural Engine
onert::backend::cpu::ops::AttentionLayer Class Reference

#include <AttentionLayer.h>


Public Member Functions

 AttentionLayer ()
 
 ~AttentionLayer ()
 
void configure (const IPortableTensor *input, const IPortableTensor *wq, const IPortableTensor *wk, const IPortableTensor *wv, const IPortableTensor *wo, const IPortableTensor *cos, const IPortableTensor *sin, const IPortableTensor *mask, IPortableTensor *k_cache, IPortableTensor *v_cache, const IPortableTensor *pos, IPortableTensor *output)
 
void run () override
 
- Public Member Functions inherited from onert::exec::IFunction
virtual ~IFunction ()=default
 
virtual void prepare ()
 

Detailed Description

AttentionLayer is a CPU-backend kernel that computes attention over a single-batch input using query/key/value/output projection weights (wq, wk, wv, wo), cos/sin position tensors with a cache position index (pos), an attention mask, and K/V cache tensors for incremental decoding.

Definition at line 28 of file AttentionLayer.h.

Constructor & Destructor Documentation

◆ AttentionLayer()

onert::backend::cpu::ops::AttentionLayer::AttentionLayer ( )

Definition at line 84 of file AttentionLayer.cc.

  : _input(nullptr), _wq(nullptr), _wk(nullptr), _wv(nullptr), _wo(nullptr), _cos(nullptr),
    _sin(nullptr), _mask(nullptr), _k_cache(nullptr), _v_cache(nullptr), _cache_pos(nullptr),
    _output(nullptr)
{
  // DO NOTHING
}

◆ ~AttentionLayer()

onert::backend::cpu::ops::AttentionLayer::~AttentionLayer ( ) = default

Member Function Documentation

◆ configure()

void onert::backend::cpu::ops::AttentionLayer::configure ( const IPortableTensor * input,
const IPortableTensor * wq,
const IPortableTensor * wk,
const IPortableTensor * wv,
const IPortableTensor * wo,
const IPortableTensor * cos,
const IPortableTensor * sin,
const IPortableTensor * mask,
IPortableTensor * k_cache,
IPortableTensor * v_cache,
const IPortableTensor * pos,
IPortableTensor * output
)

Definition at line 94 of file AttentionLayer.cc.

{
  _input = input;
  _wq = wq;
  _wk = wk;
  _wv = wv;
  _wo = wo;
  _cos = cos;
  _sin = sin;
  _mask = mask;
  _k_cache = k_cache;
  _v_cache = v_cache;
  _cache_pos = pos;
  _output = output;

  // 0. Read and check inputs and params
  const auto n_batch = getShape(_input).Dims(0);
  assert(n_batch == 1); // Multi-batch is not supported.
  const auto d_model = getShape(_input).Dims(2);

  if (_cos == nullptr || _sin == nullptr || _cache_pos == nullptr)
    throw std::runtime_error{"Attention: input tensors cannot be nullptr"};

  const auto k_cache_shape = getShape(_k_cache);
  if (k_cache_shape.DimensionsCount() != 4)
    throw std::runtime_error{"K cache tensor must be 4D"};

  // 0.1 Param - Read n_head from K cache 3rd dimension
  const int32_t n_head = k_cache_shape.Dims(2);
  if (d_model % n_head != 0)
    throw std::runtime_error{"d_model must be divisible by n_head"};

  const int32_t d_head = d_model / n_head;
  const auto k_cache_dims = k_cache_shape.DimsData();
  const int32_t k_cache_n_batch = k_cache_dims[0];
  const int32_t k_cache_n_head = k_cache_dims[2];
  const int32_t k_cache_d_head = k_cache_dims[3];

  if (n_batch != k_cache_n_batch || n_head != k_cache_n_head || d_head != k_cache_d_head)
    throw std::runtime_error{"Attention: shape mismatch between inputs"};
}

References nnfw::cker::Shape::Dims(), and onert::backend::cpu::ops::getShape().
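
The checks above define the shape contract: the input is [batch, seq, d_model] with batch fixed at 1, the K cache is 4D with n_head in dimension 2 and d_head in dimension 3, and d_head is derived as d_model / n_head. The following standalone sketch reproduces that derivation with plain C++ containers so the contract can be tested in isolation; the names deriveAttentionDims and AttentionDims are hypothetical and not part of the onert API.

#include <array>
#include <cstdint>
#include <stdexcept>

// Hypothetical standalone illustration of the invariants checked in
// AttentionLayer::configure(); plain arrays stand in for nnfw::cker::Shape.
struct AttentionDims
{
  int32_t n_head;
  int32_t d_head;
};

AttentionDims deriveAttentionDims(const std::array<int32_t, 3> &input_shape,   // [batch, seq, d_model]
                                  const std::array<int32_t, 4> &k_cache_shape) // [batch, ?, n_head, d_head]
{
  const int32_t n_batch = input_shape[0];
  const int32_t d_model = input_shape[2];
  if (n_batch != 1)
    throw std::runtime_error{"Multi-batch is not supported"};

  // n_head comes from the K cache's 3rd dimension, as in configure()
  const int32_t n_head = k_cache_shape[2];
  if (d_model % n_head != 0)
    throw std::runtime_error{"d_model must be divisible by n_head"};
  const int32_t d_head = d_model / n_head;

  // Batch, head count, and head depth must agree between input and cache
  if (n_batch != k_cache_shape[0] || d_head != k_cache_shape[3])
    throw std::runtime_error{"shape mismatch between inputs"};
  return {n_head, d_head};
}

For example, d_model = 512 against a cache declaring 8 heads of depth 64 passes (d_head = 512 / 8 = 64), while a cache declaring 7 heads throws, since 512 is not divisible by 7. Note that dimension 1 of the K cache is not validated by configure().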

◆ run()

void onert::backend::cpu::ops::AttentionLayer::run ( ) override

Implements onert::exec::IFunction.

Definition at line 388 of file AttentionLayer.cc.

{
  if (_input->data_type() == OperandType::FLOAT32)
    attentionFloat32();
  else
    throw std::runtime_error{"AttentionLayer: unsupported input data type"};
}

References onert::backend::IPortableTensor::data_type().
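
run() only dispatches: float32 inputs go to the private attentionFloat32() helper, whose body is not reproduced in this reference. For orientation, here is a minimal sketch of the standard single-head, single-query cached-attention step that a layer of this kind computes. It is an illustration of the general technique, not the onert implementation; the real kernel also applies the wq/wk/wv/wo projections, the cos/sin position tensors, and the mask.

#include <algorithm>
#include <cmath>
#include <vector>

// Illustrative single-head, single-query attention step over a KV cache.
// NOT the body of attentionFloat32(); a sketch of the underlying technique.
std::vector<float> attendOneStep(const std::vector<float> &q,                    // [d_head]
                                 const std::vector<std::vector<float>> &k_cache, // [pos+1][d_head]
                                 const std::vector<std::vector<float>> &v_cache, // [pos+1][d_head]
                                 int pos)                                        // current position
{
  const int d_head = static_cast<int>(q.size());
  const float scale = 1.0f / std::sqrt(static_cast<float>(d_head));

  // Scaled dot-product scores against every cached key up to `pos`.
  std::vector<float> scores(pos + 1);
  float max_score = -INFINITY;
  for (int t = 0; t <= pos; ++t)
  {
    float s = 0.0f;
    for (int i = 0; i < d_head; ++i)
      s += q[i] * k_cache[t][i];
    scores[t] = s * scale;
    max_score = std::max(max_score, scores[t]);
  }

  // Softmax over the scores (subtract the max for numerical stability).
  float sum = 0.0f;
  for (int t = 0; t <= pos; ++t)
  {
    scores[t] = std::exp(scores[t] - max_score);
    sum += scores[t];
  }

  // Output is the softmax-weighted sum of cached values.
  std::vector<float> out(d_head, 0.0f);
  for (int t = 0; t <= pos; ++t)
  {
    const float w = scores[t] / sum;
    for (int i = 0; i < d_head; ++i)
      out[i] += w * v_cache[t][i];
  }
  return out;
}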


The documentation for this class was generated from the following files:

AttentionLayer.h
AttentionLayer.cc