ONE - On-device Neural Engine
arm_compute::NEFullyConnectedHybridLayer Class Reference

#include <NEFullyConnectedHybridLayer.h>

Collaboration diagram for arm_compute::NEFullyConnectedHybridLayer:

Public Member Functions

 NEFullyConnectedHybridLayer (std::shared_ptr< IMemoryManager > memory_manager=nullptr)
 
 NEFullyConnectedHybridLayer (const NEFullyConnectedHybridLayer &)=delete
 
 NEFullyConnectedHybridLayer (NEFullyConnectedHybridLayer &&)=default
 
NEFullyConnectedHybridLayer & operator= (const NEFullyConnectedHybridLayer &)=delete
 
NEFullyConnectedHybridLayer & operator= (NEFullyConnectedHybridLayer &&)=default
 
void configure (const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
 
void run () override
 
void prepare () override
 

Static Public Member Functions

static Status validate (const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
 

Detailed Description

Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels:

  1. NEIm2ColKernel (called when the input comes from a convolutional layer)
  2. NETranspose (if are_weights_reshaped is set to false and transpose_weights is set to true) (called once)
  3. NEGEMMMatrixMultiplyKernel or NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
  4. NEGEMMMatrixAccumulateBiasesKernel or NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if biases is not equal to nullptr)
Note
The fully connected layer accepts "weights" tensors only with 2 dimensions.

Definition at line 69 of file NEFullyConnectedHybridLayer.h.
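A minimal usage sketch follows. The call sequence (configure, allocate, run) matches the member functions documented on this page, but the tensor shapes, the weight quantization scale, and the include paths are illustrative assumptions, not requirements of the API.

#include <NEFullyConnectedHybridLayer.h>       // path as reported by this page; may differ per build
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative shapes: 128 input features, 64 outputs, batch of 1.
    Tensor input, weights, biases, output;
    input.allocator()->init(TensorInfo(TensorShape(128U, 1U), 1, DataType::F32));
    // 2D weights stored as (in_features, out_features); symmetric int8 with an assumed scale of 0.05f.
    weights.allocator()->init(
        TensorInfo(TensorShape(128U, 64U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.05f)));
    biases.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
    output.allocator()->init(TensorInfo(TensorShape(64U, 1U), 1, DataType::F32));

    // Configure the function; weights are transposed internally by default.
    NEFullyConnectedHybridLayer fc;
    fc.configure(&input, &weights, &biases, &output);

    // Allocate backing memory, then fill input/weights/biases before running.
    input.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    output.allocator()->allocate();

    fc.run(); // quantizes the input, runs the int8 GEMM, rescales and accumulates biases
    return 0;
}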

Constructor & Destructor Documentation

◆ NEFullyConnectedHybridLayer() [1/3]

NEFullyConnectedHybridLayer::NEFullyConnectedHybridLayer ( std::shared_ptr< IMemoryManager >  memory_manager = nullptr)

Constructor

Definition at line 67 of file NEFullyConnectedHybridLayer.cpp.

: _memory_group(std::move(memory_manager)), _reshape_weights_function(), _quant_input_kernel(),
  _mm_gemmlowp(), _accumulate_biases_kernel(), _reshape_weights_output(), _quantized_input(),
  _scale_factor(), _original_weights(nullptr), _are_weights_reshaped(false),
  _accumulate_biases(false), _is_prepared(false)
{
}

◆ NEFullyConnectedHybridLayer() [2/3]

arm_compute::NEFullyConnectedHybridLayer::NEFullyConnectedHybridLayer ( const NEFullyConnectedHybridLayer &  )
delete

Prevent instances of this class from being copied (As this class contains pointers)

◆ NEFullyConnectedHybridLayer() [3/3]

arm_compute::NEFullyConnectedHybridLayer::NEFullyConnectedHybridLayer ( NEFullyConnectedHybridLayer &&  )
default

Default move constructor

Member Function Documentation

◆ configure()

void NEFullyConnectedHybridLayer::configure ( const ITensor *  input,
const ITensor *  weights,
const ITensor *  biases,
ITensor *  output,
FullyConnectedLayerInfo  fc_info = FullyConnectedLayerInfo() 
)

Set the input and output tensors.

Parameters
[in]  input    Source tensor. Data type supported: F16/F32.
[in]  weights  Weights tensor. The weights must be 2 dimensional. If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. Data type supported: S8.
[in]  biases   Bias tensor. Can be nullptr. Data type supported: Same as input.
[out] output   Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
  • The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
  • The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer. Data type supported: Same as input.
[in]  fc_info  (Optional) Fully connected layer additional info
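The fc_info argument controls how the weights are handled. As a hedged sketch (field names follow arm_compute::FullyConnectedLayerInfo; input, weights and output are assumed to be already initialised tensors): if the weights have already been stored in the transposed 2D layout offline, the internal NETranspose step can be skipped.

// Weights already stored transposed, e.g. shape (64, 128) for 128 inputs and 64 outputs,
// so the layer's internal transpose (NETranspose) is not needed.
FullyConnectedLayerInfo fc_info{};
fc_info.transpose_weights = false;

NEFullyConnectedHybridLayer fc;
fc.configure(&input, &weights, /*biases=*/nullptr, &output, fc_info);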

Definition at line 85 of file NEFullyConnectedHybridLayer.cpp.

{
  ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

  // Perform validate step
  ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedHybridLayer::validate(
    input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
    fc_info));

  _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
  _accumulate_biases = false;
  _is_prepared = fc_info.retain_internal_weights;
  _original_weights = weights;

  // Configure accumulate biases kernel for non quantized asymmetric types
  if (biases != nullptr)
  {
    _accumulate_biases = true;

    // Configure accumulate biases kernel
    _accumulate_biases_kernel.configure(output, biases);
  }

  // With the Fully Connected layer we can have 4 different cases:
  // 1) Convolution layer -> Fully Connected layer without batches
  // 2) Fully Connected layer -> Fully Connected layer without batches
  // 3) Convolution layer -> Fully Connected layer with batches
  // 4) Fully Connected layer -> Fully Connected layer with batches

  const ITensor *weights_to_use = weights;

  // Check if we have a fully connected layer with batches
  const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
  bool _is_fc_after_conv = false;
  if (is_batched_fc_layer)
  {
    _is_fc_after_conv =
      (TensorShape::num_max_dimensions >= 4) &&
      (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
                  output->info()->tensor_shape().cbegin() + 1));
  }
  else
  {
    _is_fc_after_conv = input->info()->num_dimensions() > 1 && input->info()->dimension(1) > 1;
  }
  ARM_COMPUTE_ERROR_ON_MSG(_is_fc_after_conv,
                           "NEFullyConnectedHybridLayer does not support after conv");
  ARM_COMPUTE_UNUSED(_is_fc_after_conv);

  // Reshape weights if needed
  if (!_are_weights_reshaped)
  {
    // Reshape the weights
    _reshape_weights_output.allocator()->init(
      weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
        compute_transposed_shape(*weights->info())));
    _reshape_weights_function.configure(weights_to_use, &_reshape_weights_output);
    weights_to_use = &_reshape_weights_output;
  }

  // Quantize input
  _quantized_input.allocator()->init(
    input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
      DataType::QASYMM8_SIGNED));
  _scale_factor.allocator()->init(
    TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32));
  _quant_input_kernel.configure(input, &_quantized_input, &_scale_factor);

  // GEMM
  _gemmlowp_output.allocator()->init(
    output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
  configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output);

  // Multiply scale
  _multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output,
                                   weights->info()->quantization_info().uniform().scale);

  _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;

  _quantized_input.allocator()->allocate();
  _scale_factor.allocator()->allocate();
  _gemmlowp_output.allocator()->allocate();
}

References arm_compute::NEMultiplyScaleFactorKernel::configure(), arm_compute::NEQuantizationSymmetricKernel::configure(), arm_compute::NEGEMMMatrixAccumulateBiasesKernel::configure(), and validate().
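The kernel chain configured above implements a hybrid (dynamically quantized) matrix multiply: the float input is quantized symmetrically per batch row, multiplied against the int8 weights in integer arithmetic, and the int32 accumulators are rescaled back to float using the per-row input scale and the weight scale. The scalar sketch below illustrates that arithmetic for one batch row; the variable names and the exact quantization rule (max-abs over 127) are assumptions for illustration, not the kernels' verbatim implementation.

// Hybrid FC for one batch row, scalar reference (illustrative only).
// in:      float input row of length K        (corresponds to the F16/F32 input)
// qw:      int8 weights, out_features rows    (corresponds to the QASYMM8_SIGNED weights)
// w_scale: weights->quantization_info().uniform().scale
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

std::vector<float> hybrid_fc_row(const std::vector<float> &in,
                                 const std::vector<std::vector<int8_t>> &qw,
                                 float w_scale, const std::vector<float> &bias)
{
    // 1) Per-row symmetric quantization of the input (NEQuantizationSymmetricKernel's role).
    float max_abs = 0.f;
    for (float v : in) max_abs = std::max(max_abs, std::fabs(v));
    float in_scale = max_abs / 127.f; // assumed symmetric rule
    if (in_scale == 0.f) in_scale = 1.f; // guard for an all-zero row
    std::vector<int8_t> q_in(in.size());
    for (size_t k = 0; k < in.size(); ++k)
        q_in[k] = static_cast<int8_t>(std::lround(in[k] / in_scale));

    // 2) Integer GEMM accumulating into int32 (the GEMMLowp core's role).
    // 3) Rescale by in_scale * w_scale (NEMultiplyScaleFactorKernel's role) and add the bias.
    std::vector<float> out(qw.size());
    for (size_t o = 0; o < qw.size(); ++o)
    {
        int32_t acc = 0;
        for (size_t k = 0; k < in.size(); ++k)
            acc += static_cast<int32_t>(q_in[k]) * static_cast<int32_t>(qw[o][k]);
        out[o] = static_cast<float>(acc) * in_scale * w_scale + bias[o];
    }
    return out;
}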

◆ operator=() [1/2]

NEFullyConnectedHybridLayer & arm_compute::NEFullyConnectedHybridLayer::operator= ( const NEFullyConnectedHybridLayer &  )
delete

Prevent instances of this class from being copied (As this class contains pointers)

◆ operator=() [2/2]

NEFullyConnectedHybridLayer & arm_compute::NEFullyConnectedHybridLayer::operator= ( NEFullyConnectedHybridLayer &&  )
default

Default move assignment operator

References validate().

◆ prepare()

void NEFullyConnectedHybridLayer::prepare ( )
override

Definition at line 254 of file NEFullyConnectedHybridLayer.cpp.

{
  if (!_is_prepared)
  {
    ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

    auto release_unused = [](Tensor *w) {
      if (!w->is_used())
      {
        w->allocator()->free();
      }
    };

    // Reshape of the weights (happens only once)
    if (!_are_weights_reshaped)
    {
      // Run reshape weights kernel and mark weights as unused
      _reshape_weights_output.allocator()->allocate();
      _reshape_weights_function.run();

      _are_weights_reshaped = true;
      // We can not release _original_weights because it can be used in other nodes
    }

    // Prepare GEMM and release unused weights
    _mm_gemmlowp.prepare();

    // Release reshaped weights if unused
    release_unused(&_reshape_weights_output);

    _is_prepared = true;
  }
}

Referenced by run().

◆ run()

void NEFullyConnectedHybridLayer::run ( )
override

Definition at line 232 of file NEFullyConnectedHybridLayer.cpp.

{
  prepare();

  MemoryGroupResourceScope scope_mg(_memory_group);

  // Quantize input
  NEScheduler::get().schedule(&_quant_input_kernel, Window::DimY);

  // Run matrix multiply
  _mm_gemmlowp.run();

  // Multiply scale factor
  NEScheduler::get().schedule(&_multiply_scale_kernel, Window::DimY);

  // Accumulate biases if provided
  if (_accumulate_biases)
  {
    NEScheduler::get().schedule(&_accumulate_biases_kernel, Window::DimY);
  }
}

References prepare().

Referenced by package.infer.session::inference().
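Because run() starts with a call to prepare(), the one-time weight reshape and GEMM preparation happen lazily on the first run() unless prepare() is invoked explicitly. A hedged warm-up sketch, where fc is a configured NEFullyConnectedHybridLayer and the helpers are hypothetical:

// Pay the one-time cost (weight transpose, GEMM preparation) up front rather than on the first inference.
fc.prepare();

for (int i = 0; i < num_frames; ++i) // illustrative inference loop
{
    fill_input(input);   // hypothetical helper writing the next frame into the input tensor
    fc.run();            // subsequent prepare() calls inside run() are no-ops
    read_output(output); // hypothetical helper
}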

◆ validate()

Status NEFullyConnectedHybridLayer::validate ( const ITensorInfo *  input,
const ITensorInfo *  weights,
const ITensorInfo *  biases,
const ITensorInfo *  output,
FullyConnectedLayerInfo  fc_info = FullyConnectedLayerInfo() 
)
static

Static function to check if given info will lead to a valid configuration of NEFullyConnectedHybridLayer

Parameters
[in]  input    Source tensor info. Data type supported: F16/F32.
[in]  weights  Weights tensor info. The weights must be 2 dimensional. If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. Data type supported: S8.
[in]  biases   Bias tensor info. Can be nullptr. Data type supported: Same as input.
[out] output   Destination tensor info. Its shape should be equal to the output of a matrix multiplication between:
  • The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
  • The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer. Data type supported: Same as input.
[in]  fc_info  (Optional) Fully connected layer additional info
Returns
a status
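Since validate() only takes ITensorInfo pointers, a planned configuration can be checked before any tensor memory exists. A hedged sketch (shapes and the weight scale are assumptions):

// Check a planned configuration without allocating any tensors.
const TensorInfo input_info(TensorShape(128U, 1U), 1, DataType::F32);
const TensorInfo weights_info(TensorShape(128U, 64U), 1, DataType::QASYMM8_SIGNED,
                              QuantizationInfo(0.05f));
const TensorInfo biases_info(TensorShape(64U), 1, DataType::F32);
const TensorInfo output_info(TensorShape(64U, 1U), 1, DataType::F32);

const Status st = NEFullyConnectedHybridLayer::validate(&input_info, &weights_info, &biases_info,
                                                        &output_info);
if (!st)
{
    // st.error_description() explains why the configuration would be rejected.
}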

Definition at line 171 of file NEFullyConnectedHybridLayer.cpp.

{
  ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights);
  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED);
  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
  ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
  ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 2);

  bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;

  const ITensorInfo &reshaped_weights =
    TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
      compute_transposed_shape(*weights)));

  // Configure accumulate biases kernel for non quantized asymmetric types
  if (biases != nullptr)
  {
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
    ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixAccumulateBiasesKernel::validate(output, biases));
  }

  // With the Fully Connected layer we can have 4 different cases:
  // 1) Convolution layer -> Fully Connected layer without batches
  // 2) Fully Connected layer -> Fully Connected layer without batches
  // 3) Convolution layer -> Fully Connected layer with batches
  // 4) Fully Connected layer -> Fully Connected layer with batches

  const ITensorInfo *weights_to_use = weights;

  if (!weights_reshaped)
  {
    // Validate reshape weights kernel
    ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(weights_to_use, &reshaped_weights));
    weights_to_use = &reshaped_weights;
  }

  // Fully Connected layer after a Fully Connected Layer without batches
  ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));

  // Validate quantization kernel
  const ITensorInfo &quantized_input = TensorInfo(
    input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::QASYMM8_SIGNED));
  const ITensorInfo &scale_factor = TensorInfo(TensorShape{output->dimension(1)}, 1, DataType::F32);
  ARM_COMPUTE_RETURN_ON_ERROR(
    NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor));

  const ITensorInfo &gemmlowp_output = TensorInfo(
    output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
  // Validate matrix multiply kernel
  ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));

  ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate(
    &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));

  return Status{};
}

References arm_compute::NEGEMMMatrixAccumulateBiasesKernel::validate(), arm_compute::NEQuantizationSymmetricKernel::validate(), and arm_compute::NEMultiplyScaleFactorKernel::validate().

Referenced by configure(), and operator=().


The documentation for this class was generated from the following files:

NEFullyConnectedHybridLayer.h
NEFullyConnectedHybridLayer.cpp