ONE - On-device Neural Engine
arm_compute::NEFullyConnectedLayerEx Class Reference

#include <NEFullyConnectedLayerEx.h>


Public Member Functions

 NEFullyConnectedLayerEx (std::shared_ptr< IMemoryManager > memory_manager=nullptr)
 
 NEFullyConnectedLayerEx (const NEFullyConnectedLayerEx &)=delete
 
 NEFullyConnectedLayerEx (NEFullyConnectedLayerEx &&)=delete
 
NEFullyConnectedLayerEx & operator= (const NEFullyConnectedLayerEx &)=delete
 
NEFullyConnectedLayerEx & operator= (NEFullyConnectedLayerEx &&)=delete
 
void configure (const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
 
void run () override
 
void prepare () override
 

Static Public Member Functions

static Status validate (const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
 

Detailed Description

Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels:

  1. NEIm2ColKernel (called when the input comes from a convolutional layer)
  2. NETranspose (if are_weights_reshaped is set to false and transpose_weights is set to true) (called once)
  3. NEGEMMMatrixMultiplyKernel or NEGEMMLowpMatrixMultiplyCore (if the input is asymmetrically quantized)
Note
The fully connected layer accepts "weights" tensors only with 2 dimensions.
The difference from NEFullyConnectedLayer is that this class supports non-constant weights tensors as input, at the cost of some performance.

Definition at line 70 of file NEFullyConnectedLayerEx.h.
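For orientation, the computation this function ultimately performs is an ordinary matrix multiply plus bias; the kernels listed above implement the same arithmetic, vectorized on NEON. A minimal reference sketch in plain C++ (not the accelerated path, and using plain vectors rather than arm_compute tensor types):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Reference fully connected layer: out[o] = sum_i W[o][i] * in[i] + b[o].
std::vector<float> fully_connected(const std::vector<std::vector<float>> &weights,
                                   const std::vector<float> &input,
                                   const std::vector<float> &bias)
{
    std::vector<float> out(weights.size(), 0.0f);
    for (std::size_t o = 0; o < weights.size(); ++o)
    {
        // Dot product of one weights row with the (flattened) input
        for (std::size_t i = 0; i < input.size(); ++i)
            out[o] += weights[o][i] * input[i];
        out[o] += bias[o];
    }
    return out;
}
```

This is why the weights tensor must be 2 dimensional: one row per output neuron, one column per (flattened) input element.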

Constructor & Destructor Documentation

◆ NEFullyConnectedLayerEx() [1/3]

arm_compute::NEFullyConnectedLayerEx::NEFullyConnectedLayerEx ( std::shared_ptr< IMemoryManager >  memory_manager = nullptr)

Constructor

Definition at line 145 of file NEFullyConnectedLayerEx.cpp.

146 : _memory_group(std::move(memory_manager)), _convert_weights(), _flatten_kernel(),
147 _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _flatten_output(),
148 _converted_weights_output(), _reshape_weights_output(), _are_weights_converted(true),
149 _are_weights_reshaped(false), _is_fc_after_conv(false), _is_quantized(false),
150 _is_prepared(false), _original_weights(nullptr)
151{
152}

◆ NEFullyConnectedLayerEx() [2/3]

arm_compute::NEFullyConnectedLayerEx::NEFullyConnectedLayerEx ( const NEFullyConnectedLayerEx &  )
delete

Prevent instances of this class from being copied (as this class contains pointers)

◆ NEFullyConnectedLayerEx() [3/3]

arm_compute::NEFullyConnectedLayerEx::NEFullyConnectedLayerEx ( NEFullyConnectedLayerEx &&  )
delete

Deleted move constructor

Member Function Documentation

◆ configure()

void arm_compute::NEFullyConnectedLayerEx::configure ( const ITensor *  input,
const ITensor *  weights,
const ITensor *  biases,
ITensor *  output,
FullyConnectedLayerInfo  fc_info = FullyConnectedLayerInfo() 
)

Set the input and output tensors.

Parameters
  [in]  input    Source tensor. Data type supported: QASYMM8/F16/F32.
  [in]  weights  Weights tensor. The weights must be 2 dimensional. If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the input's first 3 dimensions. If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. Data type supported: Same as input.
  [in]  biases   Bias tensor. Can be nullptr. Data type supported: Same as input.
  [out] output   Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
                   • The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
                   • The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
                 Data type supported: Same as input.
  [in]  fc_info  (Optional) Fully connected layer additional info

Definition at line 243 of file NEFullyConnectedLayerEx.cpp.

246{
247 // Perform validate step
248 ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
249 ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayerEx::validate(
250 input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
251 fc_info));
252
253 _are_weights_converted = true;
254 _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
255 _is_fc_after_conv = true;
256 _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
257 _original_weights = weights;
258
259 // With the Fully Connected layer we can have 4 different cases:
260 // 1) Convolution layer -> Fully Connected layer without batches
261 // 2) Fully Connected layer -> Fully Connected layer without batches
262 // 3) Convolution layer -> Fully Connected layer with batches
263 // 4) Fully Connected layer -> Fully Connected layer with batches
264
265 const ITensor *weights_to_use = weights;
266
267 // Check if we have a fully connected layer with batches
268 const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
269 if (is_batched_fc_layer)
270 {
271 _is_fc_after_conv =
272 (TensorShape::num_max_dimensions >= 4) &&
273 (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
274 output->info()->tensor_shape().cbegin() + 1));
275 }
276 else
277 {
278 _is_fc_after_conv = input->info()->num_dimensions() > 1;
279 }
280
281 // Reshape weights if needed
282 if (!_are_weights_reshaped)
283 {
284 // Reshape the weights
285 _reshape_weights_function.configure(weights, &_reshape_weights_output);
286 weights_to_use = &_reshape_weights_output;
287 }
288
289 // Convert weights if needed
290 if (_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout))
291 {
292 // Convert weights
293 _convert_weights.configure(weights_to_use, &_converted_weights_output,
294 input->info()->tensor_shape(), fc_info.weights_trained_layout);
295
296 weights_to_use = &_converted_weights_output;
297 _are_weights_converted = false;
298 }
299
300 if (_is_fc_after_conv)
301 {
302 // Fully Connected layer after a Convolution Layer without batches
303 configure_conv_fc(input, weights_to_use, biases, output, fc_info);
304 }
305 else
306 {
307 // Fully Connected layer after a Fully Connected Layer without batches
308 configure_fc_fc(input, weights_to_use, biases, output, fc_info);
309 }
310
311 _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
312}

References validate().
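The is_batched_fc_layer / _is_fc_after_conv logic above decides whether the input comes from a convolution by comparing the input's dimensions from index 3 onward against the output's dimensions from index 1 onward. A self-contained sketch of that check, with std::vector standing in for TensorShape (an assumption made purely for illustration; the real code also consults TensorShape::num_max_dimensions):

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Sketch of the _is_fc_after_conv decision in configure()/validate().
// Batched case: the input's trailing (batch) dimensions, starting at
// index 3, must match the output's dimensions starting at index 1.
bool is_fc_after_conv(const std::vector<std::size_t> &input_shape,
                      const std::vector<std::size_t> &output_shape)
{
    const bool is_batched = output_shape.size() > 1 && output_shape[1] > 1;
    if (is_batched)
    {
        return input_shape.size() >= 4 &&
               std::equal(input_shape.cbegin() + 3, input_shape.cend(),
                          output_shape.cbegin() + 1);
    }
    // Without batches: any multi-dimensional input is assumed to come
    // from a convolution layer and will be flattened first.
    return input_shape.size() > 1;
}
```

For example, a conv output of shape [7, 7, 64, 4] feeding an FC output of shape [10, 4] is treated as the conv-to-FC case, while an FC input of shape [128, 4] is not.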

◆ operator=() [1/2]

NEFullyConnectedLayerEx & arm_compute::NEFullyConnectedLayerEx::operator= ( const NEFullyConnectedLayerEx &  )
delete

Prevent instances of this class from being copied (as this class contains pointers)

◆ operator=() [2/2]

NEFullyConnectedLayerEx & arm_compute::NEFullyConnectedLayerEx::operator= ( NEFullyConnectedLayerEx &&  )
delete

Deleted move assignment operator

References validate().

◆ prepare()

void arm_compute::NEFullyConnectedLayerEx::prepare ( )
override

Definition at line 451 of file NEFullyConnectedLayerEx.cpp.

452{
453 // DO NOTHING
454}

◆ run()

void arm_compute::NEFullyConnectedLayerEx::run ( )
override

Definition at line 399 of file NEFullyConnectedLayerEx.cpp.

400{
401 if (!_is_prepared)
402 {
403 if (!_are_weights_reshaped)
404 _reshape_weights_output.allocator()->allocate();
405 if (!_are_weights_converted)
406 _converted_weights_output.allocator()->allocate();
407 _is_prepared = true;
408 }
409
410 {
411 ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
412
413 // Reshape of the weights
414 if (!_are_weights_reshaped)
415 {
416 _reshape_weights_function.run();
417 }
418
419 // Convert weights if needed
420 if (!_are_weights_converted)
421 {
422 _convert_weights.run();
423 }
424
425 // Prepare GEMM
426 if (!_is_quantized)
427 {
428 _mm_gemm.prepare();
429 }
430 }
431
432 MemoryGroupResourceScope scope_mg(_memory_group);
433
434 // Linearize input if it comes from a convolutional layer
435 if (_is_fc_after_conv)
436 {
437 _flatten_kernel.run();
438 }
439
440 // Run matrix multiply
441 if (_is_quantized)
442 {
443 _mm_gemmlowp.run();
444 }
445 else
446 {
447 _mm_gemm.run();
448 }
449}

Referenced by package.infer.session::inference().
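run() performs the one-time weight reshape/conversion lazily on its first invocation, guarded by _is_prepared (note that prepare() itself is a no-op in this class). A minimal sketch of that lazy-preparation pattern, with hypothetical counters standing in for the real reshape and GEMM kernels:

```cpp
#include <cassert>

// Sketch of the lazy-prepare pattern used by run(): expensive one-time
// work (weight reshape/conversion, GEMM prepare) happens on the first
// call only; every call then executes the matrix multiply.
class LazyLayer
{
public:
    void run()
    {
        if (!_is_prepared)
        {
            ++prepare_count; // stands in for reshaping/converting weights
            _is_prepared = true;
        }
        ++run_count; // stands in for running the GEMM
    }
    int prepare_count = 0;
    int run_count = 0;

private:
    bool _is_prepared = false;
};
```

Calling run() three times performs the preparation exactly once but the multiply three times.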

◆ validate()

Status arm_compute::NEFullyConnectedLayerEx::validate ( const ITensorInfo *  input,
const ITensorInfo *  weights,
const ITensorInfo *  biases,
const ITensorInfo *  output,
FullyConnectedLayerInfo  fc_info = FullyConnectedLayerInfo() 
)
static

Static function to check if given info will lead to a valid configuration of NEFullyConnectedLayerEx

Parameters
  [in]  input    Source tensor info. Data type supported: QASYMM8/F16/F32.
  [in]  weights  Weights tensor info. The weights must be 2 dimensional. If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the input's first 3 dimensions. If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. Data type supported: Same as input.
  [in]  biases   Bias tensor info. Can be nullptr. Data type supported: Same as input.
  [out] output   Destination tensor info. Its shape should be equal to the output of a matrix multiplication between:
                   • The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
                   • The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
                 Data type supported: Same as input.
  [in]  fc_info  (Optional) Fully connected layer additional info
Returns
  a status

Definition at line 314 of file NEFullyConnectedLayerEx.cpp.

317{
318 ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights);
319 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
320 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
321 DataType::F32);
322 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
323 ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
324
325 bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
326 bool is_fc_after_conv = true;
327
328 const ITensorInfo &flatten_input =
329 TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
330 compute_flatten_shape(input)));
331 const ITensorInfo &reshaped_weights =
332 TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
333 compute_transposed_shape(*weights)));
334 const ITensorInfo &converted_weights =
335 weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
336 : TensorInfo(*reshaped_weights.clone());
337
338 // With the Fully Connected layer we can have 4 different cases:
339 // 1) Convolution layer -> Fully Connected layer without batches
340 // 2) Fully Connected layer -> Fully Connected layer without batches
341 // 3) Convolution layer -> Fully Connected layer with batches
342 // 4) Fully Connected layer -> Fully Connected layer with batches
343
344 const ITensorInfo *input_to_use = input;
345 const ITensorInfo *weights_to_use = weights;
346
347 // Check if we have a fully connected layer with batches
348 const bool is_batched_fc_layer = output->dimension(1) > 1;
349
350 if (is_batched_fc_layer)
351 {
352 is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
353 (std::equal(input->tensor_shape().cbegin() + 3, input->tensor_shape().cend(),
354 output->tensor_shape().cbegin() + 1));
355 }
356 else
357 {
358 is_fc_after_conv = input->num_dimensions() > 1;
359 }
360
361 if (!weights_reshaped)
362 {
363 // Validate reshape weights kernel
364 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(weights, &reshaped_weights));
365 weights_to_use = &reshaped_weights;
366 }
367
368 if (is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout))
369 {
370 // Validate convert weights kernel
371 ARM_COMPUTE_RETURN_ON_ERROR(NEConvertFullyConnectedWeights::validate(
372 weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
373 weights_to_use = &converted_weights;
374 }
375
376 if (is_fc_after_conv)
377 {
378 // Fully Connected layer after a Convolution Layer without batches
379 ARM_COMPUTE_RETURN_ERROR_ON(
380 (weights_to_use->dimension(1) !=
381 (input->dimension(0) * input->dimension(1) * input->dimension(2))));
382
383 // Validate flatten kernel
384 ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayer::validate(input, &flatten_input));
385 input_to_use = &flatten_input;
386 }
387 else
388 {
389 // Fully Connected layer after a Fully Connected Layer without batches
390 ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
391 }
392 // Validate matrix multiply kernel
393 ARM_COMPUTE_RETURN_ON_ERROR(
394 validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info));
395
396 return Status{};
397}

Referenced by configure(), and operator=().
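validate() follows arm_compute's convention of returning a Status so callers can reject an invalid configuration before any tensors are allocated. A self-contained sketch of the same pattern, with a hypothetical Status stand-in (not the real arm_compute::Status) and only two of the checks above, the 2-D weights constraint and the conv-to-FC shape match:

```cpp
#include <cassert>
#include <cstddef>
#include <string>

// Hypothetical stand-in for arm_compute::Status: default-constructed
// means success, otherwise it carries an error description.
struct Status
{
    std::string error; // empty == success
    bool ok() const { return error.empty(); }
};

// Sketch of two early checks in NEFullyConnectedLayerEx::validate():
// weights must be 2-D, and (for the conv-to-FC case) the weights'
// second dimension must equal the flattened input size.
Status validate_fc(std::size_t weights_dims, std::size_t weights_rows,
                   std::size_t flattened_input_size)
{
    if (weights_dims > 2)
        return {"weights tensor must be 2 dimensional"};
    if (weights_rows != flattened_input_size)
        return {"weights rows must match the flattened input size"};
    return {};
}
```

configure() uses the same checks internally via ARM_COMPUTE_ERROR_THROW_ON(validate(...)), so a configuration that passes validate() is safe to configure.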


The documentation for this class was generated from the following files: