ONE - On-device Neural Engine
arm_compute::CLFullyConnectedHybridLayer Class Reference

#include <CLFullyConnectedHybridLayer.h>

Collaboration diagram for arm_compute::CLFullyConnectedHybridLayer:

Public Member Functions

 CLFullyConnectedHybridLayer (std::shared_ptr< IMemoryManager > memory_manager=nullptr)
 
 CLFullyConnectedHybridLayer (const CLFullyConnectedHybridLayer &)=delete
 
 CLFullyConnectedHybridLayer (CLFullyConnectedHybridLayer &&)=default
 
CLFullyConnectedHybridLayer & operator= (const CLFullyConnectedHybridLayer &)=delete
 
CLFullyConnectedHybridLayer & operator= (CLFullyConnectedHybridLayer &&)=default
 
void configure (const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
 
void run () override
 
void prepare () override
 

Static Public Member Functions

static Status validate (const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
 

Detailed Description

Basic function to compute a Fully Connected layer on OpenCL. This function calls the following OpenCL kernels:

  1. CLIm2ColKernel (called when the input comes from a convolutional layer)
  2. CLTranspose (if are_weights_reshaped is set to false and transpose_weights is set to true) (called once)
  3. CLGEMMLowpMatrixMultiplyCore (if quantized symmetric)
  4. CLGEMMMatrixAccumulateBiasesKernel (if biases is not equal to nullptr)
Note
The fully connected layer accepts only 2-dimensional "weights" tensors.

Definition at line 69 of file CLFullyConnectedHybridLayer.h.
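
The kernel sequence above amounts to a "hybrid" matrix multiply: the float input is quantized symmetrically to int8, multiplied against the pre-quantized int8 weights in int32, and the accumulator is scaled back to float by the product of the input and weight scales. The following standalone sketch (plain C++, not the actual OpenCL kernels; it assumes a single per-tensor max-abs/127 symmetric scale for the input) illustrates the arithmetic:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Hypothetical standalone sketch of the hybrid computation, per-tensor variant.
// weights is [out_dim][in_dim], row-major, already quantized with weight_scale.
std::vector<float> hybrid_fc(const std::vector<float> &input,
                             const std::vector<int8_t> &weights,
                             float weight_scale, std::size_t out_dim)
{
  const std::size_t in_dim = input.size();

  // Scale factor extraction (cf. CLScaleFactorSymm8Kernel): max|x| / 127
  float max_abs = 0.f;
  for (float v : input)
    max_abs = std::max(max_abs, std::fabs(v));
  const float input_scale = max_abs / 127.f;

  // Symmetric quantization of the input (cf. CLQuantizationSymmetricKernel)
  std::vector<int8_t> q_input(in_dim);
  for (std::size_t i = 0; i < in_dim; ++i)
    q_input[i] = static_cast<int8_t>(std::lround(input[i] / input_scale));

  // Integer matmul (cf. CLGEMMLowpMatrixMultiplyCore), then rescale to float
  // (cf. CLMultiplyScaleFactorKernel)
  std::vector<float> output(out_dim);
  for (std::size_t o = 0; o < out_dim; ++o)
  {
    int32_t acc = 0;
    for (std::size_t i = 0; i < in_dim; ++i)
      acc += static_cast<int32_t>(q_input[i]) * weights[o * in_dim + i];
    output[o] = acc * input_scale * weight_scale;
  }
  return output;
}
```

The bias accumulation step (CLGEMMMatrixAccumulateBiasesKernel) would then add the float biases to the rescaled output.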

Constructor & Destructor Documentation

◆ CLFullyConnectedHybridLayer() [1/3]

CLFullyConnectedHybridLayer::CLFullyConnectedHybridLayer ( std::shared_ptr< IMemoryManager >  memory_manager = nullptr)

Constructor

Definition at line 68 of file CLFullyConnectedHybridLayer.cpp.

70 : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(),
71 _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(),
72 _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(),
73 _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false),
74 _original_weights(nullptr)
75{
76}

◆ CLFullyConnectedHybridLayer() [2/3]

arm_compute::CLFullyConnectedHybridLayer::CLFullyConnectedHybridLayer ( const CLFullyConnectedHybridLayer &  )
delete

Prevent instances of this class from being copied (As this class contains pointers)

◆ CLFullyConnectedHybridLayer() [3/3]

arm_compute::CLFullyConnectedHybridLayer::CLFullyConnectedHybridLayer ( CLFullyConnectedHybridLayer &&  )
default

Default move constructor

Member Function Documentation

◆ configure()

void CLFullyConnectedHybridLayer::configure ( const ICLTensor *  input,
const ICLTensor *  weights,
const ICLTensor *  biases,
ICLTensor *  output,
FullyConnectedLayerInfo  fc_info = FullyConnectedLayerInfo() 
)

Set the input and output tensors.

Parameters
  [in]  input    Source tensor. Data type supported: F16/F32.
  [in]  weights  Weights tensor. The weights must be 2 dimensional. If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. Data type supported: S8.
  [in]  biases   Bias tensor. Can be nullptr. Data type supported: Same as input.
  [out] output   Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
                   • The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
                   • The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer. Data type supported: Same as input.
  [in]  fc_info  (Optional) Fully connected layer additional info

Definition at line 88 of file CLFullyConnectedHybridLayer.cpp.

91{
92 ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
93
94 // Perform validate step
95 ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedHybridLayer::validate(
96 input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
97 fc_info));
98
99 _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
100 _accumulate_biases = false;
101 _is_prepared = fc_info.retain_internal_weights;
102 _original_weights = weights;
103
104 // Configure accumulate biases kernel for non quantized asymmetric types
105 if (biases != nullptr)
106 {
107 ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
108
109 _accumulate_biases = true;
110
111 // Configure accumulate biases kernel
112 _accumulate_biases_kernel.set_target(CLScheduler::get().target());
113 _accumulate_biases_kernel.configure(output, biases);
114 }
115
116 const ICLTensor *weights_to_use = weights;
117
118 // With the Fully Connected layer we can have 4 different cases:
119 // 1) Convolution layer -> Fully Connected layer without batches
120 // 2) Fully Connected layer -> Fully Connected layer without batches
121 // 3) Convolution layer -> Fully Connected layer with batches
122 // 4) Fully Connected layer -> Fully Connected layer with batches
123
124 // Check if we have a fully connected layer with batches
125 const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
126 bool is_fc_after_conv = false;
127 if (is_batched_fc_layer)
128 {
129 is_fc_after_conv =
130 (TensorShape::num_max_dimensions >= 4) &&
131 (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
132 output->info()->tensor_shape().cbegin() + 1));
133 }
134 else
135 {
136 is_fc_after_conv = input->info()->num_dimensions() > 1 && input->info()->dimension(1) > 1;
137 }
138 ARM_COMPUTE_ERROR_ON_MSG(is_fc_after_conv,
139 "CLFullyConnectedHybridLayer does not support after conv");
140 ARM_COMPUTE_UNUSED(is_fc_after_conv);
141
142 // Reshape weights if needed
143 if (!_are_weights_reshaped)
144 {
145 // Reshape the weights
146 _reshape_weights_output.allocator()->init(
147 weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
148 compute_transposed_shape(*weights->info())));
149 _reshape_weights_kernel.configure(weights_to_use, &_reshape_weights_output);
150 weights_to_use = &_reshape_weights_output;
151 }
152
153 // Extract scale factor
154 _scale_factor.allocator()->init(
155 TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type()));
156 _memory_group.manage(&_scale_factor);
157 _scale_factor_kernel.configure(input, &_scale_factor);
158
159 // Quantize input
160 _quantized_input.allocator()->init(
161 input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
162 DataType::QASYMM8_SIGNED));
163 _memory_group.manage(&_quantized_input);
164 _quant_input_kernel.configure(input, &_scale_factor, &_quantized_input);
165
166 // GEMMLowp
167 _gemmlowp_output.allocator()->init(
168 output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
169 _memory_group.manage(&_gemmlowp_output);
170 configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output,
171 fc_info.retain_internal_weights);
172 _quantized_input.allocator()->allocate();
173
174 // Multiply scale
175 _multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output,
176 weights->info()->quantization_info().uniform().scale);
177 _gemmlowp_output.allocator()->allocate();
178 _scale_factor.allocator()->allocate();
179
180 _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
181}

References arm_compute::CLQuantizationSymmetricKernel::configure(), arm_compute::CLMultiplyScaleFactorKernel::configure(), arm_compute::CLScaleFactorSymm8Kernel::configure(), arm_compute::CLGEMMMatrixAccumulateBiasesKernel::configure(), and validate().
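
In the configure() body above, _scale_factor is initialized with shape {output->dimension(1)}, i.e. one value per batch row of the input. Assuming CLScaleFactorSymm8Kernel computes a per-row max-abs/127 scale (an assumption consistent with the symmetric int8 scheme, not taken from the kernel source), a minimal standalone sketch of that step is:

```cpp
#include <algorithm>
#include <cmath>
#include <vector>

// Hypothetical sketch: one symmetric scale factor per batch row, so each
// row of the input can be quantized to int8 with its own dynamic range.
std::vector<float> row_scale_factors(const std::vector<std::vector<float>> &batch)
{
  std::vector<float> scales;
  scales.reserve(batch.size());
  for (const auto &row : batch)
  {
    float max_abs = 0.f;
    for (float v : row)
      max_abs = std::max(max_abs, std::fabs(v));
    scales.push_back(max_abs / 127.f); // int8 symmetric range is [-127, 127]
  }
  return scales;
}
```

Per-row scales are what make the scheme "hybrid": weights are statically quantized once, while the input's quantization parameters are recomputed on every run.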

◆ operator=() [1/2]

CLFullyConnectedHybridLayer & arm_compute::CLFullyConnectedHybridLayer::operator= ( CLFullyConnectedHybridLayer &&  )
default

Default move assignment operator


◆ operator=() [2/2]

CLFullyConnectedHybridLayer & arm_compute::CLFullyConnectedHybridLayer::operator= ( const CLFullyConnectedHybridLayer &  )
delete

Prevent instances of this class from being copied (As this class contains pointers)

◆ prepare()

void CLFullyConnectedHybridLayer::prepare ( )
override

Definition at line 290 of file CLFullyConnectedHybridLayer.cpp.

291{
292 if (!_is_prepared)
293 {
294 ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
295
296 auto release_unused = [](CLTensor *w) {
297 if (!w->is_used())
298 {
299 CLScheduler::get().queue().finish();
300 w->allocator()->free();
301 }
302 };
303
304 // Reshape of the weights if needed (happens only once)
305 if (!_are_weights_reshaped)
306 {
307 // Run reshape weights kernel and mark weights as unused
308 _reshape_weights_output.allocator()->allocate();
309 _reshape_weights_kernel.run();
310
311 _are_weights_reshaped = true;
312 // We can not release _original_weights because it can be used in other nodes
313 }
314
315 // Prepare GEMM and release unused weights
316 _mm_gemmlowp.prepare();
317
318 // Release reshaped weights if unused
319 release_unused(&_reshape_weights_output);
320
321 _is_prepared = true;
322 }
323}

Referenced by run().
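
prepare() follows a run-once pattern: guarded by _is_prepared, the weight reshape executes at most once, after which buffers that are no longer referenced are freed. A minimal standalone sketch of that pattern (the PreparedOnce type is hypothetical, not part of the library):

```cpp
// One-shot preparation: the first run() pays the setup cost, later runs skip it.
struct PreparedOnce
{
  bool prepared = false;
  int prepare_calls = 0; // instrumentation for the sketch only

  void prepare()
  {
    if (prepared)
      return;            // subsequent calls are no-ops
    ++prepare_calls;     // e.g. run the weight-reshape kernel, free unused buffers
    prepared = true;
  }

  void run()
  {
    prepare();           // mirrors CLFullyConnectedHybridLayer::run() calling prepare()
    // ... enqueue the per-inference kernels here ...
  }
};
```

Note that fc_info.retain_internal_weights pre-sets _is_prepared in configure(), which skips this setup entirely when the caller guarantees the internal weights are already in place.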

◆ run()

void CLFullyConnectedHybridLayer::run ( )
override

Definition at line 265 of file CLFullyConnectedHybridLayer.cpp.

266{
267 prepare();
268
269 MemoryGroupResourceScope scope_mg(_memory_group);
270
271 // Extract scale_factor
272 CLScheduler::get().enqueue(_scale_factor_kernel);
273
274 // Quantize input
275 CLScheduler::get().enqueue(_quant_input_kernel);
276
277 // Run matrix multiply
278 _mm_gemmlowp.run();
279
280 // Multiply scale factor
281 CLScheduler::get().enqueue(_multiply_scale_kernel);
282
283 // Accumulate biases if provided
284 if (_accumulate_biases)
285 {
286 CLScheduler::get().enqueue(_accumulate_biases_kernel);
287 }
288}

References prepare().

Referenced by package.infer.session::inference().

◆ validate()

Status CLFullyConnectedHybridLayer::validate ( const ITensorInfo *  input,
const ITensorInfo *  weights,
const ITensorInfo *  biases,
const ITensorInfo *  output,
FullyConnectedLayerInfo  fc_info = FullyConnectedLayerInfo() 
)
static

Static function to check if given info will lead to a valid configuration of CLFullyConnectedHybridLayer

Parameters
  [in]  input    Source tensor info. Data type supported: F16/F32.
  [in]  weights  Weights tensor info. The weights must be 2 dimensional. If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. Data type supported: S8.
  [in]  biases   Bias tensor info. Can be nullptr. Data type supported: Same as input.
  [out] output   Destination tensor info. Its shape should be equal to the output of a matrix multiplication between:
                   • The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
                   • The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer. Data type supported: Same as input.
  [in]  fc_info  (Optional) Fully connected layer additional info
Returns
a status

Definition at line 183 of file CLFullyConnectedHybridLayer.cpp.

186{
187 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
188 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
189 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED);
190 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
191 ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
192
193 bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
194 bool is_fc_after_conv = true;
195 const GPUTarget gpu_target = CLScheduler::get().target();
196
197 const ITensorInfo &reshaped_weights =
198 TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
199 compute_transposed_shape(*weights)));
200
201 // Configure accumulate biases kernel for non quantized asymmetric types
202 if (biases != nullptr)
203 {
204 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
205 ARM_COMPUTE_RETURN_ON_ERROR(
206 CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target));
207 }
208
209 // With the Fully Connected layer we can have 4 different cases:
210 // 1) Convolution layer -> Fully Connected layer without batches
211 // 2) Fully Connected layer -> Fully Connected layer without batches
212 // 3) Convolution layer -> Fully Connected layer with batches
213 // 4) Fully Connected layer -> Fully Connected layer with batches
214
215 const ITensorInfo *weights_to_use = weights;
216
217 // Check if we have a fully connected layer with batches
218 const bool is_batched_fc_layer = output->dimension(1) > 1;
219 if (is_batched_fc_layer)
220 {
221 is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
222 (std::equal(input->tensor_shape().cbegin() + 3, input->tensor_shape().cend(),
223 output->tensor_shape().cbegin() + 1));
224 }
225 else
226 {
227 is_fc_after_conv = input->num_dimensions() > 1 && input->dimension(1) > 1;
228 }
229 ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_fc_after_conv,
230 "CLFullyConnectedHybridLayer does not support after conv");
231
232 if (!weights_reshaped)
233 {
234 // Validate reshape weights kernel
235 ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(weights_to_use, &reshaped_weights));
236 weights_to_use = &reshaped_weights;
237 }
238
239 // Validate Scale factor kernel
240 const ITensorInfo &scale_factor =
241 TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type());
242 ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor));
243
244 // Validate quantization symm8 kernel
245 const ITensorInfo &quantized_input = TensorInfo(
246 input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::QASYMM8_SIGNED));
247 ARM_COMPUTE_RETURN_ON_ERROR(
248 CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input));
249
250 // Fully Connected layer after a Fully Connected Layer without batches
251 ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
252
253 // Validate matrix multiply kernel
254 const ITensorInfo &gemmlowp_output = TensorInfo(
255 output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
256 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
257
258 // Multiply scale
259 ARM_COMPUTE_RETURN_ON_ERROR(
260 CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output));
261
262 return Status{};
263}

References arm_compute::CLGEMMMatrixAccumulateBiasesKernel::validate(), arm_compute::CLScaleFactorSymm8Kernel::validate(), arm_compute::CLMultiplyScaleFactorKernel::validate(), and arm_compute::CLQuantizationSymmetricKernel::validate().

Referenced by configure(), and operator=().
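
Both configure() and validate() reject the "after conv" case with the same shape test: when the output's dimension 1 is greater than 1 (a batched fully connected layer), the input is considered to come from a convolution if its dimensions from index 3 onward match the output's dimensions from index 1 onward. A hedged standalone rendition of that test, with shapes as plain index vectors (dimension 0 first, as in TensorShape):

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Sketch of the is_fc_after_conv detection. CLFullyConnectedHybridLayer
// raises an error whenever this returns true.
bool fc_after_conv(const std::vector<std::size_t> &in_shape,
                   const std::vector<std::size_t> &out_shape)
{
  const bool is_batched = out_shape.size() > 1 && out_shape[1] > 1;
  if (is_batched)
  {
    // Batch dimensions of a 4D conv output (indices 3+) must reappear as
    // indices 1+ of the 2D batched output for this to be "after conv".
    return in_shape.size() >= 4 &&
           std::equal(in_shape.cbegin() + 3, in_shape.cend(),
                      out_shape.cbegin() + 1);
  }
  // Non-batched case: a 2D+ input with height > 1 implies a conv-style input.
  return in_shape.size() > 1 && in_shape[1] > 1;
}
```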


The documentation for this class was generated from the following files:
  • CLFullyConnectedHybridLayer.h
  • CLFullyConnectedHybridLayer.cpp