ONE - On-device Neural Engine
DepthwiseConvolutionLayer.cc
Go to the documentation of this file.
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "DepthwiseConvolutionLayer.h"

#include "cker/PortableTensorUtils.h"
#include <cker/operation/DepthwiseConv.h>

namespace onert
{
namespace backend
{
namespace cpu
{
namespace ops
{

void DepthwiseConvolutionLayer::prepareF32()
{
  if (_dilationWidth != 1 || _dilationHeight != 1)
    return;

  // The DepthwiseConvOp cpu kernel needs additional memory to run with multiple
  // threads, so we allocate it here and pass it to the kernel.
  const int64_t k_packet_size = nnfw::cker::eigen_support::kPacketSize<float>();

  const auto out_shape = getShape(_output);
  const auto filter_shape = getShape(_kernel);
  const int batch = out_shape.Dims(0);
  const int out_depth = out_shape.Dims(3);
  const int filter_rows = filter_shape.Dims(1);
  const int filter_cols = filter_shape.Dims(2);

  const int filter_spatial_size = filter_rows * filter_cols;
  const int padded_filter_inner_dim_size =
    ((out_depth + k_packet_size - 1) / k_packet_size) * k_packet_size;

  _use_padded_filter = (out_depth % k_packet_size) != 0;

  // prepare padded_filter buffer for cker
  auto padded_filter_info = ir::OperandInfo(_kernel->get_info());
  padded_filter_info.shape({batch, filter_spatial_size, padded_filter_inner_dim_size});
  _padded_filter = std::make_unique<Tensor>(padded_filter_info, nullptr);
  _padded_filter->setBuffer(std::make_shared<basic::Allocator>(_padded_filter->total_size()));

  // prepare per-thread filter buffers for cker
  const int thread_count = nnfw::cker::eigen_support::getThreadCount() + 1;

  auto filter_buffers_info = ir::OperandInfo(_kernel->get_info());
  filter_buffers_info.shape({thread_count, filter_spatial_size, padded_filter_inner_dim_size});
  _filter_buffers = std::make_unique<Tensor>(filter_buffers_info, nullptr);
  _filter_buffers->setBuffer(std::make_shared<basic::Allocator>(_filter_buffers->total_size()));
}
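The rounding above pads the filter's inner dimension up to a multiple of the SIMD packet width, and the padded-filter path is only taken when the output depth is not already a multiple of it. A minimal standalone sketch of that arithmetic (the packet size of 4 and out_depth of 10 are assumed example values; the real packet size comes from nnfw::cker::eigen_support::kPacketSize<float>()):

#include <cstdint>
#include <cstdio>

int main()
{
  const int64_t k_packet_size = 4; // assumed example value; queried from Eigen in the real code
  const int out_depth = 10;        // hypothetical channel count

  // Round out_depth up to the next multiple of the packet size.
  const int padded_inner_dim = ((out_depth + k_packet_size - 1) / k_packet_size) * k_packet_size;
  const bool use_padded_filter = (out_depth % k_packet_size) != 0;

  std::printf("padded inner dim = %d, use padded filter = %d\n", padded_inner_dim,
              use_padded_filter);
  // prints: padded inner dim = 12, use padded filter = 1
  return 0;
}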

void DepthwiseConvolutionLayer::convFloat32()
{
  float output_activation_min = 0, output_activation_max = 0;
  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);

  nnfw::cker::DepthwiseConvParams op_params;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = _dilationWidth;
  op_params.dilation_height_factor = _dilationHeight;
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.depth_multiplier = _multiplier;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  // DepthwiseConvOp does not support dilation or different W/H strides yet,
  // so the generic kernel is used in those cases.
  if (_dilationWidth == 1 && _dilationHeight == 1 && _strideWidth == _strideHeight)
  {
    nnfw::cker::DepthwiseConvOp(op_params, getShape(_input), getBuffer<float>(_input),
                                getShape(_kernel), getBuffer<float>(_kernel), getShape(_bias),
                                getBuffer<float>(_bias), getBuffer<float>(_padded_filter.get()),
                                _use_padded_filter, getBuffer<float>(_filter_buffers.get()),
                                getShape(_output), getBuffer<float>(_output));
  }
  else
  {
    nnfw::cker::DepthwiseConv<float, float>(
      op_params, getShape(_input), getBuffer<float>(_input), getShape(_kernel),
      getBuffer<float>(_kernel), getShape(_bias), getBuffer<float>(_bias), getShape(_output),
      getBuffer<float>(_output), _external_context->ruy_context());
  }
}
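CalculateActivationRange turns the fused activation into the clamp range that the float kernels apply to every output element. A minimal sketch of that mapping, assuming the usual RELU/RELU1/RELU6 semantics (the enum and helper below are illustrative stand-ins, not the project's actual declarations):

#include <limits>
#include <stdexcept>

enum class Activation { NONE, RELU, RELU1, RELU6 }; // illustrative stand-in for ir::Activation

void activationRange(Activation act, float *act_min, float *act_max)
{
  switch (act)
  {
    case Activation::NONE: // no fused activation: leave the full float range
      *act_min = std::numeric_limits<float>::lowest();
      *act_max = std::numeric_limits<float>::max();
      break;
    case Activation::RELU: // clamp negatives to zero
      *act_min = 0.0f;
      *act_max = std::numeric_limits<float>::max();
      break;
    case Activation::RELU1: // clamp to [-1, 1]
      *act_min = -1.0f;
      *act_max = 1.0f;
      break;
    case Activation::RELU6: // clamp to [0, 6]
      *act_min = 0.0f;
      *act_max = 6.0f;
      break;
    default:
      throw std::runtime_error{"unsupported fused activation"};
  }
}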

void DepthwiseConvolutionLayer::convQ8uPerTensor()
{
  int32_t output_activation_min = 0;
  int32_t output_activation_max = 0;
  CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
                                    &output_activation_max);

  double real_multiplier = 0.0;
  int32_t output_multiplier = 0;
  int32_t output_shift = 0;
  GetQuantizedConvolutionMultiplier(_input, _kernel, _bias, _output, &real_multiplier);
  QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);

  nnfw::cker::DepthwiseConvParams op_params;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = _dilationWidth;
  op_params.dilation_height_factor = _dilationHeight;
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.depth_multiplier = _multiplier;
  op_params.input_offset = -_input->data_zero_point();
  op_params.weights_offset = -_kernel->data_zero_point();
  op_params.output_offset = _output->data_zero_point();
  op_params.output_multiplier = output_multiplier;
  op_params.output_shift = output_shift;
  op_params.quantized_activation_min = output_activation_min;
  op_params.quantized_activation_max = output_activation_max;

  nnfw::cker::DepthwiseConv<uint8_t, int32_t>(
    op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
    getBuffer<uint8_t>(_kernel), getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output),
    getBuffer<uint8_t>(_output), _external_context->ruy_context());
}
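GetQuantizedConvolutionMultiplier computes the effective rescale factor input_scale * filter_scale / output_scale, and QuantizeMultiplier expresses it as a Q31 fixed-point multiplier plus a power-of-two shift so the kernel can requantize with integer math only. A minimal sketch of that decomposition, using hypothetical scales (the real helpers may round slightly differently):

#include <cmath>
#include <cstdint>
#include <cstdio>

void quantizeMultiplier(double real_multiplier, int32_t *quantized, int *shift)
{
  // real_multiplier = q * 2^shift, with q in [0.5, 1).
  const double q = std::frexp(real_multiplier, shift);
  int64_t q_fixed = static_cast<int64_t>(std::llround(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) // rounding pushed q up to exactly 1.0
  {
    q_fixed /= 2;
    ++*shift;
  }
  *quantized = static_cast<int32_t>(q_fixed);
}

int main()
{
  // hypothetical scales: input 0.5, filter 0.25, output 1.0
  int32_t multiplier = 0;
  int shift = 0;
  quantizeMultiplier(0.5 * 0.25 / 1.0, &multiplier, &shift);
  std::printf("multiplier=%d shift=%d\n", multiplier, shift);
  // prints: multiplier=1073741824 shift=-2   (0.125 = 0.5 * 2^-2)
  return 0;
}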

void DepthwiseConvolutionLayer::convQ8uPerChannel()
{
  nnfw::cker::DepthwiseConvParams op_params;
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = _dilationWidth;
  op_params.dilation_height_factor = _dilationHeight;
  op_params.depth_multiplier = _multiplier;
  op_params.input_offset = -_input->data_zero_point();
  op_params.output_offset = _output->data_zero_point();
  int32_t output_activation_min = 0;
  int32_t output_activation_max = 0;
  CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
                                    &output_activation_max);
  op_params.quantized_activation_min = output_activation_min;
  op_params.quantized_activation_max = output_activation_max;
  // NOTE: The following fields of ConvParams are not used:
  // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max}

  nnfw::cker::reference_integer_ops::DepthwiseConvPerChannel(
    op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
    getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel), getBuffer<uint8_t>(_kernel),
    _kernel->data_zero_points().data(), getShape(_bias), getBuffer<int32_t>(_bias),
    getShape(_output), getBuffer<uint8_t>(_output));
}

void DepthwiseConvolutionLayer::convQ8i()
{
  if (!_prepared)
  {
    prepareQ8i();
    _prepared = true;
  }

  int32_t output_activation_min = 0;
  int32_t output_activation_max = 0;
  CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
                                    &output_activation_max);

  nnfw::cker::DepthwiseConvParams op_params;
  op_params.padding_type = nnfw::cker::PaddingType::kSame;
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.depth_multiplier = _multiplier;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = _dilationWidth;
  op_params.dilation_height_factor = _dilationHeight;
  op_params.input_offset = -_input->data_zero_point();
  op_params.weights_offset = 0;
  op_params.output_offset = _output->data_zero_point();
  op_params.quantized_activation_min = output_activation_min;
  op_params.quantized_activation_max = output_activation_max;

  nnfw::cker::optimized_integer_ops::DepthwiseConvPerChannel(
    op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
    getShape(_input), getBuffer<int8_t>(_input), getShape(_kernel), getBuffer<int8_t>(_kernel),
    getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output), getBuffer<int8_t>(_output),
    _external_context->ruy_context());
}
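Per output element, the quantized kernels accumulate in int32 and then rescale back to the narrow type using the per-channel multiplier/shift pairs prepared in prepareQ8i(), add the output zero point, and clamp to the activation range. A minimal sketch of that final step, using plain double arithmetic instead of the fixed-point multiplier for clarity (illustrative only):

#include <algorithm>
#include <cmath>
#include <cstdint>

int8_t requantize(int32_t acc, double effective_scale, int32_t output_offset,
                  int32_t act_min, int32_t act_max)
{
  // Scale the int32 accumulator back into the int8 domain and shift by the output zero point.
  int32_t out = static_cast<int32_t>(std::lround(acc * effective_scale)) + output_offset;
  // Apply the fused-activation clamp, then narrow to int8.
  out = std::max(act_min, std::min(act_max, out));
  return static_cast<int8_t>(out);
}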

void DepthwiseConvolutionLayer::convQ8iHybridPerChannel()
{
  if (!_prepared)
  {
    prepareQ8iHybridPerChannel();
    _prepared = true;
  }

  float output_activation_min = 0, output_activation_max = 0;
  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);

  auto input_shape = getShape(_input);
  const int batch_size = input_shape.Dims(0);
  const int input_size = input_shape.FlatSize() / batch_size;

  auto scaling_factors_ptr = _input_scaling_factors.data();
  auto input_offsets_ptr = _input_offsets.data();

  for (int b = 0; b < batch_size; ++b)
  {
    const int offset = b * input_size;
    nnfw::cker::PortableAsymmetricQuantizeFloats(getBuffer<float>(_input) + offset, input_size,
                                                 _input_quantized.data() + offset,
                                                 &scaling_factors_ptr[b], &input_offsets_ptr[b]);
  }

  nnfw::cker::DepthwiseConvParams op_params;
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.depth_multiplier = _multiplier;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = _dilationWidth;
  op_params.dilation_height_factor = _dilationHeight;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  nnfw::cker::reference_integer_ops::DepthwiseConvHybridPerChannel(
    op_params, _input_scaling_factors.data(), getShape(_input), _input_quantized.data(),
    getShape(_kernel), getBuffer<int8_t>(_kernel), getShape(_bias), getBuffer<float>(_bias),
    getShape(_output), getBuffer<float>(_output), _kernel->data_scales().data(),
    _input_offsets.data());
}
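The hybrid path quantizes each batch row of the float input on the fly: it maps the row's value range onto int8 and records a per-row scale and offset so the kernel can fold them back into the float output. A minimal illustrative re-implementation of that per-row step, in the spirit of PortableAsymmetricQuantizeFloats (not the exact cker routine):

#include <algorithm>
#include <cmath>
#include <cstdint>

void asymmetricQuantize(const float *values, int size, int8_t *quantized,
                        float *scaling_factor, int32_t *offset)
{
  // Range of the row, always including 0 so that real zero stays representable.
  const auto [min_it, max_it] = std::minmax_element(values, values + size);
  const float rmin = std::min(0.0f, *min_it);
  const float rmax = std::max(0.0f, *max_it);
  if (rmin == rmax) // all-zero row: nothing to scale
  {
    std::fill(quantized, quantized + size, static_cast<int8_t>(0));
    *scaling_factor = 1.0f;
    *offset = 0;
    return;
  }
  // Map [rmin, rmax] onto the int8 range [-128, 127].
  *scaling_factor = (rmax - rmin) / 255.0f;
  *offset = static_cast<int32_t>(std::lround(-128.0f - rmin / *scaling_factor));
  for (int i = 0; i < size; ++i)
  {
    const int32_t q = static_cast<int32_t>(std::lround(values[i] / *scaling_factor)) + *offset;
    quantized[i] = static_cast<int8_t>(std::clamp(q, -128, 127));
  }
}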

void DepthwiseConvolutionLayer::prepareQ8i()
{
  GetQuantizedConvolutionMultipliersAndShifts(
    _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
    _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
    _per_channel_output_shift);
}

void DepthwiseConvolutionLayer::prepareQ8uPerChannel()
{
  GetQuantizedConvolutionMultipliersAndShifts(
    _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
    _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
    _per_channel_output_shift);
}
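Both helpers above compute one effective rescale factor per output channel, input_scale * filter_scale[c] / output_scale, and convert each to a fixed-point multiplier/shift pair. A minimal sketch of that loop, reusing the quantizeMultiplier sketch shown after convQ8uPerTensor (illustrative, not the project's helper):

#include <cstdint>
#include <vector>

void quantizeMultiplier(double real_multiplier, int32_t *quantized, int *shift); // earlier sketch

void perChannelMultipliersAndShifts(float input_scale, float output_scale,
                                    const std::vector<float> &filter_scales,
                                    std::vector<int32_t> &multipliers, std::vector<int> &shifts)
{
  multipliers.resize(filter_scales.size());
  shifts.resize(filter_scales.size());
  for (size_t c = 0; c < filter_scales.size(); ++c)
  {
    // One effective rescale factor per output channel.
    const double effective = static_cast<double>(input_scale) * filter_scales[c] / output_scale;
    quantizeMultiplier(effective, &multipliers[c], &shifts[c]);
  }
}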

void DepthwiseConvolutionLayer::prepareQ8iHybridPerChannel()
{
  // Allocate memory for activation quantization:
  // - quantized values (int8_t, same shape as the original input)
  // - quantization params (= scale/zero point for each input)
  auto input_shape = getShape(_input);
  const int batch_size = input_shape.Dims(0);
  const int input_size = input_shape.FlatSize() / batch_size;
  _input_quantized.resize(input_size);
  // TODO: Optimize the case of batch_size = 1
  _input_scaling_factors.resize(batch_size);
  _input_offsets.resize(batch_size);
}

void DepthwiseConvolutionLayer::ensureQ8iHybridPerChannel()
{
  // ensure weight is per-channel quantized.
  int32_t kernel_input_channel = getShape(_kernel).Dims(3);
  // zero_points comes from flatbuffer vector. Its size is within uint32_t range.
  size_t kernel_zerop_cnt = _kernel->data_scales().size();
  // promote to int64_t to compare int32_t and uint32_t
  if ((int64_t)kernel_input_channel != (int64_t)kernel_zerop_cnt)
    throw std::runtime_error{"DConv2D hybrid supports only per-channel quantized weight."};
}

void DepthwiseConvolutionLayer::configure(
  const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
  const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
  const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight,
  const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight,
  const ir::Activation activation, IPortableTensor *output,
  const std::shared_ptr<ExternalContext> &external_context)
{
  _input = input;
  _kernel = kernel;
  _bias = bias;
  _paddingLeft = paddingLeft;
  _paddingRight = paddingRight;
  _paddingTop = paddingTop;
  _paddingBottom = paddingBottom;
  _strideWidth = strideWidth;
  _strideHeight = strideHeight;
  _multiplier = multiplier;
  _dilationWidth = dilationWidth;
  _dilationHeight = dilationHeight;
  _activation = activation;
  _output = output;
  _external_context = external_context;
  _is_hybrid = _input->data_type() == OperandType::FLOAT32 &&
               _kernel->data_type() == OperandType::QUANT_INT8_SYMM;

  if (_is_hybrid)
  {
    ensureQ8iHybridPerChannel();
    prepareQ8iHybridPerChannel();
    _prepared = true;
  }
  else if (_input->data_type() == OperandType::FLOAT32)
  {
    prepareF32();
  }
  else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
  {
    if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
    {
      prepareQ8i();
      _prepared = true;
    }
  }
  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
           !_input->is_dynamic() && !_output->is_dynamic())
  {
    const bool per_channel_quantized = _kernel->data_scales().size() > 1;
    if (per_channel_quantized)
    {
      prepareQ8uPerChannel();
      _prepared = true;
    }
  }
}

void DepthwiseConvolutionLayer::run()
{
  if (_is_hybrid)
  {
    convQ8iHybridPerChannel();
  }
  else if (_input->data_type() == OperandType::FLOAT32)
  {
    convFloat32();
  }
  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
  {
    const bool per_channel_quantized = _kernel->data_scales().size() > 1;
    if (per_channel_quantized)
      convQ8uPerChannel();
    else
      convQ8uPerTensor();
  }
  else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
  {
    convQ8i();
  }
  else
  {
    throw std::runtime_error{"DepthwiseConv: unsupported data type"};
  }
}

} // namespace ops
} // namespace cpu
} // namespace backend
} // namespace onert