ONE - On-device Neural Engine
Loading...
Searching...
No Matches
nnfw::cker::reference_integer_ops Namespace Reference

Functions

void DepthwiseConvHybridPerChannel (const DepthwiseConvParams &params, float *scaling_factors_ptr, const Shape &input_shape, const int8_t *input_data, const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape, const float *bias_data, const Shape &output_shape, float *output_data, const float *per_channel_scale, int32_t *input_offset)
 
void DepthwiseConvPerChannel (const DepthwiseConvParams &params, const int32_t *output_multiplier, const int32_t *output_shift, const Shape &input_shape, const uint8_t *input_data, const Shape &filter_shape, const uint8_t *filter_data, const int32_t *filter_zeropoint, const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
 

Function Documentation

◆ DepthwiseConvHybridPerChannel()

void nnfw::cker::reference_integer_ops::DepthwiseConvHybridPerChannel ( const DepthwiseConvParams &  params,
float *  scaling_factors_ptr,
const Shape &  input_shape,
const int8_t *  input_data,
const Shape &  filter_shape,
const int8_t *  filter_data,
const Shape &  bias_shape,
const float *  bias_data,
const Shape &  output_shape,
float *  output_data,
const float *  per_channel_scale,
int32_t *  input_offset 
)
inline

Definition at line 32 of file DepthwiseConvHybrid.h.

39{
40 const int stride_width = params.stride_width;
41 const int stride_height = params.stride_height;
42 const int dilation_width_factor = params.dilation_width_factor;
43 const int dilation_height_factor = params.dilation_height_factor;
44 const int pad_width = params.padding_values.width;
45 const int pad_height = params.padding_values.height;
46 const int depth_multiplier = params.depth_multiplier;
47 const float output_activation_min = params.float_activation_min;
48 const float output_activation_max = params.float_activation_max;
49
50 // Check dimensions of the tensors.
51 assert(input_shape.DimensionsCount() == 4);
52 assert(filter_shape.DimensionsCount() == 4);
53 assert(output_shape.DimensionsCount() == 4);
54
55 const int batches = MatchingDim(input_shape, 0, output_shape, 0);
56 [[maybe_unused]] const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
57 const int input_height = input_shape.Dims(1);
58 const int input_width = input_shape.Dims(2);
59 const int input_depth = input_shape.Dims(3);
60 const int filter_height = filter_shape.Dims(1);
61 const int filter_width = filter_shape.Dims(2);
62 const int output_height = output_shape.Dims(1);
63 const int output_width = output_shape.Dims(2);
64 const int bias_depth = bias_shape.FlatSize();
65 assert(output_depth == input_depth * depth_multiplier);
66 assert(bias_depth == output_depth);
67
68 for (int batch = 0; batch < batches; ++batch)
69 {
70 for (int out_y = 0; out_y < output_height; ++out_y)
71 {
72 for (int out_x = 0; out_x < output_width; ++out_x)
73 {
74 for (int in_channel = 0; in_channel < input_depth; ++in_channel)
75 {
76 for (int m = 0; m < depth_multiplier; ++m)
77 {
78 const int output_channel = m + in_channel * depth_multiplier;
79 const int in_x_origin = (out_x * stride_width) - pad_width;
80 const int in_y_origin = (out_y * stride_height) - pad_height;
81 int32_t acc = 0;
82 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
83 {
84 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
85 {
86 const int in_x = in_x_origin + dilation_width_factor * filter_x;
87 const int in_y = in_y_origin + dilation_height_factor * filter_y;
88 // Zero padding by omitting the areas outside the image.
89 const bool is_point_inside_image =
90 (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
91 if (is_point_inside_image)
92 {
93 int32_t input_val =
94 input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
95 int32_t filter_val =
96 filter_data[Offset(filter_shape, 0, filter_y, filter_x, output_channel)];
97 acc += filter_val * (input_val - input_offset[batch]);
98 }
99 }
100 }
101 float acc_float = static_cast<float>(acc);
102 acc_float *= per_channel_scale[output_channel] * scaling_factors_ptr[batch];
103 if (bias_data && output_channel < bias_depth)
104 {
105 acc_float += bias_data[output_channel];
106 }
107 output_data[Offset(output_shape, batch, out_y, out_x, output_channel)] =
108 ActivationFunctionWithMinMax(acc_float, output_activation_min, output_activation_max);
109 }
110 }
111 }
112 }
113 }
114}
int Offset(const Dims< 4 > &dims, int i0, int i1, int i2, int i3)
Definition Dims.h:64
int32_t DimensionsCount() const
Definition Shape.h:91
int32_t Dims(int i) const
Definition Shape.h:92
int FlatSize() const
Definition Shape.h:181
const luci_interpreter::RuntimeShape output_shape
int MatchingDim(const Shape &shape1, int index1, const Shape &shape2, int index2)
Definition Shape.h:220
T ActivationFunctionWithMinMax(T x, T output_activation_min, T output_activation_max)
Definition Utils.h:43
PaddingValues padding_values
Definition Types.h:234

References nnfw::cker::ActivationFunctionWithMinMax(), nnfw::cker::DepthwiseConvParams::depth_multiplier, nnfw::cker::DepthwiseConvParams::dilation_height_factor, nnfw::cker::DepthwiseConvParams::dilation_width_factor, nnfw::cker::Shape::DimensionsCount(), nnfw::cker::Shape::Dims(), nnfw::cker::Shape::FlatSize(), nnfw::cker::DepthwiseConvParams::float_activation_max, nnfw::cker::DepthwiseConvParams::float_activation_min, nnfw::cker::PaddingValues::height, m, nnfw::cker::MatchingDim(), nnfw::cker::Offset(), output_shape, nnfw::cker::DepthwiseConvParams::padding_values, nnfw::cker::DepthwiseConvParams::stride_height, nnfw::cker::DepthwiseConvParams::stride_width, and nnfw::cker::PaddingValues::width.

Referenced by onert::backend::cpu::ops::DepthwiseConvolutionLayer::convQ8iHybridPerChannel().

◆ DepthwiseConvPerChannel()

void nnfw::cker::reference_integer_ops::DepthwiseConvPerChannel ( const DepthwiseConvParams &  params,
const int32_t *  output_multiplier,
const int32_t *  output_shift,
const Shape &  input_shape,
const uint8_t *  input_data,
const Shape &  filter_shape,
const uint8_t *  filter_data,
const int32_t *  filter_zeropoint,
const Shape &  bias_shape,
const int32_t *  bias_data,
const Shape &  output_shape,
uint8_t *  output_data 
)
inline

Definition at line 31 of file DepthwiseConvUInt8.h.

39{
40 // Get parameters.
41 // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
42 const int stride_width = params.stride_width;
43 const int stride_height = params.stride_height;
44 const int dilation_width_factor = params.dilation_width_factor;
45 const int dilation_height_factor = params.dilation_height_factor;
46 const int pad_width = params.padding_values.width;
47 const int pad_height = params.padding_values.height;
48 const int depth_multiplier = params.depth_multiplier;
49 const int32_t input_offset = params.input_offset;
50 const int32_t output_offset = params.output_offset;
51 const int32_t output_activation_min = params.quantized_activation_min;
52 const int32_t output_activation_max = params.quantized_activation_max;
53
54 // Check dimensions of the tensors.
55 assert(input_shape.DimensionsCount() == 4);
56 assert(filter_shape.DimensionsCount() == 4);
57 assert(output_shape.DimensionsCount() == 4);
58
59 assert(output_activation_min <= output_activation_max);
60 const int batches = MatchingDim(input_shape, 0, output_shape, 0);
61 [[maybe_unused]] const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
62 const int input_height = input_shape.Dims(1);
63 const int input_width = input_shape.Dims(2);
64 const int input_depth = input_shape.Dims(3);
65 const int filter_height = filter_shape.Dims(1);
66 const int filter_width = filter_shape.Dims(2);
67 const int output_height = output_shape.Dims(1);
68 const int output_width = output_shape.Dims(2);
69 assert(output_depth == input_depth * depth_multiplier);
70 assert(bias_shape.FlatSize() == output_depth);
71
72 for (int batch = 0; batch < batches; ++batch)
73 {
74 for (int out_y = 0; out_y < output_height; ++out_y)
75 {
76 for (int out_x = 0; out_x < output_width; ++out_x)
77 {
78 for (int in_channel = 0; in_channel < input_depth; ++in_channel)
79 {
80 for (int m = 0; m < depth_multiplier; ++m)
81 {
82 const int output_channel = m + in_channel * depth_multiplier;
83 const int in_x_origin = (out_x * stride_width) - pad_width;
84 const int in_y_origin = (out_y * stride_height) - pad_height;
85 int32_t acc = 0;
86 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
87 {
88 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
89 {
90 const int in_x = in_x_origin + dilation_width_factor * filter_x;
91 const int in_y = in_y_origin + dilation_height_factor * filter_y;
92 // Zero padding by omitting the areas outside the image.
93 const bool is_point_inside_image =
94 (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
95 if (is_point_inside_image)
96 {
97 uint8_t input_val =
98 input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
99 uint8_t filter_val =
100 filter_data[Offset(filter_shape, 0, filter_y, filter_x, output_channel)];
101
102 // { for per-channel
103 // NOTE: The following comment is copied from tflite int8 implementation
104 // It may not be 100% true for uint8 per-channel.
105 //
106 // Accumulate with 32 bits accumulator.
107 // In the nudging process during model quantization, we force
108 // the real value of 0.0 to be represented by a quantized value. This
109 // guarantees that the input_offset is an int8, even though it
110 // is represented using int32_t.
111 // int32 += int8 * (int8 - int8) so the highest value we can
112 // get from each accumulation is [-127, 127] * ([-128, 127] -
113 // [-128, 127]), which is [-32512, 32512]. log2(32512)
114 // = 14.98, which means we can accumulate at least 2^16
115 // multiplications without overflow. The accumulator is
116 // applied to a filter so the accumulation logic will hold as
117 // long as the filter size (filter_y * filter_x * in_channel)
118 // does not exceed 2^16, which is the case in all the models
119 // we have seen so far.
120 // TODO(jianlijianli): Add a check to make sure the
121 // accumulator depth is smaller than 2^16.
122 const int32_t filter_offset = -filter_zeropoint[output_channel];
123 acc += (filter_val + filter_offset) * (input_val + input_offset);
124 // } for per-channel
125 }
126 }
127 }
128 if (bias_data)
129 {
130 acc += bias_data[output_channel];
131 }
132 acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[output_channel],
133 output_shift[output_channel]);
134 acc += output_offset;
135 acc = std::max(acc, output_activation_min);
136 acc = std::min(acc, output_activation_max);
137 // For q8u per-channel, int8_t -> uint8_t
138 output_data[Offset(output_shape, batch, out_y, out_x, output_channel)] =
139 static_cast<uint8_t>(acc);
140 }
141 }
142 }
143 }
144 }
145}
int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
Definition Utils.h:96

References nnfw::cker::DepthwiseConvParams::depth_multiplier, nnfw::cker::DepthwiseConvParams::dilation_height_factor, nnfw::cker::DepthwiseConvParams::dilation_width_factor, nnfw::cker::Shape::DimensionsCount(), nnfw::cker::Shape::Dims(), nnfw::cker::PaddingValues::height, nnfw::cker::DepthwiseConvParams::input_offset, m, nnfw::cker::MatchingDim(), nnfw::cker::MultiplyByQuantizedMultiplier(), nnfw::cker::Offset(), nnfw::cker::DepthwiseConvParams::output_offset, output_shape, nnfw::cker::DepthwiseConvParams::padding_values, nnfw::cker::DepthwiseConvParams::quantized_activation_max, nnfw::cker::DepthwiseConvParams::quantized_activation_min, nnfw::cker::DepthwiseConvParams::stride_height, nnfw::cker::DepthwiseConvParams::stride_width, and nnfw::cker::PaddingValues::width.

Referenced by onert::backend::cpu::ops::DepthwiseConvolutionLayer::convQ8uPerChannel().