ONE - On-device Neural Engine
Loading...
Searching...
No Matches
DepthwiseConvUInt8.h
Go to the documentation of this file.
/*
 * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18#ifndef __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__
19#define __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__
20
21#include "cker/Shape.h"
22#include "cker/Types.h"
23#include "cker/Utils.h"
24
25namespace nnfw
26{
27namespace cker
28{
29namespace reference_integer_ops
30{
32 const int32_t *output_multiplier, const int32_t *output_shift,
33 const Shape &input_shape, const uint8_t *input_data,
34 const Shape &filter_shape, const uint8_t *filter_data,
35 const int32_t *filter_zeropoint,
36 [[maybe_unused]] const Shape &bias_shape,
37 const int32_t *bias_data, const Shape &output_shape,
38 uint8_t *output_data)
39{
40 // Get parameters.
41 // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
42 const int stride_width = params.stride_width;
43 const int stride_height = params.stride_height;
44 const int dilation_width_factor = params.dilation_width_factor;
45 const int dilation_height_factor = params.dilation_height_factor;
46 const int pad_width = params.padding_values.width;
47 const int pad_height = params.padding_values.height;
48 const int depth_multiplier = params.depth_multiplier;
49 const int32_t input_offset = params.input_offset;
50 const int32_t output_offset = params.output_offset;
51 const int32_t output_activation_min = params.quantized_activation_min;
52 const int32_t output_activation_max = params.quantized_activation_max;
53
54 // Check dimensions of the tensors.
55 assert(input_shape.DimensionsCount() == 4);
56 assert(filter_shape.DimensionsCount() == 4);
57 assert(output_shape.DimensionsCount() == 4);
58
59 assert(output_activation_min <= output_activation_max);
60 const int batches = MatchingDim(input_shape, 0, output_shape, 0);
61 [[maybe_unused]] const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
62 const int input_height = input_shape.Dims(1);
63 const int input_width = input_shape.Dims(2);
64 const int input_depth = input_shape.Dims(3);
65 const int filter_height = filter_shape.Dims(1);
66 const int filter_width = filter_shape.Dims(2);
67 const int output_height = output_shape.Dims(1);
68 const int output_width = output_shape.Dims(2);
69 assert(output_depth == input_depth * depth_multiplier);
70 assert(bias_shape.FlatSize() == output_depth);
71
72 for (int batch = 0; batch < batches; ++batch)
73 {
74 for (int out_y = 0; out_y < output_height; ++out_y)
75 {
76 for (int out_x = 0; out_x < output_width; ++out_x)
77 {
78 for (int in_channel = 0; in_channel < input_depth; ++in_channel)
79 {
80 for (int m = 0; m < depth_multiplier; ++m)
81 {
82 const int output_channel = m + in_channel * depth_multiplier;
83 const int in_x_origin = (out_x * stride_width) - pad_width;
84 const int in_y_origin = (out_y * stride_height) - pad_height;
85 int32_t acc = 0;
86 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
87 {
88 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
89 {
90 const int in_x = in_x_origin + dilation_width_factor * filter_x;
91 const int in_y = in_y_origin + dilation_height_factor * filter_y;
92 // Zero padding by omitting the areas outside the image.
93 const bool is_point_inside_image =
94 (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
95 if (is_point_inside_image)
96 {
97 uint8_t input_val =
98 input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
99 uint8_t filter_val =
100 filter_data[Offset(filter_shape, 0, filter_y, filter_x, output_channel)];
101
102 // { for per-channel
103 // NOTE: The following comment is copied from tflite int8 implementation
104 // It may not be 100% true for uint8 per-channel.
105 //
106 // Accumulate with 32 bits accumulator.
107 // In the nudging process during model quantization, we force
108 // real value of 0.0 be represented by a quantized value. This
109 // guarantees that the input_offset is a int8, even though it
110 // is represented using int32_t.
111 // int32 += int8 * (int8 - int8) so the highest value we can
112 // get from each accumulation is [-127, 127] * ([-128, 127] -
113 // [-128, 127]), which is [-32512, 32512]. log2(32512)
114 // = 14.98, which means we can accumulate at least 2^16
115 // multiplications without overflow. The accumulator is
116 // applied to a filter so the accumulation logic will hold as
117 // long as the filter size (filter_y * filter_x * in_channel)
118 // does not exceed 2^16, which is the case in all the models
119 // we have seen so far.
120 // TODO(jianlijianli): Add a check to make sure the
121 // accumulator depth is smaller than 2^16.
122 const int32_t filter_offset = -filter_zeropoint[output_channel];
123 acc += (filter_val + filter_offset) * (input_val + input_offset);
124 // } for per-channel
125 }
126 }
127 }
128 if (bias_data)
129 {
130 acc += bias_data[output_channel];
131 }
132 acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[output_channel],
133 output_shift[output_channel]);
134 acc += output_offset;
135 acc = std::max(acc, output_activation_min);
136 acc = std::min(acc, output_activation_max);
137 // For q8u per-channel, int8_t -> uint8_t
138 output_data[Offset(output_shape, batch, out_y, out_x, output_channel)] =
139 static_cast<uint8_t>(acc);
140 }
141 }
142 }
143 }
144 }
145}
146
147} // namespace reference_integer_ops
148} // namespace cker
149} // namespace nnfw
150
151#endif // __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__
int32_t DimensionsCount() const
Definition Shape.h:91
int32_t Dims(int i) const
Definition Shape.h:92
const luci_interpreter::RuntimeShape output_shape
void DepthwiseConvPerChannel(const DepthwiseConvParams &params, const int32_t *output_multiplier, const int32_t *output_shift, const Shape &input_shape, const uint8_t *input_data, const Shape &filter_shape, const uint8_t *filter_data, const int32_t *filter_zeropoint, const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
int MatchingDim(const Shape &shape1, int index1, const Shape &shape2, int index2)
Definition Shape.h:220
int Offset(const Shape &shape, int i0, int i1, int i2, int i3)
Definition Shape.h:237
int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
Definition Utils.h:96
Definition topk_v2.h:30
PaddingValues padding_values
Definition Types.h:234