ONE - On-device Neural Engine
Loading...
Searching...
No Matches
DepthwiseConv2D.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
#include "DepthwiseConv2D.h"

#include "Common.h"
#include "QuantizationHelpers.h"

#include "mir/ShapeRange.h"
#include "mir/Tensor.h"

#include <algorithm>
#include <cmath>
#include <limits>
#include <stdexcept>
26
27namespace mir_interpreter
28{
29
30using namespace mir;
31
32template <typename T> struct DepthwiseConv2DImpl
33{
34 static void run(const mir::ops::DepthwiseConv2DOp &op, const mir::TensorVariant &inputv,
35 const mir::TensorVariant &kernelv, const mir::TensorVariant *biasv,
36 mir::TensorVariant &output);
37};
38
39template <typename T>
41 const mir::TensorVariant &inputv,
42 const mir::TensorVariant &kernelv, const mir::TensorVariant *biasv,
43 mir::TensorVariant &output)
44{
45 const Shape &in_shape = op.getInputShape(0);
46 const Shape &kernel_shape = op.getInputShape(1);
47 const Shape &out_shape = op.getOutputShape(0);
48 const auto &strides = op.getStrides();
49 const std::vector<int32_t> &pads = op.getPaddingBefore();
50
51 assert(in_shape.rank() == 4);
52 assert(kernel_shape.rank() == 4);
53 assert(kernel_shape.dim(2) == in_shape.dim(3));
54 assert(in_shape.dim(3) * kernel_shape.dim(3) == out_shape.dim(3));
55 assert(strides.size() == 2);
56 assert(pads.size() == 2);
57
58 int32_t channel_multiplier = kernel_shape.dim(3);
59
60 Tensor<T> res_accessor(output);
61 Tensor<T> input(inputv);
62 Tensor<T> bias(*biasv);
63 Tensor<T> kernel(kernelv);
64
65 ShapeRange in_range(in_shape);
66 ShapeRange kernel_range(kernel_shape);
67 ShapeRange out_range(Shape{out_shape.dim(0), out_shape.dim(1), out_shape.dim(2), 1});
68
69 Index in_index;
70 in_index.resize(4);
71
72 erase<T>(output);
73
74 for (const auto &out_index : out_range)
75 {
76 Index out_index_k = out_index;
77 for (const auto &kernel_index : kernel_range)
78 {
79 in_index.at(0) = out_index.at(0);
80 for (int i = 0; i < 2; ++i)
81 in_index.at(1 + i) = out_index.at(1 + i) * strides[i] + kernel_index.at(i) - pads[i];
82 in_index.at(3) = kernel_index.at(2);
83
84 if (in_range.contains(in_index))
85 {
86 out_index_k.at(3) = kernel_index.at(2) * channel_multiplier + kernel_index.at(3);
87 res_accessor.at(out_index_k) += input.at(in_index) * kernel.at(kernel_index);
88 }
89 }
90 }
91}
92
93template <> struct DepthwiseConv2DImpl<uint8_t>
94{
95 static void run(const mir::ops::DepthwiseConv2DOp &op, const mir::TensorVariant &inputv,
96 const mir::TensorVariant &kernelv, const mir::TensorVariant *biasv,
97 mir::TensorVariant &output);
98};
99
101 const mir::TensorVariant &inputv,
102 const mir::TensorVariant &kernelv,
103 const mir::TensorVariant *biasv, mir::TensorVariant &output)
104{
105 if (!biasv)
106 {
107 throw std::runtime_error{"Unsupported quantized DepthwiseConv2D without fused bias"};
108 }
109
110 const auto &input_type = inputv.getType();
111 const auto &kernel_type = kernelv.getType();
112 const auto &bias_type = biasv->getType();
113 const auto &output_type = op.getOutput(0)->getType();
114 (void)bias_type;
115
116 assert(input_type.isQuantized());
117 assert(kernel_type.isQuantized());
118 assert(bias_type.isQuantized());
119 assert(output_type.isQuantized());
120 assert(input_type.getElementType() == DataType::UINT8);
121 assert(kernel_type.getElementType() == DataType::UINT8);
122 assert(bias_type.getElementType() == DataType::INT32);
123
124 int32_t input_offset = -input_type.getQuantization().getZeroPoint();
125 int32_t kernel_offset = -kernel_type.getQuantization().getZeroPoint();
126 int32_t output_offset = output_type.getQuantization().getZeroPoint();
127
128 double input_scale = input_type.getQuantization().getScale();
129 double kernel_scale = kernel_type.getQuantization().getScale();
130 double output_scale = output_type.getQuantization().getScale();
131
132 double real_multiplier = input_scale * kernel_scale / output_scale;
133 int32_t output_multiplier = 0;
134 int output_shift = 0;
135 QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
136
137 const Shape &in_shape = inputv.getShape();
138 const Shape &kernel_shape = kernelv.getShape();
139 const Shape &out_shape = op.getOutputShape(0);
140 const auto &strides = op.getStrides();
141 const std::vector<int32_t> &pads = op.getPaddingBefore();
142
143 assert(in_shape.rank() == 4);
144 assert(kernel_shape.rank() == 4);
145 assert(kernel_shape.dim(2) == in_shape.dim(3)); // HWIO
146 assert(in_shape.dim(3) * kernel_shape.dim(3) == out_shape.dim(3));
147 assert(strides.size() == 2);
148 assert(pads.size() == 2);
149
150 int32_t stride_height = strides[0];
151 int32_t stride_width = strides[1];
152
153 int32_t pad_height = pads[0];
154 int32_t pad_width = pads[1];
155
156 int32_t input_height = in_shape.dim(1);
157 int32_t input_width = in_shape.dim(2);
158
159 Tensor<uint8_t> input_accessor(inputv);
160 Tensor<uint8_t> kernel_accessor(kernelv);
161 Tensor<int32_t> bias_accessor(*biasv);
162 Tensor<uint8_t> res_accessor(output);
163
164 int32_t output_min = std::numeric_limits<uint8_t>::min();
165 int32_t output_max = std::numeric_limits<uint8_t>::max();
166
167 int batches = out_shape.dim(0);
168 int output_height = out_shape.dim(1);
169 int output_width = out_shape.dim(2);
170 int input_depth = in_shape.dim(3);
171
172 int filter_height = kernel_shape.dim(0); // HWIO
173 int filter_width = kernel_shape.dim(1); // HWIO
174
175 for (int b = 0; b < batches; ++b)
176 {
177 for (int out_y = 0; out_y < output_height; ++out_y)
178 {
179 for (int out_x = 0; out_x < output_width; ++out_x)
180 {
181 for (int ic = 0; ic < input_depth; ++ic)
182 {
183 const int oc = ic;
184 const int in_x_origin = (out_x * stride_width) - pad_width;
185 const int in_y_origin = (out_y * stride_height) - pad_height;
186 int32_t acc = 0;
187 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
188 {
189 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
190 {
191 const int in_x = in_x_origin + filter_x;
192 const int in_y = in_y_origin + filter_y;
193 // If the location is outside the bounds of the input image,
194 // use zero as a default value.
195 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
196 {
197 Index in_index{b, in_y, in_x, ic};
198 Index ker_index{filter_y, filter_x, oc, 0}; // HWIO
199 int32_t input_val = input_accessor.at(in_index);
200 int32_t kernel_val = kernel_accessor.at(ker_index);
201 acc += (kernel_val + kernel_offset) * (input_val + input_offset);
202 }
203 }
204 }
205 acc += bias_accessor.at(Index{oc});
206 acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
207 acc += output_offset;
208 acc = std::max(acc, output_min);
209 acc = std::min(acc, output_max);
210 Index out_index{b, out_y, out_x, oc};
211 res_accessor.at(out_index) = static_cast<uint8_t>(acc);
212 }
213 }
214 }
215 }
216}
217
219 const mir::TensorVariant &kernel, mir::TensorVariant &output,
220 const mir::TensorVariant *bias)
221{
222 dispatch<DepthwiseConv2DImpl>(output.getElementType(), op, input, kernel, bias, output);
223}
224
225} // namespace mir_interpreter
Index & resize(int32_t size)
resize index to given dimension number
Definition Index.cpp:24
int32_t & at(int32_t axis)
return position on given axis
Definition Index.h:64
const TensorType & getType() const
Gets the type of this output.
Definition Operation.h:91
Output * getOutput(std::size_t index)
Definition Operation.h:149
const Shape & getInputShape(std::size_t index) const
Definition Operation.h:161
const Shape & getOutputShape(std::size_t index) const
Definition Operation.h:163
int32_t & dim(int32_t axis) noexcept
Definition Shape.h:47
int32_t rank() const
Definition Shape.h:43
bool contains(const Index &idx)
Definition ShapeRange.h:92
T at(const Index &id) const
Definition Tensor.h:31
const TensorType & getType() const
const Shape & getShape() const
const std::vector< std::int32_t > & getStrides() const
const std::vector< std::int32_t > & getPaddingBefore() const
int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
static void run(const mir::ops::DepthwiseConv2DOp &op, const mir::TensorVariant &inputv, const mir::TensorVariant &kernelv, const mir::TensorVariant *biasv, mir::TensorVariant &output)