ONE - On-device Neural Engine
mir_interpreter::DepthwiseConv2DImpl< uint8_t > Struct Reference

Static Public Member Functions

static void run (const mir::ops::DepthwiseConv2DOp &op, const mir::TensorVariant &inputv, const mir::TensorVariant &kernelv, const mir::TensorVariant *biasv, mir::TensorVariant &output)
 

Detailed Description

Specialization of DepthwiseConv2DImpl for quantized uint8 tensors. It computes a depthwise 2D convolution over an NHWC input with an HWIO kernel, requires a fused int32 bias, and requantizes each int32 accumulator back to uint8 using the input, kernel, and output quantization parameters.

Definition at line 93 of file DepthwiseConv2D.cpp.

Member Function Documentation

◆ run()

void mir_interpreter::DepthwiseConv2DImpl< uint8_t >::run ( const mir::ops::DepthwiseConv2DOp & op,
  const mir::TensorVariant & inputv,
  const mir::TensorVariant & kernelv,
  const mir::TensorVariant * biasv,
  mir::TensorVariant & output
)
static
Definition at line 100 of file DepthwiseConv2D.cpp.

{
  if (!biasv)
  {
    throw std::runtime_error{"Unsupported quantized DepthwiseConv2D without fused bias"};
  }

  const auto &input_type = inputv.getType();
  const auto &kernel_type = kernelv.getType();
  const auto &bias_type = biasv->getType();
  const auto &output_type = op.getOutput(0)->getType();
  (void)bias_type;

  assert(input_type.isQuantized());
  assert(kernel_type.isQuantized());
  assert(bias_type.isQuantized());
  assert(output_type.isQuantized());
  assert(input_type.getElementType() == DataType::UINT8);
  assert(kernel_type.getElementType() == DataType::UINT8);
  assert(bias_type.getElementType() == DataType::INT32);

  int32_t input_offset = -input_type.getQuantization().getZeroPoint();
  int32_t kernel_offset = -kernel_type.getQuantization().getZeroPoint();
  int32_t output_offset = output_type.getQuantization().getZeroPoint();

  double input_scale = input_type.getQuantization().getScale();
  double kernel_scale = kernel_type.getQuantization().getScale();
  double output_scale = output_type.getQuantization().getScale();

  double real_multiplier = input_scale * kernel_scale / output_scale;
  int32_t output_multiplier = 0;
  int output_shift = 0;
  QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);

  const Shape &in_shape = inputv.getShape();
  const Shape &kernel_shape = kernelv.getShape();
  const Shape &out_shape = op.getOutputShape(0);
  const auto &strides = op.getStrides();
  const std::vector<int32_t> &pads = op.getPaddingBefore();

  assert(in_shape.rank() == 4);
  assert(kernel_shape.rank() == 4);
  assert(kernel_shape.dim(2) == in_shape.dim(3)); // HWIO
  assert(in_shape.dim(3) * kernel_shape.dim(3) == out_shape.dim(3));
  assert(strides.size() == 2);
  assert(pads.size() == 2);

  int32_t stride_height = strides[0];
  int32_t stride_width = strides[1];

  int32_t pad_height = pads[0];
  int32_t pad_width = pads[1];

  int32_t input_height = in_shape.dim(1);
  int32_t input_width = in_shape.dim(2);

  Tensor<uint8_t> input_accessor(inputv);
  Tensor<uint8_t> kernel_accessor(kernelv);
  Tensor<int32_t> bias_accessor(*biasv);
  Tensor<uint8_t> res_accessor(output);

  int32_t output_min = std::numeric_limits<uint8_t>::min();
  int32_t output_max = std::numeric_limits<uint8_t>::max();

  int batches = out_shape.dim(0);
  int output_height = out_shape.dim(1);
  int output_width = out_shape.dim(2);
  int input_depth = in_shape.dim(3);

  int filter_height = kernel_shape.dim(0); // HWIO
  int filter_width = kernel_shape.dim(1);  // HWIO

  for (int b = 0; b < batches; ++b)
  {
    for (int out_y = 0; out_y < output_height; ++out_y)
    {
      for (int out_x = 0; out_x < output_width; ++out_x)
      {
        for (int ic = 0; ic < input_depth; ++ic)
        {
          const int oc = ic;
          const int in_x_origin = (out_x * stride_width) - pad_width;
          const int in_y_origin = (out_y * stride_height) - pad_height;
          int32_t acc = 0;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y)
          {
            for (int filter_x = 0; filter_x < filter_width; ++filter_x)
            {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              // If the location is outside the bounds of the input image,
              // use zero as a default value.
              if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
              {
                Index in_index{b, in_y, in_x, ic};
                Index ker_index{filter_y, filter_x, oc, 0}; // HWIO
                int32_t input_val = input_accessor.at(in_index);
                int32_t kernel_val = kernel_accessor.at(ker_index);
                acc += (kernel_val + kernel_offset) * (input_val + input_offset);
              }
            }
          }
          acc += bias_accessor.at(Index{oc});
          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
          acc += output_offset;
          acc = std::max(acc, output_min);
          acc = std::min(acc, output_max);
          Index out_index{b, out_y, out_x, oc};
          res_accessor.at(out_index) = static_cast<uint8_t>(acc);
        }
      }
    }
  }
}
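
The listing maps each int32 accumulator back to uint8 through the fixed-point multiplier computed from the three quantization scales. The standalone sketch below (the helper name requantize_reference is hypothetical and not part of mir_interpreter) shows the same requantization step in plain floating point, which is the arithmetic the fixed-point path approximates:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

// Floating-point reference for the requantization step in the listing above.
// 'acc' is the raw sum of (input_val + input_offset) * (kernel_val + kernel_offset),
// i.e. a value expressed in the combined scale input_scale * kernel_scale.
// Illustrative only; not part of mir_interpreter.
uint8_t requantize_reference(int32_t acc, int32_t bias, double input_scale, double kernel_scale,
                             double output_scale, int32_t output_zero_point)
{
  const double real_multiplier = input_scale * kernel_scale / output_scale;
  int32_t out =
    static_cast<int32_t>(std::lround((acc + bias) * real_multiplier)) + output_zero_point;
  // Clamp to the representable uint8 range, as the listing does with output_min/output_max.
  out = std::max<int32_t>(out, std::numeric_limits<uint8_t>::min());
  out = std::min<int32_t>(out, std::numeric_limits<uint8_t>::max());
  return static_cast<uint8_t>(out);
}

At inference time the kernel avoids the floating-point multiply by pre-computing output_multiplier and output_shift once with QuantizeMultiplier and applying them per element with MultiplyByQuantizedMultiplier.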

References mir::Tensor< T >::at(), mir::Shape::dim(), mir::Operation::getOutput(), mir::Operation::getOutputShape(), mir::ops::DepthwiseConv2DOp::getPaddingBefore(), mir::TensorVariant::getShape(), mir::ops::DepthwiseConv2DOp::getStrides(), mir::Operation::Output::getType(), mir::TensorVariant::getType(), mir_interpreter::MultiplyByQuantizedMultiplier(), mir_interpreter::QuantizeMultiplier(), and mir::Shape::rank().
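
QuantizeMultiplier and MultiplyByQuantizedMultiplier carry the fixed-point arithmetic referenced above. As an assumption for illustration, the sketch below follows the common gemmlowp/TFLite convention for such helpers (a signed Q31 mantissa plus a power-of-two shift); the exact mir_interpreter implementations may differ in detail, so treat this as a sketch rather than the library code:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>

// Decompose a real multiplier m into m ~= q * 2^shift with q stored as signed Q31.
void quantize_multiplier_sketch(double double_multiplier, int32_t *quantized_multiplier, int *shift)
{
  if (double_multiplier == 0.0)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(double_multiplier, shift); // double_multiplier = q * 2^shift, q in [0.5, 1)
  auto q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
  assert(q_fixed <= (1LL << 31));
  if (q_fixed == (1LL << 31)) // rounding pushed q up to 1.0: renormalize
  {
    q_fixed /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

// High 32 bits of 2*a*b with rounding, saturating the single overflow case.
int32_t saturating_rounding_doubling_high_mul(int32_t a, int32_t b)
{
  const bool overflow = a == b && a == std::numeric_limits<int32_t>::min();
  const int64_t ab_64 = static_cast<int64_t>(a) * b;
  const int64_t nudge = ab_64 >= 0 ? (1LL << 30) : (1 - (1LL << 30));
  const int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1LL << 31));
  return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32;
}

// Arithmetic right shift with round-to-nearest (ties away from zero).
int32_t rounding_divide_by_pot(int32_t x, int exponent)
{
  const int32_t mask = static_cast<int32_t>((1LL << exponent) - 1);
  const int32_t remainder = x & mask;
  const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
  return (x >> exponent) + (remainder > threshold ? 1 : 0);
}

// Apply the decomposed multiplier to an int32 accumulator.
int32_t multiply_by_quantized_multiplier_sketch(int32_t x, int32_t quantized_multiplier, int shift)
{
  const int left_shift = shift > 0 ? shift : 0;
  const int right_shift = shift > 0 ? 0 : -shift;
  return rounding_divide_by_pot(
    saturating_rounding_doubling_high_mul(x * (1 << left_shift), quantized_multiplier),
    right_shift);
}

For this uint8 path real_multiplier = input_scale * kernel_scale / output_scale is typically below 1, so shift ends up negative and only the rounding right shift applies after the high multiply.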

Referenced by package.infer.session::inference().


The documentation for this struct was generated from the following file:
DepthwiseConv2D.cpp