ONE - On-device Neural Engine
mir_interpreter::FullyConnectedImpl< uint8_t > Struct Reference

Static Public Member Functions

static void run (const mir::TensorVariant &inputv, const mir::TensorVariant &weightsv, const mir::ops::FullyConnectedOp &op, mir::TensorVariant &res, const mir::TensorVariant *biasv)
 

Detailed Description

Definition at line 123 of file FullyConnected.cpp.
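This specialization implements the uint8 (affine-quantized) path of the interpreter's FullyConnected kernel. As a summary of the arithmetic performed by run() below (notation ours, not from the source), for each batch b and output channel c:

\[
\mathrm{acc}_{b,c} \;=\; \sum_{d} \bigl(w_{d,c} + w_{\mathrm{off}}\bigr)\bigl(x_{b,d} + x_{\mathrm{off}}\bigr) \;+\; \mathrm{bias}_c
\]
\[
\mathrm{out}_{b,c} \;=\; \operatorname{clamp}\bigl(\operatorname{round}(M \cdot \mathrm{acc}_{b,c}) + o_{\mathrm{off}},\; 0,\; 255\bigr),
\qquad M = \frac{s_x\, s_w}{s_{\mathrm{out}}}
\]

where w_off and x_off are the negated zero points of the weights and input, o_off is the output zero point, and s_x, s_w, s_out are the respective quantization scales.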

Member Function Documentation

◆ run()

void mir_interpreter::FullyConnectedImpl< uint8_t >::run ( const mir::TensorVariant &       inputv,
                                                           const mir::TensorVariant &       weightsv,
                                                           const mir::ops::FullyConnectedOp & op,
                                                           mir::TensorVariant &             res,
                                                           const mir::TensorVariant *       biasv )
static

Definition at line 130 of file FullyConnected.cpp.

{
  // The quantized kernel requires a fused int32 bias.
  if (!biasv)
  {
    throw std::runtime_error{"Quantized FullyConnected cannot be executed without fused bias"};
  }

  const auto &input_type = inputv.getType();
  const auto &weights_type = weightsv.getType();
  const auto &bias_type = biasv->getType();
  const auto &output_type = op.getOutput(0)->getType();
  (void)bias_type;

  // Input and weights must be quantized uint8 tensors; the bias must be quantized int32.
  assert(input_type.isQuantized());
  assert(weights_type.isQuantized());
  assert(bias_type.isQuantized());
  assert(output_type.isQuantized());
  assert(input_type.getElementType() == mir::DataType::UINT8);
  assert(weights_type.getElementType() == mir::DataType::UINT8);
  assert(bias_type.getElementType() == mir::DataType::INT32);

  // Offsets are the negated zero points of the input and weights; the output zero point
  // keeps a positive sign because it is added back after rescaling.
  int32_t input_offset = -input_type.getQuantization().getZeroPoint();
  int32_t weights_offset = -weights_type.getQuantization().getZeroPoint();
  int32_t output_offset = output_type.getQuantization().getZeroPoint();

  double input_scale = input_type.getQuantization().getScale();
  double weights_scale = weights_type.getQuantization().getScale();
  double output_scale = output_type.getQuantization().getScale();

  // Fold the combined scale into a fixed-point multiplier and shift for integer-only rescaling.
  double real_multiplier = input_scale * weights_scale / output_scale;
  int32_t output_multiplier = 0;
  int output_shift = 0;
  QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);

  const mir::Shape &in_shape = inputv.getShape();
  const mir::Shape &weights_shape = weightsv.getShape();
  const mir::Shape &out_shape = op.getOutputShape(0);

  // input: [batches, accum_depth], weights: [accum_depth, output_depth].
  const int32_t batches = in_shape.dim(0);
  assert(in_shape.rank() == 2);
  assert(weights_shape.rank() == 2);
  assert(in_shape.dim(1) == weights_shape.dim(0));
  const int32_t accum_depth = weights_shape.dim(0);
  const int32_t output_depth = weights_shape.dim(1);

  uint8_t *input_data = reinterpret_cast<uint8_t *>(inputv.atOffset(0));
  uint8_t *weights_data = reinterpret_cast<uint8_t *>(weightsv.atOffset(0));
  int32_t *bias_data = reinterpret_cast<int32_t *>(biasv->atOffset(0));

  uint8_t *output_data = reinterpret_cast<uint8_t *>(res.atOffset(0));

  int32_t output_min = std::numeric_limits<uint8_t>::min();
  int32_t output_max = std::numeric_limits<uint8_t>::max();

  for (int32_t b = 0; b < batches; ++b)
  {
    for (int32_t out_c = 0; out_c < output_depth; ++out_c)
    {
      // Accumulate the zero-point-adjusted products in int32.
      int32_t acc = 0;
      for (int d = 0; d < accum_depth; ++d)
      {
        int32_t input_val = input_data[b * accum_depth + d];
        int32_t weights_val = weights_data[d * output_depth + out_c];
        acc += (weights_val + weights_offset) * (input_val + input_offset);
      }
      acc += bias_data[out_c];
      // Rescale to the output scale, re-apply the output zero point and clamp to the uint8 range.
      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
      acc += output_offset;
      acc = std::max(acc, output_min);
      acc = std::min(acc, output_max);
      output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
    }
  }
}

References mir::TensorVariant::atOffset(), mir::Shape::dim(), mir::Operation::getOutput(), mir::Operation::getOutputShape(), mir::TensorVariant::getShape(), mir::Operation::Output::getType(), mir::TensorVariant::getType(), mir_interpreter::MultiplyByQuantizedMultiplier(), mir_interpreter::QuantizeMultiplier(), and mir::Shape::rank().

Referenced by package.infer.session::inference().


The documentation for this struct was generated from the following file:

FullyConnected.cpp