{
  if (!biasv)
  {
    throw std::runtime_error{"Quantized FullyConnected cannot be executed without fused bias"};
  }
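  // Quantization parameters (scale and zero point) are carried on the tensor types. The output
  // type is read from the op's result; 'op' (the FullyConnectedOp) is a function parameter
  // outside this excerpt.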
  const auto &input_type = inputv.getType();
  const auto &weights_type = weightsv.getType();
  const auto &bias_type = biasv->getType();
  const auto &output_type = op.getOutput(0)->getType();
  (void)bias_type;

  assert(input_type.isQuantized());
  assert(weights_type.isQuantized());
  assert(bias_type.isQuantized());
  assert(output_type.isQuantized());
  assert(input_type.getElementType() == mir::DataType::UINT8);
  assert(weights_type.getElementType() == mir::DataType::UINT8);
  assert(bias_type.getElementType() == mir::DataType::INT32);
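  // Offsets are negated zero points: a real value is scale * (q - zero_point), so each raw
  // uint8 value gets its zero point subtracted before multiplication.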
  int32_t input_offset = -input_type.getQuantization().getZeroPoint();
  int32_t weights_offset = -weights_type.getQuantization().getZeroPoint();
  int32_t output_offset = output_type.getQuantization().getZeroPoint();

  double input_scale = input_type.getQuantization().getScale();
  double weights_scale = weights_type.getQuantization().getScale();
  double output_scale = output_type.getQuantization().getScale();
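  // Rescaling the int32 accumulator into the output's quantized domain uses the factor
  // input_scale * weights_scale / output_scale, approximated by an integer multiplier and a
  // power-of-two shift.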
  double real_multiplier = input_scale * weights_scale / output_scale;
  int32_t output_multiplier = 0;
  int output_shift = 0;
  QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);

  const Shape &in_shape = inputv.getShape();
  const Shape &weights_shape = weightsv.getShape();
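  // Both operands are 2-D: input is [batches, accum_depth], weights are [accum_depth, output_depth].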
  const int32_t batches = in_shape.dim(0);
  assert(in_shape.rank() == 2);
  assert(weights_shape.rank() == 2);
  assert(in_shape.dim(1) == weights_shape.dim(0));
  const int32_t accum_depth = weights_shape.dim(0);
  const int32_t output_depth = weights_shape.dim(1);
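  // Raw buffers: uint8 input and weights, int32 bias. The output buffer comes from the result
  // tensor variant, assumed here to be a parameter named 'res' (the signature is outside this
  // excerpt).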
  uint8_t *input_data = reinterpret_cast<uint8_t *>(inputv.atOffset(0));
  uint8_t *weights_data = reinterpret_cast<uint8_t *>(weightsv.atOffset(0));
  int32_t *bias_data = reinterpret_cast<int32_t *>(biasv->atOffset(0));
  uint8_t *output_data = reinterpret_cast<uint8_t *>(res.atOffset(0));

  int32_t output_min = std::numeric_limits<uint8_t>::min();
  int32_t output_max = std::numeric_limits<uint8_t>::max();
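  // For each output element: accumulate zero-point-adjusted products in int32, add the bias,
  // rescale with the quantized multiplier, add the output zero point, and clamp to [0, 255].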
  for (int32_t b = 0; b < batches; ++b)
  {
    for (int32_t out_c = 0; out_c < output_depth; ++out_c)
    {
      int32_t acc = 0;
      for (int d = 0; d < accum_depth; ++d)
      {
        int32_t input_val = input_data[b * accum_depth + d];
        int32_t weights_val = weights_data[d * output_depth + out_c];
        acc += (weights_val + weights_offset) * (input_val + input_offset);
      }
      acc += bias_data[out_c];
      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
      acc += output_offset;
      acc = std::max(acc, output_min);
      acc = std::min(acc, output_max);
      output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
    }
  }
}
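// The helpers used above are declared elsewhere as:
//   void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
//   int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift);
// Below is a minimal sketch of the usual fixed-point scheme behind such helpers, an assumption
// for illustration rather than the project's actual implementation: the real multiplier is split
// into a Q31 mantissa and a power-of-two exponent, and applying it to an int32 becomes a 64-bit
// multiply followed by a rounding shift. Saturation and other edge cases handled by the real
// helpers are omitted.

#include <cmath>
#include <cstdint>

inline void QuantizeMultiplierSketch(double double_multiplier, int32_t *quantized_multiplier,
                                     int *shift)
{
  // double_multiplier == q * 2^shift with q in [0.5, 1); store q as a Q31 integer.
  const double q = std::frexp(double_multiplier, shift);
  auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
  if (q_fixed == (1ll << 31)) // rounding can push q up to exactly 1.0
  {
    q_fixed /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

inline int32_t MultiplyByQuantizedMultiplierSketch(int32_t x, int32_t quantized_multiplier,
                                                   int shift)
{
  // Approximately round(x * double_multiplier): multiply by the Q31 mantissa in 64-bit,
  // then shift right by (31 - shift) with round-to-nearest.
  const int64_t prod = static_cast<int64_t>(x) * quantized_multiplier;
  const int total_shift = 31 - shift;
  return static_cast<int32_t>((prod + (int64_t{1} << (total_shift - 1))) >> total_shift);
}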