return tensor->getShape().rank();
uint32_t count = 1;
auto shape = tensor->getShape();
for (int i = 0; i < shape.rank(); i++)
  count *= shape.dim(i);
return count;
auto shape = tensor->getShape();
if (dimensionIdx >= static_cast<uint32_t>(shape.rank()))
{
  return 0; // out-of-range dimension index
}
return shape.dim(dimensionIdx);
if (double_multiplier == 0.)
{
  *quantized_multiplier = 0;
  *shift = 0;
  return;
}
const double q = std::frexp(double_multiplier, shift);
auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
assert(q_fixed <= (1ll << 31));
if (q_fixed == (1ll << 31))
{
  q_fixed /= 2;
  ++*shift;
}
assert(q_fixed <= std::numeric_limits<int32_t>::max());
*quantized_multiplier = static_cast<int32_t>(q_fixed);
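// Illustrative sketch (not part of this file): how a real multiplier such as
// input_scale * filter_scale / output_scale is split into a Q31 significand and a
// power-of-two shift, mirroring QuantizeMultiplier() above. std::frexp() returns
// q in [0.5, 1) such that multiplier == q * 2^shift. The multiplier value below is
// an assumed example.
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
  const double multiplier = 0.00784; // assumed example value
  int shift = 0;
  const double q = std::frexp(multiplier, &shift);
  const auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31))); // Q31 significand
  assert(q_fixed <= (1ll << 31));
  std::printf("significand = %lld, shift = %d\n", static_cast<long long>(q_fixed), shift);
  // A kernel can then rescale an int32 accumulator acc roughly as
  //   (acc * q_fixed) >> (31 - shift)   with rounding (schematic only).
  return 0;
}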
const double input_product_scale = input->data_scale() * filter->data_scale();
[[maybe_unused]] const double bias_scale =
  (bias != nullptr) ? bias->data_scale() : input_product_scale;
const double output_scale = output->data_scale();

assert(std::abs(input_product_scale - bias_scale) <=
       1e-6 * std::min(input_product_scale, bias_scale));
assert(input_product_scale >= 0);
assert(input_product_scale < output_scale);
*multiplier = input_product_scale / output_scale;
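// Worked example (assumed scale values, illustration only): with input_scale = 0.5,
// filter_scale = 0.25 and output_scale = 1.0, the real multiplier handed to the
// requantization step is 0.5 * 0.25 / 1.0 = 0.125, and the bias is expected to be
// quantized with scale input_scale * filter_scale = 0.125, which is what the
// asserts above check up to a 1e-6 relative tolerance.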
void GetQuantizedConvolutionMultipliersAndShifts(
  float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size,
  int num_channels, std::vector<int32_t> &per_channel_output_multiplier,
  std::vector<int> &per_channel_output_shift)
{
  per_channel_output_multiplier.resize(num_channels);
  per_channel_output_shift.resize(num_channels);

  const bool is_per_channel = filter_scales_size > 1;
  auto per_channel_multiplier = per_channel_output_multiplier.data();
  auto per_channel_shift = per_channel_output_shift.data();
  for (int i = 0; i < num_channels; ++i)
  {
    const float scale = is_per_channel ? filter_scales[i] : filter_scales[0];
    const double filter_scale = static_cast<double>(scale);
    const double effective_output_scale =
      static_cast<double>(input_scale) * filter_scale / static_cast<double>(output_scale);
    int32_t significand;
    int channel_shift;
    QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
    per_channel_multiplier[i] = significand;
    per_channel_shift[i] = channel_shift;
  }
}
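// Minimal usage sketch (assumed scale values, illustration only; assumes the
// declaration of GetQuantizedConvolutionMultipliersAndShifts above is in scope).
// With a single filter scale the same multiplier/shift pair is broadcast to every
// output channel; with per-channel scales each channel gets its own pair.
#include <cstdint>
#include <vector>

void perChannelExample()
{
  const float filter_scales[] = {0.02f, 0.04f, 0.08f}; // one scale per output channel
  std::vector<int32_t> multipliers;
  std::vector<int> shifts;
  GetQuantizedConvolutionMultipliersAndShifts(/*input_scale=*/0.5f, /*output_scale=*/1.0f,
                                              filter_scales, /*filter_scales_size=*/3,
                                              /*num_channels=*/3, multipliers, shifts);
  // For each channel i, multipliers[i] / 2^31 * 2^shifts[i] ~= 0.5f * filter_scales[i] / 1.0f.
}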
assert(double_multiplier > 1.);
const double q = std::frexp(double_multiplier, left_shift);
int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
assert(q_fixed <= (1ll << 31));
if (q_fixed == (1ll << 31))
{
  q_fixed /= 2;
  ++*left_shift;
}
assert(*left_shift >= 0);
assert(q_fixed <= std::numeric_limits<int32_t>::max());
*quantized_multiplier = static_cast<int32_t>(q_fixed);
void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output,
                                       int32_t *act_min, int32_t *act_max)
{
  int32_t qmin = 0;
  int32_t qmax = 0;
  switch (output->data_type())
  {
    case OperandType::QUANT_UINT8_ASYMM:
      qmin = std::numeric_limits<uint8_t>::min();
      qmax = std::numeric_limits<uint8_t>::max();
      break;
    case OperandType::QUANT_INT8_ASYMM:
    case OperandType::QUANT_INT8_SYMM:
      qmin = std::numeric_limits<int8_t>::min();
      qmax = std::numeric_limits<int8_t>::max();
      break;
    default:
      throw std::runtime_error("CalculateActivationRangeQuantized: Not supported operand type.");
  }
  const auto scale = output->data_scale();
  const auto zero_point = output->data_zero_point();
  auto quantize = [scale, zero_point](float f) {
    return zero_point + static_cast<int32_t>(std::round(f / scale));
  };
  if (activation == ir::Activation::RELU)
  {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = qmax;
  }
  else if (activation == ir::Activation::RELU6)
  {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = std::min(qmax, quantize(6.0));
  }
  else if (activation == ir::Activation::RELU1)
  {
    *act_min = std::max(qmin, quantize(-1.0));
    *act_max = std::min(qmax, quantize(1.0));
  }
  else if (activation == ir::Activation::SIGMOID)
  {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = std::min(qmax, quantize(1.0));
  }
  else if (activation == ir::Activation::NONE)
  {
    *act_min = qmin;
    *act_max = qmax;
  }
  else
  {
    throw std::runtime_error{"Unsupported fused activation function."};
  }
}
if (input1 == input2)
  return true;
if (input1 == NULL || input2 == NULL)
  return false;
// ... ranks compared, then each dimension:
if (shape1.dim(i) != shape2.dim(i))
  return false;
const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
                                  (1ll << (31 - input_integer_bits)) / (1ll << input_left_shift);
return static_cast<int32_t>(std::floor(max_input_rescaled));
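// Worked example (illustration only): with input_integer_bits = 4 and
// input_left_shift = 20,
//   max_input_rescaled = (2^4 - 1) * 2^(31 - 4) / 2^20 = 15 * 128 = 1920,
// so the computed input radius is 1920.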
uint32_t size = 4;
switch (type)
{
  case OperandType::FLOAT32:
  case OperandType::INT32:
  case OperandType::UINT32:
    size = 4;
    break;
  case OperandType::BOOL8:
  case OperandType::QUANT_UINT8_ASYMM:
  case OperandType::QUANT_INT8_SYMM:
    size = 1;
    break;
  case OperandType::INT64:
    size = 8;
    break;
  default: throw std::runtime_error("Not supported operand type.");
}
for (auto &&d : dimensions)
  size *= static_cast<uint32_t>(d);
return size;
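// Worked example (illustration only): a FLOAT32 operand with dimensions
// {1, 224, 224, 3} occupies 4 * 1 * 224 * 224 * 3 = 602112 bytes, while the same
// dimensions as QUANT_UINT8_ASYMM occupy 1 * 224 * 224 * 3 = 150528 bytes.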
switch (ir_padding_type)
{
  // ... ir::PaddingType cases mapped to nnfw::cker::PaddingType ...
  default:
    throw std::runtime_error("Wrong padding type.");
}
std::vector<int32_t> ret;

auto axes_vals = (axes->getShape().rank() == 0) ? 1 : axes->getShape().dim(0);
assert(static_cast<size_t>(axes_vals) == axes->getShape().num_elements());
switch (axes->data_type())
{
  case ir::DataType::INT32:
  {
    for (int i = 0; i < axes_vals; ++i)
      ret.emplace_back(*(getBuffer<int32_t>(axes) + i));
    break;
  }
  case ir::DataType::INT64:
  {
    for (int i = 0; i < axes_vals; ++i)
      ret.emplace_back(*(getBuffer<int64_t>(axes) + i));
    break;
  }
  default:
    throw std::runtime_error("getReducerAxes: Not supported data type");
}
return ret;
throw std::runtime_error("Wrong rope mode.");
IPortableTensor: A tensor class that is portable for other backends.
ir::DataType data_type() const override final
ir::Shape getShape() const override final: Get ir::Shape of tensor.
uint32_t getNumberOfElements(const Shape &shape)
uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx)
uint32_t getNumberOfDimensions(const Shape &shape)
int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift)
nnfw::cker::RoPEMode getRoPEMode(ir::operation::RoPE::RoPEMode rope_mode)
void GetQuantizedConvolutionMultipliersAndShifts(float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size, int num_channels, std::vector< int32_t > &per_channel_output_multiplier, std::vector< int > &per_channel_output_shift)
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
nnfw::cker::PaddingType getPaddingType(ir::PaddingType ir_padding_type)
uint32_t sizeOfData(OperandType type, const std::vector< int32_t > &dimensions)
void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier, int *left_shift)
std::vector< int32_t > getReducerAxes(const IPortableTensor *axes)
void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output, int32_t *act_min, int32_t *act_max)
void GetQuantizedConvolutionMultiplier(const IPortableTensor *input, const IPortableTensor *filter, const IPortableTensor *bias, const IPortableTensor *output, double *multiplier)
bool HaveSameShapes(const IPortableTensor *input1, const IPortableTensor *input2)