// (Standard headers such as <cassert>, <cmath>, and <limits> are assumed to be included earlier in the file.)

uint32_t getNumberOfDimensions(const IPortableTensor *tensor) { return tensor->getShape().rank(); }

uint32_t getNumberOfElements(const IPortableTensor *tensor)
{
  uint32_t count = 1;
  auto shape = tensor->getShape();
  for (int i = 0; i < shape.rank(); i++)
    count *= shape.dim(i);
  return count;
}

uint32_t getSizeOfDimension(const IPortableTensor *tensor, uint32_t dimensionIdx)
{
  auto shape = tensor->getShape();
  if (dimensionIdx >= static_cast<uint32_t>(shape.rank()))
    return 0; // out-of-range index: report the dimension as having size 0
  return shape.dim(dimensionIdx);
}
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
{
  if (double_multiplier == 0.)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(double_multiplier, shift);
  auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
  assert(q_fixed <= (1ll << 31));
  if (q_fixed == (1ll << 31))
  {
    q_fixed /= 2;
    ++*shift;
  }
  assert(q_fixed <= std::numeric_limits<int32_t>::max());
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}
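For reference, a worked example of the fixed-point encoding this produces (the value 0.75 is arbitrary, and the snippet assumes the declaration above is visible):

int32_t quantized_multiplier = 0;
int shift = 0;
QuantizeMultiplier(0.75, &quantized_multiplier, &shift);
// std::frexp(0.75, &shift) yields q = 0.75 with shift = 0, so
// quantized_multiplier == 1610612736 (= round(0.75 * 2^31)) and shift == 0;
// the real multiplier is recovered as (quantized_multiplier / 2^31) * 2^shift.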
void GetQuantizedConvolutionMultiplier(const IPortableTensor *input, const IPortableTensor *filter,
                                       const IPortableTensor *bias, const IPortableTensor *output,
                                       double *multiplier)
{
  const double input_product_scale = input->data_scale() * filter->data_scale();
  [[maybe_unused]] const double bias_scale =
    (bias != nullptr) ? bias->data_scale() : input_product_scale;
  const double output_scale = output->data_scale();
  // The bias scale must match input_scale * filter_scale, and the effective
  // multiplier must be below 1 so that the accumulator can be scaled down.
  assert(std::abs(input_product_scale - bias_scale) <=
         1e-6 * std::min(input_product_scale, bias_scale));
  assert(input_product_scale >= 0);
  assert(input_product_scale < output_scale);
  *multiplier = input_product_scale / output_scale;
}
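The resulting double is then folded into fixed-point form by QuantizeMultiplier before the int32 accumulator is requantized; a minimal sketch with invented scales:

double real_multiplier = 0.5 * 0.25 / 1.0; // input_scale * filter_scale / output_scale
int32_t output_multiplier = 0;
int output_shift = 0;
QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
// 0.125 is encoded as output_multiplier == 1073741824 (= 2^30) with output_shift == -2,
// i.e. 0.125 == (2^30 / 2^31) * 2^-2.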
void GetQuantizedConvolutionMultipliersAndShifts(
  float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size,
  int num_channels, std::vector<int32_t> &per_channel_output_multiplier,
  std::vector<int> &per_channel_output_shift)
{
  per_channel_output_multiplier.resize(num_channels);
  per_channel_output_shift.resize(num_channels);

  const bool is_per_channel = filter_scales_size > 1;
  auto per_channel_multiplier = per_channel_output_multiplier.data();
  auto per_channel_shift = per_channel_output_shift.data();
  for (int i = 0; i < num_channels; ++i)
  {
    // With per-tensor quantization there is a single filter scale; broadcast it
    // across all output channels.
    const float scale = is_per_channel ? filter_scales[i] : filter_scales[0];
    const double filter_scale = static_cast<double>(scale);
    const double effective_output_scale =
      static_cast<double>(input_scale) * filter_scale / static_cast<double>(output_scale);
    int32_t significand;
    int channel_shift;
    QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
    per_channel_multiplier[i] = significand;
    per_channel_shift[i] = channel_shift;
  }
}
void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
                                      int *left_shift)
{
  assert(double_multiplier > 1.);
  const double q = std::frexp(double_multiplier, left_shift);
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
  assert(q_fixed <= (1ll << 31));
  if (q_fixed == (1ll << 31))
  {
    q_fixed /= 2;
    ++*left_shift;
  }
  assert(*left_shift >= 0);
  assert(q_fixed <= std::numeric_limits<int32_t>::max());
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}
void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output,
                                       int32_t *act_min, int32_t *act_max)
{
  int32_t qmin = 0, qmax = 0;
  switch (output->data_type())
  {
    case OperandType::QUANT_UINT8_ASYMM:
      qmin = std::numeric_limits<uint8_t>::min();
      qmax = std::numeric_limits<uint8_t>::max();
      break;
    case OperandType::QUANT_INT8_ASYMM:
    case OperandType::QUANT_INT8_SYMM:
      qmin = std::numeric_limits<int8_t>::min();
      qmax = std::numeric_limits<int8_t>::max();
      break;
    default:
      throw std::runtime_error("CalculateActivationRangeQuantized: Not supported operand type.");
  }
  const auto scale = output->data_scale();
  const auto zero_point = output->data_zero_point();
  auto quantize = [scale, zero_point](float f) {
    return zero_point + static_cast<int32_t>(std::round(f / scale));
  };
  if (activation == ir::Activation::RELU)
  {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = qmax;
  }
  else if (activation == ir::Activation::RELU6)
  {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = std::min(qmax, quantize(6.0));
  }
  else if (activation == ir::Activation::RELU1)
  {
    *act_min = std::max(qmin, quantize(-1.0));
    *act_max = std::min(qmax, quantize(1.0));
  }
  else if (activation == ir::Activation::SIGMOID)
  {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = std::min(qmax, quantize(1.0));
  }
  else if (activation == ir::Activation::NONE)
  {
    *act_min = qmin;
    *act_max = qmax;
  }
  else
  {
    throw std::runtime_error{"Unsupported fused activation function."};
  }
}
// HaveSameShapes (excerpt): early-outs, then a rank and per-dimension comparison.
if (input1 == input2)
  return true;
if (input1 == NULL || input2 == NULL)
  return false;
// ... for each dimension i: if (shape1.dim(i) != shape2.dim(i)) return false;
int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift)
{
  const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
                                    (1ll << (31 - input_integer_bits)) / (1ll << input_left_shift);
  return static_cast<int32_t>(std::floor(max_input_rescaled));
}
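A quick arithmetic check of the formula, using example parameter values of the kind the fixed-point activation kernels pass in:

// input_integer_bits = 4, input_left_shift = 20:
//   ((1 << 4) - 1) * (1ll << 27) = 15 * 134217728 = 2013265920
//   2013265920 / (1ll << 20)     = 2013265920 / 1048576   = 1920
assert(CalculateInputRadius(4, 20) == 1920);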
uint32_t sizeOfData(OperandType type, const std::vector<int32_t> &dimensions)
{
  uint32_t size = 4;
  switch (type)
  {
    case OperandType::FLOAT32:
    case OperandType::INT32:
    case OperandType::UINT32:
      size = 4;
      break;
    case OperandType::BOOL8:
    case OperandType::QUANT_UINT8_ASYMM:
    case OperandType::QUANT_INT8_SYMM:
      size = 1;
      break;
    case OperandType::INT64:
      size = 8;
      break;
    default:
      throw std::runtime_error("Not supported operand type.");
  }

  for (auto &&d : dimensions)
    size *= static_cast<uint32_t>(d);

  return size;
}
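For example, a FLOAT32 tensor of shape {1, 224, 224, 3} (an arbitrary NHWC shape) occupies:

const uint32_t bytes = sizeOfData(OperandType::FLOAT32, {1, 224, 224, 3});
assert(bytes == 602112); // 4 bytes per element * 1 * 224 * 224 * 3 elements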
// getPaddingType (excerpt): translate ir::PaddingType into nnfw::cker::PaddingType.
switch (ir_padding_type)
{
  // ... one case per supported ir::PaddingType ...
  default: throw std::runtime_error("Wrong padding type.");
}
std::vector<int32_t> getReducerAxes(const IPortableTensor *axes)
{
  std::vector<int32_t> ret;

  auto axes_vals = (axes->getShape().rank() == 0) ? 1 : axes->getShape().dim(0);
  assert(static_cast<size_t>(axes_vals) == axes->getShape().num_elements());
  switch (axes->data_type())
  {
    case ir::DataType::INT32:
      for (int i = 0; i < axes_vals; ++i)
        ret.emplace_back(*(getBuffer<int32_t>(axes) + i));
      break;
    case ir::DataType::INT64:
      for (int i = 0; i < axes_vals; ++i)
        ret.emplace_back(*(getBuffer<int64_t>(axes) + i));
      break;
    default:
      throw std::runtime_error("getReducerAxes: Not supported data type");
  }
  return ret;
}
// getRoPEMode (excerpt): unrecognized modes are rejected.
throw std::runtime_error("Wrong rope mode.");
IPortableTensor: a tensor class that is portable for other backends.
  ir::DataType data_type() const override final
  ir::Shape getShape() const override final: get the ir::Shape of the tensor.
uint32_t getNumberOfElements(const Shape &shape)
uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx)
uint32_t getNumberOfDimensions(const Shape &shape)
int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift)
nnfw::cker::RoPEMode getRoPEMode(ir::operation::RoPE::RoPEMode rope_mode)
void GetQuantizedConvolutionMultipliersAndShifts(float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size, int num_channels, std::vector<int32_t> &per_channel_output_multiplier, std::vector<int> &per_channel_output_shift)
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
nnfw::cker::PaddingType getPaddingType(ir::PaddingType ir_padding_type)
uint32_t sizeOfData(OperandType type, const std::vector<int32_t> &dimensions)
void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier, int *left_shift)
std::vector<int32_t> getReducerAxes(const IPortableTensor *axes)
void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output, int32_t *act_min, int32_t *act_max)
void GetQuantizedConvolutionMultiplier(const IPortableTensor *input, const IPortableTensor *filter, const IPortableTensor *bias, const IPortableTensor *output, double *multiplier)
bool HaveSameShapes(const IPortableTensor *input1, const IPortableTensor *input2)
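Putting the quantization helpers together, a minimal sketch of preparing per-channel requantization parameters for a quantized convolution; the wrapper name prepare_requant_params_example and all scale values are invented for illustration:

// A usage sketch (hypothetical wrapper; assumes the declarations above are visible).
void prepare_requant_params_example()
{
  const std::vector<float> filter_scales{0.02f, 0.04f, 0.01f}; // one scale per output channel
  std::vector<int32_t> per_channel_multiplier;
  std::vector<int> per_channel_shift;
  GetQuantizedConvolutionMultipliersAndShifts(
    /*input_scale=*/0.5f, /*output_scale=*/0.25f, filter_scales.data(), filter_scales.size(),
    /*num_channels=*/3, per_channel_multiplier, per_channel_shift);
  // per_channel_multiplier[i] and per_channel_shift[i] now encode
  // input_scale * filter_scales[i] / output_scale as a Q31 significand and a power-of-two shift.
}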