20#include "../KernelGenerator.h"
21#include "../Validator.h"
30void Validator::visit(
const ir::operation::FullyConnected &node)
32 using ir::operation::FullyConnected;
34 const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
39 if (weight_node->typeInfo().type() == ir::DataType::QUANT_GGML_Q4_0 ||
40 weight_node->typeInfo().type() == ir::DataType::QUANT_GGML_Q8_0)
46void KernelGenerator::visit(
const ir::operation::FullyConnected &node)
48 using ir::operation::FullyConnected;
50 const auto output_index{node.getOutputs().at(0)};
51 const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
52 const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
53 const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
54 const auto activation = node.param().activation;
55 const auto weights_format = node.param().weights_format;
57 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
58 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
59 auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
60 auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
62 auto fn = std::make_unique<ops::FullyConnectedLayer>();
64 fn->configure(input_tensor, weight_tensor,
bias_tensor, activation, weights_format, output_tensor,
76 : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
77 _activation(ir::Activation::
NONE), _temp_arena(new
nnfw::cker::FCTempArena()),
78 _external_context(nullptr), _is_hybrid(false), _is_shuffled16x1float32(false)
88 float output_activation_min = 0;
89 float output_activation_max = 0;
109 double real_multiplier = 0.0;
110 int32_t output_multiplier = 0;
111 int32_t output_shift = 0;
112 int32_t output_activation_min = 0;
113 int32_t output_activation_max = 0;
117 &output_activation_max);
154 (_cached_weights) ?
reinterpret_cast<const int8_t *
>(_cached_weights)
159 if (_cached_weights ==
nullptr || _is_weights_freed)
172 auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(
_weights);
176 auto tensor =
const_cast<Tensor *
>(weight_tensor);
177 if (tensor->buffer() ==
nullptr)
179 _is_weights_freed =
true;
183 tensor->decrease_ref();
184 if (tensor->buffer() ==
nullptr)
186#if defined(__ANDROID__) && (__ANDROID_API__ >= 26)
190 _is_weights_freed =
true;
204 if (block_size.size() == 0)
211 else if (block_size.size() == 2 && block_size[0] == 16 && block_size[1] == 1)
219 throw std::runtime_error{
"FullyConnected: unsupported sparsity"};
224#if defined(__aarch64__) && defined(USE_NEON)
225 float output_activation_min = 0, output_activation_max = 0;
236 throw std::runtime_error{
"FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
244 const std::shared_ptr<ExternalContext> &external_context)
251 _is_hybrid = input->data_type() == OperandType::FLOAT32 &&
252 weights->
data_type() == OperandType::QUANT_INT8_SYMM;
254#if !defined(__aarch64__) || !defined(USE_NEON)
257 throw std::runtime_error{
258 "FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
284 throw std::runtime_error{
"FullyConnected: unsupported data type"};
299#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(USE_RUY_GEMV)
void prepare(const Shape &input_shape, const Shape &weights_shape)
int32_t Dims(int i) const
A tensor class that is portable for other backends.
const ir::Sparsity * sparsity() const
float data_scale() const override final
int32_t data_zero_point() const override final
ir::DataType data_type() const override final
bool is_dynamic() const override final
Return true if the tensor needs dynamic allocation, meaning that during compile-time the output shape...
bool is_constant() const override final
Return true if the tensor is constant.
virtual uint8_t * buffer() const =0
std::unique_ptr< exec::IFunction > _return_fn
ir::Activation _activation
void fullyConnected16x1Float32()
void fullyConnectedSparseWeight()
const IPortableTensor * _weights
const IPortableTensor * _bias
bool _is_shuffled16x1float32
void fullyConnectedFloat32()
IPortableTensor * _output
std::unique_ptr< nnfw::cker::FCTempArena > _temp_arena
std::shared_ptr< ExternalContext > _external_context
const IPortableTensor * _input
void fullyConnectedHybrid()
void configure(const IPortableTensor *input, const IPortableTensor *weights, const IPortableTensor *bias, ir::Activation activation, ir::FullyConnectedWeightsFormat weights_format, IPortableTensor *output, const std::shared_ptr< ExternalContext > &external_context)
void fullyConnectedQuant8()
const Operands & operands() const override
const Object & at(const Index &index) const
Get the object that is associated with the given index.
void FullyConnectedSparseWeightRandom(const FullyConnectedParams &params, const Shape &input_shape, const float *input_data, const Shape &weights_shape, const float *weights_data, const Shape &bias_shape, const float *bias_data, const Shape &output_shape, float *output_data, const uint16_t *w1_segments, const uint16_t *w1_indices)
void FullyConnectedSparseWeight16x1(const FullyConnectedParams &params, const Shape &input_shape, const float *input_data, const Shape &weights_shape, const float *weights_data, const Shape &bias_shape, const float *bias_data, const Shape &output_shape, float *output_data, const uint16_t *w1_segments, const uint16_t *w1_indices)
void FullyConnectedHybrid(const FullyConnectedParams &params, const Shape &input_shape, const float *input_data, const Shape &filter_shape, const int8_t *filter_data, const Shape &, const float *bias_data, const Shape &output_shape, float *output_data, FCTempArena &temp_arena, ruy::Context *ruy_context)
void FullyConnected(const FullyConnectedParams &params, const Shape &input_shape, const float *input_data, const Shape &weights_shape, const float *weights_data, const Shape &, const float *bias_data, const Shape &, float *output_data)
bool IsZeroVector(const float *vector, int v_size)
nnfw::cker::FusedActivationFunctionType convertActivationType(const ir::Activation activation)
nnfw::cker::Shape getShape(const IPortableTensor *tensor)
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output, int32_t *act_min, int32_t *act_max)
void GetQuantizedConvolutionMultiplier(const IPortableTensor *input, const IPortableTensor *filter, const IPortableTensor *bias, const IPortableTensor *output, double *multiplier)
FullyConnectedWeightsFormat
void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
int32_t output_multiplier
FusedActivationFunctionType activation
float float_activation_max
int32_t quantized_activation_min
float float_activation_min
int32_t quantized_activation_max
const std::vector< int32_t > & block_size() const
Returns block size which is used for block sparsity.
const uint16_t * w1_segments() const
Returns segments array. See compressed sparse row format.
const uint16_t * w1_indices() const
Returns indices array. See compressed sparse row format.