// Builds the FP32 fully-connected XNNPACK operator from the kernel/bias tensors
// and stores the resulting handle in _kernel_op. Throws std::runtime_error when
// XNNPACK does not report xnn_status_success.

// Output clamp bounds handed to the create call below. CalculateActivationRange
// receives _activation and pointers to both bounds; presumably it overwrites
// the 0.f defaults - confirm against its definition.
76 float output_activation_min = 0.f, output_activation_max = 0.f;
77 CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
// The kernel must be rank 2: dim(0) is used as the output channel count and
// dim(1) as the input channel count.
// NOTE(review): _kernel is dereferenced here, but the assert that it is
// non-null only appears further down, just before its buffer is read.
79 const auto &kernel_shape = _kernel->
getShape();
80 assert(kernel_shape.rank() == 2);
81 uint32_t output_channels = kernel_shape.dim(0);
82 uint32_t input_channels = kernel_shape.dim(1);
84 const auto &input_shape = _input->
getShape();
// Adds the XNNPACK reshape-2D flag and asserts that the total number of input
// elements is a multiple of input_channels.
// NOTE(review): the condition guarding this flag is not visible here; confirm
// when it applies versus the last-dimension check that follows.
89 flag |= XNN_FLAG_TENSORFLOW_RESHAPE_2D;
90 assert(input_shape.num_elements() % input_channels == 0);
// Asserts that the innermost input dimension equals input_channels.
94 assert(
static_cast<uint32_t
>(input_shape.dim(input_shape.rank() - 1)) == input_channels);
// The kernel tensor and its buffer must exist; the buffer is read as
// const float data.
97 assert(_kernel && _kernel->
buffer());
98 const float *kernel_buffer =
reinterpret_cast<const float *
>(_kernel->
buffer());
// Bias is optional: nullptr is passed when _bias is not set.
99 const float *bias_buffer = (_bias) ?
reinterpret_cast<const float *
>(_bias->
buffer()) :
nullptr;
// Create the operator. The channel counts are passed a second time as the
// third and fourth arguments (presumably the input/output strides - confirm
// against the XNNPACK header). The two nullptr arguments are optional
// parameters left unset here. The operator handle is written to _kernel_op.
101 enum xnn_status status = xnn_create_fully_connected_nc_f32(
102 input_channels, output_channels, input_channels ,
103 output_channels , kernel_buffer, bias_buffer, output_activation_min,
104 output_activation_max, flag,
nullptr,
nullptr, &
_kernel_op);
// Any status other than xnn_status_success is reported as an exception.
105 if (status != xnn_status_success)
107 throw std::runtime_error{
"failed to create FP32 FullyConnected operator"};
// Binds the input/output buffers to the previously created operator
// (_kernel_op). Throws std::runtime_error when a status check fails.

// Guard: taken when either the input or the output buffer pointer is null.
115 if (_input->
buffer() ==
nullptr || _output->
buffer() ==
nullptr)
// Batch size = total number of input elements divided by the kernel's dim(1).
// NOTE(review): integer division; assumes dim(1) is non-zero and divides the
// element count evenly - confirm that this is validated before reaching here.
121 uint32_t batch_size = _input->
getShape().num_elements() / _kernel->
getShape().dim(1);
122 enum xnn_status status =
// NOTE(review): this failure message says "create"; confirm that it matches
// the call whose status is being checked.
124 if (status != xnn_status_success)
126 throw std::runtime_error{
"failed to create FP32 FullyConnected operator"};
// Hands the operator the input buffer (read as const float) and the output
// buffer (written as float).
130 xnn_setup_fully_connected_nc_f32(
_kernel_op,
reinterpret_cast<const float *
>(_input->
buffer()),
131 reinterpret_cast<float *
>(_output->
buffer()));
// NOTE(review): the message below says "failed to create" although the call
// just above is a setup call - this looks like a copy-paste; a distinct
// message would make failures easier to diagnose.
132 if (status != xnn_status_success)
134 throw std::runtime_error{
"failed to create FP32 FullyConnected operator"};