31 const InputType *input_data,
const int32_t *filter_shape,
32 const WeightType *filter_data,
const BiasType *bias_data,
34 uint32_t output_dims_count, uint32_t weights_dims_count)
45 const int output_depth =
output_shape[output_dims_count - 1];
46 const int accum_depth = filter_shape[weights_dims_count - 1];
48 for (
int b = 0; b < batches; ++b)
50 for (
int out_c = 0; out_c < output_depth; ++out_c)
53 for (
int d = 0; d < accum_depth; ++d)
55 int32_t input_val = input_data[b * accum_depth + d];
56 int32_t filter_val = filter_data[out_c * accum_depth + d];
57 acc += (filter_val + filter_offset) * (input_val + input_offset);
61 acc += bias_data[out_c];
64 acc_scaled += output_offset;
65 acc_scaled = std::max(acc_scaled, output_activation_min);
66 acc_scaled = std::min(acc_scaled, output_activation_max);
67 output_data[out_c + output_depth * b] =
static_cast<OutputType
>(acc_scaled);
73 const float *input_data,
const int32_t *filter_shape,
74 const WeightType *filter_data,
const float *bias_data,
76 uint32_t output_dims_count, uint32_t weights_dims_count)
82 const int output_depth =
output_shape[output_dims_count - 1];
83 const int accum_depth = filter_shape[weights_dims_count - 1];
85 for (
int b = 0; b < batches; ++b)
88 for (
int out_c = 0; out_c < output_depth; ++out_c)
91 for (
int d = 0; d < accum_depth; ++d)
93 auto input_value = input_data[b * accum_depth + d];
94 if (std::is_same<WeightType, float>::value)
96 total += input_value * filter_data[out_c * accum_depth + d];
100 const float filter_scale = *weight_scale_ptr;
101 const float filter_value =
102 static_cast<float>(filter_data[out_c * accum_depth + d]) * filter_scale;
103 total += input_value * filter_value;
106 float bias_value = 0.0f;
109 bias_value = bias_data[out_c];
111 output_data[out_c + output_depth * b] =
112 std::min(std::max(total + bias_value, output_activation_min), output_activation_max);
113 if (std::is_same<WeightType, int8_t>::value)