ONE - On-device Neural Engine
PALFullyConnectedCommon.h
/*
 * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LUCI_INTERPRETER_PAL_FULLY_CONNECTED_COMMON_H
#define LUCI_INTERPRETER_PAL_FULLY_CONNECTED_COMMON_H

#include "PALUtils.h"
#include "Params.h"

#include <algorithm> // std::min / std::max used below
#include <type_traits>

namespace luci_interpreter_pal
{

// Generic quantized path: accumulates (filter + filter_offset) * (input + input_offset)
// into a BiasType accumulator (int32_t for int8 kernels), then requantizes the result
// to the output scale and clamps it to the quantized activation range.
template <typename InputType, typename WeightType, typename OutputType, typename BiasType>
inline void FullyConnected(const FullyConnectedParams &params, const int32_t *input_shape,
                           const InputType *input_data, const int32_t *filter_shape,
                           const WeightType *filter_data, const BiasType *bias_data,
                           const int32_t *output_shape, OutputType *output_data,
                           uint32_t output_dims_count, uint32_t weights_dims_count)
{
  const int32_t input_offset = params.input_offset;
  const int32_t filter_offset = params.weights_offset;
  const int32_t output_offset = params.output_offset;
  const int32_t output_multiplier = params.output_multiplier;
  const int output_shift = params.output_shift;
  const int32_t output_activation_min = params.quantized_activation_min;
  const int32_t output_activation_max = params.quantized_activation_max;

  // Batches = product of all output dims except the last; the last output dim
  // is the number of output units, and the last filter dim is the accumulation depth.
  const int batches = flatSizeSkipDim(output_shape, output_dims_count - 1, output_dims_count);
  const int output_depth = output_shape[output_dims_count - 1];
  const int accum_depth = filter_shape[weights_dims_count - 1];

  for (int b = 0; b < batches; ++b)
  {
    for (int out_c = 0; out_c < output_depth; ++out_c)
    {
      BiasType acc = 0;
      for (int d = 0; d < accum_depth; ++d)
      {
        int32_t input_val = input_data[b * accum_depth + d];
        int32_t filter_val = filter_data[out_c * accum_depth + d];
        acc += (filter_val + filter_offset) * (input_val + input_offset);
      }
      if (bias_data)
      {
        acc += bias_data[out_c];
      }
      // Rescale the integer accumulator into the output scale (fixed-point
      // multiply plus shift), apply the output zero point, then clamp.
      int32_t acc_scaled = multiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
      acc_scaled += output_offset;
      acc_scaled = std::max(acc_scaled, output_activation_min);
      acc_scaled = std::min(acc_scaled, output_activation_max);
      output_data[out_c + output_depth * b] = static_cast<OutputType>(acc_scaled);
    }
  }
}
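
// Worked example of the requantization step above (illustrative numbers, not
// from the source; assumes multiplyByQuantizedMultiplier follows the usual
// TFLite convention where the real rescale factor is
// output_multiplier / 2^31 * 2^output_shift):
//   acc = 100, output_multiplier = 1 << 30 (i.e. 0.5), output_shift = 0
//     -> acc_scaled = multiplyByQuantizedMultiplier(100, 1 << 30, 0) = 50
//   with output_offset = 10 and activation range [-128, 127]:
//     -> output = clamp(50 + 10) = 60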

// Float path: WeightType is either float (weights used as-is) or int8_t
// (weights dequantized on the fly with the scale(s) in params.weights_scales).
template <typename WeightType>
inline void FullyConnected(const FullyConnectedParams &params, const int32_t *input_shape,
                           const float *input_data, const int32_t *filter_shape,
                           const WeightType *filter_data, const float *bias_data,
                           const int32_t *output_shape, float *output_data,
                           uint32_t output_dims_count, uint32_t weights_dims_count)
{
  const float output_activation_min = params.float_activation_min;
  const float output_activation_max = params.float_activation_max;

  const int batches = flatSizeSkipDim(output_shape, output_dims_count - 1, output_dims_count);
  const int output_depth = output_shape[output_dims_count - 1];
  const int accum_depth = filter_shape[weights_dims_count - 1];

  for (int b = 0; b < batches; ++b)
  {
    // For channel-wise quantized weights there is one scale per output channel;
    // restart at the first scale for every batch.
    const float *weight_scale_ptr = params.weights_scales;
    for (int out_c = 0; out_c < output_depth; ++out_c)
    {
      float total = 0.f;
      for (int d = 0; d < accum_depth; ++d)
      {
        auto input_value = input_data[b * accum_depth + d];
        // std::is_same<...>::value is a compile-time constant, so only one of
        // these branches is ever taken for a given instantiation.
        if (std::is_same<WeightType, float>::value)
        {
          total += input_value * filter_data[out_c * accum_depth + d];
        }
        else
        {
          const float filter_scale = *weight_scale_ptr;
          const float filter_value =
            static_cast<float>(filter_data[out_c * accum_depth + d]) * filter_scale;
          total += input_value * filter_value;
        }
      }
      float bias_value = 0.0f;
      if (bias_data)
      {
        bias_value = bias_data[out_c];
      }
      output_data[out_c + output_depth * b] =
        std::min(std::max(total + bias_value, output_activation_min), output_activation_max);
      // Advance to the next per-channel scale only for channel-wise int8 weights;
      // a single per-tensor scale is reused for every output channel.
      if (std::is_same<WeightType, int8_t>::value)
      {
        if (params.is_channel_wise_quant)
          weight_scale_ptr++;
      }
    }
  }
}

} // namespace luci_interpreter_pal

#endif // LUCI_INTERPRETER_PAL_FULLY_CONNECTED_COMMON_H
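
As a usage reference (not part of the file), the sketch below drives the float overload with made-up shapes and values. It assumes the header's dependencies (PALUtils.h, Params.h) are on the include path and that FullyConnectedParams is defined in luci_interpreter_pal, as the unqualified use in the listing suggests. Note that neither overload reads input_shape; the accumulation depth comes from the last filter dimension.

#include "PALFullyConnectedCommon.h"

#include <cstdio>
#include <limits>

int main()
{
  // One batch, 4 inputs, 3 output units: output = filter * input + bias.
  const int32_t input_shape[] = {1, 4};  // passed through but not read by the kernel
  const int32_t filter_shape[] = {3, 4}; // [output_depth, accum_depth]
  const int32_t output_shape[] = {1, 3}; // [batches, output_depth]

  const float input[] = {1.f, 2.f, 3.f, 4.f};
  const float filter[] = {1.f, 0.f, 0.f, 0.f,  // output 0 <- input 0
                          0.f, 1.f, 0.f, 0.f,  // output 1 <- input 1
                          0.f, 0.f, 1.f, 1.f}; // output 2 <- input 2 + input 3
  const float bias[] = {0.5f, 0.5f, 0.5f};
  float output[3] = {};

  luci_interpreter_pal::FullyConnectedParams params{};
  // No fused activation: clamp to the full float range.
  params.float_activation_min = std::numeric_limits<float>::lowest();
  params.float_activation_max = std::numeric_limits<float>::max();

  // WeightType is deduced as float, so weights are used without dequantization.
  luci_interpreter_pal::FullyConnected(params, input_shape, input, filter_shape, filter, bias,
                                       output_shape, output,
                                       /*output_dims_count=*/2, /*weights_dims_count=*/2);

  std::printf("%g %g %g\n", output[0], output[1], output[2]); // expected: 1.5 2.5 7.5
  return 0;
}

The quantized overload is driven the same way; it additionally needs the zero-point offsets and an output_multiplier/output_shift pair derived from the input, weight, and output scales.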