ONE - On-device Neural Engine
Loading...
Searching...
No Matches
PALFullyConnectedCommon.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef ONERT_MICRO_EXECUTE_PAL_FULLY_CONNECTED_COMMON_H
19#define ONERT_MICRO_EXECUTE_PAL_FULLY_CONNECTED_COMMON_H
20
#include "OMStatus.h"
#include "PALUtils.h"

#include <algorithm>
#include <cmath>
#include <type_traits>
25
26namespace onert_micro
27{
28namespace execute
29{
30namespace pal
31{
32
33template <typename InputType, typename WeightType, typename OutputType, typename BiasType>
34OMStatus FullyConnected(const core::FullyConnectedParams &params, const InputType *input_data,
35 const core::OMRuntimeShape &filter_shape, const WeightType *filter_data,
36 const BiasType *bias_data, const core::OMRuntimeShape &output_shape,
37 OutputType *output_data)
38{
39 const int32_t input_offset = params.input_offset;
40 const int32_t filter_offset = params.weights_offset;
41 const int32_t output_offset = params.output_offset;
42 const int32_t output_multiplier = params.output_multiplier;
43 const int output_shift = params.output_shift;
44 const int32_t output_activation_min = params.quantized_activation_min;
45 const int32_t output_activation_max = params.quantized_activation_max;
46
47 const int filter_dim_count = filter_shape.dimensionsCount();
48 const int output_dim_count = output_shape.dimensionsCount();
49 const int batches =
50 flatSizeSkipDim(output_shape.dimsData(), output_dim_count - 1, output_dim_count);
51 const int output_depth = output_shape.dims(output_dim_count - 1);
52
53 const int accum_depth = filter_shape.dims(filter_dim_count - 1);
54 for (int b = 0; b < batches; ++b)
55 {
56 for (int out_c = 0; out_c < output_depth; ++out_c)
57 {
58 BiasType acc = 0;
59 for (int d = 0; d < accum_depth; ++d)
60 {
61 int32_t input_val = input_data[b * accum_depth + d];
62 int32_t filter_val = filter_data[out_c * accum_depth + d];
63 acc += (filter_val + filter_offset) * (input_val + input_offset);
64 }
65 if (bias_data)
66 {
67 acc += bias_data[out_c];
68 }
69 int32_t acc_scaled = multiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
70 acc_scaled += output_offset;
71 acc_scaled = std::max(acc_scaled, output_activation_min);
72 acc_scaled = std::min(acc_scaled, output_activation_max);
73 output_data[out_c + output_depth * b] = static_cast<OutputType>(acc_scaled);
74 }
75 }
76 return Ok;
77}
78
79template <typename WeightType>
80OMStatus inline FullyConnected(const core::FullyConnectedParams &params, const float *input_data,
81 const core::OMRuntimeShape &filter_shape,
82 const WeightType *filter_data, const float *bias_data,
83 const core::OMRuntimeShape &output_shape, float *output_data)
84{
85 const float output_activation_min = params.float_activation_min;
86 const float output_activation_max = params.float_activation_max;
87
90 const int output_depth = output_shape.dims(output_shape.dimensionsCount() - 1);
91 const int accum_depth = filter_shape.dims(filter_shape.dimensionsCount() - 1);
92
93 for (int b = 0; b < batches; ++b)
94 {
95 const float *weight_scale_ptr = params.weights_scales;
96 for (int out_c = 0; out_c < output_depth; ++out_c)
97 {
98 float total = 0.f;
99 for (int d = 0; d < accum_depth; ++d)
100 {
101 auto input_value = input_data[b * accum_depth + d];
102 if (std::is_same<WeightType, float>::value)
103 {
104 total += input_value * filter_data[out_c * accum_depth + d];
105 }
106 else
107 {
108 const float filter_scale = *weight_scale_ptr;
109 const float filter_value =
110 static_cast<float>(filter_data[out_c * accum_depth + d]) * filter_scale;
111 total += input_value * filter_value;
112 }
113 }
114 float bias_value = 0.0f;
115 if (bias_data)
116 {
117 bias_value = bias_data[out_c];
118 }
119 output_data[out_c + output_depth * b] =
120 std::min(std::max(total + bias_value, output_activation_min), output_activation_max);
121
122 if (std::is_same<WeightType, int8_t>::value)
123 {
124 if (params.is_channel_wise_quant)
125 weight_scale_ptr++;
126 }
127 }
128 }
129 return Ok;
130}
131
132} // namespace pal
133} // namespace execute
134} // namespace onert_micro
135
136#endif // ONERT_MICRO_EXECUTE_PAL_FULLY_CONNECTED_COMMON_H
int32_t dimensionsCount() const
Definition Tensor.h:106
int32_t dims(int i) const
Definition Tensor.h:108
const luci_interpreter::RuntimeShape output_shape
int flatSizeSkipDim(const int32_t *dims_data, int skip_dim, int num_dims)
Definition PALUtils.h:210
OMStatus FullyConnected(const core::FullyConnectedParams &params, const int16_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int64_t *bias_data, const core::OMRuntimeShape &output_shape, int16_t *output_data)
int32_t multiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
Definition PALUtils.h:104