ONE - On-device Neural Engine
Loading...
Searching...
No Matches
PALFullyConnected.h
Go to the documentation of this file.
/*
 * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef LUCI_INTERPRETER_PAL_FULLY_CONNECTED_H
18#define LUCI_INTERPRETER_PAL_FULLY_CONNECTED_H
19
20#include "PALFullyConnectedCommon.h"
21
22#include <arm_nnfunctions.h>
23
25{
26
27template <>
29 const int32_t *, const int8_t *input_data,
30 const int32_t *filter_shape, const int8_t *filter_data,
31 const int32_t *bias_data, const int32_t *output_shape,
32 int8_t *output_data, uint32_t output_dims_count,
33 uint32_t weights_dims_count)
34{
35 const int batches = flatSizeSkipDim(output_shape, output_dims_count - 1, output_dims_count);
36 const int output_depth = output_shape[output_dims_count - 1];
37 const int accum_depth = filter_shape[weights_dims_count - 1];
38
39 cmsis_nn_fc_params fc_params;
40 fc_params.input_offset = params.input_offset;
41 fc_params.output_offset = params.output_offset;
42 fc_params.filter_offset = params.weights_offset;
43 fc_params.activation.min = params.quantized_activation_min;
44 fc_params.activation.max = params.quantized_activation_max;
45
46 cmsis_nn_per_tensor_quant_params quant_params;
47 quant_params.multiplier = params.output_multiplier;
48 quant_params.shift = params.output_shift;
49
50 cmsis_nn_dims input_dims;
51 input_dims.n = batches;
52 input_dims.h = 1;
53 input_dims.w = 1;
54 input_dims.c = accum_depth;
55
56 cmsis_nn_dims filter_dims;
57 filter_dims.n = accum_depth;
58 filter_dims.h = 1;
59 filter_dims.w = 1;
60 filter_dims.c = output_depth;
61
62 cmsis_nn_dims bias_dims;
63 bias_dims.n = 1;
64 bias_dims.h = 1;
65 bias_dims.w = 1;
66 bias_dims.c = output_depth;
67
68 cmsis_nn_dims output_dims;
69 output_dims.n = batches;
70 output_dims.h = 1;
71 output_dims.w = 1;
72 output_dims.c = output_depth;
73
74 int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
75 auto buffer = std::make_unique<int8_t[]>(buf_size);
76 assert(buffer != nullptr);
77
78 cmsis_nn_context ctx;
79 ctx.buf = buffer.get();
80 ctx.size = buf_size;
81
82 auto res =
83 arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
84 filter_data, &bias_dims, bias_data, &output_dims, output_data);
85 assert(res == ARM_CMSIS_NN_SUCCESS);
86}
87
88template <>
89inline void FullyConnected(const luci_interpreter_pal::FullyConnectedParams &params,
90 const int32_t *, const int16_t *input_data, const int32_t *filter_shape,
91 const int8_t *filter_data, const int64_t *bias_data,
92 const int32_t *output_shape, int16_t *output_data,
93 uint32_t output_dims_count, uint32_t weights_dims_count)
94{
95 const int batches = flatSizeSkipDim(output_shape, output_dims_count - 1, output_dims_count);
96 const int output_depth = output_shape[output_dims_count - 1];
97 const int accum_depth = filter_shape[weights_dims_count - 1];
98
99 cmsis_nn_fc_params fc_params;
100 fc_params.input_offset = params.input_offset;
101 fc_params.output_offset = params.output_offset;
102 fc_params.filter_offset = params.weights_offset;
103 fc_params.activation.min = params.quantized_activation_min;
104 fc_params.activation.max = params.quantized_activation_max;
105
106 cmsis_nn_per_tensor_quant_params quant_params;
107 quant_params.multiplier = params.output_multiplier;
108 quant_params.shift = params.output_shift;
109
110 cmsis_nn_dims input_dims;
111 input_dims.n = batches;
112 input_dims.h = 1;
113 input_dims.w = 1;
114 input_dims.c = accum_depth;
115
116 cmsis_nn_dims filter_dims;
117 filter_dims.n = accum_depth;
118 filter_dims.h = 1;
119 filter_dims.w = 1;
120 filter_dims.c = output_depth;
121
122 cmsis_nn_dims bias_dims;
123 bias_dims.n = 1;
124 bias_dims.h = 1;
125 bias_dims.w = 1;
126 bias_dims.c = output_depth;
127
128 cmsis_nn_dims output_dims;
129 output_dims.n = batches;
130 output_dims.h = 1;
131 output_dims.w = 1;
132 output_dims.c = output_depth;
133
134 int32_t buf_size = arm_fully_connected_s16_get_buffer_size(&filter_dims);
135 auto buffer = std::make_unique<int8_t[]>(buf_size);
136 assert(buffer != nullptr);
137
138 cmsis_nn_context ctx;
139 ctx.buf = buffer.get();
140 ctx.size = buf_size;
141
142 auto res =
143 arm_fully_connected_s16(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
144 filter_data, &bias_dims, bias_data, &output_dims, output_data);
145 assert(res == ARM_CMSIS_NN_SUCCESS);
146}
147
148} // namespace luci_interpreter_pal
149
150#endif // LUCI_INTERPRETER_PAL_FULLY_CONNECTED_H
const luci_interpreter::RuntimeShape output_shape
int flatSizeSkipDim(const int32_t *dims_data, int skip_dim, int num_dims)
Definition PALUtils.h:183
void FullyConnected< int8_t >(const tflite::FullyConnectedParams &params, const tflite::RuntimeShape &input_shape, const int8_t *input_data, const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, const tflite::RuntimeShape &output_shape, int8_t *output_data)