ONE - On-device Neural Engine
Loading...
Searching...
No Matches
gemm_helpers.h
Go to the documentation of this file.
1
/*
2
* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
3
*
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
7
*
8
* http://www.apache.org/licenses/LICENSE-2.0
9
*
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
15
*/
16
17
/*
18
* Copyright (c) 2019-2020 ARM Limited.
19
*
20
* SPDX-License-Identifier: MIT
21
*
22
* Permission is hereby granted, free of charge, to any person obtaining a copy
23
* of this software and associated documentation files (the "Software"), to
24
* deal in the Software without restriction, including without limitation the
25
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
26
* sell copies of the Software, and to permit persons to whom the Software is
27
* furnished to do so, subject to the following conditions:
28
*
29
* The above copyright notice and this permission notice shall be included in all
30
* copies or substantial portions of the Software.
31
*
32
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38
* SOFTWARE.
39
*/
40
#include "
activation_float_helpers.h
"
41
#include "
helpers.h
"
42
55
/** Declare and load the first n rows of a block, n = 1..16 (OpenCL C helper macros).
 *
 * LOAD_ROW_n expands LOAD_ROW_(n-1) and then declares one more variable
 * BASENAME##i of type VEC_DATA_TYPE(DATA_TYPE, N0), initialised with VLOAD(N0)
 * from the address (PTR) + (OFFSET) + i * (STRIDE_Y) + Z##i.
 * Rows 10..15 use the suffixes A..F for both BASENAME and Z.
 * The expansion is a sequence of declarations, so it must appear where declarations are allowed.
 *
 * @param[in] N0        Vector width forwarded to VEC_DATA_TYPE and VLOAD
 * @param[in] DATA_TYPE Element type of the loaded vectors
 * @param[in] BASENAME  Prefix of the declared row variables
 * @param[in] PTR       Base address; the sum is cast to (__global DATA_TYPE *) afterwards
 * @param[in] OFFSET    Offset added to PTR for every row
 * @param[in] STRIDE_Y  Per-row stride, multiplied by the row index
 * @param[in] Z         Prefix of the per-row extra offsets Z##i
 *
 * PTR, OFFSET and STRIDE_Y are parenthesised so that expression arguments
 * (for instance a stride written as "a + b") keep the intended precedence.
 */
#define LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##0 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 0 * (STRIDE_Y) + Z##0));

#define LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##1 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 1 * (STRIDE_Y) + Z##1));

#define LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##2 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 2 * (STRIDE_Y) + Z##2));

#define LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##3 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 3 * (STRIDE_Y) + Z##3));

#define LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##4 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 4 * (STRIDE_Y) + Z##4));

#define LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##5 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 5 * (STRIDE_Y) + Z##5));

#define LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##6 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 6 * (STRIDE_Y) + Z##6));

#define LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##7 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 7 * (STRIDE_Y) + Z##7));

#define LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##8 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 8 * (STRIDE_Y) + Z##8));

#define LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##9 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 9 * (STRIDE_Y) + Z##9));

#define LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##A = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 10 * (STRIDE_Y) + Z##A));

#define LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##B = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 11 * (STRIDE_Y) + Z##B));

#define LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##C = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 12 * (STRIDE_Y) + Z##C));

#define LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##D = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 13 * (STRIDE_Y) + Z##D));

#define LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##E = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 14 * (STRIDE_Y) + Z##E));

#define LOAD_ROW_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##F = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 15 * (STRIDE_Y) + Z##F));
133
// end of group LOAD_ROW_n
135
155
/** Load a block of M0 rows by dispatching to LOAD_ROW_<M0>.
 *
 * LOAD_BLOCK forwards to LOAD_BLOCK_STR so that an M0 given as a macro is
 * expanded to its value before it is pasted onto the LOAD_ROW_ prefix.
 * All remaining arguments are passed through unchanged.
 *
 * @param[in] M0 Number of rows; selects which LOAD_ROW_n is expanded
 */
#define LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
#define LOAD_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
// end of group LOAD_BLOCK
160
172
/** Declare n row variables and initialise each from a single element, n = 1..16.
 *
 * LOAD_ELEMENT_n expands LOAD_ELEMENT_(n-1) and then declares BASENAME##i of type
 * VEC_DATA_TYPE(DATA_TYPE, N0), initialised from one DATA_TYPE value read at
 * (PTR) + (OFFSET) + i * (STRIDE_Y). Rows 10..15 use the suffixes A..F.
 * The expansion is a sequence of declarations.
 *
 * @param[in] N0        Vector width of the declared variables
 * @param[in] DATA_TYPE Type of the element that is read
 * @param[in] BASENAME  Prefix of the declared variables
 * @param[in] PTR       Base address; the sum is cast to (__global DATA_TYPE *) afterwards
 * @param[in] OFFSET    Offset added to PTR for every row
 * @param[in] STRIDE_Y  Per-row stride, multiplied by the row index
 *
 * PTR, OFFSET and STRIDE_Y are parenthesised so expression arguments keep their precedence.
 */
#define LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##0 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 0 * (STRIDE_Y)));

#define LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##1 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 1 * (STRIDE_Y)));

#define LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##2 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 2 * (STRIDE_Y)));

#define LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##3 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 3 * (STRIDE_Y)));

#define LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##4 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 4 * (STRIDE_Y)));

#define LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##5 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 5 * (STRIDE_Y)));

#define LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##6 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 6 * (STRIDE_Y)));

#define LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##7 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 7 * (STRIDE_Y)));

#define LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##8 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 8 * (STRIDE_Y)));

#define LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##9 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 9 * (STRIDE_Y)));

#define LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##A = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 10 * (STRIDE_Y)));

#define LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##B = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 11 * (STRIDE_Y)));

#define LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##C = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 12 * (STRIDE_Y)));

#define LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##D = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 13 * (STRIDE_Y)));

#define LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##E = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 14 * (STRIDE_Y)));

#define LOAD_ELEMENT_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0) \
  BASENAME##F = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 15 * (STRIDE_Y)));
250
// end of group LOAD_ELEMENT_n
252
269
/** Load M0 single elements into M0 row variables by dispatching to LOAD_ELEMENT_<M0>.
 *
 * LOAD_SCALAR_AS_VECTOR forwards to LOAD_SCALAR_AS_VECTOR_STR so that an M0 given
 * as a macro is expanded before it is pasted onto the LOAD_ELEMENT_ prefix.
 * All remaining arguments are passed through unchanged.
 *
 * @param[in] M0 Number of rows; selects which LOAD_ELEMENT_n is expanded
 */
#define LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
#define LOAD_SCALAR_AS_VECTOR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
// end of group LOAD_SCALAR_AS_VECTOR
274
289
/** Compute the per-row offsets Z##0 .. Z##(n-1), n = 1..8.
 *
 * CALCULATE_Z_OFFSET_n expands CALCULATE_Z_OFFSET_(n-1) and then, for row i = n-1:
 *   Z##i = (i + (DATA_TYPE)(Y * M0)) / HEIGHT_GEMM3D;
 *   Z##i = min(DEPTH_GEMM3D - 1, Z##i);
 *   Z##i *= CROSS_PLANE_PAD * STRIDE_Y;
 * with every operand cast to DATA_TYPE as in the definitions below.
 * The Z##i variables are assigned, not declared; they must already exist.
 *
 * @param[in]  M0              Multiplier applied to Y
 * @param[in]  DATA_TYPE       Type used for the arithmetic
 * @param[out] Z               Prefix of the offset variables
 * @param[in]  Y               Value multiplied by M0 to form the row base
 * @param[in]  HEIGHT_GEMM3D   Divisor applied to the row index
 * @param[in]  DEPTH_GEMM3D    Upper bound; the quotient is clamped to DEPTH_GEMM3D - 1
 * @param[in]  CROSS_PLANE_PAD Factor of the final scale
 * @param[in]  STRIDE_Y        Factor of the final scale
 *
 * Every argument is parenthesised so expression arguments keep their precedence.
 */
#define CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y) \
  Z##0 = (0 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
  Z##0 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##0); \
  Z##0 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y) \
  CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                       STRIDE_Y) \
  Z##1 = (1 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
  Z##1 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##1); \
  Z##1 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y) \
  CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                       STRIDE_Y) \
  Z##2 = (2 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
  Z##2 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##2); \
  Z##2 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y) \
  CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                       STRIDE_Y) \
  Z##3 = (3 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
  Z##3 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##3); \
  Z##3 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y) \
  CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                       STRIDE_Y) \
  Z##4 = (4 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
  Z##4 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##4); \
  Z##4 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y) \
  CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                       STRIDE_Y) \
  Z##5 = (5 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
  Z##5 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##5); \
  Z##5 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y) \
  CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                       STRIDE_Y) \
  Z##6 = (6 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
  Z##6 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##6); \
  Z##6 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_8(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y) \
  CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                       STRIDE_Y) \
  Z##7 = (7 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
  Z##7 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##7); \
  Z##7 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));
350
// end of group CALCULATE_Z_OFFSET_n
352
385
/** Compute M0 per-row offsets by dispatching to CALCULATE_Z_OFFSET_<M0>.
 *
 * CALCULATE_Z_OFFSET forwards to CALCULATE_Z_OFFSET_STR so that an M0 given as a
 * macro is expanded before it is pasted onto the CALCULATE_Z_OFFSET_ prefix.
 * M0 is also passed on as the first argument; the rest are forwarded unchanged.
 *
 * @param[in] M0 Number of rows; selects which CALCULATE_Z_OFFSET_n is expanded
 */
#define CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                               STRIDE_Y) \
  CALCULATE_Z_OFFSET_##M0(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                          STRIDE_Y)
#define CALCULATE_Z_OFFSET(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                           STRIDE_Y) \
  CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                         STRIDE_Y)
// end of group CALCULATE_Z_OFFSET
394
406
/** Store the first n rows of a block, n = 1..16.
 *
 * STORE_ROW_n expands STORE_ROW_(n-1) and then stores BASENAME##i with VSTORE(N0)
 * to the address (PTR) + i * (STRIDE_Y) + Z##i, cast to (__global DATA_TYPE *).
 * Rows 10..15 use the suffixes A..F for both BASENAME and Z.
 *
 * @param[in] N0        Vector width forwarded to VSTORE
 * @param[in] DATA_TYPE Element type of the destination pointer
 * @param[in] BASENAME  Prefix of the row variables to store
 * @param[in] PTR       Base address
 * @param[in] STRIDE_Y  Per-row stride, multiplied by the row index
 * @param[in] Z         Prefix of the per-row extra offsets Z##i
 *
 * PTR and STRIDE_Y are parenthesised so expression arguments keep their precedence.
 */
#define STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##0, 0, (__global DATA_TYPE *)((PTR) + 0 * (STRIDE_Y) + Z##0));

#define STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##1, 0, (__global DATA_TYPE *)((PTR) + 1 * (STRIDE_Y) + Z##1));

#define STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##2, 0, (__global DATA_TYPE *)((PTR) + 2 * (STRIDE_Y) + Z##2));

#define STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##3, 0, (__global DATA_TYPE *)((PTR) + 3 * (STRIDE_Y) + Z##3));

#define STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##4, 0, (__global DATA_TYPE *)((PTR) + 4 * (STRIDE_Y) + Z##4));

#define STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##5, 0, (__global DATA_TYPE *)((PTR) + 5 * (STRIDE_Y) + Z##5));

#define STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##6, 0, (__global DATA_TYPE *)((PTR) + 6 * (STRIDE_Y) + Z##6));

#define STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##7, 0, (__global DATA_TYPE *)((PTR) + 7 * (STRIDE_Y) + Z##7));

#define STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##8, 0, (__global DATA_TYPE *)((PTR) + 8 * (STRIDE_Y) + Z##8));

#define STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##9, 0, (__global DATA_TYPE *)((PTR) + 9 * (STRIDE_Y) + Z##9));

#define STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##A, 0, (__global DATA_TYPE *)((PTR) + 10 * (STRIDE_Y) + Z##A));

#define STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##B, 0, (__global DATA_TYPE *)((PTR) + 11 * (STRIDE_Y) + Z##B));

#define STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##C, 0, (__global DATA_TYPE *)((PTR) + 12 * (STRIDE_Y) + Z##C));

#define STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##D, 0, (__global DATA_TYPE *)((PTR) + 13 * (STRIDE_Y) + Z##D));

#define STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##E, 0, (__global DATA_TYPE *)((PTR) + 14 * (STRIDE_Y) + Z##E));

#define STORE_ROW_16(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (BASENAME##F, 0, (__global DATA_TYPE *)((PTR) + 15 * (STRIDE_Y) + Z##F));
// end of group STORE_ROW_n
485
497
/** Convert with saturation and store the first n rows of a block, n = 1..16.
 *
 * CONVERT_STORE_ROW_n expands CONVERT_STORE_ROW_(n-1) and then stores
 * CONVERT_SAT(BASENAME##i, VEC_DATA_TYPE(DATA_TYPE, N0)) with VSTORE(N0) to the
 * address (PTR) + i * (STRIDE_Y) + Z##i, cast to (__global DATA_TYPE *).
 * Rows 10..15 use the suffixes A..F for both BASENAME and Z.
 *
 * @param[in] N0        Vector width forwarded to VEC_DATA_TYPE and VSTORE
 * @param[in] DATA_TYPE Element type to convert to and to store as
 * @param[in] BASENAME  Prefix of the row variables to convert and store
 * @param[in] PTR       Base address
 * @param[in] STRIDE_Y  Per-row stride, multiplied by the row index
 * @param[in] Z         Prefix of the per-row extra offsets Z##i
 *
 * CONVERT_STORE_ROW_10 names its second parameter DATA_TYPE like every other
 * row count. If it were named differently, the DATA_TYPE tokens in its body
 * would not be substituted and would only resolve through an unrelated
 * DATA_TYPE definition in scope.
 * PTR and STRIDE_Y are parenthesised so expression arguments keep their precedence.
 */
#define CONVERT_STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##0), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 0 * (STRIDE_Y) + Z##0));

#define CONVERT_STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##1), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 1 * (STRIDE_Y) + Z##1));

#define CONVERT_STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##2), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 2 * (STRIDE_Y) + Z##2));

#define CONVERT_STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##3), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 3 * (STRIDE_Y) + Z##3));

#define CONVERT_STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##4), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 4 * (STRIDE_Y) + Z##4));

#define CONVERT_STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##5), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 5 * (STRIDE_Y) + Z##5));

#define CONVERT_STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##6), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 6 * (STRIDE_Y) + Z##6));

#define CONVERT_STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##7), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 7 * (STRIDE_Y) + Z##7));

#define CONVERT_STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##8), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 8 * (STRIDE_Y) + Z##8));

#define CONVERT_STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##9), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 9 * (STRIDE_Y) + Z##9));

#define CONVERT_STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##A), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 10 * (STRIDE_Y) + Z##A));

#define CONVERT_STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##B), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 11 * (STRIDE_Y) + Z##B));

#define CONVERT_STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##C), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 12 * (STRIDE_Y) + Z##C));

#define CONVERT_STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##D), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 13 * (STRIDE_Y) + Z##D));

#define CONVERT_STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##E), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 14 * (STRIDE_Y) + Z##E));

#define CONVERT_STORE_ROW_16(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0) \
  (CONVERT_SAT((BASENAME##F), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
   (__global DATA_TYPE *)((PTR) + 15 * (STRIDE_Y) + Z##F));
591
// end of group CONVERT_STORE_ROW_n
593
612
/** Store a block of M0 rows by dispatching to STORE_ROW_<M0>.
 *
 * STORE_BLOCK forwards to STORE_BLOCK_STR so that an M0 given as a macro is
 * expanded before it is pasted onto the STORE_ROW_ prefix.
 * All remaining arguments are passed through unchanged.
 *
 * @param[in] M0 Number of rows; selects which STORE_ROW_n is expanded
 */
#define STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
#define STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
// end of group STORE_BLOCK
617
636
/** Convert and store a block of M0 rows by dispatching to CONVERT_STORE_ROW_<M0>.
 *
 * CONVERT_STORE_BLOCK forwards to CONVERT_STORE_BLOCK_STR so that an M0 given as a
 * macro is expanded before it is pasted onto the CONVERT_STORE_ROW_ prefix.
 * All remaining arguments are passed through unchanged.
 *
 * @param[in] M0 Number of rows; selects which CONVERT_STORE_ROW_n is expanded
 */
#define CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
#define CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
// end of group CONVERT_STORE_BLOCK
641
650
/** Multiply the first n row variables by a scale factor, n = 1..16.
 *
 * SCALE_ROW_n expands SCALE_ROW_(n-1) and then performs
 * BASENAME##i *= (DATA_TYPE)(SCALE). Rows 10..15 use the suffixes A..F.
 *
 * @param[in]     DATA_TYPE Type the scale is cast to before multiplying
 * @param[in,out] BASENAME  Prefix of the row variables to scale
 * @param[in]     SCALE     Scale factor
 *
 * SCALE is parenthesised so the cast applies to the whole argument,
 * not just to the first operand of an expression argument.
 */
#define SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) BASENAME##0 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##1 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##2 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##3 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##4 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##5 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##6 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##7 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##8 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##9 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##A *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##B *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##C *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##D *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##E *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_16(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \
  BASENAME##F *= (DATA_TYPE)(SCALE);
// end of group SCALE_ROW_n
712
724
/** Scale a block of N rows by dispatching to SCALE_ROW_<N>.
 *
 * SCALE_BLOCK forwards to SCALE_BLOCK_STR so that an N given as a macro is
 * expanded before it is pasted onto the SCALE_ROW_ prefix.
 *
 * @param[in] N Number of rows; selects which SCALE_ROW_n is expanded
 */
#define SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE) SCALE_ROW_##N(DATA_TYPE, BASENAME, SCALE)
#define SCALE_BLOCK(N, DATA_TYPE, BASENAME, SCALE) SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE)
// end of group SCALE_BLOCK
727
737
/** Build one column out of the rows X0..X(n-1), n = 1, 2, 3, 4, 8, 16.
 *
 * COLUMN_VECTORn declares BASENAME##IDX_COL and initialises it from the
 * component .s##IDX_COL of each of the n row variables X##0 .. X##(n-1)
 * (rows 10..15 use the suffixes A..F). For n == 1 the result has type TYPE;
 * otherwise it has type VEC_DATA_TYPE(TYPE, n).
 *
 * @param[in] IDX_COL  Component index pasted after ".s" and onto BASENAME
 * @param[in] BASENAME Prefix of the declared column variable
 * @param[in] X        Prefix of the source row variables
 * @param[in] TYPE     Element type of the result
 */
#define COLUMN_VECTOR1(IDX_COL, BASENAME, X, TYPE) \
  TYPE BASENAME##IDX_COL = (TYPE)((X##0).s##IDX_COL);
#define COLUMN_VECTOR2(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 2) \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0).s##IDX_COL, (X##1).s##IDX_COL);
#define COLUMN_VECTOR3(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 3) \
  BASENAME##IDX_COL = \
    (VEC_DATA_TYPE(TYPE, 3))((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL);
#define COLUMN_VECTOR4(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 4) \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0).s##IDX_COL, (X##1).s##IDX_COL, \
                                               (X##2).s##IDX_COL, (X##3).s##IDX_COL);
#define COLUMN_VECTOR8(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 8) \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 8))( \
    (X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, (X##4).s##IDX_COL, \
    (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL);
#define COLUMN_VECTOR16(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 16) \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 16))( \
    (X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, (X##4).s##IDX_COL, \
    (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL, (X##8).s##IDX_COL, (X##9).s##IDX_COL, \
    (X##A).s##IDX_COL, (X##B).s##IDX_COL, (X##C).s##IDX_COL, (X##D).s##IDX_COL, (X##E).s##IDX_COL, \
    (X##F).s##IDX_COL);
// end of group COLUMN_VECTORn
763
774
/** Build one column out of the scalar rows X0..X(n-1), n = 1, 2, 3, 4, 8, 16.
 *
 * COLUMN_VECTOR_SCALARn declares BASENAME##IDX_COL and initialises it from the
 * row variables X##0 .. X##(n-1) themselves; no vector component is selected
 * (rows 10..15 use the suffixes A..F). For n == 1 the result has type TYPE;
 * otherwise it has type VEC_DATA_TYPE(TYPE, n).
 *
 * @param[in] IDX_COL  Suffix pasted onto BASENAME
 * @param[in] BASENAME Prefix of the declared column variable
 * @param[in] X        Prefix of the source row variables
 * @param[in] TYPE     Element type of the result
 */
#define COLUMN_VECTOR_SCALAR1(IDX_COL, BASENAME, X, TYPE) TYPE BASENAME##IDX_COL = (TYPE)((X##0));
#define COLUMN_VECTOR_SCALAR2(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 2) \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0), (X##1));
#define COLUMN_VECTOR_SCALAR3(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 3) \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 3))((X##0), (X##1), (X##2));
#define COLUMN_VECTOR_SCALAR4(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 4) \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0), (X##1), (X##2), (X##3));
#define COLUMN_VECTOR_SCALAR8(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 8) \
  BASENAME##IDX_COL = \
    (VEC_DATA_TYPE(TYPE, 8))((X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7));
#define COLUMN_VECTOR_SCALAR16(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 16) \
  BASENAME##IDX_COL = \
    (VEC_DATA_TYPE(TYPE, 16))((X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7), \
                              (X##8), (X##9), (X##A), (X##B), (X##C), (X##D), (X##E), (X##F));
// end of group COLUMN_VECTOR_SCALARn
794
804
/** Create the first n column variables of a K0-row block, n = 1, 2, 3, 4, 8, 16.
 *
 * TRANSPOSE_K0Xn expands COLUMN_VECTOR(K0, c, BASENAME, BS, TYPE) for every column
 * c = 0 .. n-1 (columns 10..15 are written A..F), reusing the next smaller
 * TRANSPOSE_K0X macro for the leading columns. The single-column case
 * TRANSPOSE_K0X1 goes through COLUMN_VECTOR_SCALAR instead.
 *
 * @param[in] K0       Row count forwarded to COLUMN_VECTOR / COLUMN_VECTOR_SCALAR
 * @param[in] BASENAME Prefix of the created column variables
 * @param[in] BS       Prefix of the source row variables
 * @param[in] TYPE     Element type of the result
 *
 * The source parameter is named BS rather than B on purpose: TRANSPOSE_K0X16
 * passes the literal column index B, and a parameter called B would replace
 * that literal with the caller's argument.
 */
#define TRANSPOSE_K0X1(K0, BASENAME, BS, TYPE) COLUMN_VECTOR_SCALAR(K0, 0, BASENAME, BS, TYPE);
#define TRANSPOSE_K0X2(K0, BASENAME, BS, TYPE) \
  COLUMN_VECTOR(K0, 0, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, 1, BASENAME, BS, TYPE);
#define TRANSPOSE_K0X3(K0, BASENAME, BS, TYPE) \
  TRANSPOSE_K0X2(K0, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, 2, BASENAME, BS, TYPE);
#define TRANSPOSE_K0X4(K0, BASENAME, BS, TYPE) \
  TRANSPOSE_K0X3(K0, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, 3, BASENAME, BS, TYPE);
#define TRANSPOSE_K0X8(K0, BASENAME, BS, TYPE) \
  TRANSPOSE_K0X4(K0, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, 4, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, 5, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, 6, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, 7, BASENAME, BS, TYPE);
#define TRANSPOSE_K0X16(K0, BASENAME, BS, TYPE) \
  TRANSPOSE_K0X8(K0, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, 8, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, 9, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, A, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, B, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, C, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, D, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, E, BASENAME, BS, TYPE); \
  COLUMN_VECTOR(K0, F, BASENAME, BS, TYPE);
830
// end of group TRANSPOSE_K0Xn
832
841
/** Create column IDX_COL of a K0-row block by dispatching to COLUMN_VECTOR<K0>.
 *
 * CONCAT(COLUMN_VECTOR, K0) forms the name of the size-specific macro, which is
 * then invoked with the remaining arguments. The expansion ends with a semicolon.
 *
 * @param[in] K0       Number of rows; selects COLUMN_VECTOR<K0>
 * @param[in] IDX_COL  Column index
 * @param[in] BASENAME Prefix of the created column variable
 * @param[in] B        Prefix of the source row variables
 * @param[in] TYPE     Element type of the result
 */
#define COLUMN_VECTOR(K0, IDX_COL, BASENAME, B, TYPE) \
  CONCAT(COLUMN_VECTOR, K0) \
  (IDX_COL, BASENAME, B, TYPE);
844
854
/** Create column IDX_COL from K0 scalar rows by dispatching to COLUMN_VECTOR_SCALAR<K0>.
 *
 * CONCAT(COLUMN_VECTOR_SCALAR, K0) forms the name of the size-specific macro, which
 * is then invoked with the remaining arguments. The expansion ends with a semicolon.
 *
 * @param[in] K0       Number of rows; selects COLUMN_VECTOR_SCALAR<K0>
 * @param[in] IDX_COL  Column index
 * @param[in] BASENAME Prefix of the created column variable
 * @param[in] B        Prefix of the source row variables
 * @param[in] TYPE     Element type of the result
 */
#define COLUMN_VECTOR_SCALAR(K0, IDX_COL, BASENAME, B, TYPE) \
  CONCAT(COLUMN_VECTOR_SCALAR, K0) \
  (IDX_COL, BASENAME, B, TYPE);
857
867
/** Create N0 column variables from a K0-row block by dispatching to TRANSPOSE_K0X<N0>.
 *
 * CONCAT(TRANSPOSE_K0X, N0) forms the name of the size-specific macro, which is
 * then invoked with (K0, BASENAME, B, TYPE). The expansion ends with a semicolon.
 *
 * @param[in] K0       Number of rows, forwarded unchanged
 * @param[in] N0       Number of columns; selects TRANSPOSE_K0X<N0>
 * @param[in] BASENAME Prefix of the created column variables
 * @param[in] B        Prefix of the source row variables
 * @param[in] TYPE     Element type of the result
 */
#define TRANSPOSE_K0XN0(K0, N0, BASENAME, B, TYPE) \
  CONCAT(TRANSPOSE_K0X, N0) \
  (K0, BASENAME, B, TYPE);
870
878
/** Add a per-row bias to the first n row variables, n = 1..16.
 *
 * ADD_ROW_n expands ADD_ROW_(n-1) and then performs BASENAME##i += BIAS##i.
 * Rows 10..15 use the suffixes A..F for both BASENAME and BIAS.
 *
 * @param[in,out] BASENAME Prefix of the row variables that are updated
 * @param[in]     BIAS     Prefix of the row variables that are added
 */
#define ADD_ROW_1(BASENAME, BIAS) BASENAME##0 += BIAS##0;

#define ADD_ROW_2(BASENAME, BIAS) \
  ADD_ROW_1(BASENAME, BIAS) \
  BASENAME##1 += BIAS##1;

#define ADD_ROW_3(BASENAME, BIAS) \
  ADD_ROW_2(BASENAME, BIAS) \
  BASENAME##2 += BIAS##2;

#define ADD_ROW_4(BASENAME, BIAS) \
  ADD_ROW_3(BASENAME, BIAS) \
  BASENAME##3 += BIAS##3;

#define ADD_ROW_5(BASENAME, BIAS) \
  ADD_ROW_4(BASENAME, BIAS) \
  BASENAME##4 += BIAS##4;

#define ADD_ROW_6(BASENAME, BIAS) \
  ADD_ROW_5(BASENAME, BIAS) \
  BASENAME##5 += BIAS##5;

#define ADD_ROW_7(BASENAME, BIAS) \
  ADD_ROW_6(BASENAME, BIAS) \
  BASENAME##6 += BIAS##6;

#define ADD_ROW_8(BASENAME, BIAS) \
  ADD_ROW_7(BASENAME, BIAS) \
  BASENAME##7 += BIAS##7;

#define ADD_ROW_9(BASENAME, BIAS) \
  ADD_ROW_8(BASENAME, BIAS) \
  BASENAME##8 += BIAS##8;

#define ADD_ROW_10(BASENAME, BIAS) \
  ADD_ROW_9(BASENAME, BIAS) \
  BASENAME##9 += BIAS##9;

#define ADD_ROW_11(BASENAME, BIAS) \
  ADD_ROW_10(BASENAME, BIAS) \
  BASENAME##A += BIAS##A;

#define ADD_ROW_12(BASENAME, BIAS) \
  ADD_ROW_11(BASENAME, BIAS) \
  BASENAME##B += BIAS##B;

#define ADD_ROW_13(BASENAME, BIAS) \
  ADD_ROW_12(BASENAME, BIAS) \
  BASENAME##C += BIAS##C;

#define ADD_ROW_14(BASENAME, BIAS) \
  ADD_ROW_13(BASENAME, BIAS) \
  BASENAME##D += BIAS##D;

#define ADD_ROW_15(BASENAME, BIAS) \
  ADD_ROW_14(BASENAME, BIAS) \
  BASENAME##E += BIAS##E;

#define ADD_ROW_16(BASENAME, BIAS) \
  ADD_ROW_15(BASENAME, BIAS) \
  BASENAME##F += BIAS##F;
939
// end of group ADD_ROW_n
/** Add a block of N rows element-wise: BASENAME##i += BIAS##i for each row.
 *
 * ADD_BLOCK forwards to ADD_BLOCK_STR so that N is macro-expanded before it is
 * pasted onto ADD_ROW_; this lets callers pass N as a macro (for example a
 * build-time constant) rather than only as a literal. ADD_BLOCK_STR is the
 * pasting step and should not be called directly with an unexpanded macro.
 *
 * @note Relies on the ADD_ROW_<N> macro family being visible at the point of
 *       expansion.
 *
 * @param[in]     N        Number of rows; selects ADD_ROW_<N>.
 * @param[in,out] BASENAME Basename of the variables that are updated in place.
 * @param[in]     BIAS     Basename of the variables that are added.
 */
#define ADD_BLOCK_STR(N, BASENAME, BIAS) ADD_ROW_##N(BASENAME, BIAS)
#define ADD_BLOCK(N, BASENAME, BIAS) ADD_BLOCK_STR(N, BASENAME, BIAS)
// end of group ADD_BLOCK
/** ADD_ROW_BROADCAST_n(BASENAME, BIAS): add one value to every row, n = 1..16.
 *
 * Expands to n statements of the form
 *     BASENAME##i += BIAS;
 * for the first n row suffixes in the order 0-9, A-F. Unlike ADD_ROW_n, BIAS
 * is used verbatim (no row suffix is pasted onto it), so the same operand is
 * added to every row. Each ADD_ROW_BROADCAST_n expands
 * ADD_ROW_BROADCAST_(n-1) and then appends the statement for the next row.
 *
 * Every statement already ends with a semicolon, so no semicolon is needed
 * after the macro invocation.
 *
 * @param[in,out] BASENAME Basename of the variables that are updated in place
 *                         (BASENAME0, BASENAME1, ...).
 * @param[in]     BIAS     Operand added to each row; it appears once per row
 *                         in the expansion, so it should be free of side effects.
 */
#define ADD_ROW_BROADCAST_1(BASENAME, BIAS) BASENAME##0 += BIAS;

#define ADD_ROW_BROADCAST_2(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_1(BASENAME, BIAS)     \
    BASENAME##1 += BIAS;

#define ADD_ROW_BROADCAST_3(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_2(BASENAME, BIAS)     \
    BASENAME##2 += BIAS;

#define ADD_ROW_BROADCAST_4(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_3(BASENAME, BIAS)     \
    BASENAME##3 += BIAS;

#define ADD_ROW_BROADCAST_5(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_4(BASENAME, BIAS)     \
    BASENAME##4 += BIAS;

#define ADD_ROW_BROADCAST_6(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_5(BASENAME, BIAS)     \
    BASENAME##5 += BIAS;

#define ADD_ROW_BROADCAST_7(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_6(BASENAME, BIAS)     \
    BASENAME##6 += BIAS;

#define ADD_ROW_BROADCAST_8(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_7(BASENAME, BIAS)     \
    BASENAME##7 += BIAS;

#define ADD_ROW_BROADCAST_9(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_8(BASENAME, BIAS)     \
    BASENAME##8 += BIAS;

#define ADD_ROW_BROADCAST_10(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_9(BASENAME, BIAS)      \
    BASENAME##9 += BIAS;

#define ADD_ROW_BROADCAST_11(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_10(BASENAME, BIAS)     \
    BASENAME##A += BIAS;

#define ADD_ROW_BROADCAST_12(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_11(BASENAME, BIAS)     \
    BASENAME##B += BIAS;

#define ADD_ROW_BROADCAST_13(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_12(BASENAME, BIAS)     \
    BASENAME##C += BIAS;

#define ADD_ROW_BROADCAST_14(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_13(BASENAME, BIAS)     \
    BASENAME##D += BIAS;

#define ADD_ROW_BROADCAST_15(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_14(BASENAME, BIAS)     \
    BASENAME##E += BIAS;

#define ADD_ROW_BROADCAST_16(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_15(BASENAME, BIAS)     \
    BASENAME##F += BIAS;
/** Add the same BIAS operand to each of N rows: BASENAME##i += BIAS.
 *
 * ADD_BLOCK_BROADCAST forwards to ADD_BLOCK_BROADCAST_STR so that N is
 * macro-expanded before it is pasted onto ADD_ROW_BROADCAST_; this lets
 * callers pass N as a macro rather than only as a literal.
 * ADD_BLOCK_BROADCAST_STR is the pasting step and should not be called
 * directly with an unexpanded macro.
 *
 * @note Relies on the ADD_ROW_BROADCAST_<N> macro family being visible at the
 *       point of expansion.
 *
 * @param[in]     N        Number of rows; selects ADD_ROW_BROADCAST_<N>.
 * @param[in,out] BASENAME Basename of the variables that are updated in place.
 * @param[in]     BIAS     Operand added to every row.
 */
#define ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS) ADD_ROW_BROADCAST_##N(BASENAME, BIAS)
#define ADD_BLOCK_BROADCAST(N, BASENAME, BIAS) ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS)
// end of group ADD_BLOCK_BROADCAST
/** ACTIVATION_ROW_n(...): apply ACTIVATION to each of the first n rows, n = 1..16.
 *
 * Expands to n statements of the form
 *     BASENAME##i = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##i, A_VAL, B_VAL);
 * for the row suffixes in the order 0-9, A-F, i.e. every row is overwritten
 * with the result of ACTIVATION applied to itself. Each ACTIVATION_ROW_n
 * expands ACTIVATION_ROW_(n-1) and then appends the statement for the next row.
 *
 * Every statement already ends with a semicolon, so no semicolon is needed
 * after the macro invocation.
 *
 * @note ACTIVATION is not defined in this part of the file (presumably it
 *       comes from activation_float_helpers.h - TODO confirm).
 *
 * @param[in]     ACTIVATION_TYPE Forwarded to ACTIVATION as its first argument.
 * @param[in]     DATA_TYPE       Forwarded to ACTIVATION as its second argument.
 * @param[in,out] BASENAME        Basename of the variables that are read and
 *                                overwritten (BASENAME0, BASENAME1, ...).
 * @param[in]     A_VAL           Forwarded to ACTIVATION as its fourth argument.
 * @param[in]     B_VAL           Forwarded to ACTIVATION as its fifth argument.
 */
#define ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##0 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##0, A_VAL, B_VAL);

#define ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##1 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##1, A_VAL, B_VAL);

#define ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##2 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##2, A_VAL, B_VAL);

#define ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##3 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##3, A_VAL, B_VAL);

#define ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##4 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##4, A_VAL, B_VAL);

#define ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##5 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##5, A_VAL, B_VAL);

#define ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##6 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##6, A_VAL, B_VAL);

#define ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##7 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##7, A_VAL, B_VAL);

#define ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##8 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##8, A_VAL, B_VAL);

#define ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)      \
    BASENAME##9 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##9, A_VAL, B_VAL);

#define ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##A = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##A, A_VAL, B_VAL);

#define ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##B = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##B, A_VAL, B_VAL);

#define ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##C = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##C, A_VAL, B_VAL);

#define ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##D = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##D, A_VAL, B_VAL);

#define ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##E = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##E, A_VAL, B_VAL);

#define ACTIVATION_ROW_16(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##F = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##F, A_VAL, B_VAL);
// end of group ACTIVATION_ROW_n
/** Apply ACTIVATION in place to a block of N rows.
 *
 * ACTIVATION_BLOCK forwards to ACTIVATION_BLOCK_STR so that N is
 * macro-expanded before it is pasted onto ACTIVATION_ROW_; this lets callers
 * pass N as a macro rather than only as a literal. ACTIVATION_BLOCK_STR is the
 * pasting step and should not be called directly with an unexpanded macro.
 *
 * @note Relies on the ACTIVATION_ROW_<N> macro family being visible at the
 *       point of expansion.
 *
 * @param[in]     N               Number of rows; selects ACTIVATION_ROW_<N>.
 * @param[in]     ACTIVATION_TYPE Forwarded unchanged to ACTIVATION_ROW_<N>.
 * @param[in]     DATA_TYPE       Forwarded unchanged to ACTIVATION_ROW_<N>.
 * @param[in,out] BASENAME        Basename of the variables that are overwritten.
 * @param[in]     A_VAL           Forwarded unchanged to ACTIVATION_ROW_<N>.
 * @param[in]     B_VAL           Forwarded unchanged to ACTIVATION_ROW_<N>.
 */
#define ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_##N(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION_BLOCK(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
// end of group ACTIVATION_BLOCK
/** CONVERT_ROW_m(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST): declare converted
 * copies of the first m rows, m = 1..16.
 *
 * Expands to m declarations of the form
 *     VEC_DATA_TYPE(DATA_TYPE, N) BASENAME_DST##i =
 *         CONVERT(BASENAME_SRC##i, VEC_DATA_TYPE(DATA_TYPE, N));
 * for the row suffixes in the order 0-9, A-F. Each CONVERT_ROW_m expands
 * CONVERT_ROW_(m-1) and then appends the declaration for the next row.
 *
 * Because the expansion *declares* BASENAME_DST##i, those names must not
 * already exist in the enclosing scope. Every declaration already ends with a
 * semicolon, so no semicolon is needed after the macro invocation.
 *
 * @note VEC_DATA_TYPE and CONVERT are not defined in this part of the file;
 *       they must be visible wherever these macros are expanded.
 *
 * @param[in] N            Second argument of VEC_DATA_TYPE (presumably the
 *                         vector width - TODO confirm).
 * @param[in] DATA_TYPE    First argument of VEC_DATA_TYPE (presumably the
 *                         destination element type - TODO confirm).
 * @param[in] BASENAME_SRC Basename of the existing source variables.
 * @param[in] BASENAME_DST Basename of the variables that are declared.
 */
#define CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##0 = CONVERT(BASENAME_SRC##0, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##1 = CONVERT(BASENAME_SRC##1, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##2 = CONVERT(BASENAME_SRC##2, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##3 = CONVERT(BASENAME_SRC##3, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##4 = CONVERT(BASENAME_SRC##4, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##5 = CONVERT(BASENAME_SRC##5, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##6 = CONVERT(BASENAME_SRC##6, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##7 = CONVERT(BASENAME_SRC##7, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##8 = CONVERT(BASENAME_SRC##8, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)      \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##9 = CONVERT(BASENAME_SRC##9, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##A = CONVERT(BASENAME_SRC##A, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##B = CONVERT(BASENAME_SRC##B, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##C = CONVERT(BASENAME_SRC##C, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##D = CONVERT(BASENAME_SRC##D, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##E = CONVERT(BASENAME_SRC##E, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_16(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##F = CONVERT(BASENAME_SRC##F, VEC_DATA_TYPE(DATA_TYPE, N));
// end of group CONVERT_ROW_n
/** Declare converted copies of a block of M rows.
 *
 * CONVERT_BLOCK forwards to CONVERT_BLOCK_STR so that M is macro-expanded
 * before it is pasted onto CONVERT_ROW_; this lets callers pass M as a macro
 * rather than only as a literal. CONVERT_BLOCK_STR is the pasting step and
 * should not be called directly with an unexpanded macro.
 *
 * @note Relies on the CONVERT_ROW_<M> macro family being visible at the point
 *       of expansion.
 *
 * @param[in] M            Number of rows; selects CONVERT_ROW_<M>.
 * @param[in] N            Forwarded unchanged to CONVERT_ROW_<M>.
 * @param[in] DATA_TYPE    Forwarded unchanged to CONVERT_ROW_<M>.
 * @param[in] BASENAME_SRC Basename of the source variables.
 * @param[in] BASENAME_DST Basename of the destination variables.
 */
#define CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_##M(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
#define CONVERT_BLOCK(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
// end of group CONVERT_BLOCK
activation_float_helpers.h
helpers.h
compute
ARMComputeEx
src
core
CL
cl_kernels
gemm_helpers.h
Generated by
1.9.8