ONE - On-device Neural Engine
Loading...
Searching...
No Matches
gemm_helpers.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Copyright (c) 2019-2020 ARM Limited.
19 *
20 * SPDX-License-Identifier: MIT
21 *
22 * Permission is hereby granted, free of charge, to any person obtaining a copy
23 * of this software and associated documentation files (the "Software"), to
24 * deal in the Software without restriction, including without limitation the
25 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
26 * sell copies of the Software, and to permit persons to whom the Software is
27 * furnished to do so, subject to the following conditions:
28 *
29 * The above copyright notice and this permission notice shall be included in all
30 * copies or substantial portions of the Software.
31 *
32 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38 * SOFTWARE.
39 */
41#include "helpers.h"
42
/**
 * LOAD_ROW_n (n = 1..16): declare and load n rows, one vector variable per row.
 *
 * Row k is declared as VEC_DATA_TYPE(DATA_TYPE, N0) BASENAME<k> and initialised with
 * VLOAD(N0)(0, p), where p is PTR + OFFSET + k * STRIDE_Y + Z<k> (computed in PTR's own
 * pointer units) cast to __global DATA_TYPE *. Rows are suffixed with one hexadecimal
 * digit: 0-9 and then A-F for rows 10-15. LOAD_ROW_n first expands LOAD_ROW_(n-1) and then
 * adds row n-1.
 *
 * Because the expansion contains declarations, it must appear where declarations are legal,
 * and variables Z<0> .. Z<n-1> must already exist.
 *
 * STRIDE_Y is parenthesised so that a compound stride (e.g. a + b) is multiplied as a whole
 * by the row index.
 *
 * VEC_DATA_TYPE and VLOAD are not defined in this part of the file (presumably helpers.h).
 *
 * @param N0        Vector width of each row.
 * @param DATA_TYPE Element type of each row.
 * @param BASENAME  Prefix of the declared row variables.
 * @param PTR       Base pointer.
 * @param OFFSET    Offset added to PTR.
 * @param STRIDE_Y  Distance between two consecutive rows.
 * @param Z         Prefix of the per-row extra offset variables.
 */
#define LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                        \
  BASENAME##0 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 0 * (STRIDE_Y) + Z##0));

#define LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                        \
  BASENAME##1 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 1 * (STRIDE_Y) + Z##1));

#define LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                        \
  BASENAME##2 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 2 * (STRIDE_Y) + Z##2));

#define LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                        \
  BASENAME##3 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 3 * (STRIDE_Y) + Z##3));

#define LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                        \
  BASENAME##4 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 4 * (STRIDE_Y) + Z##4));

#define LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                        \
  BASENAME##5 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 5 * (STRIDE_Y) + Z##5));

#define LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                        \
  BASENAME##6 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 6 * (STRIDE_Y) + Z##6));

#define LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                        \
  BASENAME##7 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 7 * (STRIDE_Y) + Z##7));

#define LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                        \
  BASENAME##8 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 8 * (STRIDE_Y) + Z##8));

#define LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)        \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##9 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 9 * (STRIDE_Y) + Z##9));

#define LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##A = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 10 * (STRIDE_Y) + Z##A));

#define LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##B = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 11 * (STRIDE_Y) + Z##B));

#define LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##C = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 12 * (STRIDE_Y) + Z##C));

#define LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##D = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 13 * (STRIDE_Y) + Z##D));

#define LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##E = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 14 * (STRIDE_Y) + Z##E));

#define LOAD_ROW_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##F = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 15 * (STRIDE_Y) + Z##F));
133
// end of group LOAD_ROW_n
135
/**
 * LOAD_BLOCK: load a block of M0 rows, each N0 elements wide.
 *
 * Forwards every argument to LOAD_ROW_<M0>. The extra LOAD_BLOCK_STR level exists so that
 * an M0 which is itself a macro (for example a build-time definition) is expanded to its
 * value before it is pasted onto "LOAD_ROW_".
 *
 * @param M0        Number of rows; must expand to a value for which LOAD_ROW_<M0> exists.
 * @param N0        Vector width of each row.
 * @param DATA_TYPE Element type.
 * @param BASENAME  Prefix of the row variables that get declared.
 * @param PTR       Base pointer.
 * @param OFFSET    Offset added to PTR.
 * @param STRIDE_Y  Distance between two consecutive rows.
 * @param Z         Prefix of the per-row extra offset variables.
 */
#define LOAD_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)

/* Pasting step of LOAD_BLOCK; M0 is already expanded when this is reached via LOAD_BLOCK. */
#define LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
  LOAD_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
// end of group LOAD_BLOCK
160
/**
 * LOAD_ELEMENT_n (n = 1..16): declare n row variables, each initialised from ONE element.
 *
 * Row k is declared as VEC_DATA_TYPE(DATA_TYPE, N0) BASENAME<k> and initialised with the
 * single DATA_TYPE value read from PTR + OFFSET + k * STRIDE_Y (computed in PTR's own
 * pointer units, then cast to __global DATA_TYPE *). Rows 10-15 use the suffixes A-F.
 * LOAD_ELEMENT_n first expands LOAD_ELEMENT_(n-1) and then adds row n-1.
 *
 * STRIDE_Y is parenthesised so that a compound stride (e.g. a + b) is multiplied as a whole
 * by the row index.
 *
 * VEC_DATA_TYPE is not defined in this part of the file (presumably helpers.h).
 *
 * @param N0        Vector width of the declared variables.
 * @param DATA_TYPE Element type.
 * @param BASENAME  Prefix of the declared row variables.
 * @param PTR       Base pointer.
 * @param OFFSET    Offset added to PTR.
 * @param STRIDE_Y  Distance between two consecutive rows.
 */
#define LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##0 = *((__global DATA_TYPE *)(PTR + OFFSET + 0 * (STRIDE_Y)));

#define LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##1 = *((__global DATA_TYPE *)(PTR + OFFSET + 1 * (STRIDE_Y)));

#define LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##2 = *((__global DATA_TYPE *)(PTR + OFFSET + 2 * (STRIDE_Y)));

#define LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##3 = *((__global DATA_TYPE *)(PTR + OFFSET + 3 * (STRIDE_Y)));

#define LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##4 = *((__global DATA_TYPE *)(PTR + OFFSET + 4 * (STRIDE_Y)));

#define LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##5 = *((__global DATA_TYPE *)(PTR + OFFSET + 5 * (STRIDE_Y)));

#define LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##6 = *((__global DATA_TYPE *)(PTR + OFFSET + 6 * (STRIDE_Y)));

#define LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##7 = *((__global DATA_TYPE *)(PTR + OFFSET + 7 * (STRIDE_Y)));

#define LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                         \
  BASENAME##8 = *((__global DATA_TYPE *)(PTR + OFFSET + 8 * (STRIDE_Y)));

#define LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)        \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                          \
  BASENAME##9 = *((__global DATA_TYPE *)(PTR + OFFSET + 9 * (STRIDE_Y)));

#define LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                          \
  BASENAME##A = *((__global DATA_TYPE *)(PTR + OFFSET + 10 * (STRIDE_Y)));

#define LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                          \
  BASENAME##B = *((__global DATA_TYPE *)(PTR + OFFSET + 11 * (STRIDE_Y)));

#define LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                          \
  BASENAME##C = *((__global DATA_TYPE *)(PTR + OFFSET + 12 * (STRIDE_Y)));

#define LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                          \
  BASENAME##D = *((__global DATA_TYPE *)(PTR + OFFSET + 13 * (STRIDE_Y)));

#define LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                          \
  BASENAME##E = *((__global DATA_TYPE *)(PTR + OFFSET + 14 * (STRIDE_Y)));

#define LOAD_ELEMENT_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)       \
  VEC_DATA_TYPE(DATA_TYPE, N0)                                          \
  BASENAME##F = *((__global DATA_TYPE *)(PTR + OFFSET + 15 * (STRIDE_Y)));
250
// end of group LOAD_ELEMENT_n
252
/**
 * LOAD_SCALAR_AS_VECTOR: declare M0 row variables, each initialised from a single element.
 *
 * Forwards every argument to LOAD_ELEMENT_<M0>. The intermediate LOAD_SCALAR_AS_VECTOR_STR
 * level lets an M0 that is itself a macro be expanded before it is pasted onto
 * "LOAD_ELEMENT_".
 *
 * @param M0        Number of rows; must expand to a value for which LOAD_ELEMENT_<M0> exists.
 * @param N0        Vector width of the declared variables.
 * @param DATA_TYPE Element type.
 * @param BASENAME  Prefix of the row variables that get declared.
 * @param PTR       Base pointer.
 * @param OFFSET    Offset added to PTR.
 * @param STRIDE_Y  Distance between two consecutive rows.
 */
#define LOAD_SCALAR_AS_VECTOR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)

/* Pasting step of LOAD_SCALAR_AS_VECTOR. */
#define LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
  LOAD_ELEMENT_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
// end of group LOAD_SCALAR_AS_VECTOR
274
/**
 * CALCULATE_Z_OFFSET_n (n = 1..8): compute the per-row offsets Z<0> .. Z<n-1>.
 *
 * For row k the expansion performs, in DATA_TYPE arithmetic:
 *   Z<k> = (k + Y * M0) / HEIGHT_GEMM3D;
 *   Z<k> = min(DEPTH_GEMM3D - 1, Z<k>);
 *   Z<k> *= CROSS_PLANE_PAD * STRIDE_Y;
 * The variables Z<k> are only assigned, never declared, so they must already exist.
 * CALCULATE_Z_OFFSET_n first expands CALCULATE_Z_OFFSET_(n-1) and then adds row n-1.
 *
 * Every macro argument that takes part in a cast, product or subtraction is parenthesised,
 * so compound expressions (e.g. Y given as "a + b") are evaluated as a whole.
 *
 * min is not defined in this file; in OpenCL C it is a built-in function.
 *
 * @param M0              Number of rows processed per work unit (multiplied with Y).
 * @param DATA_TYPE       Integer type used for the computation and the casts.
 * @param Z               Prefix of the offset variables that are written.
 * @param Y               Row-block index multiplied by M0.
 * @param HEIGHT_GEMM3D   Divisor applied to the absolute row index.
 * @param DEPTH_GEMM3D    Upper bound: the quotient is clamped to DEPTH_GEMM3D - 1.
 * @param CROSS_PLANE_PAD Factor multiplied with STRIDE_Y to scale the clamped quotient.
 * @param STRIDE_Y        Factor multiplied with CROSS_PLANE_PAD.
 */
#define CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y)                                                          \
  Z##0 = (0 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D);                 \
  Z##0 = min((DATA_TYPE)((DEPTH_GEMM3D)-1), Z##0);                                              \
  Z##0 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y)                                                          \
  CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD,       \
                       STRIDE_Y)                                                                \
  Z##1 = (1 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D);                 \
  Z##1 = min((DATA_TYPE)((DEPTH_GEMM3D)-1), Z##1);                                              \
  Z##1 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y)                                                          \
  CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD,       \
                       STRIDE_Y)                                                                \
  Z##2 = (2 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D);                 \
  Z##2 = min((DATA_TYPE)((DEPTH_GEMM3D)-1), Z##2);                                              \
  Z##2 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y)                                                          \
  CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD,       \
                       STRIDE_Y)                                                                \
  Z##3 = (3 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D);                 \
  Z##3 = min((DATA_TYPE)((DEPTH_GEMM3D)-1), Z##3);                                              \
  Z##3 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y)                                                          \
  CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD,       \
                       STRIDE_Y)                                                                \
  Z##4 = (4 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D);                 \
  Z##4 = min((DATA_TYPE)((DEPTH_GEMM3D)-1), Z##4);                                              \
  Z##4 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y)                                                          \
  CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD,       \
                       STRIDE_Y)                                                                \
  Z##5 = (5 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D);                 \
  Z##5 = min((DATA_TYPE)((DEPTH_GEMM3D)-1), Z##5);                                              \
  Z##5 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y)                                                          \
  CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD,       \
                       STRIDE_Y)                                                                \
  Z##6 = (6 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D);                 \
  Z##6 = min((DATA_TYPE)((DEPTH_GEMM3D)-1), Z##6);                                              \
  Z##6 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_8(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                             STRIDE_Y)                                                          \
  CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD,       \
                       STRIDE_Y)                                                                \
  Z##7 = (7 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D);                 \
  Z##7 = min((DATA_TYPE)((DEPTH_GEMM3D)-1), Z##7);                                              \
  Z##7 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));
350
// end of group CALCULATE_Z_OFFSET_n
352
/**
 * CALCULATE_Z_OFFSET: compute the per-row offsets Z<0> .. Z<M0-1>.
 *
 * Forwards every argument (M0 included) to CALCULATE_Z_OFFSET_<M0>. The intermediate
 * CALCULATE_Z_OFFSET_STR level lets an M0 that is itself a macro be expanded before it is
 * pasted onto "CALCULATE_Z_OFFSET_".
 *
 * @param M0              Number of rows; selects CALCULATE_Z_OFFSET_<M0> and is passed on.
 * @param DATA_TYPE       Integer type used for the computation.
 * @param Z               Prefix of the offset variables that are written.
 * @param Y               Row-block index.
 * @param HEIGHT_GEMM3D   Divisor applied to the absolute row index.
 * @param DEPTH_GEMM3D    Clamp bound (the quotient is limited to DEPTH_GEMM3D - 1).
 * @param CROSS_PLANE_PAD Factor multiplied with STRIDE_Y.
 * @param STRIDE_Y        Factor multiplied with CROSS_PLANE_PAD.
 */
#define CALCULATE_Z_OFFSET(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                           STRIDE_Y)                                                          \
  CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD,   \
                         STRIDE_Y)

/* Pasting step of CALCULATE_Z_OFFSET. */
#define CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
                               STRIDE_Y)                                                          \
  CALCULATE_Z_OFFSET_##M0(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD,      \
                          STRIDE_Y)
// end of group CALCULATE_Z_OFFSET
394
/**
 * STORE_ROW_n (n = 1..16): store n row variables, one VSTORE per row.
 *
 * Row k issues VSTORE(N0)(BASENAME<k>, 0, p), where p is PTR + k * STRIDE_Y + Z<k>
 * (computed in PTR's own pointer units) cast to __global DATA_TYPE *. Rows 10-15 use the
 * suffixes A-F. STORE_ROW_n first expands STORE_ROW_(n-1) and then adds row n-1.
 *
 * STRIDE_Y is parenthesised so that a compound stride (e.g. a + b) is multiplied as a whole
 * by the row index.
 *
 * VSTORE is not defined in this part of the file (presumably helpers.h).
 *
 * @param N0        Vector width of each row.
 * @param DATA_TYPE Element type of the destination.
 * @param BASENAME  Prefix of the row variables to store.
 * @param PTR       Base pointer of the destination.
 * @param STRIDE_Y  Distance between two consecutive rows.
 * @param Z         Prefix of the per-row extra offset variables.
 */
#define STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0)                                                   \
  (BASENAME##0, 0, (__global DATA_TYPE *)(PTR + 0 * (STRIDE_Y) + Z##0));

#define STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                   \
  (BASENAME##1, 0, (__global DATA_TYPE *)(PTR + 1 * (STRIDE_Y) + Z##1));

#define STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                   \
  (BASENAME##2, 0, (__global DATA_TYPE *)(PTR + 2 * (STRIDE_Y) + Z##2));

#define STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                   \
  (BASENAME##3, 0, (__global DATA_TYPE *)(PTR + 3 * (STRIDE_Y) + Z##3));

#define STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                   \
  (BASENAME##4, 0, (__global DATA_TYPE *)(PTR + 4 * (STRIDE_Y) + Z##4));

#define STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                   \
  (BASENAME##5, 0, (__global DATA_TYPE *)(PTR + 5 * (STRIDE_Y) + Z##5));

#define STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                   \
  (BASENAME##6, 0, (__global DATA_TYPE *)(PTR + 6 * (STRIDE_Y) + Z##6));

#define STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                   \
  (BASENAME##7, 0, (__global DATA_TYPE *)(PTR + 7 * (STRIDE_Y) + Z##7));

#define STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                   \
  (BASENAME##8, 0, (__global DATA_TYPE *)(PTR + 8 * (STRIDE_Y) + Z##8));

#define STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)        \
  VSTORE(N0)                                                    \
  (BASENAME##9, 0, (__global DATA_TYPE *)(PTR + 9 * (STRIDE_Y) + Z##9));

#define STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                    \
  (BASENAME##A, 0, (__global DATA_TYPE *)(PTR + 10 * (STRIDE_Y) + Z##A));

#define STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                    \
  (BASENAME##B, 0, (__global DATA_TYPE *)(PTR + 11 * (STRIDE_Y) + Z##B));

#define STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                    \
  (BASENAME##C, 0, (__global DATA_TYPE *)(PTR + 12 * (STRIDE_Y) + Z##C));

#define STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                    \
  (BASENAME##D, 0, (__global DATA_TYPE *)(PTR + 13 * (STRIDE_Y) + Z##D));

#define STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                    \
  (BASENAME##E, 0, (__global DATA_TYPE *)(PTR + 14 * (STRIDE_Y) + Z##E));

#define STORE_ROW_16(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                    \
  (BASENAME##F, 0, (__global DATA_TYPE *)(PTR + 15 * (STRIDE_Y) + Z##F));
// end of group STORE_ROW_n
485
/**
 * CONVERT_STORE_ROW_n (n = 1..16): convert n row variables and store them.
 *
 * Row k issues VSTORE(N0)(CONVERT_SAT(BASENAME<k>, VEC_DATA_TYPE(DATA_TYPE, N0)), 0, p),
 * where p is PTR + k * STRIDE_Y + Z<k> (computed in PTR's own pointer units) cast to
 * __global DATA_TYPE *. Rows 10-15 use the suffixes A-F. CONVERT_STORE_ROW_n first expands
 * CONVERT_STORE_ROW_(n-1) and then adds row n-1.
 *
 * Every variant names its second parameter DATA_TYPE, so the destination type is always the
 * one supplied by the caller. STRIDE_Y is parenthesised so that a compound stride
 * (e.g. a + b) is multiplied as a whole by the row index.
 *
 * VSTORE, CONVERT_SAT and VEC_DATA_TYPE are not defined in this part of the file
 * (presumably helpers.h).
 *
 * @param N0        Vector width of each row.
 * @param DATA_TYPE Element type the rows are converted to and stored as.
 * @param BASENAME  Prefix of the row variables to convert and store.
 * @param PTR       Base pointer of the destination.
 * @param STRIDE_Y  Distance between two consecutive rows.
 * @param Z         Prefix of the per-row extra offset variables.
 */
#define CONVERT_STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  VSTORE(N0)                                                           \
  (CONVERT_SAT((BASENAME##0), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,        \
   (__global DATA_TYPE *)(PTR + 0 * (STRIDE_Y) + Z##0));

#define CONVERT_STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                           \
  (CONVERT_SAT((BASENAME##1), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,        \
   (__global DATA_TYPE *)(PTR + 1 * (STRIDE_Y) + Z##1));

#define CONVERT_STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                           \
  (CONVERT_SAT((BASENAME##2), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,        \
   (__global DATA_TYPE *)(PTR + 2 * (STRIDE_Y) + Z##2));

#define CONVERT_STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                           \
  (CONVERT_SAT((BASENAME##3), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,        \
   (__global DATA_TYPE *)(PTR + 3 * (STRIDE_Y) + Z##3));

#define CONVERT_STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                           \
  (CONVERT_SAT((BASENAME##4), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,        \
   (__global DATA_TYPE *)(PTR + 4 * (STRIDE_Y) + Z##4));

#define CONVERT_STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                           \
  (CONVERT_SAT((BASENAME##5), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,        \
   (__global DATA_TYPE *)(PTR + 5 * (STRIDE_Y) + Z##5));

#define CONVERT_STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                           \
  (CONVERT_SAT((BASENAME##6), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,        \
   (__global DATA_TYPE *)(PTR + 6 * (STRIDE_Y) + Z##6));

#define CONVERT_STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                           \
  (CONVERT_SAT((BASENAME##7), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,        \
   (__global DATA_TYPE *)(PTR + 7 * (STRIDE_Y) + Z##7));

#define CONVERT_STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                           \
  (CONVERT_SAT((BASENAME##8), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,        \
   (__global DATA_TYPE *)(PTR + 8 * (STRIDE_Y) + Z##8));

#define CONVERT_STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)        \
  VSTORE(N0)                                                            \
  (CONVERT_SAT((BASENAME##9), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,         \
   (__global DATA_TYPE *)(PTR + 9 * (STRIDE_Y) + Z##9));

#define CONVERT_STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                            \
  (CONVERT_SAT((BASENAME##A), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,         \
   (__global DATA_TYPE *)(PTR + 10 * (STRIDE_Y) + Z##A));

#define CONVERT_STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                            \
  (CONVERT_SAT((BASENAME##B), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,         \
   (__global DATA_TYPE *)(PTR + 11 * (STRIDE_Y) + Z##B));

#define CONVERT_STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                            \
  (CONVERT_SAT((BASENAME##C), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,         \
   (__global DATA_TYPE *)(PTR + 12 * (STRIDE_Y) + Z##C));

#define CONVERT_STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                            \
  (CONVERT_SAT((BASENAME##D), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,         \
   (__global DATA_TYPE *)(PTR + 13 * (STRIDE_Y) + Z##D));

#define CONVERT_STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                            \
  (CONVERT_SAT((BASENAME##E), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,         \
   (__global DATA_TYPE *)(PTR + 14 * (STRIDE_Y) + Z##E));

#define CONVERT_STORE_ROW_16(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)       \
  VSTORE(N0)                                                            \
  (CONVERT_SAT((BASENAME##F), VEC_DATA_TYPE(DATA_TYPE, N0)), 0,         \
   (__global DATA_TYPE *)(PTR + 15 * (STRIDE_Y) + Z##F));
591
// end of group CONVERT_STORE_ROW_n
593
/**
 * STORE_BLOCK: store a block of M0 rows, each N0 elements wide.
 *
 * Forwards every argument to STORE_ROW_<M0>. The intermediate STORE_BLOCK_STR level lets an
 * M0 that is itself a macro be expanded before it is pasted onto "STORE_ROW_".
 *
 * @param M0        Number of rows; must expand to a value for which STORE_ROW_<M0> exists.
 * @param N0        Vector width of each row.
 * @param DATA_TYPE Element type of the destination.
 * @param BASENAME  Prefix of the row variables to store.
 * @param PTR       Base pointer of the destination.
 * @param STRIDE_Y  Distance between two consecutive rows.
 * @param Z         Prefix of the per-row extra offset variables.
 */
#define STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)

/* Pasting step of STORE_BLOCK. */
#define STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
// end of group STORE_BLOCK
617
/**
 * CONVERT_STORE_BLOCK: convert and store a block of M0 rows, each N0 elements wide.
 *
 * Forwards every argument to CONVERT_STORE_ROW_<M0>. The intermediate
 * CONVERT_STORE_BLOCK_STR level lets an M0 that is itself a macro be expanded before it is
 * pasted onto "CONVERT_STORE_ROW_".
 *
 * @param M0        Number of rows; must expand to a value for which CONVERT_STORE_ROW_<M0>
 *                  exists.
 * @param N0        Vector width of each row.
 * @param DATA_TYPE Element type the rows are converted to and stored as.
 * @param BASENAME  Prefix of the row variables to convert and store.
 * @param PTR       Base pointer of the destination.
 * @param STRIDE_Y  Distance between two consecutive rows.
 * @param Z         Prefix of the per-row extra offset variables.
 */
#define CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)

/* Pasting step of CONVERT_STORE_BLOCK. */
#define CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
  CONVERT_STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
// end of group CONVERT_STORE_BLOCK
641
/**
 * SCALE_ROW_n (n = 1..16): multiply n row variables in place by a scale factor.
 *
 * Row k expands to BASENAME<k> *= (DATA_TYPE)(SCALE);. Rows 10-15 use the suffixes A-F.
 * SCALE_ROW_n first expands SCALE_ROW_(n-1) and then adds row n-1.
 *
 * SCALE is parenthesised before the cast so that a compound expression (e.g. a + b) is
 * converted as a whole rather than only its first operand.
 *
 * @param DATA_TYPE Type the scale factor is cast to.
 * @param BASENAME  Prefix of the row variables that are scaled.
 * @param SCALE     Scale factor.
 */
#define SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) BASENAME##0 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE)       \
  BASENAME##1 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE)       \
  BASENAME##2 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE)       \
  BASENAME##3 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE)       \
  BASENAME##4 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE)       \
  BASENAME##5 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE)       \
  BASENAME##6 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE)       \
  BASENAME##7 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE)       \
  BASENAME##8 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE)        \
  BASENAME##9 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE)       \
  BASENAME##A *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE)       \
  BASENAME##B *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE)       \
  BASENAME##C *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE)       \
  BASENAME##D *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE)       \
  BASENAME##E *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_16(DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE)       \
  BASENAME##F *= (DATA_TYPE)(SCALE);
// end of group SCALE_ROW_n
712
/**
 * SCALE_BLOCK: multiply the N row variables BASENAME<0> .. BASENAME<N-1> by SCALE.
 *
 * Forwards to SCALE_ROW_<N>. The intermediate SCALE_BLOCK_STR level lets an N that is itself
 * a macro be expanded before it is pasted onto "SCALE_ROW_".
 *
 * @param N         Number of rows; must expand to a value for which SCALE_ROW_<N> exists.
 * @param DATA_TYPE Type the scale factor is cast to.
 * @param BASENAME  Prefix of the row variables that are scaled.
 * @param SCALE     Scale factor.
 */
#define SCALE_BLOCK(N, DATA_TYPE, BASENAME, SCALE) \
  SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE)

/* Pasting step of SCALE_BLOCK. */
#define SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE) \
  SCALE_ROW_##N(DATA_TYPE, BASENAME, SCALE)
// end of group SCALE_BLOCK
727
/**
 * COLUMN_VECTORn (n = 1, 2, 3, 4, 8, 16): gather one column out of n row vectors.
 *
 * Declares BASENAME<IDX_COL> and initialises it from component .s<IDX_COL> of the rows
 * X0 .. X<n-1> (rows 10-15 are named XA .. XF). For n = 1 the result is a plain TYPE value;
 * otherwise it is a VEC_DATA_TYPE(TYPE, n) built from the n components in row order.
 *
 * VEC_DATA_TYPE is not defined in this part of the file (presumably helpers.h).
 *
 * @param IDX_COL  Column index, pasted both into the result name and the .s<IDX_COL> selector.
 * @param BASENAME Prefix of the declared result variable.
 * @param X        Prefix of the source row variables.
 * @param TYPE     Element type of the result.
 */
#define COLUMN_VECTOR1(IDX_COL, BASENAME, X, TYPE) \
  TYPE BASENAME##IDX_COL = (TYPE)((X##0).s##IDX_COL);

#define COLUMN_VECTOR2(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 2)                           \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0).s##IDX_COL, (X##1).s##IDX_COL);

#define COLUMN_VECTOR3(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 3)                           \
  BASENAME##IDX_COL =                              \
    (VEC_DATA_TYPE(TYPE, 3))((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL);

#define COLUMN_VECTOR4(IDX_COL, BASENAME, X, TYPE)                                    \
  VEC_DATA_TYPE(TYPE, 4)                                                              \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0).s##IDX_COL, (X##1).s##IDX_COL,  \
                                               (X##2).s##IDX_COL, (X##3).s##IDX_COL);

#define COLUMN_VECTOR8(IDX_COL, BASENAME, X, TYPE)                               \
  VEC_DATA_TYPE(TYPE, 8)                                                         \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 8))(                                  \
    (X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL,  \
    (X##4).s##IDX_COL, (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL);

#define COLUMN_VECTOR16(IDX_COL, BASENAME, X, TYPE)                              \
  VEC_DATA_TYPE(TYPE, 16)                                                        \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 16))(                                 \
    (X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL,  \
    (X##4).s##IDX_COL, (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL,  \
    (X##8).s##IDX_COL, (X##9).s##IDX_COL, (X##A).s##IDX_COL, (X##B).s##IDX_COL,  \
    (X##C).s##IDX_COL, (X##D).s##IDX_COL, (X##E).s##IDX_COL, (X##F).s##IDX_COL);
// end of group COLUMN_VECTORn
763
/**
 * COLUMN_VECTOR_SCALARn (n = 1, 2, 3, 4, 8, 16): gather n row values into one column.
 *
 * Same idea as COLUMN_VECTORn, but the rows X0 .. X<n-1> are used directly (no .s<IDX_COL>
 * component selection). Declares BASENAME<IDX_COL>; for n = 1 it is a plain TYPE value,
 * otherwise a VEC_DATA_TYPE(TYPE, n) built from the n rows in order (rows 10-15 are
 * XA .. XF).
 *
 * VEC_DATA_TYPE is not defined in this part of the file (presumably helpers.h).
 *
 * @param IDX_COL  Column index, pasted into the name of the result variable only.
 * @param BASENAME Prefix of the declared result variable.
 * @param X        Prefix of the source row variables.
 * @param TYPE     Element type of the result.
 */
#define COLUMN_VECTOR_SCALAR1(IDX_COL, BASENAME, X, TYPE) \
  TYPE BASENAME##IDX_COL = (TYPE)((X##0));

#define COLUMN_VECTOR_SCALAR2(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 2)                                  \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0), (X##1));

#define COLUMN_VECTOR_SCALAR3(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 3)                                  \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 3))((X##0), (X##1), (X##2));

#define COLUMN_VECTOR_SCALAR4(IDX_COL, BASENAME, X, TYPE) \
  VEC_DATA_TYPE(TYPE, 4)                                  \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0), (X##1), (X##2), (X##3));

#define COLUMN_VECTOR_SCALAR8(IDX_COL, BASENAME, X, TYPE)        \
  VEC_DATA_TYPE(TYPE, 8)                                         \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 8))(                  \
    (X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7));

#define COLUMN_VECTOR_SCALAR16(IDX_COL, BASENAME, X, TYPE)        \
  VEC_DATA_TYPE(TYPE, 16)                                         \
  BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 16))(                  \
    (X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7), \
    (X##8), (X##9), (X##A), (X##B), (X##C), (X##D), (X##E), (X##F));
// end of group COLUMN_VECTOR_SCALARn
794
/**
 * TRANSPOSE_K0Xn (n = 1, 2, 3, 4, 8, 16): build n column variables from K0 row variables.
 *
 * TRANSPOSE_K0X1 creates column 0 with COLUMN_VECTOR_SCALAR (rows are used as-is).
 * TRANSPOSE_K0X2 creates columns 0 and 1 with COLUMN_VECTOR; each larger variant expands
 * the next smaller one (3 -> 2, 4 -> 3, 8 -> 4, 16 -> 8) and then adds the remaining
 * columns. Columns 10-15 are named A-F.
 *
 * COLUMN_VECTOR and COLUMN_VECTOR_SCALAR already end with ';', so the ';' written after
 * each call here expands to an additional empty statement.
 *
 * @param K0       Number of source rows; selects the COLUMN_VECTOR<K0> variant.
 * @param BASENAME Prefix of the column variables that get declared.
 * @param B        Prefix of the source row variables.
 * @param TYPE     Element type of the columns.
 */
#define TRANSPOSE_K0X1(K0, BASENAME, B, TYPE) \
  COLUMN_VECTOR_SCALAR(K0, 0, BASENAME, B, TYPE);

#define TRANSPOSE_K0X2(K0, BASENAME, B, TYPE) \
  COLUMN_VECTOR(K0, 0, BASENAME, B, TYPE);    \
  COLUMN_VECTOR(K0, 1, BASENAME, B, TYPE);

#define TRANSPOSE_K0X3(K0, BASENAME, B, TYPE) \
  TRANSPOSE_K0X2(K0, BASENAME, B, TYPE);      \
  COLUMN_VECTOR(K0, 2, BASENAME, B, TYPE);

#define TRANSPOSE_K0X4(K0, BASENAME, B, TYPE) \
  TRANSPOSE_K0X3(K0, BASENAME, B, TYPE);      \
  COLUMN_VECTOR(K0, 3, BASENAME, B, TYPE);

#define TRANSPOSE_K0X8(K0, BASENAME, B, TYPE) \
  TRANSPOSE_K0X4(K0, BASENAME, B, TYPE);      \
  COLUMN_VECTOR(K0, 4, BASENAME, B, TYPE);    \
  COLUMN_VECTOR(K0, 5, BASENAME, B, TYPE);    \
  COLUMN_VECTOR(K0, 6, BASENAME, B, TYPE);    \
  COLUMN_VECTOR(K0, 7, BASENAME, B, TYPE);

#define TRANSPOSE_K0X16(K0, BASENAME, B, TYPE) \
  TRANSPOSE_K0X8(K0, BASENAME, B, TYPE);       \
  COLUMN_VECTOR(K0, 8, BASENAME, B, TYPE);     \
  COLUMN_VECTOR(K0, 9, BASENAME, B, TYPE);     \
  COLUMN_VECTOR(K0, A, BASENAME, B, TYPE);     \
  COLUMN_VECTOR(K0, B, BASENAME, B, TYPE);     \
  COLUMN_VECTOR(K0, C, BASENAME, B, TYPE);     \
  COLUMN_VECTOR(K0, D, BASENAME, B, TYPE);     \
  COLUMN_VECTOR(K0, E, BASENAME, B, TYPE);     \
  COLUMN_VECTOR(K0, F, BASENAME, B, TYPE);
830
// end of group TRANSPOSE_K0Xn
832
/**
 * COLUMN_VECTOR: gather column IDX_COL out of K0 row vectors.
 *
 * Selects COLUMN_VECTOR<K0> by joining the two tokens with CONCAT and invokes it with the
 * remaining arguments; the expansion ends with an extra ';'.
 * CONCAT is not defined in this part of the file (presumably helpers.h).
 *
 * @param K0       Number of source rows; a COLUMN_VECTOR<K0> macro must exist.
 * @param IDX_COL  Column index.
 * @param BASENAME Prefix of the declared column variable.
 * @param B        Prefix of the source row variables.
 * @param TYPE     Element type of the column.
 */
#define COLUMN_VECTOR(K0, IDX_COL, BASENAME, B, TYPE) \
  CONCAT(COLUMN_VECTOR, K0)(IDX_COL, BASENAME, B, TYPE);
844
/**
 * COLUMN_VECTOR_SCALAR: gather K0 row values into the column variable BASENAME<IDX_COL>.
 *
 * Selects COLUMN_VECTOR_SCALAR<K0> by joining the two tokens with CONCAT and invokes it
 * with the remaining arguments; the expansion ends with an extra ';'.
 * CONCAT is not defined in this part of the file (presumably helpers.h).
 *
 * @param K0       Number of source rows; a COLUMN_VECTOR_SCALAR<K0> macro must exist.
 * @param IDX_COL  Column index.
 * @param BASENAME Prefix of the declared column variable.
 * @param B        Prefix of the source row variables.
 * @param TYPE     Element type of the column.
 */
#define COLUMN_VECTOR_SCALAR(K0, IDX_COL, BASENAME, B, TYPE) \
  CONCAT(COLUMN_VECTOR_SCALAR, K0)(IDX_COL, BASENAME, B, TYPE);
857
/**
 * TRANSPOSE_K0XN0: build N0 column variables from K0 row variables.
 *
 * Selects TRANSPOSE_K0X<N0> by joining the two tokens with CONCAT and invokes it with the
 * remaining arguments; the expansion ends with an extra ';'.
 * CONCAT is not defined in this part of the file (presumably helpers.h).
 *
 * @param K0       Number of source rows.
 * @param N0       Number of columns to create; a TRANSPOSE_K0X<N0> macro must exist.
 * @param BASENAME Prefix of the column variables that get declared.
 * @param B        Prefix of the source row variables.
 * @param TYPE     Element type of the columns.
 */
#define TRANSPOSE_K0XN0(K0, N0, BASENAME, B, TYPE) \
  CONCAT(TRANSPOSE_K0X, N0)(K0, BASENAME, B, TYPE);
870
/**
 * ADD_ROW_n (n = 1..16): add n bias rows to n row variables, row by row.
 *
 * Row k expands to BASENAME<k> += BIAS<k>;. Rows 10-15 use the suffixes A-F.
 * ADD_ROW_n first expands ADD_ROW_(n-1) and then adds row n-1.
 *
 * @param BASENAME Prefix of the row variables that are updated.
 * @param BIAS     Prefix of the row variables that are added.
 */
#define ADD_ROW_1(BASENAME, BIAS) BASENAME##0 += BIAS##0;

#define ADD_ROW_2(BASENAME, BIAS) ADD_ROW_1(BASENAME, BIAS) BASENAME##1 += BIAS##1;

#define ADD_ROW_3(BASENAME, BIAS) ADD_ROW_2(BASENAME, BIAS) BASENAME##2 += BIAS##2;

#define ADD_ROW_4(BASENAME, BIAS) ADD_ROW_3(BASENAME, BIAS) BASENAME##3 += BIAS##3;

#define ADD_ROW_5(BASENAME, BIAS) ADD_ROW_4(BASENAME, BIAS) BASENAME##4 += BIAS##4;

#define ADD_ROW_6(BASENAME, BIAS) ADD_ROW_5(BASENAME, BIAS) BASENAME##5 += BIAS##5;

#define ADD_ROW_7(BASENAME, BIAS) ADD_ROW_6(BASENAME, BIAS) BASENAME##6 += BIAS##6;

#define ADD_ROW_8(BASENAME, BIAS) ADD_ROW_7(BASENAME, BIAS) BASENAME##7 += BIAS##7;

#define ADD_ROW_9(BASENAME, BIAS) ADD_ROW_8(BASENAME, BIAS) BASENAME##8 += BIAS##8;

#define ADD_ROW_10(BASENAME, BIAS) ADD_ROW_9(BASENAME, BIAS) BASENAME##9 += BIAS##9;

#define ADD_ROW_11(BASENAME, BIAS) ADD_ROW_10(BASENAME, BIAS) BASENAME##A += BIAS##A;

#define ADD_ROW_12(BASENAME, BIAS) ADD_ROW_11(BASENAME, BIAS) BASENAME##B += BIAS##B;

#define ADD_ROW_13(BASENAME, BIAS) ADD_ROW_12(BASENAME, BIAS) BASENAME##C += BIAS##C;

#define ADD_ROW_14(BASENAME, BIAS) ADD_ROW_13(BASENAME, BIAS) BASENAME##D += BIAS##D;

#define ADD_ROW_15(BASENAME, BIAS) ADD_ROW_14(BASENAME, BIAS) BASENAME##E += BIAS##E;

#define ADD_ROW_16(BASENAME, BIAS) ADD_ROW_15(BASENAME, BIAS) BASENAME##F += BIAS##F;
939
// end of group ADD_ROW_n
941
952#define ADD_BLOCK_STR(N, BASENAME, BIAS) ADD_ROW_##N(BASENAME, BIAS)
953#define ADD_BLOCK(N, BASENAME, BIAS) ADD_BLOCK_STR(N, BASENAME, BIAS)
// end of group ADD_BLOCK
955
/** Add the same BIAS expression to BASENAME<i> in place, for the first n rows.
 * @name ADD_ROW_BROADCAST_n
 *
 * Row suffixes are single hexadecimal digits: 0-9 followed by A-F, so
 * ADD_ROW_BROADCAST_16 touches BASENAME0 ... BASENAMEF. Each ADD_ROW_BROADCAST_n first
 * expands ADD_ROW_BROADCAST_(n-1) and then handles row n-1. Every generated statement
 * already ends with a semicolon.
 *
 * @note BIAS is substituted textually into every row, so it is evaluated once per row;
 *       avoid passing an expression with side effects.
 *
 * @param[in] BASENAME Name prefix of the variables that are updated
 * @param[in] BIAS     Expression added to every row
 * @{
 */
#define ADD_ROW_BROADCAST_1(BASENAME, BIAS) \
    BASENAME##0 += BIAS;

#define ADD_ROW_BROADCAST_2(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_1(BASENAME, BIAS)     \
    BASENAME##1 += BIAS;

#define ADD_ROW_BROADCAST_3(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_2(BASENAME, BIAS)     \
    BASENAME##2 += BIAS;

#define ADD_ROW_BROADCAST_4(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_3(BASENAME, BIAS)     \
    BASENAME##3 += BIAS;

#define ADD_ROW_BROADCAST_5(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_4(BASENAME, BIAS)     \
    BASENAME##4 += BIAS;

#define ADD_ROW_BROADCAST_6(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_5(BASENAME, BIAS)     \
    BASENAME##5 += BIAS;

#define ADD_ROW_BROADCAST_7(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_6(BASENAME, BIAS)     \
    BASENAME##6 += BIAS;

#define ADD_ROW_BROADCAST_8(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_7(BASENAME, BIAS)     \
    BASENAME##7 += BIAS;

#define ADD_ROW_BROADCAST_9(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_8(BASENAME, BIAS)     \
    BASENAME##8 += BIAS;

#define ADD_ROW_BROADCAST_10(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_9(BASENAME, BIAS)      \
    BASENAME##9 += BIAS;

#define ADD_ROW_BROADCAST_11(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_10(BASENAME, BIAS)     \
    BASENAME##A += BIAS;

#define ADD_ROW_BROADCAST_12(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_11(BASENAME, BIAS)     \
    BASENAME##B += BIAS;

#define ADD_ROW_BROADCAST_13(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_12(BASENAME, BIAS)     \
    BASENAME##C += BIAS;

#define ADD_ROW_BROADCAST_14(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_13(BASENAME, BIAS)     \
    BASENAME##D += BIAS;

#define ADD_ROW_BROADCAST_15(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_14(BASENAME, BIAS)     \
    BASENAME##E += BIAS;

#define ADD_ROW_BROADCAST_16(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_15(BASENAME, BIAS)     \
    BASENAME##F += BIAS;

/** @} */ // end of group ADD_ROW_BROADCAST_n

/** Add the single expression BIAS to every row of the block BASENAME0..BASENAME(N-1).
 * @name ADD_BLOCK_BROADCAST
 *
 * Supported cases are N=1,2,3,...,16.
 *
 * ADD_BLOCK_BROADCAST forwards to ADD_BLOCK_BROADCAST_STR so that N is macro-expanded
 * before it is pasted onto ADD_ROW_BROADCAST_; this lets N be a macro rather than a
 * literal number.
 *
 * @param[in] N        Number of rows in the block
 * @param[in] BASENAME Name prefix of the variables that are updated
 * @param[in] BIAS     Expression added to every row
 * @{
 */
#define ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS) ADD_ROW_BROADCAST_##N(BASENAME, BIAS)
#define ADD_BLOCK_BROADCAST(N, BASENAME, BIAS) ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS)
/** @} */ // end of group ADD_BLOCK_BROADCAST
1038
/** Replace BASENAME<i> with ACTIVATION(..., BASENAME<i>, ...) for the first n rows.
 * @name ACTIVATION_ROW_n
 *
 * For every row the generated statement is
 *   BASENAME<i> = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME<i>, A_VAL, B_VAL);
 * Row suffixes are single hexadecimal digits: 0-9 followed by A-F, so ACTIVATION_ROW_16
 * touches BASENAME0 ... BASENAMEF. Each ACTIVATION_ROW_n first expands
 * ACTIVATION_ROW_(n-1) and then handles row n-1.
 *
 * @note ACTIVATION is not defined here; it must be visible at the point of use and accept
 *       the five arguments shown above.
 *
 * @param[in] ACTIVATION_TYPE Forwarded unchanged as the first argument of ACTIVATION
 * @param[in] DATA_TYPE       Forwarded unchanged as the second argument of ACTIVATION
 * @param[in] BASENAME        Name prefix of the variables that are read and overwritten
 * @param[in] A_VAL           Forwarded unchanged as the fourth argument of ACTIVATION
 * @param[in] B_VAL           Forwarded unchanged as the fifth argument of ACTIVATION
 * @{
 */
#define ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##0 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##0, A_VAL, B_VAL);

#define ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##1 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##1, A_VAL, B_VAL);

#define ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##2 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##2, A_VAL, B_VAL);

#define ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##3 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##3, A_VAL, B_VAL);

#define ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##4 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##4, A_VAL, B_VAL);

#define ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##5 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##5, A_VAL, B_VAL);

#define ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##6 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##6, A_VAL, B_VAL);

#define ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##7 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##7, A_VAL, B_VAL);

#define ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##8 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##8, A_VAL, B_VAL);

#define ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)      \
    BASENAME##9 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##9, A_VAL, B_VAL);

#define ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##A = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##A, A_VAL, B_VAL);

#define ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##B = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##B, A_VAL, B_VAL);

#define ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##C = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##C, A_VAL, B_VAL);

#define ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##D = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##D, A_VAL, B_VAL);

#define ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##E = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##E, A_VAL, B_VAL);

#define ACTIVATION_ROW_16(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)     \
    BASENAME##F = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##F, A_VAL, B_VAL);
/** @} */ // end of group ACTIVATION_ROW_n

/** Apply ACTIVATION in place to every row of the block BASENAME0..BASENAME(N-1).
 * @name ACTIVATION_BLOCK
 *
 * Supported cases are N=1,2,3,...,16.
 *
 * ACTIVATION_BLOCK forwards to ACTIVATION_BLOCK_STR so that N is macro-expanded before it
 * is pasted onto ACTIVATION_ROW_; this lets N be a macro rather than a literal number.
 *
 * @param[in] N               Number of rows in the block
 * @param[in] ACTIVATION_TYPE Forwarded unchanged as the first argument of ACTIVATION
 * @param[in] DATA_TYPE       Forwarded unchanged as the second argument of ACTIVATION
 * @param[in] BASENAME        Name prefix of the variables that are read and overwritten
 * @param[in] A_VAL           Forwarded unchanged as the fourth argument of ACTIVATION
 * @param[in] B_VAL           Forwarded unchanged as the fifth argument of ACTIVATION
 * @{
 */
#define ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_##N(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION_BLOCK(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
/** @} */ // end of group ACTIVATION_BLOCK
1131
/** Declare BASENAME_DST<i> as the converted copy of BASENAME_SRC<i> for the first m rows.
 * @name CONVERT_ROW_m
 *
 * For every row the generated statement is
 *   VEC_DATA_TYPE(DATA_TYPE, N) BASENAME_DST<i> =
 *       CONVERT(BASENAME_SRC<i>, VEC_DATA_TYPE(DATA_TYPE, N));
 * The destination variables are newly declared by the macro, in the scope where it is
 * expanded; they must not already exist there. Row suffixes are single hexadecimal digits:
 * 0-9 followed by A-F, so CONVERT_ROW_16 covers rows 0 ... F. Each CONVERT_ROW_m first
 * expands CONVERT_ROW_(m-1) and then handles row m-1.
 *
 * @note VEC_DATA_TYPE and CONVERT are not defined here; they must be visible at the point
 *       of use (presumably provided by helpers.h - confirm).
 *
 * @param[in] N            Second argument passed to VEC_DATA_TYPE
 * @param[in] DATA_TYPE    First argument passed to VEC_DATA_TYPE
 * @param[in] BASENAME_SRC Name prefix of the existing source variables
 * @param[in] BASENAME_DST Name prefix of the destination variables that get declared
 * @{
 */
#define CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##0 = CONVERT(BASENAME_SRC##0, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##1 = CONVERT(BASENAME_SRC##1, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##2 = CONVERT(BASENAME_SRC##2, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##3 = CONVERT(BASENAME_SRC##3, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##4 = CONVERT(BASENAME_SRC##4, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##5 = CONVERT(BASENAME_SRC##5, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##6 = CONVERT(BASENAME_SRC##6, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##7 = CONVERT(BASENAME_SRC##7, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##8 = CONVERT(BASENAME_SRC##8, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)      \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##9 = CONVERT(BASENAME_SRC##9, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##A = CONVERT(BASENAME_SRC##A, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##B = CONVERT(BASENAME_SRC##B, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##C = CONVERT(BASENAME_SRC##C, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##D = CONVERT(BASENAME_SRC##D, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##E = CONVERT(BASENAME_SRC##E, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_16(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##F = CONVERT(BASENAME_SRC##F, VEC_DATA_TYPE(DATA_TYPE, N));
/** @} */ // end of group CONVERT_ROW_m

/** Declare the block BASENAME_DST0..(M-1) as the converted copy of BASENAME_SRC0..(M-1).
 * @name CONVERT_BLOCK
 *
 * Supported cases are M=1,2,3,...,16.
 *
 * CONVERT_BLOCK forwards to CONVERT_BLOCK_STR so that M is macro-expanded before it is
 * pasted onto CONVERT_ROW_; this lets M be a macro rather than a literal number.
 *
 * @param[in] M            Number of rows to convert
 * @param[in] N            Second argument passed to VEC_DATA_TYPE
 * @param[in] DATA_TYPE    First argument passed to VEC_DATA_TYPE
 * @param[in] BASENAME_SRC Name prefix of the existing source variables
 * @param[in] BASENAME_DST Name prefix of the destination variables that get declared
 * @{
 */
#define CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_##M(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
#define CONVERT_BLOCK(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
/** @} */ // end of group CONVERT_BLOCK