ONE - On-device Neural Engine
Loading...
Searching...
No Matches
helpers.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Copyright (c) 2016-2020 ARM Limited.
19 *
20 * SPDX-License-Identifier: MIT
21 *
22 * Permission is hereby granted, free of charge, to any person obtaining a copy
23 * of this software and associated documentation files (the "Software"), to
24 * deal in the Software without restriction, including without limitation the
25 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
26 * sell copies of the Software, and to permit persons to whom the Software is
27 * furnished to do so, subject to the following conditions:
28 *
29 * The above copyright notice and this permission notice shall be included in all
30 * copies or substantial portions of the Software.
31 *
32 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38 * SOFTWARE.
39 */
40#ifndef ARM_COMPUTE_HELPER_H
41#define ARM_COMPUTE_HELPER_H
42
43#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
44#pragma OPENCL EXTENSION cl_khr_fp16 : enable
45#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
46
47#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
48#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
49#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
50
51#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && \
52 defined(cl_arm_integer_dot_product_accumulate_int8)
53#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
54#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) &&
55 // defined(cl_arm_integer_dot_product_accumulate_int8)
56
57#if defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
58#pragma OPENCL EXTENSION cl_arm_printf : enable
59#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
60
61#define GPU_ARCH_MIDGARD 0x100
62#define GPU_ARCH_BIFROST 0x200
63
71#define CONCAT(a, b) a##b
72
79#define EXPAND(x) x
80
89#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
90
99#define REV1(x) ((x))
100#define REV2(x) ((x).s10)
101#define REV3(x) ((x).s210)
102#define REV4(x) ((x).s3210)
103#define REV8(x) ((x).s76543210)
104#define REV16(x) ((x).sFEDCBA9876543210)
// end of group REVn
106
116#define REVERSE_STR(x, s) REV##s((x))
117#define REVERSE(x, s) REVERSE_STR(x, s)
// end of group REVERSE
119
128#define ROT1_0(x) ((x))
129
130#define ROT2_0(x) ((x))
131#define ROT2_1(x) ((x).s10)
132
133#define ROT3_0(x) ((x))
134#define ROT3_1(x) ((x).s201)
135#define ROT3_2(x) ((x).s120)
136
137#define ROT4_0(x) ((x))
138#define ROT4_1(x) ((x).s3012)
139#define ROT4_2(x) ((x).s2301)
140#define ROT4_3(x) ((x).s1230)
141
142#define ROT8_0(x) ((x))
143#define ROT8_1(x) ((x).s70123456)
144#define ROT8_2(x) ((x).s67012345)
145#define ROT8_3(x) ((x).s56701234)
146#define ROT8_4(x) ((x).s45670123)
147#define ROT8_5(x) ((x).s34567012)
148#define ROT8_6(x) ((x).s23456701)
149#define ROT8_7(x) ((x).s12345670)
150
151#define ROT16_0(x) ((x))
152#define ROT16_1(x) ((x).sF0123456789ABCDE)
153#define ROT16_2(x) ((x).sEF0123456789ABCD)
154#define ROT16_3(x) ((x).sDEF0123456789ABC)
155#define ROT16_4(x) ((x).sCDEF0123456789AB)
156#define ROT16_5(x) ((x).sBCDEF0123456789A)
157#define ROT16_6(x) ((x).sABCDEF0123456789)
158#define ROT16_7(x) ((x).s9ABCDEF012345678)
159#define ROT16_8(x) ((x).s89ABCDEF01234567)
160#define ROT16_9(x) ((x).s789ABCDEF0123456)
161#define ROT16_10(x) ((x).s6789ABCDEF012345)
162#define ROT16_11(x) ((x).s56789ABCDEF01234)
163#define ROT16_12(x) ((x).s456789ABCDEF0123)
164#define ROT16_13(x) ((x).s3456789ABCDEF012)
165#define ROT16_14(x) ((x).s23456789ABCDEF01)
166#define ROT16_15(x) ((x).s123456789ABCDEF0)
// end of group ROTs_n
168
179#define ROTATE_STR(x, s, n) ROT##s##_##n(x)
180#define ROTATE(x, s, n) ROTATE_STR(x, s, n)
// end of group ROTATE
182
192#define V_OFFS1(dt) (dt)(0)
193#define V_OFFS2(dt) (dt)(0, 1)
194#define V_OFFS3(dt) (dt)(0, 1, 3)
195#define V_OFFS4(dt) (dt)(0, 1, 2, 3)
196#define V_OFFS8(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7)
197#define V_OFFS16(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
// end of group V_OFFSn
199
209#define VEC_OFFS_STR(dt, s) V_OFFS##s(dt)
210#define VEC_OFFS(dt, s) VEC_OFFS_STR(dt, s)
// end of group VEC_OFFS
212
213#define VLOAD_STR(size) vload##size
214#define VLOAD(size) VLOAD_STR(size)
215
216#define VSTORE_STR(size) vstore##size
217#define VSTORE(size) VSTORE_STR(size)
218
219#define float1 float
220#define half1 half
221#define char1 char
222#define uchar1 uchar
223#define short1 short
224#define ushort1 ushort
225#define int1 int
226#define uint1 uint
227#define long1 long
228#define ulong1 ulong
229#define double1 double
230
231#define vload1(OFFSET, PTR) *(OFFSET + PTR)
232#define vstore1(DATA, OFFSET, PTR) *(OFFSET + PTR) = DATA
233
234// Convert built-in functions with _sat modifier are not supported in floating point so we create
235// defines
236// without _sat to overcome this issue
237#define convert_float_sat convert_float
238#define convert_float1_sat convert_float
239#define convert_float2_sat convert_float2
240#define convert_float3_sat convert_float3
241#define convert_float4_sat convert_float4
242#define convert_float8_sat convert_float8
243#define convert_float16_sat convert_float16
244#define convert_half_sat convert_float
245#define convert_half1_sat convert_half
246#define convert_half2_sat convert_half2
247#define convert_half3_sat convert_half3
248#define convert_half4_sat convert_half4
249#define convert_half8_sat convert_half8
250#define convert_half16_sat convert_half16
251
252#define convert_float1 convert_float
253#define convert_half1 convert_half
254#define convert_char1 convert_char
255#define convert_uchar1 convert_uchar
256#define convert_short1 convert_short
257#define convert_ushort1 convert_ushort
258#define convert_int1 convert_int
259#define convert_uint1 convert_uint
260#define convert_long1 convert_long
261#define convert_ulong1 convert_ulong
262#define convert_double1 convert_double
263
264#define convert_char1_sat convert_char_sat
265#define convert_uchar1_sat convert_uchar_sat
266#define convert_short1_sat convert_short_sat
267#define convert_ushort1_sat convert_ushort_sat
268#define convert_int1_sat convert_int_sat
269#define convert_uint1_sat convert_uint_sat
270#define convert_long1_sat convert_long_sat
271#define convert_ulong1_sat convert_ulong_sat
272#define convert_double1_sat convert_double_sat
273
274#define VEC_DATA_TYPE_STR(type, size) type##size
275#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
276
277#define CL_VEC_DATA_TYPE_STR(type, size) type##size
278#define CL_VEC_DATA_TYPE(type, size) CL_VEC_DATA_TYPE_STR(type, size)
279
280#define CONVERT_STR(x, type) (convert_##type((x)))
281#define CONVERT(x, type) CONVERT_STR(x, type)
282
283#define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x)))
284#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)
285
286#define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x)))
287#define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round)
288
289#define VECTOR_DECLARATION(name) \
290 __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, \
291 uint name##_offset_first_element_in_bytes
292
293#define IMAGE_DECLARATION(name) \
294 __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
295 uint name##_step_y, uint name##_offset_first_element_in_bytes
296
297#define TENSOR3D_DECLARATION(name) \
298 __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
299 uint name##_step_y, uint name##_stride_z, uint name##_step_z, \
300 uint name##_offset_first_element_in_bytes
301
302#define TENSOR4D_DECLARATION(name) \
303 __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
304 uint name##_step_y, uint name##_stride_z, uint name##_step_z, uint name##_stride_w, \
305 uint name##_step_w, uint name##_offset_first_element_in_bytes
306
307#define CONVERT_TO_VECTOR_STRUCT(name) \
308 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
309 name##_step_x)
310
311#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
312 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0)
313
314#define CONVERT_TO_IMAGE_STRUCT(name) \
315 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
316 name##_step_x, name##_stride_y, name##_step_y)
317
318#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
319 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, \
320 name##_stride_y, 0)
321
322#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
323 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \
324 name##_stride_x, name##_step_x, name##_stride_y, \
325 name##_step_y, name##_stride_z, name##_step_z)
326
327#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
328 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \
329 name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, \
330 name##_step_z)
331
332#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
333 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \
334 name##_stride_x, name##_step_x, name##_stride_y, \
335 name##_step_y, name##_stride_z, name##_step_z)
336
337#define CONVERT_TO_TENSOR3D_STRUCT(name) \
338 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
339 name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, \
340 name##_step_z)
341
342#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
343 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
344 0, name##_stride_y, 0, name##_stride_z, 0)
345
346#define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size) \
347 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
348 name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, \
349 name##_step_z, name##_stride_w, name##_step_w, mod_size)
350
351#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) \
352 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
353 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, \
354 mod_size)
355
357typedef struct Vector
358{
359 __global uchar *ptr;
363
365typedef struct Image
366{
367 __global uchar *ptr;
372
382
393
405inline Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes,
406 uint stride_x, uint step_x)
407{
408 Vector vector = {
409 .ptr = ptr,
410 .offset_first_element_in_bytes = offset_first_element_in_bytes,
411 .stride_x = stride_x,
412 };
413 vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x;
414 return vector;
415}
416
431inline Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes,
432 uint stride_x, uint step_x, uint stride_y, uint step_y)
433{
434 Image img = {.ptr = ptr,
435 .offset_first_element_in_bytes = offset_first_element_in_bytes,
436 .stride_x = stride_x,
437 .stride_y = stride_y};
438 img.ptr +=
439 img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
440 return img;
441}
442
461 uint offset_first_element_in_bytes,
462 uint stride_x, uint step_x, uint stride_y,
463 uint step_y, uint stride_z, uint step_z)
464{
465 Image img = {.ptr = ptr,
466 .offset_first_element_in_bytes = offset_first_element_in_bytes,
467 .stride_x = stride_x,
468 .stride_y = stride_y};
469 img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x +
470 get_global_id(1) * step_y + get_global_id(2) * step_z;
471 return img;
472}
473
491inline Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr,
492 uint offset_first_element_in_bytes, uint stride_x,
493 uint step_x, uint stride_y, uint step_y, uint stride_z,
494 uint step_z)
495{
496 Tensor3D tensor = {.ptr = ptr,
497 .offset_first_element_in_bytes = offset_first_element_in_bytes,
498 .stride_x = stride_x,
499 .stride_y = stride_y,
500 .stride_z = stride_z};
501 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x +
502 get_global_id(1) * step_y + get_global_id(2) * step_z;
503 return tensor;
504}
505
506inline Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr,
507 uint offset_first_element_in_bytes, uint stride_x,
508 uint step_x, uint stride_y, uint step_y, uint stride_z,
509 uint step_z, uint stride_w, uint step_w, uint mod_size)
510{
511 Tensor4D tensor = {.ptr = ptr,
512 .offset_first_element_in_bytes = offset_first_element_in_bytes,
513 .stride_x = stride_x,
514 .stride_y = stride_y,
515 .stride_z = stride_z,
516 .stride_w = stride_w};
517
518 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x +
519 get_global_id(1) * step_y + (get_global_id(2) % mod_size) * step_z +
520 (get_global_id(2) / mod_size) * step_w;
521 return tensor;
522}
523
529inline __global const uchar *vector_offset(const Vector *vec, int x)
530{
531 return vec->ptr + x * vec->stride_x;
532}
533
540inline __global uchar *offset(const Image *img, int x, int y)
541{
542 return img->ptr + x * img->stride_x + y * img->stride_y;
543}
544
552inline __global const uchar *tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
553{
554 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z;
555}
556
565inline __global const uchar *tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
566{
567 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z +
568 w * tensor->stride_w;
569}
570
571#endif // _HELPER_H
Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
Definition helpers.h:431
__global const uchar * tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
Definition helpers.h:565
__global uchar * offset(const Image *img, int x, int y)
Definition helpers.h:540
Image update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Definition helpers.h:460
Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z, uint stride_w, uint step_w, uint mod_size)
Definition helpers.h:506
Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x)
Definition helpers.h:405
Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Definition helpers.h:491
__global const uchar * tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
Definition helpers.h:552
__global const uchar * vector_offset(const Vector *vec, int x)
Definition helpers.h:529
int stride_x
Definition helpers.h:369
int offset_first_element_in_bytes
Definition helpers.h:368
__global uchar * ptr
Definition helpers.h:367
int stride_y
Definition helpers.h:370
int stride_y
Definition helpers.h:379
__global uchar * ptr
Definition helpers.h:376
int offset_first_element_in_bytes
Definition helpers.h:377
int stride_z
Definition helpers.h:380
int stride_x
Definition helpers.h:378
int offset_first_element_in_bytes
Definition helpers.h:387
int stride_z
Definition helpers.h:390
int stride_y
Definition helpers.h:389
int stride_w
Definition helpers.h:391
__global uchar * ptr
Definition helpers.h:386
int stride_x
Definition helpers.h:388
int offset_first_element_in_bytes
Definition helpers.h:360
__global uchar * ptr
Definition helpers.h:359
int stride_x
Definition helpers.h:361