40#ifndef ARM_COMPUTE_HELPER_H
41#define ARM_COMPUTE_HELPER_H
43#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
44#pragma OPENCL EXTENSION cl_khr_fp16 : enable
47#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
48#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
51#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && \
52 defined(cl_arm_integer_dot_product_accumulate_int8)
53#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
57#if defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
58#pragma OPENCL EXTENSION cl_arm_printf : enable
61#define GPU_ARCH_MIDGARD 0x100
62#define GPU_ARCH_BIFROST 0x200
71#define CONCAT(a, b) a##b
89#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
100#define REV2(x) ((x).s10)
101#define REV3(x) ((x).s210)
102#define REV4(x) ((x).s3210)
103#define REV8(x) ((x).s76543210)
104#define REV16(x) ((x).sFEDCBA9876543210)
116#define REVERSE_STR(x, s) REV##s((x))
117#define REVERSE(x, s) REVERSE_STR(x, s)
128#define ROT1_0(x) ((x))
130#define ROT2_0(x) ((x))
131#define ROT2_1(x) ((x).s10)
133#define ROT3_0(x) ((x))
134#define ROT3_1(x) ((x).s201)
135#define ROT3_2(x) ((x).s120)
137#define ROT4_0(x) ((x))
138#define ROT4_1(x) ((x).s3012)
139#define ROT4_2(x) ((x).s2301)
140#define ROT4_3(x) ((x).s1230)
142#define ROT8_0(x) ((x))
143#define ROT8_1(x) ((x).s70123456)
144#define ROT8_2(x) ((x).s67012345)
145#define ROT8_3(x) ((x).s56701234)
146#define ROT8_4(x) ((x).s45670123)
147#define ROT8_5(x) ((x).s34567012)
148#define ROT8_6(x) ((x).s23456701)
149#define ROT8_7(x) ((x).s12345670)
151#define ROT16_0(x) ((x))
152#define ROT16_1(x) ((x).sF0123456789ABCDE)
153#define ROT16_2(x) ((x).sEF0123456789ABCD)
154#define ROT16_3(x) ((x).sDEF0123456789ABC)
155#define ROT16_4(x) ((x).sCDEF0123456789AB)
156#define ROT16_5(x) ((x).sBCDEF0123456789A)
157#define ROT16_6(x) ((x).sABCDEF0123456789)
158#define ROT16_7(x) ((x).s9ABCDEF012345678)
159#define ROT16_8(x) ((x).s89ABCDEF01234567)
160#define ROT16_9(x) ((x).s789ABCDEF0123456)
161#define ROT16_10(x) ((x).s6789ABCDEF012345)
162#define ROT16_11(x) ((x).s56789ABCDEF01234)
163#define ROT16_12(x) ((x).s456789ABCDEF0123)
164#define ROT16_13(x) ((x).s3456789ABCDEF012)
165#define ROT16_14(x) ((x).s23456789ABCDEF01)
166#define ROT16_15(x) ((x).s123456789ABCDEF0)
179#define ROTATE_STR(x, s, n) ROT##s##_##n(x)
180#define ROTATE(x, s, n) ROTATE_STR(x, s, n)
192#define V_OFFS1(dt) (dt)(0)
193#define V_OFFS2(dt) (dt)(0, 1)
194#define V_OFFS3(dt) (dt)(0, 1, 3)
195#define V_OFFS4(dt) (dt)(0, 1, 2, 3)
196#define V_OFFS8(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7)
197#define V_OFFS16(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
209#define VEC_OFFS_STR(dt, s) V_OFFS##s(dt)
210#define VEC_OFFS(dt, s) VEC_OFFS_STR(dt, s)
213#define VLOAD_STR(size) vload##size
214#define VLOAD(size) VLOAD_STR(size)
216#define VSTORE_STR(size) vstore##size
217#define VSTORE(size) VSTORE_STR(size)
224#define ushort1 ushort
229#define double1 double
231#define vload1(OFFSET, PTR) *(OFFSET + PTR)
232#define vstore1(DATA, OFFSET, PTR) *(OFFSET + PTR) = DATA
237#define convert_float_sat convert_float
238#define convert_float1_sat convert_float
239#define convert_float2_sat convert_float2
240#define convert_float3_sat convert_float3
241#define convert_float4_sat convert_float4
242#define convert_float8_sat convert_float8
243#define convert_float16_sat convert_float16
244#define convert_half_sat convert_float
245#define convert_half1_sat convert_half
246#define convert_half2_sat convert_half2
247#define convert_half3_sat convert_half3
248#define convert_half4_sat convert_half4
249#define convert_half8_sat convert_half8
250#define convert_half16_sat convert_half16
252#define convert_float1 convert_float
253#define convert_half1 convert_half
254#define convert_char1 convert_char
255#define convert_uchar1 convert_uchar
256#define convert_short1 convert_short
257#define convert_ushort1 convert_ushort
258#define convert_int1 convert_int
259#define convert_uint1 convert_uint
260#define convert_long1 convert_long
261#define convert_ulong1 convert_ulong
262#define convert_double1 convert_double
264#define convert_char1_sat convert_char_sat
265#define convert_uchar1_sat convert_uchar_sat
266#define convert_short1_sat convert_short_sat
267#define convert_ushort1_sat convert_ushort_sat
268#define convert_int1_sat convert_int_sat
269#define convert_uint1_sat convert_uint_sat
270#define convert_long1_sat convert_long_sat
271#define convert_ulong1_sat convert_ulong_sat
272#define convert_double1_sat convert_double_sat
274#define VEC_DATA_TYPE_STR(type, size) type##size
275#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
277#define CL_VEC_DATA_TYPE_STR(type, size) type##size
278#define CL_VEC_DATA_TYPE(type, size) CL_VEC_DATA_TYPE_STR(type, size)
280#define CONVERT_STR(x, type) (convert_##type((x)))
281#define CONVERT(x, type) CONVERT_STR(x, type)
283#define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x)))
284#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)
286#define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x)))
287#define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round)
289#define VECTOR_DECLARATION(name) \
290 __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, \
291 uint name##_offset_first_element_in_bytes
293#define IMAGE_DECLARATION(name) \
294 __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
295 uint name##_step_y, uint name##_offset_first_element_in_bytes
297#define TENSOR3D_DECLARATION(name) \
298 __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
299 uint name##_step_y, uint name##_stride_z, uint name##_step_z, \
300 uint name##_offset_first_element_in_bytes
302#define TENSOR4D_DECLARATION(name) \
303 __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
304 uint name##_step_y, uint name##_stride_z, uint name##_step_z, uint name##_stride_w, \
305 uint name##_step_w, uint name##_offset_first_element_in_bytes
307#define CONVERT_TO_VECTOR_STRUCT(name) \
308 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
311#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
312 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0)
314#define CONVERT_TO_IMAGE_STRUCT(name) \
315 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
316 name##_step_x, name##_stride_y, name##_step_y)
318#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
319 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, \
322#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
323 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \
324 name##_stride_x, name##_step_x, name##_stride_y, \
325 name##_step_y, name##_stride_z, name##_step_z)
327#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
328 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \
329 name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, \
332#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
333 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \
334 name##_stride_x, name##_step_x, name##_stride_y, \
335 name##_step_y, name##_stride_z, name##_step_z)
337#define CONVERT_TO_TENSOR3D_STRUCT(name) \
338 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
339 name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, \
342#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
343 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
344 0, name##_stride_y, 0, name##_stride_z, 0)
346#define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size) \
347 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
348 name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, \
349 name##_step_z, name##_stride_w, name##_step_w, mod_size)
351#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) \
352 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
353 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, \
406 uint stride_x, uint step_x)
410 .offset_first_element_in_bytes = offset_first_element_in_bytes,
411 .stride_x = stride_x,
432 uint stride_x, uint step_x, uint stride_y, uint step_y)
435 .offset_first_element_in_bytes = offset_first_element_in_bytes,
436 .stride_x = stride_x,
437 .stride_y = stride_y};
461 uint offset_first_element_in_bytes,
462 uint stride_x, uint step_x, uint stride_y,
463 uint step_y, uint stride_z, uint step_z)
466 .offset_first_element_in_bytes = offset_first_element_in_bytes,
467 .stride_x = stride_x,
468 .stride_y = stride_y};
470 get_global_id(1) * step_y + get_global_id(2) * step_z;
492 uint offset_first_element_in_bytes, uint stride_x,
493 uint step_x, uint stride_y, uint step_y, uint stride_z,
497 .offset_first_element_in_bytes = offset_first_element_in_bytes,
498 .stride_x = stride_x,
499 .stride_y = stride_y,
500 .stride_z = stride_z};
501 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x +
502 get_global_id(1) * step_y + get_global_id(2) * step_z;
507 uint offset_first_element_in_bytes, uint stride_x,
508 uint step_x, uint stride_y, uint step_y, uint stride_z,
509 uint step_z, uint stride_w, uint step_w, uint mod_size)
512 .offset_first_element_in_bytes = offset_first_element_in_bytes,
513 .stride_x = stride_x,
514 .stride_y = stride_y,
515 .stride_z = stride_z,
516 .stride_w = stride_w};
518 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x +
519 get_global_id(1) * step_y + (get_global_id(2) % mod_size) * step_z +
520 (get_global_id(2) / mod_size) * step_w;
554 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z;
567 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z +
568 w * tensor->stride_w;
Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
__global const uchar * tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
__global uchar * offset(const Image *img, int x, int y)
Image update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z, uint stride_w, uint step_w, uint mod_size)
Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x)
Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
__global const uchar * tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
__global const uchar * vector_offset(const Vector *vec, int x)
int offset_first_element_in_bytes
int offset_first_element_in_bytes
int offset_first_element_in_bytes
int offset_first_element_in_bytes