ONE - On-device Neural Engine
Loading...
Searching...
No Matches
arm_compute::CLTopKV2 Class Reference

Class to execute TopKV2 operation. More...

#include <CLTopKV2.h>

Collaboration diagram for arm_compute::CLTopKV2:

Public Member Functions

 CLTopKV2 ()
 Construct a new CLTopKV2 object.
 
 CLTopKV2 (const CLTopKV2 &)=delete
 Prevent instances of this class from being copied (As this class contains pointers)
 
CLTopKV2 & operator= (const CLTopKV2 &)=delete
 Prevent instances of this class from being copied (As this class contains pointers)
 
 CLTopKV2 (CLTopKV2 &&)=default
 Construct a new CLTopKV2 object by using the move constructor.
 
CLTopKV2 & operator= (CLTopKV2 &&)=default
 Assign a CLTopKV2 object.
 
void configure (ICLTensor *input, int k, ICLTensor *values, ICLTensor *indices, int total_bits=32, int bits=4)
 Initialise the kernel's inputs and outputs.
 
void run () override
 Run the kernels contained in the function. Depending on the value of the following environment variable, it works differently:
 

Detailed Description

Class to execute TopKV2 operation.

Definition at line 60 of file CLTopKV2.h.

Constructor & Destructor Documentation

◆ CLTopKV2() [1/3]

arm_compute::CLTopKV2::CLTopKV2 ( )

Construct a new CLTopKV2 object.

Definition at line 51 of file CLTopKV2.cpp.

52 : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0),
53 _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(),
54 _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(),
55 _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr), _p_out_key_buf(nullptr),
56 _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr) /*, _qs_kernel(),
57 _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(),
58 _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(),
59 _reorder_negatives_kernel(), _store_kernel()*/
60{
61}

◆ CLTopKV2() [2/3]

arm_compute::CLTopKV2::CLTopKV2 ( const CLTopKV2 & )
delete

Prevent instances of this class from being copied (As this class contains pointers)

◆ CLTopKV2() [3/3]

arm_compute::CLTopKV2::CLTopKV2 ( CLTopKV2 &&  )
default

Construct a new CLTopKV2 object by using the move constructor.

Parameters
[in] CLTopKV2 object to move

Member Function Documentation

◆ configure()

void arm_compute::CLTopKV2::configure ( ICLTensor *  input,
int  k,
ICLTensor *  values,
ICLTensor *  indices,
int  total_bits = 32,
int  bits = 4 
)

Initialise the kernel's inputs and outputs.

Parameters
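[in] input: Input image. Data types supported: U8/S16/F32.
[in] k: The value of k.
[out] values: Top k values. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
[out] indices: Indices related to top k values. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
[in] total_bits: Total number of bits; must be evenly divisible by bits. Default: 32.
[in] bits: Number of bits used to derive the radix (radix = 1 << bits). Default: 4.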
Returns
N/A

Definition at line 63 of file CLTopKV2.cpp.

65{
66 _total_bits = total_bits;
67 _bits = bits;
68 _n = input->info()->tensor_shape()[0];
69
70 // _total_bits should be divided by _bits.
71 ARM_COMPUTE_ERROR_ON((_total_bits % _bits) != 0);
72
73 _k = k;
74 _radix = 1 << bits;
75
76 _input = input;
77 _values = values;
78 _indices = indices;
79
80 std::string topk_env;
81
82// Disable GPU implementation
83// TODO Enable GPU implementation with verification, or remove code
84// Invalid result on GPU
85#if 0
86 char *env = getenv("ACL_TOPKV2");
87 if (env)
88 topk_env = env;
89
90 if (topk_env == "GPU_SINGLE")
91 {
92 _qs_idx_buf = cl::Buffer(CLScheduler::get().context(),
93 CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n);
94 _qs_temp_buf = cl::Buffer(CLScheduler::get().context(),
95 CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n);
96
97 _qs_kernel.configure(input, values, indices, &_qs_idx_buf, &_qs_temp_buf, k, _n);
98 }
99 else if (topk_env == "GPU")
100 {
101 // n should be divided by (_GROUPS * _ITEMS)
102 ARM_COMPUTE_ERROR_ON((_n % (_GROUPS * _ITEMS)) != 0);
103
104 _hist_buf_size = _radix * _GROUPS * _ITEMS;
105 _glob_sum_buf_size = _HISTOSPLIT;
106
107 _hist_buf = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
108 sizeof(cl_int) * _hist_buf_size);
109 _glob_sum_buf =
110 cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
111 sizeof(cl_int) * _glob_sum_buf_size);
112 _temp_buf = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
113 sizeof(cl_int) * _glob_sum_buf_size);
114 _first_negative_idx_buf = cl::Buffer(CLScheduler::get().context(),
115 CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int));
116 _in_key_buf = cl::Buffer(CLScheduler::get().context(),
117 CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_float) * _n);
118 _out_key_buf = cl::Buffer(CLScheduler::get().context(),
119 CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_float) * _n);
120 _in_ind_buf = cl::Buffer(CLScheduler::get().context(),
121 CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n);
122 _out_ind_buf = cl::Buffer(CLScheduler::get().context(),
123 CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n);
124
125 _p_in_key_buf = &_in_key_buf;
126 _p_out_key_buf = &_out_key_buf;
127 _p_in_ind_buf = &_in_ind_buf;
128 _p_out_ind_buf = &_out_ind_buf;
129
130 _init_kernel.configure(input, _p_in_key_buf, _p_in_ind_buf, _n);
131 _hist_kernel.configure(&_hist_buf, bits, _n);
132 _scan_hist_kernel.configure(&_hist_buf, &_glob_sum_buf, bits);
133 _glob_scan_hist_kernel.configure(&_glob_sum_buf, &_temp_buf, bits);
134 _paste_hist_kernel.configure(&_hist_buf, &_glob_sum_buf, bits);
135 _reorder_kernel.configure(&_hist_buf, bits, _n);
136 _find_first_negative_kernel.configure(&_first_negative_idx_buf, _n);
137 _reorder_negatives_kernel.configure(&_first_negative_idx_buf, _n);
138 _store_kernel.configure(values, indices, k, _n);
139 }
140 else
141#endif // Disable GPU implementation
142 {
143 // DO NOTHING for CPU.
144 }
145}
#define _HISTOSPLIT
#define _GROUPS
#define _ITEMS

References _GROUPS, _HISTOSPLIT, and _ITEMS.

◆ operator=() [1/2]

CLTopKV2 & arm_compute::CLTopKV2::operator= ( CLTopKV2 &&  )
default

Assign a CLTopKV2 object.

Parameters
[in] CLTopKV2 object to assign. This object will be moved.

◆ operator=() [2/2]

CLTopKV2 & arm_compute::CLTopKV2::operator= ( const CLTopKV2 & )
delete

Prevent instances of this class from being copied (As this class contains pointers)

◆ run()

void arm_compute::CLTopKV2::run ( )
override

Run the kernels contained in the function. Depending on the value of the following environment variable, it works differently:

  • If the value of environment variable "ACL_TOPKV2" == "GPU_SINGLE", quick sort on GPU is used.
  • If the value of environment variable "ACL_TOPKV2" == "GPU", radix sort on GPU is used.
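  • For any other value, TopKV2 runs on CPU. (Note: in the listing below the GPU branches are compiled out with #if 0, so the CPU path is the only one taken.)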
    Returns
    N/A

Definition at line 147 of file CLTopKV2.cpp.

148{
149 std::string topk_env;
150#if 0
151 char *env = getenv("ACL_TOPKV2");
152 if (env)
153 topk_env = env;
154
155 if (topk_env == "GPU_SINGLE")
156 {
157 run_on_gpu_single_quicksort();
158 }
159 else if (topk_env == "GPU")
160 {
161 run_on_gpu();
162 }
163 else
164#endif
165 {
166 run_on_cpu();
167 }
168}

Referenced by package.infer.session::inference().


The documentation for this class was generated from the following files: