Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
141{
142 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
143 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
144
145 const_cast<ICLTensor *>(_lookups)->map(queue);
146 const_cast<ICLTensor *>(_keys)->map(queue);
147 _hits->map(queue);
148 _lookup_indices->map(queue);
149
150
151 const int32_t *lookups_buf =
152 reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_lookups)->buffer());
153 const int32_t *keys_buf = reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_keys)->buffer());
154 uint8_t *hits_buf = reinterpret_cast<uint8_t *>(_hits->buffer());
155 int32_t *lookup_indices_buf = reinterpret_cast<int32_t *>(_lookup_indices->buffer());
156
157 std::map<int32_t, size_t> key_map;
158 const size_t keys_num = _keys->info()->dimension(0);
159 for (size_t key_index = 0; key_index < keys_num; key_index++)
160 {
161 key_map[keys_buf[key_index]] = key_index;
162 }
163
164 const size_t lookups_num = _lookups->info()->dimension(0);
165 for (size_t i = 0; i < lookups_num; ++i)
166 {
167 const auto lookup_value = lookups_buf[i];
168 const auto it = key_map.find(lookup_value);
169 if (it != key_map.end())
170 {
171#if defined(ARM_COMPUTE_DEBUG_ENABLED)
172 if (it->second >= lookups_num)
173 ARM_COMPUTE_ERROR("HashTable Lookup: index out of bounds.");
174#endif
175 lookup_indices_buf[i] = static_cast<int32_t>(it->second);
176 hits_buf[i] = static_cast<uint8_t>(1);
177 }
178 else
179 {
180 lookup_indices_buf[i] = -1;
181 hits_buf[i] = static_cast<uint8_t>(0);
182 }
183 }
184
185 const_cast<ICLTensor *>(_lookups)->unmap(queue);
186 const_cast<ICLTensor *>(_keys)->unmap(queue);
187 _hits->unmap(queue);
188 _lookup_indices->unmap(queue);
189
190 Window win = window.collapse(ICLKernel::window(), 2, 4);
191
192 Window win_lookup;
193 win_lookup.set(Window::DimX, Window::Dimension(0, 0, 0));
194
195 do
196 {
197 unsigned int idx = 0;
198 add_4D_tensor_argument(idx, _input, win);
199 add_4D_tensor_argument(idx, _output, win);
200 add_1D_tensor_argument(idx, _lookup_indices.get(), win_lookup);
201
202 enqueue(queue, *this, win);
203 } while (window.slide_window_slice_4D(win) && window.slide_window_slice_1D(win_lookup));
204}