MLIR  22.0.0git
LevelZeroRuntimeWrappers.cpp
Go to the documentation of this file.
1 //===- LevelZeroRuntimeWrappers.cpp - MLIR Level Zero (L0) wrapper library-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implements wrappers around the Level Zero (L0) runtime library with C linkage
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/Twine.h"
14 
15 #include "level_zero/ze_api.h"
16 #include <cassert>
17 #include <deque>
18 #include <exception>
19 #include <functional>
20 #include <iostream>
21 #include <limits>
22 #include <unordered_set>
23 #include <vector>
24 
25 namespace {
// Invokes `func` and turns any escaping exception into a fatal error,
// printing the message (when available) before aborting. Used to keep
// exceptions from crossing the C wrapper boundary.
template <typename Fn>
auto catchAll(Fn &&func) {
  try {
    return func();
  } catch (const std::exception &ex) {
    std::cerr << "An exception was thrown: " << ex.what() << std::endl;
    std::abort();
  } catch (...) {
    std::cerr << "An unknown exception was thrown." << std::endl;
    std::abort();
  }
}
38 
// Checks the result of a Level Zero API call; on failure prints the driver's
// last error description and aborts. Wrapped in do/while(0) so a use like
// `if (cond) L0_SAFE_CALL(x); else ...` expands to a single statement.
// `errorString` is pre-initialized so a driver that does not set it cannot
// cause an uninitialized read.
#define L0_SAFE_CALL(call)                                                     \
  do {                                                                         \
    ze_result_t status = (call);                                               \
    if (status != ZE_RESULT_SUCCESS) {                                         \
      const char *errorString = nullptr;                                       \
      zeDriverGetLastErrorDescription(NULL, &errorString);                     \
      std::cerr << "L0 error " << status << ": "                               \
                << (errorString ? errorString : "(no description)")            \
                << std::endl;                                                  \
      std::abort();                                                            \
    }                                                                          \
  } while (0)
49 } // namespace
50 
51 //===----------------------------------------------------------------------===//
52 // L0 RT context & device setters
53 //===----------------------------------------------------------------------===//
54 
55 // Returns the L0 driver handle for the given index. Default index is 0
56 // (i.e., returns the first driver handle of the available drivers).
57 
58 static ze_driver_handle_t getDriver(uint32_t idx = 0) {
59  ze_init_driver_type_desc_t driver_type = {};
60  driver_type.stype = ZE_STRUCTURE_TYPE_INIT_DRIVER_TYPE_DESC;
61  driver_type.flags = ZE_INIT_DRIVER_TYPE_FLAG_GPU;
62  driver_type.pNext = nullptr;
63  uint32_t driverCount{0};
64  thread_local static std::vector<ze_driver_handle_t> drivers;
65  thread_local static bool isDriverInitialised{false};
66  if (isDriverInitialised && idx < drivers.size())
67  return drivers[idx];
68  L0_SAFE_CALL(zeInitDrivers(&driverCount, nullptr, &driver_type));
69  if (!driverCount)
70  throw std::runtime_error("No L0 drivers found.");
71  drivers.resize(driverCount);
72  L0_SAFE_CALL(zeInitDrivers(&driverCount, drivers.data(), &driver_type));
73  if (idx >= driverCount)
74  throw std::runtime_error((llvm::Twine("Requested driver idx out-of-bound, "
75  "number of availabe drivers: ") +
76  std::to_string(driverCount))
77  .str());
78  isDriverInitialised = true;
79  return drivers[idx];
80 }
81 
82 static ze_device_handle_t getDevice(const uint32_t driverIdx = 0,
83  const int32_t devIdx = 0) {
84  thread_local static ze_device_handle_t l0Device;
85  thread_local int32_t currDevIdx{-1};
86  thread_local uint32_t currDriverIdx{0};
87  if (currDriverIdx == driverIdx && currDevIdx == devIdx)
88  return l0Device;
89  auto driver = getDriver(driverIdx);
90  uint32_t deviceCount{0};
91  L0_SAFE_CALL(zeDeviceGet(driver, &deviceCount, nullptr));
92  if (!deviceCount)
93  throw std::runtime_error("getDevice failed: did not find L0 device.");
94  if (static_cast<int>(deviceCount) < devIdx + 1)
95  throw std::runtime_error("getDevice failed: devIdx out-of-bounds.");
96  std::vector<ze_device_handle_t> devices(deviceCount);
97  L0_SAFE_CALL(zeDeviceGet(driver, &deviceCount, devices.data()));
98  l0Device = devices[devIdx];
99  currDriverIdx = driverIdx;
100  currDevIdx = devIdx;
101  return l0Device;
102 }
103 
// Returns the default L0 context of the default driver.
// The context is created once per thread on first use and then cached.
// NOTE(review): the cached context ignores `driver` on subsequent calls —
// confirm all callers pass the same (default) driver.
static ze_context_handle_t getContext(ze_driver_handle_t driver) {
  thread_local static ze_context_handle_t context;
  thread_local static bool isContextInitialised{false};
  if (isContextInitialised)
    return context;
  ze_context_desc_t ctxtDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
  L0_SAFE_CALL(zeContextCreate(driver, &ctxtDesc, &context));
  isContextInitialised = true;
  return context;
}
115 
116 //===----------------------------------------------------------------------===//
117 // L0 RT helper structs
118 //===----------------------------------------------------------------------===//
119 
  // unique_ptr deleter: destroys the L0 context when ownership ends.
  void operator()(ze_context_handle_t ctx) const {
    if (ctx)
      L0_SAFE_CALL(zeContextDestroy(ctx));
  }
125 };
126 
  // unique_ptr deleter: destroys the L0 command list when ownership ends.
  void operator()(ze_command_list_handle_t cmdList) const {
    if (cmdList)
      L0_SAFE_CALL(zeCommandListDestroy(cmdList));
  }
132 };
134  std::unique_ptr<std::remove_pointer<ze_context_handle_t>::type,
137  std::unique_ptr<std::remove_pointer<ze_command_list_handle_t>::type,
140  ze_driver_handle_t driver{nullptr};
141  ze_device_handle_t device{nullptr};
143  // Usually, one immediate command list with ordinal 0 suffices for
144  // both copy and compute ops, but leaves HW underutilized.
146  // Copy engines can be used for both memcpy and memset, but
147  // they have limitations for memset pattern size (e.g., 1 byte).
150 
151  L0RTContextWrapper() = default;
152  L0RTContextWrapper(const uint32_t driverIdx = 0, const int32_t devIdx = 0)
153  : driver(getDriver(driverIdx)), device(getDevice(devIdx)) {
154  // Create context
155  ze_context_handle_t ctx = getContext(driver);
156  context.reset(ctx);
157 
158  // Determine ordinals
159  uint32_t computeEngineOrdinal = -1u, copyEngineOrdinal = -1u;
160  ze_device_properties_t deviceProperties{};
161  L0_SAFE_CALL(zeDeviceGetProperties(device, &deviceProperties));
162  uint32_t queueGroupCount = 0;
163  L0_SAFE_CALL(zeDeviceGetCommandQueueGroupProperties(
164  device, &queueGroupCount, nullptr));
165  std::vector<ze_command_queue_group_properties_t> queueGroupProperties(
166  queueGroupCount);
167  L0_SAFE_CALL(zeDeviceGetCommandQueueGroupProperties(
168  device, &queueGroupCount, queueGroupProperties.data()));
169 
170  for (uint32_t queueGroupIdx = 0; queueGroupIdx < queueGroupCount;
171  ++queueGroupIdx) {
172  const auto &group = queueGroupProperties[queueGroupIdx];
173  if (group.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)
174  computeEngineOrdinal = queueGroupIdx;
175  else if (group.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) {
176  copyEngineOrdinal = queueGroupIdx;
177  copyEngineMaxMemoryFillPatternSize = group.maxMemoryFillPatternSize;
178  }
179  if (copyEngineOrdinal != -1u && computeEngineOrdinal != -1u)
180  break;
181  }
182 
183  // Fallback to the default queue if no dedicated copy queue is available.
184  if (copyEngineOrdinal == -1u)
185  copyEngineOrdinal = computeEngineOrdinal;
186 
187  assert(copyEngineOrdinal != -1u && computeEngineOrdinal != -1u &&
188  "Expected two engines to be available.");
189 
190  // Create copy command list
191  ze_command_queue_desc_t cmdQueueDesc{
192  ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
193  nullptr,
194  copyEngineOrdinal, // ordinal
195  0, // index (assume one physical engine in the group)
196  0, // flags
197  ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS,
198  ZE_COMMAND_QUEUE_PRIORITY_NORMAL};
199 
200  ze_command_list_handle_t rawCmdListCopy = nullptr;
201  L0_SAFE_CALL(zeCommandListCreateImmediate(context.get(), device,
202  &cmdQueueDesc, &rawCmdListCopy));
203  immCmdListCopy.reset(rawCmdListCopy);
204 
205  // Create compute command list
206  cmdQueueDesc.ordinal = computeEngineOrdinal;
207  ze_command_list_handle_t rawCmdListCompute = nullptr;
208  L0_SAFE_CALL(zeCommandListCreateImmediate(
209  context.get(), device, &cmdQueueDesc, &rawCmdListCompute));
210  immCmdListCompute.reset(rawCmdListCompute);
211  }
214  // Allow move
215  L0RTContextWrapper(L0RTContextWrapper &&) noexcept = default;
216  L0RTContextWrapper &operator=(L0RTContextWrapper &&) noexcept = default;
217  ~L0RTContextWrapper() = default;
218 };
219 
  // unique_ptr deleter: destroys the L0 event when ownership ends.
  void operator()(ze_event_handle_t event) const {
    if (event)
      L0_SAFE_CALL(zeEventDestroy(event));
  }
225 };
226 
  // unique_ptr deleter: destroys the L0 event pool when ownership ends.
  void operator()(ze_event_pool_handle_t pool) const {
    if (pool)
      L0_SAFE_CALL(zeEventPoolDestroy(pool));
  }
232 };
233 
235  std::unique_ptr<std::remove_pointer<ze_event_handle_t>::type,
238  std::unique_ptr<std::remove_pointer<ze_event_pool_handle_t>::type,
240 
241 // L0 only supports pre-determined sizes of event pools,
242 // implement a runtime data structure to avoid running out of events.
243 
245  constexpr static size_t numEventsPerPool{128};
246 
247  std::vector<UniqueZeEventPool> eventPools;
248  std::vector<UniqueZeEvent> availableEvents;
249  std::unordered_map<ze_event_handle_t, UniqueZeEvent> takenEvents;
250 
251  // Limit the number of events to avoid running out of memory.
252  // The limit is set to 32K events, which should be sufficient for most use
253  // cases.
254  size_t maxEventsCount{32768}; // 32K events
255  size_t currentEventsLimit{0};
256  size_t currentEventsCnt{0};
258 
  // Binds the pool to a runtime context and eagerly creates the first
  // fixed-size L0 event pool so takeEvent() can serve immediately.
  DynamicEventPool(L0RTContextWrapper *rtCtx) : rtCtx(rtCtx) {
    createNewPool(numEventsPerPool);
  }
262 
265 
266  // Allow move
267  DynamicEventPool(DynamicEventPool &&) noexcept = default;
268  DynamicEventPool &operator=(DynamicEventPool &&) noexcept = default;
269 
271  assert(takenEvents.empty() && "Some events were not released");
272  }
273 
274  void createNewPool(size_t numEvents) {
275  ze_event_pool_desc_t eventPoolDesc = {};
276  eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
277  eventPoolDesc.count = numEvents;
278 
279  ze_event_pool_handle_t rawPool = nullptr;
280  L0_SAFE_CALL(zeEventPoolCreate(rtCtx->context.get(), &eventPoolDesc, 1,
281  &rtCtx->device, &rawPool));
282 
283  eventPools.emplace_back(UniqueZeEventPool(rawPool));
284  currentEventsLimit += numEvents;
285  }
286 
  // Hands out an event: reuses a previously released one when possible,
  // otherwise creates a fresh event in the newest pool, growing the set of
  // pools when capacity is exhausted. Throws std::runtime_error once
  // maxEventsCount events exist. Pair each takeEvent() with releaseEvent().
  ze_event_handle_t takeEvent() {
    ze_event_handle_t rawEvent = nullptr;

    if (!availableEvents.empty()) {
      // Reuse one
      auto uniqueEvent = std::move(availableEvents.back());
      availableEvents.pop_back();
      rawEvent = uniqueEvent.get();
      takenEvents[rawEvent] = std::move(uniqueEvent);
    } else {
      if (currentEventsCnt >= maxEventsCount) {
        throw std::runtime_error("DynamicEventPool: reached max events limit");
      }
      if (currentEventsCnt == currentEventsLimit)
        createNewPool(numEventsPerPool);

      // New events always come from the most recent pool. Pools are created
      // exactly on numEventsPerPool boundaries, so the slot index inside that
      // pool is currentEventsCnt modulo numEventsPerPool.
      ze_event_desc_t eventDesc = {
          ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr,
          static_cast<uint32_t>(currentEventsCnt % numEventsPerPool),
          ZE_EVENT_SCOPE_FLAG_DEVICE, ZE_EVENT_SCOPE_FLAG_HOST};

      ze_event_handle_t newEvent = nullptr;
      L0_SAFE_CALL(
          zeEventCreate(eventPools.back().get(), &eventDesc, &newEvent));

      takenEvents[newEvent] = UniqueZeEvent(newEvent);
      rawEvent = newEvent;
      currentEventsCnt++;
    }

    return rawEvent;
  }
319 
320  void releaseEvent(ze_event_handle_t event) {
321  auto it = takenEvents.find(event);
322  assert(it != takenEvents.end() &&
323  "Attempting to release unknown or already released event");
324 
325  L0_SAFE_CALL(zeEventHostReset(event));
326  availableEvents.emplace_back(std::move(it->second));
327  takenEvents.erase(it);
328  }
329 };
330 
332  thread_local static L0RTContextWrapper rtContext(0);
333  return rtContext;
334 }
335 
337  thread_local static DynamicEventPool dynEventPool{&getRtContext()};
338  return dynEventPool;
339 }
340 
342  // avoid event pointer invalidations
343  std::deque<ze_event_handle_t> implicitEventStack;
345 
  // Binds the stream to the event pool that backs its implicit events.
  StreamWrapper(DynamicEventPool &dynEventPool) : dynEventPool(dynEventPool) {}
  // Drain all pending work (and recycle implicit events) before destruction.
  ~StreamWrapper() { sync(); }
348 
349  ze_event_handle_t *getLastImplicitEventPtr() {
350  // Assume current implicit events will not be used after `sync`.
351  return implicitEventStack.size() ? &implicitEventStack.back() : nullptr;
352  }
353 
  // Host-waits on `explicitEvent` if given, otherwise on the stream's most
  // recently enqueued implicit event (if any), then recycles every implicit
  // event back into the pool. An explicit event is not released here — the
  // caller owns it and must release it via mgpuEventDestroy.
  void sync(ze_event_handle_t explicitEvent = nullptr) {
    ze_event_handle_t syncEvent{nullptr};
    if (!explicitEvent) {
      ze_event_handle_t *lastImplicitEventPtr = getLastImplicitEventPtr();
      syncEvent = lastImplicitEventPtr ? *lastImplicitEventPtr : nullptr;
    } else {
      syncEvent = explicitEvent;
    }
    if (syncEvent)
      L0_SAFE_CALL(zeEventHostSynchronize(
          syncEvent, std::numeric_limits<uint64_t>::max()));
    // All of the "implicit" events were signaled and are of no use, release
    // them. "explicit" event must be "released" via mgpuEventDestroy
    for (auto event : implicitEventStack)
      dynEventPool.releaseEvent(event);
    implicitEventStack.clear();
  }
371 
  // Appends an asynchronous op to the stream. Takes a fresh implicit event
  // for the op to signal and chains the op behind the previously enqueued
  // implicit event (if any). `op` is invoked as
  //   op(signalEvent, numWaitEvents, waitEventsPtr)
  // matching the zeCommandListAppend* signal/wait-list convention.
  template <typename Func>
  void enqueueOp(Func &&op) {
    ze_event_handle_t newImplicitEvent = dynEventPool.takeEvent();
    ze_event_handle_t *lastImplicitEventPtr = getLastImplicitEventPtr();
    const uint32_t numWaitEvents = lastImplicitEventPtr ? 1 : 0;
    std::forward<Func>(op)(newImplicitEvent, numWaitEvents,
                           lastImplicitEventPtr);
    implicitEventStack.push_back(newImplicitEvent);
  }
381 };
382 
383 static ze_module_handle_t loadModule(const void *data, size_t dataSize) {
384  assert(data);
385  ze_module_handle_t zeModule;
386  ze_module_desc_t desc = {ZE_STRUCTURE_TYPE_MODULE_DESC,
387  nullptr,
388  ZE_MODULE_FORMAT_IL_SPIRV,
389  dataSize,
390  (const uint8_t *)data,
391  nullptr,
392  nullptr};
393  ze_module_build_log_handle_t buildLogHandle;
394  ze_result_t result =
395  zeModuleCreate(getRtContext().context.get(), getRtContext().device, &desc,
396  &zeModule, &buildLogHandle);
397  if (result != ZE_RESULT_SUCCESS) {
398  std::cerr << "Error creating module, error code: " << result << std::endl;
399  size_t logSize = 0;
400  L0_SAFE_CALL(zeModuleBuildLogGetString(buildLogHandle, &logSize, nullptr));
401  std::string buildLog(" ", logSize);
402  L0_SAFE_CALL(
403  zeModuleBuildLogGetString(buildLogHandle, &logSize, buildLog.data()));
404  std::cerr << "Build log:\n" << buildLog << std::endl;
405  std::abort();
406  }
407  return zeModule;
408 }
409 
410 //===----------------------------------------------------------------------===//
411 // L0 Wrappers definition
412 //===----------------------------------------------------------------------===//
413 
415  return new StreamWrapper(getDynamicEventPool());
416 }
417 
418 extern "C" void mgpuStreamSynchronize(StreamWrapper *stream) {
419  if (stream)
420  stream->sync();
421 }
422 
423 extern "C" void mgpuStreamDestroy(StreamWrapper *stream) { delete stream; }
424 
// Makes the stream wait on `event` by synchronizing on it and recycling the
// stream's implicit events.
// NOTE(review): sync() performs a *host* wait on the event rather than a
// device-side wait — confirm this matches the intended stream semantics.
extern "C" void mgpuStreamWaitEvent(StreamWrapper *stream,
                                    ze_event_handle_t event) {
  assert(stream && "Invalid stream");
  assert(event && "Invalid event");
  stream->sync(event);
}
431 
// Creates (or reuses) an event from the thread-local dynamic event pool.
// Must be released with mgpuEventDestroy.
extern "C" ze_event_handle_t mgpuEventCreate() {
  return getDynamicEventPool().takeEvent();
}
435 
436 extern "C" void mgpuEventDestroy(ze_event_handle_t event) {
437  return getDynamicEventPool().releaseEvent(event);
438 }
439 
// Host-waits (with an infinite timeout) until `event` is signaled, then
// resets it so it can be signaled again.
extern "C" void mgpuEventSynchronize(ze_event_handle_t event) {
  L0_SAFE_CALL(
      zeEventHostSynchronize(event, std::numeric_limits<uint64_t>::max()));
  L0_SAFE_CALL(zeEventHostReset(event));
}
445 
// Appends a signal of `event` on BOTH the copy and the compute immediate
// command lists of the thread-local runtime context.
// NOTE(review): `stream` is unused here — the signal is recorded on the
// global context's lists, not ordered after the stream's implicit events;
// confirm this is intentional.
extern "C" void mgpuEventRecord(ze_event_handle_t event,
                                StreamWrapper *stream) {
  L0_SAFE_CALL(zeCommandListAppendSignalEvent(
      getRtContext().immCmdListCopy.get(), event));
  L0_SAFE_CALL(zeCommandListAppendSignalEvent(
      getRtContext().immCmdListCompute.get(), event));
}
453 
// Allocates `size` bytes with 64-byte alignment: shared (host+device) memory
// when `isShared` is true, device-only memory otherwise. `stream` is not
// used — the allocation happens immediately. A null result raises an
// exception which catchAll converts into an abort.
extern "C" void *mgpuMemAlloc(uint64_t size, StreamWrapper *stream,
                              bool isShared) {
  return catchAll([&]() {
    void *memPtr = nullptr;
    constexpr size_t alignment{64};
    ze_device_mem_alloc_desc_t deviceDesc = {};
    deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
    if (isShared) {
      ze_host_mem_alloc_desc_t hostDesc = {};
      hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
      L0_SAFE_CALL(zeMemAllocShared(getRtContext().context.get(), &deviceDesc,
                                    &hostDesc, size, alignment,
                                    getRtContext().device, &memPtr));
    } else {
      L0_SAFE_CALL(zeMemAllocDevice(getRtContext().context.get(), &deviceDesc,
                                    size, alignment, getRtContext().device,
                                    &memPtr));
    }
    if (!memPtr)
      throw std::runtime_error("mem allocation failed!");
    return memPtr;
  });
}
477 
478 extern "C" void mgpuMemFree(void *ptr, StreamWrapper *stream) {
479  stream->sync();
480  if (ptr)
481  L0_SAFE_CALL(zeMemFree(getRtContext().context.get(), ptr));
482 }
483 
// Enqueues an asynchronous copy of `sizeBytes` bytes from `src` to `dst` on
// the copy-engine immediate command list, ordered after the stream's
// previously enqueued work via its implicit event chain.
extern "C" void mgpuMemcpy(void *dst, void *src, size_t sizeBytes,
                           StreamWrapper *stream) {
  stream->enqueueOp([&](ze_event_handle_t newEvent, uint32_t numWaitEvents,
                        ze_event_handle_t *waitEvents) {
    L0_SAFE_CALL(zeCommandListAppendMemoryCopy(
        getRtContext().immCmdListCopy.get(), dst, src, sizeBytes, newEvent,
        numWaitEvents, waitEvents));
  });
}
493 
// Enqueues an asynchronous fill of `count` elements of `value` at `dst`.
// Uses the copy-engine list when the copy engine supports the pattern size,
// otherwise falls back to the compute-engine list (copy engines may only
// support small fill patterns, e.g. 1 byte).
template <typename PATTERN_TYPE>
void mgpuMemset(void *dst, PATTERN_TYPE value, size_t count,
                StreamWrapper *stream) {
  L0RTContextWrapper &rtContext = getRtContext();
  auto listType =
      rtContext.copyEngineMaxMemoryFillPatternSize >= sizeof(PATTERN_TYPE)
          ? rtContext.immCmdListCopy.get()
          : rtContext.immCmdListCompute.get();
  stream->enqueueOp([&](ze_event_handle_t newEvent, uint32_t numWaitEvents,
                        ze_event_handle_t *waitEvents) {
    L0_SAFE_CALL(zeCommandListAppendMemoryFill(
        listType, dst, &value, sizeof(PATTERN_TYPE),
        count * sizeof(PATTERN_TYPE), newEvent, numWaitEvents, waitEvents));
  });
}
509 extern "C" void mgpuMemset32(void *dst, unsigned int value, size_t count,
510  StreamWrapper *stream) {
511  mgpuMemset<unsigned int>(dst, value, count, stream);
512 }
513 
514 extern "C" void mgpuMemset16(void *dst, unsigned short value, size_t count,
515  StreamWrapper *stream) {
516  mgpuMemset<unsigned short>(dst, value, count, stream);
517 }
518 
// Loads a SPIR-V blob as an L0 module; exceptions from loadModule are turned
// into an abort by catchAll.
extern "C" ze_module_handle_t mgpuModuleLoad(const void *data,
                                             size_t gpuBlobSize) {
  return catchAll([&]() { return loadModule(data, gpuBlobSize); });
}
523 
// Creates a kernel handle for the kernel named `name` inside `module`.
// Aborts (via L0_SAFE_CALL) if the kernel does not exist.
extern "C" ze_kernel_handle_t mgpuModuleGetFunction(ze_module_handle_t module,
                                                    const char *name) {
  assert(module && name);
  ze_kernel_handle_t zeKernel;
  ze_kernel_desc_t desc = {};
  desc.pKernelName = name;
  L0_SAFE_CALL(zeKernelCreate(module, &desc, &zeKernel));
  return zeKernel;
}
533 
// Launches `kernel` asynchronously on the stream's compute command list with
// the given grid/block dimensions. When sharedMemBytes > 0, the last entry
// of `params` is treated as the dynamic shared-local-memory argument and is
// bound by size only (null data pointer), per the L0 SLM convention; the
// remaining entries are set as pointer-sized kernel arguments.
extern "C" void mgpuLaunchKernel(ze_kernel_handle_t kernel, size_t gridX,
                                 size_t gridY, size_t gridZ, size_t blockX,
                                 size_t blockY, size_t blockZ,
                                 size_t sharedMemBytes, StreamWrapper *stream,
                                 void **params, void ** /*extra*/,
                                 size_t paramsCount) {

  if (sharedMemBytes > 0) {
    paramsCount = paramsCount - 1; // Last param is shared memory size
    L0_SAFE_CALL(
        zeKernelSetArgumentValue(kernel, paramsCount, sharedMemBytes, nullptr));
  }
  for (size_t i = 0; i < paramsCount; ++i)
    L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, static_cast<uint32_t>(i),
                                          sizeof(void *), params[i]));
  L0_SAFE_CALL(zeKernelSetGroupSize(kernel, blockX, blockY, blockZ));
  ze_group_count_t dispatch;
  dispatch.groupCountX = static_cast<uint32_t>(gridX);
  dispatch.groupCountY = static_cast<uint32_t>(gridY);
  dispatch.groupCountZ = static_cast<uint32_t>(gridZ);
  stream->enqueueOp([&](ze_event_handle_t newEvent, uint32_t numWaitEvents,
                        ze_event_handle_t *waitEvents) {
    L0_SAFE_CALL(zeCommandListAppendLaunchKernel(
        getRtContext().immCmdListCompute.get(), kernel, &dispatch, newEvent,
        numWaitEvents, waitEvents));
  });
}
561 
// Destroys a module created by mgpuModuleLoad. Callers must ensure no
// kernels from the module are still in flight.
extern "C" void mgpuModuleUnload(ze_module_handle_t module) {
  L0_SAFE_CALL(zeModuleDestroy(module));
}
565 
566 extern "C" void mgpuSetDefaultDevice(int32_t devIdx) {
567  catchAll([&]() {
568  // For now, a user must ensure that streams and events complete
569  // and are destroyed before switching a device.
570  getRtContext() = L0RTContextWrapper(devIdx);
572  });
573 }
StreamWrapper * mgpuStreamCreate()
void mgpuSetDefaultDevice(int32_t devIdx)
static ze_module_handle_t loadModule(const void *data, size_t dataSize)
void mgpuMemset16(void *dst, unsigned short value, size_t count, StreamWrapper *stream)
L0RTContextWrapper & getRtContext()
void * mgpuMemAlloc(uint64_t size, StreamWrapper *stream, bool isShared)
#define L0_SAFE_CALL(call)
static ze_device_handle_t getDevice(const uint32_t driverIdx=0, const int32_t devIdx=0)
void mgpuStreamDestroy(StreamWrapper *stream)
ze_module_handle_t mgpuModuleLoad(const void *data, size_t gpuBlobSize)
void mgpuEventSynchronize(ze_event_handle_t event)
void mgpuModuleUnload(ze_module_handle_t module)
static ze_driver_handle_t getDriver(uint32_t idx=0)
void mgpuMemset(void *dst, PATTERN_TYPE value, size_t count, StreamWrapper *stream)
void mgpuMemset32(void *dst, unsigned int value, size_t count, StreamWrapper *stream)
std::unique_ptr< std::remove_pointer< ze_event_pool_handle_t >::type, ZeEventPoolDeleter > UniqueZeEventPool
void mgpuEventDestroy(ze_event_handle_t event)
void mgpuStreamSynchronize(StreamWrapper *stream)
ze_kernel_handle_t mgpuModuleGetFunction(ze_module_handle_t module, const char *name)
static ze_context_handle_t getContext(ze_driver_handle_t driver)
std::unique_ptr< std::remove_pointer< ze_event_handle_t >::type, ZeEventDeleter > UniqueZeEvent
void mgpuMemcpy(void *dst, void *src, size_t sizeBytes, StreamWrapper *stream)
std::unique_ptr< std::remove_pointer< ze_context_handle_t >::type, ZeContextDeleter > UniqueZeContext
void mgpuStreamWaitEvent(StreamWrapper *stream, ze_event_handle_t event)
void mgpuMemFree(void *ptr, StreamWrapper *stream)
DynamicEventPool & getDynamicEventPool()
std::unique_ptr< std::remove_pointer< ze_command_list_handle_t >::type, ZeCommandListDeleter > UniqueZeCommandList
void mgpuEventRecord(ze_event_handle_t event, StreamWrapper *stream)
ze_event_handle_t mgpuEventCreate()
void mgpuLaunchKernel(ze_kernel_handle_t kernel, size_t gridX, size_t gridY, size_t gridZ, size_t blockX, size_t blockY, size_t blockZ, size_t sharedMemBytes, StreamWrapper *stream, void **params, void **, size_t paramsCount)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
void createNewPool(size_t numEvents)
L0RTContextWrapper * rtCtx
void releaseEvent(ze_event_handle_t event)
DynamicEventPool(DynamicEventPool &&) noexcept=default
DynamicEventPool(L0RTContextWrapper *rtCtx)
std::vector< UniqueZeEventPool > eventPools
DynamicEventPool & operator=(const DynamicEventPool &)=delete
std::unordered_map< ze_event_handle_t, UniqueZeEvent > takenEvents
ze_event_handle_t takeEvent()
std::vector< UniqueZeEvent > availableEvents
DynamicEventPool(const DynamicEventPool &)=delete
UniqueZeCommandList immCmdListCopy
L0RTContextWrapper()=default
L0RTContextWrapper(L0RTContextWrapper &&) noexcept=default
L0RTContextWrapper & operator=(const L0RTContextWrapper &)=delete
L0RTContextWrapper(const uint32_t driverIdx=0, const int32_t devIdx=0)
UniqueZeCommandList immCmdListCompute
L0RTContextWrapper(const L0RTContextWrapper &)=delete
ze_event_handle_t * getLastImplicitEventPtr()
void sync(ze_event_handle_t explicitEvent=nullptr)
StreamWrapper(DynamicEventPool &dynEventPool)
std::deque< ze_event_handle_t > implicitEventStack
DynamicEventPool & dynEventPool
void operator()(ze_command_list_handle_t cmdList) const
void operator()(ze_context_handle_t ctx) const
void operator()(ze_event_handle_t event) const
void operator()(ze_event_pool_handle_t pool) const