MLIR 23.0.0git
LevelZeroRuntimeWrappers.cpp
Go to the documentation of this file.
1//===- LevelZeroRuntimeWrappers.cpp - MLIR Level Zero (L0) wrapper library-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implements wrappers around the Level Zero (L0) runtime library with C linkage
10//
11//===----------------------------------------------------------------------===//
12
13#include "level_zero/ze_api.h"
14#include <cassert>
15#include <cstring>
16#include <deque>
17#include <exception>
18#include <functional>
19#include <iostream>
20#include <limits>
21#include <memory>
22#include <stdexcept>
23#include <unordered_set>
24#include <vector>
25
26namespace {
// Runs `callable` and converts any escaping exception into a hard abort.
// These wrappers are invoked across a C ABI boundary, so exceptions must
// never propagate to the caller.
template <typename F>
auto catchAll(F &&callable) {
  try {
    return callable();
  } catch (const std::exception &err) {
    std::cerr << "An exception was thrown: " << err.what() << std::endl;
    std::abort();
  } catch (...) {
    std::cerr << "An unknown exception was thrown." << std::endl;
    std::abort();
  }
}
39
// Checks the result of a Level Zero API call and aborts with the driver's
// last error description on failure. Wrapped in do/while(0) so the macro
// expands to a single statement and is safe in unbraced if/else bodies.
// `errorString` is guarded against the query itself failing and leaving the
// pointer unset.
#define L0_SAFE_CALL(call)                                                     \
  do {                                                                         \
    ze_result_t status = (call);                                               \
    if (status != ZE_RESULT_SUCCESS) {                                         \
      const char *errorString = nullptr;                                       \
      zeDriverGetLastErrorDescription(NULL, &errorString);                     \
      std::cerr << "L0 error " << status << ": "                               \
                << (errorString ? errorString : "<unknown>") << std::endl;     \
      std::abort();                                                            \
    }                                                                          \
  } while (0)
50} // namespace
51
52//===----------------------------------------------------------------------===//
53// L0 RT context & device setters
54//===----------------------------------------------------------------------===//
55
56// Returns the L0 driver handle for the given index. Default index is 0
57// (i.e., returns the first driver handle of the available drivers).
58
59static ze_driver_handle_t getDriver(uint32_t idx = 0) {
60 ze_init_driver_type_desc_t driver_type = {};
61 driver_type.stype = ZE_STRUCTURE_TYPE_INIT_DRIVER_TYPE_DESC;
62 driver_type.flags = ZE_INIT_DRIVER_TYPE_FLAG_GPU;
63 driver_type.pNext = nullptr;
64 uint32_t driverCount{0};
65 thread_local static std::vector<ze_driver_handle_t> drivers;
66 thread_local static bool isDriverInitialised{false};
67 if (isDriverInitialised && idx < drivers.size())
68 return drivers[idx];
69 L0_SAFE_CALL(zeInitDrivers(&driverCount, nullptr, &driver_type));
70 if (!driverCount)
71 throw std::runtime_error("No L0 drivers found.");
72 drivers.resize(driverCount);
73 L0_SAFE_CALL(zeInitDrivers(&driverCount, drivers.data(), &driver_type));
74 if (idx >= driverCount)
75 throw std::runtime_error(std::string("Requested driver idx out-of-bound, "
76 "number of availabe drivers: ") +
77 std::to_string(driverCount));
78 isDriverInitialised = true;
79 return drivers[idx];
80}
81
82static ze_device_handle_t getDevice(const uint32_t driverIdx = 0,
83 const int32_t devIdx = 0) {
84 thread_local static ze_device_handle_t l0Device;
85 thread_local int32_t currDevIdx{-1};
86 thread_local uint32_t currDriverIdx{0};
87 if (currDriverIdx == driverIdx && currDevIdx == devIdx)
88 return l0Device;
89 auto driver = getDriver(driverIdx);
90 uint32_t deviceCount{0};
91 L0_SAFE_CALL(zeDeviceGet(driver, &deviceCount, nullptr));
92 if (!deviceCount)
93 throw std::runtime_error("getDevice failed: did not find L0 device.");
94 if (static_cast<int>(deviceCount) < devIdx + 1)
95 throw std::runtime_error("getDevice failed: devIdx out-of-bounds.");
96 std::vector<ze_device_handle_t> devices(deviceCount);
97 L0_SAFE_CALL(zeDeviceGet(driver, &deviceCount, devices.data()));
98 l0Device = devices[devIdx];
99 currDriverIdx = driverIdx;
100 currDevIdx = devIdx;
101 return l0Device;
102}
103
// Returns the default L0 context of the default driver.
// The context is created once per thread and then cached.
// NOTE(review): on subsequent calls the `driver` argument is ignored and the
// cached context is returned — confirm all callers pass the same driver.
static ze_context_handle_t getContext(ze_driver_handle_t driver) {
  thread_local static ze_context_handle_t context;
  thread_local static bool isContextInitialised{false};
  if (isContextInitialised)
    return context;
  ze_context_desc_t ctxtDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
  L0_SAFE_CALL(zeContextCreate(driver, &ctxtDesc, &context));
  isContextInitialised = true;
  return context;
}
115
116//===----------------------------------------------------------------------===//
117// L0 RT helper structs
118//===----------------------------------------------------------------------===//
119
121 void operator()(ze_context_handle_t ctx) const {
122 if (ctx)
123 L0_SAFE_CALL(zeContextDestroy(ctx));
124 }
125};
126
128 void operator()(ze_command_list_handle_t cmdList) const {
129 if (cmdList)
130 L0_SAFE_CALL(zeCommandListDestroy(cmdList));
131 }
132};
134 std::unique_ptr<std::remove_pointer<ze_context_handle_t>::type,
137 std::unique_ptr<std::remove_pointer<ze_command_list_handle_t>::type,
140 ze_driver_handle_t driver{nullptr};
141 ze_device_handle_t device{nullptr};
143 // Usually, one immediate command list with ordinal 0 suffices for
144 // both copy and compute ops, but leaves HW underutilized.
146 // Copy engines can be used for both memcpy and memset, but
147 // they have limitations for memset pattern size (e.g., 1 byte).
150
152 L0RTContextWrapper(const uint32_t driverIdx = 0, const int32_t devIdx = 0)
153 : driver(getDriver(driverIdx)), device(getDevice(devIdx)) {
154 // Create context
155 ze_context_handle_t ctx = getContext(driver);
156 context.reset(ctx);
157
158 // Determine ordinals
159 uint32_t computeEngineOrdinal = -1u, copyEngineOrdinal = -1u;
160 ze_device_properties_t deviceProperties{};
161 L0_SAFE_CALL(zeDeviceGetProperties(device, &deviceProperties));
162 uint32_t queueGroupCount = 0;
163 L0_SAFE_CALL(zeDeviceGetCommandQueueGroupProperties(
164 device, &queueGroupCount, nullptr));
165 std::vector<ze_command_queue_group_properties_t> queueGroupProperties(
166 queueGroupCount);
167 L0_SAFE_CALL(zeDeviceGetCommandQueueGroupProperties(
168 device, &queueGroupCount, queueGroupProperties.data()));
169
170 for (uint32_t queueGroupIdx = 0; queueGroupIdx < queueGroupCount;
171 ++queueGroupIdx) {
172 const auto &group = queueGroupProperties[queueGroupIdx];
173 if (group.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)
174 computeEngineOrdinal = queueGroupIdx;
175 else if (group.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) {
176 copyEngineOrdinal = queueGroupIdx;
177 copyEngineMaxMemoryFillPatternSize = group.maxMemoryFillPatternSize;
178 }
179 if (copyEngineOrdinal != -1u && computeEngineOrdinal != -1u)
180 break;
181 }
182
183 // Fallback to the default queue if no dedicated copy queue is available.
184 if (copyEngineOrdinal == -1u)
185 copyEngineOrdinal = computeEngineOrdinal;
186
187 assert(copyEngineOrdinal != -1u && computeEngineOrdinal != -1u &&
188 "Expected two engines to be available.");
189
190 // Create copy command list
191 ze_command_queue_desc_t cmdQueueDesc{
192 ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
193 nullptr,
194 copyEngineOrdinal, // ordinal
195 0, // index (assume one physical engine in the group)
196 0, // flags
197 ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS,
198 ZE_COMMAND_QUEUE_PRIORITY_NORMAL};
199
200 ze_command_list_handle_t rawCmdListCopy = nullptr;
201 L0_SAFE_CALL(zeCommandListCreateImmediate(context.get(), device,
202 &cmdQueueDesc, &rawCmdListCopy));
203 immCmdListCopy.reset(rawCmdListCopy);
204
205 // Create compute command list
206 cmdQueueDesc.ordinal = computeEngineOrdinal;
207 ze_command_list_handle_t rawCmdListCompute = nullptr;
208 L0_SAFE_CALL(zeCommandListCreateImmediate(
209 context.get(), device, &cmdQueueDesc, &rawCmdListCompute));
210 immCmdListCompute.reset(rawCmdListCompute);
211 }
214 // Allow move
216 L0RTContextWrapper &operator=(L0RTContextWrapper &&) noexcept = default;
217 ~L0RTContextWrapper() = default;
218};
219
221 void operator()(ze_event_handle_t event) const {
222 if (event)
223 L0_SAFE_CALL(zeEventDestroy(event));
224 }
225};
226
228 void operator()(ze_event_pool_handle_t pool) const {
229 if (pool)
230 L0_SAFE_CALL(zeEventPoolDestroy(pool));
231 }
232};
233
235 std::unique_ptr<std::remove_pointer<ze_event_handle_t>::type,
238 std::unique_ptr<std::remove_pointer<ze_event_pool_handle_t>::type,
240
241// L0 only supports pre-determined sizes of event pools,
242// implement a runtime data structure to avoid running out of events.
243
245 constexpr static size_t numEventsPerPool{128};
246
247 std::vector<UniqueZeEventPool> eventPools;
248 std::vector<UniqueZeEvent> availableEvents;
249 std::unordered_map<ze_event_handle_t, UniqueZeEvent> takenEvents;
250
251 // Limit the number of events to avoid running out of memory.
252 // The limit is set to 32K events, which should be sufficient for most use
253 // cases.
254 size_t maxEventsCount{32768}; // 32K events
258
262
265
266 // Allow move
267 DynamicEventPool(DynamicEventPool &&) noexcept = default;
268 DynamicEventPool &operator=(DynamicEventPool &&) noexcept = default;
269
271 assert(takenEvents.empty() && "Some events were not released");
272 }
273
274 void createNewPool(size_t numEvents) {
275 ze_event_pool_desc_t eventPoolDesc = {};
276 eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
277 eventPoolDesc.count = numEvents;
278
279 ze_event_pool_handle_t rawPool = nullptr;
280 L0_SAFE_CALL(zeEventPoolCreate(rtCtx->context.get(), &eventPoolDesc, 1,
281 &rtCtx->device, &rawPool));
282
283 eventPools.emplace_back(UniqueZeEventPool(rawPool));
284 currentEventsLimit += numEvents;
285 }
286
287 ze_event_handle_t takeEvent() {
288 ze_event_handle_t rawEvent = nullptr;
289
290 if (!availableEvents.empty()) {
291 // Reuse one
292 auto uniqueEvent = std::move(availableEvents.back());
293 availableEvents.pop_back();
294 rawEvent = uniqueEvent.get();
295 takenEvents[rawEvent] = std::move(uniqueEvent);
296 } else {
298 throw std::runtime_error("DynamicEventPool: reached max events limit");
299 }
302
303 ze_event_desc_t eventDesc = {
304 ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr,
305 static_cast<uint32_t>(currentEventsCnt % numEventsPerPool),
306 ZE_EVENT_SCOPE_FLAG_DEVICE, ZE_EVENT_SCOPE_FLAG_HOST};
307
308 ze_event_handle_t newEvent = nullptr;
310 zeEventCreate(eventPools.back().get(), &eventDesc, &newEvent));
311
312 takenEvents[newEvent] = UniqueZeEvent(newEvent);
313 rawEvent = newEvent;
315 }
316
317 return rawEvent;
318 }
319
320 void releaseEvent(ze_event_handle_t event) {
321 auto it = takenEvents.find(event);
322 assert(it != takenEvents.end() &&
323 "Attempting to release unknown or already released event");
324
325 L0_SAFE_CALL(zeEventHostReset(event));
326 availableEvents.emplace_back(std::move(it->second));
327 takenEvents.erase(it);
328 }
329};
330
332 thread_local static L0RTContextWrapper rtContext(0);
333 return rtContext;
334}
335
337 thread_local static DynamicEventPool dynEventPool{&getRtContext()};
338 return dynEventPool;
339}
340
342 // avoid event pointer invalidations
343 std::deque<ze_event_handle_t> implicitEventStack;
345
348
349 ze_event_handle_t *getLastImplicitEventPtr() {
350 // Assume current implicit events will not be used after `sync`.
351 return implicitEventStack.size() ? &implicitEventStack.back() : nullptr;
352 }
353
  // Blocks the host until `explicitEvent` — or, when none is given, the last
  // implicit event of this stream — is signaled, then recycles every implicit
  // event back to the dynamic pool.
  // NOTE(review): when an explicit event is passed, the implicit events are
  // released without being individually synchronized; this assumes they are
  // complete or unused afterwards — confirm against callers.
  void sync(ze_event_handle_t explicitEvent = nullptr) {
    ze_event_handle_t syncEvent{nullptr};
    if (!explicitEvent) {
      ze_event_handle_t *lastImplicitEventPtr = getLastImplicitEventPtr();
      syncEvent = lastImplicitEventPtr ? *lastImplicitEventPtr : nullptr;
    } else {
      syncEvent = explicitEvent;
    }
    if (syncEvent)
      L0_SAFE_CALL(zeEventHostSynchronize(
          syncEvent, std::numeric_limits<uint64_t>::max()));
    // All of the "implicit" events were signaled and are of no use, release
    // them. "explicit" event must be "released" via mgpuEventDestroy
    for (auto event : implicitEventStack)
      dynEventPool.releaseEvent(event);
    implicitEventStack.clear();
  }
371
372 template <typename Func>
373 void enqueueOp(Func &&op) {
374 ze_event_handle_t newImplicitEvent = dynEventPool.takeEvent();
375 ze_event_handle_t *lastImplicitEventPtr = getLastImplicitEventPtr();
376 const uint32_t numWaitEvents = lastImplicitEventPtr ? 1 : 0;
377 std::forward<Func>(op)(newImplicitEvent, numWaitEvents,
378 lastImplicitEventPtr);
379 implicitEventStack.push_back(newImplicitEvent);
380 }
381};
382
383static ze_module_handle_t
384loadModule(const void *data, size_t dataSize,
385 ze_module_format_t format = ZE_MODULE_FORMAT_NATIVE) {
386 assert(data);
387 ze_module_handle_t zeModule;
388 ze_module_desc_t desc = {
389 ZE_STRUCTURE_TYPE_MODULE_DESC, nullptr, format, dataSize,
390 (const uint8_t *)data, nullptr, nullptr};
391
392 ze_module_build_log_handle_t buildLogHandle;
393 ze_result_t result =
394 zeModuleCreate(getRtContext().context.get(), getRtContext().device, &desc,
395 &zeModule, &buildLogHandle);
396 if (result != ZE_RESULT_SUCCESS) {
397 std::cerr << "Error creating module, error code: " << result << std::endl;
398 size_t logSize = 0;
399 L0_SAFE_CALL(zeModuleBuildLogGetString(buildLogHandle, &logSize, nullptr));
400 std::string buildLog(" ", logSize);
402 zeModuleBuildLogGetString(buildLogHandle, &logSize, buildLog.data()));
403 std::cerr << "Build log:\n" << buildLog << std::endl;
404 std::abort();
405 }
406 return zeModule;
407}
408
409//===----------------------------------------------------------------------===//
410// L0 Wrappers definition
411//===----------------------------------------------------------------------===//
412
415}
416
417extern "C" void mgpuStreamSynchronize(StreamWrapper *stream) {
418 if (stream)
419 stream->sync();
420}
421
// Frees a stream created by mgpuStreamCreate (`delete nullptr` is a no-op,
// so a null stream is safe).
extern "C" void mgpuStreamDestroy(StreamWrapper *stream) { delete stream; }
423
424extern "C" void mgpuStreamWaitEvent(StreamWrapper *stream,
425 ze_event_handle_t event) {
426 assert(stream && "Invalid stream");
427 assert(event && "Invalid event");
428 stream->sync(event);
429}
430
431extern "C" ze_event_handle_t mgpuEventCreate() {
433}
434
435extern "C" void mgpuEventDestroy(ze_event_handle_t event) {
436 return getDynamicEventPool().releaseEvent(event);
437}
438
439extern "C" void mgpuEventSynchronize(ze_event_handle_t event) {
441 zeEventHostSynchronize(event, std::numeric_limits<uint64_t>::max()));
442 L0_SAFE_CALL(zeEventHostReset(event));
443}
444
// Signals `event` from both immediate command lists (copy and compute) of
// the thread-local runtime context.
// NOTE(review): `stream` is unused here — the event is signaled on the
// context's lists regardless of the stream argument; confirm this matches
// caller expectations.
extern "C" void mgpuEventRecord(ze_event_handle_t event,
                                StreamWrapper *stream) {
  L0_SAFE_CALL(zeCommandListAppendSignalEvent(
      getRtContext().immCmdListCopy.get(), event));
  L0_SAFE_CALL(zeCommandListAppendSignalEvent(
      getRtContext().immCmdListCompute.get(), event));
}
452
453extern "C" void *mgpuMemAlloc(uint64_t size, StreamWrapper *stream,
454 bool isShared) {
455 return catchAll([&]() {
456 void *memPtr = nullptr;
457 constexpr size_t alignment{64};
458 ze_device_mem_alloc_desc_t deviceDesc = {};
459 deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
460 if (isShared) {
461 ze_host_mem_alloc_desc_t hostDesc = {};
462 hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
463 L0_SAFE_CALL(zeMemAllocShared(getRtContext().context.get(), &deviceDesc,
464 &hostDesc, size, alignment,
465 getRtContext().device, &memPtr));
466 } else {
467 L0_SAFE_CALL(zeMemAllocDevice(getRtContext().context.get(), &deviceDesc,
468 size, alignment, getRtContext().device,
469 &memPtr));
470 }
471 if (!memPtr)
472 throw std::runtime_error("mem allocation failed!");
473 return memPtr;
474 });
475}
476
477extern "C" void mgpuMemFree(void *ptr, StreamWrapper *stream) {
478 stream->sync();
479 if (ptr)
480 L0_SAFE_CALL(zeMemFree(getRtContext().context.get(), ptr));
481}
482
483extern "C" void mgpuMemcpy(void *dst, void *src, size_t sizeBytes,
484 StreamWrapper *stream) {
485 stream->enqueueOp([&](ze_event_handle_t newEvent, uint32_t numWaitEvents,
486 ze_event_handle_t *waitEvents) {
487 L0_SAFE_CALL(zeCommandListAppendMemoryCopy(
488 getRtContext().immCmdListCopy.get(), dst, src, sizeBytes, newEvent,
489 numWaitEvents, waitEvents));
490 });
491}
492
493template <typename PATTERN_TYPE>
494static void mgpuMemset(void *dst, PATTERN_TYPE value, size_t count,
495 StreamWrapper *stream) {
496 L0RTContextWrapper &rtContext = getRtContext();
497 auto listType =
498 rtContext.copyEngineMaxMemoryFillPatternSize >= sizeof(PATTERN_TYPE)
499 ? rtContext.immCmdListCopy.get()
500 : rtContext.immCmdListCompute.get();
501 stream->enqueueOp([&](ze_event_handle_t newEvent, uint32_t numWaitEvents,
502 ze_event_handle_t *waitEvents) {
503 L0_SAFE_CALL(zeCommandListAppendMemoryFill(
504 listType, dst, &value, sizeof(PATTERN_TYPE),
505 count * sizeof(PATTERN_TYPE), newEvent, numWaitEvents, waitEvents));
506 });
507}
508extern "C" void mgpuMemset32(void *dst, unsigned int value, size_t count,
509 StreamWrapper *stream) {
510 mgpuMemset<unsigned int>(dst, value, count, stream);
511}
512
513extern "C" void mgpuMemset16(void *dst, unsigned short value, size_t count,
514 StreamWrapper *stream) {
515 mgpuMemset<unsigned short>(dst, value, count, stream);
516}
517
518extern "C" ze_module_handle_t mgpuModuleLoad(const void *data,
519 size_t gpuBlobSize) {
520 return catchAll([&]() { return loadModule(data, gpuBlobSize); });
521}
522
523extern "C" ze_module_handle_t mgpuModuleLoadJIT(void *data, int optLevel) {
524 return catchAll([&]() {
525 return loadModule(data, strlen(reinterpret_cast<char *>(data)),
526 ZE_MODULE_FORMAT_IL_SPIRV);
527 });
528}
529
530extern "C" ze_kernel_handle_t mgpuModuleGetFunction(ze_module_handle_t module,
531 const char *name) {
532 assert(module && name);
533 ze_kernel_handle_t zeKernel;
534 ze_kernel_desc_t desc = {};
535 desc.pKernelName = name;
536 L0_SAFE_CALL(zeKernelCreate(module, &desc, &zeKernel));
537 return zeKernel;
538}
539
540extern "C" void mgpuLaunchKernel(ze_kernel_handle_t kernel, size_t gridX,
541 size_t gridY, size_t gridZ, size_t blockX,
542 size_t blockY, size_t blockZ,
543 size_t sharedMemBytes, StreamWrapper *stream,
544 void **params, void ** /*extra*/,
545 size_t paramsCount) {
546
547 if (sharedMemBytes > 0) {
548 paramsCount = paramsCount - 1; // Last param is shared memory size
550 zeKernelSetArgumentValue(kernel, paramsCount, sharedMemBytes, nullptr));
551 }
552 for (size_t i = 0; i < paramsCount; ++i)
553 L0_SAFE_CALL(zeKernelSetArgumentValue(kernel, static_cast<uint32_t>(i),
554 sizeof(void *), params[i]));
555 L0_SAFE_CALL(zeKernelSetGroupSize(kernel, blockX, blockY, blockZ));
556 ze_group_count_t dispatch;
557 dispatch.groupCountX = static_cast<uint32_t>(gridX);
558 dispatch.groupCountY = static_cast<uint32_t>(gridY);
559 dispatch.groupCountZ = static_cast<uint32_t>(gridZ);
560 stream->enqueueOp([&](ze_event_handle_t newEvent, uint32_t numWaitEvents,
561 ze_event_handle_t *waitEvents) {
562 L0_SAFE_CALL(zeCommandListAppendLaunchKernel(
563 getRtContext().immCmdListCompute.get(), kernel, &dispatch, newEvent,
564 numWaitEvents, waitEvents));
565 });
566}
567
// Destroys a module created by mgpuModuleLoad / mgpuModuleLoadJIT.
extern "C" void mgpuModuleUnload(ze_module_handle_t module) {
  L0_SAFE_CALL(zeModuleDestroy(module));
}
571
572extern "C" void mgpuSetDefaultDevice(int32_t devIdx) {
573 catchAll([&]() {
574 // For now, a user must ensure that streams and events complete
575 // and are destroyed before switching a device.
578 });
579}
std::unique_ptr< std::remove_pointer< ze_event_handle_t >::type, ZeEventDeleter > UniqueZeEvent
void mgpuSetDefaultDevice(int32_t devIdx)
static L0RTContextWrapper & getRtContext()
static ze_module_handle_t loadModule(const void *data, size_t dataSize, ze_module_format_t format=ZE_MODULE_FORMAT_NATIVE)
void mgpuMemset16(void *dst, unsigned short value, size_t count, StreamWrapper *stream)
#define L0_SAFE_CALL(call)
static void mgpuMemset(void *dst, PATTERN_TYPE value, size_t count, StreamWrapper *stream)
static ze_device_handle_t getDevice(const uint32_t driverIdx=0, const int32_t devIdx=0)
static DynamicEventPool & getDynamicEventPool()
std::unique_ptr< std::remove_pointer< ze_context_handle_t >::type, ZeContextDeleter > UniqueZeContext
void * mgpuMemAlloc(uint64_t size, StreamWrapper *stream, bool isShared)
void mgpuStreamDestroy(StreamWrapper *stream)
ze_module_handle_t mgpuModuleLoad(const void *data, size_t gpuBlobSize)
void mgpuEventSynchronize(ze_event_handle_t event)
void mgpuModuleUnload(ze_module_handle_t module)
static ze_driver_handle_t getDriver(uint32_t idx=0)
void mgpuMemset32(void *dst, unsigned int value, size_t count, StreamWrapper *stream)
StreamWrapper * mgpuStreamCreate()
std::unique_ptr< std::remove_pointer< ze_command_list_handle_t >::type, ZeCommandListDeleter > UniqueZeCommandList
void mgpuEventDestroy(ze_event_handle_t event)
void mgpuStreamSynchronize(StreamWrapper *stream)
ze_kernel_handle_t mgpuModuleGetFunction(ze_module_handle_t module, const char *name)
ze_module_handle_t mgpuModuleLoadJIT(void *data, int optLevel)
void mgpuMemcpy(void *dst, void *src, size_t sizeBytes, StreamWrapper *stream)
void mgpuStreamWaitEvent(StreamWrapper *stream, ze_event_handle_t event)
void mgpuMemFree(void *ptr, StreamWrapper *stream)
void mgpuEventRecord(ze_event_handle_t event, StreamWrapper *stream)
ze_event_handle_t mgpuEventCreate()
std::unique_ptr< std::remove_pointer< ze_event_pool_handle_t >::type, ZeEventPoolDeleter > UniqueZeEventPool
void mgpuLaunchKernel(ze_kernel_handle_t kernel, size_t gridX, size_t gridY, size_t gridZ, size_t blockX, size_t blockY, size_t blockZ, size_t sharedMemBytes, StreamWrapper *stream, void **params, void **, size_t paramsCount)
b getContext())
void createNewPool(size_t numEvents)
L0RTContextWrapper * rtCtx
DynamicEventPool & operator=(const DynamicEventPool &)=delete
static constexpr size_t numEventsPerPool
void releaseEvent(ze_event_handle_t event)
DynamicEventPool(DynamicEventPool &&) noexcept=default
DynamicEventPool(L0RTContextWrapper *rtCtx)
std::vector< UniqueZeEventPool > eventPools
std::unordered_map< ze_event_handle_t, UniqueZeEvent > takenEvents
ze_event_handle_t takeEvent()
std::vector< UniqueZeEvent > availableEvents
DynamicEventPool(const DynamicEventPool &)=delete
UniqueZeCommandList immCmdListCopy
L0RTContextWrapper()=default
L0RTContextWrapper & operator=(const L0RTContextWrapper &)=delete
L0RTContextWrapper(L0RTContextWrapper &&) noexcept=default
L0RTContextWrapper(const uint32_t driverIdx=0, const int32_t devIdx=0)
UniqueZeCommandList immCmdListCompute
L0RTContextWrapper(const L0RTContextWrapper &)=delete
ze_event_handle_t * getLastImplicitEventPtr()
void sync(ze_event_handle_t explicitEvent=nullptr)
StreamWrapper(DynamicEventPool &dynEventPool)
std::deque< ze_event_handle_t > implicitEventStack
DynamicEventPool & dynEventPool
void operator()(ze_command_list_handle_t cmdList) const
void operator()(ze_context_handle_t ctx) const
void operator()(ze_event_handle_t event) const
void operator()(ze_event_pool_handle_t pool) const