15#include <level_zero/ze_api.h>
16#include <sycl/ext/oneapi/backend/level_zero.hpp>
17#include <sycl/sycl.hpp>
20#define SYCL_RUNTIME_EXPORT __declspec(dllexport)
22#define SYCL_RUNTIME_EXPORT
28auto catchAll(F &&func) {
31 }
catch (
const std::exception &e) {
32 fprintf(stderr,
"SYCL runtime error: %s\n", e.what());
34 std::exit(EXIT_FAILURE);
36 fprintf(stderr,
"SYCL runtime error: unknown exception was thrown\n");
38 std::exit(EXIT_FAILURE);
42#define L0_SAFE_CALL(call) \
44 ze_result_t status = (call); \
45 if (status != ZE_RESULT_SUCCESS) { \
46 fprintf(stderr, "L0 error %d\n", status); \
55 static sycl::device syclDevice;
56 static bool isDeviceInitialised =
false;
57 if (!isDeviceInitialised) {
58 auto platformList = sycl::platform::get_platforms();
59 for (
const auto &platform : platformList) {
60 auto platformName = platform.get_info<sycl::info::platform::name>();
61 bool isLevelZero = platformName.find(
"Level-Zero") != std::string::npos;
65 syclDevice = platform.get_devices()[0];
66 isDeviceInitialised =
true;
69 throw std::runtime_error(
70 "no Level-Zero SYCL platform found; the MLIR SYCL runtime wrapper "
71 "currently requires a Level-Zero backend");
83 void *memPtr =
nullptr;
88 memPtr = sycl::aligned_alloc_device(64, size, *queue);
90 if (memPtr ==
nullptr) {
91 throw std::runtime_error(
"mem allocation failed!");
97 sycl::free(ptr, *queue);
100static ze_module_handle_t
loadModule(
const void *data,
size_t dataSize) {
102 ze_module_handle_t zeModule;
103 ze_module_desc_t desc = {ZE_STRUCTURE_TYPE_MODULE_DESC,
105 ZE_MODULE_FORMAT_IL_SPIRV,
107 (
const uint8_t *)data,
110 auto zeDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(
112 auto zeContext = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(
114 L0_SAFE_CALL(zeModuleCreate(zeContext, zeDevice, &desc, &zeModule,
nullptr));
118static sycl::kernel *
getKernel(ze_module_handle_t zeModule,
const char *name) {
121 ze_kernel_handle_t zeKernel;
122 ze_kernel_desc_t desc = {};
123 desc.pKernelName = name;
125 L0_SAFE_CALL(zeKernelCreate(zeModule, &desc, &zeKernel));
126 sycl::kernel_bundle<sycl::bundle_state::executable> kernelBundle =
127 sycl::make_kernel_bundle<sycl::backend::ext_oneapi_level_zero,
128 sycl::bundle_state::executable>(
131 auto kernel = sycl::make_kernel<sycl::backend::ext_oneapi_level_zero>(
133 return new sycl::kernel(kernel);
136static void launchKernel(sycl::queue *queue, sycl::kernel *kernel,
size_t gridX,
137 size_t gridY,
size_t gridZ,
size_t blockX,
138 size_t blockY,
size_t blockZ,
size_t sharedMemBytes,
139 void **params,
size_t paramsCount) {
140 auto syclGlobalRange =
141 sycl::range<3>(blockZ * gridZ, blockY * gridY, blockX * gridX);
142 auto syclLocalRange = sycl::range<3>(blockZ, blockY, blockX);
143 sycl::nd_range<3> syclNdRange(syclGlobalRange, syclLocalRange);
145 queue->submit([&](sycl::handler &cgh) {
146 for (
size_t i = 0; i < paramsCount; i++) {
147 cgh.set_arg(
static_cast<uint32_t
>(i), *(
static_cast<void **
>(params[i])));
149 cgh.parallel_for(syclNdRange, *kernel);
157 return catchAll([&]() {
165 catchAll([&]() {
delete queue; });
170 return catchAll([&]() {
185 return catchAll([&]() {
return loadModule(data, gpuBlobSize); });
190 return catchAll([&]() {
return getKernel(module, name); });
195 size_t blockX,
size_t blockY,
size_t blockZ,
196 size_t sharedMemBytes, sycl::queue *queue,
void **params,
197 void ** ,
size_t paramsCount) {
198 return catchAll([&]() {
199 launchKernel(queue, kernel, gridX, gridY, gridZ, blockX, blockY, blockZ,
200 sharedMemBytes, params, paramsCount);
206 catchAll([&]() { queue->wait(); });
212 catchAll([&]() {
L0_SAFE_CALL(zeModuleDestroy(module)); });
216mgpuMemcpy(
void *dst,
void *src,
size_t sizeBytes, sycl::queue *queue) {
217 catchAll([&]() { queue->memcpy(dst, src, sizeBytes).wait(); });
static ze_module_handle_t loadModule(const void *data, size_t dataSize)
static void deallocDeviceMemory(sycl::queue *queue, void *ptr)
static void launchKernel(sycl::queue *queue, sycl::kernel *kernel, size_t gridX, size_t gridY, size_t gridZ, size_t blockX, size_t blockY, size_t blockZ, size_t sharedMemBytes, void **params, size_t paramsCount)
static sycl::kernel * getKernel(ze_module_handle_t zeModule, const char *name)
static void * allocDeviceMemory(sycl::queue *queue, size_t size, bool isShared)
#define SYCL_RUNTIME_EXPORT
static sycl::device getDefaultDevice()
#define L0_SAFE_CALL(call)
SYCL_RUNTIME_EXPORT void mgpuStreamDestroy(sycl::queue *queue)
SYCL_RUNTIME_EXPORT void mgpuMemFree(void *ptr, sycl::queue *queue)
SYCL_RUNTIME_EXPORT void mgpuModuleUnload(ze_module_handle_t module)
SYCL_RUNTIME_EXPORT void * mgpuMemAlloc(uint64_t size, sycl::queue *queue, bool isShared)
SYCL_RUNTIME_EXPORT ze_module_handle_t mgpuModuleLoad(const void *data, size_t gpuBlobSize)
SYCL_RUNTIME_EXPORT void mgpuMemcpy(void *dst, void *src, size_t sizeBytes, sycl::queue *queue)
SYCL_RUNTIME_EXPORT void mgpuStreamSynchronize(sycl::queue *queue)
SYCL_RUNTIME_EXPORT sycl::queue * mgpuStreamCreate()
static sycl::context getDefaultContext()
SYCL_RUNTIME_EXPORT void mgpuLaunchKernel(sycl::kernel *kernel, size_t gridX, size_t gridY, size_t gridZ, size_t blockX, size_t blockY, size_t blockZ, size_t sharedMemBytes, sycl::queue *queue, void **params, void **, size_t paramsCount)
SYCL_RUNTIME_EXPORT sycl::kernel * mgpuModuleGetFunction(ze_module_handle_t module, const char *name)