MLIR  21.0.0git
GPUOpsLowering.cpp
Go to the documentation of this file.
1 //===- GPUOpsLowering.cpp - GPU FuncOp / ReturnOp lowering ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "GPUOpsLowering.h"
10 
14 #include "mlir/IR/Attributes.h"
15 #include "mlir/IR/Builders.h"
16 #include "mlir/IR/BuiltinTypes.h"
17 #include "llvm/ADT/SmallVectorExtras.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/FormatVariadic.h"
20 
21 using namespace mlir;
22 
23 LLVM::LLVMFuncOp mlir::getOrDefineFunction(gpu::GPUModuleOp moduleOp,
24  Location loc, OpBuilder &b,
25  StringRef name,
26  LLVM::LLVMFunctionType type) {
27  LLVM::LLVMFuncOp ret;
28  if (!(ret = moduleOp.template lookupSymbol<LLVM::LLVMFuncOp>(name))) {
30  b.setInsertionPointToStart(moduleOp.getBody());
31  ret = b.create<LLVM::LLVMFuncOp>(loc, name, type, LLVM::Linkage::External);
32  }
33  return ret;
34 }
35 
36 static SmallString<16> getUniqueSymbolName(gpu::GPUModuleOp moduleOp,
37  StringRef prefix) {
38  // Get a unique global name.
39  unsigned stringNumber = 0;
40  SmallString<16> stringConstName;
41  do {
42  stringConstName.clear();
43  (prefix + Twine(stringNumber++)).toStringRef(stringConstName);
44  } while (moduleOp.lookupSymbol(stringConstName));
45  return stringConstName;
46 }
47 
48 LLVM::GlobalOp
50  gpu::GPUModuleOp moduleOp, Type llvmI8,
51  StringRef namePrefix, StringRef str,
52  uint64_t alignment, unsigned addrSpace) {
53  llvm::SmallString<20> nullTermStr(str);
54  nullTermStr.push_back('\0'); // Null terminate for C
55  auto globalType =
56  LLVM::LLVMArrayType::get(llvmI8, nullTermStr.size_in_bytes());
57  StringAttr attr = b.getStringAttr(nullTermStr);
58 
59  // Try to find existing global.
60  for (auto globalOp : moduleOp.getOps<LLVM::GlobalOp>())
61  if (globalOp.getGlobalType() == globalType && globalOp.getConstant() &&
62  globalOp.getValueAttr() == attr &&
63  globalOp.getAlignment().value_or(0) == alignment &&
64  globalOp.getAddrSpace() == addrSpace)
65  return globalOp;
66 
67  // Not found: create new global.
69  b.setInsertionPointToStart(moduleOp.getBody());
70  SmallString<16> name = getUniqueSymbolName(moduleOp, namePrefix);
71  return b.create<LLVM::GlobalOp>(loc, globalType,
72  /*isConstant=*/true, LLVM::Linkage::Internal,
73  name, attr, alignment, addrSpace);
74 }
75 
76 LogicalResult
77 GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
78  ConversionPatternRewriter &rewriter) const {
79  Location loc = gpuFuncOp.getLoc();
80 
81  SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
82  if (encodeWorkgroupAttributionsAsArguments) {
83  // Append an `llvm.ptr` argument to the function signature to encode
84  // workgroup attributions.
85 
86  ArrayRef<BlockArgument> workgroupAttributions =
87  gpuFuncOp.getWorkgroupAttributions();
88  size_t numAttributions = workgroupAttributions.size();
89 
90  // Insert all arguments at the end.
91  unsigned index = gpuFuncOp.getNumArguments();
92  SmallVector<unsigned> argIndices(numAttributions, index);
93 
94  // New arguments will simply be `llvm.ptr` with the correct address space
95  Type workgroupPtrType =
96  rewriter.getType<LLVM::LLVMPointerType>(workgroupAddrSpace);
97  SmallVector<Type> argTypes(numAttributions, workgroupPtrType);
98 
99  // Attributes: noalias, llvm.mlir.workgroup_attribution(<size>, <type>)
100  std::array attrs{
101  rewriter.getNamedAttr(LLVM::LLVMDialect::getNoAliasAttrName(),
102  rewriter.getUnitAttr()),
103  rewriter.getNamedAttr(
104  getDialect().getWorkgroupAttributionAttrHelper().getName(),
105  rewriter.getUnitAttr()),
106  };
108  for (BlockArgument attribution : workgroupAttributions) {
109  auto attributionType = cast<MemRefType>(attribution.getType());
110  IntegerAttr numElements =
111  rewriter.getI64IntegerAttr(attributionType.getNumElements());
112  Type llvmElementType =
113  getTypeConverter()->convertType(attributionType.getElementType());
114  if (!llvmElementType)
115  return failure();
116  TypeAttr type = TypeAttr::get(llvmElementType);
117  attrs.back().setValue(
118  rewriter.getAttr<LLVM::WorkgroupAttributionAttr>(numElements, type));
119  argAttrs.push_back(rewriter.getDictionaryAttr(attrs));
120  }
121 
122  // Location match function location
123  SmallVector<Location> argLocs(numAttributions, gpuFuncOp.getLoc());
124 
125  // Perform signature modification
126  rewriter.modifyOpInPlace(
127  gpuFuncOp, [gpuFuncOp, &argIndices, &argTypes, &argAttrs, &argLocs]() {
128  static_cast<FunctionOpInterface>(gpuFuncOp).insertArguments(
129  argIndices, argTypes, argAttrs, argLocs);
130  });
131  } else {
132  workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
133  for (auto [idx, attribution] :
134  llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
135  auto type = dyn_cast<MemRefType>(attribution.getType());
136  assert(type && type.hasStaticShape() && "unexpected type in attribution");
137 
138  uint64_t numElements = type.getNumElements();
139 
140  auto elementType =
141  cast<Type>(typeConverter->convertType(type.getElementType()));
142  auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
143  std::string name =
144  std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx));
145  uint64_t alignment = 0;
146  if (auto alignAttr = dyn_cast_or_null<IntegerAttr>(
147  gpuFuncOp.getWorkgroupAttributionAttr(
148  idx, LLVM::LLVMDialect::getAlignAttrName())))
149  alignment = alignAttr.getInt();
150  auto globalOp = rewriter.create<LLVM::GlobalOp>(
151  gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
152  LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment,
153  workgroupAddrSpace);
154  workgroupBuffers.push_back(globalOp);
155  }
156  }
157 
158  // Remap proper input types.
159  TypeConverter::SignatureConversion signatureConversion(
160  gpuFuncOp.front().getNumArguments());
161 
163  gpuFuncOp.getFunctionType(), /*isVariadic=*/false,
164  getTypeConverter()->getOptions().useBarePtrCallConv, signatureConversion);
165  if (!funcType) {
166  return rewriter.notifyMatchFailure(gpuFuncOp, [&](Diagnostic &diag) {
167  diag << "failed to convert function signature type for: "
168  << gpuFuncOp.getFunctionType();
169  });
170  }
171 
172  // Create the new function operation. Only copy those attributes that are
173  // not specific to function modeling.
175  ArrayAttr argAttrs;
176  for (const auto &attr : gpuFuncOp->getAttrs()) {
177  if (attr.getName() == SymbolTable::getSymbolAttrName() ||
178  attr.getName() == gpuFuncOp.getFunctionTypeAttrName() ||
179  attr.getName() ==
180  gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() ||
181  attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() ||
182  attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName() ||
183  attr.getName() == gpuFuncOp.getKnownBlockSizeAttrName() ||
184  attr.getName() == gpuFuncOp.getKnownGridSizeAttrName())
185  continue;
186  if (attr.getName() == gpuFuncOp.getArgAttrsAttrName()) {
187  argAttrs = gpuFuncOp.getArgAttrsAttr();
188  continue;
189  }
190  attributes.push_back(attr);
191  }
192 
193  DenseI32ArrayAttr knownBlockSize = gpuFuncOp.getKnownBlockSizeAttr();
194  DenseI32ArrayAttr knownGridSize = gpuFuncOp.getKnownGridSizeAttr();
195  // Ensure we don't lose information if the function is lowered before its
196  // surrounding context.
197  auto *gpuDialect = cast<gpu::GPUDialect>(gpuFuncOp->getDialect());
198  if (knownBlockSize)
199  attributes.emplace_back(gpuDialect->getKnownBlockSizeAttrHelper().getName(),
200  knownBlockSize);
201  if (knownGridSize)
202  attributes.emplace_back(gpuDialect->getKnownGridSizeAttrHelper().getName(),
203  knownGridSize);
204 
205  // Add a dialect specific kernel attribute in addition to GPU kernel
206  // attribute. The former is necessary for further translation while the
207  // latter is expected by gpu.launch_func.
208  if (gpuFuncOp.isKernel()) {
209  if (kernelAttributeName)
210  attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr());
211  // Set the dialect-specific block size attribute if there is one.
212  if (kernelBlockSizeAttributeName && knownBlockSize) {
213  attributes.emplace_back(kernelBlockSizeAttributeName, knownBlockSize);
214  }
215  }
216  LLVM::CConv callingConvention = gpuFuncOp.isKernel()
217  ? kernelCallingConvention
218  : nonKernelCallingConvention;
219  auto llvmFuncOp = rewriter.create<LLVM::LLVMFuncOp>(
220  gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType,
221  LLVM::Linkage::External, /*dsoLocal=*/false, callingConvention,
222  /*comdat=*/nullptr, attributes);
223 
224  {
225  // Insert operations that correspond to converted workgroup and private
226  // memory attributions to the body of the function. This must operate on
227  // the original function, before the body region is inlined in the new
228  // function to maintain the relation between block arguments and the
229  // parent operation that assigns their semantics.
230  OpBuilder::InsertionGuard guard(rewriter);
231 
232  // Rewrite workgroup memory attributions to addresses of global buffers.
233  rewriter.setInsertionPointToStart(&gpuFuncOp.front());
234  unsigned numProperArguments = gpuFuncOp.getNumArguments();
235 
236  if (encodeWorkgroupAttributionsAsArguments) {
237  // Build a MemRefDescriptor with each of the arguments added above.
238 
239  unsigned numAttributions = gpuFuncOp.getNumWorkgroupAttributions();
240  assert(numProperArguments >= numAttributions &&
241  "Expecting attributions to be encoded as arguments already");
242 
243  // Arguments encoding workgroup attributions will be in positions
244  // [numProperArguments, numProperArguments+numAttributions)
245  ArrayRef<BlockArgument> attributionArguments =
246  gpuFuncOp.getArguments().slice(numProperArguments - numAttributions,
247  numAttributions);
248  for (auto [idx, vals] : llvm::enumerate(llvm::zip_equal(
249  gpuFuncOp.getWorkgroupAttributions(), attributionArguments))) {
250  auto [attribution, arg] = vals;
251  auto type = cast<MemRefType>(attribution.getType());
252 
253  // Arguments are of llvm.ptr type and attributions are of memref type:
254  // we need to wrap them in memref descriptors.
256  rewriter, loc, *getTypeConverter(), type, arg);
257 
258  // And remap the arguments
259  signatureConversion.remapInput(numProperArguments + idx, descr);
260  }
261  } else {
262  for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) {
263  auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(),
264  global.getAddrSpace());
265  Value address = rewriter.create<LLVM::AddressOfOp>(
266  loc, ptrType, global.getSymNameAttr());
267  Value memory =
268  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getType(),
269  address, ArrayRef<LLVM::GEPArg>{0, 0});
270 
271  // Build a memref descriptor pointing to the buffer to plug with the
272  // existing memref infrastructure. This may use more registers than
273  // otherwise necessary given that memref sizes are fixed, but we can try
274  // and canonicalize that away later.
275  Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx];
276  auto type = cast<MemRefType>(attribution.getType());
278  rewriter, loc, *getTypeConverter(), type, memory);
279  signatureConversion.remapInput(numProperArguments + idx, descr);
280  }
281  }
282 
283  // Rewrite private memory attributions to alloca'ed buffers.
284  unsigned numWorkgroupAttributions = gpuFuncOp.getNumWorkgroupAttributions();
285  auto int64Ty = IntegerType::get(rewriter.getContext(), 64);
286  for (const auto [idx, attribution] :
287  llvm::enumerate(gpuFuncOp.getPrivateAttributions())) {
288  auto type = cast<MemRefType>(attribution.getType());
289  assert(type && type.hasStaticShape() && "unexpected type in attribution");
290 
291  // Explicitly drop memory space when lowering private memory
292  // attributions since NVVM models it as `alloca`s in the default
293  // memory space and does not support `alloca`s with addrspace(5).
294  Type elementType = typeConverter->convertType(type.getElementType());
295  auto ptrType =
296  LLVM::LLVMPointerType::get(rewriter.getContext(), allocaAddrSpace);
297  Value numElements = rewriter.create<LLVM::ConstantOp>(
298  gpuFuncOp.getLoc(), int64Ty, type.getNumElements());
299  uint64_t alignment = 0;
300  if (auto alignAttr =
301  dyn_cast_or_null<IntegerAttr>(gpuFuncOp.getPrivateAttributionAttr(
302  idx, LLVM::LLVMDialect::getAlignAttrName())))
303  alignment = alignAttr.getInt();
304  Value allocated = rewriter.create<LLVM::AllocaOp>(
305  gpuFuncOp.getLoc(), ptrType, elementType, numElements, alignment);
307  rewriter, loc, *getTypeConverter(), type, allocated);
308  signatureConversion.remapInput(
309  numProperArguments + numWorkgroupAttributions + idx, descr);
310  }
311  }
312 
313  // Move the region to the new function, update the entry block signature.
314  rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(),
315  llvmFuncOp.end());
316  if (failed(rewriter.convertRegionTypes(&llvmFuncOp.getBody(), *typeConverter,
317  &signatureConversion)))
318  return failure();
319 
320  // Get memref type from function arguments and set the noalias to
321  // pointer arguments.
322  for (const auto [idx, argTy] :
323  llvm::enumerate(gpuFuncOp.getArgumentTypes())) {
324  auto remapping = signatureConversion.getInputMapping(idx);
325  NamedAttrList argAttr =
326  argAttrs ? cast<DictionaryAttr>(argAttrs[idx]) : NamedAttrList();
327  auto copyAttribute = [&](StringRef attrName) {
328  Attribute attr = argAttr.erase(attrName);
329  if (!attr)
330  return;
331  for (size_t i = 0, e = remapping->size; i < e; ++i)
332  llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
333  };
334  auto copyPointerAttribute = [&](StringRef attrName) {
335  Attribute attr = argAttr.erase(attrName);
336 
337  if (!attr)
338  return;
339  if (remapping->size > 1 &&
340  attrName == LLVM::LLVMDialect::getNoAliasAttrName()) {
341  emitWarning(llvmFuncOp.getLoc(),
342  "Cannot copy noalias with non-bare pointers.\n");
343  return;
344  }
345  for (size_t i = 0, e = remapping->size; i < e; ++i) {
346  if (isa<LLVM::LLVMPointerType>(
347  llvmFuncOp.getArgument(remapping->inputNo + i).getType())) {
348  llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
349  }
350  }
351  };
352 
353  if (argAttr.empty())
354  continue;
355 
356  copyAttribute(LLVM::LLVMDialect::getReturnedAttrName());
357  copyAttribute(LLVM::LLVMDialect::getNoUndefAttrName());
358  copyAttribute(LLVM::LLVMDialect::getInRegAttrName());
359  bool lowersToPointer = false;
360  for (size_t i = 0, e = remapping->size; i < e; ++i) {
361  lowersToPointer |= isa<LLVM::LLVMPointerType>(
362  llvmFuncOp.getArgument(remapping->inputNo + i).getType());
363  }
364 
365  if (lowersToPointer) {
366  copyPointerAttribute(LLVM::LLVMDialect::getNoAliasAttrName());
367  copyPointerAttribute(LLVM::LLVMDialect::getNoCaptureAttrName());
368  copyPointerAttribute(LLVM::LLVMDialect::getNoFreeAttrName());
369  copyPointerAttribute(LLVM::LLVMDialect::getAlignAttrName());
370  copyPointerAttribute(LLVM::LLVMDialect::getReadonlyAttrName());
371  copyPointerAttribute(LLVM::LLVMDialect::getWriteOnlyAttrName());
372  copyPointerAttribute(LLVM::LLVMDialect::getReadnoneAttrName());
373  copyPointerAttribute(LLVM::LLVMDialect::getNonNullAttrName());
374  copyPointerAttribute(LLVM::LLVMDialect::getDereferenceableAttrName());
375  copyPointerAttribute(
376  LLVM::LLVMDialect::getDereferenceableOrNullAttrName());
377  copyPointerAttribute(
378  LLVM::LLVMDialect::WorkgroupAttributionAttrHelper::getNameStr());
379  }
380  }
381  rewriter.eraseOp(gpuFuncOp);
382  return success();
383 }
384 
386  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
387  ConversionPatternRewriter &rewriter) const {
388  Location loc = gpuPrintfOp->getLoc();
389 
390  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getI8Type());
391  auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
392  mlir::Type llvmI32 = typeConverter->convertType(rewriter.getI32Type());
393  mlir::Type llvmI64 = typeConverter->convertType(rewriter.getI64Type());
394  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
395  // This ensures that global constants and declarations are placed within
396  // the device code, not the host code
397  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
398 
399  auto ocklBegin =
400  getOrDefineFunction(moduleOp, loc, rewriter, "__ockl_printf_begin",
401  LLVM::LLVMFunctionType::get(llvmI64, {llvmI64}));
402  LLVM::LLVMFuncOp ocklAppendArgs;
403  if (!adaptor.getArgs().empty()) {
404  ocklAppendArgs = getOrDefineFunction(
405  moduleOp, loc, rewriter, "__ockl_printf_append_args",
407  llvmI64, {llvmI64, /*numArgs*/ llvmI32, llvmI64, llvmI64, llvmI64,
408  llvmI64, llvmI64, llvmI64, llvmI64, /*isLast*/ llvmI32}));
409  }
410  auto ocklAppendStringN = getOrDefineFunction(
411  moduleOp, loc, rewriter, "__ockl_printf_append_string_n",
413  llvmI64,
414  {llvmI64, ptrType, /*length (bytes)*/ llvmI64, /*isLast*/ llvmI32}));
415 
416  /// Start the printf hostcall
417  Value zeroI64 = rewriter.create<LLVM::ConstantOp>(loc, llvmI64, 0);
418  auto printfBeginCall = rewriter.create<LLVM::CallOp>(loc, ocklBegin, zeroI64);
419  Value printfDesc = printfBeginCall.getResult();
420 
421  // Create the global op or find an existing one.
422  LLVM::GlobalOp global = getOrCreateStringConstant(
423  rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat());
424 
425  // Get a pointer to the format string's first element and pass it to printf()
426  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(
427  loc,
428  LLVM::LLVMPointerType::get(rewriter.getContext(), global.getAddrSpace()),
429  global.getSymNameAttr());
430  Value stringStart =
431  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getGlobalType(),
432  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
433  Value stringLen = rewriter.create<LLVM::ConstantOp>(
434  loc, llvmI64, cast<StringAttr>(global.getValueAttr()).size());
435 
436  Value oneI32 = rewriter.create<LLVM::ConstantOp>(loc, llvmI32, 1);
437  Value zeroI32 = rewriter.create<LLVM::ConstantOp>(loc, llvmI32, 0);
438 
439  auto appendFormatCall = rewriter.create<LLVM::CallOp>(
440  loc, ocklAppendStringN,
441  ValueRange{printfDesc, stringStart, stringLen,
442  adaptor.getArgs().empty() ? oneI32 : zeroI32});
443  printfDesc = appendFormatCall.getResult();
444 
445  // __ockl_printf_append_args takes 7 values per append call
446  constexpr size_t argsPerAppend = 7;
447  size_t nArgs = adaptor.getArgs().size();
448  for (size_t group = 0; group < nArgs; group += argsPerAppend) {
449  size_t bound = std::min(group + argsPerAppend, nArgs);
450  size_t numArgsThisCall = bound - group;
451 
453  arguments.push_back(printfDesc);
454  arguments.push_back(
455  rewriter.create<LLVM::ConstantOp>(loc, llvmI32, numArgsThisCall));
456  for (size_t i = group; i < bound; ++i) {
457  Value arg = adaptor.getArgs()[i];
458  if (auto floatType = dyn_cast<FloatType>(arg.getType())) {
459  if (!floatType.isF64())
460  arg = rewriter.create<LLVM::FPExtOp>(
461  loc, typeConverter->convertType(rewriter.getF64Type()), arg);
462  arg = rewriter.create<LLVM::BitcastOp>(loc, llvmI64, arg);
463  }
464  if (arg.getType().getIntOrFloatBitWidth() != 64)
465  arg = rewriter.create<LLVM::ZExtOp>(loc, llvmI64, arg);
466 
467  arguments.push_back(arg);
468  }
469  // Pad out to 7 arguments since the hostcall always needs 7
470  for (size_t extra = numArgsThisCall; extra < argsPerAppend; ++extra) {
471  arguments.push_back(zeroI64);
472  }
473 
474  auto isLast = (bound == nArgs) ? oneI32 : zeroI32;
475  arguments.push_back(isLast);
476  auto call = rewriter.create<LLVM::CallOp>(loc, ocklAppendArgs, arguments);
477  printfDesc = call.getResult();
478  }
479  rewriter.eraseOp(gpuPrintfOp);
480  return success();
481 }
482 
484  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
485  ConversionPatternRewriter &rewriter) const {
486  Location loc = gpuPrintfOp->getLoc();
487 
488  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8));
489  mlir::Type ptrType =
490  LLVM::LLVMPointerType::get(rewriter.getContext(), addressSpace);
491 
492  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
493  // This ensures that global constants and declarations are placed within
494  // the device code, not the host code
495  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
496 
497  auto printfType =
498  LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType},
499  /*isVarArg=*/true);
500  LLVM::LLVMFuncOp printfDecl =
501  getOrDefineFunction(moduleOp, loc, rewriter, "printf", printfType);
502 
503  // Create the global op or find an existing one.
504  LLVM::GlobalOp global = getOrCreateStringConstant(
505  rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat(),
506  /*alignment=*/0, addressSpace);
507 
508  // Get a pointer to the format string's first element
509  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(
510  loc,
511  LLVM::LLVMPointerType::get(rewriter.getContext(), global.getAddrSpace()),
512  global.getSymNameAttr());
513  Value stringStart =
514  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getGlobalType(),
515  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
516 
517  // Construct arguments and function call
518  auto argsRange = adaptor.getArgs();
519  SmallVector<Value, 4> printfArgs;
520  printfArgs.reserve(argsRange.size() + 1);
521  printfArgs.push_back(stringStart);
522  printfArgs.append(argsRange.begin(), argsRange.end());
523 
524  rewriter.create<LLVM::CallOp>(loc, printfDecl, printfArgs);
525  rewriter.eraseOp(gpuPrintfOp);
526  return success();
527 }
528 
530  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
531  ConversionPatternRewriter &rewriter) const {
532  Location loc = gpuPrintfOp->getLoc();
533 
534  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8));
535  mlir::Type ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
536 
537  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
538  // This ensures that global constants and declarations are placed within
539  // the device code, not the host code
540  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
541 
542  auto vprintfType =
543  LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType, ptrType});
544  LLVM::LLVMFuncOp vprintfDecl =
545  getOrDefineFunction(moduleOp, loc, rewriter, "vprintf", vprintfType);
546 
547  // Create the global op or find an existing one.
548  LLVM::GlobalOp global = getOrCreateStringConstant(
549  rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat());
550 
551  // Get a pointer to the format string's first element
552  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(loc, global);
553  Value stringStart =
554  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getGlobalType(),
555  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
556  SmallVector<Type> types;
557  SmallVector<Value> args;
558  // Promote and pack the arguments into a stack allocation.
559  for (Value arg : adaptor.getArgs()) {
560  Type type = arg.getType();
561  Value promotedArg = arg;
562  assert(type.isIntOrFloat());
563  if (isa<FloatType>(type)) {
564  type = rewriter.getF64Type();
565  promotedArg = rewriter.create<LLVM::FPExtOp>(loc, type, arg);
566  }
567  types.push_back(type);
568  args.push_back(promotedArg);
569  }
570  Type structType =
571  LLVM::LLVMStructType::getLiteral(gpuPrintfOp.getContext(), types);
572  Value one = rewriter.create<LLVM::ConstantOp>(loc, rewriter.getI64Type(),
573  rewriter.getIndexAttr(1));
574  Value tempAlloc =
575  rewriter.create<LLVM::AllocaOp>(loc, ptrType, structType, one,
576  /*alignment=*/0);
577  for (auto [index, arg] : llvm::enumerate(args)) {
578  Value ptr = rewriter.create<LLVM::GEPOp>(
579  loc, ptrType, structType, tempAlloc,
580  ArrayRef<LLVM::GEPArg>{0, static_cast<int32_t>(index)});
581  rewriter.create<LLVM::StoreOp>(loc, arg, ptr);
582  }
583  std::array<Value, 2> printfArgs = {stringStart, tempAlloc};
584 
585  rewriter.create<LLVM::CallOp>(loc, vprintfDecl, printfArgs);
586  rewriter.eraseOp(gpuPrintfOp);
587  return success();
588 }
589 
590 /// Helper for impl::scalarizeVectorOp. Scalarizes vectors to elements.
591 /// Used either directly (for ops on 1D vectors) or as the callback passed to
592 /// detail::handleMultidimensionalVectors (for ops on higher-rank vectors).
594  Type llvm1DVectorTy,
595  ConversionPatternRewriter &rewriter,
596  const LLVMTypeConverter &converter) {
597  TypeRange operandTypes(operands);
598  VectorType vectorType = cast<VectorType>(llvm1DVectorTy);
599  Location loc = op->getLoc();
600  Value result = rewriter.create<LLVM::PoisonOp>(loc, vectorType);
601  Type indexType = converter.convertType(rewriter.getIndexType());
602  StringAttr name = op->getName().getIdentifier();
603  Type elementType = vectorType.getElementType();
604 
605  for (int64_t i = 0; i < vectorType.getNumElements(); ++i) {
606  Value index = rewriter.create<LLVM::ConstantOp>(loc, indexType, i);
607  auto extractElement = [&](Value operand) -> Value {
608  if (!isa<VectorType>(operand.getType()))
609  return operand;
610  return rewriter.create<LLVM::ExtractElementOp>(loc, operand, index);
611  };
612  auto scalarOperands = llvm::map_to_vector(operands, extractElement);
613  Operation *scalarOp =
614  rewriter.create(loc, name, scalarOperands, elementType, op->getAttrs());
615  result = rewriter.create<LLVM::InsertElementOp>(
616  loc, result, scalarOp->getResult(0), index);
617  }
618  return result;
619 }
620 
621 /// Unrolls op to array/vector elements.
622 LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands,
623  ConversionPatternRewriter &rewriter,
624  const LLVMTypeConverter &converter) {
625  TypeRange operandTypes(operands);
626  if (llvm::any_of(operandTypes, llvm::IsaPred<VectorType>)) {
627  VectorType vectorType =
628  cast<VectorType>(converter.convertType(op->getResultTypes()[0]));
629  rewriter.replaceOp(op, scalarizeVectorOpHelper(op, operands, vectorType,
630  rewriter, converter));
631  return success();
632  }
633 
634  if (llvm::any_of(operandTypes, llvm::IsaPred<LLVM::LLVMArrayType>)) {
636  op, operands, converter,
637  [&](Type llvm1DVectorTy, ValueRange operands) -> Value {
638  return scalarizeVectorOpHelper(op, operands, llvm1DVectorTy, rewriter,
639  converter);
640  },
641  rewriter);
642  }
643 
644  return rewriter.notifyMatchFailure(op, "no llvm.array or vector to unroll");
645 }
646 
647 static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space) {
648  return IntegerAttr::get(IntegerType::get(ctx, 64), space);
649 }
650 
651 /// Generates a symbol with 0-sized array type for dynamic shared memory usage,
652 /// or uses existing symbol.
654  ConversionPatternRewriter &rewriter, gpu::GPUModuleOp moduleOp,
655  gpu::DynamicSharedMemoryOp op, const LLVMTypeConverter *typeConverter,
656  MemRefType memrefType, unsigned alignmentBit) {
657  uint64_t alignmentByte = alignmentBit / memrefType.getElementTypeBitWidth();
658 
659  FailureOr<unsigned> addressSpace =
660  typeConverter->getMemRefAddressSpace(memrefType);
661  if (failed(addressSpace)) {
662  op->emitError() << "conversion of memref memory space "
663  << memrefType.getMemorySpace()
664  << " to integer address space "
665  "failed. Consider adding memory space conversions.";
666  }
667 
668  // Step 1. Collect symbol names of LLVM::GlobalOp Ops. Also if any of
669  // LLVM::GlobalOp is suitable for shared memory, return it.
670  llvm::StringSet<> existingGlobalNames;
671  for (auto globalOp : moduleOp.getBody()->getOps<LLVM::GlobalOp>()) {
672  existingGlobalNames.insert(globalOp.getSymName());
673  if (auto arrayType = dyn_cast<LLVM::LLVMArrayType>(globalOp.getType())) {
674  if (globalOp.getAddrSpace() == addressSpace.value() &&
675  arrayType.getNumElements() == 0 &&
676  globalOp.getAlignment().value_or(0) == alignmentByte) {
677  return globalOp;
678  }
679  }
680  }
681 
682  // Step 2. Find a unique symbol name
683  unsigned uniquingCounter = 0;
684  SmallString<128> symName = SymbolTable::generateSymbolName<128>(
685  "__dynamic_shmem_",
686  [&](StringRef candidate) {
687  return existingGlobalNames.contains(candidate);
688  },
689  uniquingCounter);
690 
691  // Step 3. Generate a global op
692  OpBuilder::InsertionGuard guard(rewriter);
693  rewriter.setInsertionPointToStart(moduleOp.getBody());
694 
695  auto zeroSizedArrayType = LLVM::LLVMArrayType::get(
696  typeConverter->convertType(memrefType.getElementType()), 0);
697 
698  return rewriter.create<LLVM::GlobalOp>(
699  op->getLoc(), zeroSizedArrayType, /*isConstant=*/false,
700  LLVM::Linkage::Internal, symName, /*value=*/Attribute(), alignmentByte,
701  addressSpace.value());
702 }
703 
705  gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
706  ConversionPatternRewriter &rewriter) const {
707  Location loc = op.getLoc();
708  MemRefType memrefType = op.getResultMemref().getType();
709  Type elementType = typeConverter->convertType(memrefType.getElementType());
710 
711  // Step 1: Generate a memref<0xi8> type
712  MemRefLayoutAttrInterface layout = {};
713  auto memrefType0sz =
714  MemRefType::get({0}, elementType, layout, memrefType.getMemorySpace());
715 
716  // Step 2: Generate a global symbol or existing for the dynamic shared
717  // memory with memref<0xi8> type
718  auto moduleOp = op->getParentOfType<gpu::GPUModuleOp>();
719  LLVM::GlobalOp shmemOp = getDynamicSharedMemorySymbol(
720  rewriter, moduleOp, op, getTypeConverter(), memrefType0sz, alignmentBit);
721 
722  // Step 3. Get address of the global symbol
723  OpBuilder::InsertionGuard guard(rewriter);
724  rewriter.setInsertionPoint(op);
725  auto basePtr = rewriter.create<LLVM::AddressOfOp>(loc, shmemOp);
726  Type baseType = basePtr->getResultTypes().front();
727 
728  // Step 4. Generate GEP using offsets
729  SmallVector<LLVM::GEPArg> gepArgs = {0};
730  Value shmemPtr = rewriter.create<LLVM::GEPOp>(loc, baseType, elementType,
731  basePtr, gepArgs);
732  // Step 5. Create a memref descriptor
733  SmallVector<Value> shape, strides;
734  Value sizeBytes;
735  getMemRefDescriptorSizes(loc, memrefType0sz, {}, rewriter, shape, strides,
736  sizeBytes);
737  auto memRefDescriptor = this->createMemRefDescriptor(
738  loc, memrefType0sz, shmemPtr, shmemPtr, shape, strides, rewriter);
739 
740  // Step 5. Replace the op with memref descriptor
741  rewriter.replaceOp(op, {memRefDescriptor});
742  return success();
743 }
744 
746  gpu::ReturnOp op, OpAdaptor adaptor,
747  ConversionPatternRewriter &rewriter) const {
748  Location loc = op.getLoc();
749  unsigned numArguments = op.getNumOperands();
750  SmallVector<Value, 4> updatedOperands;
751 
752  bool useBarePtrCallConv = getTypeConverter()->getOptions().useBarePtrCallConv;
753  if (useBarePtrCallConv) {
754  // For the bare-ptr calling convention, extract the aligned pointer to
755  // be returned from the memref descriptor.
756  for (auto it : llvm::zip(op->getOperands(), adaptor.getOperands())) {
757  Type oldTy = std::get<0>(it).getType();
758  Value newOperand = std::get<1>(it);
759  if (isa<MemRefType>(oldTy) && getTypeConverter()->canConvertToBarePtr(
760  cast<BaseMemRefType>(oldTy))) {
761  MemRefDescriptor memrefDesc(newOperand);
762  newOperand = memrefDesc.allocatedPtr(rewriter, loc);
763  } else if (isa<UnrankedMemRefType>(oldTy)) {
764  // Unranked memref is not supported in the bare pointer calling
765  // convention.
766  return failure();
767  }
768  updatedOperands.push_back(newOperand);
769  }
770  } else {
771  updatedOperands = llvm::to_vector<4>(adaptor.getOperands());
772  (void)copyUnrankedDescriptors(rewriter, loc, op.getOperands().getTypes(),
773  updatedOperands,
774  /*toDynamic=*/true);
775  }
776 
777  // If ReturnOp has 0 or 1 operand, create it and return immediately.
778  if (numArguments <= 1) {
779  rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(
780  op, TypeRange(), updatedOperands, op->getAttrs());
781  return success();
782  }
783 
784  // Otherwise, we need to pack the arguments into an LLVM struct type before
785  // returning.
786  auto packedType = getTypeConverter()->packFunctionResults(
787  op.getOperandTypes(), useBarePtrCallConv);
788  if (!packedType) {
789  return rewriter.notifyMatchFailure(op, "could not convert result types");
790  }
791 
792  Value packed = rewriter.create<LLVM::PoisonOp>(loc, packedType);
793  for (auto [idx, operand] : llvm::enumerate(updatedOperands)) {
794  packed = rewriter.create<LLVM::InsertValueOp>(loc, packed, operand, idx);
795  }
796  rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, TypeRange(), packed,
797  op->getAttrs());
798  return success();
799 }
800 
802  TypeConverter &typeConverter, const MemorySpaceMapping &mapping) {
803  typeConverter.addTypeAttributeConversion(
804  [mapping](BaseMemRefType type, gpu::AddressSpaceAttr memorySpaceAttr) {
805  gpu::AddressSpace memorySpace = memorySpaceAttr.getValue();
806  unsigned addressSpace = mapping(memorySpace);
807  return wrapNumericMemorySpace(memorySpaceAttr.getContext(),
808  addressSpace);
809  });
810 }
static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space)
static SmallString< 16 > getUniqueSymbolName(gpu::GPUModuleOp moduleOp, StringRef prefix)
static Value scalarizeVectorOpHelper(Operation *op, ValueRange operands, Type llvm1DVectorTy, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Helper for impl::scalarizeVectorOp.
LLVM::GlobalOp getDynamicSharedMemorySymbol(ConversionPatternRewriter &rewriter, gpu::GPUModuleOp moduleOp, gpu::DynamicSharedMemoryOp op, const LLVMTypeConverter *typeConverter, MemRefType memrefType, unsigned alignmentBit)
Generates a symbol with 0-sized array type for dynamic shared memory usage, or uses existing symbol.
static std::string diag(const llvm::Value &value)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class provides a shared interface for ranked and unranked memref types.
Definition: BuiltinTypes.h:102
This class represents an argument of a Block.
Definition: Value.h:295
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:104
UnitAttr getUnitAttr()
Definition: Builders.cpp:94
IntegerType getI64Type()
Definition: Builders.cpp:65
IntegerType getI32Type()
Definition: Builders.cpp:63
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:108
IntegerType getIntegerType(unsigned width)
Definition: Builders.cpp:67
Ty getType(Args &&...args)
Get or construct an instance of the type Ty with provided arguments.
Definition: Builders.h:89
StringAttr getStringAttr(const Twine &bytes)
Definition: Builders.cpp:258
MLIRContext * getContext() const
Definition: Builders.h:56
IndexType getIndexType()
Definition: Builders.cpp:51
IntegerType getI8Type()
Definition: Builders.cpp:59
DictionaryAttr getDictionaryAttr(ArrayRef< NamedAttribute > value)
Definition: Builders.cpp:100
NamedAttribute getNamedAttr(StringRef name, Attribute val)
Definition: Builders.cpp:90
FloatType getF64Type()
Definition: Builders.cpp:45
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
Definition: Builders.h:96
This class implements a pattern rewriter for use with ConversionPatterns.
void replaceOp(Operation *op, ValueRange newValues) override
Replace the given operation with the new values.
FailureOr< Block * > convertRegionTypes(Region *region, const TypeConverter &converter, TypeConverter::SignatureConversion *entryConversion=nullptr)
Apply a signature conversion to each block in the given region.
void eraseOp(Operation *op) override
PatternRewriter hook for erasing a dead operation.
const TypeConverter * typeConverter
An optional type converter for use by this pattern.
MemRefDescriptor createMemRefDescriptor(Location loc, MemRefType memRefType, Value allocatedPtr, Value alignedPtr, ArrayRef< Value > sizes, ArrayRef< Value > strides, ConversionPatternRewriter &rewriter) const
Creates and populates a canonical memref descriptor struct.
Definition: Pattern.cpp:216
void getMemRefDescriptorSizes(Location loc, MemRefType memRefType, ValueRange dynamicSizes, ConversionPatternRewriter &rewriter, SmallVectorImpl< Value > &sizes, SmallVectorImpl< Value > &strides, Value &size, bool sizeInBytes=true) const
Computes sizes, strides and buffer size of memRefType with identity layout.
Definition: Pattern.cpp:114
const LLVMTypeConverter * getTypeConverter() const
Definition: Pattern.cpp:27
LLVM::LLVMDialect & getDialect() const
Returns the LLVM dialect.
Definition: Pattern.cpp:32
LogicalResult copyUnrankedDescriptors(OpBuilder &builder, Location loc, TypeRange origTypes, SmallVectorImpl< Value > &operands, bool toDynamic) const
Copies the memory descriptor for any operands that were unranked descriptors originally to heap-allocated memory (if toDynamic is true) or to stack-allocated memory (otherwise).
Definition: Pattern.cpp:245
This class contains all of the information necessary to report a diagnostic to the DiagnosticEngine.
Definition: Diagnostics.h:155
Conversion from types to the LLVM IR dialect.
Definition: TypeConverter.h:35
Type packFunctionResults(TypeRange types, bool useBarePointerCallConv=false) const
Convert a non-empty list of types to be returned from a function into an LLVM-compatible type.
const LowerToLLVMOptions & getOptions() const
Type convertFunctionSignature(FunctionType funcTy, bool isVariadic, bool useBarePtrCallConv, SignatureConversion &result) const
Convert a function type.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
FailureOr< unsigned > getMemRefAddressSpace(BaseMemRefType type) const
Return the LLVM address space corresponding to the memory space of the memref type type or failure if...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a pointer to LocationAttr.
Definition: Location.h:66
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
Helper class to produce LLVM dialect operations extracting or inserting elements of a MemRef descriptor.
Definition: MemRefBuilder.h:33
static MemRefDescriptor fromStaticShape(OpBuilder &builder, Location loc, const LLVMTypeConverter &typeConverter, MemRefType type, Value memory)
Builds IR creating a MemRef descriptor that represents type and populates it with static shape and stride information extracted from the type.
Value allocatedPtr(OpBuilder &builder, Location loc)
Builds IR extracting the allocated pointer from the descriptor.
NamedAttrList is an array of NamedAttributes that tracks whether it is sorted and does some basic work to remain sorted.
Attribute erase(StringAttr name)
Erase the attribute with the given name from the list.
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:346
This class helps build Operations.
Definition: Builders.h:205
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:429
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:396
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:453
StringAttr getIdentifier() const
Return the name of this operation as a StringAttr.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition: Operation.h:512
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
result_type_range getResultTypes()
Definition: Operation.h:428
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure, and to provide a callback that populates a diagnostic with the reason for the failure.
Definition: PatternMatch.h:686
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
Definition: PatternMatch.h:598
void inlineRegionBefore(Region &region, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition: PatternMatch.h:504
static StringRef getSymbolAttrName()
Return the name of the attribute used for symbol names.
Definition: SymbolTable.h:76
This class provides all of the information necessary to convert a type signature.
std::optional< InputMapping > getInputMapping(unsigned input) const
Get the input mapping for the given argument.
void remapInput(unsigned origInputNo, ArrayRef< Value > replacements)
Remap an input of the original signature to replacements values.
Type conversion class.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
void addTypeAttributeConversion(FnT &&callback)
Register a conversion function for attributes within types.
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:37
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
Definition: Types.h:74
bool isIntOrFloat() const
Return true if this is an integer (of any signedness) or a float type.
Definition: Types.cpp:116
unsigned getIntOrFloatBitWidth() const
Return the bit width of an integer or a float type, assert failure on other types.
Definition: Types.cpp:122
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
LogicalResult handleMultidimensionalVectors(Operation *op, ValueRange operands, const LLVMTypeConverter &typeConverter, std::function< Value(Type, ValueRange)> createOperand, ConversionPatternRewriter &rewriter)
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Unrolls op to array/vector elements.
Include the generated interface declarations.
InFlightDiagnostic emitWarning(Location loc)
Utility method to emit a warning message using this location.
std::function< unsigned(gpu::AddressSpace)> MemorySpaceMapping
A function that maps a MemorySpace enum to a target-specific integer value.
Definition: GPUCommonPass.h:71
void populateGpuMemorySpaceAttributeConversions(TypeConverter &typeConverter, const MemorySpaceMapping &mapping)
Populates memory space attribute conversion rules for lowering gpu.address_space to integer values.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed; this helps unify some of the attribute construction methods.
LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc, OpBuilder &b, StringRef name, LLVM::LLVMFunctionType type)
Find or create an external function declaration in the given module.
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc, gpu::GPUModuleOp moduleOp, Type llvmI8, StringRef namePrefix, StringRef str, uint64_t alignment=0, unsigned addrSpace=0)
Create a global that contains the given string.
LogicalResult matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.
LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.