MLIR  21.0.0git
GPUOpsLowering.cpp
Go to the documentation of this file.
1 //===- GPUOpsLowering.cpp - GPU FuncOp / ReturnOp lowering ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "GPUOpsLowering.h"
10 
14 #include "mlir/IR/Attributes.h"
15 #include "mlir/IR/Builders.h"
16 #include "mlir/IR/BuiltinTypes.h"
17 #include "llvm/ADT/SmallVectorExtras.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/FormatVariadic.h"
20 
21 using namespace mlir;
22 
23 LLVM::LLVMFuncOp mlir::getOrDefineFunction(gpu::GPUModuleOp moduleOp,
24  Location loc, OpBuilder &b,
25  StringRef name,
26  LLVM::LLVMFunctionType type) {
27  LLVM::LLVMFuncOp ret;
28  if (!(ret = moduleOp.template lookupSymbol<LLVM::LLVMFuncOp>(name))) {
30  b.setInsertionPointToStart(moduleOp.getBody());
31  ret = b.create<LLVM::LLVMFuncOp>(loc, name, type, LLVM::Linkage::External);
32  }
33  return ret;
34 }
35 
36 static SmallString<16> getUniqueSymbolName(gpu::GPUModuleOp moduleOp,
37  StringRef prefix) {
38  // Get a unique global name.
39  unsigned stringNumber = 0;
40  SmallString<16> stringConstName;
41  do {
42  stringConstName.clear();
43  (prefix + Twine(stringNumber++)).toStringRef(stringConstName);
44  } while (moduleOp.lookupSymbol(stringConstName));
45  return stringConstName;
46 }
47 
48 LLVM::GlobalOp
50  gpu::GPUModuleOp moduleOp, Type llvmI8,
51  StringRef namePrefix, StringRef str,
52  uint64_t alignment, unsigned addrSpace) {
53  llvm::SmallString<20> nullTermStr(str);
54  nullTermStr.push_back('\0'); // Null terminate for C
55  auto globalType =
56  LLVM::LLVMArrayType::get(llvmI8, nullTermStr.size_in_bytes());
57  StringAttr attr = b.getStringAttr(nullTermStr);
58 
59  // Try to find existing global.
60  for (auto globalOp : moduleOp.getOps<LLVM::GlobalOp>())
61  if (globalOp.getGlobalType() == globalType && globalOp.getConstant() &&
62  globalOp.getValueAttr() == attr &&
63  globalOp.getAlignment().value_or(0) == alignment &&
64  globalOp.getAddrSpace() == addrSpace)
65  return globalOp;
66 
67  // Not found: create new global.
69  b.setInsertionPointToStart(moduleOp.getBody());
70  SmallString<16> name = getUniqueSymbolName(moduleOp, namePrefix);
71  return b.create<LLVM::GlobalOp>(loc, globalType,
72  /*isConstant=*/true, LLVM::Linkage::Internal,
73  name, attr, alignment, addrSpace);
74 }
75 
76 LogicalResult
77 GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
78  ConversionPatternRewriter &rewriter) const {
79  Location loc = gpuFuncOp.getLoc();
80 
81  SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
82  if (encodeWorkgroupAttributionsAsArguments) {
83  // Append an `llvm.ptr` argument to the function signature to encode
84  // workgroup attributions.
85 
86  ArrayRef<BlockArgument> workgroupAttributions =
87  gpuFuncOp.getWorkgroupAttributions();
88  size_t numAttributions = workgroupAttributions.size();
89 
90  // Insert all arguments at the end.
91  unsigned index = gpuFuncOp.getNumArguments();
92  SmallVector<unsigned> argIndices(numAttributions, index);
93 
94  // New arguments will simply be `llvm.ptr` with the correct address space
95  Type workgroupPtrType =
96  rewriter.getType<LLVM::LLVMPointerType>(workgroupAddrSpace);
97  SmallVector<Type> argTypes(numAttributions, workgroupPtrType);
98 
99  // Attributes: noalias, llvm.mlir.workgroup_attribution(<size>, <type>)
100  std::array attrs{
101  rewriter.getNamedAttr(LLVM::LLVMDialect::getNoAliasAttrName(),
102  rewriter.getUnitAttr()),
103  rewriter.getNamedAttr(
104  getDialect().getWorkgroupAttributionAttrHelper().getName(),
105  rewriter.getUnitAttr()),
106  };
108  for (BlockArgument attribution : workgroupAttributions) {
109  auto attributionType = cast<MemRefType>(attribution.getType());
110  IntegerAttr numElements =
111  rewriter.getI64IntegerAttr(attributionType.getNumElements());
112  Type llvmElementType =
113  getTypeConverter()->convertType(attributionType.getElementType());
114  if (!llvmElementType)
115  return failure();
116  TypeAttr type = TypeAttr::get(llvmElementType);
117  attrs.back().setValue(
118  rewriter.getAttr<LLVM::WorkgroupAttributionAttr>(numElements, type));
119  argAttrs.push_back(rewriter.getDictionaryAttr(attrs));
120  }
121 
122  // Location match function location
123  SmallVector<Location> argLocs(numAttributions, gpuFuncOp.getLoc());
124 
125  // Perform signature modification
126  rewriter.modifyOpInPlace(
127  gpuFuncOp, [gpuFuncOp, &argIndices, &argTypes, &argAttrs, &argLocs]() {
128  LogicalResult inserted =
129  static_cast<FunctionOpInterface>(gpuFuncOp).insertArguments(
130  argIndices, argTypes, argAttrs, argLocs);
131  (void)inserted;
132  assert(succeeded(inserted) &&
133  "expected GPU funcs to support inserting any argument");
134  });
135  } else {
136  workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
137  for (auto [idx, attribution] :
138  llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
139  auto type = dyn_cast<MemRefType>(attribution.getType());
140  assert(type && type.hasStaticShape() && "unexpected type in attribution");
141 
142  uint64_t numElements = type.getNumElements();
143 
144  auto elementType =
145  cast<Type>(typeConverter->convertType(type.getElementType()));
146  auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
147  std::string name =
148  std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx));
149  uint64_t alignment = 0;
150  if (auto alignAttr = dyn_cast_or_null<IntegerAttr>(
151  gpuFuncOp.getWorkgroupAttributionAttr(
152  idx, LLVM::LLVMDialect::getAlignAttrName())))
153  alignment = alignAttr.getInt();
154  auto globalOp = rewriter.create<LLVM::GlobalOp>(
155  gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
156  LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment,
157  workgroupAddrSpace);
158  workgroupBuffers.push_back(globalOp);
159  }
160  }
161 
162  // Remap proper input types.
163  TypeConverter::SignatureConversion signatureConversion(
164  gpuFuncOp.front().getNumArguments());
165 
167  gpuFuncOp.getFunctionType(), /*isVariadic=*/false,
168  getTypeConverter()->getOptions().useBarePtrCallConv, signatureConversion);
169  if (!funcType) {
170  return rewriter.notifyMatchFailure(gpuFuncOp, [&](Diagnostic &diag) {
171  diag << "failed to convert function signature type for: "
172  << gpuFuncOp.getFunctionType();
173  });
174  }
175 
176  // Create the new function operation. Only copy those attributes that are
177  // not specific to function modeling.
179  ArrayAttr argAttrs;
180  for (const auto &attr : gpuFuncOp->getAttrs()) {
181  if (attr.getName() == SymbolTable::getSymbolAttrName() ||
182  attr.getName() == gpuFuncOp.getFunctionTypeAttrName() ||
183  attr.getName() ==
184  gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() ||
185  attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() ||
186  attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName() ||
187  attr.getName() == gpuFuncOp.getKnownBlockSizeAttrName() ||
188  attr.getName() == gpuFuncOp.getKnownGridSizeAttrName())
189  continue;
190  if (attr.getName() == gpuFuncOp.getArgAttrsAttrName()) {
191  argAttrs = gpuFuncOp.getArgAttrsAttr();
192  continue;
193  }
194  attributes.push_back(attr);
195  }
196 
197  DenseI32ArrayAttr knownBlockSize = gpuFuncOp.getKnownBlockSizeAttr();
198  DenseI32ArrayAttr knownGridSize = gpuFuncOp.getKnownGridSizeAttr();
199  // Ensure we don't lose information if the function is lowered before its
200  // surrounding context.
201  auto *gpuDialect = cast<gpu::GPUDialect>(gpuFuncOp->getDialect());
202  if (knownBlockSize)
203  attributes.emplace_back(gpuDialect->getKnownBlockSizeAttrHelper().getName(),
204  knownBlockSize);
205  if (knownGridSize)
206  attributes.emplace_back(gpuDialect->getKnownGridSizeAttrHelper().getName(),
207  knownGridSize);
208 
209  // Add a dialect specific kernel attribute in addition to GPU kernel
210  // attribute. The former is necessary for further translation while the
211  // latter is expected by gpu.launch_func.
212  if (gpuFuncOp.isKernel()) {
213  if (kernelAttributeName)
214  attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr());
215  // Set the dialect-specific block size attribute if there is one.
216  if (kernelBlockSizeAttributeName && knownBlockSize) {
217  attributes.emplace_back(kernelBlockSizeAttributeName, knownBlockSize);
218  }
219  }
220  LLVM::CConv callingConvention = gpuFuncOp.isKernel()
221  ? kernelCallingConvention
222  : nonKernelCallingConvention;
223  auto llvmFuncOp = rewriter.create<LLVM::LLVMFuncOp>(
224  gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType,
225  LLVM::Linkage::External, /*dsoLocal=*/false, callingConvention,
226  /*comdat=*/nullptr, attributes);
227 
228  {
229  // Insert operations that correspond to converted workgroup and private
230  // memory attributions to the body of the function. This must operate on
231  // the original function, before the body region is inlined in the new
232  // function to maintain the relation between block arguments and the
233  // parent operation that assigns their semantics.
234  OpBuilder::InsertionGuard guard(rewriter);
235 
236  // Rewrite workgroup memory attributions to addresses of global buffers.
237  rewriter.setInsertionPointToStart(&gpuFuncOp.front());
238  unsigned numProperArguments = gpuFuncOp.getNumArguments();
239 
240  if (encodeWorkgroupAttributionsAsArguments) {
241  // Build a MemRefDescriptor with each of the arguments added above.
242 
243  unsigned numAttributions = gpuFuncOp.getNumWorkgroupAttributions();
244  assert(numProperArguments >= numAttributions &&
245  "Expecting attributions to be encoded as arguments already");
246 
247  // Arguments encoding workgroup attributions will be in positions
248  // [numProperArguments, numProperArguments+numAttributions)
249  ArrayRef<BlockArgument> attributionArguments =
250  gpuFuncOp.getArguments().slice(numProperArguments - numAttributions,
251  numAttributions);
252  for (auto [idx, vals] : llvm::enumerate(llvm::zip_equal(
253  gpuFuncOp.getWorkgroupAttributions(), attributionArguments))) {
254  auto [attribution, arg] = vals;
255  auto type = cast<MemRefType>(attribution.getType());
256 
257  // Arguments are of llvm.ptr type and attributions are of memref type:
258  // we need to wrap them in memref descriptors.
260  rewriter, loc, *getTypeConverter(), type, arg);
261 
262  // And remap the arguments
263  signatureConversion.remapInput(numProperArguments + idx, descr);
264  }
265  } else {
266  for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) {
267  auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(),
268  global.getAddrSpace());
269  Value address = rewriter.create<LLVM::AddressOfOp>(
270  loc, ptrType, global.getSymNameAttr());
271  Value memory =
272  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getType(),
273  address, ArrayRef<LLVM::GEPArg>{0, 0});
274 
275  // Build a memref descriptor pointing to the buffer to plug with the
276  // existing memref infrastructure. This may use more registers than
277  // otherwise necessary given that memref sizes are fixed, but we can try
278  // and canonicalize that away later.
279  Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx];
280  auto type = cast<MemRefType>(attribution.getType());
282  rewriter, loc, *getTypeConverter(), type, memory);
283  signatureConversion.remapInput(numProperArguments + idx, descr);
284  }
285  }
286 
287  // Rewrite private memory attributions to alloca'ed buffers.
288  unsigned numWorkgroupAttributions = gpuFuncOp.getNumWorkgroupAttributions();
289  auto int64Ty = IntegerType::get(rewriter.getContext(), 64);
290  for (const auto [idx, attribution] :
291  llvm::enumerate(gpuFuncOp.getPrivateAttributions())) {
292  auto type = cast<MemRefType>(attribution.getType());
293  assert(type && type.hasStaticShape() && "unexpected type in attribution");
294 
295  // Explicitly drop memory space when lowering private memory
296  // attributions since NVVM models it as `alloca`s in the default
297  // memory space and does not support `alloca`s with addrspace(5).
298  Type elementType = typeConverter->convertType(type.getElementType());
299  auto ptrType =
300  LLVM::LLVMPointerType::get(rewriter.getContext(), allocaAddrSpace);
301  Value numElements = rewriter.create<LLVM::ConstantOp>(
302  gpuFuncOp.getLoc(), int64Ty, type.getNumElements());
303  uint64_t alignment = 0;
304  if (auto alignAttr =
305  dyn_cast_or_null<IntegerAttr>(gpuFuncOp.getPrivateAttributionAttr(
306  idx, LLVM::LLVMDialect::getAlignAttrName())))
307  alignment = alignAttr.getInt();
308  Value allocated = rewriter.create<LLVM::AllocaOp>(
309  gpuFuncOp.getLoc(), ptrType, elementType, numElements, alignment);
311  rewriter, loc, *getTypeConverter(), type, allocated);
312  signatureConversion.remapInput(
313  numProperArguments + numWorkgroupAttributions + idx, descr);
314  }
315  }
316 
317  // Move the region to the new function, update the entry block signature.
318  rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(),
319  llvmFuncOp.end());
320  if (failed(rewriter.convertRegionTypes(&llvmFuncOp.getBody(), *typeConverter,
321  &signatureConversion)))
322  return failure();
323 
324  // Get memref type from function arguments and set the noalias to
325  // pointer arguments.
326  for (const auto [idx, argTy] :
327  llvm::enumerate(gpuFuncOp.getArgumentTypes())) {
328  auto remapping = signatureConversion.getInputMapping(idx);
329  NamedAttrList argAttr =
330  argAttrs ? cast<DictionaryAttr>(argAttrs[idx]) : NamedAttrList();
331  auto copyAttribute = [&](StringRef attrName) {
332  Attribute attr = argAttr.erase(attrName);
333  if (!attr)
334  return;
335  for (size_t i = 0, e = remapping->size; i < e; ++i)
336  llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
337  };
338  auto copyPointerAttribute = [&](StringRef attrName) {
339  Attribute attr = argAttr.erase(attrName);
340 
341  if (!attr)
342  return;
343  if (remapping->size > 1 &&
344  attrName == LLVM::LLVMDialect::getNoAliasAttrName()) {
345  emitWarning(llvmFuncOp.getLoc(),
346  "Cannot copy noalias with non-bare pointers.\n");
347  return;
348  }
349  for (size_t i = 0, e = remapping->size; i < e; ++i) {
350  if (isa<LLVM::LLVMPointerType>(
351  llvmFuncOp.getArgument(remapping->inputNo + i).getType())) {
352  llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
353  }
354  }
355  };
356 
357  if (argAttr.empty())
358  continue;
359 
360  copyAttribute(LLVM::LLVMDialect::getReturnedAttrName());
361  copyAttribute(LLVM::LLVMDialect::getNoUndefAttrName());
362  copyAttribute(LLVM::LLVMDialect::getInRegAttrName());
363  bool lowersToPointer = false;
364  for (size_t i = 0, e = remapping->size; i < e; ++i) {
365  lowersToPointer |= isa<LLVM::LLVMPointerType>(
366  llvmFuncOp.getArgument(remapping->inputNo + i).getType());
367  }
368 
369  if (lowersToPointer) {
370  copyPointerAttribute(LLVM::LLVMDialect::getNoAliasAttrName());
371  copyPointerAttribute(LLVM::LLVMDialect::getNoCaptureAttrName());
372  copyPointerAttribute(LLVM::LLVMDialect::getNoFreeAttrName());
373  copyPointerAttribute(LLVM::LLVMDialect::getAlignAttrName());
374  copyPointerAttribute(LLVM::LLVMDialect::getReadonlyAttrName());
375  copyPointerAttribute(LLVM::LLVMDialect::getWriteOnlyAttrName());
376  copyPointerAttribute(LLVM::LLVMDialect::getReadnoneAttrName());
377  copyPointerAttribute(LLVM::LLVMDialect::getNonNullAttrName());
378  copyPointerAttribute(LLVM::LLVMDialect::getDereferenceableAttrName());
379  copyPointerAttribute(
380  LLVM::LLVMDialect::getDereferenceableOrNullAttrName());
381  copyPointerAttribute(
382  LLVM::LLVMDialect::WorkgroupAttributionAttrHelper::getNameStr());
383  }
384  }
385  rewriter.eraseOp(gpuFuncOp);
386  return success();
387 }
388 
390  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
391  ConversionPatternRewriter &rewriter) const {
392  Location loc = gpuPrintfOp->getLoc();
393 
394  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getI8Type());
395  auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
396  mlir::Type llvmI32 = typeConverter->convertType(rewriter.getI32Type());
397  mlir::Type llvmI64 = typeConverter->convertType(rewriter.getI64Type());
398  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
399  // This ensures that global constants and declarations are placed within
400  // the device code, not the host code
401  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
402 
403  auto ocklBegin =
404  getOrDefineFunction(moduleOp, loc, rewriter, "__ockl_printf_begin",
405  LLVM::LLVMFunctionType::get(llvmI64, {llvmI64}));
406  LLVM::LLVMFuncOp ocklAppendArgs;
407  if (!adaptor.getArgs().empty()) {
408  ocklAppendArgs = getOrDefineFunction(
409  moduleOp, loc, rewriter, "__ockl_printf_append_args",
411  llvmI64, {llvmI64, /*numArgs*/ llvmI32, llvmI64, llvmI64, llvmI64,
412  llvmI64, llvmI64, llvmI64, llvmI64, /*isLast*/ llvmI32}));
413  }
414  auto ocklAppendStringN = getOrDefineFunction(
415  moduleOp, loc, rewriter, "__ockl_printf_append_string_n",
417  llvmI64,
418  {llvmI64, ptrType, /*length (bytes)*/ llvmI64, /*isLast*/ llvmI32}));
419 
420  /// Start the printf hostcall
421  Value zeroI64 = rewriter.create<LLVM::ConstantOp>(loc, llvmI64, 0);
422  auto printfBeginCall = rewriter.create<LLVM::CallOp>(loc, ocklBegin, zeroI64);
423  Value printfDesc = printfBeginCall.getResult();
424 
425  // Create the global op or find an existing one.
426  LLVM::GlobalOp global = getOrCreateStringConstant(
427  rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat());
428 
429  // Get a pointer to the format string's first element and pass it to printf()
430  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(
431  loc,
432  LLVM::LLVMPointerType::get(rewriter.getContext(), global.getAddrSpace()),
433  global.getSymNameAttr());
434  Value stringStart =
435  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getGlobalType(),
436  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
437  Value stringLen = rewriter.create<LLVM::ConstantOp>(
438  loc, llvmI64, cast<StringAttr>(global.getValueAttr()).size());
439 
440  Value oneI32 = rewriter.create<LLVM::ConstantOp>(loc, llvmI32, 1);
441  Value zeroI32 = rewriter.create<LLVM::ConstantOp>(loc, llvmI32, 0);
442 
443  auto appendFormatCall = rewriter.create<LLVM::CallOp>(
444  loc, ocklAppendStringN,
445  ValueRange{printfDesc, stringStart, stringLen,
446  adaptor.getArgs().empty() ? oneI32 : zeroI32});
447  printfDesc = appendFormatCall.getResult();
448 
449  // __ockl_printf_append_args takes 7 values per append call
450  constexpr size_t argsPerAppend = 7;
451  size_t nArgs = adaptor.getArgs().size();
452  for (size_t group = 0; group < nArgs; group += argsPerAppend) {
453  size_t bound = std::min(group + argsPerAppend, nArgs);
454  size_t numArgsThisCall = bound - group;
455 
457  arguments.push_back(printfDesc);
458  arguments.push_back(
459  rewriter.create<LLVM::ConstantOp>(loc, llvmI32, numArgsThisCall));
460  for (size_t i = group; i < bound; ++i) {
461  Value arg = adaptor.getArgs()[i];
462  if (auto floatType = dyn_cast<FloatType>(arg.getType())) {
463  if (!floatType.isF64())
464  arg = rewriter.create<LLVM::FPExtOp>(
465  loc, typeConverter->convertType(rewriter.getF64Type()), arg);
466  arg = rewriter.create<LLVM::BitcastOp>(loc, llvmI64, arg);
467  }
468  if (arg.getType().getIntOrFloatBitWidth() != 64)
469  arg = rewriter.create<LLVM::ZExtOp>(loc, llvmI64, arg);
470 
471  arguments.push_back(arg);
472  }
473  // Pad out to 7 arguments since the hostcall always needs 7
474  for (size_t extra = numArgsThisCall; extra < argsPerAppend; ++extra) {
475  arguments.push_back(zeroI64);
476  }
477 
478  auto isLast = (bound == nArgs) ? oneI32 : zeroI32;
479  arguments.push_back(isLast);
480  auto call = rewriter.create<LLVM::CallOp>(loc, ocklAppendArgs, arguments);
481  printfDesc = call.getResult();
482  }
483  rewriter.eraseOp(gpuPrintfOp);
484  return success();
485 }
486 
488  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
489  ConversionPatternRewriter &rewriter) const {
490  Location loc = gpuPrintfOp->getLoc();
491 
492  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8));
493  mlir::Type ptrType =
494  LLVM::LLVMPointerType::get(rewriter.getContext(), addressSpace);
495 
496  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
497  // This ensures that global constants and declarations are placed within
498  // the device code, not the host code
499  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
500 
501  auto printfType =
502  LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType},
503  /*isVarArg=*/true);
504  LLVM::LLVMFuncOp printfDecl =
505  getOrDefineFunction(moduleOp, loc, rewriter, "printf", printfType);
506 
507  // Create the global op or find an existing one.
508  LLVM::GlobalOp global = getOrCreateStringConstant(
509  rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat(),
510  /*alignment=*/0, addressSpace);
511 
512  // Get a pointer to the format string's first element
513  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(
514  loc,
515  LLVM::LLVMPointerType::get(rewriter.getContext(), global.getAddrSpace()),
516  global.getSymNameAttr());
517  Value stringStart =
518  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getGlobalType(),
519  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
520 
521  // Construct arguments and function call
522  auto argsRange = adaptor.getArgs();
523  SmallVector<Value, 4> printfArgs;
524  printfArgs.reserve(argsRange.size() + 1);
525  printfArgs.push_back(stringStart);
526  printfArgs.append(argsRange.begin(), argsRange.end());
527 
528  rewriter.create<LLVM::CallOp>(loc, printfDecl, printfArgs);
529  rewriter.eraseOp(gpuPrintfOp);
530  return success();
531 }
532 
534  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
535  ConversionPatternRewriter &rewriter) const {
536  Location loc = gpuPrintfOp->getLoc();
537 
538  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8));
539  mlir::Type ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
540 
541  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
542  // This ensures that global constants and declarations are placed within
543  // the device code, not the host code
544  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
545 
546  auto vprintfType =
547  LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType, ptrType});
548  LLVM::LLVMFuncOp vprintfDecl =
549  getOrDefineFunction(moduleOp, loc, rewriter, "vprintf", vprintfType);
550 
551  // Create the global op or find an existing one.
552  LLVM::GlobalOp global = getOrCreateStringConstant(
553  rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat());
554 
555  // Get a pointer to the format string's first element
556  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(loc, global);
557  Value stringStart =
558  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getGlobalType(),
559  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
560  SmallVector<Type> types;
561  SmallVector<Value> args;
562  // Promote and pack the arguments into a stack allocation.
563  for (Value arg : adaptor.getArgs()) {
564  Type type = arg.getType();
565  Value promotedArg = arg;
566  assert(type.isIntOrFloat());
567  if (isa<FloatType>(type)) {
568  type = rewriter.getF64Type();
569  promotedArg = rewriter.create<LLVM::FPExtOp>(loc, type, arg);
570  }
571  types.push_back(type);
572  args.push_back(promotedArg);
573  }
574  Type structType =
575  LLVM::LLVMStructType::getLiteral(gpuPrintfOp.getContext(), types);
576  Value one = rewriter.create<LLVM::ConstantOp>(loc, rewriter.getI64Type(),
577  rewriter.getIndexAttr(1));
578  Value tempAlloc =
579  rewriter.create<LLVM::AllocaOp>(loc, ptrType, structType, one,
580  /*alignment=*/0);
581  for (auto [index, arg] : llvm::enumerate(args)) {
582  Value ptr = rewriter.create<LLVM::GEPOp>(
583  loc, ptrType, structType, tempAlloc,
584  ArrayRef<LLVM::GEPArg>{0, static_cast<int32_t>(index)});
585  rewriter.create<LLVM::StoreOp>(loc, arg, ptr);
586  }
587  std::array<Value, 2> printfArgs = {stringStart, tempAlloc};
588 
589  rewriter.create<LLVM::CallOp>(loc, vprintfDecl, printfArgs);
590  rewriter.eraseOp(gpuPrintfOp);
591  return success();
592 }
593 
594 /// Helper for impl::scalarizeVectorOp. Scalarizes vectors to elements.
595 /// Used either directly (for ops on 1D vectors) or as the callback passed to
596 /// detail::handleMultidimensionalVectors (for ops on higher-rank vectors).
598  Type llvm1DVectorTy,
599  ConversionPatternRewriter &rewriter,
600  const LLVMTypeConverter &converter) {
601  TypeRange operandTypes(operands);
602  VectorType vectorType = cast<VectorType>(llvm1DVectorTy);
603  Location loc = op->getLoc();
604  Value result = rewriter.create<LLVM::PoisonOp>(loc, vectorType);
605  Type indexType = converter.convertType(rewriter.getIndexType());
606  StringAttr name = op->getName().getIdentifier();
607  Type elementType = vectorType.getElementType();
608 
609  for (int64_t i = 0; i < vectorType.getNumElements(); ++i) {
610  Value index = rewriter.create<LLVM::ConstantOp>(loc, indexType, i);
611  auto extractElement = [&](Value operand) -> Value {
612  if (!isa<VectorType>(operand.getType()))
613  return operand;
614  return rewriter.create<LLVM::ExtractElementOp>(loc, operand, index);
615  };
616  auto scalarOperands = llvm::map_to_vector(operands, extractElement);
617  Operation *scalarOp =
618  rewriter.create(loc, name, scalarOperands, elementType, op->getAttrs());
619  result = rewriter.create<LLVM::InsertElementOp>(
620  loc, result, scalarOp->getResult(0), index);
621  }
622  return result;
623 }
624 
625 /// Unrolls op to array/vector elements.
626 LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands,
627  ConversionPatternRewriter &rewriter,
628  const LLVMTypeConverter &converter) {
629  TypeRange operandTypes(operands);
630  if (llvm::any_of(operandTypes, llvm::IsaPred<VectorType>)) {
631  VectorType vectorType =
632  cast<VectorType>(converter.convertType(op->getResultTypes()[0]));
633  rewriter.replaceOp(op, scalarizeVectorOpHelper(op, operands, vectorType,
634  rewriter, converter));
635  return success();
636  }
637 
638  if (llvm::any_of(operandTypes, llvm::IsaPred<LLVM::LLVMArrayType>)) {
640  op, operands, converter,
641  [&](Type llvm1DVectorTy, ValueRange operands) -> Value {
642  return scalarizeVectorOpHelper(op, operands, llvm1DVectorTy, rewriter,
643  converter);
644  },
645  rewriter);
646  }
647 
648  return rewriter.notifyMatchFailure(op, "no llvm.array or vector to unroll");
649 }
650 
651 static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space) {
652  return IntegerAttr::get(IntegerType::get(ctx, 64), space);
653 }
654 
655 /// Generates a symbol with 0-sized array type for dynamic shared memory usage,
656 /// or uses existing symbol.
658  ConversionPatternRewriter &rewriter, gpu::GPUModuleOp moduleOp,
659  gpu::DynamicSharedMemoryOp op, const LLVMTypeConverter *typeConverter,
660  MemRefType memrefType, unsigned alignmentBit) {
661  uint64_t alignmentByte = alignmentBit / memrefType.getElementTypeBitWidth();
662 
663  FailureOr<unsigned> addressSpace =
664  typeConverter->getMemRefAddressSpace(memrefType);
665  if (failed(addressSpace)) {
666  op->emitError() << "conversion of memref memory space "
667  << memrefType.getMemorySpace()
668  << " to integer address space "
669  "failed. Consider adding memory space conversions.";
670  }
671 
672  // Step 1. Collect symbol names of LLVM::GlobalOp Ops. Also if any of
673  // LLVM::GlobalOp is suitable for shared memory, return it.
674  llvm::StringSet<> existingGlobalNames;
675  for (auto globalOp : moduleOp.getBody()->getOps<LLVM::GlobalOp>()) {
676  existingGlobalNames.insert(globalOp.getSymName());
677  if (auto arrayType = dyn_cast<LLVM::LLVMArrayType>(globalOp.getType())) {
678  if (globalOp.getAddrSpace() == addressSpace.value() &&
679  arrayType.getNumElements() == 0 &&
680  globalOp.getAlignment().value_or(0) == alignmentByte) {
681  return globalOp;
682  }
683  }
684  }
685 
686  // Step 2. Find a unique symbol name
687  unsigned uniquingCounter = 0;
688  SmallString<128> symName = SymbolTable::generateSymbolName<128>(
689  "__dynamic_shmem_",
690  [&](StringRef candidate) {
691  return existingGlobalNames.contains(candidate);
692  },
693  uniquingCounter);
694 
695  // Step 3. Generate a global op
696  OpBuilder::InsertionGuard guard(rewriter);
697  rewriter.setInsertionPointToStart(moduleOp.getBody());
698 
699  auto zeroSizedArrayType = LLVM::LLVMArrayType::get(
700  typeConverter->convertType(memrefType.getElementType()), 0);
701 
702  return rewriter.create<LLVM::GlobalOp>(
703  op->getLoc(), zeroSizedArrayType, /*isConstant=*/false,
704  LLVM::Linkage::Internal, symName, /*value=*/Attribute(), alignmentByte,
705  addressSpace.value());
706 }
707 
709  gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
710  ConversionPatternRewriter &rewriter) const {
711  Location loc = op.getLoc();
712  MemRefType memrefType = op.getResultMemref().getType();
713  Type elementType = typeConverter->convertType(memrefType.getElementType());
714 
715  // Step 1: Generate a memref<0xi8> type
716  MemRefLayoutAttrInterface layout = {};
717  auto memrefType0sz =
718  MemRefType::get({0}, elementType, layout, memrefType.getMemorySpace());
719 
720  // Step 2: Generate a global symbol or existing for the dynamic shared
721  // memory with memref<0xi8> type
722  auto moduleOp = op->getParentOfType<gpu::GPUModuleOp>();
723  LLVM::GlobalOp shmemOp = getDynamicSharedMemorySymbol(
724  rewriter, moduleOp, op, getTypeConverter(), memrefType0sz, alignmentBit);
725 
726  // Step 3. Get address of the global symbol
727  OpBuilder::InsertionGuard guard(rewriter);
728  rewriter.setInsertionPoint(op);
729  auto basePtr = rewriter.create<LLVM::AddressOfOp>(loc, shmemOp);
730  Type baseType = basePtr->getResultTypes().front();
731 
732  // Step 4. Generate GEP using offsets
733  SmallVector<LLVM::GEPArg> gepArgs = {0};
734  Value shmemPtr = rewriter.create<LLVM::GEPOp>(loc, baseType, elementType,
735  basePtr, gepArgs);
736  // Step 5. Create a memref descriptor
737  SmallVector<Value> shape, strides;
738  Value sizeBytes;
739  getMemRefDescriptorSizes(loc, memrefType0sz, {}, rewriter, shape, strides,
740  sizeBytes);
741  auto memRefDescriptor = this->createMemRefDescriptor(
742  loc, memrefType0sz, shmemPtr, shmemPtr, shape, strides, rewriter);
743 
744  // Step 5. Replace the op with memref descriptor
745  rewriter.replaceOp(op, {memRefDescriptor});
746  return success();
747 }
748 
750  gpu::ReturnOp op, OpAdaptor adaptor,
751  ConversionPatternRewriter &rewriter) const {
752  Location loc = op.getLoc();
753  unsigned numArguments = op.getNumOperands();
754  SmallVector<Value, 4> updatedOperands;
755 
756  bool useBarePtrCallConv = getTypeConverter()->getOptions().useBarePtrCallConv;
757  if (useBarePtrCallConv) {
758  // For the bare-ptr calling convention, extract the aligned pointer to
759  // be returned from the memref descriptor.
760  for (auto it : llvm::zip(op->getOperands(), adaptor.getOperands())) {
761  Type oldTy = std::get<0>(it).getType();
762  Value newOperand = std::get<1>(it);
763  if (isa<MemRefType>(oldTy) && getTypeConverter()->canConvertToBarePtr(
764  cast<BaseMemRefType>(oldTy))) {
765  MemRefDescriptor memrefDesc(newOperand);
766  newOperand = memrefDesc.allocatedPtr(rewriter, loc);
767  } else if (isa<UnrankedMemRefType>(oldTy)) {
768  // Unranked memref is not supported in the bare pointer calling
769  // convention.
770  return failure();
771  }
772  updatedOperands.push_back(newOperand);
773  }
774  } else {
775  updatedOperands = llvm::to_vector<4>(adaptor.getOperands());
776  (void)copyUnrankedDescriptors(rewriter, loc, op.getOperands().getTypes(),
777  updatedOperands,
778  /*toDynamic=*/true);
779  }
780 
781  // If ReturnOp has 0 or 1 operand, create it and return immediately.
782  if (numArguments <= 1) {
783  rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(
784  op, TypeRange(), updatedOperands, op->getAttrs());
785  return success();
786  }
787 
788  // Otherwise, we need to pack the arguments into an LLVM struct type before
789  // returning.
790  auto packedType = getTypeConverter()->packFunctionResults(
791  op.getOperandTypes(), useBarePtrCallConv);
792  if (!packedType) {
793  return rewriter.notifyMatchFailure(op, "could not convert result types");
794  }
795 
796  Value packed = rewriter.create<LLVM::PoisonOp>(loc, packedType);
797  for (auto [idx, operand] : llvm::enumerate(updatedOperands)) {
798  packed = rewriter.create<LLVM::InsertValueOp>(loc, packed, operand, idx);
799  }
800  rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, TypeRange(), packed,
801  op->getAttrs());
802  return success();
803 }
804 
806  TypeConverter &typeConverter, const MemorySpaceMapping &mapping) {
807  typeConverter.addTypeAttributeConversion(
808  [mapping](BaseMemRefType type, gpu::AddressSpaceAttr memorySpaceAttr) {
809  gpu::AddressSpace memorySpace = memorySpaceAttr.getValue();
810  unsigned addressSpace = mapping(memorySpace);
811  return wrapNumericMemorySpace(memorySpaceAttr.getContext(),
812  addressSpace);
813  });
814 }
static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space)
static SmallString< 16 > getUniqueSymbolName(gpu::GPUModuleOp moduleOp, StringRef prefix)
static Value scalarizeVectorOpHelper(Operation *op, ValueRange operands, Type llvm1DVectorTy, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Helper for impl::scalarizeVectorOp.
LLVM::GlobalOp getDynamicSharedMemorySymbol(ConversionPatternRewriter &rewriter, gpu::GPUModuleOp moduleOp, gpu::DynamicSharedMemoryOp op, const LLVMTypeConverter *typeConverter, MemRefType memrefType, unsigned alignmentBit)
Generates a symbol with 0-sized array type for dynamic shared memory usage, or uses existing symbol.
static std::string diag(const llvm::Value &value)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class provides a shared interface for ranked and unranked memref types.
Definition: BuiltinTypes.h:102
This class represents an argument of a Block.
Definition: Value.h:295
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:104
UnitAttr getUnitAttr()
Definition: Builders.cpp:94
IntegerType getI64Type()
Definition: Builders.cpp:65
IntegerType getI32Type()
Definition: Builders.cpp:63
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:108
IntegerType getIntegerType(unsigned width)
Definition: Builders.cpp:67
Ty getType(Args &&...args)
Get or construct an instance of the type Ty with provided arguments.
Definition: Builders.h:88
StringAttr getStringAttr(const Twine &bytes)
Definition: Builders.cpp:258
MLIRContext * getContext() const
Definition: Builders.h:55
IndexType getIndexType()
Definition: Builders.cpp:51
IntegerType getI8Type()
Definition: Builders.cpp:59
DictionaryAttr getDictionaryAttr(ArrayRef< NamedAttribute > value)
Definition: Builders.cpp:100
NamedAttribute getNamedAttr(StringRef name, Attribute val)
Definition: Builders.cpp:90
FloatType getF64Type()
Definition: Builders.cpp:45
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
Definition: Builders.h:95
This class implements a pattern rewriter for use with ConversionPatterns.
void replaceOp(Operation *op, ValueRange newValues) override
Replace the given operation with the new values.
FailureOr< Block * > convertRegionTypes(Region *region, const TypeConverter &converter, TypeConverter::SignatureConversion *entryConversion=nullptr)
Apply a signature conversion to each block in the given region.
void eraseOp(Operation *op) override
PatternRewriter hook for erasing a dead operation.
const TypeConverter * typeConverter
An optional type converter for use by this pattern.
MemRefDescriptor createMemRefDescriptor(Location loc, MemRefType memRefType, Value allocatedPtr, Value alignedPtr, ArrayRef< Value > sizes, ArrayRef< Value > strides, ConversionPatternRewriter &rewriter) const
Creates and populates a canonical memref descriptor struct.
Definition: Pattern.cpp:216
void getMemRefDescriptorSizes(Location loc, MemRefType memRefType, ValueRange dynamicSizes, ConversionPatternRewriter &rewriter, SmallVectorImpl< Value > &sizes, SmallVectorImpl< Value > &strides, Value &size, bool sizeInBytes=true) const
Computes sizes, strides and buffer size of memRefType with identity layout.
Definition: Pattern.cpp:114
const LLVMTypeConverter * getTypeConverter() const
Definition: Pattern.cpp:27
LLVM::LLVMDialect & getDialect() const
Returns the LLVM dialect.
Definition: Pattern.cpp:32
LogicalResult copyUnrankedDescriptors(OpBuilder &builder, Location loc, TypeRange origTypes, SmallVectorImpl< Value > &operands, bool toDynamic) const
Copies the memory descriptor for any operands that were unranked descriptors originally to heap-allocated memory.
Definition: Pattern.cpp:245
This class contains all of the information necessary to report a diagnostic to the DiagnosticEngine.
Definition: Diagnostics.h:155
Conversion from types to the LLVM IR dialect.
Definition: TypeConverter.h:35
Type packFunctionResults(TypeRange types, bool useBarePointerCallConv=false) const
Convert a non-empty list of types to be returned from a function into an LLVM-compatible type.
const LowerToLLVMOptions & getOptions() const
Type convertFunctionSignature(FunctionType funcTy, bool isVariadic, bool useBarePtrCallConv, SignatureConversion &result) const
Convert a function type.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
FailureOr< unsigned > getMemRefAddressSpace(BaseMemRefType type) const
Return the LLVM address space corresponding to the memory space of the memref type type or failure if...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:66
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
Helper class to produce LLVM dialect operations extracting or inserting elements of a MemRef descript...
Definition: MemRefBuilder.h:33
static MemRefDescriptor fromStaticShape(OpBuilder &builder, Location loc, const LLVMTypeConverter &typeConverter, MemRefType type, Value memory)
Builds IR creating a MemRef descriptor that represents type and populates it with static shape and st...
Value allocatedPtr(OpBuilder &builder, Location loc)
Builds IR extracting the allocated pointer from the descriptor.
NamedAttrList is array of NamedAttributes that tracks whether it is sorted and does some basic work t...
Attribute erase(StringAttr name)
Erase the attribute with the given name from the list.
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:345
This class helps build Operations.
Definition: Builders.h:204
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:428
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:395
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:453
StringAttr getIdentifier() const
Return the name of this operation as a StringAttr.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition: Operation.h:512
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
result_type_range getResultTypes()
Definition: Operation.h:428
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition: PatternMatch.h:682
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
Definition: PatternMatch.h:594
void inlineRegionBefore(Region &region, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition: PatternMatch.h:500
static StringRef getSymbolAttrName()
Return the name of the attribute used for symbol names.
Definition: SymbolTable.h:76
This class provides all of the information necessary to convert a type signature.
std::optional< InputMapping > getInputMapping(unsigned input) const
Get the input mapping for the given argument.
void remapInput(unsigned origInputNo, ArrayRef< Value > replacements)
Remap an input of the original signature to replacements values.
Type conversion class.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
void addTypeAttributeConversion(FnT &&callback)
Register a conversion function for attributes within types.
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:37
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
Definition: Types.h:74
bool isIntOrFloat() const
Return true if this is an integer (of any signedness) or a float type.
Definition: Types.cpp:116
unsigned getIntOrFloatBitWidth() const
Return the bit width of an integer or a float type, assert failure on other types.
Definition: Types.cpp:122
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
LogicalResult handleMultidimensionalVectors(Operation *op, ValueRange operands, const LLVMTypeConverter &typeConverter, std::function< Value(Type, ValueRange)> createOperand, ConversionPatternRewriter &rewriter)
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Unrolls op to array/vector elements.
Include the generated interface declarations.
InFlightDiagnostic emitWarning(Location loc)
Utility method to emit a warning message using this location.
std::function< unsigned(gpu::AddressSpace)> MemorySpaceMapping
A function that maps a MemorySpace enum to a target-specific integer value.
Definition: GPUCommonPass.h:70
void populateGpuMemorySpaceAttributeConversions(TypeConverter &typeConverter, const MemorySpaceMapping &mapping)
Populates memory space attribute conversion rules for lowering gpu.address_space to integer values.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc, OpBuilder &b, StringRef name, LLVM::LLVMFunctionType type)
Find or create an external function declaration in the given module.
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc, gpu::GPUModuleOp moduleOp, Type llvmI8, StringRef namePrefix, StringRef str, uint64_t alignment=0, unsigned addrSpace=0)
Create a global that contains the given string.
LogicalResult matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.
LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.