MLIR  21.0.0git
GPUOpsLowering.cpp
Go to the documentation of this file.
1 //===- GPUOpsLowering.cpp - GPU FuncOp / ReturnOp lowering ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "GPUOpsLowering.h"
10 
14 #include "mlir/IR/Attributes.h"
15 #include "mlir/IR/Builders.h"
16 #include "mlir/IR/BuiltinTypes.h"
17 #include "llvm/ADT/SmallVectorExtras.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/FormatVariadic.h"
20 
21 using namespace mlir;
22 
23 LLVM::LLVMFuncOp mlir::getOrDefineFunction(gpu::GPUModuleOp moduleOp,
24  Location loc, OpBuilder &b,
25  StringRef name,
26  LLVM::LLVMFunctionType type) {
27  LLVM::LLVMFuncOp ret;
28  if (!(ret = moduleOp.template lookupSymbol<LLVM::LLVMFuncOp>(name))) {
30  b.setInsertionPointToStart(moduleOp.getBody());
31  ret = b.create<LLVM::LLVMFuncOp>(loc, name, type, LLVM::Linkage::External);
32  }
33  return ret;
34 }
35 
36 static SmallString<16> getUniqueSymbolName(gpu::GPUModuleOp moduleOp,
37  StringRef prefix) {
38  // Get a unique global name.
39  unsigned stringNumber = 0;
40  SmallString<16> stringConstName;
41  do {
42  stringConstName.clear();
43  (prefix + Twine(stringNumber++)).toStringRef(stringConstName);
44  } while (moduleOp.lookupSymbol(stringConstName));
45  return stringConstName;
46 }
47 
// Returns a module-level constant `llvm.mlir.global` holding `str` as a
// null-terminated i8 array; reuses an existing global only when type, value,
// alignment, and address space all match, otherwise creates a fresh one with
// a unique name derived from `namePrefix`.
48 LLVM::GlobalOp
// NOTE(review): the extraction dropped original line 49 — the rest of this
// declarator (upstream: `mlir::getOrCreateStringConstant(OpBuilder &b,
// Location loc,`). Restore from llvm-project before compiling.
 50  gpu::GPUModuleOp moduleOp, Type llvmI8,
 51  StringRef namePrefix, StringRef str,
 52  uint64_t alignment, unsigned addrSpace) {
 53  llvm::SmallString<20> nullTermStr(str);
 54  nullTermStr.push_back('\0'); // Null terminate for C
 55  auto globalType =
 56  LLVM::LLVMArrayType::get(llvmI8, nullTermStr.size_in_bytes());
 57  StringAttr attr = b.getStringAttr(nullTermStr);
 58 
 59  // Try to find existing global.
 60  for (auto globalOp : moduleOp.getOps<LLVM::GlobalOp>())
 61  if (globalOp.getGlobalType() == globalType && globalOp.getConstant() &&
 62  globalOp.getValueAttr() == attr &&
 63  globalOp.getAlignment().value_or(0) == alignment &&
 64  globalOp.getAddrSpace() == addrSpace)
 65  return globalOp;
 66 
 67  // Not found: create new global.
// NOTE(review): original line 68 was dropped here — likely an
// `OpBuilder::InsertionGuard guard(b);` protecting the caller's insertion
// point (upstream has one); confirm against llvm-project.
 69  b.setInsertionPointToStart(moduleOp.getBody());
 70  SmallString<16> name = getUniqueSymbolName(moduleOp, namePrefix);
 71  return b.create<LLVM::GlobalOp>(loc, globalType,
 72  /*isConstant=*/true, LLVM::Linkage::Internal,
 73  name, attr, alignment, addrSpace);
 74 }
75 
// Lowers a `gpu.func` to an `llvm.func`: converts the signature, materializes
// workgroup attributions either as extra `llvm.ptr` arguments or as
// module-level globals, turns private attributions into `alloca`s, inlines the
// body into the new function, and copies argument attributes over.
// NOTE(review): several source lines were dropped by the HTML extraction
// (marked inline below); restore them from llvm-project before compiling.
76 LogicalResult
77 GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
 78  ConversionPatternRewriter &rewriter) const {
 79  Location loc = gpuFuncOp.getLoc();
 80 
 81  SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
 82  if (encodeWorkgroupAttributionsAsArguments) {
 83  // Append an `llvm.ptr` argument to the function signature to encode
 84  // workgroup attributions.
 85 
 86  ArrayRef<BlockArgument> workgroupAttributions =
 87  gpuFuncOp.getWorkgroupAttributions();
 88  size_t numAttributions = workgroupAttributions.size();
 89 
 90  // Insert all arguments at the end.
 91  unsigned index = gpuFuncOp.getNumArguments();
 92  SmallVector<unsigned> argIndices(numAttributions, index);
 93 
 94  // New arguments will simply be `llvm.ptr` with the correct address space
 95  Type workgroupPtrType =
 96  rewriter.getType<LLVM::LLVMPointerType>(workgroupAddrSpace);
 97  SmallVector<Type> argTypes(numAttributions, workgroupPtrType);
 98 
 99  // Attributes: noalias, llvm.mlir.workgroup_attribution(<size>, <type>)
 100  std::array attrs{
 101  rewriter.getNamedAttr(LLVM::LLVMDialect::getNoAliasAttrName(),
 102  rewriter.getUnitAttr()),
 103  rewriter.getNamedAttr(
 104  getDialect().getWorkgroupAttributionAttrHelper().getName(),
 105  rewriter.getUnitAttr()),
 106  };
// NOTE(review): original line 107 was dropped here — the declaration of the
// per-argument attribute vector used below (upstream:
// `SmallVector<DictionaryAttr> argAttrs;`); confirm against llvm-project.
 108  for (BlockArgument attribution : workgroupAttributions) {
 109  auto attributionType = cast<MemRefType>(attribution.getType());
 110  IntegerAttr numElements =
 111  rewriter.getI64IntegerAttr(attributionType.getNumElements());
 112  Type llvmElementType =
 113  getTypeConverter()->convertType(attributionType.getElementType());
 114  if (!llvmElementType)
 115  return failure();
 116  TypeAttr type = TypeAttr::get(llvmElementType);
 117  attrs.back().setValue(
 118  rewriter.getAttr<LLVM::WorkgroupAttributionAttr>(numElements, type));
 119  argAttrs.push_back(rewriter.getDictionaryAttr(attrs));
 120  }
 121 
 122  // Location match function location
 123  SmallVector<Location> argLocs(numAttributions, gpuFuncOp.getLoc());
 124 
 125  // Perform signature modification
 126  rewriter.modifyOpInPlace(
 127  gpuFuncOp, [gpuFuncOp, &argIndices, &argTypes, &argAttrs, &argLocs]() {
 128  LogicalResult inserted =
 129  static_cast<FunctionOpInterface>(gpuFuncOp).insertArguments(
 130  argIndices, argTypes, argAttrs, argLocs);
 131  (void)inserted;
 132  assert(succeeded(inserted) &&
 133  "expected GPU funcs to support inserting any argument");
 134  });
 135  } else {
// Attributions become module-level globals named "__wg_<func>_<idx>".
 136  workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
 137  for (auto [idx, attribution] :
 138  llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
 139  auto type = dyn_cast<MemRefType>(attribution.getType());
 140  assert(type && type.hasStaticShape() && "unexpected type in attribution");
 141 
 142  uint64_t numElements = type.getNumElements();
 143 
 144  auto elementType =
 145  cast<Type>(typeConverter->convertType(type.getElementType()));
 146  auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
 147  std::string name =
 148  std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx));
 149  uint64_t alignment = 0;
 150  if (auto alignAttr = dyn_cast_or_null<IntegerAttr>(
 151  gpuFuncOp.getWorkgroupAttributionAttr(
 152  idx, LLVM::LLVMDialect::getAlignAttrName())))
 153  alignment = alignAttr.getInt();
 154  auto globalOp = rewriter.create<LLVM::GlobalOp>(
 155  gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
 156  LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment,
 157  workgroupAddrSpace);
 158  workgroupBuffers.push_back(globalOp);
 159  }
 160  }
 161 
 162  // Remap proper input types.
 163  TypeConverter::SignatureConversion signatureConversion(
 164  gpuFuncOp.front().getNumArguments());
 165 
// NOTE(review): original line 166 was dropped here — the declaration of
// `funcType` used below (upstream: `LLVM::LLVMFunctionType funcType =
// getTypeConverter()->convertFunctionSignature(`); confirm against upstream.
 167  gpuFuncOp.getFunctionType(), /*isVariadic=*/false,
 168  getTypeConverter()->getOptions().useBarePtrCallConv, signatureConversion);
 169  if (!funcType) {
 170  return rewriter.notifyMatchFailure(gpuFuncOp, [&](Diagnostic &diag) {
 171  diag << "failed to convert function signature type for: "
 172  << gpuFuncOp.getFunctionType();
 173  });
 174  }
 175 
 176  // Create the new function operation. Only copy those attributes that are
 177  // not specific to function modeling.
// NOTE(review): original line 178 was dropped here — the declaration of the
// `attributes` vector populated below (upstream:
// `SmallVector<NamedAttribute> attributes;`); confirm against upstream.
 179  ArrayAttr argAttrs;
 180  for (const auto &attr : gpuFuncOp->getAttrs()) {
 181  if (attr.getName() == SymbolTable::getSymbolAttrName() ||
 182  attr.getName() == gpuFuncOp.getFunctionTypeAttrName() ||
 183  attr.getName() ==
 184  gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() ||
 185  attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() ||
 186  attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName() ||
 187  attr.getName() == gpuFuncOp.getKnownBlockSizeAttrName() ||
 188  attr.getName() == gpuFuncOp.getKnownGridSizeAttrName())
 189  continue;
 190  if (attr.getName() == gpuFuncOp.getArgAttrsAttrName()) {
 191  argAttrs = gpuFuncOp.getArgAttrsAttr();
 192  continue;
 193  }
 194  attributes.push_back(attr);
 195  }
 196 
 197  DenseI32ArrayAttr knownBlockSize = gpuFuncOp.getKnownBlockSizeAttr();
 198  DenseI32ArrayAttr knownGridSize = gpuFuncOp.getKnownGridSizeAttr();
 199  // Ensure we don't lose information if the function is lowered before its
 200  // surrounding context.
 201  auto *gpuDialect = cast<gpu::GPUDialect>(gpuFuncOp->getDialect());
 202  if (knownBlockSize)
 203  attributes.emplace_back(gpuDialect->getKnownBlockSizeAttrHelper().getName(),
 204  knownBlockSize);
 205  if (knownGridSize)
 206  attributes.emplace_back(gpuDialect->getKnownGridSizeAttrHelper().getName(),
 207  knownGridSize);
 208 
 209  // Add a dialect specific kernel attribute in addition to GPU kernel
 210  // attribute. The former is necessary for further translation while the
 211  // latter is expected by gpu.launch_func.
 212  if (gpuFuncOp.isKernel()) {
 213  if (kernelAttributeName)
 214  attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr());
 215  // Set the dialect-specific block size attribute if there is one.
 216  if (kernelBlockSizeAttributeName && knownBlockSize) {
 217  attributes.emplace_back(kernelBlockSizeAttributeName, knownBlockSize);
 218  }
 219  }
 220  LLVM::CConv callingConvention = gpuFuncOp.isKernel()
 221  ? kernelCallingConvention
 222  : nonKernelCallingConvention;
 223  auto llvmFuncOp = rewriter.create<LLVM::LLVMFuncOp>(
 224  gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType,
 225  LLVM::Linkage::External, /*dsoLocal=*/false, callingConvention,
 226  /*comdat=*/nullptr, attributes);
 227 
 228  {
 229  // Insert operations that correspond to converted workgroup and private
 230  // memory attributions to the body of the function. This must operate on
 231  // the original function, before the body region is inlined in the new
 232  // function to maintain the relation between block arguments and the
 233  // parent operation that assigns their semantics.
 234  OpBuilder::InsertionGuard guard(rewriter);
 235 
 236  // Rewrite workgroup memory attributions to addresses of global buffers.
 237  rewriter.setInsertionPointToStart(&gpuFuncOp.front());
 238  unsigned numProperArguments = gpuFuncOp.getNumArguments();
 239 
 240  if (encodeWorkgroupAttributionsAsArguments) {
 241  // Build a MemRefDescriptor with each of the arguments added above.
 242 
 243  unsigned numAttributions = gpuFuncOp.getNumWorkgroupAttributions();
 244  assert(numProperArguments >= numAttributions &&
 245  "Expecting attributions to be encoded as arguments already");
 246 
 247  // Arguments encoding workgroup attributions will be in positions
 248  // [numProperArguments, numProperArguments+numAttributions)
 249  ArrayRef<BlockArgument> attributionArguments =
 250  gpuFuncOp.getArguments().slice(numProperArguments - numAttributions,
 251  numAttributions);
 252  for (auto [idx, vals] : llvm::enumerate(llvm::zip_equal(
 253  gpuFuncOp.getWorkgroupAttributions(), attributionArguments))) {
 254  auto [attribution, arg] = vals;
 255  auto type = cast<MemRefType>(attribution.getType());
 256 
 257  // Arguments are of llvm.ptr type and attributions are of memref type:
 258  // we need to wrap them in memref descriptors.
// NOTE(review): original line 259 was dropped — the declaration of `descr`
// (upstream: `Value descr = MemRefDescriptor::fromStaticShape(`); confirm.
 260  rewriter, loc, *getTypeConverter(), type, arg);
 261 
 262  // And remap the arguments
 263  signatureConversion.remapInput(numProperArguments + idx, descr);
 264  }
 265  } else {
 266  for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) {
 267  auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(),
 268  global.getAddrSpace());
 269  Value address = rewriter.create<LLVM::AddressOfOp>(
 270  loc, ptrType, global.getSymNameAttr());
 271  Value memory =
 272  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getType(),
 273  address, ArrayRef<LLVM::GEPArg>{0, 0});
 274 
 275  // Build a memref descriptor pointing to the buffer to plug with the
 276  // existing memref infrastructure. This may use more registers than
 277  // otherwise necessary given that memref sizes are fixed, but we can try
 278  // and canonicalize that away later.
 279  Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx];
 280  auto type = cast<MemRefType>(attribution.getType());
// NOTE(review): original line 281 was dropped — the declaration of `descr`
// (upstream: `auto descr = MemRefDescriptor::fromStaticShape(`); confirm.
 282  rewriter, loc, *getTypeConverter(), type, memory);
 283  signatureConversion.remapInput(numProperArguments + idx, descr);
 284  }
 285  }
 286 
 287  // Rewrite private memory attributions to alloca'ed buffers.
 288  unsigned numWorkgroupAttributions = gpuFuncOp.getNumWorkgroupAttributions();
 289  auto int64Ty = IntegerType::get(rewriter.getContext(), 64);
 290  for (const auto [idx, attribution] :
 291  llvm::enumerate(gpuFuncOp.getPrivateAttributions())) {
 292  auto type = cast<MemRefType>(attribution.getType());
 293  assert(type && type.hasStaticShape() && "unexpected type in attribution");
 294 
 295  // Explicitly drop memory space when lowering private memory
 296  // attributions since NVVM models it as `alloca`s in the default
 297  // memory space and does not support `alloca`s with addrspace(5).
 298  Type elementType = typeConverter->convertType(type.getElementType());
 299  auto ptrType =
 300  LLVM::LLVMPointerType::get(rewriter.getContext(), allocaAddrSpace);
 301  Value numElements = rewriter.create<LLVM::ConstantOp>(
 302  gpuFuncOp.getLoc(), int64Ty, type.getNumElements());
 303  uint64_t alignment = 0;
 304  if (auto alignAttr =
 305  dyn_cast_or_null<IntegerAttr>(gpuFuncOp.getPrivateAttributionAttr(
 306  idx, LLVM::LLVMDialect::getAlignAttrName())))
 307  alignment = alignAttr.getInt();
 308  Value allocated = rewriter.create<LLVM::AllocaOp>(
 309  gpuFuncOp.getLoc(), ptrType, elementType, numElements, alignment);
// NOTE(review): original line 310 was dropped — the declaration of `descr`
// (upstream: `auto descr = MemRefDescriptor::fromStaticShape(`); confirm.
 311  rewriter, loc, *getTypeConverter(), type, allocated);
 312  signatureConversion.remapInput(
 313  numProperArguments + numWorkgroupAttributions + idx, descr);
 314  }
 315  }
 316 
 317  // Move the region to the new function, update the entry block signature.
 318  rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(),
 319  llvmFuncOp.end());
 320  if (failed(rewriter.convertRegionTypes(&llvmFuncOp.getBody(), *typeConverter,
 321  &signatureConversion)))
 322  return failure();
 323 
 324  // Get memref type from function arguments and set the noalias to
 325  // pointer arguments.
 326  for (const auto [idx, argTy] :
 327  llvm::enumerate(gpuFuncOp.getArgumentTypes())) {
 328  auto remapping = signatureConversion.getInputMapping(idx);
 329  NamedAttrList argAttr =
 330  argAttrs ? cast<DictionaryAttr>(argAttrs[idx]) : NamedAttrList();
// Copies an attribute to every converted argument the original arg maps to.
 331  auto copyAttribute = [&](StringRef attrName) {
 332  Attribute attr = argAttr.erase(attrName);
 333  if (!attr)
 334  return;
 335  for (size_t i = 0, e = remapping->size; i < e; ++i)
 336  llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
 337  };
// Like copyAttribute, but only targets converted args of llvm.ptr type.
 338  auto copyPointerAttribute = [&](StringRef attrName) {
 339  Attribute attr = argAttr.erase(attrName);
 340 
 341  if (!attr)
 342  return;
 343  if (remapping->size > 1 &&
 344  attrName == LLVM::LLVMDialect::getNoAliasAttrName()) {
 345  emitWarning(llvmFuncOp.getLoc(),
 346  "Cannot copy noalias with non-bare pointers.\n");
 347  return;
 348  }
 349  for (size_t i = 0, e = remapping->size; i < e; ++i) {
 350  if (isa<LLVM::LLVMPointerType>(
 351  llvmFuncOp.getArgument(remapping->inputNo + i).getType())) {
 352  llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
 353  }
 354  }
 355  };
 356 
 357  if (argAttr.empty())
 358  continue;
 359 
 360  copyAttribute(LLVM::LLVMDialect::getReturnedAttrName());
 361  copyAttribute(LLVM::LLVMDialect::getNoUndefAttrName());
 362  copyAttribute(LLVM::LLVMDialect::getInRegAttrName());
 363  bool lowersToPointer = false;
 364  for (size_t i = 0, e = remapping->size; i < e; ++i) {
 365  lowersToPointer |= isa<LLVM::LLVMPointerType>(
 366  llvmFuncOp.getArgument(remapping->inputNo + i).getType());
 367  }
 368 
 369  if (lowersToPointer) {
 370  copyPointerAttribute(LLVM::LLVMDialect::getNoAliasAttrName());
 371  copyPointerAttribute(LLVM::LLVMDialect::getNoCaptureAttrName());
 372  copyPointerAttribute(LLVM::LLVMDialect::getNoFreeAttrName());
 373  copyPointerAttribute(LLVM::LLVMDialect::getAlignAttrName());
 374  copyPointerAttribute(LLVM::LLVMDialect::getReadonlyAttrName());
 375  copyPointerAttribute(LLVM::LLVMDialect::getWriteOnlyAttrName());
 376  copyPointerAttribute(LLVM::LLVMDialect::getReadnoneAttrName());
 377  copyPointerAttribute(LLVM::LLVMDialect::getNonNullAttrName());
 378  copyPointerAttribute(LLVM::LLVMDialect::getDereferenceableAttrName());
 379  copyPointerAttribute(
 380  LLVM::LLVMDialect::getDereferenceableOrNullAttrName());
 381  copyPointerAttribute(
 382  LLVM::LLVMDialect::WorkgroupAttributionAttrHelper::getNameStr());
 383  }
 384  }
 385  rewriter.eraseOp(gpuFuncOp);
 386  return success();
 387 }
388 
// Lowers gpu.printf via the ROCm/HIP `__ockl_printf_*` hostcall interface:
// begin a printf transaction, append the format string, then append the
// arguments in groups of 7 (zero-padded), widening each to 64 bits.
// NOTE(review): the extraction dropped original line 389 — the start of this
// declarator (upstream: `LogicalResult GPUPrintfOpToHIPLowering::
// matchAndRewrite(`); restore from llvm-project.
390  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
391  ConversionPatternRewriter &rewriter) const {
392  Location loc = gpuPrintfOp->getLoc();
393 
394  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getI8Type());
395  auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
396  mlir::Type llvmI32 = typeConverter->convertType(rewriter.getI32Type());
397  mlir::Type llvmI64 = typeConverter->convertType(rewriter.getI64Type());
398  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
399  // This ensures that global constants and declarations are placed within
400  // the device code, not the host code
401  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
402 
403  auto ocklBegin =
404  getOrDefineFunction(moduleOp, loc, rewriter, "__ockl_printf_begin",
405  LLVM::LLVMFunctionType::get(llvmI64, {llvmI64}));
406  LLVM::LLVMFuncOp ocklAppendArgs;
407  if (!adaptor.getArgs().empty()) {
408  ocklAppendArgs = getOrDefineFunction(
409  moduleOp, loc, rewriter, "__ockl_printf_append_args",
// NOTE(review): original line 410 was dropped — likely
// `LLVM::LLVMFunctionType::get(` opening the type below; confirm upstream.
411  llvmI64, {llvmI64, /*numArgs*/ llvmI32, llvmI64, llvmI64, llvmI64,
412  llvmI64, llvmI64, llvmI64, llvmI64, /*isLast*/ llvmI32}));
413  }
414  auto ocklAppendStringN = getOrDefineFunction(
415  moduleOp, loc, rewriter, "__ockl_printf_append_string_n",
// NOTE(review): original line 416 was dropped — likely
// `LLVM::LLVMFunctionType::get(` opening the type below; confirm upstream.
417  llvmI64,
418  {llvmI64, ptrType, /*length (bytes)*/ llvmI64, /*isLast*/ llvmI32}));
419 
420  /// Start the printf hostcall
421  Value zeroI64 = rewriter.create<LLVM::ConstantOp>(loc, llvmI64, 0);
422  auto printfBeginCall = rewriter.create<LLVM::CallOp>(loc, ocklBegin, zeroI64);
423  Value printfDesc = printfBeginCall.getResult();
424 
425  // Create the global op or find an existing one.
426  LLVM::GlobalOp global = getOrCreateStringConstant(
427  rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat());
428 
429  // Get a pointer to the format string's first element and pass it to printf()
430  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(
431  loc,
432  LLVM::LLVMPointerType::get(rewriter.getContext(), global.getAddrSpace()),
433  global.getSymNameAttr());
434  Value stringStart =
435  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getGlobalType(),
436  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
437  Value stringLen = rewriter.create<LLVM::ConstantOp>(
438  loc, llvmI64, cast<StringAttr>(global.getValueAttr()).size());
439 
440  Value oneI32 = rewriter.create<LLVM::ConstantOp>(loc, llvmI32, 1);
441  Value zeroI32 = rewriter.create<LLVM::ConstantOp>(loc, llvmI32, 0);
442 
443  auto appendFormatCall = rewriter.create<LLVM::CallOp>(
444  loc, ocklAppendStringN,
445  ValueRange{printfDesc, stringStart, stringLen,
446  adaptor.getArgs().empty() ? oneI32 : zeroI32});
447  printfDesc = appendFormatCall.getResult();
448 
449  // __ockl_printf_append_args takes 7 values per append call
450  constexpr size_t argsPerAppend = 7;
451  size_t nArgs = adaptor.getArgs().size();
452  for (size_t group = 0; group < nArgs; group += argsPerAppend) {
453  size_t bound = std::min(group + argsPerAppend, nArgs);
454  size_t numArgsThisCall = bound - group;
455 
// NOTE(review): original line 456 was dropped — the declaration of the
// `arguments` vector used below (upstream: a `SmallVector<mlir::Value, ...>
// arguments;`); confirm against llvm-project.
457  arguments.push_back(printfDesc);
458  arguments.push_back(
459  rewriter.create<LLVM::ConstantOp>(loc, llvmI32, numArgsThisCall));
460  for (size_t i = group; i < bound; ++i) {
461  Value arg = adaptor.getArgs()[i];
462  if (auto floatType = dyn_cast<FloatType>(arg.getType())) {
463  if (!floatType.isF64())
464  arg = rewriter.create<LLVM::FPExtOp>(
465  loc, typeConverter->convertType(rewriter.getF64Type()), arg);
466  arg = rewriter.create<LLVM::BitcastOp>(loc, llvmI64, arg);
467  }
468  if (arg.getType().getIntOrFloatBitWidth() != 64)
469  arg = rewriter.create<LLVM::ZExtOp>(loc, llvmI64, arg);
470 
471  arguments.push_back(arg);
472  }
473  // Pad out to 7 arguments since the hostcall always needs 7
474  for (size_t extra = numArgsThisCall; extra < argsPerAppend; ++extra) {
475  arguments.push_back(zeroI64);
476  }
477 
478  auto isLast = (bound == nArgs) ? oneI32 : zeroI32;
479  arguments.push_back(isLast);
480  auto call = rewriter.create<LLVM::CallOp>(loc, ocklAppendArgs, arguments);
481  printfDesc = call.getResult();
482  }
483  rewriter.eraseOp(gpuPrintfOp);
484  return success();
485 }
486 
// Lowers gpu.printf to a plain variadic `printf` call declared inside the GPU
// module: materializes the format string as a global constant and forwards
// the converted operands unchanged.
// NOTE(review): the extraction dropped original line 487 — the start of this
// declarator (upstream: `LogicalResult GPUPrintfOpToLLVMCallLowering::
// matchAndRewrite(`); restore from llvm-project.
488  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
489  ConversionPatternRewriter &rewriter) const {
490  Location loc = gpuPrintfOp->getLoc();
491 
492  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8));
493  mlir::Type ptrType =
494  LLVM::LLVMPointerType::get(rewriter.getContext(), addressSpace);
495 
496  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
497  // This ensures that global constants and declarations are placed within
498  // the device code, not the host code
499  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
500 
501  auto printfType =
502  LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType},
503  /*isVarArg=*/true);
504  LLVM::LLVMFuncOp printfDecl =
505  getOrDefineFunction(moduleOp, loc, rewriter, "printf", printfType);
506 
507  // Create the global op or find an existing one.
508  LLVM::GlobalOp global = getOrCreateStringConstant(
509  rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat(),
510  /*alignment=*/0, addressSpace);
511 
512  // Get a pointer to the format string's first element
513  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(
514  loc,
515  LLVM::LLVMPointerType::get(rewriter.getContext(), global.getAddrSpace()),
516  global.getSymNameAttr());
517  Value stringStart =
518  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getGlobalType(),
519  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
520 
521  // Construct arguments and function call
522  auto argsRange = adaptor.getArgs();
523  SmallVector<Value, 4> printfArgs;
524  printfArgs.reserve(argsRange.size() + 1);
525  printfArgs.push_back(stringStart);
526  printfArgs.append(argsRange.begin(), argsRange.end());
527 
528  rewriter.create<LLVM::CallOp>(loc, printfDecl, printfArgs);
529  rewriter.eraseOp(gpuPrintfOp);
530  return success();
531 }
532 
// Lowers gpu.printf to NVIDIA's `vprintf(format, packed_args)` ABI: floats
// are promoted to f64, all arguments are stored into a stack-allocated
// literal struct, and a pointer to that struct is passed as the second
// operand.
// NOTE(review): the extraction dropped original line 533 — the start of this
// declarator (upstream: `LogicalResult GPUPrintfOpToVPrintfLowering::
// matchAndRewrite(`); restore from llvm-project.
534  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
535  ConversionPatternRewriter &rewriter) const {
536  Location loc = gpuPrintfOp->getLoc();
537 
538  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8));
539  mlir::Type ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
540 
541  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
542  // This ensures that global constants and declarations are placed within
543  // the device code, not the host code
544  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
545 
546  // Create a valid global location removing any metadata attached to the
547  // location as debug info metadata inside of a function cannot be used outside
548  // of that function.
549  Location globalLoc = loc->findInstanceOfOrUnknown<FileLineColLoc>();
550 
551  auto vprintfType =
552  LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType, ptrType});
553  LLVM::LLVMFuncOp vprintfDecl = getOrDefineFunction(
554  moduleOp, globalLoc, rewriter, "vprintf", vprintfType);
555 
556  // Create the global op or find an existing one.
557  LLVM::GlobalOp global =
558  getOrCreateStringConstant(rewriter, globalLoc, moduleOp, llvmI8,
559  "printfFormat_", adaptor.getFormat());
560 
561  // Get a pointer to the format string's first element
562  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(loc, global);
563  Value stringStart =
564  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getGlobalType(),
565  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
566  SmallVector<Type> types;
567  SmallVector<Value> args;
568  // Promote and pack the arguments into a stack allocation.
569  for (Value arg : adaptor.getArgs()) {
570  Type type = arg.getType();
571  Value promotedArg = arg;
572  assert(type.isIntOrFloat());
573  if (isa<FloatType>(type)) {
574  type = rewriter.getF64Type();
575  promotedArg = rewriter.create<LLVM::FPExtOp>(loc, type, arg);
576  }
577  types.push_back(type);
578  args.push_back(promotedArg);
579  }
580  Type structType =
581  LLVM::LLVMStructType::getLiteral(gpuPrintfOp.getContext(), types);
582  Value one = rewriter.create<LLVM::ConstantOp>(loc, rewriter.getI64Type(),
583  rewriter.getIndexAttr(1));
584  Value tempAlloc =
585  rewriter.create<LLVM::AllocaOp>(loc, ptrType, structType, one,
586  /*alignment=*/0);
587  for (auto [index, arg] : llvm::enumerate(args)) {
588  Value ptr = rewriter.create<LLVM::GEPOp>(
589  loc, ptrType, structType, tempAlloc,
590  ArrayRef<LLVM::GEPArg>{0, static_cast<int32_t>(index)});
591  rewriter.create<LLVM::StoreOp>(loc, arg, ptr);
592  }
593  std::array<Value, 2> printfArgs = {stringStart, tempAlloc};
594 
595  rewriter.create<LLVM::CallOp>(loc, vprintfDecl, printfArgs);
596  rewriter.eraseOp(gpuPrintfOp);
597  return success();
598 }
599 
600 /// Helper for impl::scalarizeVectorOp. Scalarizes vectors to elements.
601 /// Used either directly (for ops on 1D vectors) or as the callback passed to
602 /// detail::handleMultidimensionalVectors (for ops on higher-rank vectors).
// NOTE(review): the extraction dropped original line 603 — the start of this
// declarator (upstream: `static Value scalarizeVectorOpHelper(Operation *op,
// ValueRange operands,`); restore from llvm-project.
604  Type llvm1DVectorTy,
605  ConversionPatternRewriter &rewriter,
606  const LLVMTypeConverter &converter) {
607  TypeRange operandTypes(operands);
608  VectorType vectorType = cast<VectorType>(llvm1DVectorTy);
609  Location loc = op->getLoc();
// Start from a poison vector and insert one scalar result per lane.
610  Value result = rewriter.create<LLVM::PoisonOp>(loc, vectorType);
611  Type indexType = converter.convertType(rewriter.getIndexType());
612  StringAttr name = op->getName().getIdentifier();
613  Type elementType = vectorType.getElementType();
614 
615  for (int64_t i = 0; i < vectorType.getNumElements(); ++i) {
616  Value index = rewriter.create<LLVM::ConstantOp>(loc, indexType, i);
// Scalar operands pass through unchanged; vector operands are indexed.
617  auto extractElement = [&](Value operand) -> Value {
618  if (!isa<VectorType>(operand.getType()))
619  return operand;
620  return rewriter.create<LLVM::ExtractElementOp>(loc, operand, index);
621  };
622  auto scalarOperands = llvm::map_to_vector(operands, extractElement);
// Re-create the same op (by name, with the same attributes) per element.
623  Operation *scalarOp =
624  rewriter.create(loc, name, scalarOperands, elementType, op->getAttrs());
625  result = rewriter.create<LLVM::InsertElementOp>(
626  loc, result, scalarOp->getResult(0), index);
627  }
628  return result;
629 }
630 
631 /// Unrolls op to array/vector elements.
632 LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands,
633  ConversionPatternRewriter &rewriter,
634  const LLVMTypeConverter &converter) {
635  TypeRange operandTypes(operands);
// 1-D vector operands: scalarize directly.
636  if (llvm::any_of(operandTypes, llvm::IsaPred<VectorType>)) {
637  VectorType vectorType =
638  cast<VectorType>(converter.convertType(op->getResultTypes()[0]));
639  rewriter.replaceOp(op, scalarizeVectorOpHelper(op, operands, vectorType,
640  rewriter, converter));
641  return success();
642  }
643 
// Higher-rank vectors arrive as llvm.array of 1-D vectors; delegate each
// innermost vector to the helper.
644  if (llvm::any_of(operandTypes, llvm::IsaPred<LLVM::LLVMArrayType>)) {
// NOTE(review): original line 645 was dropped here — the call opening this
// statement (upstream: `return LLVM::detail::handleMultidimensionalVectors(`);
// restore from llvm-project.
646  op, operands, converter,
647  [&](Type llvm1DVectorTy, ValueRange operands) -> Value {
648  return scalarizeVectorOpHelper(op, operands, llvm1DVectorTy, rewriter,
649  converter);
650  },
651  rewriter);
652  }
653 
654  return rewriter.notifyMatchFailure(op, "no llvm.array or vector to unroll");
655 }
656 
657 static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space) {
658  return IntegerAttr::get(IntegerType::get(ctx, 64), space);
659 }
660 
661 /// Generates a symbol with 0-sized array type for dynamic shared memory usage,
662 /// or uses existing symbol.
// NOTE(review): the extraction dropped original line 663 — the start of this
// declarator (upstream: `LLVM::GlobalOp getDynamicSharedMemorySymbol(`);
// restore from llvm-project.
664  ConversionPatternRewriter &rewriter, gpu::GPUModuleOp moduleOp,
665  gpu::DynamicSharedMemoryOp op, const LLVMTypeConverter *typeConverter,
666  MemRefType memrefType, unsigned alignmentBit) {
667  uint64_t alignmentByte = alignmentBit / memrefType.getElementTypeBitWidth();
668 
669  FailureOr<unsigned> addressSpace =
670  typeConverter->getMemRefAddressSpace(memrefType);
// NOTE(review): on failure an error is emitted but execution falls through and
// `addressSpace.value()` is still read below — verify against upstream whether
// an early return is intended here.
671  if (failed(addressSpace)) {
672  op->emitError() << "conversion of memref memory space "
673  << memrefType.getMemorySpace()
674  << " to integer address space "
675  "failed. Consider adding memory space conversions.";
676  }
677 
678  // Step 1. Collect symbol names of LLVM::GlobalOp Ops. Also if any of
679  // LLVM::GlobalOp is suitable for shared memory, return it.
680  llvm::StringSet<> existingGlobalNames;
681  for (auto globalOp : moduleOp.getBody()->getOps<LLVM::GlobalOp>()) {
682  existingGlobalNames.insert(globalOp.getSymName());
683  if (auto arrayType = dyn_cast<LLVM::LLVMArrayType>(globalOp.getType())) {
684  if (globalOp.getAddrSpace() == addressSpace.value() &&
685  arrayType.getNumElements() == 0 &&
686  globalOp.getAlignment().value_or(0) == alignmentByte) {
687  return globalOp;
688  }
689  }
690  }
691 
692  // Step 2. Find a unique symbol name
693  unsigned uniquingCounter = 0;
694  SmallString<128> symName = SymbolTable::generateSymbolName<128>(
695  "__dynamic_shmem_",
696  [&](StringRef candidate) {
697  return existingGlobalNames.contains(candidate);
698  },
699  uniquingCounter);
700 
701  // Step 3. Generate a global op
702  OpBuilder::InsertionGuard guard(rewriter);
703  rewriter.setInsertionPointToStart(moduleOp.getBody());
704 
705  auto zeroSizedArrayType = LLVM::LLVMArrayType::get(
706  typeConverter->convertType(memrefType.getElementType()), 0);
707 
708  return rewriter.create<LLVM::GlobalOp>(
709  op->getLoc(), zeroSizedArrayType, /*isConstant=*/false,
710  LLVM::Linkage::Internal, symName, /*value=*/Attribute(), alignmentByte,
711  addressSpace.value());
712 }
713 
// Lowers gpu.dynamic_shared_memory to an address computation over a 0-sized
// module-level global, wrapped in a memref descriptor.
// NOTE(review): the extraction dropped original line 714 — the start of this
// declarator (upstream: `LogicalResult GPUDynamicSharedMemoryOpLowering::
// matchAndRewrite(`); restore from llvm-project.
715  gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
716  ConversionPatternRewriter &rewriter) const {
717  Location loc = op.getLoc();
718  MemRefType memrefType = op.getResultMemref().getType();
719  Type elementType = typeConverter->convertType(memrefType.getElementType());
720 
721  // Step 1: Generate a memref<0xi8> type
722  MemRefLayoutAttrInterface layout = {};
723  auto memrefType0sz =
724  MemRefType::get({0}, elementType, layout, memrefType.getMemorySpace());
725 
726  // Step 2: Generate a global symbol or existing for the dynamic shared
727  // memory with memref<0xi8> type
728  auto moduleOp = op->getParentOfType<gpu::GPUModuleOp>();
729  LLVM::GlobalOp shmemOp = getDynamicSharedMemorySymbol(
730  rewriter, moduleOp, op, getTypeConverter(), memrefType0sz, alignmentBit);
731 
732  // Step 3. Get address of the global symbol
733  OpBuilder::InsertionGuard guard(rewriter);
734  rewriter.setInsertionPoint(op);
735  auto basePtr = rewriter.create<LLVM::AddressOfOp>(loc, shmemOp);
736  Type baseType = basePtr->getResultTypes().front();
737 
738  // Step 4. Generate GEP using offsets
739  SmallVector<LLVM::GEPArg> gepArgs = {0};
740  Value shmemPtr = rewriter.create<LLVM::GEPOp>(loc, baseType, elementType,
741  basePtr, gepArgs);
742  // Step 5. Create a memref descriptor
743  SmallVector<Value> shape, strides;
744  Value sizeBytes;
745  getMemRefDescriptorSizes(loc, memrefType0sz, {}, rewriter, shape, strides,
746  sizeBytes);
747  auto memRefDescriptor = this->createMemRefDescriptor(
748  loc, memrefType0sz, shmemPtr, shmemPtr, shape, strides, rewriter);
749 
750  // Step 5. Replace the op with memref descriptor
751  rewriter.replaceOp(op, {memRefDescriptor});
752  return success();
753 }
754 
// Lowers gpu.return to llvm.return: under the bare-pointer convention memref
// results are reduced to their allocated pointer; multiple results are packed
// into an LLVM struct.
// NOTE(review): the extraction dropped original line 755 — the start of this
// declarator (upstream: `LogicalResult GPUReturnOpLowering::matchAndRewrite(`);
// restore from llvm-project.
756  gpu::ReturnOp op, OpAdaptor adaptor,
757  ConversionPatternRewriter &rewriter) const {
758  Location loc = op.getLoc();
759  unsigned numArguments = op.getNumOperands();
760  SmallVector<Value, 4> updatedOperands;
761 
762  bool useBarePtrCallConv = getTypeConverter()->getOptions().useBarePtrCallConv;
763  if (useBarePtrCallConv) {
764  // For the bare-ptr calling convention, extract the aligned pointer to
765  // be returned from the memref descriptor.
766  for (auto it : llvm::zip(op->getOperands(), adaptor.getOperands())) {
767  Type oldTy = std::get<0>(it).getType();
768  Value newOperand = std::get<1>(it);
769  if (isa<MemRefType>(oldTy) && getTypeConverter()->canConvertToBarePtr(
770  cast<BaseMemRefType>(oldTy))) {
771  MemRefDescriptor memrefDesc(newOperand);
772  newOperand = memrefDesc.allocatedPtr(rewriter, loc);
773  } else if (isa<UnrankedMemRefType>(oldTy)) {
774  // Unranked memref is not supported in the bare pointer calling
775  // convention.
776  return failure();
777  }
778  updatedOperands.push_back(newOperand);
779  }
780  } else {
781  updatedOperands = llvm::to_vector<4>(adaptor.getOperands());
// Best-effort copy of unranked descriptors to dynamically allocated memory;
// the result is deliberately ignored.
782  (void)copyUnrankedDescriptors(rewriter, loc, op.getOperands().getTypes(),
783  updatedOperands,
784  /*toDynamic=*/true);
785  }
786 
787  // If ReturnOp has 0 or 1 operand, create it and return immediately.
788  if (numArguments <= 1) {
789  rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(
790  op, TypeRange(), updatedOperands, op->getAttrs());
791  return success();
792  }
793 
794  // Otherwise, we need to pack the arguments into an LLVM struct type before
795  // returning.
796  auto packedType = getTypeConverter()->packFunctionResults(
797  op.getOperandTypes(), useBarePtrCallConv);
798  if (!packedType) {
799  return rewriter.notifyMatchFailure(op, "could not convert result types");
800  }
801 
802  Value packed = rewriter.create<LLVM::PoisonOp>(loc, packedType);
803  for (auto [idx, operand] : llvm::enumerate(updatedOperands)) {
804  packed = rewriter.create<LLVM::InsertValueOp>(loc, packed, operand, idx);
805  }
806  rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, TypeRange(), packed,
807  op->getAttrs());
808  return success();
809 }
810 
// Registers a type-attribute conversion that maps `gpu::AddressSpaceAttr`
// memory spaces on memref types to numeric (i64 IntegerAttr) address spaces
// via the user-supplied `mapping` callback.
// NOTE(review): the extraction dropped original line 811 — the start of this
// declarator (upstream: `void mlir::populateGpuMemorySpaceAttributeConversions(`);
// restore from llvm-project.
812  TypeConverter &typeConverter, const MemorySpaceMapping &mapping) {
813  typeConverter.addTypeAttributeConversion(
814  [mapping](BaseMemRefType type, gpu::AddressSpaceAttr memorySpaceAttr) {
815  gpu::AddressSpace memorySpace = memorySpaceAttr.getValue();
816  unsigned addressSpace = mapping(memorySpace);
817  return wrapNumericMemorySpace(memorySpaceAttr.getContext(),
818  addressSpace);
819  });
820 }
static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space)
static SmallString< 16 > getUniqueSymbolName(gpu::GPUModuleOp moduleOp, StringRef prefix)
static Value scalarizeVectorOpHelper(Operation *op, ValueRange operands, Type llvm1DVectorTy, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Helper for impl::scalarizeVectorOp.
LLVM::GlobalOp getDynamicSharedMemorySymbol(ConversionPatternRewriter &rewriter, gpu::GPUModuleOp moduleOp, gpu::DynamicSharedMemoryOp op, const LLVMTypeConverter *typeConverter, MemRefType memrefType, unsigned alignmentBit)
Generates a symbol with 0-sized array type for dynamic shared memory usage, or uses existing symbol.
static std::string diag(const llvm::Value &value)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class provides a shared interface for ranked and unranked memref types.
Definition: BuiltinTypes.h:102
This class represents an argument of a Block.
Definition: Value.h:309
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:106
UnitAttr getUnitAttr()
Definition: Builders.cpp:96
IntegerType getI64Type()
Definition: Builders.cpp:67
IntegerType getI32Type()
Definition: Builders.cpp:65
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:110
IntegerType getIntegerType(unsigned width)
Definition: Builders.cpp:69
Ty getType(Args &&...args)
Get or construct an instance of the type Ty with provided arguments.
Definition: Builders.h:89
StringAttr getStringAttr(const Twine &bytes)
Definition: Builders.cpp:260
MLIRContext * getContext() const
Definition: Builders.h:55
IndexType getIndexType()
Definition: Builders.cpp:53
IntegerType getI8Type()
Definition: Builders.cpp:61
DictionaryAttr getDictionaryAttr(ArrayRef< NamedAttribute > value)
Definition: Builders.cpp:102
NamedAttribute getNamedAttr(StringRef name, Attribute val)
Definition: Builders.cpp:92
FloatType getF64Type()
Definition: Builders.cpp:47
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
Definition: Builders.h:96
This class implements a pattern rewriter for use with ConversionPatterns.
void replaceOp(Operation *op, ValueRange newValues) override
Replace the given operation with the new values.
FailureOr< Block * > convertRegionTypes(Region *region, const TypeConverter &converter, TypeConverter::SignatureConversion *entryConversion=nullptr)
Apply a signature conversion to each block in the given region.
void eraseOp(Operation *op) override
PatternRewriter hook for erasing a dead operation.
const TypeConverter * typeConverter
An optional type converter for use by this pattern.
MemRefDescriptor createMemRefDescriptor(Location loc, MemRefType memRefType, Value allocatedPtr, Value alignedPtr, ArrayRef< Value > sizes, ArrayRef< Value > strides, ConversionPatternRewriter &rewriter) const
Creates and populates a canonical memref descriptor struct.
Definition: Pattern.cpp:187
void getMemRefDescriptorSizes(Location loc, MemRefType memRefType, ValueRange dynamicSizes, ConversionPatternRewriter &rewriter, SmallVectorImpl< Value > &sizes, SmallVectorImpl< Value > &strides, Value &size, bool sizeInBytes=true) const
Computes sizes, strides and buffer size of memRefType with identity layout.
Definition: Pattern.cpp:85
const LLVMTypeConverter * getTypeConverter() const
Definition: Pattern.cpp:27
LLVM::LLVMDialect & getDialect() const
Returns the LLVM dialect.
Definition: Pattern.cpp:32
LogicalResult copyUnrankedDescriptors(OpBuilder &builder, Location loc, TypeRange origTypes, SmallVectorImpl< Value > &operands, bool toDynamic) const
Copies the memory descriptor for any operands that were unranked descriptors originally to heap-allocated memory (if toDynamic is true) or to stack-allocated memory otherwise.
Definition: Pattern.cpp:216
This class contains all of the information necessary to report a diagnostic to the DiagnosticEngine.
Definition: Diagnostics.h:155
An instance of this location represents a tuple of file, line number, and column number.
Definition: Location.h:174
Conversion from types to the LLVM IR dialect.
Definition: TypeConverter.h:35
Type packFunctionResults(TypeRange types, bool useBarePointerCallConv=false) const
Convert a non-empty list of types to be returned from a function into an LLVM-compatible type.
const LowerToLLVMOptions & getOptions() const
Type convertFunctionSignature(FunctionType funcTy, bool isVariadic, bool useBarePtrCallConv, SignatureConversion &result) const
Convert a function type.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
FailureOr< unsigned > getMemRefAddressSpace(BaseMemRefType type) const
Return the LLVM address space corresponding to the memory space of the memref type type or failure if...
LocationAttr findInstanceOfOrUnknown()
Return an instance of the given location type if one is nested under the current location else return...
Definition: Location.h:60
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
Helper class to produce LLVM dialect operations extracting or inserting elements of a MemRef descript...
Definition: MemRefBuilder.h:33
static MemRefDescriptor fromStaticShape(OpBuilder &builder, Location loc, const LLVMTypeConverter &typeConverter, MemRefType type, Value memory)
Builds IR creating a MemRef descriptor that represents type and populates it with static shape and st...
Value allocatedPtr(OpBuilder &builder, Location loc)
Builds IR extracting the allocated pointer from the descriptor.
NamedAttrList is array of NamedAttributes that tracks whether it is sorted and does some basic work t...
Attribute erase(StringAttr name)
Erase the attribute with the given name from the list.
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:346
This class helps build Operations.
Definition: Builders.h:205
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:429
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:396
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:455
StringAttr getIdentifier() const
Return the name of this operation as a StringAttr.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition: Operation.h:512
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
result_type_range getResultTypes()
Definition: Operation.h:428
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition: PatternMatch.h:682
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
Definition: PatternMatch.h:594
void inlineRegionBefore(Region &region, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition: PatternMatch.h:500
static StringRef getSymbolAttrName()
Return the name of the attribute used for symbol names.
Definition: SymbolTable.h:76
This class provides all of the information necessary to convert a type signature.
std::optional< InputMapping > getInputMapping(unsigned input) const
Get the input mapping for the given argument.
void remapInput(unsigned origInputNo, ArrayRef< Value > replacements)
Remap an input of the original signature to replacements values.
Type conversion class.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
void addTypeAttributeConversion(FnT &&callback)
Register a conversion function for attributes within types.
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:37
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
bool isIntOrFloat() const
Return true if this is an integer (of any signedness) or a float type.
Definition: Types.cpp:116
unsigned getIntOrFloatBitWidth() const
Return the bit width of an integer or a float type, assert failure on other types.
Definition: Types.cpp:122
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
LogicalResult handleMultidimensionalVectors(Operation *op, ValueRange operands, const LLVMTypeConverter &typeConverter, std::function< Value(Type, ValueRange)> createOperand, ConversionPatternRewriter &rewriter)
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Unrolls op to array/vector elements.
Include the generated interface declarations.
InFlightDiagnostic emitWarning(Location loc)
Utility method to emit a warning message using this location.
std::function< unsigned(gpu::AddressSpace)> MemorySpaceMapping
A function that maps a MemorySpace enum to a target-specific integer value.
Definition: GPUCommonPass.h:70
void populateGpuMemorySpaceAttributeConversions(TypeConverter &typeConverter, const MemorySpaceMapping &mapping)
Populates memory space attribute conversion rules for lowering gpu.address_space to integer values.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc, OpBuilder &b, StringRef name, LLVM::LLVMFunctionType type)
Find or create an external function declaration in the given module.
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc, gpu::GPUModuleOp moduleOp, Type llvmI8, StringRef namePrefix, StringRef str, uint64_t alignment=0, unsigned addrSpace=0)
Create a global that contains the given string.
LogicalResult matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.
LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.