//===- GPUOpsLowering.cpp - GPU FuncOp / ReturnOp lowering ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "GPUOpsLowering.h"

#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
#include "llvm/ADT/SmallVectorExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/FormatVariadic.h"

using namespace mlir;

LLVM::LLVMFuncOp mlir::getOrDefineFunction(gpu::GPUModuleOp moduleOp,
                                           Location loc, OpBuilder &b,
                                           StringRef name,
                                           LLVM::LLVMFunctionType type) {
  LLVM::LLVMFuncOp ret;
  if (!(ret = moduleOp.template lookupSymbol<LLVM::LLVMFuncOp>(name))) {
    OpBuilder::InsertionGuard guard(b);
    b.setInsertionPointToStart(moduleOp.getBody());
    ret = LLVM::LLVMFuncOp::create(b, loc, name, type, LLVM::Linkage::External);
  }
  return ret;
}

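// Illustrative use (hypothetical symbol and in-scope names, not taken from this
// file): the helper is idempotent, so repeated requests for the same name reuse
// the existing declaration instead of redeclaring it.
//
//   auto fnTy = LLVM::LLVMFunctionType::get(llvmI64, {llvmI64});
//   LLVM::LLVMFuncOp fn =
//       getOrDefineFunction(moduleOp, loc, rewriter, "__some_runtime_fn", fnTy);
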
static SmallString<16> getUniqueSymbolName(gpu::GPUModuleOp moduleOp,
                                           StringRef prefix) {
  // Get a unique global name.
  unsigned stringNumber = 0;
  SmallString<16> stringConstName;
  do {
    stringConstName.clear();
    (prefix + Twine(stringNumber++)).toStringRef(stringConstName);
  } while (moduleOp.lookupSymbol(stringConstName));
  return stringConstName;
}

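// For example, the prefix "printfFormat_" yields "printfFormat_0",
// "printfFormat_1", ... until a name not yet bound in the module's symbol
// table is found.
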
LLVM::GlobalOp
mlir::getOrCreateStringConstant(OpBuilder &b, Location loc,
                                gpu::GPUModuleOp moduleOp, Type llvmI8,
                                StringRef namePrefix, StringRef str,
                                uint64_t alignment, unsigned addrSpace) {
  llvm::SmallString<20> nullTermStr(str);
  nullTermStr.push_back('\0'); // Null terminate for C
  auto globalType =
      LLVM::LLVMArrayType::get(llvmI8, nullTermStr.size_in_bytes());
  StringAttr attr = b.getStringAttr(nullTermStr);

  // Try to find existing global.
  for (auto globalOp : moduleOp.getOps<LLVM::GlobalOp>())
    if (globalOp.getGlobalType() == globalType && globalOp.getConstant() &&
        globalOp.getValueAttr() == attr &&
        globalOp.getAlignment().value_or(0) == alignment &&
        globalOp.getAddrSpace() == addrSpace)
      return globalOp;

  // Not found: create new global.
  OpBuilder::InsertionGuard guard(b);
  b.setInsertionPointToStart(moduleOp.getBody());
  SmallString<16> name = getUniqueSymbolName(moduleOp, namePrefix);
  return LLVM::GlobalOp::create(b, loc, globalType,
                                /*isConstant=*/true, LLVM::Linkage::Internal,
                                name, attr, alignment, addrSpace);
}

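// Illustrative result (assumed textual form, not verbatim compiler output):
// for str = "Hello\n" this finds or creates a null-terminated constant such as
//
//   llvm.mlir.global internal constant @printfFormat_0("Hello\0A\00")
//
// so gpu.printf ops sharing a format string also share a single global.
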
LogicalResult
GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
                                   ConversionPatternRewriter &rewriter) const {
  Location loc = gpuFuncOp.getLoc();

  SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
  if (encodeWorkgroupAttributionsAsArguments) {
    // Append an `llvm.ptr` argument to the function signature to encode
    // workgroup attributions.

    ArrayRef<BlockArgument> workgroupAttributions =
        gpuFuncOp.getWorkgroupAttributions();
    size_t numAttributions = workgroupAttributions.size();

    // Insert all arguments at the end.
    unsigned index = gpuFuncOp.getNumArguments();
    SmallVector<unsigned> argIndices(numAttributions, index);

    // New arguments will simply be `llvm.ptr` with the correct address space.
    Type workgroupPtrType =
        rewriter.getType<LLVM::LLVMPointerType>(workgroupAddrSpace);
    SmallVector<Type> argTypes(numAttributions, workgroupPtrType);

    // Attributes: noalias, llvm.mlir.workgroup_attribution(<size>, <type>)
    std::array attrs{
        rewriter.getNamedAttr(LLVM::LLVMDialect::getNoAliasAttrName(),
                              rewriter.getUnitAttr()),
        rewriter.getNamedAttr(
            getDialect().getWorkgroupAttributionAttrHelper().getName(),
            rewriter.getUnitAttr()),
    };
    SmallVector<DictionaryAttr> argAttrs;
    for (BlockArgument attribution : workgroupAttributions) {
      auto attributionType = cast<MemRefType>(attribution.getType());
      IntegerAttr numElements =
          rewriter.getI64IntegerAttr(attributionType.getNumElements());
      Type llvmElementType =
          getTypeConverter()->convertType(attributionType.getElementType());
      if (!llvmElementType)
        return failure();
      TypeAttr type = TypeAttr::get(llvmElementType);
      attrs.back().setValue(
          rewriter.getAttr<LLVM::WorkgroupAttributionAttr>(numElements, type));
      argAttrs.push_back(rewriter.getDictionaryAttr(attrs));
    }

    // Locations match the function location.
    SmallVector<Location> argLocs(numAttributions, gpuFuncOp.getLoc());

    // Perform signature modification.
    rewriter.modifyOpInPlace(
        gpuFuncOp, [gpuFuncOp, &argIndices, &argTypes, &argAttrs, &argLocs]() {
          LogicalResult inserted =
              static_cast<FunctionOpInterface>(gpuFuncOp).insertArguments(
                  argIndices, argTypes, argAttrs, argLocs);
          (void)inserted;
          assert(succeeded(inserted) &&
                 "expected GPU funcs to support inserting any argument");
        });
  } else {
    workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
    for (auto [idx, attribution] :
         llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
      auto type = dyn_cast<MemRefType>(attribution.getType());
      assert(type && type.hasStaticShape() && "unexpected type in attribution");

      uint64_t numElements = type.getNumElements();

      auto elementType =
          cast<Type>(typeConverter->convertType(type.getElementType()));
      auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
      std::string name =
          std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx));
      uint64_t alignment = 0;
      if (auto alignAttr = dyn_cast_or_null<IntegerAttr>(
              gpuFuncOp.getWorkgroupAttributionAttr(
                  idx, LLVM::LLVMDialect::getAlignAttrName())))
        alignment = alignAttr.getInt();
      auto globalOp = LLVM::GlobalOp::create(
          rewriter, gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
          LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment,
          workgroupAddrSpace);
      workgroupBuffers.push_back(globalOp);
    }
  }

  // Remap proper input types.
  TypeConverter::SignatureConversion signatureConversion(
      gpuFuncOp.front().getNumArguments());

  Type funcType = getTypeConverter()->convertFunctionSignature(
      gpuFuncOp.getFunctionType(), /*isVariadic=*/false,
      getTypeConverter()->getOptions().useBarePtrCallConv, signatureConversion);
  if (!funcType) {
    return rewriter.notifyMatchFailure(gpuFuncOp, [&](Diagnostic &diag) {
      diag << "failed to convert function signature type for: "
           << gpuFuncOp.getFunctionType();
    });
  }

  // Create the new function operation. Only copy those attributes that are
  // not specific to function modeling.
  SmallVector<NamedAttribute> attributes;
  ArrayAttr argAttrs;
  for (const auto &attr : gpuFuncOp->getAttrs()) {
    if (attr.getName() == SymbolTable::getSymbolAttrName() ||
        attr.getName() == gpuFuncOp.getFunctionTypeAttrName() ||
        attr.getName() ==
            gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() ||
        attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() ||
        attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName() ||
        attr.getName() == gpuFuncOp.getKnownBlockSizeAttrName() ||
        attr.getName() == gpuFuncOp.getKnownGridSizeAttrName())
      continue;
    if (attr.getName() == gpuFuncOp.getArgAttrsAttrName()) {
      argAttrs = gpuFuncOp.getArgAttrsAttr();
      continue;
    }
    attributes.push_back(attr);
  }

  DenseI32ArrayAttr knownBlockSize = gpuFuncOp.getKnownBlockSizeAttr();
  DenseI32ArrayAttr knownGridSize = gpuFuncOp.getKnownGridSizeAttr();
  // Ensure we don't lose information if the function is lowered before its
  // surrounding context.
  auto *gpuDialect = cast<gpu::GPUDialect>(gpuFuncOp->getDialect());
  if (knownBlockSize)
    attributes.emplace_back(gpuDialect->getKnownBlockSizeAttrHelper().getName(),
                            knownBlockSize);
  if (knownGridSize)
    attributes.emplace_back(gpuDialect->getKnownGridSizeAttrHelper().getName(),
                            knownGridSize);

  // Add a dialect specific kernel attribute in addition to the GPU kernel
  // attribute. The former is necessary for further translation while the
  // latter is expected by gpu.launch_func.
  if (gpuFuncOp.isKernel()) {
    if (kernelAttributeName)
      attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr());
    // Set the dialect-specific block size attribute if there is one.
    if (kernelBlockSizeAttributeName && knownBlockSize) {
      attributes.emplace_back(kernelBlockSizeAttributeName, knownBlockSize);
    }
  }
  LLVM::CConv callingConvention = gpuFuncOp.isKernel()
                                      ? kernelCallingConvention
                                      : nonKernelCallingConvention;
  auto llvmFuncOp = LLVM::LLVMFuncOp::create(
      rewriter, gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType,
      LLVM::Linkage::External, /*dsoLocal=*/false, callingConvention,
      /*comdat=*/nullptr, attributes);

  {
    // Insert operations that correspond to converted workgroup and private
    // memory attributions to the body of the function. This must operate on
    // the original function, before the body region is inlined in the new
    // function, to maintain the relation between block arguments and the
    // parent operation that assigns their semantics.
    OpBuilder::InsertionGuard guard(rewriter);

    // Rewrite workgroup memory attributions to addresses of global buffers.
    rewriter.setInsertionPointToStart(&gpuFuncOp.front());
    unsigned numProperArguments = gpuFuncOp.getNumArguments();

    if (encodeWorkgroupAttributionsAsArguments) {
      // Build a MemRefDescriptor with each of the arguments added above.

      unsigned numAttributions = gpuFuncOp.getNumWorkgroupAttributions();
      assert(numProperArguments >= numAttributions &&
             "Expecting attributions to be encoded as arguments already");

      // Arguments encoding workgroup attributions will be in positions
      // [numProperArguments, numProperArguments+numAttributions).
      ArrayRef<BlockArgument> attributionArguments =
          gpuFuncOp.getArguments().slice(numProperArguments - numAttributions,
                                         numAttributions);
      for (auto [idx, vals] : llvm::enumerate(llvm::zip_equal(
               gpuFuncOp.getWorkgroupAttributions(), attributionArguments))) {
        auto [attribution, arg] = vals;
        auto type = cast<MemRefType>(attribution.getType());

        // Arguments are of llvm.ptr type and attributions are of memref type:
        // we need to wrap them in memref descriptors.
        Value descr = MemRefDescriptor::fromStaticShape(
            rewriter, loc, *getTypeConverter(), type, arg);

        // And remap the arguments.
        signatureConversion.remapInput(numProperArguments + idx, descr);
      }
    } else {
      for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) {
        auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(),
                                                  global.getAddrSpace());
        Value address = LLVM::AddressOfOp::create(rewriter, loc, ptrType,
                                                  global.getSymNameAttr());
        Value memory =
            LLVM::GEPOp::create(rewriter, loc, ptrType, global.getType(),
                                address, ArrayRef<LLVM::GEPArg>{0, 0});

        // Build a memref descriptor pointing to the buffer to plug with the
        // existing memref infrastructure. This may use more registers than
        // otherwise necessary given that memref sizes are fixed, but we can
        // try and canonicalize that away later.
        Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx];
        auto type = cast<MemRefType>(attribution.getType());
        auto descr = MemRefDescriptor::fromStaticShape(
            rewriter, loc, *getTypeConverter(), type, memory);
        signatureConversion.remapInput(numProperArguments + idx, descr);
      }
    }

    // Rewrite private memory attributions to alloca'ed buffers.
    unsigned numWorkgroupAttributions = gpuFuncOp.getNumWorkgroupAttributions();
    auto int64Ty = IntegerType::get(rewriter.getContext(), 64);
    for (const auto [idx, attribution] :
         llvm::enumerate(gpuFuncOp.getPrivateAttributions())) {
      auto type = cast<MemRefType>(attribution.getType());
      assert(type && type.hasStaticShape() && "unexpected type in attribution");

      // Explicitly drop memory space when lowering private memory
      // attributions since NVVM models it as `alloca`s in the default
      // memory space and does not support `alloca`s with addrspace(5).
      Type elementType = typeConverter->convertType(type.getElementType());
      auto ptrType =
          LLVM::LLVMPointerType::get(rewriter.getContext(), allocaAddrSpace);
      Value numElements = LLVM::ConstantOp::create(
          rewriter, gpuFuncOp.getLoc(), int64Ty, type.getNumElements());
      uint64_t alignment = 0;
      if (auto alignAttr =
              dyn_cast_or_null<IntegerAttr>(gpuFuncOp.getPrivateAttributionAttr(
                  idx, LLVM::LLVMDialect::getAlignAttrName())))
        alignment = alignAttr.getInt();
      Value allocated =
          LLVM::AllocaOp::create(rewriter, gpuFuncOp.getLoc(), ptrType,
                                 elementType, numElements, alignment);
      auto descr = MemRefDescriptor::fromStaticShape(
          rewriter, loc, *getTypeConverter(), type, allocated);
      signatureConversion.remapInput(
          numProperArguments + numWorkgroupAttributions + idx, descr);
    }
  }

  // Move the region to the new function, update the entry block signature.
  rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(),
                              llvmFuncOp.end());
  if (failed(rewriter.convertRegionTypes(&llvmFuncOp.getBody(), *typeConverter,
                                         &signatureConversion)))
    return failure();

  // Get memref type from function arguments and set the noalias to
  // pointer arguments.
  for (const auto [idx, argTy] :
       llvm::enumerate(gpuFuncOp.getArgumentTypes())) {
    auto remapping = signatureConversion.getInputMapping(idx);
    NamedAttrList argAttr =
        argAttrs ? cast<DictionaryAttr>(argAttrs[idx]) : NamedAttrList();
    auto copyAttribute = [&](StringRef attrName) {
      Attribute attr = argAttr.erase(attrName);
      if (!attr)
        return;
      for (size_t i = 0, e = remapping->size; i < e; ++i)
        llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
    };
    auto copyPointerAttribute = [&](StringRef attrName) {
      Attribute attr = argAttr.erase(attrName);

      if (!attr)
        return;
      if (remapping->size > 1 &&
          attrName == LLVM::LLVMDialect::getNoAliasAttrName()) {
        emitWarning(llvmFuncOp.getLoc(),
                    "Cannot copy noalias with non-bare pointers.\n");
        return;
      }
      for (size_t i = 0, e = remapping->size; i < e; ++i) {
        if (isa<LLVM::LLVMPointerType>(
                llvmFuncOp.getArgument(remapping->inputNo + i).getType())) {
          llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
        }
      }
    };

    if (argAttr.empty())
      continue;

    copyAttribute(LLVM::LLVMDialect::getReturnedAttrName());
    copyAttribute(LLVM::LLVMDialect::getNoUndefAttrName());
    copyAttribute(LLVM::LLVMDialect::getInRegAttrName());
    bool lowersToPointer = false;
    for (size_t i = 0, e = remapping->size; i < e; ++i) {
      lowersToPointer |= isa<LLVM::LLVMPointerType>(
          llvmFuncOp.getArgument(remapping->inputNo + i).getType());
    }

    if (lowersToPointer) {
      copyPointerAttribute(LLVM::LLVMDialect::getNoAliasAttrName());
      copyPointerAttribute(LLVM::LLVMDialect::getNoCaptureAttrName());
      copyPointerAttribute(LLVM::LLVMDialect::getNoFreeAttrName());
      copyPointerAttribute(LLVM::LLVMDialect::getAlignAttrName());
      copyPointerAttribute(LLVM::LLVMDialect::getReadonlyAttrName());
      copyPointerAttribute(LLVM::LLVMDialect::getWriteOnlyAttrName());
      copyPointerAttribute(LLVM::LLVMDialect::getReadnoneAttrName());
      copyPointerAttribute(LLVM::LLVMDialect::getNonNullAttrName());
      copyPointerAttribute(LLVM::LLVMDialect::getDereferenceableAttrName());
      copyPointerAttribute(
          LLVM::LLVMDialect::getDereferenceableOrNullAttrName());
      copyPointerAttribute(
          LLVM::LLVMDialect::WorkgroupAttributionAttrHelper::getNameStr());
    }
  }
  rewriter.eraseOp(gpuFuncOp);
  return success();
}

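// Rough before/after sketch of this lowering (simplified, assumed syntax):
//
//   gpu.func @kernel(%arg0: memref<8xf32>)
//       workgroup(%wg: memref<32xf32, #gpu.address_space<workgroup>>)
//       kernel { ... }
//
// becomes an llvm.func carrying the translated kernel and block-size
// attributes, with the workgroup attribution backed either by an extra
// !llvm.ptr argument (when encodeWorkgroupAttributionsAsArguments is set) or
// by an internal global array, and every attribution use rewritten through a
// freshly built memref descriptor.
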
LogicalResult GPUPrintfOpToHIPLowering::matchAndRewrite(
    gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
    ConversionPatternRewriter &rewriter) const {
  Location loc = gpuPrintfOp->getLoc();

  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getI8Type());
  auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
  mlir::Type llvmI32 = typeConverter->convertType(rewriter.getI32Type());
  mlir::Type llvmI64 = typeConverter->convertType(rewriter.getI64Type());
  // Note: this is the GPUModule op, not the ModuleOp that surrounds it.
  // This ensures that global constants and declarations are placed within
  // the device code, not the host code.
  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();

  auto ocklBegin =
      getOrDefineFunction(moduleOp, loc, rewriter, "__ockl_printf_begin",
                          LLVM::LLVMFunctionType::get(llvmI64, {llvmI64}));
  LLVM::LLVMFuncOp ocklAppendArgs;
  if (!adaptor.getArgs().empty()) {
    ocklAppendArgs = getOrDefineFunction(
        moduleOp, loc, rewriter, "__ockl_printf_append_args",
        LLVM::LLVMFunctionType::get(
            llvmI64, {llvmI64, /*numArgs*/ llvmI32, llvmI64, llvmI64, llvmI64,
                      llvmI64, llvmI64, llvmI64, llvmI64, /*isLast*/ llvmI32}));
  }
  auto ocklAppendStringN = getOrDefineFunction(
      moduleOp, loc, rewriter, "__ockl_printf_append_string_n",
      LLVM::LLVMFunctionType::get(
          llvmI64,
          {llvmI64, ptrType, /*length (bytes)*/ llvmI64, /*isLast*/ llvmI32}));

  /// Start the printf hostcall
  Value zeroI64 = LLVM::ConstantOp::create(rewriter, loc, llvmI64, 0);
  auto printfBeginCall =
      LLVM::CallOp::create(rewriter, loc, ocklBegin, zeroI64);
  Value printfDesc = printfBeginCall.getResult();

  // Create the global op or find an existing one.
  LLVM::GlobalOp global = getOrCreateStringConstant(
      rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat());

  // Get a pointer to the format string's first element and pass it to printf().
  Value globalPtr = LLVM::AddressOfOp::create(
      rewriter, loc,
      LLVM::LLVMPointerType::get(rewriter.getContext(), global.getAddrSpace()),
      global.getSymNameAttr());
  Value stringStart =
      LLVM::GEPOp::create(rewriter, loc, ptrType, global.getGlobalType(),
                          globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
  Value stringLen = LLVM::ConstantOp::create(
      rewriter, loc, llvmI64, cast<StringAttr>(global.getValueAttr()).size());

  Value oneI32 = LLVM::ConstantOp::create(rewriter, loc, llvmI32, 1);
  Value zeroI32 = LLVM::ConstantOp::create(rewriter, loc, llvmI32, 0);

  auto appendFormatCall = LLVM::CallOp::create(
      rewriter, loc, ocklAppendStringN,
      ValueRange{printfDesc, stringStart, stringLen,
                 adaptor.getArgs().empty() ? oneI32 : zeroI32});
  printfDesc = appendFormatCall.getResult();

  // __ockl_printf_append_args takes 7 values per append call.
  constexpr size_t argsPerAppend = 7;
  size_t nArgs = adaptor.getArgs().size();
  for (size_t group = 0; group < nArgs; group += argsPerAppend) {
    size_t bound = std::min(group + argsPerAppend, nArgs);
    size_t numArgsThisCall = bound - group;

    SmallVector<mlir::Value, 2 + argsPerAppend + 1> arguments;
    arguments.push_back(printfDesc);
    arguments.push_back(
        LLVM::ConstantOp::create(rewriter, loc, llvmI32, numArgsThisCall));
    for (size_t i = group; i < bound; ++i) {
      Value arg = adaptor.getArgs()[i];
      if (auto floatType = dyn_cast<FloatType>(arg.getType())) {
        if (!floatType.isF64())
          arg = LLVM::FPExtOp::create(
              rewriter, loc, typeConverter->convertType(rewriter.getF64Type()),
              arg);
        arg = LLVM::BitcastOp::create(rewriter, loc, llvmI64, arg);
      }
      if (arg.getType().getIntOrFloatBitWidth() != 64)
        arg = LLVM::ZExtOp::create(rewriter, loc, llvmI64, arg);

      arguments.push_back(arg);
    }
    // Pad out to 7 arguments since the hostcall always needs 7.
    for (size_t extra = numArgsThisCall; extra < argsPerAppend; ++extra) {
      arguments.push_back(zeroI64);
    }

    auto isLast = (bound == nArgs) ? oneI32 : zeroI32;
    arguments.push_back(isLast);
    auto call = LLVM::CallOp::create(rewriter, loc, ocklAppendArgs, arguments);
    printfDesc = call.getResult();
  }
  rewriter.eraseOp(gpuPrintfOp);
  return success();
}

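// Illustrative shape of the emitted hostcall sequence (simplified; the real
// output also materializes the constants shown inline):
//
//   %d0 = llvm.call @__ockl_printf_begin(%c0)
//   %d1 = llvm.call @__ockl_printf_append_string_n(%d0, %fmt, %len, %isLast)
//   %d2 = llvm.call @__ockl_printf_append_args(%d1, %n, %a0, ..., %a6, %isLast)
//
// with every argument widened to i64 and appended in padded groups of seven.
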
LogicalResult GPUPrintfOpToLLVMCallLowering::matchAndRewrite(
    gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
    ConversionPatternRewriter &rewriter) const {
  Location loc = gpuPrintfOp->getLoc();

  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8));
  mlir::Type ptrType =
      LLVM::LLVMPointerType::get(rewriter.getContext(), addressSpace);

  // Note: this is the GPUModule op, not the ModuleOp that surrounds it.
  // This ensures that global constants and declarations are placed within
  // the device code, not the host code.
  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();

  auto printfType =
      LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType},
                                  /*isVarArg=*/true);
  LLVM::LLVMFuncOp printfDecl =
      getOrDefineFunction(moduleOp, loc, rewriter, "printf", printfType);

  // Create the global op or find an existing one.
  LLVM::GlobalOp global = getOrCreateStringConstant(
      rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat(),
      /*alignment=*/0, addressSpace);

  // Get a pointer to the format string's first element.
  Value globalPtr = LLVM::AddressOfOp::create(
      rewriter, loc,
      LLVM::LLVMPointerType::get(rewriter.getContext(), global.getAddrSpace()),
      global.getSymNameAttr());
  Value stringStart =
      LLVM::GEPOp::create(rewriter, loc, ptrType, global.getGlobalType(),
                          globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});

  // Construct arguments and the function call.
  auto argsRange = adaptor.getArgs();
  SmallVector<Value, 4> printfArgs;
  printfArgs.reserve(argsRange.size() + 1);
  printfArgs.push_back(stringStart);
  printfArgs.append(argsRange.begin(), argsRange.end());

  LLVM::CallOp::create(rewriter, loc, printfDecl, printfArgs);
  rewriter.eraseOp(gpuPrintfOp);
  return success();
}

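// Illustrative emitted call (simplified, not the exact printed syntax): the
// format pointer goes first and the converted operands follow, relying on the
// variadic `printf` declaration built above:
//
//   %r = llvm.call @printf(%fmtPtr, %arg0, %arg1, ...) : (...) -> i32
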
LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite(
    gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
    ConversionPatternRewriter &rewriter) const {
  Location loc = gpuPrintfOp->getLoc();

  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8));
  mlir::Type ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());

  // Note: this is the GPUModule op, not the ModuleOp that surrounds it.
  // This ensures that global constants and declarations are placed within
  // the device code, not the host code.
  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();

  // Create a valid global location, removing any metadata attached to the
  // location, as debug info metadata inside of a function cannot be used
  // outside of that function.
  Location globalLoc = loc->findInstanceOfOrUnknown<FileLineColLoc>();

  auto vprintfType =
      LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType, ptrType});
  LLVM::LLVMFuncOp vprintfDecl = getOrDefineFunction(
      moduleOp, globalLoc, rewriter, "vprintf", vprintfType);

  // Create the global op or find an existing one.
  LLVM::GlobalOp global =
      getOrCreateStringConstant(rewriter, globalLoc, moduleOp, llvmI8,
                                "printfFormat_", adaptor.getFormat());

  // Get a pointer to the format string's first element.
  Value globalPtr = LLVM::AddressOfOp::create(rewriter, loc, global);
  Value stringStart =
      LLVM::GEPOp::create(rewriter, loc, ptrType, global.getGlobalType(),
                          globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
  SmallVector<Type> types;
  SmallVector<Value> args;
  // Promote and pack the arguments into a stack allocation.
  for (Value arg : adaptor.getArgs()) {
    Type type = arg.getType();
    Value promotedArg = arg;
    assert(type.isIntOrFloat());
    if (isa<FloatType>(type)) {
      type = rewriter.getF64Type();
      promotedArg = LLVM::FPExtOp::create(rewriter, loc, type, arg);
    }
    types.push_back(type);
    args.push_back(promotedArg);
  }
  Type structType =
      LLVM::LLVMStructType::getLiteral(gpuPrintfOp.getContext(), types);
  Value one = LLVM::ConstantOp::create(rewriter, loc, rewriter.getI64Type(),
                                       rewriter.getIndexAttr(1));
  Value tempAlloc =
      LLVM::AllocaOp::create(rewriter, loc, ptrType, structType, one,
                             /*alignment=*/0);
  for (auto [index, arg] : llvm::enumerate(args)) {
    Value ptr = LLVM::GEPOp::create(
        rewriter, loc, ptrType, structType, tempAlloc,
        ArrayRef<LLVM::GEPArg>{0, static_cast<int32_t>(index)});
    LLVM::StoreOp::create(rewriter, loc, arg, ptr);
  }
  std::array<Value, 2> printfArgs = {stringStart, tempAlloc};

  LLVM::CallOp::create(rewriter, loc, vprintfDecl, printfArgs);
  rewriter.eraseOp(gpuPrintfOp);
  return success();
}

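// Illustrative packing (simplified): for a gpu.printf with an f32 and an i32
// operand, the f32 is promoted to f64, both values are stored into a
// stack-allocated !llvm.struct<(f64, i32)>, and vprintf receives two pointers:
// the format string and that packed argument buffer.
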
/// Helper for impl::scalarizeVectorOp. Scalarizes vectors to elements.
/// Used either directly (for ops on 1D vectors) or as the callback passed to
/// detail::handleMultidimensionalVectors (for ops on higher-rank vectors).
static Value scalarizeVectorOpHelper(Operation *op, ValueRange operands,
                                     Type llvm1DVectorTy,
                                     ConversionPatternRewriter &rewriter,
                                     const LLVMTypeConverter &converter) {
  TypeRange operandTypes(operands);
  VectorType vectorType = cast<VectorType>(llvm1DVectorTy);
  Location loc = op->getLoc();
  Value result = LLVM::PoisonOp::create(rewriter, loc, vectorType);
  Type indexType = converter.convertType(rewriter.getIndexType());
  StringAttr name = op->getName().getIdentifier();
  Type elementType = vectorType.getElementType();

  for (int64_t i = 0; i < vectorType.getNumElements(); ++i) {
    Value index = LLVM::ConstantOp::create(rewriter, loc, indexType, i);
    auto extractElement = [&](Value operand) -> Value {
      if (!isa<VectorType>(operand.getType()))
        return operand;
      return LLVM::ExtractElementOp::create(rewriter, loc, operand, index);
    };
    auto scalarOperands = llvm::map_to_vector(operands, extractElement);
    Operation *scalarOp =
        rewriter.create(loc, name, scalarOperands, elementType, op->getAttrs());
    result = LLVM::InsertElementOp::create(rewriter, loc, result,
                                           scalarOp->getResult(0), index);
  }
  return result;
}

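// Illustrative expansion (simplified, assumed op names): an elementwise op on
// vector<2xf32> is rebuilt element by element, roughly as
//
//   %p  = llvm.mlir.poison : vector<2xf32>
//   %e0 = llvm.extractelement %v[%c0] ; %s0 = <scalar op>(%e0)
//   %r0 = llvm.insertelement %s0, %p[%c0]
//   %e1 = llvm.extractelement %v[%c1] ; %s1 = <scalar op>(%e1)
//   %r  = llvm.insertelement %s1, %r0[%c1]
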
/// Unrolls op to array/vector elements.
LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands,
                                      ConversionPatternRewriter &rewriter,
                                      const LLVMTypeConverter &converter) {
  TypeRange operandTypes(operands);
  if (llvm::any_of(operandTypes, llvm::IsaPred<VectorType>)) {
    VectorType vectorType =
        cast<VectorType>(converter.convertType(op->getResultTypes()[0]));
    rewriter.replaceOp(op, scalarizeVectorOpHelper(op, operands, vectorType,
                                                   rewriter, converter));
    return success();
  }

  if (llvm::any_of(operandTypes, llvm::IsaPred<LLVM::LLVMArrayType>)) {
    return LLVM::detail::handleMultidimensionalVectors(
        op, operands, converter,
        [&](Type llvm1DVectorTy, ValueRange operands) -> Value {
          return scalarizeVectorOpHelper(op, operands, llvm1DVectorTy, rewriter,
                                         converter);
        },
        rewriter);
  }

  return rewriter.notifyMatchFailure(op, "no llvm.array or vector to unroll");
}

static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space) {
  return IntegerAttr::get(IntegerType::get(ctx, 64), space);
}

/// Generates a symbol with 0-sized array type for dynamic shared memory usage,
/// or uses an existing symbol.
LLVM::GlobalOp getDynamicSharedMemorySymbol(
    ConversionPatternRewriter &rewriter, gpu::GPUModuleOp moduleOp,
    gpu::DynamicSharedMemoryOp op, const LLVMTypeConverter *typeConverter,
    MemRefType memrefType, unsigned alignmentBit) {
  uint64_t alignmentByte = alignmentBit / memrefType.getElementTypeBitWidth();

  FailureOr<unsigned> addressSpace =
      typeConverter->getMemRefAddressSpace(memrefType);
  if (failed(addressSpace)) {
    op->emitError() << "conversion of memref memory space "
                    << memrefType.getMemorySpace()
                    << " to integer address space "
                       "failed. Consider adding memory space conversions.";
  }

  // Step 1. Collect symbol names of LLVM::GlobalOp ops. Also, if any
  // LLVM::GlobalOp is suitable for shared memory, return it.
  llvm::StringSet<> existingGlobalNames;
  for (auto globalOp : moduleOp.getBody()->getOps<LLVM::GlobalOp>()) {
    existingGlobalNames.insert(globalOp.getSymName());
    if (auto arrayType = dyn_cast<LLVM::LLVMArrayType>(globalOp.getType())) {
      if (globalOp.getAddrSpace() == addressSpace.value() &&
          arrayType.getNumElements() == 0 &&
          globalOp.getAlignment().value_or(0) == alignmentByte) {
        return globalOp;
      }
    }
  }

  // Step 2. Find a unique symbol name.
  unsigned uniquingCounter = 0;
  SmallString<128> symName = SymbolTable::generateSymbolName<128>(
      "__dynamic_shmem_",
      [&](StringRef candidate) {
        return existingGlobalNames.contains(candidate);
      },
      uniquingCounter);

  // Step 3. Generate a global op.
  OpBuilder::InsertionGuard guard(rewriter);
  rewriter.setInsertionPointToStart(moduleOp.getBody());

  auto zeroSizedArrayType = LLVM::LLVMArrayType::get(
      typeConverter->convertType(memrefType.getElementType()), 0);

  return LLVM::GlobalOp::create(rewriter, op->getLoc(), zeroSizedArrayType,
                                /*isConstant=*/false, LLVM::Linkage::Internal,
                                symName, /*value=*/Attribute(), alignmentByte,
                                addressSpace.value());
}

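// Illustrative symbol (assumed textual form, attributes abbreviated): the
// helper returns a reusable zero-sized array global in the memref's address
// space, e.g.
//
//   llvm.mlir.global internal @__dynamic_shmem_0() {alignment = 8 : i64}
//       : !llvm.array<0 x i8>
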
LogicalResult GPUDynamicSharedMemoryOpLowering::matchAndRewrite(
    gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
    ConversionPatternRewriter &rewriter) const {
  Location loc = op.getLoc();
  MemRefType memrefType = op.getResultMemref().getType();
  Type elementType = typeConverter->convertType(memrefType.getElementType());

  // Step 1. Generate a memref<0xi8> type.
  MemRefLayoutAttrInterface layout = {};
  auto memrefType0sz =
      MemRefType::get({0}, elementType, layout, memrefType.getMemorySpace());

  // Step 2. Generate a global symbol, or reuse an existing one, for the
  // dynamic shared memory with memref<0xi8> type.
  auto moduleOp = op->getParentOfType<gpu::GPUModuleOp>();
  LLVM::GlobalOp shmemOp = getDynamicSharedMemorySymbol(
      rewriter, moduleOp, op, getTypeConverter(), memrefType0sz, alignmentBit);

  // Step 3. Get the address of the global symbol.
  OpBuilder::InsertionGuard guard(rewriter);
  rewriter.setInsertionPoint(op);
  auto basePtr = LLVM::AddressOfOp::create(rewriter, loc, shmemOp);
  Type baseType = basePtr->getResultTypes().front();

  // Step 4. Generate a GEP using offsets.
  SmallVector<LLVM::GEPArg> gepArgs = {0};
  Value shmemPtr = LLVM::GEPOp::create(rewriter, loc, baseType, elementType,
                                       basePtr, gepArgs);
  // Step 5. Create a memref descriptor.
  SmallVector<Value> shape, strides;
  Value sizeBytes;
  getMemRefDescriptorSizes(loc, memrefType0sz, {}, rewriter, shape, strides,
                           sizeBytes);
  auto memRefDescriptor = this->createMemRefDescriptor(
      loc, memrefType0sz, shmemPtr, shmemPtr, shape, strides, rewriter);

  // Step 6. Replace the op with the memref descriptor.
  rewriter.replaceOp(op, {memRefDescriptor});
  return success();
}

LogicalResult GPUReturnOpLowering::matchAndRewrite(
    gpu::ReturnOp op, OpAdaptor adaptor,
    ConversionPatternRewriter &rewriter) const {
  Location loc = op.getLoc();
  unsigned numArguments = op.getNumOperands();
  SmallVector<Value, 4> updatedOperands;

  bool useBarePtrCallConv = getTypeConverter()->getOptions().useBarePtrCallConv;
  if (useBarePtrCallConv) {
    // For the bare-ptr calling convention, extract the aligned pointer to
    // be returned from the memref descriptor.
    for (auto it : llvm::zip(op->getOperands(), adaptor.getOperands())) {
      Type oldTy = std::get<0>(it).getType();
      Value newOperand = std::get<1>(it);
      if (isa<MemRefType>(oldTy) && getTypeConverter()->canConvertToBarePtr(
                                        cast<BaseMemRefType>(oldTy))) {
        MemRefDescriptor memrefDesc(newOperand);
        newOperand = memrefDesc.allocatedPtr(rewriter, loc);
      } else if (isa<UnrankedMemRefType>(oldTy)) {
        // Unranked memref is not supported in the bare pointer calling
        // convention.
        return failure();
      }
      updatedOperands.push_back(newOperand);
    }
  } else {
    updatedOperands = llvm::to_vector<4>(adaptor.getOperands());
    (void)copyUnrankedDescriptors(rewriter, loc, op.getOperands().getTypes(),
                                  updatedOperands,
                                  /*toDynamic=*/true);
  }

  // If ReturnOp has 0 or 1 operand, create it and return immediately.
  if (numArguments <= 1) {
    rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(
        op, TypeRange(), updatedOperands, op->getAttrs());
    return success();
  }

  // Otherwise, we need to pack the arguments into an LLVM struct type before
  // returning.
  auto packedType = getTypeConverter()->packFunctionResults(
      op.getOperandTypes(), useBarePtrCallConv);
  if (!packedType) {
    return rewriter.notifyMatchFailure(op, "could not convert result types");
  }

  Value packed = LLVM::PoisonOp::create(rewriter, loc, packedType);
  for (auto [idx, operand] : llvm::enumerate(updatedOperands)) {
    packed = LLVM::InsertValueOp::create(rewriter, loc, packed, operand, idx);
  }
  rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, TypeRange(), packed,
                                              op->getAttrs());
  return success();
}

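// Illustrative multi-result packing (simplified): a `gpu.return %a, %b` whose
// converted operand types are T0 and T1 becomes
//
//   %0 = llvm.mlir.poison : !llvm.struct<(T0, T1)>
//   %1 = llvm.insertvalue %a, %0[0]
//   %2 = llvm.insertvalue %b, %1[1]
//   llvm.return %2
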
void mlir::populateGpuMemorySpaceAttributeConversions(
    TypeConverter &typeConverter, const MemorySpaceMapping &mapping) {
  typeConverter.addTypeAttributeConversion(
      [mapping](BaseMemRefType type, gpu::AddressSpaceAttr memorySpaceAttr) {
        gpu::AddressSpace memorySpace = memorySpaceAttr.getValue();
        unsigned addressSpace = mapping(memorySpace);
        return wrapNumericMemorySpace(memorySpaceAttr.getContext(),
                                      addressSpace);
      });
}
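
// Illustrative registration (hypothetical mapping values, not defined in this
// file): a target lowering can plug in its numeric address spaces like so.
//
//   populateGpuMemorySpaceAttributeConversions(
//       converter, [](gpu::AddressSpace space) -> unsigned {
//         switch (space) {
//         case gpu::AddressSpace::Global:
//           return 1;
//         case gpu::AddressSpace::Workgroup:
//           return 3;
//         case gpu::AddressSpace::Private:
//           return 5;
//         }
//         llvm_unreachable("unknown address space");
//       });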