MLIR  20.0.0git
GPUOpsLowering.cpp
Go to the documentation of this file.
1 //===- GPUOpsLowering.cpp - GPU FuncOp / ReturnOp lowering ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "GPUOpsLowering.h"
10 
13 #include "mlir/IR/Attributes.h"
14 #include "mlir/IR/Builders.h"
15 #include "mlir/IR/BuiltinTypes.h"
16 #include "llvm/ADT/SmallVectorExtras.h"
17 #include "llvm/ADT/StringSet.h"
18 #include "llvm/Support/FormatVariadic.h"
19 
20 using namespace mlir;
21 
22 LogicalResult
23 GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
24  ConversionPatternRewriter &rewriter) const {
25  Location loc = gpuFuncOp.getLoc();
26 
27  SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
28  if (encodeWorkgroupAttributionsAsArguments) {
29  // Append an `llvm.ptr` argument to the function signature to encode
30  // workgroup attributions.
31 
32  ArrayRef<BlockArgument> workgroupAttributions =
33  gpuFuncOp.getWorkgroupAttributions();
34  size_t numAttributions = workgroupAttributions.size();
35 
36  // Insert all arguments at the end.
37  unsigned index = gpuFuncOp.getNumArguments();
38  SmallVector<unsigned> argIndices(numAttributions, index);
39 
40  // New arguments will simply be `llvm.ptr` with the correct address space
41  Type workgroupPtrType =
42  rewriter.getType<LLVM::LLVMPointerType>(workgroupAddrSpace);
43  SmallVector<Type> argTypes(numAttributions, workgroupPtrType);
44 
45  // Attributes: noalias, llvm.mlir.workgroup_attribution(<size>, <type>)
46  std::array attrs{
47  rewriter.getNamedAttr(LLVM::LLVMDialect::getNoAliasAttrName(),
48  rewriter.getUnitAttr()),
49  rewriter.getNamedAttr(
50  getDialect().getWorkgroupAttributionAttrHelper().getName(),
51  rewriter.getUnitAttr()),
52  };
54  for (BlockArgument attribution : workgroupAttributions) {
55  auto attributionType = cast<MemRefType>(attribution.getType());
56  IntegerAttr numElements =
57  rewriter.getI64IntegerAttr(attributionType.getNumElements());
58  Type llvmElementType =
59  getTypeConverter()->convertType(attributionType.getElementType());
60  if (!llvmElementType)
61  return failure();
62  TypeAttr type = TypeAttr::get(llvmElementType);
63  attrs.back().setValue(
64  rewriter.getAttr<LLVM::WorkgroupAttributionAttr>(numElements, type));
65  argAttrs.push_back(rewriter.getDictionaryAttr(attrs));
66  }
67 
68  // Location match function location
69  SmallVector<Location> argLocs(numAttributions, gpuFuncOp.getLoc());
70 
71  // Perform signature modification
72  rewriter.modifyOpInPlace(
73  gpuFuncOp, [gpuFuncOp, &argIndices, &argTypes, &argAttrs, &argLocs]() {
74  static_cast<FunctionOpInterface>(gpuFuncOp).insertArguments(
75  argIndices, argTypes, argAttrs, argLocs);
76  });
77  } else {
78  workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
79  for (auto [idx, attribution] :
80  llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
81  auto type = dyn_cast<MemRefType>(attribution.getType());
82  assert(type && type.hasStaticShape() && "unexpected type in attribution");
83 
84  uint64_t numElements = type.getNumElements();
85 
86  auto elementType =
87  cast<Type>(typeConverter->convertType(type.getElementType()));
88  auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
89  std::string name =
90  std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx));
91  uint64_t alignment = 0;
92  if (auto alignAttr = dyn_cast_or_null<IntegerAttr>(
93  gpuFuncOp.getWorkgroupAttributionAttr(
94  idx, LLVM::LLVMDialect::getAlignAttrName())))
95  alignment = alignAttr.getInt();
96  auto globalOp = rewriter.create<LLVM::GlobalOp>(
97  gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
98  LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment,
99  workgroupAddrSpace);
100  workgroupBuffers.push_back(globalOp);
101  }
102  }
103 
104  // Remap proper input types.
105  TypeConverter::SignatureConversion signatureConversion(
106  gpuFuncOp.front().getNumArguments());
107 
109  gpuFuncOp.getFunctionType(), /*isVariadic=*/false,
110  getTypeConverter()->getOptions().useBarePtrCallConv, signatureConversion);
111  if (!funcType) {
112  return rewriter.notifyMatchFailure(gpuFuncOp, [&](Diagnostic &diag) {
113  diag << "failed to convert function signature type for: "
114  << gpuFuncOp.getFunctionType();
115  });
116  }
117 
118  // Create the new function operation. Only copy those attributes that are
119  // not specific to function modeling.
121  ArrayAttr argAttrs;
122  for (const auto &attr : gpuFuncOp->getAttrs()) {
123  if (attr.getName() == SymbolTable::getSymbolAttrName() ||
124  attr.getName() == gpuFuncOp.getFunctionTypeAttrName() ||
125  attr.getName() ==
126  gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() ||
127  attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() ||
128  attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName() ||
129  attr.getName() == gpuFuncOp.getKnownBlockSizeAttrName() ||
130  attr.getName() == gpuFuncOp.getKnownGridSizeAttrName())
131  continue;
132  if (attr.getName() == gpuFuncOp.getArgAttrsAttrName()) {
133  argAttrs = gpuFuncOp.getArgAttrsAttr();
134  continue;
135  }
136  attributes.push_back(attr);
137  }
138 
139  DenseI32ArrayAttr knownBlockSize = gpuFuncOp.getKnownBlockSizeAttr();
140  DenseI32ArrayAttr knownGridSize = gpuFuncOp.getKnownGridSizeAttr();
141  // Ensure we don't lose information if the function is lowered before its
142  // surrounding context.
143  auto *gpuDialect = cast<gpu::GPUDialect>(gpuFuncOp->getDialect());
144  if (knownBlockSize)
145  attributes.emplace_back(gpuDialect->getKnownBlockSizeAttrHelper().getName(),
146  knownBlockSize);
147  if (knownGridSize)
148  attributes.emplace_back(gpuDialect->getKnownGridSizeAttrHelper().getName(),
149  knownGridSize);
150 
151  // Add a dialect specific kernel attribute in addition to GPU kernel
152  // attribute. The former is necessary for further translation while the
153  // latter is expected by gpu.launch_func.
154  if (gpuFuncOp.isKernel()) {
155  if (kernelAttributeName)
156  attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr());
157  // Set the dialect-specific block size attribute if there is one.
158  if (kernelBlockSizeAttributeName && knownBlockSize) {
159  attributes.emplace_back(kernelBlockSizeAttributeName, knownBlockSize);
160  }
161  }
162  LLVM::CConv callingConvention = gpuFuncOp.isKernel()
163  ? kernelCallingConvention
164  : nonKernelCallingConvention;
165  auto llvmFuncOp = rewriter.create<LLVM::LLVMFuncOp>(
166  gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType,
167  LLVM::Linkage::External, /*dsoLocal=*/false, callingConvention,
168  /*comdat=*/nullptr, attributes);
169 
170  {
171  // Insert operations that correspond to converted workgroup and private
172  // memory attributions to the body of the function. This must operate on
173  // the original function, before the body region is inlined in the new
174  // function to maintain the relation between block arguments and the
175  // parent operation that assigns their semantics.
176  OpBuilder::InsertionGuard guard(rewriter);
177 
178  // Rewrite workgroup memory attributions to addresses of global buffers.
179  rewriter.setInsertionPointToStart(&gpuFuncOp.front());
180  unsigned numProperArguments = gpuFuncOp.getNumArguments();
181 
182  if (encodeWorkgroupAttributionsAsArguments) {
183  // Build a MemRefDescriptor with each of the arguments added above.
184 
185  unsigned numAttributions = gpuFuncOp.getNumWorkgroupAttributions();
186  assert(numProperArguments >= numAttributions &&
187  "Expecting attributions to be encoded as arguments already");
188 
189  // Arguments encoding workgroup attributions will be in positions
190  // [numProperArguments, numProperArguments+numAttributions)
191  ArrayRef<BlockArgument> attributionArguments =
192  gpuFuncOp.getArguments().slice(numProperArguments - numAttributions,
193  numAttributions);
194  for (auto [idx, vals] : llvm::enumerate(llvm::zip_equal(
195  gpuFuncOp.getWorkgroupAttributions(), attributionArguments))) {
196  auto [attribution, arg] = vals;
197  auto type = cast<MemRefType>(attribution.getType());
198 
199  // Arguments are of llvm.ptr type and attributions are of memref type:
200  // we need to wrap them in memref descriptors.
202  rewriter, loc, *getTypeConverter(), type, arg);
203 
204  // And remap the arguments
205  signatureConversion.remapInput(numProperArguments + idx, descr);
206  }
207  } else {
208  for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) {
209  auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(),
210  global.getAddrSpace());
211  Value address = rewriter.create<LLVM::AddressOfOp>(
212  loc, ptrType, global.getSymNameAttr());
213  Value memory =
214  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getType(),
215  address, ArrayRef<LLVM::GEPArg>{0, 0});
216 
217  // Build a memref descriptor pointing to the buffer to plug with the
218  // existing memref infrastructure. This may use more registers than
219  // otherwise necessary given that memref sizes are fixed, but we can try
220  // and canonicalize that away later.
221  Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx];
222  auto type = cast<MemRefType>(attribution.getType());
224  rewriter, loc, *getTypeConverter(), type, memory);
225  signatureConversion.remapInput(numProperArguments + idx, descr);
226  }
227  }
228 
229  // Rewrite private memory attributions to alloca'ed buffers.
230  unsigned numWorkgroupAttributions = gpuFuncOp.getNumWorkgroupAttributions();
231  auto int64Ty = IntegerType::get(rewriter.getContext(), 64);
232  for (const auto [idx, attribution] :
233  llvm::enumerate(gpuFuncOp.getPrivateAttributions())) {
234  auto type = cast<MemRefType>(attribution.getType());
235  assert(type && type.hasStaticShape() && "unexpected type in attribution");
236 
237  // Explicitly drop memory space when lowering private memory
238  // attributions since NVVM models it as `alloca`s in the default
239  // memory space and does not support `alloca`s with addrspace(5).
240  Type elementType = typeConverter->convertType(type.getElementType());
241  auto ptrType =
242  LLVM::LLVMPointerType::get(rewriter.getContext(), allocaAddrSpace);
243  Value numElements = rewriter.create<LLVM::ConstantOp>(
244  gpuFuncOp.getLoc(), int64Ty, type.getNumElements());
245  uint64_t alignment = 0;
246  if (auto alignAttr =
247  dyn_cast_or_null<IntegerAttr>(gpuFuncOp.getPrivateAttributionAttr(
248  idx, LLVM::LLVMDialect::getAlignAttrName())))
249  alignment = alignAttr.getInt();
250  Value allocated = rewriter.create<LLVM::AllocaOp>(
251  gpuFuncOp.getLoc(), ptrType, elementType, numElements, alignment);
253  rewriter, loc, *getTypeConverter(), type, allocated);
254  signatureConversion.remapInput(
255  numProperArguments + numWorkgroupAttributions + idx, descr);
256  }
257  }
258 
259  // Move the region to the new function, update the entry block signature.
260  rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(),
261  llvmFuncOp.end());
262  if (failed(rewriter.convertRegionTypes(&llvmFuncOp.getBody(), *typeConverter,
263  &signatureConversion)))
264  return failure();
265 
266  // Get memref type from function arguments and set the noalias to
267  // pointer arguments.
268  for (const auto [idx, argTy] :
269  llvm::enumerate(gpuFuncOp.getArgumentTypes())) {
270  auto remapping = signatureConversion.getInputMapping(idx);
271  NamedAttrList argAttr =
272  argAttrs ? cast<DictionaryAttr>(argAttrs[idx]) : NamedAttrList();
273  auto copyAttribute = [&](StringRef attrName) {
274  Attribute attr = argAttr.erase(attrName);
275  if (!attr)
276  return;
277  for (size_t i = 0, e = remapping->size; i < e; ++i)
278  llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
279  };
280  auto copyPointerAttribute = [&](StringRef attrName) {
281  Attribute attr = argAttr.erase(attrName);
282 
283  if (!attr)
284  return;
285  if (remapping->size > 1 &&
286  attrName == LLVM::LLVMDialect::getNoAliasAttrName()) {
287  emitWarning(llvmFuncOp.getLoc(),
288  "Cannot copy noalias with non-bare pointers.\n");
289  return;
290  }
291  for (size_t i = 0, e = remapping->size; i < e; ++i) {
292  if (isa<LLVM::LLVMPointerType>(
293  llvmFuncOp.getArgument(remapping->inputNo + i).getType())) {
294  llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
295  }
296  }
297  };
298 
299  if (argAttr.empty())
300  continue;
301 
302  copyAttribute(LLVM::LLVMDialect::getReturnedAttrName());
303  copyAttribute(LLVM::LLVMDialect::getNoUndefAttrName());
304  copyAttribute(LLVM::LLVMDialect::getInRegAttrName());
305  bool lowersToPointer = false;
306  for (size_t i = 0, e = remapping->size; i < e; ++i) {
307  lowersToPointer |= isa<LLVM::LLVMPointerType>(
308  llvmFuncOp.getArgument(remapping->inputNo + i).getType());
309  }
310 
311  if (lowersToPointer) {
312  copyPointerAttribute(LLVM::LLVMDialect::getNoAliasAttrName());
313  copyPointerAttribute(LLVM::LLVMDialect::getNoCaptureAttrName());
314  copyPointerAttribute(LLVM::LLVMDialect::getNoFreeAttrName());
315  copyPointerAttribute(LLVM::LLVMDialect::getAlignAttrName());
316  copyPointerAttribute(LLVM::LLVMDialect::getReadonlyAttrName());
317  copyPointerAttribute(LLVM::LLVMDialect::getWriteOnlyAttrName());
318  copyPointerAttribute(LLVM::LLVMDialect::getReadnoneAttrName());
319  copyPointerAttribute(LLVM::LLVMDialect::getNonNullAttrName());
320  copyPointerAttribute(LLVM::LLVMDialect::getDereferenceableAttrName());
321  copyPointerAttribute(
322  LLVM::LLVMDialect::getDereferenceableOrNullAttrName());
323  copyPointerAttribute(
324  LLVM::LLVMDialect::WorkgroupAttributionAttrHelper::getNameStr());
325  }
326  }
327  rewriter.eraseOp(gpuFuncOp);
328  return success();
329 }
330 
331 static SmallString<16> getUniqueFormatGlobalName(gpu::GPUModuleOp moduleOp) {
332  const char formatStringPrefix[] = "printfFormat_";
333  // Get a unique global name.
334  unsigned stringNumber = 0;
335  SmallString<16> stringConstName;
336  do {
337  stringConstName.clear();
338  (formatStringPrefix + Twine(stringNumber++)).toStringRef(stringConstName);
339  } while (moduleOp.lookupSymbol(stringConstName));
340  return stringConstName;
341 }
342 
343 template <typename T>
344 static LLVM::LLVMFuncOp getOrDefineFunction(T &moduleOp, const Location loc,
345  ConversionPatternRewriter &rewriter,
346  StringRef name,
347  LLVM::LLVMFunctionType type) {
348  LLVM::LLVMFuncOp ret;
349  if (!(ret = moduleOp.template lookupSymbol<LLVM::LLVMFuncOp>(name))) {
350  ConversionPatternRewriter::InsertionGuard guard(rewriter);
351  rewriter.setInsertionPointToStart(moduleOp.getBody());
352  ret = rewriter.create<LLVM::LLVMFuncOp>(loc, name, type,
353  LLVM::Linkage::External);
354  }
355  return ret;
356 }
357 
359  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
360  ConversionPatternRewriter &rewriter) const {
361  Location loc = gpuPrintfOp->getLoc();
362 
363  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getI8Type());
364  auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
365  mlir::Type llvmI32 = typeConverter->convertType(rewriter.getI32Type());
366  mlir::Type llvmI64 = typeConverter->convertType(rewriter.getI64Type());
367  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
368  // This ensures that global constants and declarations are placed within
369  // the device code, not the host code
370  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
371 
372  auto ocklBegin =
373  getOrDefineFunction(moduleOp, loc, rewriter, "__ockl_printf_begin",
374  LLVM::LLVMFunctionType::get(llvmI64, {llvmI64}));
375  LLVM::LLVMFuncOp ocklAppendArgs;
376  if (!adaptor.getArgs().empty()) {
377  ocklAppendArgs = getOrDefineFunction(
378  moduleOp, loc, rewriter, "__ockl_printf_append_args",
380  llvmI64, {llvmI64, /*numArgs*/ llvmI32, llvmI64, llvmI64, llvmI64,
381  llvmI64, llvmI64, llvmI64, llvmI64, /*isLast*/ llvmI32}));
382  }
383  auto ocklAppendStringN = getOrDefineFunction(
384  moduleOp, loc, rewriter, "__ockl_printf_append_string_n",
386  llvmI64,
387  {llvmI64, ptrType, /*length (bytes)*/ llvmI64, /*isLast*/ llvmI32}));
388 
389  /// Start the printf hostcall
390  Value zeroI64 = rewriter.create<LLVM::ConstantOp>(loc, llvmI64, 0);
391  auto printfBeginCall = rewriter.create<LLVM::CallOp>(loc, ocklBegin, zeroI64);
392  Value printfDesc = printfBeginCall.getResult();
393 
394  // Get a unique global name for the format.
395  SmallString<16> stringConstName = getUniqueFormatGlobalName(moduleOp);
396 
397  llvm::SmallString<20> formatString(adaptor.getFormat());
398  formatString.push_back('\0'); // Null terminate for C
399  size_t formatStringSize = formatString.size_in_bytes();
400 
401  auto globalType = LLVM::LLVMArrayType::get(llvmI8, formatStringSize);
402  LLVM::GlobalOp global;
403  {
405  rewriter.setInsertionPointToStart(moduleOp.getBody());
406  global = rewriter.create<LLVM::GlobalOp>(
407  loc, globalType,
408  /*isConstant=*/true, LLVM::Linkage::Internal, stringConstName,
409  rewriter.getStringAttr(formatString));
410  }
411 
412  // Get a pointer to the format string's first element and pass it to printf()
413  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(
414  loc,
415  LLVM::LLVMPointerType::get(rewriter.getContext(), global.getAddrSpace()),
416  global.getSymNameAttr());
417  Value stringStart = rewriter.create<LLVM::GEPOp>(
418  loc, ptrType, globalType, globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
419  Value stringLen =
420  rewriter.create<LLVM::ConstantOp>(loc, llvmI64, formatStringSize);
421 
422  Value oneI32 = rewriter.create<LLVM::ConstantOp>(loc, llvmI32, 1);
423  Value zeroI32 = rewriter.create<LLVM::ConstantOp>(loc, llvmI32, 0);
424 
425  auto appendFormatCall = rewriter.create<LLVM::CallOp>(
426  loc, ocklAppendStringN,
427  ValueRange{printfDesc, stringStart, stringLen,
428  adaptor.getArgs().empty() ? oneI32 : zeroI32});
429  printfDesc = appendFormatCall.getResult();
430 
431  // __ockl_printf_append_args takes 7 values per append call
432  constexpr size_t argsPerAppend = 7;
433  size_t nArgs = adaptor.getArgs().size();
434  for (size_t group = 0; group < nArgs; group += argsPerAppend) {
435  size_t bound = std::min(group + argsPerAppend, nArgs);
436  size_t numArgsThisCall = bound - group;
437 
439  arguments.push_back(printfDesc);
440  arguments.push_back(
441  rewriter.create<LLVM::ConstantOp>(loc, llvmI32, numArgsThisCall));
442  for (size_t i = group; i < bound; ++i) {
443  Value arg = adaptor.getArgs()[i];
444  if (auto floatType = dyn_cast<FloatType>(arg.getType())) {
445  if (!floatType.isF64())
446  arg = rewriter.create<LLVM::FPExtOp>(
447  loc, typeConverter->convertType(rewriter.getF64Type()), arg);
448  arg = rewriter.create<LLVM::BitcastOp>(loc, llvmI64, arg);
449  }
450  if (arg.getType().getIntOrFloatBitWidth() != 64)
451  arg = rewriter.create<LLVM::ZExtOp>(loc, llvmI64, arg);
452 
453  arguments.push_back(arg);
454  }
455  // Pad out to 7 arguments since the hostcall always needs 7
456  for (size_t extra = numArgsThisCall; extra < argsPerAppend; ++extra) {
457  arguments.push_back(zeroI64);
458  }
459 
460  auto isLast = (bound == nArgs) ? oneI32 : zeroI32;
461  arguments.push_back(isLast);
462  auto call = rewriter.create<LLVM::CallOp>(loc, ocklAppendArgs, arguments);
463  printfDesc = call.getResult();
464  }
465  rewriter.eraseOp(gpuPrintfOp);
466  return success();
467 }
468 
470  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
471  ConversionPatternRewriter &rewriter) const {
472  Location loc = gpuPrintfOp->getLoc();
473 
474  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8));
475  mlir::Type ptrType =
476  LLVM::LLVMPointerType::get(rewriter.getContext(), addressSpace);
477 
478  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
479  // This ensures that global constants and declarations are placed within
480  // the device code, not the host code
481  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
482 
483  auto printfType =
484  LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType},
485  /*isVarArg=*/true);
486  LLVM::LLVMFuncOp printfDecl =
487  getOrDefineFunction(moduleOp, loc, rewriter, "printf", printfType);
488 
489  // Get a unique global name for the format.
490  SmallString<16> stringConstName = getUniqueFormatGlobalName(moduleOp);
491 
492  llvm::SmallString<20> formatString(adaptor.getFormat());
493  formatString.push_back('\0'); // Null terminate for C
494  auto globalType =
495  LLVM::LLVMArrayType::get(llvmI8, formatString.size_in_bytes());
496  LLVM::GlobalOp global;
497  {
499  rewriter.setInsertionPointToStart(moduleOp.getBody());
500  global = rewriter.create<LLVM::GlobalOp>(
501  loc, globalType,
502  /*isConstant=*/true, LLVM::Linkage::Internal, stringConstName,
503  rewriter.getStringAttr(formatString), /*allignment=*/0, addressSpace);
504  }
505 
506  // Get a pointer to the format string's first element
507  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(
508  loc,
509  LLVM::LLVMPointerType::get(rewriter.getContext(), global.getAddrSpace()),
510  global.getSymNameAttr());
511  Value stringStart = rewriter.create<LLVM::GEPOp>(
512  loc, ptrType, globalType, globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
513 
514  // Construct arguments and function call
515  auto argsRange = adaptor.getArgs();
516  SmallVector<Value, 4> printfArgs;
517  printfArgs.reserve(argsRange.size() + 1);
518  printfArgs.push_back(stringStart);
519  printfArgs.append(argsRange.begin(), argsRange.end());
520 
521  rewriter.create<LLVM::CallOp>(loc, printfDecl, printfArgs);
522  rewriter.eraseOp(gpuPrintfOp);
523  return success();
524 }
525 
527  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
528  ConversionPatternRewriter &rewriter) const {
529  Location loc = gpuPrintfOp->getLoc();
530 
531  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8));
532  mlir::Type ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
533 
534  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
535  // This ensures that global constants and declarations are placed within
536  // the device code, not the host code
537  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
538 
539  auto vprintfType =
540  LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType, ptrType});
541  LLVM::LLVMFuncOp vprintfDecl =
542  getOrDefineFunction(moduleOp, loc, rewriter, "vprintf", vprintfType);
543 
544  // Get a unique global name for the format.
545  SmallString<16> stringConstName = getUniqueFormatGlobalName(moduleOp);
546 
547  llvm::SmallString<20> formatString(adaptor.getFormat());
548  formatString.push_back('\0'); // Null terminate for C
549  auto globalType =
550  LLVM::LLVMArrayType::get(llvmI8, formatString.size_in_bytes());
551  LLVM::GlobalOp global;
552  {
554  rewriter.setInsertionPointToStart(moduleOp.getBody());
555  global = rewriter.create<LLVM::GlobalOp>(
556  loc, globalType,
557  /*isConstant=*/true, LLVM::Linkage::Internal, stringConstName,
558  rewriter.getStringAttr(formatString), /*allignment=*/0);
559  }
560 
561  // Get a pointer to the format string's first element
562  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(loc, global);
563  Value stringStart = rewriter.create<LLVM::GEPOp>(
564  loc, ptrType, globalType, globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
565  SmallVector<Type> types;
566  SmallVector<Value> args;
567  // Promote and pack the arguments into a stack allocation.
568  for (Value arg : adaptor.getArgs()) {
569  Type type = arg.getType();
570  Value promotedArg = arg;
571  assert(type.isIntOrFloat());
572  if (isa<FloatType>(type)) {
573  type = rewriter.getF64Type();
574  promotedArg = rewriter.create<LLVM::FPExtOp>(loc, type, arg);
575  }
576  types.push_back(type);
577  args.push_back(promotedArg);
578  }
579  Type structType =
580  LLVM::LLVMStructType::getLiteral(gpuPrintfOp.getContext(), types);
581  Value one = rewriter.create<LLVM::ConstantOp>(loc, rewriter.getI64Type(),
582  rewriter.getIndexAttr(1));
583  Value tempAlloc =
584  rewriter.create<LLVM::AllocaOp>(loc, ptrType, structType, one,
585  /*alignment=*/0);
586  for (auto [index, arg] : llvm::enumerate(args)) {
587  Value ptr = rewriter.create<LLVM::GEPOp>(
588  loc, ptrType, structType, tempAlloc,
589  ArrayRef<LLVM::GEPArg>{0, static_cast<int32_t>(index)});
590  rewriter.create<LLVM::StoreOp>(loc, arg, ptr);
591  }
592  std::array<Value, 2> printfArgs = {stringStart, tempAlloc};
593 
594  rewriter.create<LLVM::CallOp>(loc, vprintfDecl, printfArgs);
595  rewriter.eraseOp(gpuPrintfOp);
596  return success();
597 }
598 
599 /// Unrolls op if it's operating on vectors.
600 LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands,
601  ConversionPatternRewriter &rewriter,
602  const LLVMTypeConverter &converter) {
603  TypeRange operandTypes(operands);
604  if (llvm::none_of(operandTypes, llvm::IsaPred<VectorType>)) {
605  return rewriter.notifyMatchFailure(op, "expected vector operand");
606  }
607  if (op->getNumRegions() != 0 || op->getNumSuccessors() != 0)
608  return rewriter.notifyMatchFailure(op, "expected no region/successor");
609  if (op->getNumResults() != 1)
610  return rewriter.notifyMatchFailure(op, "expected single result");
611  VectorType vectorType = dyn_cast<VectorType>(op->getResult(0).getType());
612  if (!vectorType)
613  return rewriter.notifyMatchFailure(op, "expected vector result");
614 
615  Location loc = op->getLoc();
616  Value result = rewriter.create<LLVM::UndefOp>(loc, vectorType);
617  Type indexType = converter.convertType(rewriter.getIndexType());
618  StringAttr name = op->getName().getIdentifier();
619  Type elementType = vectorType.getElementType();
620 
621  for (int64_t i = 0; i < vectorType.getNumElements(); ++i) {
622  Value index = rewriter.create<LLVM::ConstantOp>(loc, indexType, i);
623  auto extractElement = [&](Value operand) -> Value {
624  if (!isa<VectorType>(operand.getType()))
625  return operand;
626  return rewriter.create<LLVM::ExtractElementOp>(loc, operand, index);
627  };
628  auto scalarOperands = llvm::map_to_vector(operands, extractElement);
629  Operation *scalarOp =
630  rewriter.create(loc, name, scalarOperands, elementType, op->getAttrs());
631  result = rewriter.create<LLVM::InsertElementOp>(
632  loc, result, scalarOp->getResult(0), index);
633  }
634 
635  rewriter.replaceOp(op, result);
636  return success();
637 }
638 
639 static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space) {
640  return IntegerAttr::get(IntegerType::get(ctx, 64), space);
641 }
642 
643 /// Generates a symbol with 0-sized array type for dynamic shared memory usage,
644 /// or uses existing symbol.
645 LLVM::GlobalOp
647  Operation *moduleOp, gpu::DynamicSharedMemoryOp op,
648  const LLVMTypeConverter *typeConverter,
649  MemRefType memrefType, unsigned alignmentBit) {
650  uint64_t alignmentByte = alignmentBit / memrefType.getElementTypeBitWidth();
651 
652  FailureOr<unsigned> addressSpace =
653  typeConverter->getMemRefAddressSpace(memrefType);
654  if (failed(addressSpace)) {
655  op->emitError() << "conversion of memref memory space "
656  << memrefType.getMemorySpace()
657  << " to integer address space "
658  "failed. Consider adding memory space conversions.";
659  }
660 
661  // Step 1. Collect symbol names of LLVM::GlobalOp Ops. Also if any of
662  // LLVM::GlobalOp is suitable for shared memory, return it.
663  llvm::StringSet<> existingGlobalNames;
664  for (auto globalOp :
665  moduleOp->getRegion(0).front().getOps<LLVM::GlobalOp>()) {
666  existingGlobalNames.insert(globalOp.getSymName());
667  if (auto arrayType = dyn_cast<LLVM::LLVMArrayType>(globalOp.getType())) {
668  if (globalOp.getAddrSpace() == addressSpace.value() &&
669  arrayType.getNumElements() == 0 &&
670  globalOp.getAlignment().value_or(0) == alignmentByte) {
671  return globalOp;
672  }
673  }
674  }
675 
676  // Step 2. Find a unique symbol name
677  unsigned uniquingCounter = 0;
678  SmallString<128> symName = SymbolTable::generateSymbolName<128>(
679  "__dynamic_shmem_",
680  [&](StringRef candidate) {
681  return existingGlobalNames.contains(candidate);
682  },
683  uniquingCounter);
684 
685  // Step 3. Generate a global op
686  OpBuilder::InsertionGuard guard(rewriter);
687  rewriter.setInsertionPoint(&moduleOp->getRegion(0).front().front());
688 
689  auto zeroSizedArrayType = LLVM::LLVMArrayType::get(
690  typeConverter->convertType(memrefType.getElementType()), 0);
691 
692  return rewriter.create<LLVM::GlobalOp>(
693  op->getLoc(), zeroSizedArrayType, /*isConstant=*/false,
694  LLVM::Linkage::Internal, symName, /*value=*/Attribute(), alignmentByte,
695  addressSpace.value());
696 }
697 
699  gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
700  ConversionPatternRewriter &rewriter) const {
701  Location loc = op.getLoc();
702  MemRefType memrefType = op.getResultMemref().getType();
703  Type elementType = typeConverter->convertType(memrefType.getElementType());
704 
705  // Step 1: Generate a memref<0xi8> type
706  MemRefLayoutAttrInterface layout = {};
707  auto memrefType0sz =
708  MemRefType::get({0}, elementType, layout, memrefType.getMemorySpace());
709 
710  // Step 2: Generate a global symbol or existing for the dynamic shared
711  // memory with memref<0xi8> type
712  LLVM::LLVMFuncOp funcOp = op->getParentOfType<LLVM::LLVMFuncOp>();
713  LLVM::GlobalOp shmemOp = {};
714  Operation *moduleOp = funcOp->getParentWithTrait<OpTrait::SymbolTable>();
716  rewriter, moduleOp, op, getTypeConverter(), memrefType0sz, alignmentBit);
717 
718  // Step 3. Get address of the global symbol
719  OpBuilder::InsertionGuard guard(rewriter);
720  rewriter.setInsertionPoint(op);
721  auto basePtr = rewriter.create<LLVM::AddressOfOp>(loc, shmemOp);
722  Type baseType = basePtr->getResultTypes().front();
723 
724  // Step 4. Generate GEP using offsets
725  SmallVector<LLVM::GEPArg> gepArgs = {0};
726  Value shmemPtr = rewriter.create<LLVM::GEPOp>(loc, baseType, elementType,
727  basePtr, gepArgs);
728  // Step 5. Create a memref descriptor
729  SmallVector<Value> shape, strides;
730  Value sizeBytes;
731  getMemRefDescriptorSizes(loc, memrefType0sz, {}, rewriter, shape, strides,
732  sizeBytes);
733  auto memRefDescriptor = this->createMemRefDescriptor(
734  loc, memrefType0sz, shmemPtr, shmemPtr, shape, strides, rewriter);
735 
736  // Step 5. Replace the op with memref descriptor
737  rewriter.replaceOp(op, {memRefDescriptor});
738  return success();
739 }
740 
742  gpu::ReturnOp op, OpAdaptor adaptor,
743  ConversionPatternRewriter &rewriter) const {
744  Location loc = op.getLoc();
745  unsigned numArguments = op.getNumOperands();
746  SmallVector<Value, 4> updatedOperands;
747 
748  bool useBarePtrCallConv = getTypeConverter()->getOptions().useBarePtrCallConv;
749  if (useBarePtrCallConv) {
750  // For the bare-ptr calling convention, extract the aligned pointer to
751  // be returned from the memref descriptor.
752  for (auto it : llvm::zip(op->getOperands(), adaptor.getOperands())) {
753  Type oldTy = std::get<0>(it).getType();
754  Value newOperand = std::get<1>(it);
755  if (isa<MemRefType>(oldTy) && getTypeConverter()->canConvertToBarePtr(
756  cast<BaseMemRefType>(oldTy))) {
757  MemRefDescriptor memrefDesc(newOperand);
758  newOperand = memrefDesc.allocatedPtr(rewriter, loc);
759  } else if (isa<UnrankedMemRefType>(oldTy)) {
760  // Unranked memref is not supported in the bare pointer calling
761  // convention.
762  return failure();
763  }
764  updatedOperands.push_back(newOperand);
765  }
766  } else {
767  updatedOperands = llvm::to_vector<4>(adaptor.getOperands());
768  (void)copyUnrankedDescriptors(rewriter, loc, op.getOperands().getTypes(),
769  updatedOperands,
770  /*toDynamic=*/true);
771  }
772 
773  // If ReturnOp has 0 or 1 operand, create it and return immediately.
774  if (numArguments <= 1) {
775  rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(
776  op, TypeRange(), updatedOperands, op->getAttrs());
777  return success();
778  }
779 
780  // Otherwise, we need to pack the arguments into an LLVM struct type before
781  // returning.
782  auto packedType = getTypeConverter()->packFunctionResults(
783  op.getOperandTypes(), useBarePtrCallConv);
784  if (!packedType) {
785  return rewriter.notifyMatchFailure(op, "could not convert result types");
786  }
787 
788  Value packed = rewriter.create<LLVM::UndefOp>(loc, packedType);
789  for (auto [idx, operand] : llvm::enumerate(updatedOperands)) {
790  packed = rewriter.create<LLVM::InsertValueOp>(loc, packed, operand, idx);
791  }
792  rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, TypeRange(), packed,
793  op->getAttrs());
794  return success();
795 }
796 
798  TypeConverter &typeConverter, const MemorySpaceMapping &mapping) {
799  typeConverter.addTypeAttributeConversion(
800  [mapping](BaseMemRefType type, gpu::AddressSpaceAttr memorySpaceAttr) {
801  gpu::AddressSpace memorySpace = memorySpaceAttr.getValue();
802  unsigned addressSpace = mapping(memorySpace);
803  return wrapNumericMemorySpace(memorySpaceAttr.getContext(),
804  addressSpace);
805  });
806 }
static LLVM::LLVMFuncOp getOrDefineFunction(T &moduleOp, const Location loc, ConversionPatternRewriter &rewriter, StringRef name, LLVM::LLVMFunctionType type)
LLVM::GlobalOp getDynamicSharedMemorySymbol(ConversionPatternRewriter &rewriter, Operation *moduleOp, gpu::DynamicSharedMemoryOp op, const LLVMTypeConverter *typeConverter, MemRefType memrefType, unsigned alignmentBit)
Generates a symbol with 0-sized array type for dynamic shared memory usage, or uses existing symbol.
static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space)
static SmallString< 16 > getUniqueFormatGlobalName(gpu::GPUModuleOp moduleOp)
static std::string diag(const llvm::Value &value)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class provides a shared interface for ranked and unranked memref types.
Definition: BuiltinTypes.h:146
This class represents an argument of a Block.
Definition: Value.h:319
Operation & front()
Definition: Block.h:151
iterator_range< op_iterator< OpT > > getOps()
Return an iterator range over the operations within this block that are of 'OpT'.
Definition: Block.h:191
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:136
UnitAttr getUnitAttr()
Definition: Builders.cpp:126
IntegerType getI64Type()
Definition: Builders.cpp:97
IntegerType getI32Type()
Definition: Builders.cpp:95
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:140
IntegerType getIntegerType(unsigned width)
Definition: Builders.cpp:99
Ty getType(Args &&...args)
Get or construct an instance of the type Ty with provided arguments.
Definition: Builders.h:96
StringAttr getStringAttr(const Twine &bytes)
Definition: Builders.cpp:281
MLIRContext * getContext() const
Definition: Builders.h:55
IndexType getIndexType()
Definition: Builders.cpp:83
IntegerType getI8Type()
Definition: Builders.cpp:91
DictionaryAttr getDictionaryAttr(ArrayRef< NamedAttribute > value)
Definition: Builders.cpp:132
NamedAttribute getNamedAttr(StringRef name, Attribute val)
Definition: Builders.cpp:122
FloatType getF64Type()
Definition: Builders.cpp:77
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
Definition: Builders.h:103
This class implements a pattern rewriter for use with ConversionPatterns.
void replaceOp(Operation *op, ValueRange newValues) override
PatternRewriter hook for replacing an operation.
FailureOr< Block * > convertRegionTypes(Region *region, const TypeConverter &converter, TypeConverter::SignatureConversion *entryConversion=nullptr)
Apply a signature conversion to each block in the given region.
void eraseOp(Operation *op) override
PatternRewriter hook for erasing a dead operation.
const TypeConverter * typeConverter
An optional type converter for use by this pattern.
MemRefDescriptor createMemRefDescriptor(Location loc, MemRefType memRefType, Value allocatedPtr, Value alignedPtr, ArrayRef< Value > sizes, ArrayRef< Value > strides, ConversionPatternRewriter &rewriter) const
Creates and populates a canonical memref descriptor struct.
Definition: Pattern.cpp:218
void getMemRefDescriptorSizes(Location loc, MemRefType memRefType, ValueRange dynamicSizes, ConversionPatternRewriter &rewriter, SmallVectorImpl< Value > &sizes, SmallVectorImpl< Value > &strides, Value &size, bool sizeInBytes=true) const
Computes sizes, strides and buffer size of memRefType with identity layout.
Definition: Pattern.cpp:114
const LLVMTypeConverter * getTypeConverter() const
Definition: Pattern.cpp:27
LLVM::LLVMDialect & getDialect() const
Returns the LLVM dialect.
Definition: Pattern.cpp:32
LogicalResult copyUnrankedDescriptors(OpBuilder &builder, Location loc, TypeRange origTypes, SmallVectorImpl< Value > &operands, bool toDynamic) const
Copies the memory descriptor for any operands that were unranked descriptors originally to heap-allocated buffers, if toDynamic is true.
Definition: Pattern.cpp:247
This class contains all of the information necessary to report a diagnostic to the DiagnosticEngine.
Definition: Diagnostics.h:155
Conversion from types to the LLVM IR dialect.
Definition: TypeConverter.h:35
Type packFunctionResults(TypeRange types, bool useBarePointerCallConv=false) const
Convert a non-empty list of types to be returned from a function into an LLVM-compatible type.
const LowerToLLVMOptions & getOptions() const
Type convertFunctionSignature(FunctionType funcTy, bool isVariadic, bool useBarePtrCallConv, SignatureConversion &result) const
Convert a function type.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
FailureOr< unsigned > getMemRefAddressSpace(BaseMemRefType type) const
Return the LLVM address space corresponding to the memory space of the memref type type or failure if...
static LLVMStructType getLiteral(MLIRContext *context, ArrayRef< Type > types, bool isPacked=false)
Gets or creates a literal struct with the given body in the provided context.
Definition: LLVMTypes.cpp:452
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:63
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
Helper class to produce LLVM dialect operations extracting or inserting elements of a MemRef descriptor.
Definition: MemRefBuilder.h:33
Value allocatedPtr(OpBuilder &builder, Location loc)
Builds IR extracting the allocated pointer from the descriptor.
static MemRefDescriptor fromStaticShape(OpBuilder &builder, Location loc, const LLVMTypeConverter &typeConverter, MemRefType type, Value memory)
Builds IR creating a MemRef descriptor that represents type and populates it with static shape and st...
NamedAttrList is array of NamedAttributes that tracks whether it is sorted and does some basic work t...
Attribute erase(StringAttr name)
Erase the attribute with the given name from the list.
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:353
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:436
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:403
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:476
A trait used to provide symbol table functionalities to a region operation.
Definition: SymbolTable.h:435
type_range getTypes() const
Definition: ValueRange.cpp:26
StringAttr getIdentifier() const
Return the name of this operation as a StringAttr.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
unsigned getNumSuccessors()
Definition: Operation.h:702
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:402
unsigned getNumRegions()
Returns the number of regions held by this operation.
Definition: Operation.h:669
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:341
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition: Operation.h:507
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:268
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:682
Operation * getParentWithTrait()
Returns the closest surrounding parent operation with trait Trait.
Definition: Operation.h:248
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_type_range getOperandTypes()
Definition: Operation.h:392
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:373
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:399
Block & front()
Definition: Region.h:65
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition: PatternMatch.h:718
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
Definition: PatternMatch.h:630
void inlineRegionBefore(Region &region, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition: PatternMatch.h:536
static StringRef getSymbolAttrName()
Return the name of the attribute used for symbol names.
Definition: SymbolTable.h:76
This class provides all of the information necessary to convert a type signature.
std::optional< InputMapping > getInputMapping(unsigned input) const
Get the input mapping for the given argument.
void remapInput(unsigned origInputNo, Value replacement)
Remap an input of the original signature to another replacement value.
Type conversion class.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
void addTypeAttributeConversion(FnT &&callback)
Register a conversion function for attributes within types.
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:36
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
Definition: Types.h:74
bool isIntOrFloat() const
Return true if this is an integer (of any signedness) or a float type.
Definition: Types.cpp:122
unsigned getIntOrFloatBitWidth() const
Return the bit width of an integer or a float type, assert failure on other types.
Definition: Types.cpp:128
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:381
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Unrolls op if it's operating on vectors.
Include the generated interface declarations.
InFlightDiagnostic emitWarning(Location loc)
Utility method to emit a warning message using this location.
std::function< unsigned(gpu::AddressSpace)> MemorySpaceMapping
A function that maps a MemorySpace enum to a target-specific integer value.
Definition: GPUCommonPass.h:70
void populateGpuMemorySpaceAttributeConversions(TypeConverter &typeConverter, const MemorySpaceMapping &mapping)
Populates memory space attribute conversion rules for lowering gpu.address_space to integer values.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
LogicalResult matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override