MLIR  20.0.0git
GPUOpsLowering.cpp
Go to the documentation of this file.
1 //===- GPUOpsLowering.cpp - GPU FuncOp / ReturnOp lowering ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include "GPUOpsLowering.h"

#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
#include "llvm/ADT/SmallVectorExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/FormatVariadic.h"
19 
20 using namespace mlir;
21 
22 LLVM::LLVMFuncOp mlir::getOrDefineFunction(gpu::GPUModuleOp moduleOp,
23  Location loc, OpBuilder &b,
24  StringRef name,
25  LLVM::LLVMFunctionType type) {
26  LLVM::LLVMFuncOp ret;
27  if (!(ret = moduleOp.template lookupSymbol<LLVM::LLVMFuncOp>(name))) {
29  b.setInsertionPointToStart(moduleOp.getBody());
30  ret = b.create<LLVM::LLVMFuncOp>(loc, name, type, LLVM::Linkage::External);
31  }
32  return ret;
33 }
34 
35 static SmallString<16> getUniqueSymbolName(gpu::GPUModuleOp moduleOp,
36  StringRef prefix) {
37  // Get a unique global name.
38  unsigned stringNumber = 0;
39  SmallString<16> stringConstName;
40  do {
41  stringConstName.clear();
42  (prefix + Twine(stringNumber++)).toStringRef(stringConstName);
43  } while (moduleOp.lookupSymbol(stringConstName));
44  return stringConstName;
45 }
46 
47 LLVM::GlobalOp
49  gpu::GPUModuleOp moduleOp, Type llvmI8,
50  StringRef namePrefix, StringRef str,
51  uint64_t alignment, unsigned addrSpace) {
52  llvm::SmallString<20> nullTermStr(str);
53  nullTermStr.push_back('\0'); // Null terminate for C
54  auto globalType =
55  LLVM::LLVMArrayType::get(llvmI8, nullTermStr.size_in_bytes());
56  StringAttr attr = b.getStringAttr(nullTermStr);
57 
58  // Try to find existing global.
59  for (auto globalOp : moduleOp.getOps<LLVM::GlobalOp>())
60  if (globalOp.getGlobalType() == globalType && globalOp.getConstant() &&
61  globalOp.getValueAttr() == attr &&
62  globalOp.getAlignment().value_or(0) == alignment &&
63  globalOp.getAddrSpace() == addrSpace)
64  return globalOp;
65 
66  // Not found: create new global.
68  b.setInsertionPointToStart(moduleOp.getBody());
69  SmallString<16> name = getUniqueSymbolName(moduleOp, namePrefix);
70  return b.create<LLVM::GlobalOp>(loc, globalType,
71  /*isConstant=*/true, LLVM::Linkage::Internal,
72  name, attr, alignment, addrSpace);
73 }
74 
75 LogicalResult
76 GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
77  ConversionPatternRewriter &rewriter) const {
78  Location loc = gpuFuncOp.getLoc();
79 
80  SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
81  if (encodeWorkgroupAttributionsAsArguments) {
82  // Append an `llvm.ptr` argument to the function signature to encode
83  // workgroup attributions.
84 
85  ArrayRef<BlockArgument> workgroupAttributions =
86  gpuFuncOp.getWorkgroupAttributions();
87  size_t numAttributions = workgroupAttributions.size();
88 
89  // Insert all arguments at the end.
90  unsigned index = gpuFuncOp.getNumArguments();
91  SmallVector<unsigned> argIndices(numAttributions, index);
92 
93  // New arguments will simply be `llvm.ptr` with the correct address space
94  Type workgroupPtrType =
95  rewriter.getType<LLVM::LLVMPointerType>(workgroupAddrSpace);
96  SmallVector<Type> argTypes(numAttributions, workgroupPtrType);
97 
98  // Attributes: noalias, llvm.mlir.workgroup_attribution(<size>, <type>)
99  std::array attrs{
100  rewriter.getNamedAttr(LLVM::LLVMDialect::getNoAliasAttrName(),
101  rewriter.getUnitAttr()),
102  rewriter.getNamedAttr(
103  getDialect().getWorkgroupAttributionAttrHelper().getName(),
104  rewriter.getUnitAttr()),
105  };
107  for (BlockArgument attribution : workgroupAttributions) {
108  auto attributionType = cast<MemRefType>(attribution.getType());
109  IntegerAttr numElements =
110  rewriter.getI64IntegerAttr(attributionType.getNumElements());
111  Type llvmElementType =
112  getTypeConverter()->convertType(attributionType.getElementType());
113  if (!llvmElementType)
114  return failure();
115  TypeAttr type = TypeAttr::get(llvmElementType);
116  attrs.back().setValue(
117  rewriter.getAttr<LLVM::WorkgroupAttributionAttr>(numElements, type));
118  argAttrs.push_back(rewriter.getDictionaryAttr(attrs));
119  }
120 
121  // Location match function location
122  SmallVector<Location> argLocs(numAttributions, gpuFuncOp.getLoc());
123 
124  // Perform signature modification
125  rewriter.modifyOpInPlace(
126  gpuFuncOp, [gpuFuncOp, &argIndices, &argTypes, &argAttrs, &argLocs]() {
127  static_cast<FunctionOpInterface>(gpuFuncOp).insertArguments(
128  argIndices, argTypes, argAttrs, argLocs);
129  });
130  } else {
131  workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
132  for (auto [idx, attribution] :
133  llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
134  auto type = dyn_cast<MemRefType>(attribution.getType());
135  assert(type && type.hasStaticShape() && "unexpected type in attribution");
136 
137  uint64_t numElements = type.getNumElements();
138 
139  auto elementType =
140  cast<Type>(typeConverter->convertType(type.getElementType()));
141  auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
142  std::string name =
143  std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx));
144  uint64_t alignment = 0;
145  if (auto alignAttr = dyn_cast_or_null<IntegerAttr>(
146  gpuFuncOp.getWorkgroupAttributionAttr(
147  idx, LLVM::LLVMDialect::getAlignAttrName())))
148  alignment = alignAttr.getInt();
149  auto globalOp = rewriter.create<LLVM::GlobalOp>(
150  gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
151  LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment,
152  workgroupAddrSpace);
153  workgroupBuffers.push_back(globalOp);
154  }
155  }
156 
157  // Remap proper input types.
158  TypeConverter::SignatureConversion signatureConversion(
159  gpuFuncOp.front().getNumArguments());
160 
162  gpuFuncOp.getFunctionType(), /*isVariadic=*/false,
163  getTypeConverter()->getOptions().useBarePtrCallConv, signatureConversion);
164  if (!funcType) {
165  return rewriter.notifyMatchFailure(gpuFuncOp, [&](Diagnostic &diag) {
166  diag << "failed to convert function signature type for: "
167  << gpuFuncOp.getFunctionType();
168  });
169  }
170 
171  // Create the new function operation. Only copy those attributes that are
172  // not specific to function modeling.
174  ArrayAttr argAttrs;
175  for (const auto &attr : gpuFuncOp->getAttrs()) {
176  if (attr.getName() == SymbolTable::getSymbolAttrName() ||
177  attr.getName() == gpuFuncOp.getFunctionTypeAttrName() ||
178  attr.getName() ==
179  gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() ||
180  attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() ||
181  attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName() ||
182  attr.getName() == gpuFuncOp.getKnownBlockSizeAttrName() ||
183  attr.getName() == gpuFuncOp.getKnownGridSizeAttrName())
184  continue;
185  if (attr.getName() == gpuFuncOp.getArgAttrsAttrName()) {
186  argAttrs = gpuFuncOp.getArgAttrsAttr();
187  continue;
188  }
189  attributes.push_back(attr);
190  }
191 
192  DenseI32ArrayAttr knownBlockSize = gpuFuncOp.getKnownBlockSizeAttr();
193  DenseI32ArrayAttr knownGridSize = gpuFuncOp.getKnownGridSizeAttr();
194  // Ensure we don't lose information if the function is lowered before its
195  // surrounding context.
196  auto *gpuDialect = cast<gpu::GPUDialect>(gpuFuncOp->getDialect());
197  if (knownBlockSize)
198  attributes.emplace_back(gpuDialect->getKnownBlockSizeAttrHelper().getName(),
199  knownBlockSize);
200  if (knownGridSize)
201  attributes.emplace_back(gpuDialect->getKnownGridSizeAttrHelper().getName(),
202  knownGridSize);
203 
204  // Add a dialect specific kernel attribute in addition to GPU kernel
205  // attribute. The former is necessary for further translation while the
206  // latter is expected by gpu.launch_func.
207  if (gpuFuncOp.isKernel()) {
208  if (kernelAttributeName)
209  attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr());
210  // Set the dialect-specific block size attribute if there is one.
211  if (kernelBlockSizeAttributeName && knownBlockSize) {
212  attributes.emplace_back(kernelBlockSizeAttributeName, knownBlockSize);
213  }
214  }
215  LLVM::CConv callingConvention = gpuFuncOp.isKernel()
216  ? kernelCallingConvention
217  : nonKernelCallingConvention;
218  auto llvmFuncOp = rewriter.create<LLVM::LLVMFuncOp>(
219  gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType,
220  LLVM::Linkage::External, /*dsoLocal=*/false, callingConvention,
221  /*comdat=*/nullptr, attributes);
222 
223  {
224  // Insert operations that correspond to converted workgroup and private
225  // memory attributions to the body of the function. This must operate on
226  // the original function, before the body region is inlined in the new
227  // function to maintain the relation between block arguments and the
228  // parent operation that assigns their semantics.
229  OpBuilder::InsertionGuard guard(rewriter);
230 
231  // Rewrite workgroup memory attributions to addresses of global buffers.
232  rewriter.setInsertionPointToStart(&gpuFuncOp.front());
233  unsigned numProperArguments = gpuFuncOp.getNumArguments();
234 
235  if (encodeWorkgroupAttributionsAsArguments) {
236  // Build a MemRefDescriptor with each of the arguments added above.
237 
238  unsigned numAttributions = gpuFuncOp.getNumWorkgroupAttributions();
239  assert(numProperArguments >= numAttributions &&
240  "Expecting attributions to be encoded as arguments already");
241 
242  // Arguments encoding workgroup attributions will be in positions
243  // [numProperArguments, numProperArguments+numAttributions)
244  ArrayRef<BlockArgument> attributionArguments =
245  gpuFuncOp.getArguments().slice(numProperArguments - numAttributions,
246  numAttributions);
247  for (auto [idx, vals] : llvm::enumerate(llvm::zip_equal(
248  gpuFuncOp.getWorkgroupAttributions(), attributionArguments))) {
249  auto [attribution, arg] = vals;
250  auto type = cast<MemRefType>(attribution.getType());
251 
252  // Arguments are of llvm.ptr type and attributions are of memref type:
253  // we need to wrap them in memref descriptors.
255  rewriter, loc, *getTypeConverter(), type, arg);
256 
257  // And remap the arguments
258  signatureConversion.remapInput(numProperArguments + idx, descr);
259  }
260  } else {
261  for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) {
262  auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(),
263  global.getAddrSpace());
264  Value address = rewriter.create<LLVM::AddressOfOp>(
265  loc, ptrType, global.getSymNameAttr());
266  Value memory =
267  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getType(),
268  address, ArrayRef<LLVM::GEPArg>{0, 0});
269 
270  // Build a memref descriptor pointing to the buffer to plug with the
271  // existing memref infrastructure. This may use more registers than
272  // otherwise necessary given that memref sizes are fixed, but we can try
273  // and canonicalize that away later.
274  Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx];
275  auto type = cast<MemRefType>(attribution.getType());
277  rewriter, loc, *getTypeConverter(), type, memory);
278  signatureConversion.remapInput(numProperArguments + idx, descr);
279  }
280  }
281 
282  // Rewrite private memory attributions to alloca'ed buffers.
283  unsigned numWorkgroupAttributions = gpuFuncOp.getNumWorkgroupAttributions();
284  auto int64Ty = IntegerType::get(rewriter.getContext(), 64);
285  for (const auto [idx, attribution] :
286  llvm::enumerate(gpuFuncOp.getPrivateAttributions())) {
287  auto type = cast<MemRefType>(attribution.getType());
288  assert(type && type.hasStaticShape() && "unexpected type in attribution");
289 
290  // Explicitly drop memory space when lowering private memory
291  // attributions since NVVM models it as `alloca`s in the default
292  // memory space and does not support `alloca`s with addrspace(5).
293  Type elementType = typeConverter->convertType(type.getElementType());
294  auto ptrType =
295  LLVM::LLVMPointerType::get(rewriter.getContext(), allocaAddrSpace);
296  Value numElements = rewriter.create<LLVM::ConstantOp>(
297  gpuFuncOp.getLoc(), int64Ty, type.getNumElements());
298  uint64_t alignment = 0;
299  if (auto alignAttr =
300  dyn_cast_or_null<IntegerAttr>(gpuFuncOp.getPrivateAttributionAttr(
301  idx, LLVM::LLVMDialect::getAlignAttrName())))
302  alignment = alignAttr.getInt();
303  Value allocated = rewriter.create<LLVM::AllocaOp>(
304  gpuFuncOp.getLoc(), ptrType, elementType, numElements, alignment);
306  rewriter, loc, *getTypeConverter(), type, allocated);
307  signatureConversion.remapInput(
308  numProperArguments + numWorkgroupAttributions + idx, descr);
309  }
310  }
311 
312  // Move the region to the new function, update the entry block signature.
313  rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(),
314  llvmFuncOp.end());
315  if (failed(rewriter.convertRegionTypes(&llvmFuncOp.getBody(), *typeConverter,
316  &signatureConversion)))
317  return failure();
318 
319  // Get memref type from function arguments and set the noalias to
320  // pointer arguments.
321  for (const auto [idx, argTy] :
322  llvm::enumerate(gpuFuncOp.getArgumentTypes())) {
323  auto remapping = signatureConversion.getInputMapping(idx);
324  NamedAttrList argAttr =
325  argAttrs ? cast<DictionaryAttr>(argAttrs[idx]) : NamedAttrList();
326  auto copyAttribute = [&](StringRef attrName) {
327  Attribute attr = argAttr.erase(attrName);
328  if (!attr)
329  return;
330  for (size_t i = 0, e = remapping->size; i < e; ++i)
331  llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
332  };
333  auto copyPointerAttribute = [&](StringRef attrName) {
334  Attribute attr = argAttr.erase(attrName);
335 
336  if (!attr)
337  return;
338  if (remapping->size > 1 &&
339  attrName == LLVM::LLVMDialect::getNoAliasAttrName()) {
340  emitWarning(llvmFuncOp.getLoc(),
341  "Cannot copy noalias with non-bare pointers.\n");
342  return;
343  }
344  for (size_t i = 0, e = remapping->size; i < e; ++i) {
345  if (isa<LLVM::LLVMPointerType>(
346  llvmFuncOp.getArgument(remapping->inputNo + i).getType())) {
347  llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
348  }
349  }
350  };
351 
352  if (argAttr.empty())
353  continue;
354 
355  copyAttribute(LLVM::LLVMDialect::getReturnedAttrName());
356  copyAttribute(LLVM::LLVMDialect::getNoUndefAttrName());
357  copyAttribute(LLVM::LLVMDialect::getInRegAttrName());
358  bool lowersToPointer = false;
359  for (size_t i = 0, e = remapping->size; i < e; ++i) {
360  lowersToPointer |= isa<LLVM::LLVMPointerType>(
361  llvmFuncOp.getArgument(remapping->inputNo + i).getType());
362  }
363 
364  if (lowersToPointer) {
365  copyPointerAttribute(LLVM::LLVMDialect::getNoAliasAttrName());
366  copyPointerAttribute(LLVM::LLVMDialect::getNoCaptureAttrName());
367  copyPointerAttribute(LLVM::LLVMDialect::getNoFreeAttrName());
368  copyPointerAttribute(LLVM::LLVMDialect::getAlignAttrName());
369  copyPointerAttribute(LLVM::LLVMDialect::getReadonlyAttrName());
370  copyPointerAttribute(LLVM::LLVMDialect::getWriteOnlyAttrName());
371  copyPointerAttribute(LLVM::LLVMDialect::getReadnoneAttrName());
372  copyPointerAttribute(LLVM::LLVMDialect::getNonNullAttrName());
373  copyPointerAttribute(LLVM::LLVMDialect::getDereferenceableAttrName());
374  copyPointerAttribute(
375  LLVM::LLVMDialect::getDereferenceableOrNullAttrName());
376  copyPointerAttribute(
377  LLVM::LLVMDialect::WorkgroupAttributionAttrHelper::getNameStr());
378  }
379  }
380  rewriter.eraseOp(gpuFuncOp);
381  return success();
382 }
383 
385  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
386  ConversionPatternRewriter &rewriter) const {
387  Location loc = gpuPrintfOp->getLoc();
388 
389  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getI8Type());
390  auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
391  mlir::Type llvmI32 = typeConverter->convertType(rewriter.getI32Type());
392  mlir::Type llvmI64 = typeConverter->convertType(rewriter.getI64Type());
393  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
394  // This ensures that global constants and declarations are placed within
395  // the device code, not the host code
396  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
397 
398  auto ocklBegin =
399  getOrDefineFunction(moduleOp, loc, rewriter, "__ockl_printf_begin",
400  LLVM::LLVMFunctionType::get(llvmI64, {llvmI64}));
401  LLVM::LLVMFuncOp ocklAppendArgs;
402  if (!adaptor.getArgs().empty()) {
403  ocklAppendArgs = getOrDefineFunction(
404  moduleOp, loc, rewriter, "__ockl_printf_append_args",
406  llvmI64, {llvmI64, /*numArgs*/ llvmI32, llvmI64, llvmI64, llvmI64,
407  llvmI64, llvmI64, llvmI64, llvmI64, /*isLast*/ llvmI32}));
408  }
409  auto ocklAppendStringN = getOrDefineFunction(
410  moduleOp, loc, rewriter, "__ockl_printf_append_string_n",
412  llvmI64,
413  {llvmI64, ptrType, /*length (bytes)*/ llvmI64, /*isLast*/ llvmI32}));
414 
415  /// Start the printf hostcall
416  Value zeroI64 = rewriter.create<LLVM::ConstantOp>(loc, llvmI64, 0);
417  auto printfBeginCall = rewriter.create<LLVM::CallOp>(loc, ocklBegin, zeroI64);
418  Value printfDesc = printfBeginCall.getResult();
419 
420  // Create the global op or find an existing one.
421  LLVM::GlobalOp global = getOrCreateStringConstant(
422  rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat());
423 
424  // Get a pointer to the format string's first element and pass it to printf()
425  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(
426  loc,
427  LLVM::LLVMPointerType::get(rewriter.getContext(), global.getAddrSpace()),
428  global.getSymNameAttr());
429  Value stringStart =
430  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getGlobalType(),
431  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
432  Value stringLen = rewriter.create<LLVM::ConstantOp>(
433  loc, llvmI64, cast<StringAttr>(global.getValueAttr()).size());
434 
435  Value oneI32 = rewriter.create<LLVM::ConstantOp>(loc, llvmI32, 1);
436  Value zeroI32 = rewriter.create<LLVM::ConstantOp>(loc, llvmI32, 0);
437 
438  auto appendFormatCall = rewriter.create<LLVM::CallOp>(
439  loc, ocklAppendStringN,
440  ValueRange{printfDesc, stringStart, stringLen,
441  adaptor.getArgs().empty() ? oneI32 : zeroI32});
442  printfDesc = appendFormatCall.getResult();
443 
444  // __ockl_printf_append_args takes 7 values per append call
445  constexpr size_t argsPerAppend = 7;
446  size_t nArgs = adaptor.getArgs().size();
447  for (size_t group = 0; group < nArgs; group += argsPerAppend) {
448  size_t bound = std::min(group + argsPerAppend, nArgs);
449  size_t numArgsThisCall = bound - group;
450 
452  arguments.push_back(printfDesc);
453  arguments.push_back(
454  rewriter.create<LLVM::ConstantOp>(loc, llvmI32, numArgsThisCall));
455  for (size_t i = group; i < bound; ++i) {
456  Value arg = adaptor.getArgs()[i];
457  if (auto floatType = dyn_cast<FloatType>(arg.getType())) {
458  if (!floatType.isF64())
459  arg = rewriter.create<LLVM::FPExtOp>(
460  loc, typeConverter->convertType(rewriter.getF64Type()), arg);
461  arg = rewriter.create<LLVM::BitcastOp>(loc, llvmI64, arg);
462  }
463  if (arg.getType().getIntOrFloatBitWidth() != 64)
464  arg = rewriter.create<LLVM::ZExtOp>(loc, llvmI64, arg);
465 
466  arguments.push_back(arg);
467  }
468  // Pad out to 7 arguments since the hostcall always needs 7
469  for (size_t extra = numArgsThisCall; extra < argsPerAppend; ++extra) {
470  arguments.push_back(zeroI64);
471  }
472 
473  auto isLast = (bound == nArgs) ? oneI32 : zeroI32;
474  arguments.push_back(isLast);
475  auto call = rewriter.create<LLVM::CallOp>(loc, ocklAppendArgs, arguments);
476  printfDesc = call.getResult();
477  }
478  rewriter.eraseOp(gpuPrintfOp);
479  return success();
480 }
481 
483  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
484  ConversionPatternRewriter &rewriter) const {
485  Location loc = gpuPrintfOp->getLoc();
486 
487  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8));
488  mlir::Type ptrType =
489  LLVM::LLVMPointerType::get(rewriter.getContext(), addressSpace);
490 
491  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
492  // This ensures that global constants and declarations are placed within
493  // the device code, not the host code
494  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
495 
496  auto printfType =
497  LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType},
498  /*isVarArg=*/true);
499  LLVM::LLVMFuncOp printfDecl =
500  getOrDefineFunction(moduleOp, loc, rewriter, "printf", printfType);
501 
502  // Create the global op or find an existing one.
503  LLVM::GlobalOp global = getOrCreateStringConstant(
504  rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat(),
505  /*alignment=*/0, addressSpace);
506 
507  // Get a pointer to the format string's first element
508  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(
509  loc,
510  LLVM::LLVMPointerType::get(rewriter.getContext(), global.getAddrSpace()),
511  global.getSymNameAttr());
512  Value stringStart =
513  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getGlobalType(),
514  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
515 
516  // Construct arguments and function call
517  auto argsRange = adaptor.getArgs();
518  SmallVector<Value, 4> printfArgs;
519  printfArgs.reserve(argsRange.size() + 1);
520  printfArgs.push_back(stringStart);
521  printfArgs.append(argsRange.begin(), argsRange.end());
522 
523  rewriter.create<LLVM::CallOp>(loc, printfDecl, printfArgs);
524  rewriter.eraseOp(gpuPrintfOp);
525  return success();
526 }
527 
529  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
530  ConversionPatternRewriter &rewriter) const {
531  Location loc = gpuPrintfOp->getLoc();
532 
533  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8));
534  mlir::Type ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
535 
536  // Note: this is the GPUModule op, not the ModuleOp that surrounds it
537  // This ensures that global constants and declarations are placed within
538  // the device code, not the host code
539  auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
540 
541  auto vprintfType =
542  LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType, ptrType});
543  LLVM::LLVMFuncOp vprintfDecl =
544  getOrDefineFunction(moduleOp, loc, rewriter, "vprintf", vprintfType);
545 
546  // Create the global op or find an existing one.
547  LLVM::GlobalOp global = getOrCreateStringConstant(
548  rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat());
549 
550  // Get a pointer to the format string's first element
551  Value globalPtr = rewriter.create<LLVM::AddressOfOp>(loc, global);
552  Value stringStart =
553  rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getGlobalType(),
554  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
555  SmallVector<Type> types;
556  SmallVector<Value> args;
557  // Promote and pack the arguments into a stack allocation.
558  for (Value arg : adaptor.getArgs()) {
559  Type type = arg.getType();
560  Value promotedArg = arg;
561  assert(type.isIntOrFloat());
562  if (isa<FloatType>(type)) {
563  type = rewriter.getF64Type();
564  promotedArg = rewriter.create<LLVM::FPExtOp>(loc, type, arg);
565  }
566  types.push_back(type);
567  args.push_back(promotedArg);
568  }
569  Type structType =
570  LLVM::LLVMStructType::getLiteral(gpuPrintfOp.getContext(), types);
571  Value one = rewriter.create<LLVM::ConstantOp>(loc, rewriter.getI64Type(),
572  rewriter.getIndexAttr(1));
573  Value tempAlloc =
574  rewriter.create<LLVM::AllocaOp>(loc, ptrType, structType, one,
575  /*alignment=*/0);
576  for (auto [index, arg] : llvm::enumerate(args)) {
577  Value ptr = rewriter.create<LLVM::GEPOp>(
578  loc, ptrType, structType, tempAlloc,
579  ArrayRef<LLVM::GEPArg>{0, static_cast<int32_t>(index)});
580  rewriter.create<LLVM::StoreOp>(loc, arg, ptr);
581  }
582  std::array<Value, 2> printfArgs = {stringStart, tempAlloc};
583 
584  rewriter.create<LLVM::CallOp>(loc, vprintfDecl, printfArgs);
585  rewriter.eraseOp(gpuPrintfOp);
586  return success();
587 }
588 
589 /// Unrolls op if it's operating on vectors.
590 LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands,
591  ConversionPatternRewriter &rewriter,
592  const LLVMTypeConverter &converter) {
593  TypeRange operandTypes(operands);
594  if (llvm::none_of(operandTypes, llvm::IsaPred<VectorType>)) {
595  return rewriter.notifyMatchFailure(op, "expected vector operand");
596  }
597  if (op->getNumRegions() != 0 || op->getNumSuccessors() != 0)
598  return rewriter.notifyMatchFailure(op, "expected no region/successor");
599  if (op->getNumResults() != 1)
600  return rewriter.notifyMatchFailure(op, "expected single result");
601  VectorType vectorType = dyn_cast<VectorType>(op->getResult(0).getType());
602  if (!vectorType)
603  return rewriter.notifyMatchFailure(op, "expected vector result");
604 
605  Location loc = op->getLoc();
606  Value result = rewriter.create<LLVM::UndefOp>(loc, vectorType);
607  Type indexType = converter.convertType(rewriter.getIndexType());
608  StringAttr name = op->getName().getIdentifier();
609  Type elementType = vectorType.getElementType();
610 
611  for (int64_t i = 0; i < vectorType.getNumElements(); ++i) {
612  Value index = rewriter.create<LLVM::ConstantOp>(loc, indexType, i);
613  auto extractElement = [&](Value operand) -> Value {
614  if (!isa<VectorType>(operand.getType()))
615  return operand;
616  return rewriter.create<LLVM::ExtractElementOp>(loc, operand, index);
617  };
618  auto scalarOperands = llvm::map_to_vector(operands, extractElement);
619  Operation *scalarOp =
620  rewriter.create(loc, name, scalarOperands, elementType, op->getAttrs());
621  result = rewriter.create<LLVM::InsertElementOp>(
622  loc, result, scalarOp->getResult(0), index);
623  }
624 
625  rewriter.replaceOp(op, result);
626  return success();
627 }
628 
629 static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space) {
630  return IntegerAttr::get(IntegerType::get(ctx, 64), space);
631 }
632 
633 /// Generates a symbol with 0-sized array type for dynamic shared memory usage,
634 /// or uses existing symbol.
636  ConversionPatternRewriter &rewriter, gpu::GPUModuleOp moduleOp,
637  gpu::DynamicSharedMemoryOp op, const LLVMTypeConverter *typeConverter,
638  MemRefType memrefType, unsigned alignmentBit) {
639  uint64_t alignmentByte = alignmentBit / memrefType.getElementTypeBitWidth();
640 
641  FailureOr<unsigned> addressSpace =
642  typeConverter->getMemRefAddressSpace(memrefType);
643  if (failed(addressSpace)) {
644  op->emitError() << "conversion of memref memory space "
645  << memrefType.getMemorySpace()
646  << " to integer address space "
647  "failed. Consider adding memory space conversions.";
648  }
649 
650  // Step 1. Collect symbol names of LLVM::GlobalOp Ops. Also if any of
651  // LLVM::GlobalOp is suitable for shared memory, return it.
652  llvm::StringSet<> existingGlobalNames;
653  for (auto globalOp : moduleOp.getBody()->getOps<LLVM::GlobalOp>()) {
654  existingGlobalNames.insert(globalOp.getSymName());
655  if (auto arrayType = dyn_cast<LLVM::LLVMArrayType>(globalOp.getType())) {
656  if (globalOp.getAddrSpace() == addressSpace.value() &&
657  arrayType.getNumElements() == 0 &&
658  globalOp.getAlignment().value_or(0) == alignmentByte) {
659  return globalOp;
660  }
661  }
662  }
663 
664  // Step 2. Find a unique symbol name
665  unsigned uniquingCounter = 0;
666  SmallString<128> symName = SymbolTable::generateSymbolName<128>(
667  "__dynamic_shmem_",
668  [&](StringRef candidate) {
669  return existingGlobalNames.contains(candidate);
670  },
671  uniquingCounter);
672 
673  // Step 3. Generate a global op
674  OpBuilder::InsertionGuard guard(rewriter);
675  rewriter.setInsertionPointToStart(moduleOp.getBody());
676 
677  auto zeroSizedArrayType = LLVM::LLVMArrayType::get(
678  typeConverter->convertType(memrefType.getElementType()), 0);
679 
680  return rewriter.create<LLVM::GlobalOp>(
681  op->getLoc(), zeroSizedArrayType, /*isConstant=*/false,
682  LLVM::Linkage::Internal, symName, /*value=*/Attribute(), alignmentByte,
683  addressSpace.value());
684 }
685 
687  gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
688  ConversionPatternRewriter &rewriter) const {
689  Location loc = op.getLoc();
690  MemRefType memrefType = op.getResultMemref().getType();
691  Type elementType = typeConverter->convertType(memrefType.getElementType());
692 
693  // Step 1: Generate a memref<0xi8> type
694  MemRefLayoutAttrInterface layout = {};
695  auto memrefType0sz =
696  MemRefType::get({0}, elementType, layout, memrefType.getMemorySpace());
697 
698  // Step 2: Generate a global symbol or existing for the dynamic shared
699  // memory with memref<0xi8> type
700  auto moduleOp = op->getParentOfType<gpu::GPUModuleOp>();
701  LLVM::GlobalOp shmemOp = getDynamicSharedMemorySymbol(
702  rewriter, moduleOp, op, getTypeConverter(), memrefType0sz, alignmentBit);
703 
704  // Step 3. Get address of the global symbol
705  OpBuilder::InsertionGuard guard(rewriter);
706  rewriter.setInsertionPoint(op);
707  auto basePtr = rewriter.create<LLVM::AddressOfOp>(loc, shmemOp);
708  Type baseType = basePtr->getResultTypes().front();
709 
710  // Step 4. Generate GEP using offsets
711  SmallVector<LLVM::GEPArg> gepArgs = {0};
712  Value shmemPtr = rewriter.create<LLVM::GEPOp>(loc, baseType, elementType,
713  basePtr, gepArgs);
714  // Step 5. Create a memref descriptor
715  SmallVector<Value> shape, strides;
716  Value sizeBytes;
717  getMemRefDescriptorSizes(loc, memrefType0sz, {}, rewriter, shape, strides,
718  sizeBytes);
719  auto memRefDescriptor = this->createMemRefDescriptor(
720  loc, memrefType0sz, shmemPtr, shmemPtr, shape, strides, rewriter);
721 
722  // Step 5. Replace the op with memref descriptor
723  rewriter.replaceOp(op, {memRefDescriptor});
724  return success();
725 }
726 
728  gpu::ReturnOp op, OpAdaptor adaptor,
729  ConversionPatternRewriter &rewriter) const {
730  Location loc = op.getLoc();
731  unsigned numArguments = op.getNumOperands();
732  SmallVector<Value, 4> updatedOperands;
733 
734  bool useBarePtrCallConv = getTypeConverter()->getOptions().useBarePtrCallConv;
735  if (useBarePtrCallConv) {
736  // For the bare-ptr calling convention, extract the aligned pointer to
737  // be returned from the memref descriptor.
738  for (auto it : llvm::zip(op->getOperands(), adaptor.getOperands())) {
739  Type oldTy = std::get<0>(it).getType();
740  Value newOperand = std::get<1>(it);
741  if (isa<MemRefType>(oldTy) && getTypeConverter()->canConvertToBarePtr(
742  cast<BaseMemRefType>(oldTy))) {
743  MemRefDescriptor memrefDesc(newOperand);
744  newOperand = memrefDesc.allocatedPtr(rewriter, loc);
745  } else if (isa<UnrankedMemRefType>(oldTy)) {
746  // Unranked memref is not supported in the bare pointer calling
747  // convention.
748  return failure();
749  }
750  updatedOperands.push_back(newOperand);
751  }
752  } else {
753  updatedOperands = llvm::to_vector<4>(adaptor.getOperands());
754  (void)copyUnrankedDescriptors(rewriter, loc, op.getOperands().getTypes(),
755  updatedOperands,
756  /*toDynamic=*/true);
757  }
758 
759  // If ReturnOp has 0 or 1 operand, create it and return immediately.
760  if (numArguments <= 1) {
761  rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(
762  op, TypeRange(), updatedOperands, op->getAttrs());
763  return success();
764  }
765 
766  // Otherwise, we need to pack the arguments into an LLVM struct type before
767  // returning.
768  auto packedType = getTypeConverter()->packFunctionResults(
769  op.getOperandTypes(), useBarePtrCallConv);
770  if (!packedType) {
771  return rewriter.notifyMatchFailure(op, "could not convert result types");
772  }
773 
774  Value packed = rewriter.create<LLVM::UndefOp>(loc, packedType);
775  for (auto [idx, operand] : llvm::enumerate(updatedOperands)) {
776  packed = rewriter.create<LLVM::InsertValueOp>(loc, packed, operand, idx);
777  }
778  rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, TypeRange(), packed,
779  op->getAttrs());
780  return success();
781 }
782 
784  TypeConverter &typeConverter, const MemorySpaceMapping &mapping) {
785  typeConverter.addTypeAttributeConversion(
786  [mapping](BaseMemRefType type, gpu::AddressSpaceAttr memorySpaceAttr) {
787  gpu::AddressSpace memorySpace = memorySpaceAttr.getValue();
788  unsigned addressSpace = mapping(memorySpace);
789  return wrapNumericMemorySpace(memorySpaceAttr.getContext(),
790  addressSpace);
791  });
792 }
static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space)
static SmallString< 16 > getUniqueSymbolName(gpu::GPUModuleOp moduleOp, StringRef prefix)
LLVM::GlobalOp getDynamicSharedMemorySymbol(ConversionPatternRewriter &rewriter, gpu::GPUModuleOp moduleOp, gpu::DynamicSharedMemoryOp op, const LLVMTypeConverter *typeConverter, MemRefType memrefType, unsigned alignmentBit)
Generates a symbol with 0-sized array type for dynamic shared memory usage, or uses existing symbol.
static std::string diag(const llvm::Value &value)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class provides a shared interface for ranked and unranked memref types.
Definition: BuiltinTypes.h:102
This class represents an argument of a Block.
Definition: Value.h:319
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:104
UnitAttr getUnitAttr()
Definition: Builders.cpp:94
IntegerType getI64Type()
Definition: Builders.cpp:65
IntegerType getI32Type()
Definition: Builders.cpp:63
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:108
IntegerType getIntegerType(unsigned width)
Definition: Builders.cpp:67
Ty getType(Args &&...args)
Get or construct an instance of the type Ty with provided arguments.
Definition: Builders.h:89
StringAttr getStringAttr(const Twine &bytes)
Definition: Builders.cpp:258
MLIRContext * getContext() const
Definition: Builders.h:56
IndexType getIndexType()
Definition: Builders.cpp:51
IntegerType getI8Type()
Definition: Builders.cpp:59
DictionaryAttr getDictionaryAttr(ArrayRef< NamedAttribute > value)
Definition: Builders.cpp:100
NamedAttribute getNamedAttr(StringRef name, Attribute val)
Definition: Builders.cpp:90
FloatType getF64Type()
Definition: Builders.cpp:45
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
Definition: Builders.h:96
This class implements a pattern rewriter for use with ConversionPatterns.
void replaceOp(Operation *op, ValueRange newValues) override
Replace the given operation with the new values.
FailureOr< Block * > convertRegionTypes(Region *region, const TypeConverter &converter, TypeConverter::SignatureConversion *entryConversion=nullptr)
Apply a signature conversion to each block in the given region.
void eraseOp(Operation *op) override
PatternRewriter hook for erasing a dead operation.
const TypeConverter * typeConverter
An optional type converter for use by this pattern.
MemRefDescriptor createMemRefDescriptor(Location loc, MemRefType memRefType, Value allocatedPtr, Value alignedPtr, ArrayRef< Value > sizes, ArrayRef< Value > strides, ConversionPatternRewriter &rewriter) const
Creates and populates a canonical memref descriptor struct.
Definition: Pattern.cpp:216
void getMemRefDescriptorSizes(Location loc, MemRefType memRefType, ValueRange dynamicSizes, ConversionPatternRewriter &rewriter, SmallVectorImpl< Value > &sizes, SmallVectorImpl< Value > &strides, Value &size, bool sizeInBytes=true) const
Computes sizes, strides and buffer size of memRefType with identity layout.
Definition: Pattern.cpp:114
const LLVMTypeConverter * getTypeConverter() const
Definition: Pattern.cpp:27
LLVM::LLVMDialect & getDialect() const
Returns the LLVM dialect.
Definition: Pattern.cpp:32
LogicalResult copyUnrankedDescriptors(OpBuilder &builder, Location loc, TypeRange origTypes, SmallVectorImpl< Value > &operands, bool toDynamic) const
Copies the memory descriptor for any operands that were unranked descriptors originally to heap-allocated memory.
Definition: Pattern.cpp:245
This class contains all of the information necessary to report a diagnostic to the DiagnosticEngine.
Definition: Diagnostics.h:155
Conversion from types to the LLVM IR dialect.
Definition: TypeConverter.h:35
Type packFunctionResults(TypeRange types, bool useBarePointerCallConv=false) const
Convert a non-empty list of types to be returned from a function into an LLVM-compatible type.
const LowerToLLVMOptions & getOptions() const
Type convertFunctionSignature(FunctionType funcTy, bool isVariadic, bool useBarePtrCallConv, SignatureConversion &result) const
Convert a function type.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
FailureOr< unsigned > getMemRefAddressSpace(BaseMemRefType type) const
Return the LLVM address space corresponding to the memory space of the memref type type or failure if...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:66
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
Helper class to produce LLVM dialect operations extracting or inserting elements of a MemRef descriptor.
Definition: MemRefBuilder.h:33
Value allocatedPtr(OpBuilder &builder, Location loc)
Builds IR extracting the allocated pointer from the descriptor.
static MemRefDescriptor fromStaticShape(OpBuilder &builder, Location loc, const LLVMTypeConverter &typeConverter, MemRefType type, Value memory)
Builds IR creating a MemRef descriptor that represents type and populates it with static shape and st...
NamedAttrList is array of NamedAttributes that tracks whether it is sorted and does some basic work t...
Attribute erase(StringAttr name)
Erase the attribute with the given name from the list.
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:346
This class helps build Operations.
Definition: Builders.h:205
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:429
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:396
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:453
StringAttr getIdentifier() const
Return the name of this operation as a StringAttr.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
unsigned getNumSuccessors()
Definition: Operation.h:707
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
unsigned getNumRegions()
Returns the number of regions held by this operation.
Definition: Operation.h:674
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition: Operation.h:512
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:404
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition: PatternMatch.h:724
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
Definition: PatternMatch.h:636
void inlineRegionBefore(Region &region, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition: PatternMatch.h:542
static StringRef getSymbolAttrName()
Return the name of the attribute used for symbol names.
Definition: SymbolTable.h:76
This class provides all of the information necessary to convert a type signature.
std::optional< InputMapping > getInputMapping(unsigned input) const
Get the input mapping for the given argument.
void remapInput(unsigned origInputNo, Value replacement)
Remap an input of the original signature to another replacement value.
Type conversion class.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
void addTypeAttributeConversion(FnT &&callback)
Register a conversion function for attributes within types.
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:36
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
Definition: Types.h:74
bool isIntOrFloat() const
Return true if this is an integer (of any signedness) or a float type.
Definition: Types.cpp:127
unsigned getIntOrFloatBitWidth() const
Return the bit width of an integer or a float type, assert failure on other types.
Definition: Types.cpp:133
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:381
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Unrolls op if it's operating on vectors.
Include the generated interface declarations.
InFlightDiagnostic emitWarning(Location loc)
Utility method to emit a warning message using this location.
std::function< unsigned(gpu::AddressSpace)> MemorySpaceMapping
A function that maps a MemorySpace enum to a target-specific integer value.
Definition: GPUCommonPass.h:71
void populateGpuMemorySpaceAttributeConversions(TypeConverter &typeConverter, const MemorySpaceMapping &mapping)
Populates memory space attribute conversion rules for lowering gpu.address_space to integer values.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc, OpBuilder &b, StringRef name, LLVM::LLVMFunctionType type)
Find or create an external function declaration in the given module.
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc, gpu::GPUModuleOp moduleOp, Type llvmI8, StringRef namePrefix, StringRef str, uint64_t alignment=0, unsigned addrSpace=0)
Create a global that contains the given string.
LogicalResult matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override