MLIR  22.0.0git
GPUOpsLowering.cpp
Go to the documentation of this file.
1 //===- GPUOpsLowering.cpp - GPU FuncOp / ReturnOp lowering ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "GPUOpsLowering.h"
10 
14 #include "mlir/IR/Attributes.h"
15 #include "mlir/IR/Builders.h"
16 #include "mlir/IR/BuiltinTypes.h"
17 #include "llvm/ADT/SmallVectorExtras.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/FormatVariadic.h"
20 
21 using namespace mlir;
22 
/// Returns the LLVM function named `name` from the symbol table of `moduleOp`
/// when the lookup yields an LLVMFuncOp. Otherwise creates an LLVMFuncOp with
/// external linkage and type `type` at the start of the first block of
/// `moduleOp`'s first region and returns it.
// NOTE(review): listing line 31 is absent from this view; whether the
// builder's insertion point is saved and restored around the creation cannot
// be confirmed from here.
23 LLVM::LLVMFuncOp mlir::getOrDefineFunction(Operation *moduleOp, Location loc,
24  OpBuilder &b, StringRef name,
25  LLVM::LLVMFunctionType type) {
26  auto existing = dyn_cast_or_null<LLVM::LLVMFuncOp>(
27  SymbolTable::lookupSymbolIn(moduleOp, name));
28  if (existing)
29  return existing;
30 
 // NOTE(review): a symbol called `name` that is not an LLVMFuncOp also reaches
 // this point, because dyn_cast_or_null yields null for it.
32  b.setInsertionPointToStart(&moduleOp->getRegion(0).front());
33  return LLVM::LLVMFuncOp::create(b, loc, name, type, LLVM::Linkage::External);
34 }
35 
37  StringRef prefix) {
38  // Get a unique global name.
39  unsigned stringNumber = 0;
40  SmallString<16> stringConstName;
41  do {
42  stringConstName.clear();
43  (prefix + Twine(stringNumber++)).toStringRef(stringConstName);
44  } while (SymbolTable::lookupSymbolIn(moduleOp, stringConstName));
45  return stringConstName;
46 }
47 
// NOTE(review): the declarator line (listing line 48) and listing line 69 are
// absent from this view; the parameter list below continues that declarator.
// Visible behaviour: builds a NUL-terminated copy of `str`, returns an
// existing constant LLVM global in `moduleOp` with the same array type, value,
// alignment and address space if there is one, and otherwise creates a new
// internal constant global whose name starts with `namePrefix`.
49  Operation *moduleOp, Type llvmI8,
50  StringRef namePrefix,
51  StringRef str,
52  uint64_t alignment,
53  unsigned addrSpace) {
54  llvm::SmallString<20> nullTermStr(str);
55  nullTermStr.push_back('\0'); // Null terminate for C
 // The array length counts the terminator pushed above.
56  auto globalType =
57  LLVM::LLVMArrayType::get(llvmI8, nullTermStr.size_in_bytes());
58  StringAttr attr = b.getStringAttr(nullTermStr);
59 
60  // Try to find existing global.
 // An unset alignment on a global is compared as 0.
61  for (auto globalOp : moduleOp->getRegion(0).getOps<LLVM::GlobalOp>())
62  if (globalOp.getGlobalType() == globalType && globalOp.getConstant() &&
63  globalOp.getValueAttr() == attr &&
64  globalOp.getAlignment().value_or(0) == alignment &&
65  globalOp.getAddrSpace() == addrSpace)
66  return globalOp;
67 
68  // Not found: create new global.
 // The new global goes at the start of the first block of the first region.
70  b.setInsertionPointToStart(&moduleOp->getRegion(0).front());
71  SmallString<16> name = getUniqueSymbolName(moduleOp, namePrefix);
72  return LLVM::GlobalOp::create(b, loc, globalType,
73  /*isConstant=*/true, LLVM::Linkage::Internal,
74  name, attr, alignment, addrSpace);
75 }
76 
/// Lowers a gpu.func to an llvm.func. Workgroup attributions become either
/// extra `llvm.ptr` function arguments or LLVM globals, depending on
/// `encodeWorkgroupAttributionsAsArguments`; private attributions become
/// allocas. The body region is then moved into the new function with a
/// converted entry-block signature, and selected argument attributes are
/// copied onto the new function's arguments.
// NOTE(review): listing lines 108, 167, 179, 260, 282 and 312 are absent from
// this view, so the declarations of `argAttrs` (as first used below),
// `funcType`, `attributes` and `descr` are not visible here.
77 LogicalResult
78 GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
79  ConversionPatternRewriter &rewriter) const {
80  Location loc = gpuFuncOp.getLoc();
81 
82  SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
83  if (encodeWorkgroupAttributionsAsArguments) {
84  // Append an `llvm.ptr` argument to the function signature to encode
85  // workgroup attributions.
86 
87  ArrayRef<BlockArgument> workgroupAttributions =
88  gpuFuncOp.getWorkgroupAttributions();
89  size_t numAttributions = workgroupAttributions.size();
90 
91  // Insert all arguments at the end.
92  unsigned index = gpuFuncOp.getNumArguments();
93  SmallVector<unsigned> argIndices(numAttributions, index);
94 
95  // New arguments will simply be `llvm.ptr` with the correct address space
96  Type workgroupPtrType =
97  rewriter.getType<LLVM::LLVMPointerType>(workgroupAddrSpace);
98  SmallVector<Type> argTypes(numAttributions, workgroupPtrType);
99 
100  // Attributes: noalias, llvm.mlir.workgroup_attribution(<size>, <type>)
101  std::array attrs{
102  rewriter.getNamedAttr(LLVM::LLVMDialect::getNoAliasAttrName(),
103  rewriter.getUnitAttr()),
104  rewriter.getNamedAttr(
105  getDialect().getWorkgroupAttributionAttrHelper().getName(),
106  rewriter.getUnitAttr()),
107  };
 // The last entry of `attrs` starts as a unit-attr placeholder and is
 // overwritten for each attribution before the dictionary is built.
109  for (BlockArgument attribution : workgroupAttributions) {
110  auto attributionType = cast<MemRefType>(attribution.getType());
111  IntegerAttr numElements =
112  rewriter.getI64IntegerAttr(attributionType.getNumElements());
113  Type llvmElementType =
114  getTypeConverter()->convertType(attributionType.getElementType());
115  if (!llvmElementType)
116  return failure();
117  TypeAttr type = TypeAttr::get(llvmElementType);
118  attrs.back().setValue(
119  rewriter.getAttr<LLVM::WorkgroupAttributionAttr>(numElements, type));
120  argAttrs.push_back(rewriter.getDictionaryAttr(attrs));
121  }
122 
123  // Every new argument reuses the location of the function.
124  SmallVector<Location> argLocs(numAttributions, gpuFuncOp.getLoc());
125 
126  // Perform signature modification
127  rewriter.modifyOpInPlace(
128  gpuFuncOp, [gpuFuncOp, &argIndices, &argTypes, &argAttrs, &argLocs]() {
129  LogicalResult inserted =
130  static_cast<FunctionOpInterface>(gpuFuncOp).insertArguments(
131  argIndices, argTypes, argAttrs, argLocs);
132  (void)inserted;
133  assert(succeeded(inserted) &&
134  "expected GPU funcs to support inserting any argument");
135  });
136  } else {
 // One non-constant internal global per workgroup attribution, named
 // `__wg_<function name>_<attribution index>`.
137  workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
138  for (auto [idx, attribution] :
139  llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
140  auto type = dyn_cast<MemRefType>(attribution.getType());
141  assert(type && type.hasStaticShape() && "unexpected type in attribution");
142 
143  uint64_t numElements = type.getNumElements();
144 
145  auto elementType =
146  cast<Type>(typeConverter->convertType(type.getElementType()));
147  auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
148  std::string name =
149  std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx));
 // Alignment stays 0 unless the attribution carries an integer align
 // attribute.
150  uint64_t alignment = 0;
151  if (auto alignAttr = dyn_cast_or_null<IntegerAttr>(
152  gpuFuncOp.getWorkgroupAttributionAttr(
153  idx, LLVM::LLVMDialect::getAlignAttrName())))
154  alignment = alignAttr.getInt();
155  auto globalOp = LLVM::GlobalOp::create(
156  rewriter, gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
157  LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment,
158  workgroupAddrSpace);
159  workgroupBuffers.push_back(globalOp);
160  }
161  }
162 
163  // Remap proper input types.
164  TypeConverter::SignatureConversion signatureConversion(
165  gpuFuncOp.front().getNumArguments());
166 
168  gpuFuncOp.getFunctionType(), /*isVariadic=*/false,
169  getTypeConverter()->getOptions().useBarePtrCallConv, signatureConversion);
170  if (!funcType) {
171  return rewriter.notifyMatchFailure(gpuFuncOp, [&](Diagnostic &diag) {
172  diag << "failed to convert function signature type for: "
173  << gpuFuncOp.getFunctionType();
174  });
175  }
176 
177  // Create the new function operation. Only copy those attributes that are
178  // not specific to function modeling.
 // The argument-attribute array is not copied wholesale; it is captured in
 // `argAttrs` and distributed per argument after the region is converted.
180  ArrayAttr argAttrs;
181  for (const auto &attr : gpuFuncOp->getAttrs()) {
182  if (attr.getName() == SymbolTable::getSymbolAttrName() ||
183  attr.getName() == gpuFuncOp.getFunctionTypeAttrName() ||
184  attr.getName() ==
185  gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() ||
186  attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() ||
187  attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName() ||
188  attr.getName() == gpuFuncOp.getKnownBlockSizeAttrName() ||
189  attr.getName() == gpuFuncOp.getKnownGridSizeAttrName())
190  continue;
191  if (attr.getName() == gpuFuncOp.getArgAttrsAttrName()) {
192  argAttrs = gpuFuncOp.getArgAttrsAttr();
193  continue;
194  }
195  attributes.push_back(attr);
196  }
197 
198  DenseI32ArrayAttr knownBlockSize = gpuFuncOp.getKnownBlockSizeAttr();
199  DenseI32ArrayAttr knownGridSize = gpuFuncOp.getKnownGridSizeAttr();
200  // Ensure we don't lose information if the function is lowered before its
201  // surrounding context.
202  auto *gpuDialect = cast<gpu::GPUDialect>(gpuFuncOp->getDialect());
203  if (knownBlockSize)
204  attributes.emplace_back(gpuDialect->getKnownBlockSizeAttrHelper().getName(),
205  knownBlockSize);
206  if (knownGridSize)
207  attributes.emplace_back(gpuDialect->getKnownGridSizeAttrHelper().getName(),
208  knownGridSize);
209 
210  // Add a dialect specific kernel attribute in addition to GPU kernel
211  // attribute. The former is necessary for further translation while the
212  // latter is expected by gpu.launch_func.
213  if (gpuFuncOp.isKernel()) {
214  if (kernelAttributeName)
215  attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr());
216  // Set the dialect-specific block size attribute if there is one.
217  if (kernelBlockSizeAttributeName && knownBlockSize) {
218  attributes.emplace_back(kernelBlockSizeAttributeName, knownBlockSize);
219  }
220  }
221  LLVM::CConv callingConvention = gpuFuncOp.isKernel()
222  ? kernelCallingConvention
223  : nonKernelCallingConvention;
224  auto llvmFuncOp = LLVM::LLVMFuncOp::create(
225  rewriter, gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType,
226  LLVM::Linkage::External, /*dsoLocal=*/false, callingConvention,
227  /*comdat=*/nullptr, attributes);
228 
229  {
230  // Insert operations that correspond to converted workgroup and private
231  // memory attributions to the body of the function. This must operate on
232  // the original function, before the body region is inlined in the new
233  // function to maintain the relation between block arguments and the
234  // parent operation that assigns their semantics.
235  OpBuilder::InsertionGuard guard(rewriter);
236 
237  // Rewrite workgroup memory attributions to addresses of global buffers.
238  rewriter.setInsertionPointToStart(&gpuFuncOp.front());
239  unsigned numProperArguments = gpuFuncOp.getNumArguments();
240 
241  if (encodeWorkgroupAttributionsAsArguments) {
242  // Build a MemRefDescriptor with each of the arguments added above.
243 
244  unsigned numAttributions = gpuFuncOp.getNumWorkgroupAttributions();
245  assert(numProperArguments >= numAttributions &&
246  "Expecting attributions to be encoded as arguments already");
247 
248  // Arguments encoding workgroup attributions are taken from positions
249  // [numProperArguments-numAttributions, numProperArguments) of getArguments().
250  ArrayRef<BlockArgument> attributionArguments =
251  gpuFuncOp.getArguments().slice(numProperArguments - numAttributions,
252  numAttributions);
253  for (auto [idx, vals] : llvm::enumerate(llvm::zip_equal(
254  gpuFuncOp.getWorkgroupAttributions(), attributionArguments))) {
255  auto [attribution, arg] = vals;
256  auto type = cast<MemRefType>(attribution.getType());
257 
258  // Arguments are of llvm.ptr type and attributions are of memref type:
259  // we need to wrap them in memref descriptors.
261  rewriter, loc, *getTypeConverter(), type, arg);
262 
263  // And remap the arguments
264  signatureConversion.remapInput(numProperArguments + idx, descr);
265  }
266  } else {
267  for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) {
268  auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(),
269  global.getAddrSpace());
270  Value address = LLVM::AddressOfOp::create(rewriter, loc, ptrType,
271  global.getSymNameAttr());
272  Value memory =
273  LLVM::GEPOp::create(rewriter, loc, ptrType, global.getType(),
274  address, ArrayRef<LLVM::GEPArg>{0, 0});
275 
276  // Build a memref descriptor pointing to the buffer to plug with the
277  // existing memref infrastructure. This may use more registers than
278  // otherwise necessary given that memref sizes are fixed, but we can try
279  // and canonicalize that away later.
280  Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx];
281  auto type = cast<MemRefType>(attribution.getType());
283  rewriter, loc, *getTypeConverter(), type, memory);
284  signatureConversion.remapInput(numProperArguments + idx, descr);
285  }
286  }
287 
288  // Rewrite private memory attributions to alloca'ed buffers.
289  unsigned numWorkgroupAttributions = gpuFuncOp.getNumWorkgroupAttributions();
290  auto int64Ty = IntegerType::get(rewriter.getContext(), 64);
291  for (const auto [idx, attribution] :
292  llvm::enumerate(gpuFuncOp.getPrivateAttributions())) {
293  auto type = cast<MemRefType>(attribution.getType());
 // NOTE(review): `cast` above already asserts the type, so only the
 // hasStaticShape() half of this assert adds a check.
294  assert(type && type.hasStaticShape() && "unexpected type in attribution");
295 
296  // Explicitly drop memory space when lowering private memory
297  // attributions since NVVM models it as `alloca`s in the default
298  // memory space and does not support `alloca`s with addrspace(5).
 // NOTE(review): the pointer type below is built with `allocaAddrSpace`,
 // so the address space actually used is whatever this pattern holds.
299  Type elementType = typeConverter->convertType(type.getElementType());
300  auto ptrType =
301  LLVM::LLVMPointerType::get(rewriter.getContext(), allocaAddrSpace);
302  Value numElements = LLVM::ConstantOp::create(
303  rewriter, gpuFuncOp.getLoc(), int64Ty, type.getNumElements());
304  uint64_t alignment = 0;
305  if (auto alignAttr =
306  dyn_cast_or_null<IntegerAttr>(gpuFuncOp.getPrivateAttributionAttr(
307  idx, LLVM::LLVMDialect::getAlignAttrName())))
308  alignment = alignAttr.getInt();
309  Value allocated =
310  LLVM::AllocaOp::create(rewriter, gpuFuncOp.getLoc(), ptrType,
311  elementType, numElements, alignment);
313  rewriter, loc, *getTypeConverter(), type, allocated);
 // Private attributions follow the workgroup attributions in the entry
 // block, hence the combined offset.
314  signatureConversion.remapInput(
315  numProperArguments + numWorkgroupAttributions + idx, descr);
316  }
317  }
318 
319  // Move the region to the new function, update the entry block signature.
320  rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(),
321  llvmFuncOp.end());
322  if (failed(rewriter.convertRegionTypes(&llvmFuncOp.getBody(), *typeConverter,
323  &signatureConversion)))
324  return failure();
325 
326  // Get memref type from function arguments and set the noalias to
327  // pointer arguments.
328  for (const auto [idx, argTy] :
329  llvm::enumerate(gpuFuncOp.getArgumentTypes())) {
330  auto remapping = signatureConversion.getInputMapping(idx);
331  NamedAttrList argAttr =
332  argAttrs ? cast<DictionaryAttr>(argAttrs[idx]) : NamedAttrList();
 // Moves `attrName` from the original argument onto every converted
 // argument the original one was expanded into.
333  auto copyAttribute = [&](StringRef attrName) {
334  Attribute attr = argAttr.erase(attrName);
335  if (!attr)
336  return;
337  for (size_t i = 0, e = remapping->size; i < e; ++i)
338  llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
339  };
 // Same as above, but only converted arguments of LLVM pointer type
 // receive the attribute; noalias is skipped with a warning when the
 // original argument expanded into more than one value.
340  auto copyPointerAttribute = [&](StringRef attrName) {
341  Attribute attr = argAttr.erase(attrName);
342 
343  if (!attr)
344  return;
345  if (remapping->size > 1 &&
346  attrName == LLVM::LLVMDialect::getNoAliasAttrName()) {
347  emitWarning(llvmFuncOp.getLoc(),
348  "Cannot copy noalias with non-bare pointers.\n");
349  return;
350  }
351  for (size_t i = 0, e = remapping->size; i < e; ++i) {
352  if (isa<LLVM::LLVMPointerType>(
353  llvmFuncOp.getArgument(remapping->inputNo + i).getType())) {
354  llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
355  }
356  }
357  };
358 
359  if (argAttr.empty())
360  continue;
361 
362  copyAttribute(LLVM::LLVMDialect::getReturnedAttrName());
363  copyAttribute(LLVM::LLVMDialect::getNoUndefAttrName());
364  copyAttribute(LLVM::LLVMDialect::getInRegAttrName());
 // Pointer-only attributes are considered when at least one converted
 // argument is an LLVM pointer.
365  bool lowersToPointer = false;
366  for (size_t i = 0, e = remapping->size; i < e; ++i) {
367  lowersToPointer |= isa<LLVM::LLVMPointerType>(
368  llvmFuncOp.getArgument(remapping->inputNo + i).getType());
369  }
370 
371  if (lowersToPointer) {
372  copyPointerAttribute(LLVM::LLVMDialect::getNoAliasAttrName());
373  copyPointerAttribute(LLVM::LLVMDialect::getNoCaptureAttrName());
374  copyPointerAttribute(LLVM::LLVMDialect::getNoFreeAttrName());
375  copyPointerAttribute(LLVM::LLVMDialect::getAlignAttrName());
376  copyPointerAttribute(LLVM::LLVMDialect::getReadonlyAttrName());
377  copyPointerAttribute(LLVM::LLVMDialect::getWriteOnlyAttrName());
378  copyPointerAttribute(LLVM::LLVMDialect::getReadnoneAttrName());
379  copyPointerAttribute(LLVM::LLVMDialect::getNonNullAttrName());
380  copyPointerAttribute(LLVM::LLVMDialect::getDereferenceableAttrName());
381  copyPointerAttribute(
382  LLVM::LLVMDialect::getDereferenceableOrNullAttrName());
383  copyPointerAttribute(
384  LLVM::LLVMDialect::WorkgroupAttributionAttrHelper::getNameStr());
385  }
386  }
387  rewriter.eraseOp(gpuFuncOp);
388  return success();
389 }
390 
// NOTE(review): the declarator line (listing line 391) and listing lines 413,
// 419 and 460 are absent from this view; the parameter list below continues
// the missing declarator, and the declaration of `arguments` is not visible.
// Visible behaviour: replaces gpu.printf by a chain of calls, first
// __ockl_printf_begin, then __ockl_printf_append_string_n for the format
// string, then __ockl_printf_append_args once per group of up to seven
// arguments. Each call's result is the descriptor passed to the next call.
392  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
393  ConversionPatternRewriter &rewriter) const {
394  Location loc = gpuPrintfOp->getLoc();
395 
396  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getI8Type());
397  auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
398  mlir::Type llvmI32 = typeConverter->convertType(rewriter.getI32Type());
399  mlir::Type llvmI64 = typeConverter->convertType(rewriter.getI64Type());
400 
401  Operation *moduleOp = gpuPrintfOp->getParentWithTrait<OpTrait::SymbolTable>();
402  if (!moduleOp)
403  return rewriter.notifyMatchFailure(gpuPrintfOp,
404  "Couldn't find a parent module");
405 
406  auto ocklBegin =
407  getOrDefineFunction(moduleOp, loc, rewriter, "__ockl_printf_begin",
408  LLVM::LLVMFunctionType::get(llvmI64, {llvmI64}));
 // The append-args function is only looked up or declared when the op has
 // arguments; it stays null otherwise and is then never called below.
409  LLVM::LLVMFuncOp ocklAppendArgs;
410  if (!adaptor.getArgs().empty()) {
411  ocklAppendArgs = getOrDefineFunction(
412  moduleOp, loc, rewriter, "__ockl_printf_append_args",
414  llvmI64, {llvmI64, /*numArgs*/ llvmI32, llvmI64, llvmI64, llvmI64,
415  llvmI64, llvmI64, llvmI64, llvmI64, /*isLast*/ llvmI32}));
416  }
417  auto ocklAppendStringN = getOrDefineFunction(
418  moduleOp, loc, rewriter, "__ockl_printf_append_string_n",
420  llvmI64,
421  {llvmI64, ptrType, /*length (bytes)*/ llvmI64, /*isLast*/ llvmI32}));
422 
423  /// Start the printf hostcall
424  Value zeroI64 = LLVM::ConstantOp::create(rewriter, loc, llvmI64, 0);
425  auto printfBeginCall =
426  LLVM::CallOp::create(rewriter, loc, ocklBegin, zeroI64);
427  Value printfDesc = printfBeginCall.getResult();
428 
429  // Create the global op or find an existing one.
430  LLVM::GlobalOp global = getOrCreateStringConstant(
431  rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat());
432 
433  // Get a pointer to the format string's first element and pass it to printf()
434  Value globalPtr = LLVM::AddressOfOp::create(
435  rewriter, loc,
436  LLVM::LLVMPointerType::get(rewriter.getContext(), global.getAddrSpace()),
437  global.getSymNameAttr());
438  Value stringStart =
439  LLVM::GEPOp::create(rewriter, loc, ptrType, global.getGlobalType(),
440  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
 // The length is the size of the global's string value.
441  Value stringLen = LLVM::ConstantOp::create(
442  rewriter, loc, llvmI64, cast<StringAttr>(global.getValueAttr()).size());
443 
444  Value oneI32 = LLVM::ConstantOp::create(rewriter, loc, llvmI32, 1);
445  Value zeroI32 = LLVM::ConstantOp::create(rewriter, loc, llvmI32, 0);
446 
 // The format string is flagged as the last piece only when no arguments
 // follow it.
447  auto appendFormatCall = LLVM::CallOp::create(
448  rewriter, loc, ocklAppendStringN,
449  ValueRange{printfDesc, stringStart, stringLen,
450  adaptor.getArgs().empty() ? oneI32 : zeroI32});
451  printfDesc = appendFormatCall.getResult();
452 
453  // __ockl_printf_append_args takes 7 values per append call
454  constexpr size_t argsPerAppend = 7;
455  size_t nArgs = adaptor.getArgs().size();
456  for (size_t group = 0; group < nArgs; group += argsPerAppend) {
457  size_t bound = std::min(group + argsPerAppend, nArgs);
458  size_t numArgsThisCall = bound - group;
459 
461  arguments.push_back(printfDesc);
462  arguments.push_back(
463  LLVM::ConstantOp::create(rewriter, loc, llvmI32, numArgsThisCall));
464  for (size_t i = group; i < bound; ++i) {
465  Value arg = adaptor.getArgs()[i];
 // Floats are widened to f64 when needed and then reinterpreted as i64.
466  if (auto floatType = dyn_cast<FloatType>(arg.getType())) {
467  if (!floatType.isF64())
468  arg = LLVM::FPExtOp::create(
469  rewriter, loc, typeConverter->convertType(rewriter.getF64Type()),
470  arg);
471  arg = LLVM::BitcastOp::create(rewriter, loc, llvmI64, arg);
472  }
 // NOTE(review): any value whose width is not 64 is zero-extended; this
 // presumably assumes widths below 64 — confirm for wider integers.
473  if (arg.getType().getIntOrFloatBitWidth() != 64)
474  arg = LLVM::ZExtOp::create(rewriter, loc, llvmI64, arg);
475 
476  arguments.push_back(arg);
477  }
478  // Pad out to 7 arguments since the hostcall always needs 7
479  for (size_t extra = numArgsThisCall; extra < argsPerAppend; ++extra) {
480  arguments.push_back(zeroI64);
481  }
482 
483  auto isLast = (bound == nArgs) ? oneI32 : zeroI32;
484  arguments.push_back(isLast);
485  auto call = LLVM::CallOp::create(rewriter, loc, ocklAppendArgs, arguments);
486  printfDesc = call.getResult();
487  }
488  rewriter.eraseOp(gpuPrintfOp);
489  return success();
490 }
491 
// NOTE(review): the declarator line (listing line 492) is absent from this
// view; the parameter list below continues it.
// Visible behaviour: replaces gpu.printf by a call to a variadic `printf`
// declaration whose first argument is a pointer to the format string, with
// both the pointer type and the format-string global using `addressSpace`.
493  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
494  ConversionPatternRewriter &rewriter) const {
495  Location loc = gpuPrintfOp->getLoc();
496 
497  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8));
498  mlir::Type ptrType =
499  LLVM::LLVMPointerType::get(rewriter.getContext(), addressSpace);
500 
501  Operation *moduleOp = gpuPrintfOp->getParentWithTrait<OpTrait::SymbolTable>();
502  if (!moduleOp)
503  return rewriter.notifyMatchFailure(gpuPrintfOp,
504  "Couldn't find a parent module");
505 
506  auto printfType =
507  LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType},
508  /*isVarArg=*/true);
509  LLVM::LLVMFuncOp printfDecl =
510  getOrDefineFunction(moduleOp, loc, rewriter, "printf", printfType);
511 
512  // Create the global op or find an existing one.
513  LLVM::GlobalOp global = getOrCreateStringConstant(
514  rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat(),
515  /*alignment=*/0, addressSpace);
516 
517  // Get a pointer to the format string's first element
518  Value globalPtr = LLVM::AddressOfOp::create(
519  rewriter, loc,
520  LLVM::LLVMPointerType::get(rewriter.getContext(), global.getAddrSpace()),
521  global.getSymNameAttr());
522  Value stringStart =
523  LLVM::GEPOp::create(rewriter, loc, ptrType, global.getGlobalType(),
524  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
525 
526  // Construct arguments and function call
 // The format pointer comes first, followed by the converted operands in
 // their original order and without any promotion.
527  auto argsRange = adaptor.getArgs();
528  SmallVector<Value, 4> printfArgs;
529  printfArgs.reserve(argsRange.size() + 1);
530  printfArgs.push_back(stringStart);
531  printfArgs.append(argsRange.begin(), argsRange.end());
532 
533  LLVM::CallOp::create(rewriter, loc, printfDecl, printfArgs);
534  rewriter.eraseOp(gpuPrintfOp);
535  return success();
536 }
537 
// NOTE(review): the declarator line (listing line 538) is absent from this
// view; the parameter list below continues it.
// Visible behaviour: replaces gpu.printf by a call to `vprintf(format, args)`,
// where `args` points to a stack-allocated literal struct holding the
// arguments in order.
539  gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
540  ConversionPatternRewriter &rewriter) const {
541  Location loc = gpuPrintfOp->getLoc();
542 
543  mlir::Type llvmI8 = typeConverter->convertType(rewriter.getIntegerType(8));
544  mlir::Type ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
545 
546  Operation *moduleOp = gpuPrintfOp->getParentWithTrait<OpTrait::SymbolTable>();
547  if (!moduleOp)
548  return rewriter.notifyMatchFailure(gpuPrintfOp,
549  "Couldn't find a parent module");
550 
551  // Create a valid global location removing any metadata attached to the
552  // location as debug info metadata inside of a function cannot be used outside
553  // of that function.
554  Location globalLoc = loc->findInstanceOfOrUnknown<FileLineColLoc>();
555 
556  auto vprintfType =
557  LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType, ptrType});
558  LLVM::LLVMFuncOp vprintfDecl = getOrDefineFunction(
559  moduleOp, globalLoc, rewriter, "vprintf", vprintfType);
560 
561  // Create the global op or find an existing one.
562  LLVM::GlobalOp global =
563  getOrCreateStringConstant(rewriter, globalLoc, moduleOp, llvmI8,
564  "printfFormat_", adaptor.getFormat());
565 
566  // Get a pointer to the format string's first element
567  Value globalPtr = LLVM::AddressOfOp::create(rewriter, loc, global);
568  Value stringStart =
569  LLVM::GEPOp::create(rewriter, loc, ptrType, global.getGlobalType(),
570  globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
571  SmallVector<Type> types;
572  SmallVector<Value> args;
573  // Promote and pack the arguments into a stack allocation.
574  for (Value arg : adaptor.getArgs()) {
575  Type type = arg.getType();
576  Value promotedArg = arg;
577  assert(type.isIntOrFloat());
 // NOTE(review): every FloatType argument, including one that is already
 // f64, is sent through FPExtOp here — confirm that this is intended.
578  if (isa<FloatType>(type)) {
579  type = rewriter.getF64Type();
580  promotedArg = LLVM::FPExtOp::create(rewriter, loc, type, arg);
581  }
582  types.push_back(type);
583  args.push_back(promotedArg);
584  }
 // One struct field per argument, in argument order.
585  Type structType =
586  LLVM::LLVMStructType::getLiteral(gpuPrintfOp.getContext(), types);
587  Value one = LLVM::ConstantOp::create(rewriter, loc, rewriter.getI64Type(),
588  rewriter.getIndexAttr(1));
589  Value tempAlloc =
590  LLVM::AllocaOp::create(rewriter, loc, ptrType, structType, one,
591  /*alignment=*/0);
 // Store each argument into its field of the temporary struct.
592  for (auto [index, arg] : llvm::enumerate(args)) {
593  Value ptr = LLVM::GEPOp::create(
594  rewriter, loc, ptrType, structType, tempAlloc,
595  ArrayRef<LLVM::GEPArg>{0, static_cast<int32_t>(index)});
596  LLVM::StoreOp::create(rewriter, loc, arg, ptr);
597  }
598  std::array<Value, 2> printfArgs = {stringStart, tempAlloc};
599 
600  LLVM::CallOp::create(rewriter, loc, vprintfDecl, printfArgs);
601  rewriter.eraseOp(gpuPrintfOp);
602  return success();
603 }
604 
605 /// Helper for impl::scalarizeVectorOp. Scalarizes vectors to elements.
606 /// Used either directly (for ops on 1D vectors) or as the callback passed to
607 /// detail::handleMultidimensionalVectors (for ops on higher-rank vectors).
609  Type llvm1DVectorTy,
610  ConversionPatternRewriter &rewriter,
611  const LLVMTypeConverter &converter) {
612  TypeRange operandTypes(operands);
613  VectorType vectorType = cast<VectorType>(llvm1DVectorTy);
614  Location loc = op->getLoc();
615  Value result = LLVM::PoisonOp::create(rewriter, loc, vectorType);
616  Type indexType = converter.convertType(rewriter.getIndexType());
617  StringAttr name = op->getName().getIdentifier();
618  Type elementType = vectorType.getElementType();
619 
620  for (int64_t i = 0; i < vectorType.getNumElements(); ++i) {
621  Value index = LLVM::ConstantOp::create(rewriter, loc, indexType, i);
622  auto extractElement = [&](Value operand) -> Value {
623  if (!isa<VectorType>(operand.getType()))
624  return operand;
625  return LLVM::ExtractElementOp::create(rewriter, loc, operand, index);
626  };
627  auto scalarOperands = llvm::map_to_vector(operands, extractElement);
628  Operation *scalarOp =
629  rewriter.create(loc, name, scalarOperands, elementType, op->getAttrs());
630  result = LLVM::InsertElementOp::create(rewriter, loc, result,
631  scalarOp->getResult(0), index);
632  }
633  return result;
634 }
635 
636 /// Unrolls op to array/vector elements.
/// If any operand is a vector, the op is replaced by the value built by
/// scalarizeVectorOpHelper for the converted type of the first result. If
/// instead any operand is an LLVM array, the helper is handed over as a
/// callback. With neither kind of operand, a match failure is reported.
// NOTE(review): listing line 650 is absent from this view, so the callee that
// receives the callback in the array case is not visible here.
637 LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands,
638  ConversionPatternRewriter &rewriter,
639  const LLVMTypeConverter &converter) {
640  TypeRange operandTypes(operands);
641  if (llvm::any_of(operandTypes, llvm::IsaPred<VectorType>)) {
 // Only the first result type is consulted.
642  VectorType vectorType =
643  cast<VectorType>(converter.convertType(op->getResultTypes()[0]));
644  rewriter.replaceOp(op, scalarizeVectorOpHelper(op, operands, vectorType,
645  rewriter, converter));
646  return success();
647  }
648 
649  if (llvm::any_of(operandTypes, llvm::IsaPred<LLVM::LLVMArrayType>)) {
651  op, operands, converter,
652  [&](Type llvm1DVectorTy, ValueRange operands) -> Value {
653  return scalarizeVectorOpHelper(op, operands, llvm1DVectorTy, rewriter,
654  converter);
655  },
656  rewriter);
657  }
658 
659  return rewriter.notifyMatchFailure(op, "no llvm.array or vector to unroll");
660 }
661 
662 static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space) {
663  return IntegerAttr::get(IntegerType::get(ctx, 64), space);
664 }
665 
666 /// Generates a symbol with 0-sized array type for dynamic shared memory usage,
667 /// or uses existing symbol.
668 static LLVM::GlobalOp getDynamicSharedMemorySymbol(
669  ConversionPatternRewriter &rewriter, gpu::GPUModuleOp moduleOp,
670  gpu::DynamicSharedMemoryOp op, const LLVMTypeConverter *typeConverter,
671  MemRefType memrefType, unsigned alignmentBit) {
672  uint64_t alignmentByte = alignmentBit / memrefType.getElementTypeBitWidth();
673 
674  FailureOr<unsigned> addressSpace =
675  typeConverter->getMemRefAddressSpace(memrefType);
676  if (failed(addressSpace)) {
677  op->emitError() << "conversion of memref memory space "
678  << memrefType.getMemorySpace()
679  << " to integer address space "
680  "failed. Consider adding memory space conversions.";
681  }
682 
683  // Step 1. Collect symbol names of LLVM::GlobalOp Ops. Also if any of
684  // LLVM::GlobalOp is suitable for shared memory, return it.
685  llvm::StringSet<> existingGlobalNames;
686  for (auto globalOp : moduleOp.getBody()->getOps<LLVM::GlobalOp>()) {
687  existingGlobalNames.insert(globalOp.getSymName());
688  if (auto arrayType = dyn_cast<LLVM::LLVMArrayType>(globalOp.getType())) {
689  if (globalOp.getAddrSpace() == addressSpace.value() &&
690  arrayType.getNumElements() == 0 &&
691  globalOp.getAlignment().value_or(0) == alignmentByte) {
692  return globalOp;
693  }
694  }
695  }
696 
697  // Step 2. Find a unique symbol name
698  unsigned uniquingCounter = 0;
699  SmallString<128> symName = SymbolTable::generateSymbolName<128>(
700  "__dynamic_shmem_",
701  [&](StringRef candidate) {
702  return existingGlobalNames.contains(candidate);
703  },
704  uniquingCounter);
705 
706  // Step 3. Generate a global op
707  OpBuilder::InsertionGuard guard(rewriter);
708  rewriter.setInsertionPointToStart(moduleOp.getBody());
709 
710  auto zeroSizedArrayType = LLVM::LLVMArrayType::get(
711  typeConverter->convertType(memrefType.getElementType()), 0);
712 
713  return LLVM::GlobalOp::create(rewriter, op->getLoc(), zeroSizedArrayType,
714  /*isConstant=*/false, LLVM::Linkage::Internal,
715  symName, /*value=*/Attribute(), alignmentByte,
716  addressSpace.value());
717 }
718 
// NOTE(review): the declarator line (listing line 719) is absent from this
// view; the parameter list below continues it.
// Visible behaviour: replaces gpu.dynamic_shared_memory by a memref descriptor
// whose allocated and aligned pointers both point at a zero-sized LLVM global
// obtained from getDynamicSharedMemorySymbol.
720  gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
721  ConversionPatternRewriter &rewriter) const {
722  Location loc = op.getLoc();
723  MemRefType memrefType = op.getResultMemref().getType();
724  Type elementType = typeConverter->convertType(memrefType.getElementType());
725 
726  // Step 1: Generate a zero-sized 1-D memref type with the converted element
 // type and the memory space of the result (presumably memref<0xi8>).
727  MemRefLayoutAttrInterface layout = {};
728  auto memrefType0sz =
729  MemRefType::get({0}, elementType, layout, memrefType.getMemorySpace());
730 
731  // Step 2: Generate a global symbol or existing for the dynamic shared
732  // memory with that zero-sized memref type
 // NOTE(review): the GPUModuleOp parent is used without a null check.
733  auto moduleOp = op->getParentOfType<gpu::GPUModuleOp>();
734  LLVM::GlobalOp shmemOp = getDynamicSharedMemorySymbol(
735  rewriter, moduleOp, op, getTypeConverter(), memrefType0sz, alignmentBit);
736 
737  // Step 3. Get address of the global symbol
738  OpBuilder::InsertionGuard guard(rewriter);
739  rewriter.setInsertionPoint(op);
740  auto basePtr = LLVM::AddressOfOp::create(rewriter, loc, shmemOp);
741  Type baseType = basePtr->getResultTypes().front();
742 
743  // Step 4. Generate GEP using offsets
744  SmallVector<LLVM::GEPArg> gepArgs = {0};
745  Value shmemPtr = LLVM::GEPOp::create(rewriter, loc, baseType, elementType,
746  basePtr, gepArgs);
747  // Step 5. Create a memref descriptor
748  SmallVector<Value> shape, strides;
749  Value sizeBytes;
750  getMemRefDescriptorSizes(loc, memrefType0sz, {}, rewriter, shape, strides,
751  sizeBytes);
752  auto memRefDescriptor = this->createMemRefDescriptor(
753  loc, memrefType0sz, shmemPtr, shmemPtr, shape, strides, rewriter);
754 
755  // Step 6. Replace the op with memref descriptor
756  rewriter.replaceOp(op, {memRefDescriptor});
757  return success();
758 }
759 
// NOTE(review): the declarator line (listing line 760) is absent from this
// view; the parameter list below continues it.
// Visible behaviour: replaces gpu.return by llvm.return. With zero or one
// operand the converted operands are returned directly; with more, they are
// first packed into a single struct value.
761  gpu::ReturnOp op, OpAdaptor adaptor,
762  ConversionPatternRewriter &rewriter) const {
763  Location loc = op.getLoc();
764  unsigned numArguments = op.getNumOperands();
765  SmallVector<Value, 4> updatedOperands;
766 
767  bool useBarePtrCallConv = getTypeConverter()->getOptions().useBarePtrCallConv;
768  if (useBarePtrCallConv) {
769  // For the bare-ptr calling convention, extract the allocated pointer
770  // (MemRefDescriptor::allocatedPtr) to be returned from the descriptor.
771  for (auto it : llvm::zip(op->getOperands(), adaptor.getOperands())) {
772  Type oldTy = std::get<0>(it).getType();
773  Value newOperand = std::get<1>(it);
774  if (isa<MemRefType>(oldTy) && getTypeConverter()->canConvertToBarePtr(
775  cast<BaseMemRefType>(oldTy))) {
776  MemRefDescriptor memrefDesc(newOperand);
777  newOperand = memrefDesc.allocatedPtr(rewriter, loc);
778  } else if (isa<UnrankedMemRefType>(oldTy)) {
779  // Unranked memref is not supported in the bare pointer calling
780  // convention.
781  return failure();
782  }
783  updatedOperands.push_back(newOperand);
784  }
785  } else {
786  updatedOperands = llvm::to_vector<4>(adaptor.getOperands());
 // NOTE(review): the result of copyUnrankedDescriptors is deliberately
 // discarded here; a failure would go unnoticed.
787  (void)copyUnrankedDescriptors(rewriter, loc, op.getOperands().getTypes(),
788  updatedOperands,
789  /*toDynamic=*/true);
790  }
791 
792  // If ReturnOp has 0 or 1 operand, create it and return immediately.
793  if (numArguments <= 1) {
794  rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(
795  op, TypeRange(), updatedOperands, op->getAttrs());
796  return success();
797  }
798 
799  // Otherwise, we need to pack the arguments into an LLVM struct type before
800  // returning.
801  auto packedType = getTypeConverter()->packFunctionResults(
802  op.getOperandTypes(), useBarePtrCallConv);
803  if (!packedType) {
804  return rewriter.notifyMatchFailure(op, "could not convert result types");
805  }
806 
 // Start from a poison struct and insert each operand at its own index.
807  Value packed = LLVM::PoisonOp::create(rewriter, loc, packedType);
808  for (auto [idx, operand] : llvm::enumerate(updatedOperands)) {
809  packed = LLVM::InsertValueOp::create(rewriter, loc, packed, operand, idx);
810  }
811  rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, TypeRange(), packed,
812  op->getAttrs());
813  return success();
814 }
815 
// NOTE(review): the declarator line (listing line 816) is absent from this
// view; the parameter list below continues it.
// Visible behaviour: registers a type-attribute conversion on `typeConverter`
// that turns a gpu::AddressSpaceAttr on a BaseMemRefType into the integer
// attribute produced by wrapNumericMemorySpace for `mapping(<address space>)`.
817  TypeConverter &typeConverter, const MemorySpaceMapping &mapping) {
818  typeConverter.addTypeAttributeConversion(
 // `mapping` is captured by copy into the registered callback.
819  [mapping](BaseMemRefType type, gpu::AddressSpaceAttr memorySpaceAttr) {
820  gpu::AddressSpace memorySpace = memorySpaceAttr.getValue();
821  unsigned addressSpace = mapping(memorySpace);
822  return wrapNumericMemorySpace(memorySpaceAttr.getContext(),
823  addressSpace);
824  });
825 }
static LLVM::GlobalOp getDynamicSharedMemorySymbol(ConversionPatternRewriter &rewriter, gpu::GPUModuleOp moduleOp, gpu::DynamicSharedMemoryOp op, const LLVMTypeConverter *typeConverter, MemRefType memrefType, unsigned alignmentBit)
Generates a symbol with 0-sized array type for dynamic shared memory usage, or uses existing symbol.
static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space)
static SmallString< 16 > getUniqueSymbolName(Operation *moduleOp, StringRef prefix)
static Value scalarizeVectorOpHelper(Operation *op, ValueRange operands, Type llvm1DVectorTy, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Helper for impl::scalarizeVectorOp.
static std::string diag(const llvm::Value &value)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class provides a shared interface for ranked and unranked memref types.
Definition: BuiltinTypes.h:104
This class represents an argument of a Block.
Definition: Value.h:309
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:108
UnitAttr getUnitAttr()
Definition: Builders.cpp:98
IntegerType getI64Type()
Definition: Builders.cpp:65
IntegerType getI32Type()
Definition: Builders.cpp:63
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:112
IntegerType getIntegerType(unsigned width)
Definition: Builders.cpp:67
Ty getType(Args &&...args)
Get or construct an instance of the type Ty with provided arguments.
Definition: Builders.h:91
StringAttr getStringAttr(const Twine &bytes)
Definition: Builders.cpp:262
MLIRContext * getContext() const
Definition: Builders.h:56
IndexType getIndexType()
Definition: Builders.cpp:51
IntegerType getI8Type()
Definition: Builders.cpp:59
DictionaryAttr getDictionaryAttr(ArrayRef< NamedAttribute > value)
Definition: Builders.cpp:104
NamedAttribute getNamedAttr(StringRef name, Attribute val)
Definition: Builders.cpp:94
FloatType getF64Type()
Definition: Builders.cpp:45
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
Definition: Builders.h:98
This class implements a pattern rewriter for use with ConversionPatterns.
void replaceOp(Operation *op, ValueRange newValues) override
Replace the given operation with the new values.
FailureOr< Block * > convertRegionTypes(Region *region, const TypeConverter &converter, TypeConverter::SignatureConversion *entryConversion=nullptr)
Apply a signature conversion to each block in the given region.
void eraseOp(Operation *op) override
PatternRewriter hook for erasing a dead operation.
const TypeConverter * typeConverter
An optional type converter for use by this pattern.
MemRefDescriptor createMemRefDescriptor(Location loc, MemRefType memRefType, Value allocatedPtr, Value alignedPtr, ArrayRef< Value > sizes, ArrayRef< Value > strides, ConversionPatternRewriter &rewriter) const
Creates and populates a canonical memref descriptor struct.
Definition: Pattern.cpp:190
void getMemRefDescriptorSizes(Location loc, MemRefType memRefType, ValueRange dynamicSizes, ConversionPatternRewriter &rewriter, SmallVectorImpl< Value > &sizes, SmallVectorImpl< Value > &strides, Value &size, bool sizeInBytes=true) const
Computes sizes, strides and buffer size of memRefType with identity layout.
Definition: Pattern.cpp:88
const LLVMTypeConverter * getTypeConverter() const
Definition: Pattern.cpp:27
LLVM::LLVMDialect & getDialect() const
Returns the LLVM dialect.
Definition: Pattern.cpp:32
LogicalResult copyUnrankedDescriptors(OpBuilder &builder, Location loc, TypeRange origTypes, SmallVectorImpl< Value > &operands, bool toDynamic) const
Copies the memory descriptor for any operands that were unranked descriptors originally to heap-allocated memory (if toDynamic is true) or to stack-allocated memory (otherwise).
Definition: Pattern.cpp:278
This class contains all of the information necessary to report a diagnostic to the DiagnosticEngine.
Definition: Diagnostics.h:155
An instance of this location represents a tuple of file, line number, and column number.
Definition: Location.h:174
Conversion from types to the LLVM IR dialect.
Definition: TypeConverter.h:35
Type packFunctionResults(TypeRange types, bool useBarePointerCallConv=false, SmallVector< SmallVector< Type >> *groupedTypes=nullptr, int64_t *numConvertedTypes=nullptr) const
Convert a non-empty list of types to be returned from a function into an LLVM-compatible type.
const LowerToLLVMOptions & getOptions() const
Type convertFunctionSignature(FunctionType funcTy, bool isVariadic, bool useBarePtrCallConv, SignatureConversion &result) const
Convert a function type.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
FailureOr< unsigned > getMemRefAddressSpace(BaseMemRefType type) const
Return the LLVM address space corresponding to the memory space of the memref type `type`, or failure if the memory space cannot be converted to an integer.
LocationAttr findInstanceOfOrUnknown()
Return an instance of the given location type if one is nested under the current location, else return an unknown location.
Definition: Location.h:60
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
Definition: Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:63
Helper class to produce LLVM dialect operations extracting or inserting elements of a MemRef descriptor.
Definition: MemRefBuilder.h:33
static MemRefDescriptor fromStaticShape(OpBuilder &builder, Location loc, const LLVMTypeConverter &typeConverter, MemRefType type, Value memory)
Builds IR creating a MemRef descriptor that represents `type` and populates it with static shape and stride information extracted from the type.
Value allocatedPtr(OpBuilder &builder, Location loc)
Builds IR extracting the allocated pointer from the descriptor.
NamedAttrList is an array of NamedAttributes that tracks whether it is sorted and does some basic work to remain sorted.
Attribute erase(StringAttr name)
Erase the attribute with the given name from the list.
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:348
This class helps build Operations.
Definition: Builders.h:207
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:431
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:398
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:457
A trait used to provide symbol table functionalities to a region operation.
Definition: SymbolTable.h:452
StringAttr getIdentifier() const
Return the name of this operation as a StringAttr.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition: Operation.h:512
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:686
Operation * getParentWithTrait()
Returns the closest surrounding parent operation with trait Trait.
Definition: Operation.h:248
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
result_type_range getResultTypes()
Definition: Operation.h:428
iterator_range< OpIterator > getOps()
Definition: Region.h:172
Block & front()
Definition: Region.h:65
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure, and provide a callback to populate a diagnostic with the reason why the failure occurred.
Definition: PatternMatch.h:726
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
Definition: PatternMatch.h:638
void inlineRegionBefore(Region &region, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (replacing `op`).
Definition: PatternMatch.h:529
static StringRef getSymbolAttrName()
Return the name of the attribute used for symbol names.
Definition: SymbolTable.h:76
static Operation * lookupSymbolIn(Operation *op, StringAttr symbol)
Returns the operation registered with the given symbol name with the regions of 'symbolTableOp'.
This class provides all of the information necessary to convert a type signature.
std::optional< InputMapping > getInputMapping(unsigned input) const
Get the input mapping for the given argument.
void remapInput(unsigned origInputNo, ArrayRef< Value > replacements)
Remap an input of the original signature to replacements values.
Type conversion class.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
void addTypeAttributeConversion(FnT &&callback)
Register a conversion function for attributes within types.
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:37
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
Definition: Types.h:74
bool isIntOrFloat() const
Return true if this is an integer (of any signedness) or a float type.
Definition: Types.cpp:116
unsigned getIntOrFloatBitWidth() const
Return the bit width of an integer or a float type, assert failure on other types.
Definition: Types.cpp:122
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
LogicalResult handleMultidimensionalVectors(Operation *op, ValueRange operands, const LLVMTypeConverter &typeConverter, std::function< Value(Type, ValueRange)> createOperand, ConversionPatternRewriter &rewriter)
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Unrolls op to array/vector elements.
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition: Remarks.h:491
Include the generated interface declarations.
InFlightDiagnostic emitWarning(Location loc)
Utility method to emit a warning message using this location.
std::function< unsigned(gpu::AddressSpace)> MemorySpaceMapping
A function that maps a MemorySpace enum to a target-specific integer value.
Definition: GPUCommonPass.h:70
LLVM::LLVMFuncOp getOrDefineFunction(Operation *moduleOp, Location loc, OpBuilder &b, StringRef name, LLVM::LLVMFunctionType type)
Note that these functions don't take a SymbolTable because GPU module lowerings can have name collisions as an intermediate state.
void populateGpuMemorySpaceAttributeConversions(TypeConverter &typeConverter, const MemorySpaceMapping &mapping)
Populates memory space attribute conversion rules for lowering gpu.address_space to integer values.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed; this helps unify some of the attribute construction methods.
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc, Operation *moduleOp, Type llvmI8, StringRef namePrefix, StringRef str, uint64_t alignment=0, unsigned addrSpace=0)
Create a global that contains the given string.
LogicalResult matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.
LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.