//===- GPUDialect.cpp - MLIR Dialect for GPU Kernels implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the GPU kernel-related dialect and its operations.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/IR/GPUDialect.h"

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/DialectImplementation.h"
#include "mlir/IR/FunctionImplementation.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Transforms/InliningUtils.h"
#include "llvm/ADT/TypeSwitch.h"

using namespace mlir;
using namespace mlir::gpu;

#include "mlir/Dialect/GPU/IR/GPUOpsDialect.cpp.inc"

//===----------------------------------------------------------------------===//
// GPU Device Mapping Attributes
//===----------------------------------------------------------------------===//

int64_t GPUBlockMappingAttr::getMappingId() const {
  return static_cast<int64_t>(getBlock());
}

int64_t GPUThreadMappingAttr::getMappingId() const {
  return static_cast<int64_t>(getThread());
}

//===----------------------------------------------------------------------===//
// MMAMatrixType
//===----------------------------------------------------------------------===//
51 
53  StringRef operand) {
54  return Base::get(elementType.getContext(), shape, elementType, operand);
55 }

MMAMatrixType
MMAMatrixType::getChecked(function_ref<InFlightDiagnostic()> emitError,
                          ArrayRef<int64_t> shape, Type elementType,
                          StringRef operand) {
  return Base::getChecked(emitError, elementType.getContext(), shape,
                          elementType, operand);
}

unsigned MMAMatrixType::getNumDims() const { return getImpl()->numDims; }

ArrayRef<int64_t> MMAMatrixType::getShape() const {
  return getImpl()->getShape();
}

Type MMAMatrixType::getElementType() const { return getImpl()->elementType; }

StringRef MMAMatrixType::getOperand() const { return getImpl()->getOperand(); }

bool MMAMatrixType::isValidElementType(Type elementType) {
  return elementType.isF16() || elementType.isF32();
}

LogicalResult
MMAMatrixType::verify(function_ref<InFlightDiagnostic()> emitError,
                      ArrayRef<int64_t> shape, Type elementType,
                      StringRef operand) {
  if (!operand.equals("AOp") && !operand.equals("BOp") &&
      !operand.equals("COp"))
    return emitError() << "operand expected to be one of AOp, BOp or COp";

  if (shape.size() != 2)
    return emitError() << "MMAMatrixType must have exactly two dimensions";

  if (!MMAMatrixType::isValidElementType(elementType))
    return emitError() << "MMAMatrixType elements must be F16 or F32";

  return success();
}

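// For illustration (not in the original source): a type accepted by this
// verifier prints as !gpu.mma_matrix<16x16xf16, "AOp"> -- exactly two
// dimensions, an f16 or f32 element type, and one of the three operand tags.
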
//===----------------------------------------------------------------------===//
// GPUDialect
//===----------------------------------------------------------------------===//

/// GPU memory space identifiers.
enum GPUMemorySpace {
  /// Generic memory space identifier.
  kGenericMemorySpace = 0,

  /// Global memory space identifier.
  kGlobalMemorySpace = 1,

  /// Shared memory space identifier.
  kSharedMemorySpace = 3
};

bool GPUDialect::isKernel(Operation *op) {
  UnitAttr isKernelAttr = op->getAttrOfType<UnitAttr>(getKernelFuncAttrName());
  return static_cast<bool>(isKernelAttr);
}

namespace {
/// This class defines the interface for handling inlining with gpu
/// operations.
struct GPUInlinerInterface : public DialectInlinerInterface {
  using DialectInlinerInterface::DialectInlinerInterface;

  /// All gpu dialect ops can be inlined.
  bool isLegalToInline(Operation *, Region *, bool,
                       BlockAndValueMapping &) const final {
    return true;
  }
};
} // namespace

void GPUDialect::initialize() {
  addTypes<AsyncTokenType>();
  addTypes<MMAMatrixType>();
  addOperations<
#define GET_OP_LIST
#include "mlir/Dialect/GPU/IR/GPUOps.cpp.inc"
      >();
  addAttributes<
#define GET_ATTRDEF_LIST
#include "mlir/Dialect/GPU/IR/GPUOpsAttributes.cpp.inc"
      >();
  addInterfaces<GPUInlinerInterface>();
}

Type GPUDialect::parseType(DialectAsmParser &parser) const {
  // Parse the main keyword for the type.
  StringRef keyword;
  if (parser.parseKeyword(&keyword))
    return Type();
  MLIRContext *context = getContext();

  // Handle 'async token' types.
  if (keyword == "async.token")
    return AsyncTokenType::get(context);

  if (keyword == "mma_matrix") {
    SMLoc beginLoc = parser.getNameLoc();

    // Parse '<'.
    if (parser.parseLess())
      return nullptr;

    // Parse the size and elementType.
    SmallVector<int64_t> shape;
    Type elementType;
    if (parser.parseDimensionList(shape, /*allowDynamic=*/false) ||
        parser.parseType(elementType))
      return nullptr;

    // Parse ','.
    if (parser.parseComma())
      return nullptr;

    // Parse operand.
    std::string operand;
    if (failed(parser.parseOptionalString(&operand)))
      return nullptr;

    // Parse '>'.
    if (parser.parseGreater())
      return nullptr;

    return MMAMatrixType::getChecked(mlir::detail::getDefaultDiagnosticEmitFn(
                                         parser.getEncodedSourceLoc(beginLoc)),
                                     shape, elementType, operand);
  }

  parser.emitError(parser.getNameLoc(), "unknown gpu type: " + keyword);
  return Type();
}

void GPUDialect::printType(Type type, DialectAsmPrinter &os) const {
  TypeSwitch<Type>(type)
      .Case<AsyncTokenType>([&](Type) { os << "async.token"; })
      .Case<MMAMatrixType>([&](MMAMatrixType fragTy) {
        os << "mma_matrix<";
        auto shape = fragTy.getShape();
        for (auto dim = shape.begin(), e = shape.end() - 1; dim != e; ++dim)
          os << *dim << 'x';
        os << shape.back() << 'x' << fragTy.getElementType();
        os << ", \"" << fragTy.getOperand() << "\"" << '>';
      })
      .Default([](Type) { llvm_unreachable("unexpected 'gpu' type kind"); });
}

LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
                                                   NamedAttribute attr) {
  if (!attr.getValue().isa<UnitAttr>() ||
      attr.getName() != getContainerModuleAttrName())
    return success();

  auto module = dyn_cast<ModuleOp>(op);
  if (!module)
    return op->emitError("expected '")
           << getContainerModuleAttrName() << "' attribute to be attached to '"
           << ModuleOp::getOperationName() << '\'';

  auto walkResult = module.walk([&module](LaunchFuncOp launchOp) -> WalkResult {
    // Ignore launches that are nested more or less deeply than functions in
    // the module we are currently checking.
    if (!launchOp->getParentOp() ||
        launchOp->getParentOp()->getParentOp() != module)
      return success();

    // Ignore launch ops with missing attributes here. The errors will be
    // reported by the verifiers of those ops.
    if (!launchOp->getAttrOfType<SymbolRefAttr>(
            LaunchFuncOp::getKernelAttrName(launchOp->getName())))
      return success();

    // Check that `launch_func` refers to a well-formed GPU kernel module.
    StringAttr kernelModuleName = launchOp.getKernelModuleName();
    auto kernelModule = module.lookupSymbol<GPUModuleOp>(kernelModuleName);
    if (!kernelModule)
      return launchOp.emitOpError()
             << "kernel module '" << kernelModuleName.getValue()
             << "' is undefined";

    // Check that `launch_func` refers to a well-formed kernel function.
    Operation *kernelFunc = module.lookupSymbol(launchOp.getKernelAttr());
    if (!kernelFunc)
      return launchOp.emitOpError("kernel function '")
             << launchOp.getKernel() << "' is undefined";
    auto kernelConvertedFunction = dyn_cast<FunctionOpInterface>(kernelFunc);
    if (!kernelConvertedFunction) {
      InFlightDiagnostic diag = launchOp.emitOpError()
                                << "referenced kernel '" << launchOp.getKernel()
                                << "' is not a function";
      diag.attachNote(kernelFunc->getLoc()) << "see the kernel definition here";
      return diag;
    }

    if (!kernelFunc->getAttrOfType<mlir::UnitAttr>(
            GPUDialect::getKernelFuncAttrName()))
      return launchOp.emitOpError("kernel function is missing the '")
             << GPUDialect::getKernelFuncAttrName() << "' attribute";

    // TODO: If the kernel isn't a GPU function (which happens during separate
    // compilation), do not check type correspondence as it would require the
    // verifier to be aware of the type conversion.
    auto kernelGPUFunction = dyn_cast<gpu::GPUFuncOp>(kernelFunc);
    if (!kernelGPUFunction)
      return success();

    unsigned actualNumArguments = launchOp.getNumKernelOperands();
    unsigned expectedNumArguments = kernelGPUFunction.getNumArguments();
    if (expectedNumArguments != actualNumArguments)
      return launchOp.emitOpError("got ")
             << actualNumArguments << " kernel operands but expected "
             << expectedNumArguments;

    auto functionType = kernelGPUFunction.getFunctionType();
    for (unsigned i = 0; i < expectedNumArguments; ++i) {
      if (launchOp.getKernelOperand(i).getType() != functionType.getInput(i)) {
        return launchOp.emitOpError("type of function argument ")
               << i << " does not match";
      }
    }

    return success();
  });

  return walkResult.wasInterrupted() ? failure() : success();
}

/// Parses an optional list of async operands with an optional leading keyword.
/// (`async`)? (`[` ssa-id-list `]`)?
///
/// This method is used by the tablegen assembly format for async ops as well.
static ParseResult parseAsyncDependencies(
    OpAsmParser &parser, Type &asyncTokenType,
    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &asyncDependencies) {
  auto loc = parser.getCurrentLocation();
  if (succeeded(parser.parseOptionalKeyword("async"))) {
    if (parser.getNumResults() == 0)
      return parser.emitError(loc, "needs to be named when marked 'async'");
    asyncTokenType = parser.getBuilder().getType<AsyncTokenType>();
  }
  return parser.parseOperandList(asyncDependencies,
                                 OpAsmParser::Delimiter::OptionalSquare);
}

/// Prints optional async dependencies with their leading keyword.
/// (`async`)? (`[` ssa-id-list `]`)?
// Used by the tablegen assembly format for several async ops.
static void printAsyncDependencies(OpAsmPrinter &printer, Operation *op,
                                   Type asyncTokenType,
                                   OperandRange asyncDependencies) {
  if (asyncTokenType)
    printer << "async";
  if (asyncDependencies.empty())
    return;
  if (asyncTokenType)
    printer << ' ';
  printer << '[';
  llvm::interleaveComma(asyncDependencies, printer);
  printer << ']';
}

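// Illustrative only (not in the original source): with these two helpers the
// async forms of gpu ops read as follows, where %t0 is assumed to be a
// previously produced !gpu.async.token:
//   %t1 = gpu.wait async [%t0]   // named result, so 'async' is allowed
//   gpu.wait [%t0, %t1]          // synchronous wait on two tokens
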
//===----------------------------------------------------------------------===//
// AllReduceOp
//===----------------------------------------------------------------------===//

static bool verifyReduceOpAndType(gpu::AllReduceOperation opName,
                                  Type resType) {
  return (opName != gpu::AllReduceOperation::AND &&
          opName != gpu::AllReduceOperation::OR &&
          opName != gpu::AllReduceOperation::XOR) ||
         resType.isa<IntegerType>();
}

LogicalResult gpu::AllReduceOp::verifyRegions() {
  if (getBody().empty() != getOp().has_value())
    return emitError("expected either an op attribute or a non-empty body");
  if (!getBody().empty()) {
    if (getBody().getNumArguments() != 2)
      return emitError("expected two region arguments");
    for (auto argument : getBody().getArguments()) {
      if (argument.getType() != getType())
        return emitError("incorrect region argument type");
    }
    unsigned yieldCount = 0;
    for (Block &block : getBody()) {
      if (auto yield = dyn_cast<gpu::YieldOp>(block.getTerminator())) {
        if (yield.getNumOperands() != 1)
          return emitError("expected one gpu.yield operand");
        if (yield.getOperand(0).getType() != getType())
          return emitError("incorrect gpu.yield type");
        ++yieldCount;
      }
    }
    if (yieldCount == 0)
      return emitError("expected gpu.yield op in region");
  } else {
    gpu::AllReduceOperation opName = *getOp();
    if (!verifyReduceOpAndType(opName, getType())) {
      return emitError()
             << '`' << gpu::stringifyAllReduceOperation(opName)
             << "` accumulator is only compatible with Integer type";
    }
  }
  return success();
}

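// For illustration (not in the original source), the two accepted forms as
// documented for gpu.all_reduce -- an op attribute or an explicit region:
//   %1 = gpu.all_reduce add %0 {} : (f32) -> (f32)
//   %2 = gpu.all_reduce %0 {
//   ^bb(%lhs : f32, %rhs : f32):
//     %sum = arith.addf %lhs, %rhs : f32
//     "gpu.yield"(%sum) : (f32) -> ()
//   } : (f32) -> (f32)
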
// TODO: Support optional custom attributes (without dialect prefix).
static ParseResult parseAllReduceOperation(AsmParser &parser,
                                           AllReduceOperationAttr &attr) {
  StringRef enumStr;
  if (!parser.parseOptionalKeyword(&enumStr)) {
    Optional<AllReduceOperation> op = gpu::symbolizeAllReduceOperation(enumStr);
    if (!op)
      return parser.emitError(parser.getCurrentLocation(), "invalid op kind");
    attr = AllReduceOperationAttr::get(parser.getContext(), *op);
  }
  return success();
}

static void printAllReduceOperation(AsmPrinter &printer, Operation *op,
                                    AllReduceOperationAttr attr) {
  if (attr)
    attr.print(printer);
}

//===----------------------------------------------------------------------===//
// SubgroupReduceOp
//===----------------------------------------------------------------------===//

LogicalResult gpu::SubgroupReduceOp::verify() {
  gpu::AllReduceOperation opName = getOp();
  if (!verifyReduceOpAndType(opName, getType())) {
    return emitError() << '`' << gpu::stringifyAllReduceOperation(opName)
                       << "` accumulator is only compatible with Integer type";
  }
  return success();
}

//===----------------------------------------------------------------------===//
// AsyncOpInterface
//===----------------------------------------------------------------------===//

void gpu::addAsyncDependency(Operation *op, Value token) {
  op->insertOperands(0, {token});
  if (!op->template hasTrait<OpTrait::AttrSizedOperandSegments>())
    return;
  auto attrName =
      OpTrait::AttrSizedOperandSegments<void>::getOperandSegmentSizeAttr();
  auto sizeAttr = op->template getAttrOfType<DenseI32ArrayAttr>(attrName);

  // Async dependencies are the only variadic operand.
  if (!sizeAttr)
    return;

  SmallVector<int32_t, 8> sizes(sizeAttr.asArrayRef());
  ++sizes.front();
  op->setAttr(attrName, Builder(op->getContext()).getDenseI32ArrayAttr(sizes));
}

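// Hypothetical usage sketch (not part of the original source): a transform
// that chains an existing token %token into an async op would write
//   gpu::addAsyncDependency(asyncOp, token);
// and rely on this helper to keep the operand-segment sizes attribute
// consistent with the newly inserted operand.
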
//===----------------------------------------------------------------------===//
// LaunchOp
//===----------------------------------------------------------------------===//

void LaunchOp::build(OpBuilder &builder, OperationState &result,
                     Value gridSizeX, Value gridSizeY, Value gridSizeZ,
                     Value blockSizeX, Value blockSizeY, Value blockSizeZ,
                     Value dynamicSharedMemorySize, Type asyncTokenType,
                     ValueRange asyncDependencies) {
  result.addOperands(asyncDependencies);
  if (asyncTokenType)
    result.types.push_back(builder.getType<AsyncTokenType>());

  // Add grid and block sizes as op operands, followed by the data operands.
  result.addOperands(
      {gridSizeX, gridSizeY, gridSizeZ, blockSizeX, blockSizeY, blockSizeZ});
  if (dynamicSharedMemorySize)
    result.addOperands(dynamicSharedMemorySize);

  // Create a kernel body region with kNumConfigRegionAttributes + N arguments,
  // where the first kNumConfigRegionAttributes arguments have `index` type and
  // the rest have the same types as the data operands.
  Region *kernelRegion = result.addRegion();
  Block *body = new Block();
  for (unsigned i = 0; i < kNumConfigRegionAttributes; ++i)
    body->addArgument(builder.getIndexType(), result.location);
  kernelRegion->push_back(body);
  SmallVector<int32_t, 8> segmentSizes(8, 1);
  segmentSizes.front() = asyncDependencies.size();
  segmentSizes.back() = dynamicSharedMemorySize ? 1 : 0;
  result.addAttribute(getOperandSegmentSizeAttr(),
                      builder.getDenseI32ArrayAttr(segmentSizes));
}

KernelDim3 LaunchOp::getBlockIds() {
  assert(!getBody().empty() && "LaunchOp body must not be empty.");
  auto args = getBody().getArguments();
  return KernelDim3{args[0], args[1], args[2]};
}

KernelDim3 LaunchOp::getThreadIds() {
  assert(!getBody().empty() && "LaunchOp body must not be empty.");
  auto args = getBody().getArguments();
  return KernelDim3{args[3], args[4], args[5]};
}

KernelDim3 LaunchOp::getGridSize() {
  assert(!getBody().empty() && "LaunchOp body must not be empty.");
  auto args = getBody().getArguments();
  return KernelDim3{args[6], args[7], args[8]};
}

KernelDim3 LaunchOp::getBlockSize() {
  assert(!getBody().empty() && "LaunchOp body must not be empty.");
  auto args = getBody().getArguments();
  return KernelDim3{args[9], args[10], args[11]};
}

KernelDim3 LaunchOp::getGridSizeOperandValues() {
  auto operands = getOperands().drop_front(getAsyncDependencies().size());
  return KernelDim3{operands[0], operands[1], operands[2]};
}

KernelDim3 LaunchOp::getBlockSizeOperandValues() {
  auto operands = getOperands().drop_front(getAsyncDependencies().size());
  return KernelDim3{operands[3], operands[4], operands[5]};
}

LogicalResult LaunchOp::verifyRegions() {
  // Kernel launch takes kNumConfigOperands leading operands for grid/block
  // sizes and transforms them into kNumConfigRegionAttributes region arguments
  // for block/thread identifiers and grid/block sizes.
  if (!getBody().empty()) {
    if (getBody().getNumArguments() !=
        LaunchOp::kNumConfigOperands + getNumOperands() -
            (getDynamicSharedMemorySize() ? 1 : 0) -
            getAsyncDependencies().size())
      return emitOpError("unexpected number of region arguments");
  }

  // Block terminators without successors are expected to exit the kernel
  // region and must be `gpu.terminator`.
  for (Block &block : getBody()) {
    if (block.empty())
      continue;
    if (block.back().getNumSuccessors() != 0)
      continue;
    if (!isa<gpu::TerminatorOp>(&block.back())) {
      return block.back()
          .emitError()
          .append("expected '", gpu::TerminatorOp::getOperationName(),
                  "' or a terminator with successors")
          .attachNote(getLoc())
          .append("in '", LaunchOp::getOperationName(), "' body region");
    }
  }

  if (getNumResults() == 0 && getAsyncToken())
    return emitOpError("needs to be named when async keyword is specified");

  return success();
}

// Pretty-print the kernel grid/block size assignment as
//   (%iter-x, %iter-y, %iter-z) in
//   (%size-x = %ssa-use, %size-y = %ssa-use, %size-z = %ssa-use)
// where %size-* and %iter-* will correspond to the body region arguments.
static void printSizeAssignment(OpAsmPrinter &p, KernelDim3 size,
                                KernelDim3 operands, KernelDim3 ids) {
  p << '(' << ids.x << ", " << ids.y << ", " << ids.z << ") in (";
  p << size.x << " = " << operands.x << ", ";
  p << size.y << " = " << operands.y << ", ";
  p << size.z << " = " << operands.z << ')';
}

void LaunchOp::print(OpAsmPrinter &p) {
  if (getAsyncToken()) {
    p << " async";
    if (!getAsyncDependencies().empty())
      p << " [" << getAsyncDependencies() << ']';
  }
  // Print the launch configuration.
  p << ' ' << getBlocksKeyword();
  printSizeAssignment(p, getGridSize(), getGridSizeOperandValues(),
                      getBlockIds());
  p << ' ' << getThreadsKeyword();
  printSizeAssignment(p, getBlockSize(), getBlockSizeOperandValues(),
                      getThreadIds());
  if (getDynamicSharedMemorySize())
    p << ' ' << getDynamicSharedMemorySizeKeyword() << ' '
      << getDynamicSharedMemorySize();

  p << ' ';
  p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
  p.printOptionalAttrDict((*this)->getAttrs(), /*elidedAttrs=*/{
                              LaunchOp::getOperandSegmentSizeAttr()});
}

// Parse the size assignment blocks for blocks and threads. These have the form
//   (%region_arg, %region_arg, %region_arg) in
//   (%region_arg = %operand, %region_arg = %operand, %region_arg = %operand)
// where %region_arg are percent-identifiers for the region arguments to be
// introduced further (SSA defs), and %operand are percent-identifiers for the
// SSA value uses.
static ParseResult
parseSizeAssignment(OpAsmParser &parser,
                    MutableArrayRef<OpAsmParser::UnresolvedOperand> sizes,
                    MutableArrayRef<OpAsmParser::UnresolvedOperand> regionSizes,
                    MutableArrayRef<OpAsmParser::UnresolvedOperand> indices) {
  assert(indices.size() == 3 && "space for three indices expected");
  SmallVector<OpAsmParser::UnresolvedOperand, 3> args;
  if (parser.parseOperandList(args, OpAsmParser::Delimiter::Paren,
                              /*allowResultNumber=*/false) ||
      parser.parseKeyword("in") || parser.parseLParen())
    return failure();
  std::move(args.begin(), args.end(), indices.begin());

  for (int i = 0; i < 3; ++i) {
    if (i != 0 && parser.parseComma())
      return failure();
    if (parser.parseOperand(regionSizes[i], /*allowResultNumber=*/false) ||
        parser.parseEqual() || parser.parseOperand(sizes[i]))
      return failure();
  }

  return parser.parseRParen();
}

/// Parses a Launch operation.
/// operation ::= `gpu.launch` (`async` `[` ssa-id-list `]`)?
///               `blocks` `(` ssa-id-list `)` `in` ssa-reassignment
///               `threads` `(` ssa-id-list `)` `in` ssa-reassignment
///               region attr-dict?
/// ssa-reassignment ::= `(` ssa-id `=` ssa-use (`,` ssa-id `=` ssa-use)* `)`
ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
  // Sizes of the grid and block.
  SmallVector<OpAsmParser::UnresolvedOperand, LaunchOp::kNumConfigOperands>
      sizes(LaunchOp::kNumConfigOperands);
  MutableArrayRef<OpAsmParser::UnresolvedOperand> sizesRef(sizes);

  // Actual (data) operands passed to the kernel.
  SmallVector<OpAsmParser::UnresolvedOperand, 4> dataOperands;

  // Region arguments to be created.
  SmallVector<OpAsmParser::UnresolvedOperand, 16> regionArgs(
      LaunchOp::kNumConfigRegionAttributes);
  MutableArrayRef<OpAsmParser::UnresolvedOperand> regionArgsRef(regionArgs);

  // Parse optional async dependencies.
  SmallVector<OpAsmParser::UnresolvedOperand, 4> asyncDependencies;
  Type asyncTokenType;
  if (failed(
          parseAsyncDependencies(parser, asyncTokenType, asyncDependencies)) ||
      parser.resolveOperands(asyncDependencies, asyncTokenType,
                             result.operands))
    return failure();
  if (parser.getNumResults() > 0)
    result.types.push_back(asyncTokenType);

  // Parse the size assignment segments: the first segment assigns grid sizes
  // and defines values for block identifiers; the second segment assigns block
  // sizes and defines values for thread identifiers. In the region argument
  // list, identifiers precede sizes, and block-related values precede
  // thread-related values.
  if (parser.parseKeyword(LaunchOp::getBlocksKeyword().data()) ||
      parseSizeAssignment(parser, sizesRef.take_front(3),
                          regionArgsRef.slice(6, 3),
                          regionArgsRef.slice(0, 3)) ||
      parser.parseKeyword(LaunchOp::getThreadsKeyword().data()) ||
      parseSizeAssignment(parser, sizesRef.drop_front(3),
                          regionArgsRef.slice(9, 3),
                          regionArgsRef.slice(3, 3)) ||
      parser.resolveOperands(sizes, parser.getBuilder().getIndexType(),
                             result.operands))
    return failure();

  OpAsmParser::UnresolvedOperand dynamicSharedMemorySize;
  bool hasDynamicSharedMemorySize = false;
  if (!parser.parseOptionalKeyword(
          LaunchOp::getDynamicSharedMemorySizeKeyword())) {
    hasDynamicSharedMemorySize = true;
    if (parser.parseOperand(dynamicSharedMemorySize) ||
        parser.resolveOperand(dynamicSharedMemorySize,
                              parser.getBuilder().getI32Type(),
                              result.operands))
      return failure();
  }

  // Introduce the body region and parse it. The region has
  // kNumConfigRegionAttributes arguments that correspond to
  // block/thread identifiers and grid/block sizes, all of the `index` type.
  Type index = parser.getBuilder().getIndexType();
  SmallVector<Type, LaunchOp::kNumConfigRegionAttributes> dataTypes(
      LaunchOp::kNumConfigRegionAttributes, index);

  SmallVector<OpAsmParser::Argument> regionArguments;
  for (auto ssaValueAndType : llvm::zip(regionArgs, dataTypes)) {
    OpAsmParser::Argument arg;
    arg.ssaName = std::get<0>(ssaValueAndType);
    arg.type = std::get<1>(ssaValueAndType);
    regionArguments.push_back(arg);
  }

  Region *body = result.addRegion();
  if (parser.parseRegion(*body, regionArguments) ||
      parser.parseOptionalAttrDict(result.attributes))
    return failure();

  SmallVector<int32_t, 8> segmentSizes(8, 1);
  segmentSizes.front() = asyncDependencies.size();
  segmentSizes.back() = hasDynamicSharedMemorySize ? 1 : 0;
  result.addAttribute(LaunchOp::getOperandSegmentSizeAttr(),
                      parser.getBuilder().getDenseI32ArrayAttr(segmentSizes));
  return success();
}

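// For illustration (not in the original source): a launch in the syntax
// parsed above, where %c1 and %c4 are assumed index constants:
//   gpu.launch blocks(%bx, %by, %bz) in (%gx = %c1, %gy = %c1, %gz = %c1)
//              threads(%tx, %ty, %tz) in (%sx = %c4, %sy = %c1, %sz = %c1) {
//     ...
//     gpu.terminator
//   }
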
/// Simplify the gpu.launch when the range of a thread or block ID is
/// trivially known to be one.
struct FoldLaunchArguments : public OpRewritePattern<LaunchOp> {
  using OpRewritePattern<LaunchOp>::OpRewritePattern;
  LogicalResult matchAndRewrite(LaunchOp op,
                                PatternRewriter &rewriter) const override {
    // If the range implies a single value for `id`, replace `id`'s uses by
    // zero.
    Value zero;
    bool simplified = false;
    auto constPropIdUses = [&](Value id, Value size) {
      // Check if size is trivially one.
      if (!matchPattern(size, m_One()))
        return;
      if (!simplified) {
        // Create a zero value the first time.
        OpBuilder::InsertionGuard guard(rewriter);
        rewriter.setInsertionPointToStart(&op.getBody().front());
        zero =
            rewriter.create<arith::ConstantIndexOp>(op.getLoc(), /*value=*/0);
      }
      id.replaceAllUsesWith(zero);
      simplified = true;
    };
    constPropIdUses(op.getBlockIds().x, op.getGridSizeX());
    constPropIdUses(op.getBlockIds().y, op.getGridSizeY());
    constPropIdUses(op.getBlockIds().z, op.getGridSizeZ());
    constPropIdUses(op.getThreadIds().x, op.getBlockSizeX());
    constPropIdUses(op.getThreadIds().y, op.getBlockSizeY());
    constPropIdUses(op.getThreadIds().z, op.getBlockSizeZ());

    return success(simplified);
  }
};

void LaunchOp::getCanonicalizationPatterns(RewritePatternSet &rewrites,
                                           MLIRContext *context) {
  rewrites.add<FoldLaunchArguments>(context);
}

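// Illustrative only (not in the original source): if a grid size above is
// the constant 1 (e.g. %gz = %c1), FoldLaunchArguments rewrites every use of
// the matching ID %bz to a single arith.constant 0 inserted at the top of
// the launch body, which in turn enables further canonicalization.
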
//===----------------------------------------------------------------------===//
// LaunchFuncOp
//===----------------------------------------------------------------------===//

void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
                         GPUFuncOp kernelFunc, KernelDim3 gridSize,
                         KernelDim3 blockSize, Value dynamicSharedMemorySize,
                         ValueRange kernelOperands, Type asyncTokenType,
                         ValueRange asyncDependencies) {
  result.addOperands(asyncDependencies);
  if (asyncTokenType)
    result.types.push_back(builder.getType<AsyncTokenType>());

  // Add grid and block sizes as op operands, followed by the data operands.
  result.addOperands({gridSize.x, gridSize.y, gridSize.z, blockSize.x,
                      blockSize.y, blockSize.z});
  if (dynamicSharedMemorySize)
    result.addOperands(dynamicSharedMemorySize);
  result.addOperands(kernelOperands);
  auto kernelModule = kernelFunc->getParentOfType<GPUModuleOp>();
  auto kernelSymbol =
      SymbolRefAttr::get(kernelModule.getNameAttr(),
                         {SymbolRefAttr::get(kernelFunc.getNameAttr())});
  result.addAttribute(getKernelAttrName(result.name), kernelSymbol);
  SmallVector<int32_t, 9> segmentSizes(9, 1);
  segmentSizes.front() = asyncDependencies.size();
  segmentSizes[segmentSizes.size() - 2] = dynamicSharedMemorySize ? 1 : 0;
  segmentSizes.back() = static_cast<int32_t>(kernelOperands.size());
  result.addAttribute(getOperandSegmentSizeAttr(),
                      builder.getDenseI32ArrayAttr(segmentSizes));
}

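// For illustration (not in the original source): the kernel attribute built
// above is a nested symbol reference such as @kernels::@vecadd, whose root
// names the gpu.module and whose leaf names the gpu.func inside it.
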
StringAttr LaunchFuncOp::getKernelModuleName() {
  return getKernel().getRootReference();
}

StringAttr LaunchFuncOp::getKernelName() {
  return getKernel().getLeafReference();
}

unsigned LaunchFuncOp::getNumKernelOperands() {
  return getKernelOperands().size();
}

Value LaunchFuncOp::getKernelOperand(unsigned i) {
  return getKernelOperands()[i];
}

KernelDim3 LaunchFuncOp::getGridSizeOperandValues() {
  auto operands = getOperands().drop_front(getAsyncDependencies().size());
  return KernelDim3{operands[0], operands[1], operands[2]};
}

KernelDim3 LaunchFuncOp::getBlockSizeOperandValues() {
  auto operands = getOperands().drop_front(getAsyncDependencies().size());
  return KernelDim3{operands[3], operands[4], operands[5]};
}

LogicalResult LaunchFuncOp::verify() {
  auto module = (*this)->getParentOfType<ModuleOp>();
  if (!module)
    return emitOpError("expected to belong to a module");

  if (!module->getAttrOfType<UnitAttr>(
          GPUDialect::getContainerModuleAttrName()))
    return emitOpError("expected the closest surrounding module to have the '" +
                       GPUDialect::getContainerModuleAttrName() +
                       "' attribute");

  return success();
}

static ParseResult parseLaunchFuncOperands(
    OpAsmParser &parser,
    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &argNames,
    SmallVectorImpl<Type> &argTypes) {
  if (parser.parseOptionalKeyword("args"))
    return success();

  SmallVector<OpAsmParser::Argument> args;
  if (parser.parseArgumentList(args, OpAsmParser::Delimiter::Paren,
                               /*allowType=*/true))
    return failure();
  for (auto &arg : args) {
    argNames.push_back(arg.ssaName);
    argTypes.push_back(arg.type);
  }
  return success();
}

static void printLaunchFuncOperands(OpAsmPrinter &printer, Operation *,
                                    OperandRange operands, TypeRange types) {
  if (operands.empty())
    return;
  printer << "args(";
  llvm::interleaveComma(llvm::zip(operands, types), printer,
                        [&](const auto &pair) {
                          printer.printOperand(std::get<0>(pair));
                          printer << " : ";
                          printer.printType(std::get<1>(pair));
                        });
  printer << ")";
}

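// Illustrative only (not in the original source): a complete launch site in
// this syntax, with assumed SSA names, reads
//   gpu.launch_func @kernels::@vecadd
//       blocks in (%c1, %c1, %c1) threads in (%c32, %c1, %c1)
//       args(%buf : memref<?xf32>, %n : index)
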
//===----------------------------------------------------------------------===//
// ShuffleOp
//===----------------------------------------------------------------------===//

void ShuffleOp::build(OpBuilder &builder, OperationState &result, Value value,
                      int32_t offset, int32_t width, ShuffleMode mode) {
  build(builder, result, value,
        builder.create<arith::ConstantOp>(result.location,
                                          builder.getI32IntegerAttr(offset)),
        builder.create<arith::ConstantOp>(result.location,
                                          builder.getI32IntegerAttr(width)),
        mode);
}

//===----------------------------------------------------------------------===//
// GPUFuncOp
//===----------------------------------------------------------------------===//

/// Adds a new block argument that corresponds to buffers located in
/// workgroup memory.
BlockArgument GPUFuncOp::addWorkgroupAttribution(Type type, Location loc) {
  auto attrName = getNumWorkgroupAttributionsAttrName();
  auto attr = (*this)->getAttrOfType<IntegerAttr>(attrName);
  (*this)->setAttr(attrName,
                   IntegerAttr::get(attr.getType(), attr.getValue() + 1));
  return getBody().insertArgument(
      getFunctionType().getNumInputs() + attr.getInt(), type, loc);
}

/// Adds a new block argument that corresponds to buffers located in
/// private memory.
BlockArgument GPUFuncOp::addPrivateAttribution(Type type, Location loc) {
  // Buffers on the private memory always come after buffers on the workgroup
  // memory.
  return getBody().addArgument(type, loc);
}

void GPUFuncOp::build(OpBuilder &builder, OperationState &result,
                      StringRef name, FunctionType type,
                      TypeRange workgroupAttributions,
                      TypeRange privateAttributions,
                      ArrayRef<NamedAttribute> attrs) {
  result.addAttribute(::mlir::SymbolTable::getSymbolAttrName(),
                      builder.getStringAttr(name));
  result.addAttribute(getTypeAttrName(), TypeAttr::get(type));
  result.addAttribute(getNumWorkgroupAttributionsAttrName(),
                      builder.getI64IntegerAttr(workgroupAttributions.size()));
  result.addAttributes(attrs);
  Region *body = result.addRegion();
  Block *entryBlock = new Block;

  // TODO: Allow passing in proper locations here.
  for (Type argTy : type.getInputs())
    entryBlock->addArgument(argTy, result.location);
  for (Type argTy : workgroupAttributions)
    entryBlock->addArgument(argTy, result.location);
  for (Type argTy : privateAttributions)
    entryBlock->addArgument(argTy, result.location);

  body->getBlocks().push_back(entryBlock);
}

/// Parses a GPU function memory attribution.
///
/// memory-attribution ::= (`workgroup` `(` ssa-id-and-type-list `)`)?
///                        (`private` `(` ssa-id-and-type-list `)`)?
///
/// Note that this function parses only one of the two similar parts, with the
/// keyword provided as argument.
static ParseResult
parseAttributions(OpAsmParser &parser, StringRef keyword,
                  SmallVectorImpl<OpAsmParser::Argument> &args) {
  // If we could not parse the keyword, just assume an empty list and succeed.
  if (failed(parser.parseOptionalKeyword(keyword)))
    return success();

  return parser.parseArgumentList(args, OpAsmParser::Delimiter::Paren,
                                  /*allowType=*/true);
}

/// Parses a GPU function.
///
/// <operation> ::= `gpu.func` symbol-ref-id `(` argument-list `)`
///                 (`->` function-result-list)? memory-attribution `kernel`?
///                 function-attributes? region
ParseResult GPUFuncOp::parse(OpAsmParser &parser, OperationState &result) {
  SmallVector<OpAsmParser::Argument> entryArgs;
  SmallVector<DictionaryAttr> resultAttrs;
  SmallVector<Type> resultTypes;
  bool isVariadic;

  // Parse the function name.
  StringAttr nameAttr;
  if (parser.parseSymbolName(nameAttr, ::mlir::SymbolTable::getSymbolAttrName(),
                             result.attributes))
    return failure();

  auto signatureLocation = parser.getCurrentLocation();
  if (failed(function_interface_impl::parseFunctionSignature(
          parser, /*allowVariadic=*/false, entryArgs, isVariadic, resultTypes,
          resultAttrs)))
    return failure();

  if (!entryArgs.empty() && entryArgs[0].ssaName.name.empty())
    return parser.emitError(signatureLocation)
           << "gpu.func requires named arguments";

  // Construct the function type. More types will be added to the region, but
  // not to the function type.
  Builder &builder = parser.getBuilder();

  SmallVector<Type> argTypes;
  for (auto &arg : entryArgs)
    argTypes.push_back(arg.type);
  auto type = builder.getFunctionType(argTypes, resultTypes);
  result.addAttribute(GPUFuncOp::getTypeAttrName(), TypeAttr::get(type));

  function_interface_impl::addArgAndResultAttrs(builder, result, entryArgs,
                                                resultAttrs);

  // Parse workgroup memory attributions.
  if (failed(parseAttributions(parser, GPUFuncOp::getWorkgroupKeyword(),
                               entryArgs)))
    return failure();

  // Store the number of operands we just parsed as the number of workgroup
  // memory attributions.
  unsigned numWorkgroupAttrs = entryArgs.size() - type.getNumInputs();
  result.addAttribute(GPUFuncOp::getNumWorkgroupAttributionsAttrName(),
                      builder.getI64IntegerAttr(numWorkgroupAttrs));

  // Parse private memory attributions.
  if (failed(
          parseAttributions(parser, GPUFuncOp::getPrivateKeyword(), entryArgs)))
    return failure();

  // Parse the kernel attribute if present.
  if (succeeded(parser.parseOptionalKeyword(GPUFuncOp::getKernelKeyword())))
    result.addAttribute(GPUDialect::getKernelFuncAttrName(),
                        builder.getUnitAttr());

  // Parse attributes.
  if (failed(parser.parseOptionalAttrDictWithKeyword(result.attributes)))
    return failure();

  // Parse the region. If no argument names were provided, take all names
  // (including those of attributions) from the entry block.
  auto *body = result.addRegion();
  return parser.parseRegion(*body, entryArgs);
}

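// For illustration (not in the original source): a kernel in this syntax
// with both kinds of memory attributions, assuming the dialect's workgroup
// (3) and private (5) address spaces:
//   gpu.func @kernel(%arg0: f32) workgroup(%wg : memref<32xf32, 3>)
//       private(%priv : memref<4xf32, 5>) kernel {
//     ...
//     gpu.return
//   }
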
static void printAttributions(OpAsmPrinter &p, StringRef keyword,
                              ArrayRef<BlockArgument> values) {
  if (values.empty())
    return;

  p << ' ' << keyword << '(';
  llvm::interleaveComma(
      values, p, [&p](BlockArgument v) { p << v << " : " << v.getType(); });
  p << ')';
}

void GPUFuncOp::print(OpAsmPrinter &p) {
  p << ' ';
  p.printSymbolName(getName());

  FunctionType type = getFunctionType();
  function_interface_impl::printFunctionSignature(p, *this, type.getInputs(),
                                                  /*isVariadic=*/false,
                                                  type.getResults());

  printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions());
  printAttributions(p, getPrivateKeyword(), getPrivateAttributions());
  if (isKernel())
    p << ' ' << getKernelKeyword();

  function_interface_impl::printFunctionAttributes(
      p, *this, type.getNumInputs(), type.getNumResults(),
      {getNumWorkgroupAttributionsAttrName(),
       GPUDialect::getKernelFuncAttrName()});
  p << ' ';
  p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
}

LogicalResult GPUFuncOp::verifyType() {
  Type type = getFunctionTypeAttr().getValue();
  if (!type.isa<FunctionType>())
    return emitOpError("requires '" + getTypeAttrName() +
                       "' attribute of function type");

  if (isKernel() && getFunctionType().getNumResults() != 0)
    return emitOpError() << "expected void return type for kernel function";

  return success();
}

static LogicalResult verifyAttributions(Operation *op,
                                        ArrayRef<BlockArgument> attributions,
                                        unsigned memorySpace) {
  for (Value v : attributions) {
    auto type = v.getType().dyn_cast<MemRefType>();
    if (!type)
      return op->emitOpError() << "expected memref type in attribution";

    if (type.getMemorySpaceAsInt() != memorySpace) {
      return op->emitOpError()
             << "expected memory space " << memorySpace << " in attribution";
    }
  }
  return success();
}

/// Verifies the body of the function.
LogicalResult GPUFuncOp::verifyBody() {
  if (empty())
    return emitOpError() << "expected body with at least one block";
  unsigned numFuncArguments = getNumArguments();
  unsigned numWorkgroupAttributions = getNumWorkgroupAttributions();
  unsigned numBlockArguments = front().getNumArguments();
  if (numBlockArguments < numFuncArguments + numWorkgroupAttributions)
    return emitOpError() << "expected at least "
                         << numFuncArguments + numWorkgroupAttributions
                         << " arguments to body region";

  ArrayRef<Type> funcArgTypes = getFunctionType().getInputs();
  for (unsigned i = 0; i < numFuncArguments; ++i) {
    Type blockArgType = front().getArgument(i).getType();
    if (funcArgTypes[i] != blockArgType)
      return emitOpError() << "expected body region argument #" << i
                           << " to be of type " << funcArgTypes[i] << ", got "
                           << blockArgType;
  }

  if (failed(verifyAttributions(getOperation(), getWorkgroupAttributions(),
                                GPUDialect::getWorkgroupAddressSpace())) ||
      failed(verifyAttributions(getOperation(), getPrivateAttributions(),
                                GPUDialect::getPrivateAddressSpace())))
    return failure();

  return success();
}

//===----------------------------------------------------------------------===//
// ReturnOp
//===----------------------------------------------------------------------===//

LogicalResult gpu::ReturnOp::verify() {
  GPUFuncOp function = (*this)->getParentOfType<GPUFuncOp>();

  FunctionType funType = function.getFunctionType();

  if (funType.getNumResults() != getOperands().size())
    return emitOpError()
        .append("expected ", funType.getNumResults(), " result operands")
        .attachNote(function.getLoc())
        .append("return type declared here");

  for (const auto &pair : llvm::enumerate(
           llvm::zip(function.getFunctionType().getResults(), getOperands()))) {
    auto [type, operand] = pair.value();
    if (type != operand.getType())
      return emitOpError() << "unexpected type `" << operand.getType()
                           << "' for operand #" << pair.index();
  }
  return success();
}

//===----------------------------------------------------------------------===//
// GPUModuleOp
//===----------------------------------------------------------------------===//

void GPUModuleOp::build(OpBuilder &builder, OperationState &result,
                        StringRef name) {
  ensureTerminator(*result.addRegion(), builder, result.location);
  result.attributes.push_back(builder.getNamedAttr(
      ::mlir::SymbolTable::getSymbolAttrName(), builder.getStringAttr(name)));
}

ParseResult GPUModuleOp::parse(OpAsmParser &parser, OperationState &result) {
  StringAttr nameAttr;
  if (parser.parseSymbolName(nameAttr, ::mlir::SymbolTable::getSymbolAttrName(),
                             result.attributes) ||
      // If module attributes are present, parse them.
      parser.parseOptionalAttrDictWithKeyword(result.attributes))
    return failure();

  // Parse the module body.
  auto *body = result.addRegion();
  if (parser.parseRegion(*body, {}))
    return failure();

  // Ensure that this module has a valid terminator.
  GPUModuleOp::ensureTerminator(*body, parser.getBuilder(), result.location);
  return success();
}

void GPUModuleOp::print(OpAsmPrinter &p) {
  p << ' ';
  p.printSymbolName(getName());
  p.printOptionalAttrDictWithKeyword((*this)->getAttrs(),
                                     {mlir::SymbolTable::getSymbolAttrName()});
  p << ' ';
  p.printRegion(getRegion(), /*printEntryBlockArgs=*/false,
                /*printBlockTerminators=*/false);
}

//===----------------------------------------------------------------------===//
// GPUMemcpyOp
//===----------------------------------------------------------------------===//

LogicalResult MemcpyOp::verify() {
  auto srcType = getSrc().getType();
  auto dstType = getDst().getType();

  if (getElementTypeOrSelf(srcType) != getElementTypeOrSelf(dstType))
    return emitOpError("arguments have incompatible element type");

  if (failed(verifyCompatibleShape(srcType, dstType)))
    return emitOpError("arguments have incompatible shape");

  return success();
}

namespace {

/// Erases a common case of copy ops where a destination value is used only by
/// the copy op, alloc and dealloc ops.
struct EraseTrivialCopyOp : public OpRewritePattern<MemcpyOp> {
  using OpRewritePattern<MemcpyOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(MemcpyOp op,
                                PatternRewriter &rewriter) const override {
    Value dest = op.getDst();
    Operation *destDefOp = dest.getDefiningOp();
    // `dest` must be defined by an op having Allocate memory effect in order
    // to perform the folding.
    if (!destDefOp ||
        !hasSingleEffect<MemoryEffects::Allocate>(destDefOp, dest))
      return failure();
    // We can erase `op` iff `dest` has no other use apart from its
    // use by `op` and dealloc ops.
    if (llvm::any_of(dest.getUsers(), [op, dest](Operation *user) {
          return user != op &&
                 !hasSingleEffect<MemoryEffects::Free>(user, dest);
        }))
      return failure();
    // We can perform the folding if and only if op has a single async
    // dependency and produces an async token as result, or if it does not have
    // any async dependency and does not produce any async token result.
    if (op.getAsyncDependencies().size() > 1 ||
        ((op.getAsyncDependencies().empty() && op.getAsyncToken()) ||
         (!op.getAsyncDependencies().empty() && !op.getAsyncToken())))
      return failure();
    rewriter.replaceOp(op, op.getAsyncDependencies());
    return success();
  }
};

} // end anonymous namespace

void MemcpyOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                           MLIRContext *context) {
  results.add<EraseTrivialCopyOp>(context);
}

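// Illustrative only (not in the original source): in a sequence such as
//   %dst = gpu.alloc () : memref<16xf32>
//   gpu.memcpy %dst, %src : memref<16xf32>, memref<16xf32>
//   gpu.dealloc %dst : memref<16xf32>
// the copy's destination is never read, so EraseTrivialCopyOp removes the
// gpu.memcpy; the now-unused alloc/dealloc pair is cleaned up separately.
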
//===----------------------------------------------------------------------===//
// GPU_SubgroupMmaLoadMatrixOp
//===----------------------------------------------------------------------===//

/// Return true if the last dimension of the MemRefType has unit stride. Also
/// return true for memrefs with no strides.
static bool isLastMemrefDimUnitStride(MemRefType type) {
  int64_t offset;
  SmallVector<int64_t> strides;
  if (failed(getStridesAndOffset(type, strides, offset))) {
    return false;
  }
  return strides.back() == 1;
}

LogicalResult SubgroupMmaLoadMatrixOp::verify() {
  auto srcType = getSrcMemref().getType();
  auto resType = getRes().getType();
  auto resMatrixType = resType.cast<gpu::MMAMatrixType>();
  auto operand = resMatrixType.getOperand();
  auto srcMemrefType = srcType.cast<MemRefType>();

  if (!isLastMemrefDimUnitStride(srcMemrefType))
    return emitError(
        "expected the most minor dimension of the source memref to have unit "
        "stride");

  if (!operand.equals("AOp") && !operand.equals("BOp") &&
      !operand.equals("COp"))
    return emitError("only AOp, BOp and COp can be loaded");

  return success();
}

//===----------------------------------------------------------------------===//
// GPU_SubgroupMmaStoreMatrixOp
//===----------------------------------------------------------------------===//

LogicalResult SubgroupMmaStoreMatrixOp::verify() {
  auto srcType = getSrc().getType();
  auto dstType = getDstMemref().getType();
  auto srcMatrixType = srcType.cast<gpu::MMAMatrixType>();
  auto dstMemrefType = dstType.cast<MemRefType>();

  if (!isLastMemrefDimUnitStride(dstMemrefType))
    return emitError(
        "expected the most minor dimension of the destination memref to have "
        "unit stride");

  if (!srcMatrixType.getOperand().equals("COp"))
    return emitError(
        "expected the operand matrix being stored to have 'COp' operand type");

  return success();
}

//===----------------------------------------------------------------------===//
// GPU_SubgroupMmaComputeOp
//===----------------------------------------------------------------------===//

LogicalResult SubgroupMmaComputeOp::verify() {
  enum OperandMap { A, B, C };
  SmallVector<MMAMatrixType, 3> opTypes;
  opTypes.push_back(getOpA().getType().cast<MMAMatrixType>());
  opTypes.push_back(getOpB().getType().cast<MMAMatrixType>());
  opTypes.push_back(getOpC().getType().cast<MMAMatrixType>());

  if (!opTypes[A].getOperand().equals("AOp") ||
      !opTypes[B].getOperand().equals("BOp") ||
      !opTypes[C].getOperand().equals("COp"))
    return emitError("operands must be in the order AOp, BOp, COp");

  ArrayRef<int64_t> aShape, bShape, cShape;
  aShape = opTypes[A].getShape();
  bShape = opTypes[B].getShape();
  cShape = opTypes[C].getShape();

  if (aShape[1] != bShape[0] || aShape[0] != cShape[0] ||
      bShape[1] != cShape[1])
    return emitError("operand shapes do not satisfy matmul constraints");

  return success();
}

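// For illustration (not in the original source): for C += A * B with A of
// shape MxK, B of shape KxN, and C of shape MxN, the checks above require
// aShape[1] == bShape[0] (K), aShape[0] == cShape[0] (M), and
// bShape[1] == cShape[1] (N); e.g. 16x8 times 8x32 yields 16x32.
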
LogicalResult MemcpyOp::fold(ArrayRef<Attribute> operands,
                             SmallVectorImpl<::mlir::OpFoldResult> &results) {
  return memref::foldMemRefCast(*this);
}

LogicalResult MemsetOp::fold(ArrayRef<Attribute> operands,
                             SmallVectorImpl<::mlir::OpFoldResult> &results) {
  return memref::foldMemRefCast(*this);
}

//===----------------------------------------------------------------------===//
// GPU_WaitOp
//===----------------------------------------------------------------------===//

namespace {

/// Remove gpu.wait op use of gpu.wait op def without async dependencies.
/// %t = gpu.wait async []       // No async dependencies.
/// ... gpu.wait ... [%t, ...]   // %t can be removed.
struct EraseRedundantGpuWaitOpPairs : public OpRewritePattern<WaitOp> {
public:
  using OpRewritePattern<WaitOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(WaitOp op,
                                PatternRewriter &rewriter) const final {
    auto predicate = [](Value value) {
      auto waitOp = value.getDefiningOp<WaitOp>();
      return waitOp && waitOp->getNumOperands() == 0;
    };
    if (llvm::none_of(op.getAsyncDependencies(), predicate))
      return failure();
    SmallVector<Value> validOperands;
    for (Value operand : op->getOperands()) {
      if (predicate(operand))
        continue;
      validOperands.push_back(operand);
    }
    op->setOperands(validOperands);
    return success();
  }
};

/// Simplify trivial gpu.wait ops for the following patterns.
/// 1. %t = gpu.wait async ... ops, where %t has no uses (regardless of async
///    dependencies).
/// 2. %t1 = gpu.wait async [%t0], in this case, we can replace uses of %t1
///    with %t0.
/// 3. gpu.wait [] ops, i.e. gpu.wait ops that neither have any async
///    dependencies nor return any token.
struct SimplifyGpuWaitOp : public OpRewritePattern<WaitOp> {
public:
  using OpRewritePattern<WaitOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(WaitOp op,
                                PatternRewriter &rewriter) const final {
    // Erase gpu.wait ops that neither have any async dependencies nor return
    // any async token.
    if (op.getAsyncDependencies().empty() && !op.getAsyncToken()) {
      rewriter.eraseOp(op);
      return success();
    }
    // Replace uses of %t1 = gpu.wait async [%t0] ops with %t0 and erase the
    // op.
    if (llvm::hasSingleElement(op.getAsyncDependencies()) &&
        op.getAsyncToken()) {
      rewriter.replaceOp(op, op.getAsyncDependencies());
      return success();
    }
    // Erase %t = gpu.wait async ... ops, where %t has no uses.
    if (op.getAsyncToken() && op.getAsyncToken().use_empty()) {
      rewriter.eraseOp(op);
      return success();
    }
    return failure();
  }
};

} // end anonymous namespace

void WaitOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                         MLIRContext *context) {
  results.add<EraseRedundantGpuWaitOpPairs, SimplifyGpuWaitOp>(context);
}

//===----------------------------------------------------------------------===//
// GPU_AllocOp
//===----------------------------------------------------------------------===//

LogicalResult AllocOp::verify() {
  auto memRefType = getMemref().getType().cast<MemRefType>();

  if (static_cast<int64_t>(getDynamicSizes().size()) !=
      memRefType.getNumDynamicDims())
    return emitOpError("dimension operand count does not equal memref "
                       "dynamic dimension count");

  unsigned numSymbols = 0;
  if (!memRefType.getLayout().isIdentity())
    numSymbols = memRefType.getLayout().getAffineMap().getNumSymbols();
  if (getSymbolOperands().size() != numSymbols) {
    return emitOpError(
        "symbol operand count does not equal memref symbol count");
  }

  return success();
}

namespace {

/// Folding of memref.dim(gpu.alloc(%size), %idx) -> %size similar to
/// `memref::AllocOp`.
struct SimplifyDimOfAllocOp : public OpRewritePattern<memref::DimOp> {
  using OpRewritePattern<memref::DimOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(memref::DimOp dimOp,
                                PatternRewriter &rewriter) const override {
    auto index = dimOp.getIndex().getDefiningOp<arith::ConstantIndexOp>();
    if (!index)
      return failure();

    auto memrefType = dimOp.getSource().getType().dyn_cast<MemRefType>();
    if (!memrefType || !memrefType.isDynamicDim(index.value()))
      return failure();

    auto alloc = dimOp.getSource().getDefiningOp<AllocOp>();
    if (!alloc)
      return failure();

    Value substituteOp = *(alloc.getDynamicSizes().begin() +
                           memrefType.getDynamicDimIndex(index.value()));
    rewriter.replaceOp(dimOp, substituteOp);
    return success();
  }
};

} // namespace

void AllocOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                          MLIRContext *context) {
  results.add<SimplifyDimOfAllocOp>(context);
}

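// Illustrative only (not in the original source): SimplifyDimOfAllocOp folds
//   %m = gpu.alloc (%size) : memref<?xf32>
//   %d = memref.dim %m, %c0 : memref<?xf32>
// by replacing all uses of %d directly with %size.
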
#include "mlir/Dialect/GPU/IR/GPUOpInterfaces.cpp.inc"
#include "mlir/Dialect/GPU/IR/GPUOpsEnums.cpp.inc"

#define GET_ATTRDEF_CLASSES
#include "mlir/Dialect/GPU/IR/GPUOpsAttributes.cpp.inc"

#define GET_OP_CLASSES
#include "mlir/Dialect/GPU/IR/GPUOps.cpp.inc"
static std::string diag(llvm::Value &value)
static void printLaunchFuncOperands(OpAsmPrinter &printer, Operation *, OperandRange operands, TypeRange types)
Definition: GPUDialect.cpp:804
static bool isLastMemrefDimUnitStride(MemRefType type)
Return true if the last dimension of the MemRefType has unit stride.
static ParseResult parseAsyncDependencies(OpAsmParser &parser, Type &asyncTokenType, SmallVectorImpl< OpAsmParser::UnresolvedOperand > &asyncDependencies)
Parses an optional list of async operands with an optional leading keyword.
Definition: GPUDialect.cpp:290
static LogicalResult verifyAttributions(Operation *op, ArrayRef< BlockArgument > attributions, unsigned memorySpace)
static ParseResult parseAllReduceOperation(AsmParser &parser, AllReduceOperationAttr &attr)
Definition: GPUDialect.cpp:366
static void printAsyncDependencies(OpAsmPrinter &printer, Operation *op, Type asyncTokenType, OperandRange asyncDependencies)
Prints optional async dependencies with its leading keyword.
Definition: GPUDialect.cpp:306
GPUMemorySpace
GPU memory space identifiers.
Definition: GPUDialect.cpp:101
@ kGlobalMemorySpace
Global memory space identifier.
Definition: GPUDialect.cpp:106
@ kSharedMemorySpace
Shared memory space identifier.
Definition: GPUDialect.cpp:109
@ kGenericMemorySpace
Generic memory space identifier.
Definition: GPUDialect.cpp:103
static ParseResult parseSizeAssignment(OpAsmParser &parser, MutableArrayRef< OpAsmParser::UnresolvedOperand > sizes, MutableArrayRef< OpAsmParser::UnresolvedOperand > regionSizes, MutableArrayRef< OpAsmParser::UnresolvedOperand > indices)
Definition: GPUDialect.cpp:563
static void printAttributions(OpAsmPrinter &p, StringRef keyword, ArrayRef< BlockArgument > values)
Definition: GPUDialect.cpp:969
static void printAllReduceOperation(AsmPrinter &printer, Operation *op, AllReduceOperationAttr attr)
Definition: GPUDialect.cpp:378
static ParseResult parseAttributions(OpAsmParser &parser, StringRef keyword, SmallVectorImpl< OpAsmParser::Argument > &args)
Parses a GPU function memory attribution.
Definition: GPUDialect.cpp:888
static ParseResult parseLaunchFuncOperands(OpAsmParser &parser, SmallVectorImpl< OpAsmParser::UnresolvedOperand > &argNames, SmallVectorImpl< Type > &argTypes)
Definition: GPUDialect.cpp:786
static bool verifyReduceOpAndType(gpu::AllReduceOperation opName, Type resType)
Definition: GPUDialect.cpp:324
static void printSizeAssignment(OpAsmPrinter &p, KernelDim3 size, KernelDim3 operands, KernelDim3 ids)
Definition: GPUDialect.cpp:525
static bool isLegalToInline(InlinerInterface &interface, Region *src, Region *insertRegion, bool shouldCloneInlinedRegion, BlockAndValueMapping &valueMapping)
Utility to check that all of the operations within 'src' can be inlined.
static constexpr const bool value
static void print(spirv::VerCapExtAttr triple, DialectAsmPrinter &printer)
static void getDynamicSizes(RankedTensorType tp, const SmallVectorImpl< Value > &sizes, SmallVectorImpl< Value > &dynSizes)
Collects the dynamic dimension sizes for tp with the assumption that sizes are the dimension sizes fo...
This base class exposes generic asm parser hooks, usable across the various derived parsers.
ParseResult parseSymbolName(StringAttr &result)
Parse an -identifier and store it (without the '@' symbol) in a string attribute.
@ Paren
Parens surrounding zero or more operands.
@ OptionalSquare
Square brackets supporting zero or more ops, or nothing.
virtual Builder & getBuilder() const =0
Return a builder which provides useful access to MLIRContext, global objects like types and attribute...
virtual ParseResult parseOptionalAttrDict(NamedAttrList &result)=0
Parse a named dictionary into 'result' if it is present.
virtual ParseResult parseOptionalKeyword(StringRef keyword)=0
Parse the given keyword if present.
MLIRContext * getContext() const
Definition: AsmPrinter.cpp:67
virtual Location getEncodedSourceLoc(SMLoc loc)=0
Re-encode the given source location as an MLIR location and return it.
virtual ParseResult parseRParen()=0
Parse a ) token.
virtual InFlightDiagnostic emitError(SMLoc loc, const Twine &message={})=0
Emit a diagnostic at the specified location and return failure.
virtual ParseResult parseLess()=0
Parse a '<' token.
virtual ParseResult parseDimensionList(SmallVectorImpl< int64_t > &dimensions, bool allowDynamic=true, bool withTrailingX=true)=0
Parse a dimension list of a tensor or memref type.
virtual ParseResult parseEqual()=0
Parse a = token.
virtual ParseResult parseOptionalAttrDictWithKeyword(NamedAttrList &result)=0
Parse a named dictionary into 'result' if the attributes keyword is present.
virtual SMLoc getCurrentLocation()=0
Get the location of the next token and store it into the argument.
virtual SMLoc getNameLoc() const =0
Return the location of the original name token.
virtual ParseResult parseOptionalString(std::string *string)=0
Parse a quoted string token if present.
virtual ParseResult parseGreater()=0
Parse a '>' token.
virtual ParseResult parseLParen()=0
Parse a ( token.
virtual ParseResult parseType(Type &result)=0
Parse a type.
virtual ParseResult parseComma()=0
Parse a , token.
ParseResult parseKeyword(StringRef keyword)
Parse a given keyword.
This base class exposes generic asm printer hooks, usable across the various derived printers.
virtual void printType(Type type)
virtual void printSymbolName(StringRef symbolRef)
Print the given string as a symbol reference, i.e.
bool isa() const
Casting utility functions.
Definition: Attributes.h:117
This class represents an argument of a Block.
Definition: Value.h:296
Block represents an ordered list of Operations.
Definition: Block.h:30
BlockArgument addArgument(Type type, Location loc)
Add one value to the argument list.
Definition: Block.cpp:141
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:49
UnitAttr getUnitAttr()
Definition: Builders.cpp:99
IntegerAttr getI32IntegerAttr(int32_t value)
Definition: Builders.cpp:190
DenseI32ArrayAttr getDenseI32ArrayAttr(ArrayRef< int32_t > values)
Definition: Builders.cpp:153
FunctionType getFunctionType(TypeRange inputs, TypeRange results)
Definition: Builders.cpp:81
IntegerType getI32Type()
Definition: Builders.cpp:68
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:113
Ty getType(Args &&...args)
Get or construct an instance of the type Ty with provided arguments.
Definition: Builders.h:88
StringAttr getStringAttr(const Twine &bytes)
Definition: Builders.cpp:243
IndexType getIndexType()
Definition: Builders.cpp:56
NamedAttribute getNamedAttr(StringRef name, Attribute val)
Definition: Builders.cpp:95
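A minimal usage sketch of the Builder helpers above, assuming a live MLIRContext named context; the attribute names are placeholders:

Builder builder(&context);
IntegerAttr width = builder.getI32IntegerAttr(32);        // i32 attribute
StringAttr name = builder.getStringAttr("kernel");        // string attribute
NamedAttribute named = builder.getNamedAttr("gpu.width", width);
FunctionType fnType = builder.getFunctionType(
    {builder.getIndexType()}, {builder.getI32Type()});    // (index) -> i32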
The DialectAsmParser has methods for interacting with the asm parser when parsing attributes and type...
This is a pure-virtual base class that exposes the asmprinter hooks necessary to implement a custom print() method.
This is the interface that must be implemented by the dialects of operations to be inlined.
Definition: InliningUtils.h:41
DialectInlinerInterface(Dialect *dialect)
Definition: InliningUtils.h:43
This class represents a diagnostic that is inflight and set to be reported.
Definition: Diagnostics.h:307
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
Definition: Location.h:64
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:56
void push_back(NamedAttribute newAttribute)
Add an attribute with the specified name.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:150
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:32
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:164
The OpAsmParser has methods for interacting with the asm parser: parsing things from it,...
virtual size_t getNumResults() const =0
Return the number of declared SSA results.
virtual ParseResult parseRegion(Region &region, ArrayRef< Argument > arguments={}, bool enableNameShadowing=false)=0
Parses a region.
virtual ParseResult parseArgumentList(SmallVectorImpl< Argument > &result, Delimiter delimiter=Delimiter::None, bool allowType=false, bool allowAttrs=false)=0
Parse zero or more arguments with a specified surrounding delimiter.
virtual ParseResult resolveOperand(const UnresolvedOperand &operand, Type type, SmallVectorImpl< Value > &result)=0
Resolve an operand to an SSA value, emitting an error on failure.
ParseResult resolveOperands(Operands &&operands, Type type, SmallVectorImpl< Value > &result)
Resolve a list of operands to SSA values, emitting an error on failure, or appending the results to the list on success.
virtual ParseResult parseOperand(UnresolvedOperand &result, bool allowResultNumber=true)=0
Parse a single SSA value operand name along with a result number if allowResultNumber is true.
virtual ParseResult parseOperandList(SmallVectorImpl< UnresolvedOperand > &result, Delimiter delimiter=Delimiter::None, bool allowResultNumber=true, int requiredOperandCount=-1)=0
Parse zero or more SSA comma-separated operand references with a specified surrounding delimiter, and an optional required operand count.
This is a pure-virtual base class that exposes the asmprinter hooks necessary to implement a custom print() method.
virtual void printOptionalAttrDictWithKeyword(ArrayRef< NamedAttribute > attrs, ArrayRef< StringRef > elidedAttrs={})=0
If the specified operation has attributes, print out an attribute dictionary prefixed with 'attributes'.
virtual void printOptionalAttrDict(ArrayRef< NamedAttribute > attrs, ArrayRef< StringRef > elidedAttrs={})=0
If the specified operation has attributes, print out an attribute dictionary with their values.
virtual void printRegion(Region &blocks, bool printEntryBlockArgs=true, bool printBlockTerminators=true, bool printEmptyBlock=false)=0
Prints a region.
virtual void printOperand(Value value)=0
Print implementations for various things an operation contains.
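These printer hooks are typically combined in a custom print method. The following is a hedged sketch for a hypothetical MyOp with one operand and a body region, not an op defined in this file:

void MyOp::print(OpAsmPrinter &p) {
  p << " ";
  p.printOperand(getOperand());                  // SSA operand name
  p.printOptionalAttrDict((*this)->getAttrs());  // trailing attribute dict
  p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
}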
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:300
This class helps build Operations.
Definition: Builders.h:198
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:383
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:422
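A short sketch tying the builder entries above together, assuming an existing Block *block and a populated OperationState state:

OpBuilder builder(block->getParentOp()->getContext());
OpBuilder::InsertionGuard guard(builder);  // restores the point on scope exit
builder.setInsertionPointToStart(block);   // insert before existing ops
Operation *op = builder.create(state);     // generic OperationState creation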
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:41
Operation is a basic unit of execution within MLIR.
Definition: Operation.h:31
void insertOperands(unsigned index, ValueRange operands)
Insert the given operands into the operand list at the given 'index'.
Definition: Operation.cpp:213
AttrClass getAttrOfType(StringAttr name)
Definition: Operation.h:375
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:147
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:154
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:225
void setAttr(StringAttr name, Attribute value)
If an attribute exists with the specified name, change it to the new value.
Definition: Operation.h:395
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
Definition: Operation.h:203
InFlightDiagnostic emitOpError(const Twine &message={})
Emit an error with the op name prefixed, like "'dim' op " which is convenient for verifiers.
Definition: Operation.cpp:512
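Sketch of the attribute and diagnostic entry points above on an arbitrary Operation *op; the attribute names are illustrative only:

if (op->getAttrOfType<UnitAttr>("gpu.kernel")) {
  // setAttr overwrites any existing value stored under this name.
  op->setAttr("gpu.visited", UnitAttr::get(op->getContext()));
} else {
  op->emitOpError("expected 'gpu.kernel' attribute");
}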
This class represents success/failure for parsing-like operations that find it important to chain tog...
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current IR being matched.
Definition: PatternMatch.h:605
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
void push_back(Block *block)
Definition: Region.h:61
BlockListType & getBlocks()
Definition: Region.h:45
RewritePatternSet & add(ConstructorArg &&arg, ConstructorArgs &&...args)
Add an instance of each of the pattern types 'Ts' to the pattern list with the given arguments.
virtual void replaceOp(Operation *op, ValueRange newValues)
This method replaces the results of the operation with the specified list of values.
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
static StringRef getSymbolAttrName()
Return the name of the attribute used for symbol names.
Definition: SymbolTable.h:58
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:36
Instances of the Type class are uniqued, have an immutable identifier, and an optional mutable component.
Definition: Types.h:74
MLIRContext * getContext() const
Return the MLIRContext in which this type was uniqued.
Definition: Types.cpp:19
U dyn_cast() const
Definition: Types.h:270
bool isF32() const
Definition: Types.cpp:25
bool isF16() const
Definition: Types.cpp:24
bool isa() const
Definition: Types.h:260
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:349
This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.
Definition: Value.h:85
Type getType() const
Return the type of this value.
Definition: Value.h:114
user_range getUsers() const
Definition: Value.h:209
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
A utility result that is used to signal how to proceed with an ongoing walk.
Definition: Visitors.h:34
static ConcreteT get(MLIRContext *ctx, Args... args)
Get or create a new ConcreteT instance within the ctx.
ImplType * getImpl() const
Utility for easy access to the storage instance.
MMAMatrix represents a matrix held by a subgroup for matrix-matrix multiply accumulate operations.
Definition: GPUDialect.h:125
ArrayRef< int64_t > getShape() const
Get shape of the matrix.
Definition: GPUDialect.cpp:67
static MMAMatrixType get(ArrayRef< int64_t > shape, Type elementType, StringRef operand)
Get MMAMatrixType and verify construction invariants.
Definition: GPUDialect.cpp:52
Type getElementType() const
Get elementType of a single element.
Definition: GPUDialect.cpp:71
static bool isValidElementType(Type elementType)
Check if a type is a valid MMAMatrixType element type.
Definition: GPUDialect.cpp:75
StringRef getOperand() const
The general form of operation this type supports is given by the equation C += A*B.
Definition: GPUDialect.cpp:73
static MMAMatrixType getChecked(function_ref< InFlightDiagnostic()> emitError, ArrayRef< int64_t > shape, Type elementType, StringRef operand)
Get MMAMatrixType at a particular location and verify construction invariants.
Definition: GPUDialect.cpp:58
static LogicalResult verify(function_ref< InFlightDiagnostic()> emitError, ArrayRef< int64_t > shape, Type elementType, StringRef operand)
Verify that shape and elementType are actually allowed for the MMAMatrixType.
Definition: GPUDialect.cpp:80
unsigned getNumDims() const
Get number of dims.
Definition: GPUDialect.cpp:65
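Putting the MMAMatrixType entry points together, here is a small sketch, assuming a live MLIRContext named context, that builds and checks a 16x16xf16 "AOp" matrix type:

Type f16 = FloatType::getF16(&context);
MMAMatrixType aOp = MMAMatrixType::getChecked(
    getDefaultDiagnosticEmitFn(&context), {16, 16}, f16, "AOp");
// On success the invariants above hold: two dims, F16 elements, "AOp" role.
assert(aOp && aOp.getNumDims() == 2 && aOp.getOperand() == "AOp");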
void printType(Type type, AsmPrinter &printer)
Prints an LLVM Dialect type.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:230
llvm::unique_function< InFlightDiagnostic()> getDefaultDiagnosticEmitFn(MLIRContext *ctx)
Utility method to generate a callback that can be used to generate a diagnostic when checking the construction invariants of a storage object.
void printFunctionSignature(OpAsmPrinter &p, Operation *op, ArrayRef< Type > argTypes, bool isVariadic, ArrayRef< Type > resultTypes)
Prints the signature of the function-like operation op.
void printFunctionAttributes(OpAsmPrinter &p, Operation *op, unsigned numInputs, unsigned numResults, ArrayRef< StringRef > elided={})
Prints the list of function attributes prefixed with the "attributes" keyword.
void addArgAndResultAttrs(Builder &builder, OperationState &result, ArrayRef< DictionaryAttr > argAttrs, ArrayRef< DictionaryAttr > resultAttrs)
Adds argument and result attributes, provided as argAttrs and resultAttrs arguments, to the list of operation attributes in result.
StringRef getTypeAttrName()
Return the name of the attribute used for function types.
ParseResult parseFunctionSignature(OpAsmParser &parser, bool allowVariadic, SmallVectorImpl< OpAsmParser::Argument > &arguments, bool &isVariadic, SmallVectorImpl< Type > &resultTypes, SmallVectorImpl< DictionaryAttr > &resultAttrs)
Parses a function signature using parser.
Type getFunctionType(Builder &builder, ArrayRef< OpAsmParser::Argument > argAttrs, ArrayRef< Type > resultTypes)
Get a function type corresponding to an array of arguments (which have types) and a set of result types.
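A hedged sketch of how the function-parsing helpers above compose inside a custom parse method; parser is the OpAsmParser, result the OperationState, and error handling beyond the early return is elided:

SmallVector<OpAsmParser::Argument> entryArgs;
SmallVector<DictionaryAttr> resultAttrs;
SmallVector<Type> resultTypes;
bool isVariadic;
if (function_interface_impl::parseFunctionSignature(
        parser, /*allowVariadic=*/false, entryArgs, isVariadic, resultTypes,
        resultAttrs))
  return failure();

// Record the reconstructed function type under the standard attribute name.
Builder &builder = parser.getBuilder();
SmallVector<Type> argTypes;
for (OpAsmParser::Argument &arg : entryArgs)
  argTypes.push_back(arg.type);
result.addAttribute(
    function_interface_impl::getTypeAttrName(),
    TypeAttr::get(builder.getFunctionType(argTypes, resultTypes)));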
void addAsyncDependency(Operation *op, Value token)
Definition: GPUDialect.cpp:401
LogicalResult foldMemRefCast(Operation *op, Value inner=nullptr)
This is a common utility used for patterns of the form "someop(memref.cast) -> someop".
Definition: MemRefOps.cpp:89
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition: Matchers.h:329
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
LogicalResult getStridesAndOffset(MemRefType t, SmallVectorImpl< int64_t > &strides, int64_t &offset)
Returns the strides of the MemRef if the layout map is in strided form.
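Sketch of querying the strided form, assuming a MemRefType memrefType and a Location loc from the surrounding verifier:

SmallVector<int64_t> strides;
int64_t offset;
if (failed(getStridesAndOffset(memrefType, strides, offset)))
  return emitError(loc) << "expected a memref with a strided layout";
// For memref<4x8xf32> with the identity layout this yields
// strides = [8, 1] and offset = 0.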
bool succeeded(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
Definition: LogicalResult.h:68
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
Type getElementTypeOrSelf(Type type)
Return the element type or return the type itself.
detail::constant_int_predicate_matcher m_One()
Matches a constant scalar / vector splat / tensor splat integer one.
Definition: Matchers.h:317
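These matchers drive the constant-one test used by simplifications such as the gpu.launch pattern listed below; a minimal helper along those lines:

static bool isConstantOne(Value value) {
  // Matches integer constants (including index constants) equal to 1.
  return matchPattern(value, m_One());
}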
Type parseType(llvm::StringRef typeStr, MLIRContext *context)
Parses a single MLIR type within the given MLIR context, returning the type if it is valid.
LogicalResult verifyCompatibleShape(ArrayRef< int64_t > shape1, ArrayRef< int64_t > shape2)
Returns success if the given two shapes are compatible.
LogicalResult verify(Operation *op, bool verifyRecursively=true)
Perform (potentially expensive) checks of invariants, used to detect compiler bugs, on this operation and any nested operations.
Definition: Verifier.cpp:372
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
Definition: LogicalResult.h:72
Simplify the gpu.launch when the range of a thread or block ID is trivially known to be one.
Definition: GPUDialect.cpp:676
LogicalResult matchAndRewrite(LaunchOp op, PatternRewriter &rewriter) const override
Definition: GPUDialect.cpp:678
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
This is the representation of an operand reference.
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Definition: PatternMatch.h:356
OpRewritePattern(MLIRContext *context, PatternBenefit benefit=1, ArrayRef< StringRef > generatedNames={})
Patterns must specify the root operation name they match against, and can also specify the benefit of the pattern matching and a list of generated ops.
Definition: PatternMatch.h:360
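The canonicalization patterns in this file follow the standard OpRewritePattern shape. A self-contained sketch for a hypothetical MyOp (not an op from this dialect):

struct EraseDeadMyOp : public OpRewritePattern<MyOp> {
  using OpRewritePattern<MyOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(MyOp op,
                                PatternRewriter &rewriter) const override {
    if (!op->use_empty())
      return failure();     // nothing to do; signal no match
    rewriter.eraseOp(op);   // op is known to have no uses
    return success();
  }
};

// Registered alongside other patterns:
//   patterns.add<EraseDeadMyOp>(context);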
This represents an operation in an abstracted form, suitable for use with the builder APIs.
SmallVector< Value, 4 > operands
void addOperands(ValueRange newOperands)
void addAttributes(ArrayRef< NamedAttribute > newAttributes)
Add an array of named attributes.
void addAttribute(StringRef name, Attribute attr)
Add an attribute with the specified name.
NamedAttrList attributes
SmallVector< Type, 4 > types
Types of the results of this operation.
Region * addRegion()
Create a region that should be attached to the operation.
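Sketch of assembling an operation generically through OperationState; the op name, loc, builder, and operandValues are placeholders:

OperationState state(loc, "mydialect.my_op");
state.addOperands(operandValues);                          // SSA operands
state.addAttribute("count", builder.getI64IntegerAttr(4)); // named attribute
state.types.push_back(builder.getIndexType());             // one index result
Region *body = state.addRegion();                          // attach a region
body->push_back(new Block());                              // with an entry block
Operation *op = builder.create(state);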
Utility class for the GPU dialect to represent triples of Values accessible through .x, .y, and .z.
Definition: GPUDialect.h:35