MLIR  22.0.0git
LowerGpuOpsToROCDLOps.cpp
Go to the documentation of this file.
1 //===- LowerGpuOpsToROCDLOps.cpp - MLIR GPU to ROCDL lowering passes ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a pass to generate ROCDLIR operations for higher-level
10 // GPU operations.
11 //
12 //===----------------------------------------------------------------------===//
13 
16 #include "mlir/Pass/Pass.h"
17 #include "mlir/Pass/PassManager.h"
18 
41 
42 #include "../GPUCommon/GPUOpsLowering.h"
43 #include "../GPUCommon/IndexIntrinsicsOpLowering.h"
44 
45 namespace mlir {
46 #define GEN_PASS_DEF_CONVERTGPUOPSTOROCDLOPS
47 #include "mlir/Conversion/Passes.h.inc"
48 } // namespace mlir
49 
50 using namespace mlir;
51 
52 // Truncate or extend the result depending on the index bitwidth specified
53 // by the LLVMTypeConverter options.
55  Location loc, Value value,
56  const LLVMTypeConverter &converter) {
57  int64_t intWidth = cast<IntegerType>(value.getType()).getWidth();
58  int64_t indexBitwidth = converter.getIndexTypeBitwidth();
59  auto indexBitwidthType =
60  IntegerType::get(rewriter.getContext(), converter.getIndexTypeBitwidth());
61  // TODO: use <=> in C++20.
62  if (indexBitwidth > intWidth) {
63  return LLVM::SExtOp::create(rewriter, loc, indexBitwidthType, value);
64  }
65  if (indexBitwidth < intWidth) {
66  return LLVM::TruncOp::create(rewriter, loc, indexBitwidthType, value);
67  }
68  return value;
69 }
70 
71 /// Returns true if the given `gpu.func` can be safely called using the bare
72 /// pointer calling convention.
73 static bool canBeCalledWithBarePointers(gpu::GPUFuncOp func) {
74  bool canBeBare = true;
75  for (Type type : func.getArgumentTypes())
76  if (auto memrefTy = dyn_cast<BaseMemRefType>(type))
77  canBeBare &= LLVMTypeConverter::canConvertToBarePtr(memrefTy);
78  return canBeBare;
79 }
80 
81 static Value getLaneId(RewriterBase &rewriter, Location loc) {
82  auto int32Type = IntegerType::get(rewriter.getContext(), 32);
83  Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, 32);
84  Value minus1 = arith::ConstantIntOp::create(rewriter, loc, -1, 32);
85  NamedAttribute noundef = rewriter.getNamedAttr(
86  LLVM::LLVMDialect::getNoUndefAttrName(), rewriter.getUnitAttr());
87  NamedAttribute lowRange = rewriter.getNamedAttr(
88  LLVM::LLVMDialect::getRangeAttrName(),
90  APInt(32, 32)));
91  NamedAttribute highRange = rewriter.getNamedAttr(
92  LLVM::LLVMDialect::getRangeAttrName(),
94  APInt(32, 64)));
95  Value mbcntLo = ROCDL::MbcntLoOp::create(
96  rewriter, loc, int32Type, minus1, zero, /*arg_attrs=*/{},
97  /*res_attrs=*/
98  rewriter.getArrayAttr(rewriter.getDictionaryAttr({noundef, lowRange})));
99  Value laneId = ROCDL::MbcntHiOp::create(
100  rewriter, loc, int32Type, minus1, mbcntLo, /*arg_attrs=*/{},
101  rewriter.getArrayAttr(rewriter.getDictionaryAttr({noundef, highRange})));
102  return laneId;
103 }
104 
/// Default LLVM data-layout string for amdgcn targets. Installed on the GPU
/// module by the pass below when no explicit `llvm.data_layout` attribute is
/// already present.
static constexpr StringLiteral amdgcnDataLayout =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:"
    "32-v32:"
    "32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:"
    "64-S32-A5-G1-ni:7:8:9";
111 
112 namespace {
113 struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
115 
116  LogicalResult
117  matchAndRewrite(gpu::LaneIdOp op, gpu::LaneIdOp::Adaptor adaptor,
118  ConversionPatternRewriter &rewriter) const override {
119  Location loc = op.getLoc();
120  MLIRContext *context = rewriter.getContext();
121  // convert to:
122  // %mlo = call noundef range(i32 0, 32)
123  // @llvm.amdgcn.mbcnt.lo(-1, 0)
124  // followed by:
125  // %lid = call noundef range(i32 0, 64)
126  // @llvm.amdgcn.mbcnt.hi(-1, %mlo)
127 
128  Value laneId = getLaneId(rewriter, loc);
129  // Truncate or extend the result depending on the index bitwidth specified
130  // by the LLVMTypeConverter options.
131  const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
132  if (indexBitwidth > 32) {
133  laneId = LLVM::SExtOp::create(
134  rewriter, loc, IntegerType::get(context, indexBitwidth), laneId);
135  } else if (indexBitwidth < 32) {
136  laneId = LLVM::TruncOp::create(
137  rewriter, loc, IntegerType::get(context, indexBitwidth), laneId);
138  }
139  rewriter.replaceOp(op, {laneId});
140  return success();
141  }
142 };
143 
144 struct GPUSubgroupSizeOpToROCDL : ConvertOpToLLVMPattern<gpu::SubgroupSizeOp> {
146 
147  GPUSubgroupSizeOpToROCDL(const LLVMTypeConverter &converter,
148  amdgpu::Chipset chipset)
150  chipset(chipset) {}
151 
152  LogicalResult
153  matchAndRewrite(gpu::SubgroupSizeOp op, gpu::SubgroupSizeOp::Adaptor adaptor,
154  ConversionPatternRewriter &rewriter) const override {
155  LLVM::ConstantRangeAttr bounds = nullptr;
156  bool isBeforeGfx10 = chipset.majorVersion < 10;
157  if (auto upperBoundAttr = op.getUpperBoundAttr()) {
158  bounds = rewriter.getAttr<LLVM::ConstantRangeAttr>(
159  /*bitWidth=*/32, /*lower=*/isBeforeGfx10 ? 64 : 32,
160  /*upper=*/op.getUpperBoundAttr().getInt() + 1);
161  }
162  Value wavefrontOp = ROCDL::WavefrontSizeOp::create(
163  rewriter, op.getLoc(), rewriter.getI32Type(), bounds);
164  wavefrontOp = truncOrExtToLLVMType(rewriter, op.getLoc(), wavefrontOp,
165  *getTypeConverter());
166  rewriter.replaceOp(op, {wavefrontOp});
167  return success();
168  }
169 
170  const amdgpu::Chipset chipset;
171 };
172 
173 static bool isSupportedReadLaneType(Type type) {
174  // read(first)lane also supports some vector types, but limit it for scalars
175  // for now.
176  return type.isInteger(16) || type.isInteger(32) || type.isInteger(64) ||
177  isa<Float16Type, BFloat16Type, Float32Type, Float64Type,
178  LLVM::LLVMPointerType>(type);
179 }
180 
181 struct GPUSubgroupBroadcastOpToROCDL
182  : public ConvertOpToLLVMPattern<gpu::SubgroupBroadcastOp> {
184 
185  LogicalResult
186  matchAndRewrite(gpu::SubgroupBroadcastOp op, OpAdaptor adaptor,
187  ConversionPatternRewriter &rewriter) const override {
188  Value src = adaptor.getSrc();
189  if (!isSupportedReadLaneType(src.getType()))
190  return rewriter.notifyMatchFailure(op, "unsupported readlane type");
191 
192  if (adaptor.getBroadcastType() == gpu::BroadcastType::specific_lane) {
193  rewriter.replaceOpWithNewOp<ROCDL::ReadlaneOp>(op, src.getType(), src,
194  adaptor.getLane());
195  } else { // first_active_lane
196  rewriter.replaceOpWithNewOp<ROCDL::ReadfirstlaneOp>(op, src.getType(),
197  src);
198  }
199  return success();
200  }
201 };
202 
203 struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
205 
206  /// Lowers a shuffle to the corresponding ROCDL ops.
207  ///
208  /// Use the `width` argument to see if src lane is participating.
209  /// If not the dstLane would be itself.
210  ///
211  /// Shuffle with DS Bpermute:
212  /// let shflMode = [xor, up, down, idx]
213  /// let width = 32(usually warpsize), step = [1, 2, 4, 8, 16, ... , width].
214  /// 1. curLaneId = using mbcnt.lo + mbcnt.hi
215  /// 2. widthOrZeroIfOutside = (curLaneId + width) & -width
216  /// 3. dstLane = shflMode(curLaneId, step)
217  /// 4. isActiveSrcLane = dstLane < isActiveSrcLane
218  /// 5. dstLane = isActiveSrcLane ? dstLane : curLaneId
219  /// 6. dwordAlignedDstLane = dstLane * 4 or dstLane << 2.
220  /// 7. bpermute(dwordAlignedDstLane, shfl_value).
221  ///
222  LogicalResult
223  matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
224  ConversionPatternRewriter &rewriter) const override {
225  Location loc = op->getLoc();
226  Value initShflValue = adaptor.getValue();
227 
228  Value srcLaneId = getLaneId(rewriter, loc);
229 
230  auto int32Type = IntegerType::get(rewriter.getContext(), 32);
231  Value width = adaptor.getWidth();
232  Value zero = LLVM::ConstantOp::create(rewriter, loc, int32Type, 0);
233  Value negwidth = LLVM::SubOp::create(rewriter, loc, int32Type, zero, width);
234  Value add = LLVM::AddOp::create(rewriter, loc, int32Type, srcLaneId, width);
235  Value widthOrZeroIfOutside =
236  LLVM::AndOp::create(rewriter, loc, int32Type, add, negwidth);
237  Value dstLane;
238 
239  switch (op.getMode()) {
240  case gpu::ShuffleMode::UP:
241  dstLane = LLVM::SubOp::create(rewriter, loc, int32Type, srcLaneId,
242  adaptor.getOffset());
243  break;
244  case gpu::ShuffleMode::DOWN:
245  dstLane = LLVM::AddOp::create(rewriter, loc, int32Type, srcLaneId,
246  adaptor.getOffset());
247  break;
248  case gpu::ShuffleMode::XOR:
249  dstLane = LLVM::XOrOp::create(rewriter, loc, int32Type, srcLaneId,
250  adaptor.getOffset());
251  break;
252  case gpu::ShuffleMode::IDX:
253  dstLane = adaptor.getOffset();
254  break;
255  }
256  Value isActiveSrcLane = LLVM::ICmpOp::create(
257  rewriter, loc, LLVM::ICmpPredicate::slt, dstLane, widthOrZeroIfOutside);
258  Value selectDstLane = LLVM::SelectOp::create(rewriter, loc, isActiveSrcLane,
259  dstLane, srcLaneId);
260  Value two = LLVM::ConstantOp::create(rewriter, loc, int32Type, 2);
261  Value dwordAlignedDstLane =
262  LLVM::ShlOp::create(rewriter, loc, int32Type, selectDstLane, two);
263 
264  SmallVector<Value> decomposed =
265  LLVM::decomposeValue(rewriter, loc, initShflValue, int32Type);
266  SmallVector<Value> swizzled;
267  for (Value v : decomposed) {
268  Value res = ROCDL::DsBpermuteOp::create(rewriter, loc, int32Type,
269  dwordAlignedDstLane, v);
270  swizzled.emplace_back(res);
271  }
272  Value shflValue =
273  LLVM::composeValue(rewriter, loc, swizzled, initShflValue.getType());
274  rewriter.replaceOp(op, {shflValue, isActiveSrcLane});
275  return success();
276  }
277 };
278 
279 /// Import the GPU Ops to ROCDL Patterns.
280 #include "GPUToROCDL.cpp.inc"
281 
282 // A pass that replaces all occurrences of GPU device operations with their
283 // corresponding ROCDL equivalent.
284 //
285 // This pass only handles device code and is not meant to be run on GPU host
286 // code.
287 struct LowerGpuOpsToROCDLOpsPass final
288  : public impl::ConvertGpuOpsToROCDLOpsBase<LowerGpuOpsToROCDLOpsPass> {
289  using Base::Base;
290 
291  void getDependentDialects(DialectRegistry &registry) const override {
292  Base::getDependentDialects(registry);
294  }
295 
296  void runOnOperation() override {
297  gpu::GPUModuleOp m = getOperation();
298  MLIRContext *ctx = m.getContext();
299 
300  auto llvmDataLayout = m->getAttrOfType<StringAttr>(
301  LLVM::LLVMDialect::getDataLayoutAttrName());
302  if (!llvmDataLayout) {
303  llvmDataLayout = StringAttr::get(ctx, amdgcnDataLayout);
304  m->setAttr(LLVM::LLVMDialect::getDataLayoutAttrName(), llvmDataLayout);
305  }
306  // Request C wrapper emission.
307  for (auto func : m.getOps<func::FuncOp>()) {
308  func->setAttr(LLVM::LLVMDialect::getEmitCWrapperAttrName(),
309  UnitAttr::get(ctx));
310  }
311 
312  FailureOr<amdgpu::Chipset> maybeChipset = amdgpu::Chipset::parse(chipset);
313  if (failed(maybeChipset)) {
314  emitError(UnknownLoc::get(ctx), "Invalid chipset name: " + chipset);
315  return signalPassFailure();
316  }
317 
318  /// Customize the bitwidth used for the device side index computations.
320  ctx, DataLayout(cast<DataLayoutOpInterface>(m.getOperation())));
321  options.dataLayout = llvm::DataLayout(llvmDataLayout.getValue());
322  if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
323  options.overrideIndexBitwidth(indexBitwidth);
324 
325  if (useBarePtrCallConv) {
326  options.useBarePtrCallConv = true;
327  WalkResult canUseBarePointers =
328  m.walk([](gpu::GPUFuncOp func) -> WalkResult {
329  if (canBeCalledWithBarePointers(func))
330  return WalkResult::advance();
331  return WalkResult::interrupt();
332  });
333  if (canUseBarePointers.wasInterrupted()) {
335  "bare pointer calling convention requires all memrefs to "
336  "have static shape and use the identity map");
337  return signalPassFailure();
338  }
339  }
340 
341  // Apply in-dialect lowering. In-dialect lowering will replace
342  // ops which need to be lowered further, which is not supported by a
343  // single conversion pass.
344  {
348  (void)applyPatternsGreedily(m, std::move(patterns));
349  }
350 
351  LLVMTypeConverter converter(ctx, options);
353  converter, [](gpu::AddressSpace space) {
354  switch (space) {
355  case gpu::AddressSpace::Global:
356  return 1;
357  case gpu::AddressSpace::Workgroup:
358  return 3;
359  case gpu::AddressSpace::Private:
360  return 5;
361  }
362  llvm_unreachable("unknown address space enum value");
363  return 0;
364  });
365 
366  RewritePatternSet llvmPatterns(ctx);
368 
369  llvm::SmallDenseSet<StringRef> allowedDialectsSet(allowedDialects.begin(),
370  allowedDialects.end());
371  for (Dialect *dialect : ctx->getLoadedDialects()) {
372  bool allowed = allowedDialectsSet.contains(dialect->getNamespace());
373  // Empty `allowedDialectsSet` means all dialects are allowed.
374  if (!allowedDialectsSet.empty() && !allowed)
375  continue;
376 
377  auto *iface = dyn_cast<ConvertToLLVMPatternInterface>(dialect);
378  if (!iface) {
379  // Error out if dialect was explicily specified but doesn't implement
380  // conversion interface.
381  if (allowed) {
382  m.emitError()
383  << "dialect does not implement ConvertToLLVMPatternInterface: "
384  << dialect->getNamespace();
385  return signalPassFailure();
386  }
387  continue;
388  }
389 
390  iface->populateConvertToLLVMConversionPatterns(target, converter,
391  llvmPatterns);
392  }
393 
394  populateAMDGPUToROCDLConversionPatterns(converter, llvmPatterns,
395  *maybeChipset);
396  populateGpuToROCDLConversionPatterns(converter, llvmPatterns, runtime,
397  *maybeChipset);
399  if (failed(applyPartialConversion(m, target, std::move(llvmPatterns))))
400  signalPassFailure();
401  auto *rocdlDialect = getContext().getLoadedDialect<ROCDL::ROCDLDialect>();
402  auto reqdWorkGroupSizeAttrHelper =
403  rocdlDialect->getReqdWorkGroupSizeAttrHelper();
404  auto flatWorkGroupSizeAttrHelper =
405  rocdlDialect->getFlatWorkGroupSizeAttrHelper();
406  // Manually rewrite known block size attributes so the LLVMIR translation
407  // infrastructure can pick them up.
408  m.walk([&](LLVM::LLVMFuncOp op) {
409  if (reqdWorkGroupSizeAttrHelper.isAttrPresent(op)) {
410  auto blockSizes = reqdWorkGroupSizeAttrHelper.getAttr(op);
411  // Also set up the rocdl.flat_work_group_size attribute to prevent
412  // conflicting metadata.
413  uint32_t flatSize = 1;
414  for (uint32_t size : blockSizes.asArrayRef()) {
415  flatSize *= size;
416  }
417  StringAttr flatSizeAttr =
418  StringAttr::get(ctx, Twine(flatSize) + "," + Twine(flatSize));
419  flatWorkGroupSizeAttrHelper.setAttr(op, flatSizeAttr);
420  }
421  });
422  }
423 };
424 
425 } // namespace
426 
428  target.addIllegalOp<func::FuncOp>();
429  target.addLegalDialect<::mlir::LLVM::LLVMDialect>();
430  target.addLegalDialect<ROCDL::ROCDLDialect>();
431  target.addIllegalDialect<gpu::GPUDialect>();
432  target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op, LLVM::FCeilOp,
433  LLVM::FFloorOp, LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op,
434  LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp>();
435  // These ops are legal for f32 type.
436  target.addDynamicallyLegalOp<LLVM::ExpOp, LLVM::LogOp>([](Operation *op) {
437  return any_of(op->getOperandTypes(), llvm::IsaPred<Float32Type>);
438  });
439  // TODO: Remove once we support replacing non-root ops.
440  target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>();
441 }
442 
444  const LLVMTypeConverter &converter, RewritePatternSet &patterns,
445  mlir::gpu::amd::Runtime runtime, amdgpu::Chipset chipset) {
449  auto *rocdlDialect =
450  converter.getContext().getLoadedDialect<ROCDL::ROCDLDialect>();
451  populateWithGenerated(patterns);
452  patterns.add<
453  gpu::index_lowering::OpLowering<gpu::ThreadIdOp, ROCDL::ThreadIdXOp,
454  ROCDL::ThreadIdYOp, ROCDL::ThreadIdZOp>>(
455  converter, IndexKind::Block, IntrType::Id);
457  gpu::BlockIdOp, ROCDL::BlockIdXOp, ROCDL::BlockIdYOp, ROCDL::BlockIdZOp>>(
458  converter, IndexKind::Grid, IntrType::Id);
459  patterns.add<
460  gpu::index_lowering::OpLowering<gpu::BlockDimOp, ROCDL::BlockDimXOp,
461  ROCDL::BlockDimYOp, ROCDL::BlockDimZOp>>(
462  converter, IndexKind::Block, IntrType::Dim);
464  gpu::GridDimOp, ROCDL::GridDimXOp, ROCDL::GridDimYOp, ROCDL::GridDimZOp>>(
465  converter, IndexKind::Grid, IntrType::Dim);
466  patterns.add<GPUReturnOpLowering>(converter);
468  converter,
470  /*allocaAddrSpace=*/ROCDL::ROCDLDialect::kPrivateMemoryAddressSpace,
471  /*workgroupAddrSpace=*/ROCDL::ROCDLDialect::kSharedMemoryAddressSpace,
472  rocdlDialect->getKernelAttrHelper().getName(),
473  rocdlDialect->getReqdWorkGroupSizeAttrHelper().getName()});
474  if (Runtime::HIP == runtime) {
475  patterns.add<GPUPrintfOpToHIPLowering>(converter);
476  } else if (Runtime::OpenCL == runtime) {
477  // Use address space = 4 to match the OpenCL definition of printf()
478  patterns.add<GPUPrintfOpToLLVMCallLowering>(converter, /*addressSpace=*/4);
479  }
480  // TODO: Add alignment for workgroup memory
482 
483  patterns.add<GPUShuffleOpLowering, GPULaneIdOpToROCDL,
484  GPUSubgroupBroadcastOpToROCDL>(converter);
485  patterns.add<GPUSubgroupSizeOpToROCDL>(converter, chipset);
486 
488 }
static Value getZero(OpBuilder &b, Location loc, Type elementType)
Get zero value for an element type.
static MLIRContext * getContext(OpFoldResult val)
static Value getLaneId(RewriterBase &rewriter, Location loc)
static bool canBeCalledWithBarePointers(gpu::GPUFuncOp func)
Returns true if the given gpu.func can be safely called using the bare pointer calling convention.
static constexpr StringLiteral amdgcnDataLayout
static Value truncOrExtToLLVMType(ConversionPatternRewriter &rewriter, Location loc, Value value, const LLVMTypeConverter &converter)
static llvm::ManagedStatic< PassManagerOptions > options
UnitAttr getUnitAttr()
Definition: Builders.cpp:97
IntegerType getI32Type()
Definition: Builders.cpp:62
MLIRContext * getContext() const
Definition: Builders.h:56
ArrayAttr getArrayAttr(ArrayRef< Attribute > value)
Definition: Builders.cpp:265
DictionaryAttr getDictionaryAttr(ArrayRef< NamedAttribute > value)
Definition: Builders.cpp:103
NamedAttribute getNamedAttr(StringRef name, Attribute val)
Definition: Builders.cpp:93
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
Definition: Builders.h:98
This class implements a pattern rewriter for use with ConversionPatterns.
void replaceOp(Operation *op, ValueRange newValues) override
Replace the given operation with the new values.
This class describes a specific conversion target.
void addLegalOp(OperationName op)
Register the given operations as legal.
void addLegalDialect(StringRef name, Names... names)
Register the operations of the given dialects as legal.
void addDynamicallyLegalOp(OperationName op, const DynamicLegalityCallbackFn &callback)
Register the given operation as dynamically legal and set the dynamic legalization callback to the on...
void addIllegalDialect(StringRef name, Names... names)
Register the operations of the given dialects as illegal, i.e.
void addIllegalOp(OperationName op)
Register the given operation as illegal, i.e.
Utility class for operation conversions targeting the LLVM dialect that match exactly one source oper...
Definition: Pattern.h:209
ConvertOpToLLVMPattern(const LLVMTypeConverter &typeConverter, PatternBenefit benefit=1)
Definition: Pattern.h:215
The main mechanism for performing data layout queries.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
Dialects are groups of MLIR operations, types and attributes, as well as behavior associated with the...
Definition: Dialect.h:38
Derived class that automatically populates legalization information for different LLVM ops.
Conversion from types to the LLVM IR dialect.
Definition: TypeConverter.h:35
static bool canConvertToBarePtr(BaseMemRefType type)
Check if a memref type can be converted to a bare pointer.
MLIRContext & getContext() const
Returns the MLIR context.
unsigned getIndexTypeBitwidth() const
Gets the bitwidth of the index type when converted to LLVM.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
Options to control the LLVM lowering.
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:63
Dialect * getLoadedDialect(StringRef name)
Get a registered IR dialect with the given namespace.
std::vector< Dialect * > getLoadedDialects()
Return information about all IR dialects loaded in the context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:164
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:368
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition: PatternMatch.h:726
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition: PatternMatch.h:529
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
bool isInteger() const
Return true if this is an integer type (with the specified width).
Definition: Types.cpp:56
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: WalkResult.h:29
static WalkResult advance()
Definition: WalkResult.h:47
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: WalkResult.h:51
static WalkResult interrupt()
Definition: WalkResult.h:46
static ConstantIntOp create(OpBuilder &builder, Location location, int64_t value, unsigned width)
Definition: ArithOps.cpp:258
Value composeValue(OpBuilder &builder, Location loc, ValueRange src, Type dstType)
Composes a set of src values into a single value of type dstType through series of bitcasts and vecto...
Definition: Pattern.cpp:439
SmallVector< Value > decomposeValue(OpBuilder &builder, Location loc, Value src, Type dstType)
Decomposes a src value into a set of values of type dstType through series of bitcasts and vector ops...
Definition: Pattern.cpp:400
Runtime
Potential runtimes for AMD GPU kernels.
Definition: Runtimes.h:15
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition: Remarks.h:491
detail::LazyTextBuild add(const char *fmt, Ts &&...ts)
Create a Remark with llvm::formatv formatting.
Definition: Remarks.h:463
Include the generated interface declarations.
void populateGpuToROCDLConversionPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns, gpu::amd::Runtime runtime, amdgpu::Chipset chipset)
Collect a set of patterns to convert from the GPU dialect to ROCDL.
static constexpr unsigned kDeriveIndexBitwidthFromDataLayout
Value to pass as bitwidth for the index type when the converter is expected to derive the bitwidth fr...
LogicalResult applyPatternsGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
void populateGpuRewritePatterns(RewritePatternSet &patterns)
Collect all patterns to rewrite ops within the GPU dialect.
Definition: Passes.h:91
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
void configureGpuToROCDLConversionLegality(ConversionTarget &target)
Configure target to convert from the GPU dialect to ROCDL.
const FrozenRewritePatternSet & patterns
void registerConvertToLLVMDependentDialectLoading(DialectRegistry &registry)
Register the extension that will load dependent dialects for LLVM conversion.
void populateGpuMemorySpaceAttributeConversions(TypeConverter &typeConverter, const MemorySpaceMapping &mapping)
Populates memory space attribute conversion rules for lowering gpu.address_space to integer values.
void populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter, RewritePatternSet &patterns, amdgpu::Chipset chipset)
Note: This function will also add conversions for the AMDGPU-specific address spaces,...
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
void populateGpuPromoteShuffleToAMDGPUPatterns(RewritePatternSet &patterns, std::optional< amdgpu::Chipset > maybeChipset)
Tries to promote gpu.shuffles to specialized AMDGPU intrinsics.
LogicalResult applyPartialConversion(ArrayRef< Operation * > ops, const ConversionTarget &target, const FrozenRewritePatternSet &patterns, ConversionConfig config=ConversionConfig())
Below we define several entry points for operation conversion.
void populateMathToROCDLConversionPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns)
Populate the given list with patterns that convert from Math to ROCDL calls.
Definition: MathToROCDL.cpp:45
Lowering for gpu.dynamic.shared.memory to LLVM dialect.
The lowering of gpu.printf to a call to HIP hostcalls.
The lowering of gpu.printf to a call to an external printf() function.
Represents the amdgpu gfx chipset version, e.g., gfx90a, gfx942, gfx1103.
Definition: Chipset.h:22
static FailureOr< Chipset > parse(StringRef name)
Parses the chipset version string and returns the chipset on success, and failure otherwise.
Definition: Chipset.cpp:14