MLIR 23.0.0git
LowerGpuOpsToROCDLOps.cpp
Go to the documentation of this file.
1//===- LowerGpuOpsToROCDLOps.cpp - MLIR GPU to ROCDL lowering passes ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a pass to generate ROCDLIR operations for higher-level
10// GPU operations.
11//
12//===----------------------------------------------------------------------===//
13
16#include "mlir/Pass/Pass.h"
18
41
44
45namespace mlir {
46#define GEN_PASS_DEF_CONVERTGPUOPSTOROCDLOPS
47#include "mlir/Conversion/Passes.h.inc"
48} // namespace mlir
49
50using namespace mlir;
51
52// Truncate or extend the result depending on the index bitwidth specified
53// by the LLVMTypeConverter options.
54static Value truncOrExtToLLVMType(ConversionPatternRewriter &rewriter,
55 Location loc, Value value,
56 const LLVMTypeConverter &converter) {
57 int64_t intWidth = cast<IntegerType>(value.getType()).getWidth();
58 int64_t indexBitwidth = converter.getIndexTypeBitwidth();
59 auto indexBitwidthType =
60 IntegerType::get(rewriter.getContext(), converter.getIndexTypeBitwidth());
61 // TODO: use <=> in C++20.
62 if (indexBitwidth > intWidth) {
63 return LLVM::SExtOp::create(rewriter, loc, indexBitwidthType, value);
64 }
65 if (indexBitwidth < intWidth) {
66 return LLVM::TruncOp::create(rewriter, loc, indexBitwidthType, value);
67 }
68 return value;
69}
70
71/// Returns true if the given `gpu.func` can be safely called using the bare
72/// pointer calling convention.
73static bool canBeCalledWithBarePointers(gpu::GPUFuncOp func) {
74 bool canBeBare = true;
75 for (Type type : func.getArgumentTypes())
76 if (auto memrefTy = dyn_cast<BaseMemRefType>(type))
77 canBeBare &= LLVMTypeConverter::canConvertToBarePtr(memrefTy);
78 return canBeBare;
79}
80
81static Value getLaneId(RewriterBase &rewriter, Location loc) {
82 auto int32Type = IntegerType::get(rewriter.getContext(), 32);
83 Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, 32);
84 Value minus1 = arith::ConstantIntOp::create(rewriter, loc, -1, 32);
85 NamedAttribute noundef = rewriter.getNamedAttr(
86 LLVM::LLVMDialect::getNoUndefAttrName(), rewriter.getUnitAttr());
87 NamedAttribute lowRange = rewriter.getNamedAttr(
88 LLVM::LLVMDialect::getRangeAttrName(),
89 LLVM::ConstantRangeAttr::get(rewriter.getContext(), APInt::getZero(32),
90 APInt(32, 32)));
91 NamedAttribute highRange = rewriter.getNamedAttr(
92 LLVM::LLVMDialect::getRangeAttrName(),
93 LLVM::ConstantRangeAttr::get(rewriter.getContext(), APInt::getZero(32),
94 APInt(32, 64)));
95 Value mbcntLo = ROCDL::MbcntLoOp::create(
96 rewriter, loc, int32Type, minus1, zero, /*arg_attrs=*/{},
97 /*res_attrs=*/
98 rewriter.getArrayAttr(rewriter.getDictionaryAttr({noundef, lowRange})));
99 Value laneId = ROCDL::MbcntHiOp::create(
100 rewriter, loc, int32Type, minus1, mbcntLo, /*arg_attrs=*/{},
101 rewriter.getArrayAttr(rewriter.getDictionaryAttr({noundef, highRange})));
102 return laneId;
103}
104
// Default LLVM data-layout string for amdgcn targets. Applied to GPU modules
// that do not already carry an llvm.data_layout attribute (see the pass's
// runOnOperation). Should match the layout produced by LLVM's AMDGPU backend.
static constexpr StringLiteral amdgcnDataLayout =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:"
    "32-v32:"
    "32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:"
    "64-S32-A5-G1-ni:7:8:9";
111
112namespace {
113struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
115
116 LogicalResult
117 matchAndRewrite(gpu::LaneIdOp op, gpu::LaneIdOp::Adaptor adaptor,
118 ConversionPatternRewriter &rewriter) const override {
119 Location loc = op.getLoc();
120 MLIRContext *context = rewriter.getContext();
121 // convert to:
122 // %mlo = call noundef range(i32 0, 32)
123 // @llvm.amdgcn.mbcnt.lo(-1, 0)
124 // followed by:
125 // %lid = call noundef range(i32 0, 64)
126 // @llvm.amdgcn.mbcnt.hi(-1, %mlo)
127
128 Value laneId = getLaneId(rewriter, loc);
129 // Truncate or extend the result depending on the index bitwidth specified
130 // by the LLVMTypeConverter options.
131 const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
132 if (indexBitwidth > 32) {
133 laneId = LLVM::SExtOp::create(
134 rewriter, loc, IntegerType::get(context, indexBitwidth), laneId);
135 } else if (indexBitwidth < 32) {
136 laneId = LLVM::TruncOp::create(
137 rewriter, loc, IntegerType::get(context, indexBitwidth), laneId);
138 }
139 rewriter.replaceOp(op, {laneId});
140 return success();
141 }
142};
143
144struct GPUSubgroupSizeOpToROCDL : ConvertOpToLLVMPattern<gpu::SubgroupSizeOp> {
146
147 GPUSubgroupSizeOpToROCDL(const LLVMTypeConverter &converter,
148 amdgpu::Chipset chipset)
150 chipset(chipset) {}
151
152 LogicalResult
153 matchAndRewrite(gpu::SubgroupSizeOp op, gpu::SubgroupSizeOp::Adaptor adaptor,
154 ConversionPatternRewriter &rewriter) const override {
155 LLVM::ConstantRangeAttr bounds = nullptr;
156 bool isBeforeGfx10 = chipset.majorVersion < 10;
157 if (auto upperBoundAttr = op.getUpperBoundAttr()) {
158 bounds = rewriter.getAttr<LLVM::ConstantRangeAttr>(
159 /*bitWidth=*/32, /*lower=*/isBeforeGfx10 ? 64 : 32,
160 /*upper=*/op.getUpperBoundAttr().getInt() + 1);
161 }
162 Value wavefrontOp = ROCDL::WavefrontSizeOp::create(
163 rewriter, op.getLoc(), rewriter.getI32Type(), bounds);
164 wavefrontOp = truncOrExtToLLVMType(rewriter, op.getLoc(), wavefrontOp,
165 *getTypeConverter());
166 rewriter.replaceOp(op, {wavefrontOp});
167 return success();
168 }
169
170 const amdgpu::Chipset chipset;
171};
172
173static bool isSupportedReadLaneType(Type type) {
174 // https://llvm.org/docs/AMDGPUUsage.html#llvm-ir-intrinsics
175 if (isa<Float16Type, BFloat16Type, Float32Type, Float64Type,
176 LLVM::LLVMPointerType>(type))
177 return true;
178
179 if (auto intType = dyn_cast<IntegerType>(type))
180 return llvm::is_contained({16, 32, 64},
181 static_cast<int>(intType.getWidth()));
182
183 if (auto vecType = dyn_cast<VectorType>(type)) {
184 Type elementType = vecType.getElementType();
185 if (elementType.isInteger(32))
186 return true;
187
188 if (vecType.getNumElements() == 2 &&
189 (isa<Float16Type, BFloat16Type>(elementType) ||
190 elementType.isInteger(16)))
191 return true;
192 }
193
194 return false;
195}
196
197struct GPUSubgroupBroadcastOpToROCDL
198 : public ConvertOpToLLVMPattern<gpu::SubgroupBroadcastOp> {
200
201 LogicalResult
202 matchAndRewrite(gpu::SubgroupBroadcastOp op, OpAdaptor adaptor,
203 ConversionPatternRewriter &rewriter) const override {
204 Value src = adaptor.getSrc();
205 if (isSupportedReadLaneType(src.getType())) {
206 Value result = createReadlaneOp(op, adaptor, rewriter, src);
207 rewriter.replaceOp(op, result);
208 return success();
209 }
210
211 Type i32 = rewriter.getI32Type();
212 Location loc = op.getLoc();
213 SmallVector<Value> decomposed =
214 LLVM::decomposeValue(rewriter, loc, src, i32);
215
216 SmallVector<Value> results;
217 results.reserve(decomposed.size());
218 for (Value v : decomposed)
219 results.emplace_back(createReadlaneOp(op, adaptor, rewriter, v));
220
221 Value result = LLVM::composeValue(rewriter, loc, results, src.getType());
222 rewriter.replaceOp(op, result);
223 return success();
224 }
225
226private:
227 static Value createReadlaneOp(gpu::SubgroupBroadcastOp op, OpAdaptor adaptor,
228 ConversionPatternRewriter &rewriter,
229 Value src) {
230 if (adaptor.getBroadcastType() == gpu::BroadcastType::specific_lane) {
231 return ROCDL::ReadlaneOp::create(rewriter, op.getLoc(), src.getType(),
232 src, adaptor.getLane());
233 } else { // first_active_lane
234 return ROCDL::ReadfirstlaneOp::create(rewriter, op.getLoc(),
235 src.getType(), src);
236 }
237 }
238};
239
240struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
242
243 /// Lowers a shuffle to the corresponding ROCDL ops.
244 ///
245 /// Use the `width` argument to see if src lane is participating.
246 /// If not the dstLane would be itself.
247 ///
248 /// Shuffle with DS Bpermute:
249 /// let shflMode = [xor, up, down, idx]
250 /// let width = 32(usually warpsize), step = [1, 2, 4, 8, 16, ... , width].
251 /// 1. curLaneId = using mbcnt.lo + mbcnt.hi
252 /// 2. widthOrZeroIfOutside = (curLaneId + width) & -width
253 /// 3. dstLane = shflMode(curLaneId, step)
254 /// 4. isActiveSrcLane = dstLane < isActiveSrcLane
255 /// 5. dstLane = isActiveSrcLane ? dstLane : curLaneId
256 /// 6. dwordAlignedDstLane = dstLane * 4 or dstLane << 2.
257 /// 7. bpermute(dwordAlignedDstLane, shfl_value).
258 ///
259 LogicalResult
260 matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
261 ConversionPatternRewriter &rewriter) const override {
262 Location loc = op->getLoc();
263 Value initShflValue = adaptor.getValue();
264
265 Value srcLaneId = getLaneId(rewriter, loc);
266
267 auto int32Type = IntegerType::get(rewriter.getContext(), 32);
268 Value width = adaptor.getWidth();
269 Value zero = LLVM::ConstantOp::create(rewriter, loc, int32Type, 0);
270 Value negwidth = LLVM::SubOp::create(rewriter, loc, int32Type, zero, width);
271 Value add = LLVM::AddOp::create(rewriter, loc, int32Type, srcLaneId, width);
272 Value widthOrZeroIfOutside =
273 LLVM::AndOp::create(rewriter, loc, int32Type, add, negwidth);
274 Value dstLane;
275
276 switch (op.getMode()) {
277 case gpu::ShuffleMode::UP:
278 dstLane = LLVM::SubOp::create(rewriter, loc, int32Type, srcLaneId,
279 adaptor.getOffset());
280 break;
281 case gpu::ShuffleMode::DOWN:
282 dstLane = LLVM::AddOp::create(rewriter, loc, int32Type, srcLaneId,
283 adaptor.getOffset());
284 break;
285 case gpu::ShuffleMode::XOR:
286 dstLane = LLVM::XOrOp::create(rewriter, loc, int32Type, srcLaneId,
287 adaptor.getOffset());
288 break;
289 case gpu::ShuffleMode::IDX:
290 dstLane = adaptor.getOffset();
291 break;
292 }
293 Value isActiveSrcLane = LLVM::ICmpOp::create(
294 rewriter, loc, LLVM::ICmpPredicate::slt, dstLane, widthOrZeroIfOutside);
295 Value selectDstLane = LLVM::SelectOp::create(rewriter, loc, isActiveSrcLane,
296 dstLane, srcLaneId);
297 Value two = LLVM::ConstantOp::create(rewriter, loc, int32Type, 2);
298 Value dwordAlignedDstLane =
299 LLVM::ShlOp::create(rewriter, loc, int32Type, selectDstLane, two);
300
301 SmallVector<Value> decomposed =
302 LLVM::decomposeValue(rewriter, loc, initShflValue, int32Type);
303 SmallVector<Value> swizzled;
304 for (Value v : decomposed) {
305 Value res = ROCDL::DsBpermuteOp::create(rewriter, loc, int32Type,
306 dwordAlignedDstLane, v);
307 swizzled.emplace_back(res);
308 }
309 Value shflValue =
310 LLVM::composeValue(rewriter, loc, swizzled, initShflValue.getType());
311 rewriter.replaceOp(op, {shflValue, isActiveSrcLane});
312 return success();
313 }
314};
315
316/// Import the GPU Ops to ROCDL Patterns.
317#include "GPUToROCDL.cpp.inc"
318
319// A pass that replaces all occurrences of GPU device operations with their
320// corresponding ROCDL equivalent.
321//
322// This pass only handles device code and is not meant to be run on GPU host
323// code.
324struct LowerGpuOpsToROCDLOpsPass final
325 : public impl::ConvertGpuOpsToROCDLOpsBase<LowerGpuOpsToROCDLOpsPass> {
326 using Base::Base;
327
328 void getDependentDialects(DialectRegistry &registry) const override {
329 Base::getDependentDialects(registry);
331 }
332
333 void runOnOperation() override {
334 gpu::GPUModuleOp m = getOperation();
335 MLIRContext *ctx = m.getContext();
336
337 auto llvmDataLayout = m->getAttrOfType<StringAttr>(
338 LLVM::LLVMDialect::getDataLayoutAttrName());
339 if (!llvmDataLayout) {
340 llvmDataLayout = StringAttr::get(ctx, amdgcnDataLayout);
341 m->setAttr(LLVM::LLVMDialect::getDataLayoutAttrName(), llvmDataLayout);
342 }
343 // Request C wrapper emission.
344 for (auto func : m.getOps<func::FuncOp>()) {
345 func->setAttr(LLVM::LLVMDialect::getEmitCWrapperAttrName(),
346 UnitAttr::get(ctx));
347 }
348
349 FailureOr<amdgpu::Chipset> maybeChipset = amdgpu::Chipset::parse(chipset);
350 if (failed(maybeChipset)) {
351 emitError(UnknownLoc::get(ctx), "Invalid chipset name: " + chipset);
352 return signalPassFailure();
353 }
354
355 /// Customize the bitwidth used for the device side index computations.
357 ctx, DataLayout(cast<DataLayoutOpInterface>(m.getOperation())));
358 options.dataLayout = llvm::DataLayout(llvmDataLayout.getValue());
359 if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
360 options.overrideIndexBitwidth(indexBitwidth);
361
362 if (useBarePtrCallConv) {
363 options.useBarePtrCallConv = true;
364 WalkResult canUseBarePointers =
365 m.walk([](gpu::GPUFuncOp func) -> WalkResult {
367 return WalkResult::advance();
368 return WalkResult::interrupt();
369 });
370 if (canUseBarePointers.wasInterrupted()) {
371 emitError(UnknownLoc::get(ctx),
372 "bare pointer calling convention requires all memrefs to "
373 "have static shape and use the identity map");
374 return signalPassFailure();
375 }
376 }
377
378 // Apply in-dialect lowering. In-dialect lowering will replace
379 // ops which need to be lowered further, which is not supported by a
380 // single conversion pass.
381 {
385 (void)applyPatternsGreedily(m, std::move(patterns));
386 }
387
388 LLVMTypeConverter converter(ctx, options);
390
391 RewritePatternSet llvmPatterns(ctx);
393
394 llvm::SmallDenseSet<StringRef> allowedDialectsSet(allowedDialects.begin(),
395 allowedDialects.end());
396 for (Dialect *dialect : ctx->getLoadedDialects()) {
397 bool allowed = allowedDialectsSet.contains(dialect->getNamespace());
398 // Empty `allowedDialectsSet` means all dialects are allowed.
399 if (!allowedDialectsSet.empty() && !allowed)
400 continue;
401
402 auto *iface = dyn_cast<ConvertToLLVMPatternInterface>(dialect);
403 if (!iface) {
404 // Error out if dialect was explicily specified but doesn't implement
405 // conversion interface.
406 if (allowed) {
407 m.emitError()
408 << "dialect does not implement ConvertToLLVMPatternInterface: "
409 << dialect->getNamespace();
410 return signalPassFailure();
411 }
412 continue;
413 }
414
415 iface->populateConvertToLLVMConversionPatterns(target, converter,
416 llvmPatterns);
417 }
418
419 populateAMDGPUToROCDLConversionPatterns(converter, llvmPatterns,
420 *maybeChipset);
421 populateGpuToROCDLConversionPatterns(converter, llvmPatterns, runtime,
422 *maybeChipset);
424 if (failed(applyPartialConversion(m, target, std::move(llvmPatterns))))
425 signalPassFailure();
426 auto *rocdlDialect = getContext().getLoadedDialect<ROCDL::ROCDLDialect>();
427 auto reqdWorkGroupSizeAttrHelper =
428 rocdlDialect->getReqdWorkGroupSizeAttrHelper();
429 auto flatWorkGroupSizeAttrHelper =
430 rocdlDialect->getFlatWorkGroupSizeAttrHelper();
431 // Manually rewrite known block size attributes so the LLVMIR translation
432 // infrastructure can pick them up.
433 m.walk([&](LLVM::LLVMFuncOp op) {
434 if (reqdWorkGroupSizeAttrHelper.isAttrPresent(op)) {
435 auto blockSizes = reqdWorkGroupSizeAttrHelper.getAttr(op);
436 // Also set up the rocdl.flat_work_group_size attribute to prevent
437 // conflicting metadata.
438 uint32_t flatSize = 1;
439 for (uint32_t size : blockSizes.asArrayRef()) {
440 flatSize *= size;
441 }
442 StringAttr flatSizeAttr =
443 StringAttr::get(ctx, Twine(flatSize) + "," + Twine(flatSize));
444 flatWorkGroupSizeAttrHelper.setAttr(op, flatSizeAttr);
445 }
446 });
447 }
448};
449
450} // namespace
451
453 target.addIllegalOp<func::FuncOp>();
454 target.addLegalDialect<::mlir::LLVM::LLVMDialect>();
455 target.addLegalDialect<ROCDL::ROCDLDialect>();
456 target.addIllegalDialect<gpu::GPUDialect>();
457 target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op, LLVM::FCeilOp,
458 LLVM::FFloorOp, LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op,
459 LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp>();
460 // These ops are legal for f32 type.
461 target.addDynamicallyLegalOp<LLVM::ExpOp, LLVM::LogOp>([](Operation *op) {
462 return any_of(op->getOperandTypes(), llvm::IsaPred<Float32Type>);
463 });
464 // TODO: Remove once we support replacing non-root ops.
465 target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>();
466}
467
474 auto *rocdlDialect =
475 converter.getContext().getLoadedDialect<ROCDL::ROCDLDialect>();
476 populateWithGenerated(patterns);
477 patterns.add<
478 gpu::index_lowering::OpLowering<gpu::ThreadIdOp, ROCDL::ThreadIdXOp,
479 ROCDL::ThreadIdYOp, ROCDL::ThreadIdZOp>>(
480 converter, IndexKind::Block, IntrType::Id);
482 gpu::BlockIdOp, ROCDL::BlockIdXOp, ROCDL::BlockIdYOp, ROCDL::BlockIdZOp>>(
483 converter, IndexKind::Grid, IntrType::Id);
484 patterns.add<
485 gpu::index_lowering::OpLowering<gpu::BlockDimOp, ROCDL::BlockDimXOp,
486 ROCDL::BlockDimYOp, ROCDL::BlockDimZOp>>(
487 converter, IndexKind::Block, IntrType::Dim);
489 gpu::GridDimOp, ROCDL::GridDimXOp, ROCDL::GridDimYOp, ROCDL::GridDimZOp>>(
490 converter, IndexKind::Grid, IntrType::Dim);
491 patterns.add<GPUReturnOpLowering>(converter);
493 converter,
495 /*allocaAddrSpace=*/ROCDL::ROCDLDialect::kPrivateMemoryAddressSpace,
496 /*workgroupAddrSpace=*/ROCDL::ROCDLDialect::kSharedMemoryAddressSpace,
497 rocdlDialect->getKernelAttrHelper().getName(),
498 rocdlDialect->getReqdWorkGroupSizeAttrHelper().getName(),
499 /*kernelClusterSizeAttributeName=*/{}});
500 if (Runtime::HIP == runtime) {
501 patterns.add<GPUPrintfOpToHIPLowering>(converter);
502 } else if (Runtime::OpenCL == runtime) {
503 // Use address space = 4 to match the OpenCL definition of printf()
504 patterns.add<GPUPrintfOpToLLVMCallLowering>(converter, /*addressSpace=*/4);
505 }
506 // TODO: Add alignment for workgroup memory
508
509 patterns.add<GPUShuffleOpLowering, GPULaneIdOpToROCDL,
510 GPUSubgroupBroadcastOpToROCDL>(converter);
511 patterns.add<GPUSubgroupSizeOpToROCDL>(converter, chipset);
512
514}
return success()
b getContext())
static Value getLaneId(RewriterBase &rewriter, Location loc)
static bool canBeCalledWithBarePointers(gpu::GPUFuncOp func)
Returns true if the given gpu.func can be safely called using the bare pointer calling convention.
static constexpr StringLiteral amdgcnDataLayout
static Value truncOrExtToLLVMType(ConversionPatternRewriter &rewriter, Location loc, Value value, const LLVMTypeConverter &converter)
static llvm::ManagedStatic< PassManagerOptions > options
#define add(a, b)
UnitAttr getUnitAttr()
Definition Builders.cpp:98
ArrayAttr getArrayAttr(ArrayRef< Attribute > value)
Definition Builders.cpp:266
MLIRContext * getContext() const
Definition Builders.h:56
DictionaryAttr getDictionaryAttr(ArrayRef< NamedAttribute > value)
Definition Builders.cpp:104
NamedAttribute getNamedAttr(StringRef name, Attribute val)
Definition Builders.cpp:94
Utility class for operation conversions targeting the LLVM dialect that match exactly one source oper...
Definition Pattern.h:216
ConvertOpToLLVMPattern(const LLVMTypeConverter &typeConverter, PatternBenefit benefit=1)
Definition Pattern.h:222
The main mechanism for performing data layout queries.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
Dialects are groups of MLIR operations, types and attributes, as well as behavior associated with the...
Definition Dialect.h:38
Derived class that automatically populates legalization information for different LLVM ops.
Conversion from types to the LLVM IR dialect.
static bool canConvertToBarePtr(BaseMemRefType type)
Check if a memref type can be converted to a bare pointer.
MLIRContext & getContext() const
Returns the MLIR context.
unsigned getIndexTypeBitwidth() const
Gets the bitwidth of the index type when converted to LLVM.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
Options to control the LLVM lowering.
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
Dialect * getLoadedDialect(StringRef name)
Get a registered IR dialect with the given namespace.
std::vector< Dialect * > getLoadedDialects()
Return information about all IR dialects loaded in the context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition Attributes.h:164
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
bool isInteger() const
Return true if this is an integer type (with the specified width).
Definition Types.cpp:56
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Type getType() const
Return the type of this value.
Definition Value.h:105
A utility result that is used to signal how to proceed with an ongoing walk:
Definition WalkResult.h:29
static WalkResult advance()
Definition WalkResult.h:47
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition WalkResult.h:51
static WalkResult interrupt()
Definition WalkResult.h:46
static ConstantIntOp create(OpBuilder &builder, Location location, int64_t value, unsigned width)
Definition ArithOps.cpp:258
Value composeValue(OpBuilder &builder, Location loc, ValueRange src, Type dstType)
Composes a set of src values into a single value of type dstType through series of bitcasts and vecto...
Definition Pattern.cpp:432
SmallVector< Value > decomposeValue(OpBuilder &builder, Location loc, Value src, Type dstType)
Decomposes a src value into a set of values of type dstType through series of bitcasts and vector ops...
Definition Pattern.cpp:393
void populateCommonGPUTypeAndAttributeConversions(TypeConverter &typeConverter)
Remap common GPU memory spaces (Workgroup, Private, etc) to LLVM address spaces.
Runtime
Potential runtimes for AMD GPU kernels.
Definition Runtimes.h:15
Include the generated interface declarations.
void populateGpuToROCDLConversionPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns, gpu::amd::Runtime runtime, amdgpu::Chipset chipset)
Collect a set of patterns to convert from the GPU dialect to ROCDL.
void populateMathToROCDLConversionPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns, std::optional< amdgpu::Chipset > chipset)
Populate the given list with patterns that convert from Math to ROCDL calls.
static constexpr unsigned kDeriveIndexBitwidthFromDataLayout
Value to pass as bitwidth for the index type when the converter is expected to derive the bitwidth fr...
LogicalResult applyPatternsGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
void populateGpuRewritePatterns(RewritePatternSet &patterns)
Collect all patterns to rewrite ops within the GPU dialect.
Definition Passes.h:91
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
void configureGpuToROCDLConversionLegality(ConversionTarget &target)
Configure target to convert from the GPU dialect to ROCDL.
const FrozenRewritePatternSet & patterns
void registerConvertToLLVMDependentDialectLoading(DialectRegistry &registry)
Register the extension that will load dependent dialects for LLVM conversion.
void populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter, RewritePatternSet &patterns, amdgpu::Chipset chipset)
Note: This function will also add conversions for the AMDGPU-specific address spaces and types,...
void populateGpuPromoteShuffleToAMDGPUPatterns(RewritePatternSet &patterns, std::optional< amdgpu::Chipset > maybeChipset)
Tries to promote gpu.shuffles to specialized AMDGPU intrinsics.
Lowering for gpu.dynamic.shared.memory to LLVM dialect.
The lowering of gpu.printf to a call to HIP hostcalls.
The lowering of gpu.printf to a call to an external printf() function.
Represents the amdgpu gfx chipset version, e.g., gfx90a, gfx942, gfx1103.
Definition Chipset.h:22
unsigned majorVersion
Definition Chipset.h:23
static FailureOr< Chipset > parse(StringRef name)
Parses the chipset version string and returns the chipset on success, and failure otherwise.
Definition Chipset.cpp:14