#define GEN_PASS_DEF_CONVERTGPUOPSTONVVMOPS
#include "mlir/Conversion/Passes.h.inc"
/// Maps a gpu.shuffle mode to the corresponding NVVM shuffle kind.
static NVVM::ShflKind convertShflKind(gpu::ShuffleMode mode) {
  switch (mode) {
  case gpu::ShuffleMode::XOR:
    return NVVM::ShflKind::bfly;
  case gpu::ShuffleMode::UP:
    return NVVM::ShflKind::up;
  case gpu::ShuffleMode::DOWN:
    return NVVM::ShflKind::down;
  case gpu::ShuffleMode::IDX:
    return NVVM::ShflKind::idx;
  }
  llvm_unreachable("unknown shuffle mode");
}
/// Maps a gpu.subgroup_reduce operation to the corresponding NVVM redux kind,
/// or std::nullopt when redux cannot implement the reduction.
static std::optional<NVVM::ReduxKind>
convertReduxKind(gpu::AllReduceOperation mode) {
  switch (mode) {
  case gpu::AllReduceOperation::ADD:
    return NVVM::ReduxKind::ADD;
  case gpu::AllReduceOperation::MUL:
    return std::nullopt;
  case gpu::AllReduceOperation::MINSI:
    return NVVM::ReduxKind::MIN;
  case gpu::AllReduceOperation::MINUI:
    return std::nullopt;
  case gpu::AllReduceOperation::MINNUMF:
    return NVVM::ReduxKind::MIN;
  case gpu::AllReduceOperation::MAXSI:
    return NVVM::ReduxKind::MAX;
  case gpu::AllReduceOperation::MAXUI:
    return std::nullopt;
  case gpu::AllReduceOperation::MAXNUMF:
    return NVVM::ReduxKind::MAX;
  case gpu::AllReduceOperation::AND:
    return NVVM::ReduxKind::AND;
  case gpu::AllReduceOperation::OR:
    return NVVM::ReduxKind::OR;
  case gpu::AllReduceOperation::XOR:
    return NVVM::ReduxKind::XOR;
  case gpu::AllReduceOperation::MINIMUMF:
  case gpu::AllReduceOperation::MAXIMUMF:
    return std::nullopt;
  }
  return std::nullopt;
}
struct GPUSubgroupReduceOpLowering
    : public ConvertOpToLLVMPattern<gpu::SubgroupReduceOp> {
  using ConvertOpToLLVMPattern<gpu::SubgroupReduceOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::SubgroupReduceOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    if (op.getClusterSize())
      return rewriter.notifyMatchFailure(
          op, "lowering for clustered reduce not implemented");

    if (!op.getUniform())
      return rewriter.notifyMatchFailure(
          op, "cannot be lowered to redux as the op must be run "
              "uniformly (entire subgroup).");
    if (!op.getValue().getType().isInteger(32))
      return rewriter.notifyMatchFailure(op, "unsupported data type");

    std::optional<NVVM::ReduxKind> mode = convertReduxKind(op.getOp());
    if (!mode.has_value())
      return rewriter.notifyMatchFailure(
          op, "unsupported reduction mode for redux");

    Location loc = op->getLoc();
    auto int32Type = IntegerType::get(rewriter.getContext(), 32);
    Value offset = LLVM::ConstantOp::create(rewriter, loc, int32Type, -1);

    auto reduxOp = NVVM::ReduxOp::create(rewriter, loc, int32Type,
                                         op.getValue(), mode.value(), offset);

    rewriter.replaceOp(op, reduxOp->getResult(0));
    return success();
  }
};
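// Illustrative sketch (assumed IR, not verbatim output of this pattern): a
// uniform 32-bit reduction such as
//
//   %r = gpu.subgroup_reduce add %x uniform : (i32) -> i32
//
// is rewritten into a single warp-wide redux intrinsic along the lines of
//
//   %r = nvvm.redux.sync add %x, %mask : i32
//
// where %mask is the constant -1, i.e. all 32 lanes participate.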
struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
  using ConvertOpToLLVMPattern<gpu::ShuffleOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    Location loc = op->getLoc();

    auto valueTy = adaptor.getValue().getType();
    auto int32Type = IntegerType::get(rewriter.getContext(), 32);
    auto predTy = IntegerType::get(rewriter.getContext(), 1);

    Value one = LLVM::ConstantOp::create(rewriter, loc, int32Type, 1);
    Value minusOne = LLVM::ConstantOp::create(rewriter, loc, int32Type, -1);
    Value thirtyTwo = LLVM::ConstantOp::create(rewriter, loc, int32Type, 32);
    Value numLeadInactiveLane = LLVM::SubOp::create(
        rewriter, loc, int32Type, thirtyTwo, adaptor.getWidth());
    // Bit mask of active lanes: `(-1) >> (32 - activeWidth)`.
    Value activeMask = LLVM::LShrOp::create(rewriter, loc, int32Type, minusOne,
                                            numLeadInactiveLane);
    Value maskAndClamp;
    if (op.getMode() == gpu::ShuffleMode::UP) {
      // Clamp lane: `32 - activeWidth`.
      maskAndClamp = numLeadInactiveLane;
    } else {
      // Clamp lane: `activeWidth - 1`.
      maskAndClamp = LLVM::SubOp::create(rewriter, loc, int32Type,
                                         adaptor.getWidth(), one);
    }

    bool predIsUsed = !op->getResult(1).use_empty();
    UnitAttr returnValueAndIsValidAttr = nullptr;
    Type resultTy = valueTy;
    if (predIsUsed) {
      returnValueAndIsValidAttr = rewriter.getUnitAttr();
      resultTy = LLVM::LLVMStructType::getLiteral(rewriter.getContext(),
                                                  {valueTy, predTy});
    }
    Value shfl = NVVM::ShflOp::create(
        rewriter, loc, resultTy, activeMask, adaptor.getValue(),
        adaptor.getOffset(), maskAndClamp, convertShflKind(op.getMode()),
        returnValueAndIsValidAttr);
    if (predIsUsed) {
      Value shflValue = LLVM::ExtractValueOp::create(rewriter, loc, shfl, 0);
      Value isActiveSrcLane =
          LLVM::ExtractValueOp::create(rewriter, loc, shfl, 1);
      rewriter.replaceOp(op, {shflValue, isActiveSrcLane});
    } else {
      rewriter.replaceOp(op, {shfl, nullptr});
    }
    return success();
  }
};
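// Illustrative sketch of the rewrite (schematic, not verbatim output):
//
//   %shfl, %pred = gpu.shuffle xor %value, %offset, %width : f32
//
// becomes roughly
//
//   %one            = llvm.mlir.constant(1 : i32)
//   %minus_one      = llvm.mlir.constant(-1 : i32)
//   %thirty_two     = llvm.mlir.constant(32 : i32)
//   %num_lanes      = llvm.sub %thirty_two, %width
//   %active_mask    = llvm.lshr %minus_one, %num_lanes
//   %mask_and_clamp = llvm.sub %width, %one
//   %shfl = nvvm.shfl.sync bfly %active_mask, %value, %offset, %mask_and_clamp
//   %shfl_value = llvm.extractvalue %shfl[0]
//   %shfl_pred  = llvm.extractvalue %shfl[1]
//
// The extractvalues are only emitted when the predicate result is used.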
struct GPULaneIdOpToNVVM : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
  using ConvertOpToLLVMPattern<gpu::LaneIdOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::LaneIdOp op, gpu::LaneIdOp::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto loc = op->getLoc();
    MLIRContext *context = rewriter.getContext();
    LLVM::ConstantRangeAttr bounds = nullptr;
    if (std::optional<APInt> upperBound = op.getUpperBound())
      bounds = rewriter.getAttr<LLVM::ConstantRangeAttr>(
          /*bitWidth=*/32, /*lower=*/0, upperBound->getZExtValue());
    else
      bounds = rewriter.getAttr<LLVM::ConstantRangeAttr>(
          /*bitWidth=*/32, /*lower=*/0, /*upper=*/kWarpSize);
    Value newOp =
        NVVM::LaneIdOp::create(rewriter, loc, rewriter.getI32Type(), bounds);
    // Truncate or extend the result depending on the index bitwidth specified
    // by the LLVMTypeConverter options.
    const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
    if (indexBitwidth > 32) {
      newOp = LLVM::SExtOp::create(
          rewriter, loc, IntegerType::get(context, indexBitwidth), newOp);
    } else if (indexBitwidth < 32) {
      newOp = LLVM::TruncOp::create(
          rewriter, loc, IntegerType::get(context, indexBitwidth), newOp);
    }
    rewriter.replaceOp(op, {newOp});
    return success();
  }
};
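// Illustrative sketch (assumed syntax): `%id = gpu.lane_id` lowers to
// `nvvm.read.ptx.sreg.laneid : i32`, annotated with a [0, 32) range (or the
// tighter upper bound carried by the op), then sign-extended or truncated to
// match the configured index bitwidth.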
struct AssertOpToAssertfailLowering
    : public ConvertOpToLLVMPattern<cf::AssertOp> {
  using ConvertOpToLLVMPattern<cf::AssertOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(cf::AssertOp assertOp, cf::AssertOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    MLIRContext *ctx = rewriter.getContext();
    Location loc = assertOp.getLoc();
    Type i8Type = typeConverter->convertType(rewriter.getIntegerType(8));
    Type i32Type = typeConverter->convertType(rewriter.getIntegerType(32));
    Type i64Type = typeConverter->convertType(rewriter.getIntegerType(64));
    Type ptrType = LLVM::LLVMPointerType::get(ctx);
    Type voidType = LLVM::LLVMVoidType::get(ctx);

    // Find or create the __assertfail function declaration.
    auto moduleOp = assertOp->getParentOfType<gpu::GPUModuleOp>();
    auto assertfailType = LLVM::LLVMFunctionType::get(
        voidType, {ptrType, ptrType, i32Type, ptrType, i64Type});
    LLVM::LLVMFuncOp assertfailDecl = getOrDefineFunction(
        moduleOp, loc, rewriter, "__assertfail", assertfailType);
    assertfailDecl.setPassthroughAttr(
        ArrayAttr::get(ctx, StringAttr::get(ctx, "noreturn")));

    // Split the block and insert a conditional branch around the assert.
    Block *beforeBlock = assertOp->getBlock();
    Block *assertBlock =
        rewriter.splitBlock(beforeBlock, assertOp->getIterator());
    Block *afterBlock =
        rewriter.splitBlock(assertBlock, ++assertOp->getIterator());
    rewriter.setInsertionPointToEnd(beforeBlock);
    cf::CondBranchOp::create(rewriter, loc, adaptor.getArg(), afterBlock,
                             assertBlock);
    rewriter.setInsertionPointToEnd(assertBlock);
    cf::BranchOp::create(rewriter, loc, afterBlock);

    // Continue lowering the cf.assert op itself.
    rewriter.setInsertionPoint(assertOp);

    // Populate file name, line number, and function name from the location of
    // the AssertOp.
    StringRef fileName = "(unknown)";
    StringRef funcName = "(unknown)";
    int32_t fileLine = 0;
    while (auto callSiteLoc = dyn_cast<CallSiteLoc>(loc))
      loc = callSiteLoc.getCallee();
    if (auto fileLineColLoc = dyn_cast<FileLineColRange>(loc)) {
      fileName = fileLineColLoc.getFilename().strref();
      fileLine = fileLineColLoc.getStartLine();
    } else if (auto nameLoc = dyn_cast<NameLoc>(loc)) {
      funcName = nameLoc.getName().strref();
      if (auto fileLineColLoc =
              dyn_cast<FileLineColRange>(nameLoc.getChildLoc())) {
        fileName = fileLineColLoc.getFilename().strref();
        fileLine = fileLineColLoc.getStartLine();
      }
    }

    // Create global string constants and take their addresses.
    auto getGlobal = [&](LLVM::GlobalOp global) {
      // Get a pointer to the global's first element.
      Value globalPtr = LLVM::AddressOfOp::create(
          rewriter, loc,
          LLVM::LLVMPointerType::get(ctx, global.getAddrSpace()),
          global.getSymNameAttr());
      Value start =
          LLVM::GEPOp::create(rewriter, loc, ptrType, global.getGlobalType(),
                              globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
      return start;
    };
    Value assertMessage = getGlobal(getOrCreateStringConstant(
        rewriter, loc, moduleOp, i8Type, "assert_message_", assertOp.getMsg()));
    Value assertFile = getGlobal(getOrCreateStringConstant(
        rewriter, loc, moduleOp, i8Type, "assert_file_", fileName));
    Value assertFunc = getGlobal(getOrCreateStringConstant(
        rewriter, loc, moduleOp, i8Type, "assert_func_", funcName));
    Value assertLine =
        LLVM::ConstantOp::create(rewriter, loc, i32Type, fileLine);
    Value c1 = LLVM::ConstantOp::create(rewriter, loc, i64Type, 1);

    // Insert the call to __assertfail.
    SmallVector<Value> arguments{assertMessage, assertFile, assertLine,
                                 assertFunc, c1};
    rewriter.replaceOpWithNewOp<LLVM::CallOp>(assertOp, assertfailDecl,
                                              arguments);
    return success();
  }
};
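// Illustrative sketch of the control flow this pattern produces (schematic,
// not verbatim):
//
//   ^before:
//     ...
//     cf.cond_br %condition, ^after, ^assert
//   ^assert:
//     llvm.call @__assertfail(%msg, %file, %line, %func, %char_size)
//     cf.br ^after          // never reached; __assertfail is noreturn
//   ^after:
//     ...
//
// %msg, %file, and %func are pointers to lazily created global string
// constants, %line is the source line, and %char_size is the i64 constant 1.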
#include "GPUToNVVM.cpp.inc"
struct LowerGpuOpsToNVVMOpsPass final
    : public impl::ConvertGpuOpsToNVVMOpsBase<LowerGpuOpsToNVVMOpsPass> {
  using Base::Base;

  void getDependentDialects(DialectRegistry &registry) const override {
    Base::getDependentDialects(registry);
    registerConvertToLLVMDependentDialectLoading(registry);
  }
  void runOnOperation() override {
    gpu::GPUModuleOp m = getOperation();

    // Request C wrapper emission for all functions in the GPU module.
    for (auto func : m.getOps<func::FuncOp>()) {
      func->setAttr(LLVM::LLVMDialect::getEmitCWrapperAttrName(),
                    UnitAttr::get(&getContext()));
    }

    // Customize the bitwidth used for device-side index computations.
    LowerToLLVMOptions options(
        m.getContext(),
        DataLayout(cast<DataLayoutOpInterface>(m.getOperation())));
    if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
      options.overrideIndexBitwidth(indexBitwidth);
    options.useBarePtrCallConv = useBarePtrCallConv;
    // Apply in-dialect lowering first; some GPU ops must be expanded before
    // the dialect conversion below.
    RewritePatternSet patterns(m.getContext());
    populateGpuRewritePatterns(patterns);
    vector::populateVectorFromElementsUnrollPatterns(patterns);
    if (failed(applyPatternsGreedily(m, std::move(patterns))))
      return signalPassFailure();
    LLVMTypeConverter converter(m.getContext(), options);
    configureGpuToNVVMTypeConverter(converter);
    RewritePatternSet llvmPatterns(m.getContext());
    LLVMConversionTarget target(getContext());
    llvm::SmallDenseSet<StringRef> allowedDialectsSet(allowedDialects.begin(),
                                                      allowedDialects.end());
    for (Dialect *dialect : getContext().getLoadedDialects()) {
      // Skip math ops; NVVM needs a custom lowering of math to libdevice.
      if (isa<math::MathDialect>(dialect))
        continue;

      bool allowed = allowedDialectsSet.contains(dialect->getNamespace());
      // An empty set means all dialects are allowed.
      if (!allowedDialectsSet.empty() && !allowed)
        continue;

      auto *iface = dyn_cast<ConvertToLLVMPatternInterface>(dialect);
      if (!iface) {
        // Error out if an explicitly requested dialect does not implement the
        // interface.
        if (allowed) {
          m.emitError()
              << "dialect does not implement ConvertToLLVMPatternInterface: "
              << dialect->getNamespace();
          return signalPassFailure();
        }
        continue;
      }

      iface->populateConvertToLLVMConversionPatterns(target, converter,
                                                     llvmPatterns);
    }
    if (failed(applyPartialConversion(m, target, std::move(llvmPatterns))))
      signalPassFailure();
  }
};
void mlir::configureGpuToNVVMConversionLegality(ConversionTarget &target) {
  target.addIllegalOp<func::FuncOp>();
  target.addIllegalOp<cf::AssertOp>();
  target.addLegalDialect<::mlir::LLVM::LLVMDialect>();
  target.addLegalDialect<::mlir::NVVM::NVVMDialect>();
  target.addIllegalDialect<gpu::GPUDialect>();
  target.addIllegalOp<LLVM::CopySignOp, LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op,
                      LLVM::FAbsOp, LLVM::FCeilOp, LLVM::FFloorOp,
                      LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op, LLVM::Log2Op,
                      LLVM::PowOp, LLVM::RoundEvenOp, LLVM::RoundOp,
                      LLVM::SinOp, LLVM::SincosOp, LLVM::SqrtOp>();

  target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>();
}
void mlir::configureGpuToNVVMTypeConverter(LLVMTypeConverter &converter) {
  // NVVM uses address space 3 for shared (workgroup) memory and the default
  // address space for private allocations.
  populateGpuMemorySpaceAttributeConversions(
      converter, [](gpu::AddressSpace space) -> unsigned {
        switch (space) {
        case gpu::AddressSpace::Global:
          return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global);
        case gpu::AddressSpace::Workgroup:
          return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared);
        case gpu::AddressSpace::Private:
          return 0;
        }
        llvm_unreachable("unknown address space enum value");
        return static_cast<unsigned>(NVVM::NVVMMemorySpace::Generic);
      });
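// Illustrative effect of the mapping above (schematic): a type such as
//
//   memref<16xf32, #gpu.address_space<workgroup>>
//
// is converted to a pointer in NVVM's shared memory space, addrspace(3),
// while #gpu.address_space<global> maps to the global space and
// #gpu.address_space<private> maps to the default address space 0.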
struct SincosOpLowering : public ConvertOpToLLVMPattern<math::SincosOp> {
  using ConvertOpToLLVMPattern<math::SincosOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(math::SincosOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    Location loc = op.getLoc();
    Value input = adaptor.getOperand();
    Type inputType = input.getType();
    auto convertedInput = maybeExt(input, rewriter);
    auto computeType = convertedInput.getType();

    StringRef sincosFunc;
    if (isa<Float32Type>(computeType)) {
      const arith::FastMathFlags flag = op.getFastmath();
      const bool useApprox =
          mlir::arith::bitEnumContainsAny(flag, arith::FastMathFlags::afn);
      sincosFunc = useApprox ? "__nv_fast_sincosf" : "__nv_sincosf";
    } else if (isa<Float64Type>(computeType)) {
      sincosFunc = "__nv_sincos";
    } else {
      return rewriter.notifyMatchFailure(
          op, "unsupported operand type for sincos");
    }

    auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());

    // Allocate the result slots in the nearest automatic allocation scope.
    Value sinPtr, cosPtr;
    {
      OpBuilder::InsertionGuard guard(rewriter);
      Operation *scope =
          op->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
      assert(scope && "Expected op to be inside automatic allocation scope");
      rewriter.setInsertionPointToStart(&scope->getRegion(0).front());
      auto one = LLVM::ConstantOp::create(rewriter, loc, rewriter.getI32Type(),
                                          rewriter.getI32IntegerAttr(1));
      sinPtr =
          LLVM::AllocaOp::create(rewriter, loc, ptrType, computeType, one, 0);
      cosPtr =
          LLVM::AllocaOp::create(rewriter, loc, ptrType, computeType, one, 0);
    }

    createSincosCall(rewriter, loc, sincosFunc, convertedInput, sinPtr, cosPtr,
                     op);

    auto sinResult = LLVM::LoadOp::create(rewriter, loc, computeType, sinPtr);
    auto cosResult = LLVM::LoadOp::create(rewriter, loc, computeType, cosPtr);

    rewriter.replaceOp(op, {maybeTrunc(sinResult, inputType, rewriter),
                            maybeTrunc(cosResult, inputType, rewriter)});
    return success();
  }
  Value maybeExt(Value operand, PatternRewriter &rewriter) const {
    if (isa<Float16Type, BFloat16Type>(operand.getType()))
      return LLVM::FPExtOp::create(rewriter, operand.getLoc(),
                                   Float32Type::get(rewriter.getContext()),
                                   operand);
    return operand;
  }

  Value maybeTrunc(Value operand, Type type, PatternRewriter &rewriter) const {
    if (operand.getType() != type)
      return LLVM::FPTruncOp::create(rewriter, operand.getLoc(), type, operand);
    return operand;
  }
  void createSincosCall(ConversionPatternRewriter &rewriter, Location loc,
                        StringRef funcName, Value input, Value sinPtr,
                        Value cosPtr, Operation *op) const {
    auto voidType = LLVM::LLVMVoidType::get(rewriter.getContext());
    auto ptrType = sinPtr.getType();

    SmallVector<Type> operandTypes = {input.getType(), ptrType, ptrType};
    auto funcType = LLVM::LLVMFunctionType::get(voidType, operandTypes);

    auto funcAttr = StringAttr::get(op->getContext(), funcName);
    auto funcOp =
        SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(op, funcAttr);

    if (!funcOp) {
      auto parentFunc = op->getParentOfType<FunctionOpInterface>();
      assert(parentFunc && "expected there to be a parent function");
      OpBuilder b(parentFunc);

      auto globalloc = loc->findInstanceOfOrUnknown<FileLineColLoc>();
      funcOp = LLVM::LLVMFuncOp::create(b, globalloc, funcName, funcType);
    }

    SmallVector<Value> callOperands = {input, sinPtr, cosPtr};
    LLVM::CallOp::create(rewriter, loc, funcOp, callOperands);
  }
};
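// Illustrative sketch of the sincos lowering (schematic, not verbatim):
// `math.sincos %x : f32` becomes two allocas in the enclosing allocation
// scope, one libdevice call, and two loads, roughly
//
//   %sin_ptr = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
//   %cos_ptr = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
//   llvm.call @__nv_sincosf(%x, %sin_ptr, %cos_ptr)
//       : (f32, !llvm.ptr, !llvm.ptr) -> ()
//   %sin = llvm.load %sin_ptr : !llvm.ptr -> f32
//   %cos = llvm.load %cos_ptr : !llvm.ptr -> f32
//
// f16/bf16 inputs are extended to f32 first and the results truncated back.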
template <typename OpTy>
static void populateOpPatterns(const LLVMTypeConverter &converter,
                               RewritePatternSet &patterns,
                               PatternBenefit benefit, StringRef f32Func,
                               StringRef f64Func, StringRef f32ApproxFunc = "",
                               StringRef f16Func = "") {
  patterns.add<OpToFuncCallLowering<OpTy>>(converter, f32Func, f64Func,
                                           f32ApproxFunc, f16Func, benefit);
}
template <typename OpTy>
static void populateIntOpPatterns(const LLVMTypeConverter &converter,
                                  RewritePatternSet &patterns,
                                  PatternBenefit benefit, StringRef i32Func) {
template <typename OpTy>
static void populateFloatIntOpPatterns(const LLVMTypeConverter &converter,
                                       RewritePatternSet &patterns,
                                       PatternBenefit benefit,
                                       StringRef f32Func, StringRef f64Func) {
void mlir::populateGpuSubgroupReduceOpLoweringPattern(
    const LLVMTypeConverter &converter, RewritePatternSet &patterns,
    PatternBenefit benefit) {
  patterns.add<GPUSubgroupReduceOpLowering>(converter, benefit);
}
607 "__nv_fmaxf",
"__nv_fmax");
609 "__nv_fminf",
"__nv_fmin");
633 "__nv_copysignf",
"__nv_copysign");
635 "__nv_cos",
"__nv_fast_cosf");
643 "__nv_exp",
"__nv_fast_expf");
654 "__nv_finitef",
"__nv_isfinited");
660 "__nv_log",
"__nv_fast_logf");
662 "__nv_log10",
"__nv_fast_log10f");
666 "__nv_log2",
"__nv_fast_log2f");
668 "__nv_pow",
"__nv_fast_powf");
670 "__nv_powif",
"__nv_powi");
674 "__nv_rintf",
"__nv_rint");
678 "__nv_sin",
"__nv_fast_sinf");
684 "__nv_tan",
"__nv_fast_tanf");
  patterns.add<SincosOpLowering>(converter, benefit);
}
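// Each registration above follows the same shape: the lowered op is paired
// with its f32 and f64 libdevice symbols, plus optional fast-approximation
// and f16 variants. A sketch of one such call (the op type chosen here is an
// assumption for illustration):
//
//   populateOpPatterns<math::CosOp>(converter, patterns, benefit,
//                                   "__nv_cosf", "__nv_cos",
//                                   "__nv_fast_cosf");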
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::ThreadIdOp, NVVM::ThreadIdXOp, NVVM::ThreadIdYOp,
      NVVM::ThreadIdZOp>>(converter, IndexKind::Block, IntrType::Id, benefit);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::BlockDimOp, NVVM::BlockDimXOp, NVVM::BlockDimYOp,
      NVVM::BlockDimZOp>>(converter, IndexKind::Block, IntrType::Dim, benefit);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::ClusterIdOp, NVVM::ClusterIdXOp, NVVM::ClusterIdYOp,
      NVVM::ClusterIdZOp>>(converter, IndexKind::Other, IntrType::Id, benefit);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::ClusterDimOp, NVVM::ClusterDimXOp, NVVM::ClusterDimYOp,
      NVVM::ClusterDimZOp>>(converter, IndexKind::Other, IntrType::Dim,
                            benefit);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::ClusterBlockIdOp, NVVM::BlockInClusterIdXOp,
      NVVM::BlockInClusterIdYOp, NVVM::BlockInClusterIdZOp>>(
      converter, IndexKind::Other, IntrType::Id, benefit);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::ClusterDimBlocksOp, NVVM::ClusterDimBlocksXOp,
      NVVM::ClusterDimBlocksYOp, NVVM::ClusterDimBlocksZOp>>(
      converter, IndexKind::Other, IntrType::Dim, benefit);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::BlockIdOp, NVVM::BlockIdXOp, NVVM::BlockIdYOp, NVVM::BlockIdZOp>>(
      converter, IndexKind::Grid, IntrType::Id, benefit);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::GridDimOp, NVVM::GridDimXOp, NVVM::GridDimYOp, NVVM::GridDimZOp>>(
      converter, IndexKind::Grid, IntrType::Dim, benefit);
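// Illustrative effect of the index patterns (schematic): `gpu.thread_id x`
// lowers to `nvvm.read.ptx.sreg.tid.x : i32` and `gpu.block_dim y` to
// `nvvm.read.ptx.sreg.ntid.y : i32`, each followed by an extension or
// truncation to the configured index bitwidth; range annotations are attached
// when launch bounds are known.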
          static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared),
          StringAttr::get(&converter.getContext(),
                          NVVM::NVVMDialect::getKernelFuncAttrName()),
          StringAttr::get(&converter.getContext(),
                          NVVM::NVVMDialect::getMaxntidAttrName())},
struct NVVMTargetConvertToLLVMAttrInterface
    : public ConvertToLLVMAttrInterface::ExternalModel<
          NVVMTargetConvertToLLVMAttrInterface, NVVM::NVVMTargetAttr> {
  void populateConvertToLLVMConversionPatterns(
      Attribute attr, ConversionTarget &target,
      LLVMTypeConverter &typeConverter, RewritePatternSet &patterns) const;
};

void NVVMTargetConvertToLLVMAttrInterface::
    populateConvertToLLVMConversionPatterns(
        Attribute attr, ConversionTarget &target,
        LLVMTypeConverter &typeConverter, RewritePatternSet &patterns) const {
  configureGpuToNVVMConversionLegality(target);
  configureGpuToNVVMTypeConverter(typeConverter);
  populateGpuToNVVMConversionPatterns(typeConverter, patterns);
}

void mlir::registerConvertGpuToNVVMInterface(DialectRegistry &registry) {
  registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
    NVVMTargetAttr::attachInterface<NVVMTargetConvertToLLVMAttrInterface>(*ctx);
  });
}