40#define GEN_PASS_DEF_CONVERTGPUOPSTONVVMOPS
41#include "mlir/Conversion/Passes.h.inc"
49static NVVM::ShflKind convertShflKind(gpu::ShuffleMode mode) {
51 case gpu::ShuffleMode::XOR:
52 return NVVM::ShflKind::bfly;
53 case gpu::ShuffleMode::UP:
54 return NVVM::ShflKind::up;
55 case gpu::ShuffleMode::DOWN:
56 return NVVM::ShflKind::down;
57 case gpu::ShuffleMode::IDX:
58 return NVVM::ShflKind::idx;
60 llvm_unreachable(
"unknown shuffle mode");
63static std::optional<NVVM::ReduxKind>
64convertReduxKind(gpu::AllReduceOperation mode) {
66 case gpu::AllReduceOperation::ADD:
67 return NVVM::ReduxKind::ADD;
68 case gpu::AllReduceOperation::MUL:
70 case gpu::AllReduceOperation::MINSI:
71 return NVVM::ReduxKind::MIN;
72 case gpu::AllReduceOperation::MINUI:
74 case gpu::AllReduceOperation::MINNUMF:
75 return NVVM::ReduxKind::MIN;
76 case gpu::AllReduceOperation::MAXSI:
77 return NVVM::ReduxKind::MAX;
78 case gpu::AllReduceOperation::MAXUI:
80 case gpu::AllReduceOperation::MAXNUMF:
81 return NVVM::ReduxKind::MAX;
82 case gpu::AllReduceOperation::AND:
83 return NVVM::ReduxKind::AND;
84 case gpu::AllReduceOperation::OR:
85 return NVVM::ReduxKind::OR;
86 case gpu::AllReduceOperation::XOR:
87 return NVVM::ReduxKind::XOR;
88 case gpu::AllReduceOperation::MINIMUMF:
89 case gpu::AllReduceOperation::MAXIMUMF:
97struct GPUSubgroupReduceOpLowering
99 using ConvertOpToLLVMPattern<gpu::SubgroupReduceOp>::ConvertOpToLLVMPattern;
102 matchAndRewrite(gpu::SubgroupReduceOp op, OpAdaptor adaptor,
103 ConversionPatternRewriter &rewriter)
const override {
104 if (op.getClusterSize())
105 return rewriter.notifyMatchFailure(
106 op,
"lowering for clustered reduce not implemented");
108 if (!op.getUniform())
109 return rewriter.notifyMatchFailure(
110 op,
"cannot be lowered to redux as the op must be run "
111 "uniformly (entire subgroup).");
112 if (!op.getValue().getType().isInteger(32))
113 return rewriter.notifyMatchFailure(op,
"unsupported data type");
115 std::optional<NVVM::ReduxKind> mode = convertReduxKind(op.getOp());
116 if (!mode.has_value())
117 return rewriter.notifyMatchFailure(
118 op,
"unsupported reduction mode for redux");
120 Location loc = op->getLoc();
121 auto int32Type = IntegerType::get(rewriter.getContext(), 32);
122 Value offset = LLVM::ConstantOp::create(rewriter, loc, int32Type, -1);
124 auto reduxOp = NVVM::ReduxOp::create(rewriter, loc, int32Type,
125 op.getValue(), mode.value(), offset);
127 rewriter.replaceOp(op, reduxOp->getResult(0));
133 using ConvertOpToLLVMPattern<gpu::ShuffleOp>::ConvertOpToLLVMPattern;
154 matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
155 ConversionPatternRewriter &rewriter)
const override {
156 Location loc = op->getLoc();
158 auto valueTy = adaptor.getValue().getType();
159 auto int32Type = IntegerType::get(rewriter.getContext(), 32);
160 auto predTy = IntegerType::get(rewriter.getContext(), 1);
162 Value one = LLVM::ConstantOp::create(rewriter, loc, int32Type, 1);
163 Value minusOne = LLVM::ConstantOp::create(rewriter, loc, int32Type, -1);
164 Value thirtyTwo = LLVM::ConstantOp::create(rewriter, loc, int32Type, 32);
165 Value numLeadInactiveLane = LLVM::SubOp::create(
166 rewriter, loc, int32Type, thirtyTwo, adaptor.getWidth());
168 Value activeMask = LLVM::LShrOp::create(rewriter, loc, int32Type, minusOne,
169 numLeadInactiveLane);
171 if (op.getMode() == gpu::ShuffleMode::UP) {
173 maskAndClamp = numLeadInactiveLane;
176 maskAndClamp = LLVM::SubOp::create(rewriter, loc, int32Type,
177 adaptor.getWidth(), one);
180 bool predIsUsed = !op->getResult(1).use_empty();
181 UnitAttr returnValueAndIsValidAttr =
nullptr;
182 Type resultTy = valueTy;
184 returnValueAndIsValidAttr = rewriter.getUnitAttr();
185 resultTy = LLVM::LLVMStructType::getLiteral(rewriter.getContext(),
188 Value shfl = NVVM::ShflOp::create(
189 rewriter, loc, resultTy, activeMask, adaptor.getValue(),
190 adaptor.getOffset(), maskAndClamp, convertShflKind(op.getMode()),
191 returnValueAndIsValidAttr);
193 Value shflValue = LLVM::ExtractValueOp::create(rewriter, loc, shfl, 0);
194 Value isActiveSrcLane =
195 LLVM::ExtractValueOp::create(rewriter, loc, shfl, 1);
196 rewriter.replaceOp(op, {shflValue, isActiveSrcLane});
198 rewriter.replaceOp(op, {shfl,
nullptr});
205 using ConvertOpToLLVMPattern<gpu::LaneIdOp>::ConvertOpToLLVMPattern;
208 matchAndRewrite(gpu::LaneIdOp op, gpu::LaneIdOp::Adaptor adaptor,
209 ConversionPatternRewriter &rewriter)
const override {
210 auto loc = op->getLoc();
211 MLIRContext *context = rewriter.getContext();
212 LLVM::ConstantRangeAttr bounds =
nullptr;
213 if (std::optional<APInt> upperBound = op.getUpperBound())
214 bounds = rewriter.getAttr<LLVM::ConstantRangeAttr>(
215 32, 0, upperBound->getZExtValue());
217 bounds = rewriter.getAttr<LLVM::ConstantRangeAttr>(
220 NVVM::LaneIdOp::create(rewriter, loc, rewriter.getI32Type(), bounds);
223 const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
224 if (indexBitwidth > 32) {
225 newOp = LLVM::SExtOp::create(
226 rewriter, loc, IntegerType::get(context, indexBitwidth), newOp);
227 }
else if (indexBitwidth < 32) {
228 newOp = LLVM::TruncOp::create(
229 rewriter, loc, IntegerType::get(context, indexBitwidth), newOp);
231 rewriter.replaceOp(op, {newOp});
237struct AssertOpToAssertfailLowering
239 using ConvertOpToLLVMPattern<cf::AssertOp>::ConvertOpToLLVMPattern;
242 matchAndRewrite(cf::AssertOp assertOp, cf::AssertOpAdaptor adaptor,
243 ConversionPatternRewriter &rewriter)
const override {
244 MLIRContext *ctx = rewriter.getContext();
245 Location loc = assertOp.getLoc();
246 Type i8Type = typeConverter->convertType(rewriter.getIntegerType(8));
247 Type i32Type = typeConverter->convertType(rewriter.getIntegerType(32));
248 Type i64Type = typeConverter->convertType(rewriter.getIntegerType(64));
249 Type ptrType = LLVM::LLVMPointerType::get(ctx);
250 Type voidType = LLVM::LLVMVoidType::get(ctx);
253 auto moduleOp = assertOp->getParentOfType<gpu::GPUModuleOp>();
254 auto assertfailType = LLVM::LLVMFunctionType::get(
255 voidType, {ptrType, ptrType, i32Type, ptrType, i64Type});
257 moduleOp, loc, rewriter,
"__assertfail", assertfailType);
258 assertfailDecl.setPassthroughAttr(
259 ArrayAttr::get(ctx, StringAttr::get(ctx,
"noreturn")));
270 Block *beforeBlock = assertOp->getBlock();
272 rewriter.splitBlock(beforeBlock, assertOp->getIterator());
274 rewriter.splitBlock(assertBlock, ++assertOp->getIterator());
275 rewriter.setInsertionPointToEnd(beforeBlock);
276 cf::CondBranchOp::create(rewriter, loc, adaptor.getArg(), afterBlock,
278 rewriter.setInsertionPointToEnd(assertBlock);
279 cf::BranchOp::create(rewriter, loc, afterBlock);
282 rewriter.setInsertionPoint(assertOp);
286 StringRef fileName =
"(unknown)";
287 StringRef funcName =
"(unknown)";
288 int32_t fileLine = 0;
289 while (
auto callSiteLoc = dyn_cast<CallSiteLoc>(loc))
290 loc = callSiteLoc.getCallee();
291 if (
auto fileLineColLoc = dyn_cast<FileLineColRange>(loc)) {
292 fileName = fileLineColLoc.getFilename().strref();
293 fileLine = fileLineColLoc.getStartLine();
294 }
else if (
auto nameLoc = dyn_cast<NameLoc>(loc)) {
295 funcName = nameLoc.getName().strref();
296 if (
auto fileLineColLoc =
297 dyn_cast<FileLineColRange>(nameLoc.getChildLoc())) {
298 fileName = fileLineColLoc.getFilename().strref();
299 fileLine = fileLineColLoc.getStartLine();
304 auto getGlobal = [&](LLVM::GlobalOp global) {
306 Value globalPtr = LLVM::AddressOfOp::create(
307 rewriter, loc, LLVM::LLVMPointerType::get(ctx, global.getAddrSpace()),
308 global.getSymNameAttr());
310 LLVM::GEPOp::create(rewriter, loc, ptrType, global.getGlobalType(),
311 globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
315 rewriter, loc, moduleOp, i8Type,
"assert_message_", assertOp.getMsg()));
317 rewriter, loc, moduleOp, i8Type,
"assert_file_", fileName));
319 rewriter, loc, moduleOp, i8Type,
"assert_func_", funcName));
321 LLVM::ConstantOp::create(rewriter, loc, i32Type, fileLine);
322 Value c1 = LLVM::ConstantOp::create(rewriter, loc, i64Type, 1);
325 SmallVector<Value> arguments{assertMessage, assertFile, assertLine,
327 rewriter.replaceOpWithNewOp<LLVM::CallOp>(assertOp, assertfailDecl,
334#include "GPUToNVVM.cpp.inc"
341struct LowerGpuOpsToNVVMOpsPass final
345 void getDependentDialects(DialectRegistry ®istry)
const override {
346 Base::getDependentDialects(registry);
350 void runOnOperation()
override {
351 gpu::GPUModuleOp m = getOperation();
354 for (
auto func : m.getOps<func::FuncOp>()) {
355 func->setAttr(LLVM::LLVMDialect::getEmitCWrapperAttrName(),
362 DataLayout(cast<DataLayoutOpInterface>(m.getOperation())));
364 options.overrideIndexBitwidth(indexBitwidth);
365 options.useBarePtrCallConv = useBarePtrCallConv;
371 RewritePatternSet
patterns(m.getContext());
375 vector::populateVectorFromElementsUnrollPatterns(
patterns);
377 return signalPassFailure();
380 LLVMTypeConverter converter(m.getContext(),
options);
382 RewritePatternSet llvmPatterns(m.getContext());
389 llvm::SmallDenseSet<StringRef> allowedDialectsSet(allowedDialects.begin(),
390 allowedDialects.end());
391 for (Dialect *dialect :
getContext().getLoadedDialects()) {
393 if (isa<math::MathDialect>(dialect))
396 bool allowed = allowedDialectsSet.contains(dialect->getNamespace());
398 if (!allowedDialectsSet.empty() && !allowed)
401 auto *iface = dyn_cast<ConvertToLLVMPatternInterface>(dialect);
407 <<
"dialect does not implement ConvertToLLVMPatternInterface: "
408 << dialect->getNamespace();
409 return signalPassFailure();
414 iface->populateConvertToLLVMConversionPatterns(
target, converter,
423 config.allowPatternRollback = allowPatternRollback;
425 applyPartialConversion(m,
target, std::move(llvmPatterns),
config)))
433 target.addIllegalOp<func::FuncOp>();
434 target.addIllegalOp<cf::AssertOp>();
435 target.addLegalDialect<::mlir::LLVM::LLVMDialect>();
436 target.addLegalDialect<::mlir::NVVM::NVVMDialect>();
437 target.addIllegalDialect<gpu::GPUDialect>();
438 target.addIllegalOp<LLVM::CopySignOp, LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op,
439 LLVM::FAbsOp, LLVM::FCeilOp, LLVM::FFloorOp, LLVM::FRemOp,
440 LLVM::LogOp, LLVM::Log10Op, LLVM::Log2Op, LLVM::PowOp,
441 LLVM::RoundEvenOp, LLVM::RoundOp, LLVM::SinOp,
442 LLVM::SincosOp, LLVM::SqrtOp>();
445 target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>();
454 converter, [](gpu::AddressSpace space) ->
unsigned {
456 case gpu::AddressSpace::Global:
457 return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global);
458 case gpu::AddressSpace::Workgroup:
459 return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared);
460 case gpu::AddressSpace::Private:
463 llvm_unreachable(
"unknown address space enum value");
464 return static_cast<unsigned>(NVVM::NVVMMemorySpace::Generic);
477 ConversionPatternRewriter &rewriter)
const override {
479 Value input = adaptor.getOperand();
481 auto convertedInput = maybeExt(input, rewriter);
482 auto computeType = convertedInput.getType();
484 StringRef sincosFunc;
485 if (isa<Float32Type>(computeType)) {
486 const arith::FastMathFlags flag = op.getFastmath();
487 const bool useApprox =
488 mlir::arith::bitEnumContainsAny(flag, arith::FastMathFlags::afn);
489 sincosFunc = useApprox ?
"__nv_fast_sincosf" :
"__nv_sincosf";
490 }
else if (isa<Float64Type>(computeType)) {
491 sincosFunc =
"__nv_sincos";
493 return rewriter.notifyMatchFailure(op,
494 "unsupported operand type for sincos");
497 auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
499 Value sinPtr, cosPtr;
504 assert(scope &&
"Expected op to be inside automatic allocation scope");
505 rewriter.setInsertionPointToStart(&scope->getRegion(0).front());
506 auto one = LLVM::ConstantOp::create(rewriter, loc, rewriter.getI32Type(),
507 rewriter.getI32IntegerAttr(1));
509 LLVM::AllocaOp::create(rewriter, loc, ptrType, computeType, one, 0);
511 LLVM::AllocaOp::create(rewriter, loc, ptrType, computeType, one, 0);
514 createSincosCall(rewriter, loc, sincosFunc, convertedInput, sinPtr, cosPtr,
517 auto sinResult = LLVM::LoadOp::create(rewriter, loc, computeType, sinPtr);
518 auto cosResult = LLVM::LoadOp::create(rewriter, loc, computeType, cosPtr);
520 rewriter.replaceOp(op, {maybeTrunc(sinResult, inputType, rewriter),
521 maybeTrunc(cosResult, inputType, rewriter)});
527 if (isa<Float16Type, BFloat16Type>(operand.
getType()))
528 return LLVM::FPExtOp::create(rewriter, operand.
getLoc(),
534 Value maybeTrunc(Value operand, Type type, PatternRewriter &rewriter)
const {
536 return LLVM::FPTruncOp::create(rewriter, operand.
getLoc(), type, operand);
540 void createSincosCall(ConversionPatternRewriter &rewriter, Location loc,
541 StringRef funcName, Value input, Value sinPtr,
542 Value cosPtr, Operation *op)
const {
543 auto voidType = LLVM::LLVMVoidType::get(rewriter.getContext());
544 auto ptrType = sinPtr.
getType();
546 SmallVector<Type> operandTypes = {input.
getType(), ptrType, ptrType};
547 auto funcType = LLVM::LLVMFunctionType::get(voidType, operandTypes);
549 auto funcAttr = StringAttr::get(op->
getContext(), funcName);
555 assert(parentFunc &&
"expected there to be a parent function");
556 OpBuilder
b(parentFunc);
559 funcOp = LLVM::LLVMFuncOp::create(
b, globalloc, funcName, funcType);
562 SmallVector<Value> callOperands = {input, sinPtr, cosPtr};
563 LLVM::CallOp::create(rewriter, loc, funcOp, callOperands);
567template <
typename OpTy>
571 StringRef f64Func, StringRef f32ApproxFunc =
"",
572 StringRef f16Func =
"") {
575 f32ApproxFunc, f16Func,
579template <
typename OpTy>
588template <
typename OpTy>
592 StringRef f32Func, StringRef f64Func) {
601 patterns.add<GPUSubgroupReduceOpLowering>(converter, benefit);
610 "__nv_fmaxf",
"__nv_fmax");
612 "__nv_fminf",
"__nv_fmin");
636 "__nv_copysignf",
"__nv_copysign");
638 "__nv_cos",
"__nv_fast_cosf");
646 "__nv_exp",
"__nv_fast_expf");
657 "__nv_finitef",
"__nv_isfinited");
663 "__nv_log",
"__nv_fast_logf");
665 "__nv_log10",
"__nv_fast_log10f");
669 "__nv_log2",
"__nv_fast_log2f");
671 "__nv_pow",
"__nv_fast_powf");
673 "__nv_powif",
"__nv_powi");
677 "__nv_rintf",
"__nv_rint");
681 "__nv_sin",
"__nv_fast_sinf");
687 "__nv_tan",
"__nv_fast_tanf");
692 patterns.add<SincosOpLowering>(converter, benefit);
708 NVVM::ThreadIdYOp, NVVM::ThreadIdZOp>>(
709 converter, IndexKind::Block, IntrType::Id, benefit);
712 NVVM::BlockDimYOp, NVVM::BlockDimZOp>>(
713 converter, IndexKind::Block, IntrType::Dim, benefit);
716 NVVM::ClusterIdYOp, NVVM::ClusterIdZOp>>(
717 converter, IndexKind::Other, IntrType::Id, benefit);
719 gpu::ClusterDimOp, NVVM::ClusterDimXOp, NVVM::ClusterDimYOp,
720 NVVM::ClusterDimZOp>>(converter, IndexKind::Other, IntrType::Dim,
723 gpu::ClusterBlockIdOp, NVVM::BlockInClusterIdXOp,
724 NVVM::BlockInClusterIdYOp, NVVM::BlockInClusterIdZOp>>(
725 converter, IndexKind::Other, IntrType::Id, benefit);
727 gpu::ClusterDimBlocksOp, NVVM::ClusterDimBlocksXOp,
728 NVVM::ClusterDimBlocksYOp, NVVM::ClusterDimBlocksZOp>>(
729 converter, IndexKind::Other, IntrType::Dim, benefit);
731 gpu::BlockIdOp, NVVM::BlockIdXOp, NVVM::BlockIdYOp, NVVM::BlockIdZOp>>(
732 converter, IndexKind::Grid, IntrType::Id, benefit);
734 gpu::GridDimOp, NVVM::GridDimXOp, NVVM::GridDimYOp, NVVM::GridDimZOp>>(
735 converter, IndexKind::Grid, IntrType::Dim, benefit);
750 static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared),
752 NVVM::NVVMDialect::getKernelFuncAttrName()),
754 NVVM::NVVMDialect::getMaxntidAttrName())},
765struct NVVMTargetConvertToLLVMAttrInterface
766 :
public ConvertToLLVMAttrInterface::ExternalModel<
767 NVVMTargetConvertToLLVMAttrInterface, NVVM::NVVMTargetAttr> {
769 void populateConvertToLLVMConversionPatterns(
775void NVVMTargetConvertToLLVMAttrInterface::
776 populateConvertToLLVMConversionPatterns(
Attribute attr,
787 NVVMTargetAttr::attachInterface<NVVMTargetConvertToLLVMAttrInterface>(*ctx);
static void populateFloatIntOpPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns, PatternBenefit benefit, StringRef f32Func, StringRef f64Func)
static void populateOpPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns, PatternBenefit benefit, StringRef f32Func, StringRef f64Func, StringRef f32ApproxFunc="", StringRef f16Func="")
static void populateIntOpPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns, PatternBenefit benefit, StringRef i32Func)
static llvm::ManagedStatic< PassManagerOptions > options
Attributes are known-constant values of operations.
MLIRContext * getContext() const
Utility class for operation conversions targeting the LLVM dialect that match exactly one source oper...
ConvertOpToLLVMPattern(const LLVMTypeConverter &typeConverter, PatternBenefit benefit=1)
typename math::SincosOp::Adaptor OpAdaptor
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
Conversion from types to the LLVM IR dialect.
MLIRContext & getContext() const
Returns the MLIR context.
LocationAttr findInstanceOfOrUnknown()
Return an instance of the given location type if one is nested under the current location else return...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
A trait of region holding operations that define a new scope for automatic allocations,...
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
MLIRContext * getContext()
Return the context this operation is associated with.
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very li...
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
static Operation * lookupNearestSymbolFrom(Operation *from, StringAttr symbol)
Returns the operation registered with the given symbol name within the closest parent operation of,...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Location getLoc() const
Return the location of this value.
MMAMatrix represents a matrix held by a subgroup for matrix-matrix multiply accumulate operations.
constexpr int kSharedMemoryAlignmentBit
void registerConvertGpuToNVVMInterface(DialectRegistry &registry)
Registers the ConvertToLLVMAttrInterface interface on the NVVM::NVVMTargetAttr attribute.
Include the generated interface declarations.
LLVM::LLVMStructType convertMMAToLLVMType(gpu::MMAMatrixType type)
Return the LLVMStructureType corresponding to the MMAMatrixType type.
static constexpr unsigned kDeriveIndexBitwidthFromDataLayout
Value to pass as bitwidth for the index type when the converter is expected to derive the bitwidth fr...
const FrozenRewritePatternSet GreedyRewriteConfig config
LogicalResult applyPatternsGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
void populateGpuRewritePatterns(RewritePatternSet &patterns)
Collect all patterns to rewrite ops within the GPU dialect.
LLVM::LLVMFuncOp getOrDefineFunction(Operation *moduleOp, Location loc, OpBuilder &b, StringRef name, LLVM::LLVMFunctionType type)
Note that these functions don't take a SymbolTable because GPU module lowerings can have name collisi...
void configureGpuToNVVMTypeConverter(LLVMTypeConverter &converter)
Configure the LLVM type convert to convert types and address spaces from the GPU dialect to NVVM.
void configureGpuToNVVMConversionLegality(ConversionTarget &target)
Configure target to convert from the GPU dialect to NVVM.
const FrozenRewritePatternSet & patterns
void registerConvertToLLVMDependentDialectLoading(DialectRegistry &registry)
Register the extension that will load dependent dialects for LLVM conversion.
void populateGpuSubgroupReduceOpLoweringPattern(const LLVMTypeConverter &converter, RewritePatternSet &patterns, PatternBenefit benefit=1)
Populate GpuSubgroupReduce pattern to NVVM.
void populateGpuToNVVMConversionPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns, PatternBenefit benefit=1)
Collect a set of patterns to convert from the GPU dialect to NVVM.
void populateGpuMemorySpaceAttributeConversions(TypeConverter &typeConverter, const MemorySpaceMapping &mapping)
Populates memory space attribute conversion rules for lowering gpu.address_space to integer values.
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc, Operation *moduleOp, Type llvmI8, StringRef namePrefix, StringRef str, uint64_t alignment=0, unsigned addrSpace=0)
Create a global that contains the given string.
void populateLibDeviceConversionPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns, PatternBenefit benefit=1)
Populate patterns that lower certain arith and math dialect ops to libdevice calls.
void populateGpuWMMAToNVVMConversionPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns, PatternBenefit benefit=1)
Collect a set of patterns to convert WMMA ops from GPU dialect to NVVM.
LogicalResult matchAndRewrite(math::SincosOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Lowering for gpu.dynamic.shared.memory to LLVM dialect.
Lowering of gpu.printf to a vprintf standard library.
Rewriting that replaces SourceOp with a CallOp to f32Func or f64Func or f32ApproxFunc or f16Func or i...
Unrolls SourceOp to array/vector elements.