#define GEN_PASS_DEF_CONVERTGPUOPSTONVVMOPS
#include "mlir/Conversion/Passes.h.inc"
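
/// Maps a gpu.shuffle mode onto the corresponding NVVM shfl kind.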
static NVVM::ShflKind convertShflKind(gpu::ShuffleMode mode) {
  switch (mode) {
  case gpu::ShuffleMode::XOR:
    return NVVM::ShflKind::bfly;
  case gpu::ShuffleMode::UP:
    return NVVM::ShflKind::up;
  case gpu::ShuffleMode::DOWN:
    return NVVM::ShflKind::down;
  case gpu::ShuffleMode::IDX:
    return NVVM::ShflKind::idx;
  }
  llvm_unreachable("unknown shuffle mode");
}
static std::optional<NVVM::ReduxKind>
convertReduxKind(gpu::AllReduceOperation mode) {
  switch (mode) {
  case gpu::AllReduceOperation::ADD:
    return NVVM::ReduxKind::ADD;
  case gpu::AllReduceOperation::MUL:
    return std::nullopt;
  case gpu::AllReduceOperation::MINSI:
    return NVVM::ReduxKind::MIN;
  case gpu::AllReduceOperation::MINUI:
    return std::nullopt;
  case gpu::AllReduceOperation::MINNUMF:
    return NVVM::ReduxKind::MIN;
  case gpu::AllReduceOperation::MAXSI:
    return NVVM::ReduxKind::MAX;
  case gpu::AllReduceOperation::MAXUI:
    return std::nullopt;
  case gpu::AllReduceOperation::MAXNUMF:
    return NVVM::ReduxKind::MAX;
  case gpu::AllReduceOperation::AND:
    return NVVM::ReduxKind::AND;
  case gpu::AllReduceOperation::OR:
    return NVVM::ReduxKind::OR;
  case gpu::AllReduceOperation::XOR:
    return NVVM::ReduxKind::XOR;
  case gpu::AllReduceOperation::MINIMUMF:
  case gpu::AllReduceOperation::MAXIMUMF:
    return std::nullopt;
  }
  return std::nullopt;
}
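
/// Lowers a uniform, whole-subgroup gpu.subgroup_reduce on i32 values to a
/// single nvvm.redux.sync with a full-warp member mask. Schematically (an
/// illustrative sketch, not copied from a test case):
///
///   %r = gpu.subgroup_reduce add %x uniform : (i32) -> i32
///
/// becomes roughly
///
///   %mask = llvm.mlir.constant(-1 : i32) : i32
///   %r = nvvm.redux.sync add %x, %mask : i32 -> i32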
struct GPUSubgroupReduceOpLowering
    : public ConvertOpToLLVMPattern<gpu::SubgroupReduceOp> {
  using ConvertOpToLLVMPattern<gpu::SubgroupReduceOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::SubgroupReduceOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    if (op.getClusterSize())
      return rewriter.notifyMatchFailure(
          op, "lowering for clustered reduce not implemented");

    if (!op.getUniform())
      return rewriter.notifyMatchFailure(
          op, "cannot be lowered to redux as the op must be run "
              "uniformly (entire subgroup).");
    if (!op.getValue().getType().isInteger(32))
      return rewriter.notifyMatchFailure(op, "unsupported data type");

    std::optional<NVVM::ReduxKind> mode = convertReduxKind(op.getOp());
    if (!mode.has_value())
      return rewriter.notifyMatchFailure(
          op, "unsupported reduction mode for redux");

    Location loc = op->getLoc();
    auto int32Type = IntegerType::get(rewriter.getContext(), 32);
    Value offset = LLVM::ConstantOp::create(rewriter, loc, int32Type, -1);

    auto reduxOp = NVVM::ReduxOp::create(rewriter, loc, int32Type,
                                         op.getValue(), mode.value(), offset);

    rewriter.replaceOp(op, reduxOp->getResult(0));
    return success();
  }
};
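
/// Lowers gpu.shuffle to nvvm.shfl.sync. The active-lane mask is computed as
/// `(-1) >> (32 - width)`; the clamp value is `32 - width` for shuffle-up and
/// `width - 1` for the other modes. When the validity result of gpu.shuffle
/// is used, the NVVM op is asked to also return the predicate as the second
/// member of a literal struct.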
struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
  using ConvertOpToLLVMPattern<gpu::ShuffleOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    Location loc = op->getLoc();

    auto valueTy = adaptor.getValue().getType();
    auto int32Type = IntegerType::get(rewriter.getContext(), 32);
    auto predTy = IntegerType::get(rewriter.getContext(), 1);

    Value one = LLVM::ConstantOp::create(rewriter, loc, int32Type, 1);
    Value minusOne = LLVM::ConstantOp::create(rewriter, loc, int32Type, -1);
    Value thirtyTwo = LLVM::ConstantOp::create(rewriter, loc, int32Type, 32);
    Value numLeadInactiveLane = LLVM::SubOp::create(
        rewriter, loc, int32Type, thirtyTwo, adaptor.getWidth());
    // Bit mask of active lanes: `(-1) >> (32 - activeWidth)`.
    Value activeMask = LLVM::LShrOp::create(rewriter, loc, int32Type, minusOne,
                                            numLeadInactiveLane);
    Value maskAndClamp;
    if (op.getMode() == gpu::ShuffleMode::UP) {
      // Clamp lane: `32 - activeWidth`.
      maskAndClamp = numLeadInactiveLane;
    } else {
      // Clamp lane: `activeWidth - 1`.
      maskAndClamp = LLVM::SubOp::create(rewriter, loc, int32Type,
                                         adaptor.getWidth(), one);
    }

    bool predIsUsed = !op->getResult(1).use_empty();
    UnitAttr returnValueAndIsValidAttr = nullptr;
    Type resultTy = valueTy;
    if (predIsUsed) {
      returnValueAndIsValidAttr = rewriter.getUnitAttr();
      resultTy = LLVM::LLVMStructType::getLiteral(rewriter.getContext(),
                                                  {valueTy, predTy});
    }
    Value shfl = NVVM::ShflOp::create(
        rewriter, loc, resultTy, activeMask, adaptor.getValue(),
        adaptor.getOffset(), maskAndClamp, convertShflKind(op.getMode()),
        returnValueAndIsValidAttr);
    if (predIsUsed) {
      Value shflValue = LLVM::ExtractValueOp::create(rewriter, loc, shfl, 0);
      Value isActiveSrcLane =
          LLVM::ExtractValueOp::create(rewriter, loc, shfl, 1);
      rewriter.replaceOp(op, {shflValue, isActiveSrcLane});
    } else {
      rewriter.replaceOp(op, {shfl, nullptr});
    }
    return success();
  }
};
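
/// Lowers gpu.lane_id to nvvm.read.ptx.sreg.laneid (NVVM::LaneIdOp),
/// attaching a constant-range attribute derived from the op's upper bound,
/// and extends or truncates the i32 result to the index bitwidth selected by
/// the type converter.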
struct GPULaneIdOpToNVVM : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
  using ConvertOpToLLVMPattern<gpu::LaneIdOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::LaneIdOp op, gpu::LaneIdOp::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto loc = op->getLoc();
    MLIRContext *context = rewriter.getContext();
    LLVM::ConstantRangeAttr bounds = nullptr;
    if (std::optional<APInt> upperBound = op.getUpperBound())
      bounds = rewriter.getAttr<LLVM::ConstantRangeAttr>(
          32, 0, upperBound->getZExtValue());
    else
      bounds = rewriter.getAttr<LLVM::ConstantRangeAttr>(
          32, 0, /*upper=*/32);
    Value newOp =
        NVVM::LaneIdOp::create(rewriter, loc, rewriter.getI32Type(), bounds);
    // Truncate or extend the result depending on the index bitwidth specified
    // by the LLVMTypeConverter options.
    const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
    if (indexBitwidth > 32) {
      newOp = LLVM::SExtOp::create(
          rewriter, loc, IntegerType::get(context, indexBitwidth), newOp);
    } else if (indexBitwidth < 32) {
      newOp = LLVM::TruncOp::create(
          rewriter, loc, IntegerType::get(context, indexBitwidth), newOp);
    }
    rewriter.replaceOp(op, {newOp});
    return success();
  }
};
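
/// Lowers cf.assert to a conditional call of the device-side __assertfail
/// routine: the surrounding block is split so that the assertion message,
/// file name, function name, and line number are passed to __assertfail only
/// when the condition is false.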
struct AssertOpToAssertfailLowering
    : public ConvertOpToLLVMPattern<cf::AssertOp> {
  using ConvertOpToLLVMPattern<cf::AssertOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(cf::AssertOp assertOp, cf::AssertOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    MLIRContext *ctx = rewriter.getContext();
    Location loc = assertOp.getLoc();
    Type i8Type = typeConverter->convertType(rewriter.getIntegerType(8));
    Type i32Type = typeConverter->convertType(rewriter.getIntegerType(32));
    Type i64Type = typeConverter->convertType(rewriter.getIntegerType(64));
    Type ptrType = LLVM::LLVMPointerType::get(ctx);
    Type voidType = LLVM::LLVMVoidType::get(ctx);

    // Find or create the __assertfail function declaration and mark it
    // noreturn.
    auto moduleOp = assertOp->getParentOfType<gpu::GPUModuleOp>();
    auto assertfailType = LLVM::LLVMFunctionType::get(
        voidType, {ptrType, ptrType, i32Type, ptrType, i64Type});
    LLVM::LLVMFuncOp assertfailDecl = getOrDefineFunction(
        moduleOp, loc, rewriter, "__assertfail", assertfailType);
    assertfailDecl.setPassthroughAttr(
        ArrayAttr::get(ctx, StringAttr::get(ctx, "noreturn")));

    // Split the surrounding block so the call is only reached when the
    // assertion condition is false.
    Block *beforeBlock = assertOp->getBlock();
    Block *assertBlock =
        rewriter.splitBlock(beforeBlock, assertOp->getIterator());
    Block *afterBlock =
        rewriter.splitBlock(assertBlock, ++assertOp->getIterator());
    rewriter.setInsertionPointToEnd(beforeBlock);
    cf::CondBranchOp::create(rewriter, loc, adaptor.getArg(), afterBlock,
                             assertBlock);
    rewriter.setInsertionPointToEnd(assertBlock);
    cf::BranchOp::create(rewriter, loc, afterBlock);

    // Continue cf.assert lowering.
    rewriter.setInsertionPoint(assertOp);

    // Populate file name, line number, and function name from the location
    // of the cf.assert op.
    StringRef fileName = "(unknown)";
    StringRef funcName = "(unknown)";
    int32_t fileLine = 0;
    while (auto callSiteLoc = dyn_cast<CallSiteLoc>(loc))
      loc = callSiteLoc.getCallee();
    if (auto fileLineColLoc = dyn_cast<FileLineColRange>(loc)) {
      fileName = fileLineColLoc.getFilename().strref();
      fileLine = fileLineColLoc.getStartLine();
    } else if (auto nameLoc = dyn_cast<NameLoc>(loc)) {
      funcName = nameLoc.getName().strref();
      if (auto fileLineColLoc =
              dyn_cast<FileLineColRange>(nameLoc.getChildLoc())) {
        fileName = fileLineColLoc.getFilename().strref();
        fileLine = fileLineColLoc.getStartLine();
      }
    }

    // Create globals for the message, file, and function strings and take
    // pointers to their first characters.
    auto getGlobal = [&](LLVM::GlobalOp global) {
      Value globalPtr = LLVM::AddressOfOp::create(
          rewriter, loc,
          LLVM::LLVMPointerType::get(ctx, global.getAddrSpace()),
          global.getSymNameAttr());
      Value start =
          LLVM::GEPOp::create(rewriter, loc, ptrType, global.getGlobalType(),
                              globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
      return start;
    };
    Value assertMessage = getGlobal(getOrCreateStringConstant(
        rewriter, loc, moduleOp, i8Type, "assert_message_",
        assertOp.getMsg()));
    Value assertFile = getGlobal(getOrCreateStringConstant(
        rewriter, loc, moduleOp, i8Type, "assert_file_", fileName));
    Value assertFunc = getGlobal(getOrCreateStringConstant(
        rewriter, loc, moduleOp, i8Type, "assert_func_", funcName));
    Value assertLine =
        LLVM::ConstantOp::create(rewriter, loc, i32Type, fileLine);
    Value c1 = LLVM::ConstantOp::create(rewriter, loc, i64Type, 1);

    // Replace cf.assert with a call to __assertfail.
    SmallVector<Value> arguments{assertMessage, assertFile, assertLine,
                                 assertFunc, c1};
    rewriter.replaceOpWithNewOp<LLVM::CallOp>(assertOp, assertfailDecl,
                                              arguments);
    return success();
  }
};
#include "GPUToNVVM.cpp.inc"
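
/// A pass that replaces all occurrences of GPU device operations with their
/// corresponding NVVM equivalents. This pass only handles device code and is
/// not meant to be run on GPU host code.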
struct LowerGpuOpsToNVVMOpsPass final
    : public impl::ConvertGpuOpsToNVVMOpsBase<LowerGpuOpsToNVVMOpsPass> {
  void getDependentDialects(DialectRegistry &registry) const override {
    Base::getDependentDialects(registry);
    registerConvertToLLVMDependentDialectLoading(registry);
  }

  void runOnOperation() override {
    gpu::GPUModuleOp m = getOperation();

    // Request C wrapper emission for every func.func in the module.
    for (auto func : m.getOps<func::FuncOp>()) {
      func->setAttr(LLVM::LLVMDialect::getEmitCWrapperAttrName(),
                    UnitAttr::get(&getContext()));
    }

    // Customize the bitwidth used for the device-side index computations.
    LowerToLLVMOptions options(
        m.getContext(),
        DataLayout(cast<DataLayoutOpInterface>(m.getOperation())));
    if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
      options.overrideIndexBitwidth(indexBitwidth);
    options.useBarePtrCallConv = useBarePtrCallConv;

    // Apply in-dialect rewrites first; they replace ops that would otherwise
    // need multi-step lowering, which a single conversion pass cannot do.
    RewritePatternSet patterns(m.getContext());
    populateGpuRewritePatterns(patterns);
    vector::populateVectorFromElementsUnrollPatterns(patterns);
    if (failed(applyPatternsGreedily(m, std::move(patterns))))
      return signalPassFailure();

    LLVMTypeConverter converter(m.getContext(), options);
    configureGpuToNVVMTypeConverter(converter);
    RewritePatternSet llvmPatterns(m.getContext());
    LLVMConversionTarget target(getContext());

    // Gather ConvertToLLVM patterns from all loaded dialects, restricted to
    // `allowedDialects` when that list is non-empty.
    llvm::SmallDenseSet<StringRef> allowedDialectsSet(allowedDialects.begin(),
                                                      allowedDialects.end());
    for (Dialect *dialect : getContext().getLoadedDialects()) {
      // Skip the math dialect; NVVM needs custom math lowering.
      if (isa<math::MathDialect>(dialect))
        continue;

      bool allowed = allowedDialectsSet.contains(dialect->getNamespace());
      // An empty `allowedDialectsSet` means all dialects are allowed.
      if (!allowedDialectsSet.empty() && !allowed)
        continue;

      auto *iface = dyn_cast<ConvertToLLVMPatternInterface>(dialect);
      if (!iface) {
        // Error out only if the dialect was explicitly requested.
        if (allowed) {
          m.emitError()
              << "dialect does not implement ConvertToLLVMPatternInterface: "
              << dialect->getNamespace();
          return signalPassFailure();
        }
        continue;
      }

      iface->populateConvertToLLVMConversionPatterns(target, converter,
                                                     llvmPatterns);
    }

    // ... (elided: the GPU-to-NVVM, WMMA, and libdevice patterns are
    // populated here and the conversion legality is configured.)

    ConversionConfig config;
    config.allowPatternRollback = allowPatternRollback;
    if (failed(applyPartialConversion(m, target, std::move(llvmPatterns),
                                      config)))
      signalPassFailure();
  }
};
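
/// Marks the LLVM and NVVM dialects legal, and the GPU dialect (plus
/// func.func, cf.assert, and the math-style LLVM ops listed below) illegal,
/// so the conversion driver keeps rewriting until only NVVM/LLVM ops remain.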
void mlir::configureGpuToNVVMConversionLegality(ConversionTarget &target) {
  target.addIllegalOp<func::FuncOp>();
  target.addIllegalOp<cf::AssertOp>();
  target.addLegalDialect<::mlir::LLVM::LLVMDialect>();
  target.addLegalDialect<::mlir::NVVM::NVVMDialect>();
  target.addIllegalDialect<gpu::GPUDialect>();
  target.addIllegalOp<LLVM::CopySignOp, LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op,
                      LLVM::FAbsOp, LLVM::FCeilOp, LLVM::FFloorOp, LLVM::FRemOp,
                      LLVM::LogOp, LLVM::Log10Op, LLVM::Log2Op, LLVM::PowOp,
                      LLVM::RoundEvenOp, LLVM::RoundOp, LLVM::SinOp,
                      LLVM::SincosOp, LLVM::SqrtOp>();
  target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>();
}
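
/// Lowers math.sincos to a call of the __nv_sincosf / __nv_sincos libdevice
/// functions: the results are written through two stack allocas that are then
/// loaded back, and f16/bf16 operands are extended to f32 first.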
struct SincosOpLowering : public ConvertOpToLLVMPattern<math::SincosOp> {
  using ConvertOpToLLVMPattern<math::SincosOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(math::SincosOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    Location loc = op.getLoc();
    Value input = adaptor.getOperand();
    Type inputType = input.getType();

    auto convertedInput = maybeExt(input, rewriter);
    auto computeType = convertedInput.getType();

    StringRef sincosFunc;
    if (isa<Float32Type>(computeType)) {
      const arith::FastMathFlags flag = op.getFastmath();
      const bool useApprox =
          mlir::arith::bitEnumContainsAny(flag, arith::FastMathFlags::afn);
      sincosFunc = useApprox ? "__nv_fast_sincosf" : "__nv_sincosf";
    } else if (isa<Float64Type>(computeType)) {
      sincosFunc = "__nv_sincos";
    } else {
      return rewriter.notifyMatchFailure(op,
                                         "unsupported operand type for sincos");
    }

    auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());

    // Create the allocas in the closest automatic allocation scope.
    Value sinPtr, cosPtr;
    {
      OpBuilder::InsertionGuard guard(rewriter);
      auto *scope =
          op->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
      assert(scope && "Expected op to be inside automatic allocation scope");
      rewriter.setInsertionPointToStart(&scope->getRegion(0).front());
      auto one = LLVM::ConstantOp::create(rewriter, loc, rewriter.getI32Type(),
                                          rewriter.getI32IntegerAttr(1));
      sinPtr =
          LLVM::AllocaOp::create(rewriter, loc, ptrType, computeType, one, 0);
      cosPtr =
          LLVM::AllocaOp::create(rewriter, loc, ptrType, computeType, one, 0);
    }

    createSincosCall(rewriter, loc, sincosFunc, convertedInput, sinPtr, cosPtr,
                     op);

    auto sinResult = LLVM::LoadOp::create(rewriter, loc, computeType, sinPtr);
    auto cosResult = LLVM::LoadOp::create(rewriter, loc, computeType, cosPtr);

    rewriter.replaceOp(op, {maybeTrunc(sinResult, inputType, rewriter),
                            maybeTrunc(cosResult, inputType, rewriter)});
    return success();
  }

private:
  Value maybeExt(Value operand, PatternRewriter &rewriter) const {
    if (isa<Float16Type, BFloat16Type>(operand.getType()))
      return LLVM::FPExtOp::create(rewriter, operand.getLoc(),
                                   Float32Type::get(rewriter.getContext()),
                                   operand);
    return operand;
  }

  Value maybeTrunc(Value operand, Type type, PatternRewriter &rewriter) const {
    if (operand.getType() != type)
      return LLVM::FPTruncOp::create(rewriter, operand.getLoc(), type, operand);
    return operand;
  }

  void createSincosCall(ConversionPatternRewriter &rewriter, Location loc,
                        StringRef funcName, Value input, Value sinPtr,
                        Value cosPtr, Operation *op) const {
    auto voidType = LLVM::LLVMVoidType::get(rewriter.getContext());
    auto ptrType = sinPtr.getType();

    SmallVector<Type> operandTypes = {input.getType(), ptrType, ptrType};
    auto funcType = LLVM::LLVMFunctionType::get(voidType, operandTypes);

    auto funcAttr = StringAttr::get(op->getContext(), funcName);
    auto funcOp =
        SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(op, funcAttr);

    if (!funcOp) {
      // Declare the libdevice function next to the enclosing function.
      auto parentFunc = op->getParentOfType<FunctionOpInterface>();
      assert(parentFunc && "expected there to be a parent function");
      OpBuilder b(parentFunc);

      auto globalloc = loc->findInstanceOfOrUnknown<FileLineColLoc>();
      funcOp = LLVM::LLVMFuncOp::create(b, globalloc, funcName, funcType);
    }

    SmallVector<Value> callOperands = {input, sinPtr, cosPtr};
    LLVM::CallOp::create(rewriter, loc, funcOp, callOperands);
  }
};
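
/// Helpers that register OpToFuncCallLowering instances for a source op,
/// wiring it to the matching libdevice function names (f32/f64 variants plus
/// optional approximate-f32, f16, and i32 variants).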
template <typename OpTy>
static void populateOpPatterns(const LLVMTypeConverter &converter,
                               RewritePatternSet &patterns, PatternBenefit benefit,
                               StringRef f32Func, StringRef f64Func,
                               StringRef f32ApproxFunc = "", StringRef f16Func = "") {
  patterns.add<OpToFuncCallLowering<OpTy>>(converter, f32Func, f64Func,
                                           f32ApproxFunc, f16Func,
                                           /*i32Func=*/"", benefit);
}

template <typename OpTy>
static void populateIntOpPatterns(const LLVMTypeConverter &converter,
                                  RewritePatternSet &patterns,
                                  PatternBenefit benefit, StringRef i32Func) {
  // ...
}

template <typename OpTy>
static void populateFloatIntOpPatterns(const LLVMTypeConverter &converter,
                                       RewritePatternSet &patterns,
                                       PatternBenefit benefit, StringRef f32Func,
                                       StringRef f64Func) {
  // ...
}
void mlir::populateGpuSubgroupReduceOpLoweringPattern(
    const LLVMTypeConverter &converter, RewritePatternSet &patterns,
    PatternBenefit benefit) {
  patterns.add<GPUSubgroupReduceOpLowering>(converter, benefit);
}

void mlir::populateLibDeviceConversionPatterns(
    const LLVMTypeConverter &converter, RewritePatternSet &patterns,
    PatternBenefit benefit) {
  // ... (elided) populateOpPatterns / populateIntOpPatterns /
  // populateFloatIntOpPatterns calls map individual arith and math ops onto
  // libdevice functions, among them:
  //   "__nv_fmaxf" / "__nv_fmax",        "__nv_fminf" / "__nv_fmin",
  //   "__nv_copysignf" / "__nv_copysign",
  //   "__nv_cos" / "__nv_fast_cosf",     "__nv_exp" / "__nv_fast_expf",
  //   "__nv_finitef" / "__nv_isfinited",
  //   "__nv_log" / "__nv_fast_logf",     "__nv_log10" / "__nv_fast_log10f",
  //   "__nv_log2" / "__nv_fast_log2f",   "__nv_pow" / "__nv_fast_powf",
  //   "__nv_powif" / "__nv_powi",        "__nv_rintf" / "__nv_rint",
  //   "__nv_sin" / "__nv_fast_sinf",     "__nv_tan" / "__nv_fast_tanf"

  patterns.add<SincosOpLowering>(converter, benefit);
}
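
/// Collects the GPU-to-NVVM conversion patterns proper: the index and
/// dimension intrinsics (thread, block, cluster ids and dims) plus the
/// gpu.func lowering configured with the NVVM-specific address spaces and
/// kernel attributes referenced below.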
void mlir::populateGpuToNVVMConversionPatterns(
    const LLVMTypeConverter &converter, RewritePatternSet &patterns,
    PatternBenefit benefit) {
  // ... (elided: the non-index patterns registered earlier in this function.)
  patterns.add<GPUIndexIntrinsicOpLowering<
      gpu::ThreadIdOp, NVVM::ThreadIdXOp, NVVM::ThreadIdYOp,
      NVVM::ThreadIdZOp>>(converter, IndexKind::Block, IntrType::Id, benefit);
  patterns.add<GPUIndexIntrinsicOpLowering<
      gpu::BlockDimOp, NVVM::BlockDimXOp, NVVM::BlockDimYOp,
      NVVM::BlockDimZOp>>(converter, IndexKind::Block, IntrType::Dim, benefit);
  patterns.add<GPUIndexIntrinsicOpLowering<
      gpu::ClusterIdOp, NVVM::ClusterIdXOp, NVVM::ClusterIdYOp,
      NVVM::ClusterIdZOp>>(converter, IndexKind::Other, IntrType::Id, benefit);
  patterns.add<GPUIndexIntrinsicOpLowering<
      gpu::ClusterDimOp, NVVM::ClusterDimXOp, NVVM::ClusterDimYOp,
      NVVM::ClusterDimZOp>>(converter, IndexKind::Other, IntrType::Dim,
                            benefit);
  patterns.add<GPUIndexIntrinsicOpLowering<
      gpu::ClusterBlockIdOp, NVVM::BlockInClusterIdXOp,
      NVVM::BlockInClusterIdYOp, NVVM::BlockInClusterIdZOp>>(
      converter, IndexKind::Other, IntrType::Id, benefit);
  patterns.add<GPUIndexIntrinsicOpLowering<
      gpu::ClusterDimBlocksOp, NVVM::ClusterDimBlocksXOp,
      NVVM::ClusterDimBlocksYOp, NVVM::ClusterDimBlocksZOp>>(
      converter, IndexKind::Other, IntrType::Dim, benefit);
  patterns.add<GPUIndexIntrinsicOpLowering<
      gpu::BlockIdOp, NVVM::BlockIdXOp, NVVM::BlockIdYOp, NVVM::BlockIdZOp>>(
      converter, IndexKind::Grid, IntrType::Id, benefit);
  patterns.add<GPUIndexIntrinsicOpLowering<
      gpu::GridDimOp, NVVM::GridDimXOp, NVVM::GridDimYOp, NVVM::GridDimZOp>>(
      converter, IndexKind::Grid, IntrType::Dim, benefit);

  // ... (elided) The gpu.func lowering is registered with NVVM-specific
  // options: the workgroup memory address space
  // static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared), the kernel
  // attribute NVVM::NVVMDialect::getKernelFuncAttrName(), and the
  // NVVM::NVVMDialect::getMaxntidAttrName() block-size attribute.
}
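
/// External model implementing ConvertToLLVMAttrInterface for
/// NVVM::NVVMTargetAttr, so that a generic convert-to-llvm driver picks up
/// the GPU-to-NVVM legality, type conversions, and patterns when it
/// encounters an NVVM target attribute.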
struct NVVMTargetConvertToLLVMAttrInterface
    : public ConvertToLLVMAttrInterface::ExternalModel<
          NVVMTargetConvertToLLVMAttrInterface, NVVM::NVVMTargetAttr> {
  void populateConvertToLLVMConversionPatterns(
      Attribute attr, ConversionTarget &target,
      LLVMTypeConverter &typeConverter, RewritePatternSet &patterns) const;
};

void NVVMTargetConvertToLLVMAttrInterface::populateConvertToLLVMConversionPatterns(
    Attribute attr, ConversionTarget &target, LLVMTypeConverter &typeConverter,
    RewritePatternSet &patterns) const {
  configureGpuToNVVMConversionLegality(target);
  configureGpuToNVVMTypeConverter(typeConverter);
  populateGpuToNVVMConversionPatterns(typeConverter, patterns);
}

void mlir::registerConvertGpuToNVVMInterface(DialectRegistry &registry) {
  registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
    NVVMTargetAttr::attachInterface<NVVMTargetConvertToLLVMAttrInterface>(*ctx);
  });
}