19 #define GEN_PASS_DEF_NVVMOPTIMIZEFORTARGET
20 #include "mlir/Dialect/LLVMIR/Transforms/Passes.h.inc"
38 LogicalResult matchAndRewrite(LLVM::FDivOp op,
42 struct NVVMOptimizeForTarget
43 :
public NVVM::impl::NVVMOptimizeForTargetBase<NVVMOptimizeForTarget> {
44 void runOnOperation()
override;
47 registry.
insert<NVVM::NVVMDialect>();
52 LogicalResult ExpandDivF16::matchAndRewrite(LLVM::FDivOp op,
54 if (!op.getType().isF16())
62 Value lhs = rewriter.
create<LLVM::FPExtOp>(loc, f32Type, op.getLhs());
63 Value rhs = rewriter.
create<LLVM::FPExtOp>(loc, f32Type, op.getRhs());
66 Value rcp = rewriter.
create<NVVM::RcpApproxFtzF32Op>(loc, f32Type, rhs);
67 Value approx = rewriter.
create<LLVM::FMulOp>(loc, lhs, rcp);
72 loc, approx, rewriter.
create<LLVM::FNegOp>(loc, rhs), lhs);
73 Value refined = rewriter.
create<LLVM::FMAOp>(loc, err, rcp, approx);
78 Value cast = rewriter.
create<LLVM::BitcastOp>(loc, i32Type, approx);
79 Value exp = rewriter.
create<LLVM::AndOp>(loc, i32Type, cast, mask);
84 rewriter.
create<LLVM::ICmpOp>(loc, LLVM::ICmpPredicate::eq, exp, zero),
85 rewriter.
create<LLVM::ICmpOp>(loc, LLVM::ICmpPredicate::eq, exp, mask));
87 rewriter.
create<LLVM::SelectOp>(loc, f32Type, pred, approx, refined);
95 void NVVMOptimizeForTarget::runOnOperation() {
98 patterns.add<ExpandDivF16>(ctx);
100 return signalPassFailure();
104 return std::make_unique<NVVMOptimizeForTarget>();
IntegerAttr getUI32IntegerAttr(uint32_t value)
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Location getLoc()
The source location the operation was defined or derived from.
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
std::unique_ptr< Pass > createOptimizeForTargetPass()
Creates a pass that optimizes LLVM IR for the NVVM target.
Include the generated interface declarations.
LogicalResult applyPatternsAndFoldGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...