19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/Support/CommandLine.h"
23 #define GEN_PASS_DEF_CONVERTAFFINEFORTOGPU
24 #define GEN_PASS_DEF_CONVERTPARALLELLOOPTOGPU
25 #include "mlir/Conversion/Passes.h.inc"
35 struct ForLoopMapper :
public impl::ConvertAffineForToGPUBase<ForLoopMapper> {
36 ForLoopMapper() =
default;
37 ForLoopMapper(
unsigned numBlockDims,
unsigned numThreadDims) {
38 this->numBlockDims = numBlockDims;
39 this->numThreadDims = numThreadDims;
42 void runOnOperation()
override {
43 for (
Operation &op : llvm::make_early_inc_range(
44 getOperation().getFunctionBody().getOps())) {
45 if (
auto forOp = dyn_cast<affine::AffineForOp>(&op)) {
54 struct ParallelLoopToGpuPass
55 :
public impl::ConvertParallelLoopToGpuBase<ParallelLoopToGpuPass> {
56 void runOnOperation()
override {
60 target.markUnknownOpDynamicallyLegal([](
Operation *) {
return true; });
71 std::unique_ptr<InterfacePass<FunctionOpInterface>>
73 return std::make_unique<ForLoopMapper>(numBlockDims, numThreadDims);
75 std::unique_ptr<InterfacePass<FunctionOpInterface>>
77 return std::make_unique<ForLoopMapper>();
81 return std::make_unique<ParallelLoopToGpuPass>();
static MLIRContext * getContext(OpFoldResult val)
This class describes a specific conversion target.
Operation is the basic unit of execution within MLIR.
Include the generated interface declarations.
void finalizeParallelLoopToGPUConversion(Operation *op)
Clean up after applyPartialConversion/applyFullConversion call.
std::unique_ptr< InterfacePass< FunctionOpInterface > > createAffineForToGPUPass(unsigned numBlockDims, unsigned numThreadDims)
Create a pass that converts loop nests into GPU kernels.
void populateParallelLoopToGPUPatterns(RewritePatternSet &patterns)
Adds the conversion pattern from scf.parallel to gpu.launch to the provided pattern list.
LogicalResult convertAffineLoopNestToGPULaunch(affine::AffineForOp forOp, unsigned numBlockDims, unsigned numThreadDims)
Convert a perfect affine loop nest with the outermost loop identified by forOp into a gpu::Launch operation.
const FrozenRewritePatternSet & patterns
std::unique_ptr< Pass > createParallelLoopToGpuPass()
Creates a pass that converts scf.parallel operations into a gpu.launch operation.
LogicalResult applyPartialConversion(ArrayRef< Operation * > ops, const ConversionTarget &target, const FrozenRewritePatternSet &patterns, ConversionConfig config=ConversionConfig())
Below we define several entry points for operation conversion.
void configureParallelLoopToGPULegality(ConversionTarget &target)
Configures the rewrite target such that only scf.parallel operations that are not rewritten by the provided patterns are legal.