doxygen/SCFToGPUPass_8cpp_source.html

 //===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//


 #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"


 #include "mlir/Conversion/SCFToGPU/SCFToGPU.h"

 #include "mlir/Dialect/Affine/IR/AffineOps.h"

 #include "mlir/Dialect/GPU/IR/GPUDialect.h"

 #include "mlir/Transforms/DialectConversion.h"


 namespace mlir {

 #define GEN_PASS_DEF_CONVERTAFFINEFORTOGPUPASS

 #define GEN_PASS_DEF_CONVERTPARALLELLOOPTOGPUPASS

 #include "mlir/Conversion/Passes.h.inc"

 } // namespace mlir


 using namespace mlir;

 using namespace mlir::scf;


 namespace {

 // Pass that rewrites each affine.for found directly in the function body into
 // a GPU launch via convertAffineLoopNestToGPULaunch.  Only the ops immediately
 // inside the function body are visited: nested launches are not allowed, so
 // the traversal is deliberately non-recursive and loops nested inside other
 // ops are never treated as conversion roots.
 struct ForLoopMapper
     : public impl::ConvertAffineForToGPUPassBase<ForLoopMapper> {
   using Base::Base;

   // Scans the ops of the function body once and attempts to convert every
   // affine::AffineForOp among them.  A failed conversion flags the pass as
   // failed, but the scan still continues with the remaining ops.
   void runOnOperation() override {
     auto topLevelOps = getOperation().getFunctionBody().getOps();

     // Early-increment iteration steps past the current op before the loop
     // body runs (presumably because a successful conversion replaces or
     // erases the loop op -- confirm against convertAffineLoopNestToGPULaunch).
     for (Operation &candidate : llvm::make_early_inc_range(topLevelOps)) {
       auto affineLoop = dyn_cast<affine::AffineForOp>(&candidate);
       if (!affineLoop)
         continue;

       // numBlockDims / numThreadDims are not declared in this struct; they
       // are presumably pass options supplied by the generated base class.
       if (failed(convertAffineLoopNestToGPULaunch(affineLoop, numBlockDims,
                                                   numThreadDims)))
         signalPassFailure();
     }
   }
 };


 // Pass that runs the parallel-loop-to-GPU conversion patterns over the
 // operation it is scheduled on, using a partial dialect conversion.
 struct ParallelLoopToGpuPass
     : public impl::ConvertParallelLoopToGpuPassBase<ParallelLoopToGpuPass> {
   // Builds the pattern set and conversion target, applies the partial
   // conversion, and then runs the post-conversion cleanup.  The cleanup is
   // executed whether or not the conversion succeeded; a conversion failure
   // only marks the pass as failed.
   void runOnOperation() override {
     Operation *root = getOperation();

     RewritePatternSet patterns(&getContext());
     populateParallelLoopToGPUPatterns(patterns);

     // Ops the target knows nothing about are accepted as-is; the legality
     // rules relevant to this conversion are layered on top by
     // configureParallelLoopToGPULegality.
     auto acceptAnyOp = [](Operation *) { return true; };
     ConversionTarget target(getContext());
     target.markUnknownOpDynamicallyLegal(acceptAnyOp);
     configureParallelLoopToGPULegality(target);

     auto status = applyPartialConversion(root, target, std::move(patterns));
     if (failed(status))
       signalPassFailure();

     finalizeParallelLoopToGPUConversion(root);
   }
 };


 } // namespace

AffineOps.h

DialectConversion.h

GPUDialect.h

getContext
static MLIRContext * getContext(OpFoldResult val)
Definition: IndexingUtils.cpp:296

SCFToGPUPass.h

SCFToGPU.h

mlir::ConversionTarget
This class describes a specific conversion target.
Definition: DialectConversion.h:870

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88

mlir::RewritePatternSet
Definition: PatternMatch.h:792

mlir::scf
Definition: SCFToGPU.h:24

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::finalizeParallelLoopToGPUConversion
void finalizeParallelLoopToGPUConversion(Operation *op)
Clean up after applyPartialConversion/applyFullConversion call.
Definition: SCFToGPU.cpp:716

mlir::populateParallelLoopToGPUPatterns
void populateParallelLoopToGPUPatterns(RewritePatternSet &patterns)
Adds the conversion pattern from scf.parallel to gpu.launch to the provided pattern list.
Definition: SCFToGPU.cpp:704

mlir::convertAffineLoopNestToGPULaunch
LogicalResult convertAffineLoopNestToGPULaunch(affine::AffineForOp forOp, unsigned numBlockDims, unsigned numThreadDims)
Convert a perfect affine loop nest with the outermost loop identified by forOp into a gpu::Launch operation.

mlir::patterns
const FrozenRewritePatternSet & patterns
Definition: GreedyPatternRewriteDriver.h:283

mlir::applyPartialConversion
LogicalResult applyPartialConversion(ArrayRef< Operation * > ops, const ConversionTarget &target, const FrozenRewritePatternSet &patterns, ConversionConfig config=ConversionConfig())
Below we define several entry points for operation conversion.
Definition: DialectConversion.cpp:3549

mlir::configureParallelLoopToGPULegality
void configureParallelLoopToGPULegality(ConversionTarget &target)
Configures the rewrite target such that only scf.parallel operations that are not rewritten by the provided patterns are legal.
Definition: SCFToGPU.cpp:708