doxygen/ParallelLoopMapper_8cpp_source.html

 //===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//

 //

 // This file implements utilities to generate mappings for parallel loops to

 // GPU devices.

 //

 //===----------------------------------------------------------------------===//


 #include "mlir/Dialect/GPU/Transforms/Passes.h"


 #include "mlir/Dialect/Func/IR/FuncOps.h"

 #include "mlir/Dialect/GPU/IR/GPUDialect.h"

 #include "mlir/Dialect/GPU/Transforms/ParallelLoopMapper.h"

 #include "mlir/Dialect/SCF/IR/SCF.h"

 #include "mlir/IR/AffineMap.h"


 namespace mlir {

 #define GEN_PASS_DEF_GPUMAPPARALLELLOOPSPASS

 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"

 } // namespace mlir


 namespace mlir {


 using scf::ParallelOp;


 StringRef gpu::getMappingAttrName() { return "mapping"; }


 LogicalResult

 gpu::setMappingAttr(ParallelOp ploopOp,

                     ArrayRef<ParallelLoopDimMappingAttr> mapping) {

   // Verify that each processor is mapped to only once.

   llvm::DenseSet<gpu::Processor> specifiedMappings;

   for (auto dimAttr : mapping) {

     gpu::Processor processor = dimAttr.getProcessor();

     if (processor != gpu::Processor::Sequential &&

         specifiedMappings.count(processor))

       return ploopOp.emitError(

           "invalid mapping multiple loops to same processor");

     specifiedMappings.insert(processor);

   }

   ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());

   ploopOp->setAttr(getMappingAttrName(),

                    ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));

   return success();

 }


 namespace gpu {

 namespace {

 enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };

 } // namespace


 static constexpr int kNumHardwareIds = 3;


 /// Bounded increment on MappingLevel. Increments to the next

 /// level unless Sequential was already reached.

 static MappingLevel &operator++(MappingLevel &mappingLevel) {

   if (mappingLevel < Sequential) {

     mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);

   }

   return mappingLevel;

 }


 /// Computed the hardware id to use for a given mapping level. Will

 /// assign x,y and z hardware ids for the first 3 dimensions and use

 /// sequential after.

 /// TODO: Make this use x for the inner-most loop that is

 /// distributed to map to x, the next innermost to y and the next innermost to

 /// z.

 static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {


   if (dimension >= kNumHardwareIds || level == Sequential)

     return Processor::Sequential;

   switch (level) {

   case MapGrid:

     switch (dimension) {

     case 0:

       return Processor::BlockX;

     case 1:

       return Processor::BlockY;

     case 2:

       return Processor::BlockZ;

     default:

       return Processor::Sequential;

     }

     break;

   case MapBlock:

     switch (dimension) {

     case 0:

       return Processor::ThreadX;

     case 1:

       return Processor::ThreadY;

     case 2:

       return Processor::ThreadZ;

     default:

       return Processor::Sequential;

     }

   default:;

   }

   return Processor::Sequential;

 }


 /// Add mapping information to the given parallel loop. Do not add

 /// mapping information if the loop already has it. Also, don't

 /// start a mapping at a nested loop.

 static void mapParallelOp(ParallelOp parallelOp,

                           MappingLevel mappingLevel = MapGrid) {

   // Do not try to add a mapping to already mapped loops or nested loops.

   if (parallelOp->getAttr(getMappingAttrName()) ||

       ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>()))

     return;


   MLIRContext *ctx = parallelOp.getContext();

   Builder b(ctx);

   SmallVector<ParallelLoopDimMappingAttr, 4> attrs;

   attrs.reserve(parallelOp.getNumLoops());

   for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) {

     attrs.push_back(b.getAttr<ParallelLoopDimMappingAttr>(

         getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),

         b.getDimIdentityMap()));

   }

   (void)setMappingAttr(parallelOp, attrs);

   ++mappingLevel;

   // Parallel loop operations are immediately nested, so do not use

   // walk but just iterate over the operations.

   for (Operation &op : *parallelOp.getBody()) {

     if (ParallelOp nested = dyn_cast<ParallelOp>(op))

       mapParallelOp(nested, mappingLevel);

   }

 }


 namespace {

 struct GpuMapParallelLoopsPass

     : public impl::GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {

   void runOnOperation() override {

     for (Region &region : getOperation()->getRegions()) {

       region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });

     }

   }

 };


 } // namespace

 } // namespace gpu

 } // namespace mlir

Passes.h

FuncOps.h

GPUDialect.h

ParallelLoopMapper.h

llvm::ArrayRef
Definition: LLVM.h:48

llvm::DenseSet
Definition: LLVM.h:59

llvm::SmallVector
Definition: LLVM.h:72

mlir::Builder
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:50

mlir::Builder::getDimIdentityMap
AffineMap getDimIdentityMap()
Definition: Builders.cpp:378

mlir::Builder::getAttr
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
Definition: Builders.h:96

mlir::MLIRContext
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88

mlir::Region
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26

SCF.h

AffineMap.h

mlir::gpu::getHardwareIdForMapping
static Processor getHardwareIdForMapping(MappingLevel level, int dimension)
Computed the hardware id to use for a given mapping level.
Definition: ParallelLoopMapper.cpp:74

mlir::gpu::operator++
static MappingLevel & operator++(MappingLevel &mappingLevel)
Bounded increment on MappingLevel.
Definition: ParallelLoopMapper.cpp:61

mlir::gpu::mapParallelOp
static void mapParallelOp(ParallelOp parallelOp, MappingLevel mappingLevel=MapGrid)
Add mapping information to the given parallel loop.
Definition: ParallelLoopMapper.cpp:110

mlir::gpu::setMappingAttr
LogicalResult setMappingAttr(scf::ParallelOp ploopOp, ArrayRef< ParallelLoopDimMappingAttr > mapping)
Sets the mapping attribute of a scf.parallel operation.

mlir::gpu::kNumHardwareIds
static constexpr int kNumHardwareIds
Definition: ParallelLoopMapper.cpp:57

mlir::gpu::getMappingAttrName
StringRef getMappingAttrName()
Name of the mapping attribute produced by loop mappers.
Definition: ParallelLoopMapper.cpp:31

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::get
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
Definition: BytecodeImplementation.h:509