MLIR  19.0.0git
ParallelLoopMapper.cpp
Go to the documentation of this file.
1 //===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements utilities to generate mappings for parallel loops to
10 // GPU devices.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 
20 #include "mlir/IR/AffineMap.h"
21 
22 namespace mlir {
23 #define GEN_PASS_DEF_GPUMAPPARALLELLOOPSPASS
24 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
25 } // namespace mlir
26 
27 namespace mlir {
28 
29 using scf::ParallelOp;
30 
31 StringRef gpu::getMappingAttrName() { return "mapping"; }
32 
34 gpu::setMappingAttr(ParallelOp ploopOp,
36  // Verify that each processor is mapped to only once.
37  llvm::DenseSet<gpu::Processor> specifiedMappings;
38  for (auto dimAttr : mapping) {
39  gpu::Processor processor = dimAttr.getProcessor();
40  if (processor != gpu::Processor::Sequential &&
41  specifiedMappings.count(processor))
42  return ploopOp.emitError(
43  "invalid mapping multiple loops to same processor");
44  specifiedMappings.insert(processor);
45  }
46  ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
47  ploopOp->setAttr(getMappingAttrName(),
48  ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
49  return success();
50 }
51 
52 namespace gpu {
namespace {
/// Nesting level at which a parallel loop is mapped. The levels are ordered so
/// that adding one moves to the next level.
enum MappingLevel {
  /// Loop dimensions are mapped to block ids.
  MapGrid = 0,
  /// Loop dimensions are mapped to thread ids.
  MapBlock = 1,
  /// Loop dimensions are not distributed to hardware ids.
  Sequential = 2
};
} // namespace

/// Number of loop dimensions per level that can receive a hardware id; any
/// dimension at or beyond this index is mapped sequentially.
static constexpr int kNumHardwareIds = 3;
58 
59 /// Bounded increment on MappingLevel. Increments to the next
60 /// level unless Sequential was already reached.
61 static MappingLevel &operator++(MappingLevel &mappingLevel) {
62  if (mappingLevel < Sequential) {
63  mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
64  }
65  return mappingLevel;
66 }
67 
68 /// Computed the hardware id to use for a given mapping level. Will
69 /// assign x,y and z hardware ids for the first 3 dimensions and use
70 /// sequential after.
71 /// TODO: Make this use x for the inner-most loop that is
72 /// distributed to map to x, the next innermost to y and the next innermost to
73 /// z.
74 static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {
75 
76  if (dimension >= kNumHardwareIds || level == Sequential)
77  return Processor::Sequential;
78  switch (level) {
79  case MapGrid:
80  switch (dimension) {
81  case 0:
82  return Processor::BlockX;
83  case 1:
84  return Processor::BlockY;
85  case 2:
86  return Processor::BlockZ;
87  default:
88  return Processor::Sequential;
89  }
90  break;
91  case MapBlock:
92  switch (dimension) {
93  case 0:
94  return Processor::ThreadX;
95  case 1:
96  return Processor::ThreadY;
97  case 2:
98  return Processor::ThreadZ;
99  default:
100  return Processor::Sequential;
101  }
102  default:;
103  }
104  return Processor::Sequential;
105 }
106 
107 /// Add mapping information to the given parallel loop. Do not add
108 /// mapping information if the loop already has it. Also, don't
109 /// start a mapping at a nested loop.
110 static void mapParallelOp(ParallelOp parallelOp,
111  MappingLevel mappingLevel = MapGrid) {
112  // Do not try to add a mapping to already mapped loops or nested loops.
113  if (parallelOp->getAttr(getMappingAttrName()) ||
114  ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>()))
115  return;
116 
117  MLIRContext *ctx = parallelOp.getContext();
118  Builder b(ctx);
120  attrs.reserve(parallelOp.getNumLoops());
121  for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) {
122  attrs.push_back(b.getAttr<ParallelLoopDimMappingAttr>(
123  getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
124  b.getDimIdentityMap()));
125  }
126  (void)setMappingAttr(parallelOp, attrs);
127  ++mappingLevel;
128  // Parallel loop operations are immediately nested, so do not use
129  // walk but just iterate over the operations.
130  for (Operation &op : *parallelOp.getBody()) {
131  if (ParallelOp nested = dyn_cast<ParallelOp>(op))
132  mapParallelOp(nested, mappingLevel);
133  }
134 }
135 
136 namespace {
137 struct GpuMapParallelLoopsPass
138  : public impl::GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
139  void runOnOperation() override {
140  for (Region &region : getOperation()->getRegions()) {
141  region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
142  }
143  }
144 };
145 
146 } // namespace
147 } // namespace gpu
148 } // namespace mlir
149 
150 std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
152  return std::make_unique<gpu::GpuMapParallelLoopsPass>();
153 }
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:50
AffineMap getDimIdentityMap()
Definition: Builders.cpp:390
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
Definition: Builders.h:100
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
static Processor getHardwareIdForMapping(MappingLevel level, int dimension)
Computes the hardware id to use for a given mapping level.
static MappingLevel & operator++(MappingLevel &mappingLevel)
Bounded increment on MappingLevel.
static void mapParallelOp(ParallelOp parallelOp, MappingLevel mappingLevel=MapGrid)
Add mapping information to the given parallel loop.
LogicalResult setMappingAttr(scf::ParallelOp ploopOp, ArrayRef< ParallelLoopDimMappingAttr > mapping)
Sets the mapping attribute of a scf.parallel operation.
static constexpr int kNumHardwareIds
StringRef getMappingAttrName()
Name of the mapping attribute produced by loop mappers.
Include the generated interface declarations.
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
std::unique_ptr< OperationPass< func::FuncOp > > createGpuMapParallelLoopsPass()
Maps the parallel loops found in the given function to workgroups.
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26