ParallelLoopMapper.cpp
//===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities to generate mappings for parallel loops to
// GPU devices.
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/GPU/Transforms/ParallelLoopMapper.h"

#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/AffineMap.h"

namespace mlir {
#define GEN_PASS_DEF_GPUMAPPARALLELLOOPSPASS
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
} // namespace mlir

namespace mlir {

using scf::ParallelOp;

StringRef gpu::getMappingAttrName() { return "mapping"; }

LogicalResult
gpu::setMappingAttr(ParallelOp ploopOp,
                    ArrayRef<ParallelLoopDimMappingAttr> mapping) {
  // Verify that each processor is mapped to only once.
  llvm::DenseSet<gpu::Processor> specifiedMappings;
  for (auto dimAttr : mapping) {
    gpu::Processor processor = dimAttr.getProcessor();
    if (processor != gpu::Processor::Sequential &&
        specifiedMappings.count(processor))
      return ploopOp.emitError(
          "invalid mapping multiple loops to same processor");
    // Record the processor so that a later duplicate is detected.
    specifiedMappings.insert(processor);
  }
  ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
  ploopOp->setAttr(getMappingAttrName(),
                   ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
  return success();
}
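// Illustrative sketch of what setMappingAttr above produces (assumed attribute
// syntax, abbreviated; see the GPU dialect attribute definitions for the exact
// form): the scf.parallel op ends up carrying one #gpu.loop_dim_map entry per
// loop dimension, e.g.
//
//   scf.parallel (%i, %j) = (%c0, %c0) to (%n, %m) step (%c1, %c1) {
//     ...
//   } {mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0),
//                                   bound = (d0) -> (d0)>,
//                 #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0),
//                                   bound = (d0) -> (d0)>]}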

namespace gpu {
namespace {
enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };
} // namespace

static constexpr int kNumHardwareIds = 3;

/// Bounded increment on MappingLevel. Increments to the next
/// level unless Sequential was already reached.
static MappingLevel &operator++(MappingLevel &mappingLevel) {
  if (mappingLevel < Sequential) {
    mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
  }
  return mappingLevel;
}

/// Computes the hardware id to use for a given mapping level. Assigns the x,
/// y and z hardware ids to the first 3 dimensions and uses sequential after
/// that.
/// TODO: Map the innermost distributed loop to x, the next innermost to y,
/// and the next innermost to z.
static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {
  if (dimension >= kNumHardwareIds || level == Sequential)
    return Processor::Sequential;
  switch (level) {
  case MapGrid:
    switch (dimension) {
    case 0:
      return Processor::BlockX;
    case 1:
      return Processor::BlockY;
    case 2:
      return Processor::BlockZ;
    default:
      return Processor::Sequential;
    }
    break;
  case MapBlock:
    switch (dimension) {
    case 0:
      return Processor::ThreadX;
    case 1:
      return Processor::ThreadY;
    case 2:
      return Processor::ThreadZ;
    default:
      return Processor::Sequential;
    }
  default:;
  }
  return Processor::Sequential;
}
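// For example, getHardwareIdForMapping(MapGrid, 0) yields Processor::BlockX,
// getHardwareIdForMapping(MapBlock, 2) yields Processor::ThreadZ, and any
// dimension >= kNumHardwareIds (or a Sequential level) yields
// Processor::Sequential.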

/// Add mapping information to the given parallel loop. Do not add
/// mapping information if the loop already has it. Also, don't
/// start a mapping at a nested loop.
static void mapParallelOp(ParallelOp parallelOp,
                          MappingLevel mappingLevel = MapGrid) {
  // Do not try to add a mapping to already mapped loops or nested loops.
  if (parallelOp->getAttr(getMappingAttrName()) ||
      ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>()))
    return;

  MLIRContext *ctx = parallelOp.getContext();
  Builder b(ctx);
  SmallVector<ParallelLoopDimMappingAttr, 4> attrs;
  attrs.reserve(parallelOp.getNumLoops());
  for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) {
    attrs.push_back(b.getAttr<ParallelLoopDimMappingAttr>(
        getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
        b.getDimIdentityMap()));
  }
  (void)setMappingAttr(parallelOp, attrs);
  ++mappingLevel;
  // Parallel loop operations are immediately nested, so do not use
  // walk but just iterate over the operations.
  for (Operation &op : *parallelOp.getBody()) {
    if (ParallelOp nested = dyn_cast<ParallelOp>(op))
      mapParallelOp(nested, mappingLevel);
  }
}
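// For example, in a two-deep nest of scf.parallel ops, the outer op is visited
// at the MapGrid level and its first dimensions are mapped to
// block_x/block_y/block_z, the immediately nested op is visited at the
// MapBlock level and mapped to thread_x/thread_y/thread_z, and any deeper nest
// (or dimension beyond the third) is mapped to sequential.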

namespace {
struct GpuMapParallelLoopsPass
    : public impl::GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
  void runOnOperation() override {
    for (Region &region : getOperation()->getRegions()) {
      region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
    }
  }
};

} // namespace
} // namespace gpu
} // namespace mlir

std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
mlir::createGpuMapParallelLoopsPass() {
  return std::make_unique<gpu::GpuMapParallelLoopsPass>();
}
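For context, a minimal sketch of how the pass defined above might be scheduled from C++. The helper function below is illustrative only; createGpuMapParallelLoopsPass and the func::FuncOp nesting come from this file and the pass declaration it implements.

// Illustrative driver snippet; not part of ParallelLoopMapper.cpp.
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Pass/PassManager.h"

static void buildMappingPipeline(mlir::PassManager &pm) {
  // Annotate every scf.parallel nest in each function with a GPU mapping; the
  // same transform is exposed to mlir-opt as --gpu-map-parallel-loops.
  pm.addNestedPass<mlir::func::FuncOp>(mlir::createGpuMapParallelLoopsPass());
}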