MLIR  22.0.0git
ParallelLoopMapper.cpp
Go to the documentation of this file.
1 //===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements utilities to generate mappings for parallel loops to
10 // GPU devices.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 
#include "mlir/IR/AffineMap.h"
#include "llvm/ADT/StringSwitch.h"
21 
22 namespace mlir {
23 #define GEN_PASS_DEF_GPUMAPPARALLELLOOPSPASS
24 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
25 } // namespace mlir
26 
27 namespace mlir {
28 
29 using scf::ParallelOp;
30 
31 StringRef gpu::getMappingAttrName() { return "mapping"; }
32 
33 LogicalResult
34 gpu::setMappingAttr(ParallelOp ploopOp,
36  // Verify that each processor is mapped to only once.
37  llvm::DenseSet<gpu::Processor> specifiedMappings;
38  for (auto dimAttr : mapping) {
39  gpu::Processor processor = dimAttr.getProcessor();
40  if (processor != gpu::Processor::Sequential &&
41  specifiedMappings.count(processor))
42  return ploopOp.emitError(
43  "invalid mapping multiple loops to same processor");
44  specifiedMappings.insert(processor);
45  }
46  ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
47  ploopOp->setAttr(getMappingAttrName(),
48  ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
49  return success();
50 }
51 
52 namespace gpu {
namespace {
/// Nesting level at which a parallel loop is mapped. Loops at `MapGrid` are
/// assigned block ids, loops at `MapBlock` are assigned thread ids, and loops
/// at `Sequential` are not assigned a hardware id at all.
enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };

/// Order in which the dimensions of a parallel loop receive hardware ids.
enum class MappingPolicy { OutermostFirst, InnermostFirst };
} // namespace

/// Number of hardware ids (x, y and z) that can be assigned per mapping level.
static constexpr int kNumHardwareIds = 3;

/// Bounded increment on MappingLevel. Advances to the next level and
/// saturates once Sequential has been reached. Returns a reference to the
/// updated operand.
static MappingLevel &operator++(MappingLevel &mappingLevel) {
  if (mappingLevel >= Sequential)
    return mappingLevel;
  mappingLevel = static_cast<MappingLevel>(static_cast<int>(mappingLevel) + 1);
  return mappingLevel;
}
68 
69 // Map the policy string to a typed mapping policy.
70 // TODO: Revisit this and possibly use a loop interchange pass instead.
71 static FailureOr<MappingPolicy> getMappingPolicyFromStr(StringRef policy) {
72  std::string policyCanonical = policy.trim().lower();
73 
74  std::optional<MappingPolicy> option =
76  .Case("innermost-first", MappingPolicy::InnermostFirst)
77  .Case("outermost-first", MappingPolicy::OutermostFirst)
78  .Default(std::nullopt);
79 
80  if (!option)
81  return failure();
82  return *option;
83 }
84 
85 /// Computed the hardware id to use for a given mapping level. Will
86 /// assign x,y and z hardware ids for the first 3 dimensions and use
87 /// sequential after.
88 static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {
89 
90  if (dimension >= kNumHardwareIds || level == Sequential)
91  return Processor::Sequential;
92 
93  switch (level) {
94  case MapGrid:
95  switch (dimension) {
96  case 0:
97  return Processor::BlockX;
98  case 1:
99  return Processor::BlockY;
100  case 2:
101  return Processor::BlockZ;
102  default:
103  return Processor::Sequential;
104  }
105  break;
106  case MapBlock:
107  switch (dimension) {
108  case 0:
109  return Processor::ThreadX;
110  case 1:
111  return Processor::ThreadY;
112  case 2:
113  return Processor::ThreadZ;
114  default:
115  return Processor::Sequential;
116  }
117  default:;
118  }
119  return Processor::Sequential;
120 }
121 
122 /// Add mapping information to the given parallel loop. Do not add
123 /// mapping information if the loop already has it. Also, don't
124 /// start a mapping at a nested loop.
125 static void
126 mapParallelOp(ParallelOp parallelOp, MappingLevel mappingLevel = MapGrid,
127  MappingPolicy mappingPolicy = MappingPolicy::OutermostFirst) {
128  // Do not try to add a mapping to already mapped loops or nested loops.
129  if (parallelOp->getAttr(getMappingAttrName()) ||
130  ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>()))
131  return;
132 
133  const int numLoops = static_cast<int>(parallelOp.getNumLoops());
134  const int loopsToMap = std::min(numLoops, kNumHardwareIds);
135 
136  MLIRContext *ctx = parallelOp.getContext();
137  Builder b(ctx);
139  attrs.reserve(numLoops);
140 
141  for (int i = 0; i < numLoops; ++i) {
142 
143  // Determine the mapping to use for this loop.
144  // If the are more loops to map than HW IDs map to sequential.
145  int hwMapping = kNumHardwareIds;
146  if (i < loopsToMap) {
147  hwMapping = (mappingPolicy == MappingPolicy::OutermostFirst)
148  ? i
149  : (loopsToMap - 1 - i);
150  }
151 
152  attrs.push_back(b.getAttr<ParallelLoopDimMappingAttr>(
153  getHardwareIdForMapping(mappingLevel, hwMapping), b.getDimIdentityMap(),
154  b.getDimIdentityMap()));
155  }
156  (void)setMappingAttr(parallelOp, attrs);
157  ++mappingLevel;
158  // Parallel loop operations are immediately nested, so do not use
159  // walk but just iterate over the operations.
160  for (Operation &op : *parallelOp.getBody()) {
161  if (ParallelOp nested = dyn_cast<ParallelOp>(op))
162  mapParallelOp(nested, mappingLevel, mappingPolicy);
163  }
164 }
165 
166 namespace {
167 struct GpuMapParallelLoopsPass
168  : public impl::GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
169  using Base::Base;
170 
171  void runOnOperation() override {
172  // Parse the mapping policy.
173  FailureOr<MappingPolicy> policyOrFailure =
174  getMappingPolicyFromStr(mappingPolicyStr);
175  if (failed(policyOrFailure)) {
176  getOperation()->emitError() << "Invalid mapping policy specified.";
177  return signalPassFailure();
178  }
179 
180  MappingPolicy policy = *policyOrFailure;
181  MappingLevel topLevel = MappingLevel::MapGrid;
182 
183  for (Region &region : getOperation()->getRegions()) {
184  region.walk([&](ParallelOp parallelOp) {
185  mapParallelOp(parallelOp, topLevel, policy);
186  });
187  }
188  }
189 };
190 
191 } // namespace
192 } // namespace gpu
193 } // namespace mlir
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:51
AffineMap getDimIdentityMap()
Definition: Builders.cpp:383
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
Definition: Builders.h:98
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:63
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
static Processor getHardwareIdForMapping(MappingLevel level, int dimension)
Computes the hardware id to use for a given mapping level.
static MappingLevel & operator++(MappingLevel &mappingLevel)
Bounded increment on MappingLevel.
static FailureOr< MappingPolicy > getMappingPolicyFromStr(StringRef policy)
LogicalResult setMappingAttr(scf::ParallelOp ploopOp, ArrayRef< ParallelLoopDimMappingAttr > mapping)
Sets the mapping attribute of a scf.parallel operation.
static constexpr int kNumHardwareIds
StringRef getMappingAttrName()
Name of the mapping attribute produced by loop mappers.
static void mapParallelOp(ParallelOp parallelOp, MappingLevel mappingLevel=MapGrid, MappingPolicy mappingPolicy=MappingPolicy::OutermostFirst)
Add mapping information to the given parallel loop.
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition: Remarks.h:561
Include the generated interface declarations.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...