MLIR 22.0.0git
ParallelLoopMapper.cpp
Go to the documentation of this file.
1//===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements utilities to generate mappings for parallel loops to
10// GPU devices.
11//
12//===----------------------------------------------------------------------===//
13
15
20#include "mlir/IR/AffineMap.h"
21
22namespace mlir {
23#define GEN_PASS_DEF_GPUMAPPARALLELLOOPSPASS
24#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
25} // namespace mlir
26
27namespace mlir {
28
29using scf::ParallelOp;
30
31StringRef gpu::getMappingAttrName() { return "mapping"; }
32
33LogicalResult
34gpu::setMappingAttr(ParallelOp ploopOp,
36 // Verify that each processor is mapped to only once.
37 llvm::DenseSet<gpu::Processor> specifiedMappings;
38 for (auto dimAttr : mapping) {
39 gpu::Processor processor = dimAttr.getProcessor();
40 if (processor != gpu::Processor::Sequential &&
41 specifiedMappings.count(processor))
42 return ploopOp.emitError(
43 "invalid mapping multiple loops to same processor");
44 specifiedMappings.insert(processor);
45 }
46 ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
47 ploopOp->setAttr(getMappingAttrName(),
48 ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
49 return success();
50}
51
52namespace gpu {
namespace {
/// Nesting level a parallel loop is mapped at. The values are ordered so that
/// a level can be advanced by adding one, with Sequential as the last level.
enum MappingLevel {
  MapGrid = 0,
  MapBlock = 1,
  Sequential = 2
};

/// Order in which the dimensions of a parallel loop are handed hardware ids.
enum class MappingPolicy {
  OutermostFirst,
  InnermostFirst
};
} // namespace

/// Number of hardware ids available per mapping level.
static constexpr int kNumHardwareIds = 3;
59
60/// Bounded increment on MappingLevel. Increments to the next
61/// level unless Sequential was already reached.
62static MappingLevel &operator++(MappingLevel &mappingLevel) {
63 if (mappingLevel < Sequential) {
64 mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
65 }
66 return mappingLevel;
67}
68
69// Map the policy string to a typed mapping policy.
70// TODO: Revisit this and possibly use a loop interchange pass instead.
71static FailureOr<MappingPolicy> getMappingPolicyFromStr(StringRef policy) {
72 std::string policyCanonical = policy.trim().lower();
73
74 std::optional<MappingPolicy> option =
76 .Case("innermost-first", MappingPolicy::InnermostFirst)
77 .Case("outermost-first", MappingPolicy::OutermostFirst)
78 .Default(std::nullopt);
79
80 if (!option)
81 return failure();
82 return *option;
83}
84
85/// Computed the hardware id to use for a given mapping level. Will
86/// assign x,y and z hardware ids for the first 3 dimensions and use
87/// sequential after.
88static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {
89
90 if (dimension >= kNumHardwareIds || level == Sequential)
91 return Processor::Sequential;
92
93 switch (level) {
94 case MapGrid:
95 switch (dimension) {
96 case 0:
97 return Processor::BlockX;
98 case 1:
99 return Processor::BlockY;
100 case 2:
101 return Processor::BlockZ;
102 default:
103 return Processor::Sequential;
104 }
105 break;
106 case MapBlock:
107 switch (dimension) {
108 case 0:
109 return Processor::ThreadX;
110 case 1:
111 return Processor::ThreadY;
112 case 2:
113 return Processor::ThreadZ;
114 default:
115 return Processor::Sequential;
116 }
117 default:;
118 }
119 return Processor::Sequential;
120}
121
122/// Add mapping information to the given parallel loop. Do not add
123/// mapping information if the loop already has it. Also, don't
124/// start a mapping at a nested loop.
125static void
126mapParallelOp(ParallelOp parallelOp, MappingLevel mappingLevel = MapGrid,
127 MappingPolicy mappingPolicy = MappingPolicy::OutermostFirst) {
128 // Do not try to add a mapping to already mapped loops or nested loops.
129 if (parallelOp->getAttr(getMappingAttrName()) ||
130 ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>()))
131 return;
132
133 const int numLoops = static_cast<int>(parallelOp.getNumLoops());
134 const int loopsToMap = std::min(numLoops, kNumHardwareIds);
135
136 MLIRContext *ctx = parallelOp.getContext();
137 Builder b(ctx);
139 attrs.reserve(numLoops);
140
141 for (int i = 0; i < numLoops; ++i) {
142
143 // Determine the mapping to use for this loop.
144 // If the are more loops to map than HW IDs map to sequential.
145 int hwMapping = kNumHardwareIds;
146 if (i < loopsToMap) {
147 hwMapping = (mappingPolicy == MappingPolicy::OutermostFirst)
148 ? i
149 : (loopsToMap - 1 - i);
150 }
151
152 attrs.push_back(b.getAttr<ParallelLoopDimMappingAttr>(
153 getHardwareIdForMapping(mappingLevel, hwMapping), b.getDimIdentityMap(),
154 b.getDimIdentityMap()));
155 }
156 (void)setMappingAttr(parallelOp, attrs);
157 ++mappingLevel;
158 // Parallel loop operations are immediately nested, so do not use
159 // walk but just iterate over the operations.
160 for (Operation &op : *parallelOp.getBody()) {
161 if (ParallelOp nested = dyn_cast<ParallelOp>(op))
162 mapParallelOp(nested, mappingLevel, mappingPolicy);
163 }
164}
165
166namespace {
167struct GpuMapParallelLoopsPass
168 : public impl::GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
169 using Base::Base;
170
171 void runOnOperation() override {
172 // Parse the mapping policy.
173 FailureOr<MappingPolicy> policyOrFailure =
174 getMappingPolicyFromStr(mappingPolicyStr);
175 if (failed(policyOrFailure)) {
176 getOperation()->emitError() << "Invalid mapping policy specified.";
177 return signalPassFailure();
178 }
179
180 MappingPolicy policy = *policyOrFailure;
181 MappingLevel topLevel = MappingLevel::MapGrid;
182
183 for (Region &region : getOperation()->getRegions()) {
184 region.walk([&](ParallelOp parallelOp) {
185 mapParallelOp(parallelOp, topLevel, policy);
186 });
187 }
188 }
189};
190
191} // namespace
192} // namespace gpu
193} // namespace mlir
return success()
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
This class is a general helper class for creating context-global objects like types,...
Definition Builders.h:51
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
static Processor getHardwareIdForMapping(MappingLevel level, int dimension)
Computes the hardware id to use for a given mapping level.
static FailureOr< MappingPolicy > getMappingPolicyFromStr(StringRef policy)
static MappingLevel & operator++(MappingLevel &mappingLevel)
Bounded increment on MappingLevel.
LogicalResult setMappingAttr(scf::ParallelOp ploopOp, ArrayRef< ParallelLoopDimMappingAttr > mapping)
Sets the mapping attribute of a scf.parallel operation.
static constexpr int kNumHardwareIds
StringRef getMappingAttrName()
Name of the mapping attribute produced by loop mappers.
static void mapParallelOp(ParallelOp parallelOp, MappingLevel mappingLevel=MapGrid, MappingPolicy mappingPolicy=MappingPolicy::OutermostFirst)
Add mapping information to the given parallel loop.
Include the generated interface declarations.