//===- IterationGraphSorter.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "IterationGraphSorter.h"

#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/BuiltinTypes.h"

using namespace mlir;
using namespace mlir::sparse_tensor;

namespace {

/// A helper class that visits an affine expression and tries to find an
/// AffineDimExpr whose corresponding GenericOp iterator matches the desired
/// iterator type. If no iterator matches, the first DimExpr in the
/// expression is kept.
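///
/// For example, for d0 + d1 with iterator types (parallel, reduction) and
/// reduction as the desired type, the finder picks d1; if both iterators were
/// parallel, it would fall back to the first dim visited, d0.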
class AffineDimFinder : public AffineExprVisitor<AffineDimFinder> {
public:
  explicit AffineDimFinder(ArrayRef<utils::IteratorType> itTypes)
      : iterTypes(itTypes) {}

  /// Overrides the visit method from AffineExprVisitor.
  void visitDimExpr(AffineDimExpr expr) {
    if (pickedDim == nullptr || pickIterType == iterTypes[expr.getPosition()])
      pickedDim = expr;
  }

  /// Sets the desired iterator type that we want to pick.
  void setPickedIterType(utils::IteratorType iterType) {
    pickIterType = iterType;
  }

  /// Gets the desired AffineDimExpr.
  AffineDimExpr getDimExpr() const {
    return llvm::cast<AffineDimExpr>(pickedDim);
  }

  /// Walks the expression in post order to find the desired dim expr.
  void walkPostOrder(AffineExpr expr) {
    pickedDim = nullptr;
    AffineExprVisitor<AffineDimFinder>::walkPostOrder(expr);
  }

private:
  /// The picked AffineDimExpr after visit.
  AffineExpr pickedDim;
  /// The iterator type that we want.
  utils::IteratorType pickIterType;
  /// The mapping between levels and iterator types.
  ArrayRef<utils::IteratorType> iterTypes;
};

/// Flattens an affine expression into a list of AffineDimExprs.
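/// For example, walking d0 + 3 * d1 + d2 in post order collects [d0, d1, d2].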
struct AffineDimCollector : public AffineExprVisitor<AffineDimCollector> {
  // Overrides method from AffineExprVisitor.
  void visitDimExpr(AffineDimExpr expr) { dims.push_back(expr); }
  SmallVector<AffineDimExpr> dims;
};

} // namespace

/// Returns true if the two masks overlap in any bit.
inline static bool includesAny(SortMask mask1, SortMask mask2) {
  return static_cast<unsigned>(mask1) & static_cast<unsigned>(mask2);
}

/// Returns true if the mask requests dense inputs to be included.
inline static bool includesDenseInput(SortMask mask) {
  return includesAny(mask, SortMask::kIncludeDenseInput);
}

/// Returns true if the mask requests the dense output to be included.
inline static bool includesDenseOutput(SortMask mask) {
  return includesAny(mask, SortMask::kIncludeDenseOutput);
}

/// Returns a sparsity rank for loop ordering: lower values indicate
/// dimensions that should be placed in outer loops.
/// 0 = Dense, 1 = Compressed, 2 = Singleton, 3 = Other/Unknown.
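/// For example, with a CSR matrix (level types: dense, compressed), a loop
/// that indexes only the compressed level gets rank 1, while a loop that
/// indexes any dense level or any unannotated dense tensor gets rank 0.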
static unsigned getLoopSparsityRank(unsigned loop, ArrayRef<Value> allTensors,
                                    ArrayRef<AffineMap> allMaps) {
  // Start with highest rank.
  unsigned minRank = 3;

  for (auto [tensor, map] : llvm::zip(allTensors, allMaps)) {
    // Check if this loop accesses this tensor.
    bool loopAccessesTensor = false;
    unsigned tensorDim = 0;
    for (AffineExpr expr : map.getResults()) {
      if (auto dimExpr = dyn_cast<AffineDimExpr>(expr)) {
        if (dimExpr.getPosition() == loop) {
          loopAccessesTensor = true;
          break;
        }
      }
      tensorDim++;
    }

    if (loopAccessesTensor) {
      const auto enc = getSparseTensorEncoding(tensor.getType());
      if (!enc) {
        // Dense tensor - lowest rank.
        return 0;
      } else {
        // Sparse tensor - check the level type for this dimension.
        auto lvlTypes = enc.getLvlTypes();
        if (tensorDim < lvlTypes.size()) {
          auto lvlType = lvlTypes[tensorDim];
          if (isDenseLT(lvlType)) {
            return 0; // Dense level.
          } else if (isCompressedLT(lvlType)) {
            minRank = std::min(minRank, 1u); // Compressed level.
          } else if (isSingletonLT(lvlType)) {
            minRank = std::min(minRank, 2u); // Singleton level.
          }
        }
      }
    }
  }

  return minRank;
}

AffineMap IterationGraphSorter::topoSort() {
  // The sorted result places the first reduction iterator at the latest
  // possible position.
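  // This implements Kahn's algorithm: loops whose in-degree has dropped to
  // zero are emitted and the in-degrees of their successors are decremented,
  // with parallel loops always preferred over reduction loops.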
  std::vector<unsigned> redIt; // reduction iterators with in-degree 0
  std::vector<unsigned> parIt; // parallel iterators with in-degree 0
  const unsigned numLoops = getNumLoops();
  for (unsigned i = 0; i < numLoops; i++) {
    if (inDegree[i] == 0) {
      if (iterTypes[i] == utils::IteratorType::reduction)
        redIt.push_back(i);
      else
        parIt.push_back(i);
    }
  }

  SmallVector<unsigned> loopOrder;
  while (!redIt.empty() || !parIt.empty()) {
    // We always prefer a parallel loop over a reduction loop because putting
    // a reduction loop early might make the loop sequence inadmissible.
    auto &it = !parIt.empty() ? parIt : redIt;

    // Select loop based on strategy.
    unsigned src;
    switch (strategy) {
    case sparse_tensor::LoopOrderingStrategy::kDefault:
      src = it.back();
      break;
    case sparse_tensor::LoopOrderingStrategy::kDenseOuter: {
      // Prefer dense, then compressed, then singleton dimensions outermost.
      // Create combined tensor and map lists for analysis.
      SmallVector<Value> allTensors = ins;
      allTensors.push_back(out);
      SmallVector<AffineMap> allMaps = loop2InsLvl;
      allMaps.push_back(loop2OutLvl);

      // Find the loop with the minimum (lowest) sparsity rank.
      unsigned minLoop = it[0];
      unsigned minRank = getLoopSparsityRank(minLoop, allTensors, allMaps);

      for (auto candidateLoop : it) {
        unsigned rank = getLoopSparsityRank(candidateLoop, allTensors, allMaps);
        if (rank < minRank || (rank == minRank && candidateLoop < minLoop)) {
          minLoop = candidateLoop;
          minRank = rank;
        }
      }
      src = minLoop;
      break;
    }
    }

    loopOrder.push_back(src);
    // Remove the selected loop from the worklist.
    it.erase(std::find(it.begin(), it.end(), src));
    // Update in-degrees, and push new 0-degree nodes into the worklists.
    for (unsigned dst = 0; dst < numLoops; dst++) {
      if (itGraph[src][dst] && --inDegree[dst] == 0) {
        if (iterTypes[dst] == utils::IteratorType::reduction)
          redIt.push_back(dst);
        else
          parIt.push_back(dst);
      }
    }
  }

  // Return the topological sort on success.
  if (loopOrder.size() == numLoops)
    return AffineMap::getPermutationMap(loopOrder, out.getContext());

  // Cycle detected.
  return AffineMap();
}

IterationGraphSorter IterationGraphSorter::fromGenericOp(
    linalg::GenericOp genericOp, sparse_tensor::LoopOrderingStrategy strategy) {
  // Must be a demapped sparse kernel.
  assert(!hasAnyNonIdentityOperandsOrResults(genericOp) &&
         hasAnySparseOperandOrResult(genericOp) &&
         genericOp.getNumDpsInits() == 1);

  SmallVector<AffineMap> loopMap = genericOp.getIndexingMapsArray();
  SmallVector<Value> ins = genericOp.getDpsInputs();

  AffineMap outMap = loopMap.back();
  loopMap.pop_back();

  Value out = genericOp.getDpsInitOperand(0)->get();
  SmallVector<utils::IteratorType> iterTypes =
      genericOp.getIteratorTypesArray();

  return IterationGraphSorter(std::move(ins), std::move(loopMap), out, outMap,
                              std::move(iterTypes), strategy);
}
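
// A minimal usage sketch (a hypothetical caller; assumes the SortMask values
// declared in IterationGraphSorter.h):
//
//   IterationGraphSorter sorter = IterationGraphSorter::fromGenericOp(
//       genericOp, sparse_tensor::LoopOrderingStrategy::kDefault);
//   AffineMap order = sorter.sort(SortMask::kSparseOnly);
//   if (!order)
//     ; // Cycle detected: retry with a different mask or report failure.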

IterationGraphSorter::IterationGraphSorter(
    SmallVector<Value> &&insArg, SmallVector<AffineMap> &&loop2InsLvlArg,
    Value out, AffineMap loop2OutLvl,
    SmallVector<utils::IteratorType> &&iterTypesArg,
    sparse_tensor::LoopOrderingStrategy strategy)
    : ins(std::move(insArg)), loop2InsLvl(std::move(loop2InsLvlArg)), out(out),
      loop2OutLvl(loop2OutLvl), iterTypes(std::move(iterTypesArg)),
      strategy(strategy) {
  // One map per tensor.
  assert(loop2InsLvl.size() == ins.size());
  // All the affine maps have the same number of dimensions (loops).
  assert(llvm::all_equal(llvm::map_range(
      loop2InsLvl, [](AffineMap m) { return m.getNumDims(); })));
  // The number of results of each map should match the rank of its tensor.
  assert(llvm::all_of(llvm::zip(loop2InsLvl, ins), [](auto mvPair) {
    auto [m, v] = mvPair;

    // For ranked types the rank must match.
    // Simply return true for UnrankedTensorType.
    if (auto shapedType = llvm::dyn_cast<ShapedType>(v.getType())) {
      return !shapedType.hasRank() ||
             (m.getNumResults() == shapedType.getRank());
    }
    // Non-shaped (scalar) types behave like rank-0.
    return m.getNumResults() == 0;
  }));

  itGraph.resize(getNumLoops(), std::vector<bool>(getNumLoops(), false));
  inDegree.resize(getNumLoops());
}

AffineMap IterationGraphSorter::sort(SortMask mask, Value ignored) {
  // Reset the adjacency matrix that represents the iteration graph.
  for (auto &row : itGraph)
    llvm::fill(row, false);

  // Reset the in-degrees.
  llvm::fill(inDegree, 0);

  // Add the constraints for the loop-to-level maps of the inputs.
  for (auto [in, map] : llvm::zip(ins, loop2InsLvl)) {
    // Get map and encoding.
    const auto enc = getSparseTensorEncoding(in.getType());
    // Skip dense inputs when not requested.
    if ((!enc && !includesDenseInput(mask)) || in == ignored)
      continue;
    addConstraints(in, map);
  }

  // Add the constraints for the output map.
  const auto enc = getSparseTensorEncoding(out.getType());
  if ((enc || includesDenseOutput(mask)) && out != ignored)
    addConstraints(out, loop2OutLvl);

  // Return the topological sort (empty for cyclic).
  return topoSort();
}

void IterationGraphSorter::addConstraints(Value t, AffineMap loop2LvlMap) {
  auto addIterOrdering = [this](unsigned f, unsigned t) {
    if (!itGraph[f][t] && f != t) {
      itGraph[f][t] = true;
      inDegree[t]++;
    }
  };
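
  // An edge f -> t in itGraph constrains loop f to be placed outside loop t;
  // topoSort() later emits loops with in-degree zero first.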

  // Set up a reduction finder.
  AffineDimFinder finder(iterTypes);
  finder.setPickedIterType(utils::IteratorType::reduction);

  // To compute the iteration graph for tensor[d0 + d1 + d3, d4 + d5 + d6],
  // we require that there exist d_x \in {d0, d1, d3} and d_y \in {d4, d5, d6}
  // such that d_x > d_y, and {d0, d1, d3} - d_x > {d4, d5, d6} - d_y.
  const Level lvlRank = loop2LvlMap.getNumResults();
  for (Level lvl = 1; lvl < lvlRank; lvl++) {
    const AffineExpr fa = loop2LvlMap.getResult(lvl - 1);
    const AffineExpr ta = loop2LvlMap.getResult(lvl);

    if (llvm::isa<AffineDimExpr>(fa) || llvm::isa<AffineDimExpr>(ta)) {
      // Special case when at least one loop2LvlExpr is a simple AffineDimExpr
      // (say, d0), and we require d0 > {d1, d2, ...} or {d1, d2, ...} > d0.
      AffineDimCollector fCollector;
      fCollector.walkPostOrder(fa);
      AffineDimCollector tCollector;
      tCollector.walkPostOrder(ta);

      for (auto fd : fCollector.dims) {
        for (auto td : tCollector.dims) {
          const unsigned f = fd.getPosition();
          const unsigned t = td.getPosition();
          addIterOrdering(f, t);
        }
      }
      continue;
    }

    // When both loop2LvlExprs are compound, we pick an arbitrary reduction
    // loop from the lhs and rhs and use them as d_x and d_y.
    finder.walkPostOrder(fa);
    const AffineDimExpr fexp = finder.getDimExpr();
    const unsigned fldx = fexp.getPosition();

    finder.walkPostOrder(ta);
    const AffineDimExpr texp = finder.getDimExpr();
    const unsigned tldx = texp.getPosition();

    // d_x > d_y
    addIterOrdering(fldx, tldx);

    AffineDimCollector fCollector;
    fCollector.walkPostOrder(fa);
    AffineDimCollector tCollector;
    tCollector.walkPostOrder(ta);

    // Make sure d_x and d_y come last within their respective expressions.
    for (auto fd : fCollector.dims) {
      const unsigned f = fd.getPosition();
      addIterOrdering(f, fldx);
    }
    for (auto td : tCollector.dims) {
      const unsigned t = td.getPosition();
      addIterOrdering(t, tldx);
    }
    // {d0, d1, d3} - d_x > {d4, d5, d6} - d_y
    // This is to ensure that the affine expressions are reduced in sparse
    // tensor level ordering.
    for (auto fd : fCollector.dims) {
      const unsigned f = fd.getPosition();
      if (f == fldx) // skip d_x
        continue;
      for (auto td : tCollector.dims) {
        const unsigned t = td.getPosition();
        if (t == tldx) // skip d_y
          continue;
        addIterOrdering(f, t);
      }
    }
  }
}