MLIR 23.0.0git
IterationGraphSorter.cpp
Go to the documentation of this file.
1//===- IterationGraphSorter.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10
16
17using namespace mlir;
18using namespace mlir::sparse_tensor;
19
20namespace {
21
22/// A helper class that visits an affine expression and tries to find
23/// an AffineDimExpr to which the corresponding iterator from a GenericOp
24/// matches the desired iterator type. If there is no matched iterator
25/// type, the method returns the first DimExpr in the expression.
26class AffineDimFinder : public AffineExprVisitor<AffineDimFinder> {
27public:
28 explicit AffineDimFinder(ArrayRef<utils::IteratorType> itTypes)
29 : iterTypes(itTypes) {}
30
31 /// Overrides the visit method from AffineExprVisitor.
32 void visitDimExpr(AffineDimExpr expr) {
33 if (pickedDim == nullptr || pickIterType == iterTypes[expr.getPosition()])
34 pickedDim = expr;
35 }
36
37 /// Sets the desired iterator type that we want to pick.
38 void setPickedIterType(utils::IteratorType iterType) {
39 pickIterType = iterType;
40 }
41
42 /// Gets the desired AffineDimExpr.
43 AffineDimExpr getDimExpr() const {
44 return llvm::cast<AffineDimExpr>(pickedDim);
45 }
46
47 /// Walks the graph in post order to find dim expr.
48 void walkPostOrder(AffineExpr expr) {
49 pickedDim = nullptr;
51 }
52
53private:
54 /// The picked AffineDimExpr after visit.
55 AffineExpr pickedDim;
56 /// The iterator type that we want.
57 utils::IteratorType pickIterType;
58 /// The mapping between levels and iterator types.
59 ArrayRef<utils::IteratorType> iterTypes;
60};
61
62/// Flattens an affine expression into a list of AffineDimExprs.
63struct AffineDimCollector : public AffineExprVisitor<AffineDimCollector> {
64 // Overrides method from AffineExprVisitor.
65 void visitDimExpr(AffineDimExpr expr) { dims.push_back(expr); }
66 SmallVector<AffineDimExpr> dims;
67};
68
69} // namespace
70
71inline static bool includesAny(SortMask mask1, SortMask mask2) {
72 return static_cast<unsigned>(mask1) & static_cast<unsigned>(mask2);
73}
74
75inline static bool includesDenseInput(SortMask mask) {
77}
78
79inline static bool includesDenseOutput(SortMask mask) {
81}
82
83/// Returns a sparsity rank for loop ordering: lower values indicate
84/// dimensions that should be placed in outer loops.
85/// When preferDenseOuter is true the ranking is
86/// 0 = Dense, 1 = Compressed, 2 = Singleton, 3 = Other/Unknown.
87/// Otherwise
88/// 0 = Singleton, 1 = Compressed, 2 = Dense, 3 = Other/Unknown.
89static unsigned getLoopSparsityRank(unsigned loop, ArrayRef<Value> allTensors,
90 ArrayRef<AffineMap> allMaps,
91 bool preferDenseOuter) {
92 const unsigned denseRank = preferDenseOuter ? 0 : 2;
93 const unsigned singletonRank = preferDenseOuter ? 2 : 0;
94 const unsigned compressedRank = 1;
95 const unsigned unknownRank = 3;
96
97 unsigned minRank = unknownRank;
98
99 for (auto [tensor, map] : llvm::zip(allTensors, allMaps)) {
100 // Check if this loop accesses this tensor.
101 bool loopAccessesTensor = false;
102 unsigned tensorDim = 0;
103 for (AffineExpr expr : map.getResults()) {
104 if (auto dimExpr = dyn_cast<AffineDimExpr>(expr)) {
105 if (dimExpr.getPosition() == loop) {
106 loopAccessesTensor = true;
107 break;
108 }
109 }
110 tensorDim++;
111 }
112
113 if (loopAccessesTensor) {
114 const auto enc = getSparseTensorEncoding(tensor.getType());
115 if (!enc) {
116 // Dense tensor.
117 return denseRank;
118 } else {
119 // Sparse tensor - check the level type for this dimension.
120 auto lvlTypes = enc.getLvlTypes();
121 if (tensorDim < lvlTypes.size()) {
122 auto lvlType = lvlTypes[tensorDim];
123 if (isDenseLT(lvlType)) {
124 return denseRank; // Dense level.
125 } else if (isCompressedLT(lvlType)) {
126 minRank = std::min(minRank, compressedRank); // Compressed level.
127 } else if (isSingletonLT(lvlType)) {
128 minRank = std::min(minRank, singletonRank); // Singleton level.
129 }
130 }
131 }
132 }
133 }
134
135 return minRank;
136}
137
138AffineMap IterationGraphSorter::topoSort() {
139 // The sorted result will put the first Reduction iterator to the
140 // latest possible position.
141 std::vector<unsigned> redIt; // reduce iterator with 0 degree
142 std::vector<unsigned> parIt; // parallel iterator with 0 degree
143 const unsigned numLoops = getNumLoops();
144 for (unsigned i = 0; i < numLoops; i++) {
145 if (inDegree[i] == 0) {
146 if (iterTypes[i] == utils::IteratorType::reduction)
147 redIt.push_back(i);
148 else
149 parIt.push_back(i);
150 }
151 }
152
153 SmallVector<unsigned> loopOrder;
154 while (!redIt.empty() || !parIt.empty()) {
155 // We always prefer a parallel loop over a reduction loop because putting
156 // a reduction loop early might make the loop sequence inadmissible.
157 auto &it = !parIt.empty() ? parIt : redIt;
158
159 // Select loop based on strategy.
160 unsigned src;
161 switch (strategy) {
163 src = it.back();
164 break;
166 // Prefer dense, then compressed, then singleton dimensions outermost.
167 // Create combined tensor and map lists for analysis.
168 SmallVector<Value> allTensors = ins;
169 allTensors.push_back(out);
170 SmallVector<AffineMap> allMaps = loop2InsLvl;
171 allMaps.push_back(loop2OutLvl);
172
173 // Find loop with minimum (lowest) sparsity rank.
174 unsigned minLoop = it[0];
175 unsigned minRank =
176 getLoopSparsityRank(minLoop, allTensors, allMaps, true);
177
178 for (auto candidateLoop : it) {
179 unsigned rank =
180 getLoopSparsityRank(candidateLoop, allTensors, allMaps, true);
181 if (rank < minRank || (rank == minRank && candidateLoop < minLoop)) {
182 minLoop = candidateLoop;
183 minRank = rank;
184 }
185 }
186 src = minLoop;
187 break;
188 }
190 // Prefer singleton, then compressed, then dense dimensions outermost.
191 SmallVector<Value> allTensors = ins;
192 allTensors.push_back(out);
193 SmallVector<AffineMap> allMaps = loop2InsLvl;
194 allMaps.push_back(loop2OutLvl);
195
196 unsigned minLoop = it[0];
197 unsigned minRank =
198 getLoopSparsityRank(minLoop, allTensors, allMaps, false);
199
200 for (auto candidateLoop : it) {
201 unsigned rank =
202 getLoopSparsityRank(candidateLoop, allTensors, allMaps, false);
203 if (rank < minRank || (rank == minRank && candidateLoop < minLoop)) {
204 minLoop = candidateLoop;
205 minRank = rank;
206 }
207 }
208 src = minLoop;
209 break;
210 }
211 }
212
213 loopOrder.push_back(src);
214 // Remove the selected loop from the worklist.
215 it.erase(std::find(it.begin(), it.end(), src));
216 // Update in-degree, and push 0-degree node into worklist.
217 for (unsigned dst = 0; dst < numLoops; dst++) {
218 if (itGraph[src][dst] && --inDegree[dst] == 0) {
219 if (iterTypes[dst] == utils::IteratorType::reduction)
220 redIt.push_back(dst);
221 else
222 parIt.push_back(dst);
223 }
224 }
225 }
226
227 // Return the topological sort on success.
228 if (loopOrder.size() == numLoops)
229 return AffineMap::getPermutationMap(loopOrder, out.getContext());
230
231 // Cycle detected.
232 return AffineMap();
233}
234
236 linalg::GenericOp genericOp, sparse_tensor::LoopOrderingStrategy strategy) {
237 // Must be a demapped sparse kernel.
238 assert(!hasAnyNonIdentityOperandsOrResults(genericOp) &&
239 hasAnySparseOperandOrResult(genericOp) &&
240 genericOp.getNumDpsInits() == 1);
241
242 SmallVector<AffineMap> loopMap = genericOp.getIndexingMapsArray();
243 SmallVector<Value> ins = genericOp.getDpsInputs();
244
245 AffineMap outMap = loopMap.back();
246 loopMap.pop_back();
247
248 Value out = genericOp.getDpsInitOperand(0)->get();
250 genericOp.getIteratorTypesArray();
251
252 return IterationGraphSorter(std::move(ins), std::move(loopMap), out, outMap,
253 std::move(iterTypes), strategy);
254}
255
256IterationGraphSorter::IterationGraphSorter(
257 SmallVector<Value> &&insArg, SmallVector<AffineMap> &&loop2InsLvlArg,
258 Value out, AffineMap loop2OutLvl,
261 : ins(std::move(insArg)), loop2InsLvl(std::move(loop2InsLvlArg)), out(out),
262 loop2OutLvl(loop2OutLvl), iterTypes(std::move(iterTypesArg)),
263 strategy(strategy) {
264 // One map per tensor.
265 assert(loop2InsLvl.size() == ins.size());
266 // All the affine maps have the same number of dimensions (loops).
267 assert(llvm::all_equal(llvm::map_range(
268 loop2InsLvl, [](AffineMap m) { return m.getNumDims(); })));
269 // The number of results of the map should match the rank of the tensor.
270 assert(llvm::all_of(llvm::zip(loop2InsLvl, ins), [](auto mvPair) {
271 auto [m, v] = mvPair;
272
273 // For ranked types the rank must match.
274 // Simply return true for UnrankedTensorType
275 if (auto shapedType = llvm::dyn_cast<ShapedType>(v.getType())) {
276 return !shapedType.hasRank() ||
277 (m.getNumResults() == shapedType.getRank());
278 }
279 // Non-shaped (scalar) types behave like rank-0.
280 return m.getNumResults() == 0;
281 }));
282
283 itGraph.resize(getNumLoops(), std::vector<bool>(getNumLoops(), false));
284 inDegree.resize(getNumLoops());
285}
286
288 // Reset the adjacency matrix that represents the iteration graph.
289 for (auto &row : itGraph)
290 llvm::fill(row, false);
291
292 // Reset in-degree.
293 llvm::fill(inDegree, 0);
294
295 // Add the constraints for the loop to level map.
296 for (auto [in, map] : llvm::zip(ins, loop2InsLvl)) {
297 // Get map and encoding.
298 const auto enc = getSparseTensorEncoding(in.getType());
299 // Skip dense inputs when not requested.
300 if ((!enc && !includesDenseInput(mask)) || in == ignored)
301 continue;
302 addConstraints(in, map);
303 }
304
305 // Add the constraints for the output map.
306 const auto enc = getSparseTensorEncoding(out.getType());
307 if ((enc || includesDenseOutput(mask)) && out != ignored)
308 addConstraints(out, loop2OutLvl);
309
310 // Return the topological sort (empty for cyclic).
311 return topoSort();
312}
313
314void IterationGraphSorter::addConstraints(Value t, AffineMap loop2LvlMap) {
315 auto addIterOrdering = [this](unsigned f, unsigned t) {
316 if (!itGraph[f][t] && f != t) {
317 itGraph[f][t] = true;
318 inDegree[t]++;
319 }
320 };
321
322 // Set up a reduction finder.
323 AffineDimFinder finder(iterTypes);
324 finder.setPickedIterType(utils::IteratorType::reduction);
325
326 // To compute iteration graph for tensor[d0 + d1 + d3, d4 + d5 + d6],
327 // we require there exist d_x \in {d0, d1, d3} and d_y \in {d4, d5, d6},
328 // and d_x > d_y && {d0, d1, d3} - d_x > {d4, d5, d6} - d_y
329 const Level lvlRank = loop2LvlMap.getNumResults();
330 for (Level lvl = 1; lvl < lvlRank; lvl++) {
331 const AffineExpr fa = loop2LvlMap.getResult(lvl - 1);
332 const AffineExpr ta = loop2LvlMap.getResult(lvl);
333
334 if (llvm::isa<AffineDimExpr>(fa) || llvm::isa<AffineDimExpr>(ta)) {
335 // Special case when at least one loop2LvlExp is a simple AffineDimExpr
336 // (say, d0) and we require d0 > {d1, d2, ...} or {d1, d2, ...} > d0
337 AffineDimCollector fCollector;
338 fCollector.walkPostOrder(fa);
339 AffineDimCollector tCollector;
340 tCollector.walkPostOrder(ta);
341
342 for (auto fd : fCollector.dims) {
343 for (auto td : tCollector.dims) {
344 const unsigned f = fd.getPosition();
345 const unsigned t = td.getPosition();
346 addIterOrdering(f, t);
347 }
348 }
349 continue;
350 }
351
352 // When both loop2LvlExpr is compound, we pick an abitrary reduction loop
353 // from lhs and rhs and use them as d_x and d_y.
354 finder.walkPostOrder(fa);
355 const AffineDimExpr fexp = finder.getDimExpr();
356 const unsigned fldx = fexp.getPosition();
357
358 finder.walkPostOrder(ta);
359 const AffineDimExpr texp = finder.getDimExpr();
360 const unsigned tldx = texp.getPosition();
361
362 // d_x > d_y
363 addIterOrdering(fldx, tldx);
364
365 AffineDimCollector fCollector;
366 fCollector.walkPostOrder(fa);
367 AffineDimCollector tCollector;
368 tCollector.walkPostOrder(ta);
369
370 // Make sure dx and dy is the last.
371 for (auto fd : fCollector.dims) {
372 const unsigned f = fd.getPosition();
373 addIterOrdering(f, fldx);
374 }
375 for (auto td : tCollector.dims) {
376 const unsigned t = td.getPosition();
377 addIterOrdering(t, tldx);
378 }
379 // {d0, d1, d3} - d_x > {d4, d5, d6} - d_y
380 // This is to ensure that the affine expressions are reduced in sparse
381 // tensor level ordering.
382 for (auto fd : fCollector.dims) {
383 const unsigned f = fd.getPosition();
384 if (f == fldx) // skip d_x
385 continue;
386 for (auto td : tCollector.dims) {
387 const unsigned t = td.getPosition();
388 if (t == tldx) // skip d_y
389 continue;
390 addIterOrdering(f, t);
391 }
392 }
393 }
394}
static bool includesDenseOutput(SortMask mask)
static bool includesAny(SortMask mask1, SortMask mask2)
static unsigned getLoopSparsityRank(unsigned loop, ArrayRef< Value > allTensors, ArrayRef< AffineMap > allMaps, bool preferDenseOuter)
Returns a sparsity rank for loop ordering: lower values indicate dimensions that should be placed in ...
static bool includesDenseInput(SortMask mask)
unsigned getPosition() const
See documentation for AffineExprVisitorBase.
RetTy walkPostOrder(AffineExpr expr)
Base type for affine expression.
Definition AffineExpr.h:68
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition AffineMap.h:46
unsigned getNumDims() const
unsigned getNumResults() const
AffineExpr getResult(unsigned idx) const
static AffineMap getPermutationMap(ArrayRef< unsigned > permutation, MLIRContext *context)
Returns an AffineMap representing a permutation.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
static IterationGraphSorter fromGenericOp(linalg::GenericOp genericOp, sparse_tensor::LoopOrderingStrategy strategy)
Factory method that constructs an iteration graph sorter for the given linalg.generic operation with ...
unsigned getNumLoops() const
Returns the number of loops in the iteration graph.
AffineMap sort(SortMask mask, Value ignored=nullptr)
Returns a permutation that represents the scheduled loop order.
bool hasAnySparseOperandOrResult(Operation *op)
Returns true iff MLIR operand has any sparse operand or result.
bool isSingletonLT(LevelType lt)
Definition Enums.h:421
bool isCompressedLT(LevelType lt)
Definition Enums.h:415
uint64_t Level
The type of level identifiers and level-ranks.
LoopOrderingStrategy
Defines a strategy for loop ordering during sparse code generation.
Definition Passes.h:62
SparseTensorEncodingAttr getSparseTensorEncoding(Type type)
Convenience method to get a sparse encoding attribute from a type.
bool isDenseLT(LevelType lt)
Definition Enums.h:413
bool hasAnyNonIdentityOperandsOrResults(Operation *op)
Returns true iff MLIR operation has any sparse tensor with non-identity dim2lvl maps.
SortMask
Iteration graph sorting mask,.
Include the generated interface declarations.