MLIR 19.0.0git
LoopEmitter.h
//===- LoopEmitter.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_UTILS_LOOPEMITTER_H_
#define MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_UTILS_LOOPEMITTER_H_

#include <vector>

#include "SparseTensorIterator.h"

#include "mlir/Dialect/SparseTensor/IR/Enums.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
#include "mlir/Dialect/SparseTensor/Utils/Merger.h"
#include "mlir/IR/PatternMatch.h"

namespace mlir {
namespace sparse_tensor {

// A compressed <tensor id, level> pair.
using TensorLevel = unsigned;

//
// The LoopEmitter class manages sparse tensors and helps generate loop
// structures to (co)-iterate over sparse tensors.
//
// An example usage:
// To generate the following loops over T1<?x?> and T2<?x?>
//
// for i in TENSOR_1_0 {
//   for j : TENSOR_2_0 {
//     for k : TENSOR_1_1 {}
//     for k : TENSOR_2_1 {}
//   }
// }
//
// One can use
//
// LoopEmitter loopEmitter({T1, T2});
// loopEmitter.initializeLoopEmit();
// loopEmitter.enterLoopOverTensorAtLvl(T1, 0);
// loopEmitter.enterLoopOverTensorAtLvl(T2, 0);
// loopEmitter.enterLoopOverTensorAtLvl(T1, 1);
// loopEmitter.exitCurrentLoop();
// loopEmitter.enterLoopOverTensorAtLvl(T2, 1);
// loopEmitter.exitCurrentLoop(); // exit k
// loopEmitter.exitCurrentLoop(); // exit j
// loopEmitter.exitCurrentLoop(); // exit i
//
class LoopEmitter {
public:
  /// Optional callback function to set up dense output tensors when
  /// initializing the loop emitter (e.g., to fill a dense output with zeros).
  using OutputUpdater = function_ref<Value(OpBuilder &builder, Location loc,
                                           Value memref, Value tensor)>;

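  // As an illustration (a hypothetical sketch, not part of this API), a
  // caller could zero-initialize a dense output buffer with a callback along
  // these lines, where `elemTp` is assumed to be the output element type and
  // `constantZero` comes from this dialect's CodegenUtils.h:
  //
  //   OutputUpdater updater = [&](OpBuilder &b, Location loc, Value memref,
  //                               Value tensor) -> Value {
  //     Value zero = constantZero(b, loc, elemTp);
  //     b.create<linalg::FillOp>(loc, ValueRange{zero}, ValueRange{memref});
  //     return memref;
  //   };
  //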
  /// Optional callback function to set the bound for the synthetic tensor,
  /// which essentially is the dense loop bound.
  using SynTensorBoundSetter =
      function_ref<Value(OpBuilder &builder, Location loc, Level lvl)>;

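  // A minimal sketch, assuming the caller already holds the dense loop bound
  // for every level in a hypothetical `lvlSizes` array of SSA values:
  //
  //   SynTensorBoundSetter synSetter = [&](OpBuilder &b, Location loc,
  //                                        Level lvl) -> Value {
  //     return lvlSizes[lvl]; // upper bound for the synthetic tensor at lvl
  //   };
  //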
  // Map from [tid, lvl] to a list of dependent [LoopId, coefficient] pairs
  // for subscript expressions on sparse tensors.
  //
  // E.g., for affine index (2 * d0 + d1), it depends on loops d0 and d1 (for
  // affine expression reduction) and uses 2 and 1 as the coefficients on d0
  // and d1, respectively. If the list is empty, there is no affine expression
  // on the input [tid, lvl].
  //
  // NOTE: LoopEmitter assumes that the loop id is consistent with the loop
  // order, i.e., loop `d0` will be generated before loop `d1`.
  using DependentLvlGetter =
      function_ref<std::vector<std::pair<LoopId, unsigned>>(TensorId, Level)>;

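  // For instance, a getter for a kernel whose only non-trivial subscript is
  // (2 * d0 + d1), on a hypothetical tensor `t` at level `l`, might read:
  //
  //   DependentLvlGetter getter = [=](TensorId tid, Level lvl) {
  //     std::vector<std::pair<LoopId, unsigned>> deps;
  //     if (tid == t && lvl == l)
  //       deps = {{0, 2}, {1, 1}}; // d0 with coefficient 2, d1 with 1.
  //     return deps;
  //   };
  //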
  LoopEmitter() = default;

  /// Takes an array of input tensors, which the generated loops will
  /// iterate over. Each tensor is given a `TensorId` (numerically equal
  /// to the position of that tensor `Value` in the array). Setting
  /// `isSparseOut` indicates that the sparse output tensor is empty,
  /// so the loop emitter will generate loops over it according to the
  /// level-sizes.
  void
  initialize(ValueRange tensors, StringAttr loopTag = nullptr,
             bool hasOutput = false, bool isSparseOut = false,
             unsigned numLoops = 0, DependentLvlGetter getter = nullptr,
             SparseEmitStrategy emitStrategy = SparseEmitStrategy::kFunctional);

  explicit LoopEmitter(
      ValueRange tensors, StringAttr loopTag = nullptr, bool hasOutput = false,
      bool isSparseOut = false, unsigned numLoops = 0,
      DependentLvlGetter getter = nullptr,
      SparseEmitStrategy emitStrategy = SparseEmitStrategy::kFunctional);

  /// Starts a loop emitting session by generating all the buffers needed
  /// for iterating over the tensors.
  void initializeLoopEmit(OpBuilder &builder, Location loc,
                          OutputUpdater updater = nullptr,
                          SynTensorBoundSetter synSetter = nullptr);

  /// Generates code to compute an affine expression whose variables are
  /// `LoopId`s (i.e., `a.cast<AffineDimExpr>().getPosition()` is a valid
  /// `LoopId`).
  Value genAffine(OpBuilder &builder, Location loc, AffineExpr a);

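  // E.g., for the affine expression (2 * d0 + d1), with loop IVs %i0 and %i1
  // bound to d0 and d1, the emitted code is expected to look roughly like
  // (a sketch, not the verbatim output):
  //
  //   %c2 = arith.constant 2 : index
  //   %m  = arith.muli %i0, %c2 : index
  //   %r  = arith.addi %m, %i1 : index
  //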
  /// Enters a new loop sequence; the loops within the same sequence start
  /// from the break points of the previous loop instead of starting over
  /// from 0. E.g.,
  /// {
  ///   // loop sequence start.
  ///   p0 = while(xxx)
  ///     ...
  ///     break p0
  ///
  ///   // Starts loop from p0
  ///   for (i = p0; i < end; i++)
  ///     ...
  ///   // loop sequence end.
  /// }
  void enterNewLoopSeq(OpBuilder &builder, Location loc,
                       ArrayRef<TensorLevel> tidLvls);

  /// Exits the current loop sequence; this resets the universal index to 0.
  void exitCurrentLoopSeq(OpBuilder &builder, Location loc);

  /// Emits the address for a dense level based on the value evaluated by the
  /// provided affine expression.
  void locateLvlAtAffineAddress(OpBuilder &builder, Location loc,
                                TensorLevel tidLvl, AffineExpr lvlExpr);

  // TODO: Get rid of `lvls` in the argument list? Track the level we
  // are currently at internally. Then it would be enterNextLvlForTensor.
  // Still need a way to specify the lvl for non-annotated tensors though,
  // as those can be accessed out of order.
  //
  /// Emits a co-iteration loop over a set of tensors.
  /// Emits a loop over tensor_tid_lvl, assuming that the loops between
  /// tensor_tid_[0, lvl - 1] have already been generated.
  /// The function also performs an in-place update on the `reduc` vector to
  /// return the reduction variables used inside the generated loop.
  Operation *enterCoIterationOverTensorsAtLvls(
      OpBuilder &builder, Location loc, ArrayRef<TensorLevel> tidLvls,
      MutableArrayRef<Value> reduc = {}, bool isParallel = false,
      bool needsUniv = false);

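  // A hypothetical call that co-iterates tensors t0 and t1 at levels l0 and
  // l1, threading a single reduction value through the loop:
  //
  //   SmallVector<Value> reduc = {initVal};
  //   Operation *loop = emitter.enterCoIterationOverTensorsAtLvls(
  //       builder, loc,
  //       {emitter.makeTensorLevel(t0, l0), emitter.makeTensorLevel(t1, l1)},
  //       reduc);
  //   // On return, reduc[0] holds the in-loop reduction variable.
  //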
  /// Generates code to exit the current loop (e.g., generates yields, forwards
  /// loop induction variables, etc.).
  void exitCurrentLoop(RewriterBase &rewriter, Location loc,
                       MutableArrayRef<Value> reduc = {});

  /// Get the range of values for all induction variables.
  auto getLoopIVsRange() const {
    return llvm::map_range(loopStack, [](const LoopInfo &li) { return li.iv; });
  }

  /// Gets the loop induction variables for all loops in the current
  /// loop-stack.
  SmallVector<Value> getLoopIVs() const {
    return llvm::to_vector(getLoopIVsRange());
  }

  /// Gets the current depth of the loop-stack.
  LoopId getCurrentDepth() const { return llvm::range_size(getLoopIVsRange()); }

  /// Gets the loop induction variable for the given loop.
  Value getLoopIV(LoopId n) const {
    if (n >= getCurrentDepth())
      return Value();
    auto it = getLoopIVsRange().begin();
    std::advance(it, n);
    return *it;
  }

  /// Gets the total number of manifest tensors (excluding the synthetic
  /// tensor).
  unsigned getNumManifestTensors() const { return tensors.size(); }

  /// Gets the total number of tensors that the loop emitter is operating on.
  unsigned getNumTensors() const {
    // Manifest tensors with one synthetic tensor at the end.
    return getNumManifestTensors() + 1;
  }

  /// Gets the TensorId for the synthetic tensor.
  TensorId getSynTensorId() const { return tensors.size(); }

  /// Gets the TensorId for the output tensor.
  TensorId getOutTensorId() const {
    assert(hasOutput);
    return getNumManifestTensors() - 1;
  }

  /// Compresses a TensorId and Level into a TensorLevel.
  TensorLevel makeTensorLevel(TensorId t, Level l) const {
    return l * getNumTensors() + t;
  }

  /// De-compresses a TensorLevel back to a pair of TensorId and Level.
  std::pair<TensorId, Level> unpackTensorLevel(TensorLevel tidLvl) const {
    unsigned nt = getNumTensors();
    return std::make_pair(tidLvl % nt, tidLvl / nt);
  }

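  // For example, with three manifest tensors plus the synthetic tensor
  // (i.e., getNumTensors() == 4), makeTensorLevel(/*t=*/1, /*l=*/2) yields
  // 2 * 4 + 1 == 9, and unpackTensorLevel(9) recovers the pair {1, 2}.
  //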
  /// Converts a range of TensorLevel to a range of std::pair<TensorId, Level>.
  template <class ContainerTy>
  auto unpackTensorLevelRange(ContainerTy &&c) const {
    using EltTy = decltype(*c.begin());
    static_assert(std::is_same_v<llvm::remove_cvref_t<EltTy>, TensorLevel>,
                  "Must be unpacking a TensorLevel range");
    return llvm::map_range(std::forward<ContainerTy>(c), [this](EltTy tl) {
      return this->unpackTensorLevel(tl);
    });
  }

  ///
  /// Getters.
  ///
  SmallVector<Value> getValPosits(TensorId tid) const {
    SmallVector<Value> batchCrds = iters[tid].back().back()->getBatchCrds();
    Value lastLvlPos = iters[tid].back().back()->getCurPosition().first;
    batchCrds.push_back(lastLvlPos);
    return batchCrds;
  };
  Value getCoord(TensorId tid, Level lvl) const {
    return getCurIterator(tid, lvl).getCrd();
  };
  const std::vector<Value> &getValBuffer() const { return valBuffer; };

  constexpr static llvm::StringLiteral getLoopEmitterLoopAttrName() {
    return llvm::StringLiteral("Emitted from");
  }

private:
  ///
  /// Structure definitions that hold different kinds of loop information.
  ///

  // LoopInfo stores information about a loop generated by LoopEmitter, e.g.,
  // the set of tensor levels that the loop is iterating over.
  struct LoopInfo final {
    LoopInfo(ArrayRef<TensorLevel> tidLvls, Operation *loop, Block *userBlock,
             Value iv, StringAttr loopTag)
        : tidLvls(tidLvls), loop(loop), userCodeBlock(userBlock), iv(iv) {
      // Attaches a special tag to the loop-emitter-generated loop.
      if (loopTag)
        loop->setAttr(LoopEmitter::getLoopEmitterLoopAttrName(), loopTag);
    }
    // The set of <tensor, lvl> pairs, with *only* trivial index expressions,
    // that are used as the condition for the generated loop. Extra
    // information is required for levels with non-trivial index expressions,
    // which is maintained by the sliceDrivenInfo array below.
    const llvm::SmallVector<TensorLevel> tidLvls;
    const Operation *loop;      // the loop operation
    Block *const userCodeBlock; // the block holding users' generated code.
    const Value iv;             // the induction variable for the loop
  };

  void categorizeIterators(ArrayRef<TensorLevel> tidLvls,
                           SmallVectorImpl<SparseIterator *> &raIters,
                           SmallVectorImpl<SparseIterator *> &spIters);
  ///
  /// LoopEmitter internal helper functions.
  ///

  using LoopBodyBuilder = llvm::function_ref<void(OpBuilder &, Location, Value,
                                                  MutableArrayRef<Value>)>;

  /// Whether the list of sparse conditions should be iterated by a for loop.
  bool shouldIteratedByForLoop(ArrayRef<SparseIterator *> spIters);

  /// Generates instructions to compute the coordinate of tensors[tid][lvl]
  /// under the current loop context. The final argument is the
  /// collapsed-output level, whereas this function handles converting
  /// that to the uncollapsed-input level.
  Value genSparseCrd(OpBuilder &builder, Location loc, TensorId tid,
                     Level dstLvl);

  bool isSynTensor(TensorId tid) const { return tid == getSynTensorId(); }

  bool isOutputTensor(TensorId tid) const {
    return hasOutput && tid == getOutTensorId();
  }

  bool isSparseOutput(TensorId tid) const {
    return isOutputTensor(tid) && isSparseOut;
  }

  bool isValidLevel(TensorId tid, Level lvl) const {
    return tid < lvls.size() && lvl < lvls[tid].size();
  }

  /// Prepares loop for iterating over `tensor[lvl]`, under the assumption
  /// that `tensor[0...lvl-1]` loops have already been set up.
  void prepareLoopOverTensorAtLvl(OpBuilder &builder, Location loc,
                                  TensorId tid, Level lvl);

  /// Emits a for loop to iterate over a tensor level with the provided
  /// lower bound `lo` and upper bound `hi`. Apart from iterating just a
  /// single tensor level, for loops can be used for slice-driven loops on
  /// dense levels too.
  /// Returns a pair: the loop generated and the value for the induction
  /// variable.
  std::pair<Operation *, Value>
  emitForLoopOverTensorAtLvl(OpBuilder &builder, Location loc,
                             SparseIterator &iter, MutableArrayRef<Value> reduc,
                             bool isParallel);

  /// Emits a while loop to co-iterate over a list of sparse conditions, or
  /// a (complex) single sparse condition that cannot be handled by a for
  /// loop (e.g., an index reduction loop).
  /// Returns a pair: the loop generated and the value for the induction
  /// variable (which is the minimum coordinate of all the tensors being
  /// iterated).
  std::pair<Operation *, Value>
  emitWhileLoopOverTensorsAtLvls(OpBuilder &builder, Location loc,
                                 ArrayRef<SparseIterator *> iters,
                                 MutableArrayRef<Value> reduc, bool needsUniv);

  /// Exits a for loop and returns the reduction results, e.g.,
  /// for sequential for loops:
  /// %ret = for () {
  ///   ...
  ///   %val = addi %args, %c
  ///   yield %val
  /// }
  /// For parallel loops, the following code generated by users:
  /// %ret = parallel () init(%args) {
  ///   ...
  ///   %val = op %args, %c
  /// }
  /// will be transformed into
  /// %ret = parallel () init(%args) {
  ///   ...
  ///   scf.reduce(%c) bb0(%0, %1){
  ///     %val = op %0, %1
  ///     scf.reduce.return %val
  ///   }
  /// }
  /// NOTE: only one instruction will be moved into the reduce block; the
  /// transformation will fail if multiple instructions are used to compute
  /// the reduction value. %ret is returned to the user, while %val is
  /// provided by the user (`reduc`).
  void exitForLoop(RewriterBase &rewriter, Location loc,
                   MutableArrayRef<Value> reduc);

  /// Exits a while loop and returns the reduction results.
  void exitWhileLoop(OpBuilder &builder, Location loc,
                     MutableArrayRef<Value> reduc);

  //
  // Slice-driven loop related methods.
  //

  void initSubSectIterator(OpBuilder &builder, Location loc);

  /// Gets the reduced number of constraints on tensor[tid][lvl].
  unsigned redDepOnLevel(TensorId tid, Level lvl) const {
    return levelReducedDep[tid][lvl];
  };

  SparseIterator &getCurIterator(TensorId tid, Level lvl) const {
    if (dependentLvlMap[tid][lvl].empty())
      return *iters[tid][lvl].back();

    assert(redDepOnLevel(tid, lvl) >= 1);
    return *iters[tid][lvl][redDepOnLevel(tid, lvl) - 1];
  }

  std::unique_ptr<SparseIterator>
  makeLevelIterator(OpBuilder &builder, Location loc, TensorId tid, Level l);

  /// An optional string attribute that should be attached to the loop
  /// generated by the loop emitter; it might help subsequent passes to
  /// identify loops that operate on sparse tensors more easily.
  StringAttr loopTag;
  /// Whether the loop emitter needs to treat the last tensor as the output
  /// tensor.
  bool hasOutput;
  bool isSparseOut;
  SparseEmitStrategy emitStrategy;

  //
  // Fields which have `numTensor` many entries.
  //

  /// Input and (optional) output tensors.
  std::vector<Value> tensors;
  std::vector<Value> loopHighs;
  std::vector<std::vector<std::unique_ptr<SparseTensorLevel>>> lvls;
  std::vector<std::vector<std::vector<std::unique_ptr<SparseIterator>>>> iters;
  std::vector<Value> valBuffer; // to_value

  // Map from [tid, level] to a list of dependent [tidlevel, coefficient].
  // See comments for `DependentLvlGetter`.
  std::vector<std::vector<std::vector<std::pair<LoopId, unsigned>>>>
      dependentLvlMap;

  // The (size, stride) for each conceptual slice used for index reduction
  // loops.
  std::vector<std::vector<std::vector<std::pair<Value, unsigned>>>> sliceMeta;

  // The number of reduced dependencies on a tensor level so far.
  std::vector<std::vector<unsigned>> levelReducedDep;

  //
  // Fields which have at most `numLoops` many entries.
  //

  /// Loop stack, storing the information of all the nested loops that are
  /// alive.
  std::vector<LoopInfo> loopStack;

  // Loop sequence stack, storing the universal index for the current loop
  // sequence, and a list of the tensor levels that the loop sequence
  // traverses.
  std::vector<std::pair<Value, std::vector<TensorLevel>>> loopSeqStack;
};

} // namespace sparse_tensor
} // namespace mlir

#endif // MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_UTILS_LOOPEMITTER_H_