1 //===- LoopEmitter.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "LoopEmitter.h"
10 #include "CodegenUtils.h"
11 
21 
22 using namespace mlir;
23 using namespace mlir::sparse_tensor;
24 
25 //===----------------------------------------------------------------------===//
26 // File local shorthand macros
27 //===----------------------------------------------------------------------===//
28 
29 #define CMPI(p, l, r) \
30  (builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::p, (l), (r)) \
31  .getResult())
32 
33 #define C_IDX(v) (constantIndex(builder, loc, (v)))
34 #define YIELD(vs) (builder.create<scf::YieldOp>(loc, (vs)))
35 #define ADDI(lhs, rhs) (builder.create<arith::AddIOp>(loc, (lhs), (rhs)))
36 #define ANDI(lhs, rhs) (builder.create<arith::AndIOp>(loc, (lhs), (rhs)))
37 #define SUBI(lhs, rhs) (builder.create<arith::SubIOp>(loc, (lhs), (rhs)))
38 #define MULI(lhs, rhs) (builder.create<arith::MulIOp>(loc, (lhs), (rhs)))
39 #define REMUI(lhs, rhs) (builder.create<arith::RemUIOp>(loc, (lhs), (rhs)))
40 #define DIVUI(lhs, rhs) (builder.create<arith::DivUIOp>(loc, (lhs), (rhs)))
41 #define SELECT(c, l, r) (builder.create<arith::SelectOp>(loc, (c), (l), (r)))
42 
43 //===----------------------------------------------------------------------===//
44 // Debugging utils
45 //===----------------------------------------------------------------------===//
46 
47 #ifndef NDEBUG
48 LLVM_ATTRIBUTE_UNUSED static void dumpIndexMemRef(OpBuilder &builder,
49  Location loc, Value memref) {
50  memref = builder.create<memref::CastOp>(
51  loc, UnrankedMemRefType::get(builder.getIndexType(), 0), memref);
52  createFuncCall(builder, loc, "printMemrefInd", TypeRange{},
53                 ValueRange{memref}, EmitCInterface::On);
54 }
55 #endif
56 
57 //===----------------------------------------------------------------------===//
58 // File local helper functions.
59 //===----------------------------------------------------------------------===//
60 
61 // For index reduction loops, since the tensors are sliced into non-contiguous
62 // fragments, we need a triple [pLo, pHi, pPtr], in which the pair (pLo, pHi)
63 // specifies the range of the fragment, and pPtr specifies the index of the
64 // corresponding fragment in the child level (i.e., a pointer to the sliced
65 // position array).
66 static constexpr unsigned kSliceIterWidth = 3;
67 
68 static Value genSliceOffset(OpBuilder &builder, Location loc, Value tensor,
69  Level lvl) {
70  auto enc = getSparseTensorEncoding(tensor.getType());
71  return createOrFoldSliceOffsetOp(builder, loc, tensor, toDim(enc, lvl));
72 }
73 
74 static Value genSliceStride(OpBuilder &builder, Location loc, Value tensor,
75  Level lvl) {
76  auto enc = getSparseTensorEncoding(tensor.getType());
77  return createOrFoldSliceStrideOp(builder, loc, tensor, toDim(enc, lvl));
78 }
79 
80 /// Converts a coordinate relative to the slice to the coordinate relative
81 /// to the underlying tensor.
82 // FIXME: that description says "sliceCrd -> tensorCrd"; but the function
83 // name suggests it should be "tensorCrd -> sliceCrd".
84 static Value toSliceCrd(OpBuilder &builder, Location loc, Value crd,
85  Value offset, Value stride, Value tensor, Level lvl) {
86  // tensorCrd = sliceCrd * stride + offset
87  return ADDI(MULI(crd, stride), offset);
88 }
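// For illustration (example values, not from the original source): with
// offset = 2 and stride = 3, the slice-local coordinate 4 maps to the
// underlying tensor coordinate 4 * 3 + 2 = 14.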
89 
90 /// Generates code to compute the *absolute* offset of the slice based on the
91 /// provided minimum coordinate in the slice.
92 /// E.g., when reducing d0 + d1 + d2, we need two slices to fully reduce the
93 /// expression, i.e., s1 = slice(T, d0), s2 = slice(s1, d1). The *absolute*
94 /// offset is the offset computed relative to the initial tensor T.
95 ///
96 /// When isNonEmpty == false, the computed offset is meaningless and should not
97 /// be used at runtime; the method currently generates code that returns 0 in
98 /// that case.
99 ///
100 /// offset = isNonEmpty && minCrd >= size ? minCrd - size + 1 : 0;
101 static Value offsetFromMinCoord(OpBuilder &builder, Location loc, Value minCrd,
102  Value size, Value isNonEmpty) {
103  Value geSize = CMPI(uge, minCrd, size);
104  Value pred = ANDI(isNonEmpty, geSize);
105  // Computes minCrd - size + 1
106  Value mms = SUBI(ADDI(minCrd, C_IDX(1)), size);
107  // This is the absolute offset relative to the underlying tensor.
108  return SELECT(pred, mms, C_IDX(0));
109 }
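// For illustration (example values): with minCrd = 7, size = 5, and a
// non-empty slice, the generated select yields offset = 7 - 5 + 1 = 3;
// an empty slice (or minCrd < size) yields offset = 0.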
110 
111 /// Converts a coordinate relative to the underlying tensor to the coordinate
112 /// relative to the slice, and returns an extra remainder value.
113 // FIXME: that description says "tensorCrd -> sliceCrd"; but the function
114 // name suggests it should be "sliceCrd -> tensorCrd".
115 static std::pair<Value, Value> fromSliceCrd(OpBuilder &builder, Location loc,
116  Value crd, Value offset,
117  Value stride, Value tensor,
118  Level lvl) {
119  // sliceCrd = (tensorCrd - offset) / stride
120  crd = SUBI(crd, offset);
121  Value rem = REMUI(crd, stride);
122  crd = DIVUI(crd, stride);
123  return std::make_pair(crd, rem);
124 }
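// For illustration (example values): this is the inverse of toSliceCrd.
// With offset = 2 and stride = 3:
//   tensorCrd = 14  ->  ((14 - 2) / 3, (14 - 2) % 3) = (4, 0)  // on stride
//   tensorCrd = 15  ->  ((15 - 2) / 3, (15 - 2) % 3) = (4, 1)  // off stride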
125 
126 // Generates a bool value for while loop condition that tries to iterate over a
127 // fully reduced level with an affine index expression.
128 static Value genSparseReducedAffineCond(OpBuilder &builder, Location loc,
129                                         Value crdBuf, Value crdHi, Value posit,
130  Value posHi) {
131  Value inBound = CMPI(ult, posit, posHi);
132  auto ifOp =
133  builder.create<scf::IfOp>(loc, builder.getI1Type(), inBound, true);
134  // if (inbound)
135  // yield coord < crdHi
136  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
137  Value crd = genIndexLoad(builder, loc, crdBuf, posit);
138  YIELD(CMPI(ult, crd, crdHi));
139  // else
140  // yield false
141  builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
142  YIELD(constantI1(builder, loc, false));
143 
144  builder.setInsertionPointAfter(ifOp);
145  return ifOp.getResult(0);
146 }
147 
148 // Helper functions that load/store into the position buffer for slice-driven
149 // loops.
150 // The sliced pointer buffer is organized as:
151 // [size, curPtr] (two metadata) + [[pLo0, pLo1, pLo2, ...],
152 // [pHi0, pHi1, pHi2, ...],
153 // [pNx0, pNx1, pNx2, ...]]
154 static Value allocSlicePosBuf(OpBuilder &builder, Location loc,
155                               Value tupleCnt) {
156  Value bufSz = MULI(tupleCnt, C_IDX(kSliceIterWidth));
157  // Two additional metadata values {memSize, idx} at the head.
158  bufSz = ADDI(bufSz, C_IDX(2));
159  return genAlloca(builder, loc, bufSz, builder.getIndexType());
160 }
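// For illustration (example values): with tupleCnt = 2, the buffer allocated
// above holds 2 * kSliceIterWidth + 2 = 8 index values, laid out as
//   [size, curPtr, pLo0, pLo1, pHi0, pHi1, pNx0, pNx1]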
161 // TODO: We should use SSA value for it.
162 // Gets and sets metadata.
163 static Value loadSlicePosPtr(OpBuilder &builder, Location loc, Value sPosBuf) {
164  return genIndexLoad(builder, loc, sPosBuf, C_IDX(1));
165 }
166 static void updateSlicePosPtr(OpBuilder &builder, Location loc, Value sPosBuf,
167  Value pPtr) {
168  builder.create<memref::StoreOp>(loc, pPtr, sPosBuf, C_IDX(1));
169 }
170 static Value loadSlicePosTupleNum(OpBuilder &builder, Location loc,
171                                   Value sPosBuf) {
172  return genIndexLoad(builder, loc, sPosBuf, C_IDX(0));
173 }
174 static void updateSlicePosTupleNum(OpBuilder &builder, Location loc, Value num,
175  Value sPosBuf) {
176  builder.create<memref::StoreOp>(loc, num, sPosBuf, C_IDX(0));
177 }
178 
179 // Gets and sets position values for slice-driven loops.
180 enum class SlicePosKind { kLo, kHi, kNext };
181 static Value getSlicePosIdx(OpBuilder &builder, Location loc, Value posBuf,
182  Value tupleIdx, SlicePosKind posKind) {
183  Value dim = builder.create<memref::DimOp>(loc, posBuf, C_IDX(0));
184  Value tupleCnt = DIVUI(SUBI(dim, C_IDX(2)), C_IDX(kSliceIterWidth));
185  switch (posKind) {
186  case SlicePosKind::kLo:
187  return ADDI(tupleIdx, C_IDX(2));
188  case SlicePosKind::kHi:
189  return ADDI(tupleIdx, ADDI(tupleCnt, C_IDX(2)));
190  case SlicePosKind::kNext:
191  return ADDI(tupleIdx, ADDI(tupleCnt, ADDI(tupleCnt, C_IDX(2))));
192  }
193  llvm_unreachable("unexpected kind");
194 }
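// For illustration (example values): with tupleCnt = 2 (i.e., dim = 8) and
// tupleIdx = 1, getSlicePosIdx returns 3 for kLo, 5 for kHi, and 7 for kNext,
// matching the [size, curPtr, pLo0, pLo1, pHi0, pHi1, pNx0, pNx1] layout.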
195 static Value loadSlicePos(OpBuilder &builder, Location loc, Value sPosBuf,
196  Value tupleIdx, SlicePosKind posKind) {
197  return genIndexLoad(builder, loc, sPosBuf,
198  getSlicePosIdx(builder, loc, sPosBuf, tupleIdx, posKind));
199 }
200 static void updateSlicePos(OpBuilder &builder, Location loc, Value sPosBuf,
201  Value pos, Value tupleIdx, SlicePosKind posKind) {
202  builder.create<memref::StoreOp>(
203  loc, pos, sPosBuf,
204  getSlicePosIdx(builder, loc, sPosBuf, tupleIdx, posKind));
205 }
206 
207 std::pair<Value, Value>
208 LoopEmitter::genSliceLegitPredicate(OpBuilder &builder, Location loc, Value crd,
209  TensorId tid, Level lvl) {
210  assert(isSparseSlices[tid]);
211  Value slice = tensors[tid];
212  Value offset = sliceOffsets[tid][lvl];
213  Value stride = sliceStrides[tid][lvl];
214  auto enc = getSparseTensorEncoding(slice.getType());
215 
216  const auto [newCrd, crdRem] =
217  fromSliceCrd(builder, loc, crd, offset, stride, slice, lvl);
218 
219  SmallVector<Value, 3> conds; // at most 3 conditions
220 
221  // First, coord >= offset (skip the check if offset is known to be 0).
222  if (auto staticOffset = enc.getStaticLvlSliceOffset(lvl);
223  !(staticOffset.has_value() && *staticOffset == 0)) {
224  auto geOffset = CMPI(uge, crd, offset);
225  conds.push_back(geOffset);
226  }
227 
228  // Second, coord_in_slice < length
229  auto ltLength = CMPI(ult, newCrd, lvlSizes[tid][lvl]);
230  conds.push_back(ltLength);
231 
232  // Third, rem == 0 (skip the check if stride is known to be 1).
233  if (auto staticStride = enc.getStaticLvlSliceStride(lvl);
234  !(staticStride.has_value() && *staticStride == 1)) {
235  auto fitStride = CMPI(eq, crdRem, C_IDX(0));
236  conds.push_back(fitStride);
237  }
238 
239  // Must meet all conditions to be a valid coordinate in the slice.
240  auto pred = conds.front();
241  for (auto cond : ValueRange(conds).drop_front())
242  pred = ANDI(pred, cond);
243 
244  return {newCrd, pred};
245 }
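// For illustration (example values): for a slice with offset = 2, stride = 3,
// and size 4, the predicate generated above for a tensor coordinate crd is
//   crd >= 2  &&  (crd - 2) / 3 < 4  &&  (crd - 2) % 3 == 0
// where the first/last conjunct is elided when the offset/stride is statically
// known to be 0/1, respectively.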
246 
247 //===----------------------------------------------------------------------===//
248 // Sparse tensor loop emitter class implementations
249 //===----------------------------------------------------------------------===//
250 
251 Value LoopEmitter::genAddress(OpBuilder &builder, Location loc, TensorId tid,
252  Level lvl, Value crd) {
253  Value pos = lvl == 0 ? C_IDX(0) : posits[tid][lvl - 1];
254  Value mul = MULI(highs[tid][lvl], pos);
255  if (isSparseSlices[tid])
256  crd = toSliceCrd(builder, loc, crd, sliceOffsets[tid][lvl],
257  sliceStrides[tid][lvl], tensors[tid], lvl);
258  Value add = ADDI(mul, crd);
259  return add;
260 }
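// For illustration (example values): genAddress linearizes dense addresses as
//   addr(lvl) = posits[lvl - 1] * highs[lvl] + crd
// e.g., for a dense 4x5 tensor, element (2, 3) resolves to 2 * 5 + 3 = 13.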
261 
262 Value LoopEmitter::genSegmentHigh(OpBuilder &builder, Location loc,
263  TensorId tid, Level lvl, Value pLo,
264  Value pHi) {
265  const auto coordinates = coordinatesBuffers[tid][lvl];
266  const auto sameCrd = genIndexLoad(builder, loc, coordinates, pLo);
267  auto whileOp = builder.create<scf::WhileOp>(
268  loc, builder.getIndexType(), pLo,
269  /*beforeBuilder=*/
270  [pHi, coordinates, sameCrd](OpBuilder &builder, Location loc,
271  ValueRange ivs) {
272  const auto pos = ivs[0];
273  Value inBound = builder.create<arith::CmpIOp>(
274  loc, arith::CmpIPredicate::ult, pos, pHi);
275  auto ifInBound =
276  builder.create<scf::IfOp>(loc, builder.getI1Type(), inBound, true);
277  {
278  OpBuilder::InsertionGuard guard(builder);
279  // Load the next coordinates only when inbound (to avoid OOB
280  // accesses).
281  builder.setInsertionPointToStart(ifInBound.thenBlock());
282  Value crd = genIndexLoad(builder, loc, coordinates, pos);
283  Value isSameCrd = builder.create<arith::CmpIOp>(
284  loc, arith::CmpIPredicate::eq, crd, sameCrd);
285  YIELD(isSameCrd);
286  // Else, the position is out of bound, yield false to terminate the
287  // loop.
288  builder.setInsertionPointToStart(ifInBound.elseBlock());
289  YIELD(constantI1(builder, loc, false));
290  }
291  builder.create<scf::ConditionOp>(loc, ifInBound.getResults()[0], ivs);
292  },
293  /*afterBuilder=*/
294  [](OpBuilder &builder, Location loc, ValueRange ivs) {
295  // pos ++
296  Value nextPos = ADDI(ivs[0], C_IDX(1));
297  YIELD(nextPos);
298  });
299  // Return the segment high.
300  return whileOp.getResult(0);
301 }
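// For illustration (example values): with coordinates [1, 1, 1, 4, 5],
// pLo = 0, and pHi = 5, the while-loop above stops at position 3 (the first
// position whose coordinate differs from coordinates[pLo]), which is the
// returned segment high.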
302 
303 Value LoopEmitter::genSparseCrd(OpBuilder &builder, Location loc, TensorId tid,
304  Level lvl) {
305  // A load on the coordinates array yields the coordinate.
306  const Value mem = coordinatesBuffers[tid][lvl];
307  /// FIXME: See the [CLARIFY_POSITS_LVL] note in the header.
308  const Value pos = posits[tid][lvl];
309  const Value crd = genIndexLoad(builder, loc, mem, pos);
310  return crd;
311 }
312 
313 LoopEmitter::LoopEmitter(ValueRange tensors, StringAttr loopTag, bool hasOutput,
314  bool isSparseOut, unsigned numLoops,
315  DependentLvlGetter dimGetter) {
316  initialize(tensors, loopTag, hasOutput, isSparseOut, numLoops, dimGetter);
317 }
318 
319 void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput,
320  bool isSparseOut, unsigned numLoops,
321  DependentLvlGetter dimGetter) {
322  // First initialize the top-level type of the fields.
323  this->loopTag = loopTag;
324  this->hasOutput = hasOutput;
325  this->isSparseOut = isSparseOut;
326 
327  const unsigned numManifestTensors = ts.size();
328  const unsigned synTensorId = numManifestTensors;
329  const unsigned numTensors = numManifestTensors + 1;
330  // tensors array (len == numManifestTensor).
331  this->tensors.assign(ts.begin(), ts.end());
332  // Arrays with len == numTensor.
333  this->lvlTypes.assign(numTensors, std::vector<LevelType>());
334  this->lvlSizes.assign(numTensors, std::vector<Value>());
335  this->highs.assign(numTensors, std::vector<Value>());
336  this->segHi.assign(numTensors, std::vector<Value>());
337  this->posits.assign(numTensors, std::vector<Value>());
338  this->coords.assign(numTensors, std::vector<Value>());
339  this->positionsBuffers.assign(numTensors, std::vector<Value>());
340  this->coordinatesBuffers.assign(numTensors, std::vector<Value>());
341  this->valBuffer.assign(numTensors, nullptr);
342  this->isSparseSlices.assign(numTensors, false);
343  this->sliceOffsets.assign(numTensors, std::vector<Value>());
344  this->sliceStrides.assign(numTensors, std::vector<Value>());
345 
346  // These zeros will be overwritten below, but we need to initialize
347  // them to something since we'll need random-access assignment.
348  this->loopStack.reserve(numLoops);
349  this->loopSeqStack.reserve(numLoops);
350 
351  // Index-reduction related fields.
352  this->dependentLvlMap.assign(
353  numTensors, std::vector<std::vector<std::pair<TensorLevel, unsigned>>>());
354  this->slicePosBuffer.assign(numTensors, std::vector<std::vector<Value>>());
355  this->sliceMeta.assign(
356  numTensors, std::vector<std::vector<std::pair<Value, unsigned>>>());
357  this->sliceStack.assign(numTensors, std::vector<SliceInfo>());
358  this->levelReducedDep.assign(numTensors, std::vector<unsigned>());
359 
360  // Initialize nested types of `TensorId`-indexed fields.
361  for (TensorId tid = 0; tid < numTensors; tid++) {
362  Level lvlRank;
363  if (tid == synTensorId) {
364  // Synthetic tensor (conceptually) is an all-dense tensor with rank equal
365  // to the total number of loops (each level can potentially be mapped to
366  // one of the loops being generated).
367  lvlRank = numLoops;
368  lvlTypes[tid].assign(lvlRank, LevelType::Dense);
369  } else {
370  const Value t = tensors[tid];
371  // A scalar or 0-dimensional tensor.
372  if (isZeroRankedTensorOrScalar(t.getType()))
373  continue;
374 
375  auto rtp = getRankedTensorType(t);
376  const SparseTensorType stt(rtp);
377  lvlRank = stt.getLvlRank();
378 
379  if (stt.hasEncoding()) {
380  const auto enc = stt.getEncoding();
381  isSparseSlices[tid] = enc.isSlice();
382  for (auto lvlTp : enc.getLvlTypes())
383  lvlTypes[tid].push_back(lvlTp);
384  } else {
385  lvlTypes[tid].assign(lvlRank, LevelType::Dense);
386  }
387  }
388 
389  // Initialize using empty value.
390  lvlSizes[tid].assign(lvlRank, Value());
391  highs[tid].assign(lvlRank, Value());
392  segHi[tid].assign(lvlRank, Value());
393  posits[tid].assign(lvlRank, Value());
394  coords[tid].assign(lvlRank, Value());
395  positionsBuffers[tid].assign(lvlRank, Value());
396  coordinatesBuffers[tid].assign(lvlRank, Value());
397  sliceOffsets[tid].assign(lvlRank, Value());
398  sliceStrides[tid].assign(lvlRank, Value());
399 
400  // Slice-driven loops related initialization.
401  levelReducedDep[tid].assign(lvlRank, 0);
402  dependentLvlMap[tid].assign(
403  lvlRank, std::vector<std::pair<TensorLevel, unsigned>>());
404  slicePosBuffer[tid].assign(lvlRank, std::vector<Value>());
405  sliceMeta[tid].assign(lvlRank, std::vector<std::pair<Value, unsigned>>());
406  sliceStack[tid].emplace_back(/*minCrd=*/Value(),
407  /*offset=*/Value(), /*isNonEmpty*/ Value(),
408  std::nullopt, 0);
409  if (dimGetter && !isSynTensor(tid)) {
410  for (Level l = 0; l < lvlRank; l++) {
411  dependentLvlMap[tid][l] = dimGetter(tid, l);
412  unsigned depends = dependentLvlMap[tid][l].size();
413  if (depends == 0)
414  continue;
415  sliceMeta[tid][l].assign(depends, std::make_pair(nullptr, 0));
416  // We need `depends - 1` slices to fully reduce the affine expression.
417  slicePosBuffer[tid][l].assign(depends - 1, nullptr);
418  }
419  }
420  }
421 }
422 
423 void LoopEmitter::initializeLoopEmit(
424  OpBuilder &builder, Location loc, LoopEmitter::OutputUpdater updater,
425  LoopEmitter::SynTensorBoundSetter synSetter) {
426 
427  // For every synthetic tensor, set the high bound by calling the callback.
428  if (synSetter)
429  for (unsigned i = 0, e = highs[getSynTensorId()].size(); i < e; i++)
430  highs[getSynTensorId()][i] = synSetter(builder, loc, i);
431 
432  // For every manifest tensor:
433  // * get the values buffer.
434  // * For every level:
435  // * get the positions and coordinates buffers
436  // * get/compute the level-size, which is also used as the upper-bound
437  // on positions.
438  for (TensorId t = 0, numTensors = getNumManifestTensors(); t < numTensors;
439  t++) {
440  const Value tensor = tensors[t];
441  const auto rtp = dyn_cast<RankedTensorType>(tensor.getType());
442  if (!rtp)
443  // Skip only scalars; zero-ranked tensors still need to be bufferized and
444  // (probably) filled with zeros by users.
445  continue;
446  // FIXME: the definition of `lvlRank` looks more like a dim-rank;
447  // but the variable is used as a level everywhere below, which
448  // suggests there may be some dim/lvl confusion going on here.
449  auto stt = getSparseTensorType(tensor);
450  const Level lvlRank = stt.getLvlRank();
451  const auto shape = rtp.getShape();
452  const Level cooStart = stt.getCOOStart();
453 
454  SmallVector<Value> lvlSzs;
455  for (Level l = 0; l < stt.getLvlRank(); l++) {
456  if (stt.hasEncoding())
457  lvlSzs.push_back(builder.create<LvlOp>(loc, tensor, l));
458  else
459  lvlSzs.push_back(builder.create<tensor::DimOp>(loc, tensor, l));
460  }
461 
462  // Scan all levels of current tensor.
463  for (Level l = 0; l < lvlRank; l++) {
464  // This should be called only once at beginning.
465  assert(!positionsBuffers[t][l] && !coordinatesBuffers[t][l] &&
466  !highs[t][l]);
467  const auto lvlTp = lvlTypes[t][l];
468  // Handle sparse storage schemes.
469  if (isCompressedLT(lvlTp) || isLooseCompressedLT(lvlTp)) {
470  // Generate sparse primitives to obtain positions and coordinates.
471  positionsBuffers[t][l] = genToPositions(builder, loc, tensor, l);
472  coordinatesBuffers[t][l] =
473  genToCoordinates(builder, loc, tensor, l, cooStart);
474  } else if (isSingletonLT(lvlTp) || is2OutOf4LT(lvlTp)) {
475  // Singleton level, fetch coordinates.
476  coordinatesBuffers[t][l] =
477  genToCoordinates(builder, loc, tensor, l, cooStart);
478  } else {
479  // Dense level, nothing to fetch.
480  assert(isDenseLT(lvlTp));
481  }
482 
483  // Find upper bound in current dimension.
484  highs[t][l] = lvlSizes[t][l] = lvlSzs[l];
485  if (isSparseSlices[t]) {
486  sliceOffsets[t][l] = genSliceOffset(builder, loc, tensors[t], l);
487  sliceStrides[t][l] = genSliceStride(builder, loc, tensors[t], l);
488  }
489  }
490 
491  // Perform the required bufferization. Dense inputs materialize
492  // from the input tensors. Sparse inputs use sparse primitives to obtain the
493  // values.
494  // Delegates extra output initialization to clients.
495  bool isOutput = isOutputTensor(t);
496  Type elementType = stt.getElementType();
497  if (!stt.hasEncoding()) {
498  // Non-annotated dense tensors.
499  BaseMemRefType denseTp = MemRefType::get(shape, elementType);
500 
501  // TODO: if we unconditionally use fully dynamic layout here, it breaks
502  // some vectorization passes which require a static stride == 1.
503  // Is it possible to call the vectorization pass after bufferization?
504  if (llvm::isa_and_nonnull<tensor::ExtractSliceOp>(tensor.getDefiningOp()))
505  denseTp = bufferization::getMemRefTypeWithFullyDynamicLayout(rtp);
506 
507  Value denseVal =
508  builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
509  // Dense outputs need special handling.
510  if (isOutput && updater)
511  denseVal = updater(builder, loc, denseVal, tensor);
512 
513  valBuffer[t] = denseVal;
514  } else {
515  // Annotated sparse tensors.
516  // We also need the value buffer for all-dense annotated "sparse"
517  // tensors.
518  valBuffer[t] = genToValues(builder, loc, tensor);
519  }
520  // NOTE: we can also prepare for 0 lvl here in advance, this will hoist
521  // some loop preparation from tensor iteration, but will also (undesirably)
522  // hoist the code outside of if-conditions.
523  }
524 
525  Type indexType = builder.getIndexType();
526  Value c0 = constantZero(builder, loc, indexType);
527  for (TensorId t = 0, e = tensors.size(); t < e; t++) {
528  auto rtp = dyn_cast<RankedTensorType>(tensors[t].getType());
529  if (!rtp)
530  continue;
531 
532  Level lvlRank = SparseTensorType(rtp).getLvlRank();
533  for (Level lvl = 0; lvl < lvlRank; lvl++) {
534  if (!dependentLvlMap[t][lvl].empty()) {
535  ArrayRef<std::pair<TensorLevel, unsigned>> depLvls =
536  dependentLvlMap[t][lvl];
537  // Needs at least two operands to form a non-trivial affine expression.
538  assert(depLvls.size() == sliceMeta[t][lvl].size());
539 
540  Value size = c0;
541  for (int e = depLvls.size() - 1; e >= 0; e--) {
542  auto [dt, dl] = unpackTensorLevel(depLvls[e].first);
543  unsigned stride = depLvls[e].second;
544  Value stridedSize = lvlSizes[dt][dl];
545  if (stride != 1)
546  stridedSize = MULI(stridedSize, C_IDX(stride));
547  size = ADDI(size, stridedSize);
548  sliceMeta[t][lvl][e] = std::make_pair(size, stride);
549  }
550  }
551  }
552  }
553  localInsertPos = builder.getInsertionPoint()->getPrevNode();
554 }
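// Note on the sliceMeta initialization above (an editorial sketch, assuming a
// level subscripted by the affine expression 2 * i + j): the backward loop
// records sliceMeta[t][lvl][1] = (N_j, 1) and
// sliceMeta[t][lvl][0] = (N_j + 2 * N_i, 2), where N_i and N_j are the sizes
// of the levels that loops i and j iterate over.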
555 
556 void LoopEmitter::categorizeLoopCondition(
557  ArrayRef<TensorLevel> tidLvls, SmallVectorImpl<TensorLvlCond> &dnConds,
558  SmallVectorImpl<TensorLvlCond> &spConds) {
559  // Finds out the tensor level that we should use to generate loops. Among all
560  // the tensor levels, there is at most one sparse tensor level.
561  for (auto [t, l] : unpackTensorLevelRange(tidLvls)) {
562  assert(lvlTypes[t].size() > l); // Must be a valid tid, dim pair
563  auto lvlType = lvlTypes[t][l];
564  // Must be a recognizable LT.
565  assert(isDenseLT(lvlType) || isCompressedLT(lvlType) ||
566  isLooseCompressedLT(lvlType) || isSingletonLT(lvlType) ||
567  is2OutOf4LT(lvlType));
568 
569  bool isSparse = !isDenseLT(lvlType);
570  bool isSlice = isSparseSlices[t];
571  bool isAffine = !dependentLvlMap[t][l].empty();
572  bool isUnRedu = false;
573  // TODO: Supports affine index expression on sparse tensor slices.
574  assert(!isSlice || !isAffine);
575 
576  // Whether the affine index expression has been fully reduced or not.
577  if (!dependentLvlMap[t][l].empty())
578  isUnRedu = !depFullyReduced(t, l);
579 
580  auto &dstVec = isSparse ? spConds : dnConds;
581  dstVec.emplace_back(
582  makeTensorLevel(t, l),
583  makeLoopCondKind(isSparse, isSlice, isAffine, isUnRedu));
584  }
585 
586  std::stable_sort(spConds.begin(), spConds.end(), [](auto lhs, auto rhs) {
587  // AffineUnRed > Affine > Slice > Trivial
588  return static_cast<uint8_t>(lhs.second) > static_cast<uint8_t>(rhs.second);
589  });
590 }
591 
592 void LoopEmitter::enterNewLoopSeq(OpBuilder &builder, Location loc,
593  ArrayRef<TensorLevel> tidLvls) {
594  // TODO: sort
595  assert(loopSeqStack.size() == loopStack.size());
596  // Prepares for all the tensors used in the current loop sequence.
597  std::vector<std::tuple<TensorId, Level, bool>> slicedTids;
598 
599  for (auto [tid, lvl] : unpackTensorLevelRange(tidLvls)) {
600  if (!dependentLvlMap[tid][lvl].empty()) {
601  bool fullyRed = genSliceBegin(builder, loc, tid, lvl);
602  slicedTids.emplace_back(tid, lvl, fullyRed);
603  } else if (!isSynTensor(tid)) {
604  prepareLoopOverTensorAtLvl(builder, loc, tid, lvl);
605  }
606  }
607 
608  // Universal Index starts from 0.
609  loopSeqStack.emplace_back(C_IDX(0), std::move(slicedTids));
610 }
611 
612 void LoopEmitter::exitCurrentLoopSeq(OpBuilder &builder, Location loc) {
613  assert(loopSeqStack.size() == loopStack.size() + 1);
614 
615  const auto &slicedTids = loopSeqStack.back().second;
616 
617  // Depending on whether the slice is resolved or not at the current loop sequence,
618  // end them in different ways.
619  for (auto [tid, lvl, res] : slicedTids) {
620  if (!res) {
621  // If this is an unresolved-slice-driven loop, pop out the slice.
622  assert(sliceStack[tid].back().slicedOnLvl == lvl);
623  sliceStack[tid].pop_back();
624  }
625  }
626  loopSeqStack.pop_back();
627 }
628 
629 Value LoopEmitter::genAffine(OpBuilder &builder, Location loc, AffineExpr a) {
630  switch (a.getKind()) {
631  case AffineExprKind::DimId: {
632  // FIXME: since the one callsite in Sparsification passes in a
633  // level-expression, the `getPosition` must in fact be a `Dimension`.
634  // However, elsewhere we have been led to expect that `loopIdToOrd`
635  // should be indexed by `LoopId`...
636  const auto loopId = cast<AffineDimExpr>(a).getPosition();
637  return loopStack[loopId].iv;
638  }
639  case AffineExprKind::Add: {
640  auto binOp = cast<AffineBinaryOpExpr>(a);
641  return ADDI(genAffine(builder, loc, binOp.getLHS()),
642  genAffine(builder, loc, binOp.getRHS()));
643  }
644  case AffineExprKind::Mul: {
645  auto binOp = cast<AffineBinaryOpExpr>(a);
646  return MULI(genAffine(builder, loc, binOp.getLHS()),
647  genAffine(builder, loc, binOp.getRHS()));
648  }
649  case AffineExprKind::Constant: {
650  int64_t c = cast<AffineConstantExpr>(a).getValue();
651  return C_IDX(c);
652  }
653  default:
654  llvm_unreachable("unexpected affine subscript");
655  }
656 }
657 
658 std::pair<Operation *, Value> LoopEmitter::emitForLoopOverTensorAtLvl(
659  OpBuilder &builder, Location loc, TensorId tid, Level lvl, Value lo,
660  Value hi, MutableArrayRef<Value> reduc, bool isParallel) {
661  bool isSparseCond = isCompressedLT(lvlTypes[tid][lvl]) ||
662  isLooseCompressedLT(lvlTypes[tid][lvl]) ||
663  is2OutOf4LT(lvlTypes[tid][lvl]) ||
664  isSingletonLT(lvlTypes[tid][lvl]);
665  // TODO: support dynamic slices.
666  // Uses the first dimension here to build the loop bound (which is also the
667  // biggest range).
668  Value step = C_IDX(1);
669  Operation *loop = nullptr;
670  Value iv;
671  if (isParallel) {
672  scf::ParallelOp parOp =
673  builder.create<scf::ParallelOp>(loc, lo, hi, step, reduc);
674  builder.setInsertionPointToStart(parOp.getBody());
675  assert(parOp.getNumReductions() == reduc.size());
676  iv = parOp.getInductionVars()[0];
677 
678  // In-place update on the reduction variable vector.
679  // Note that the init vals are not the actual reduction variables but are
680  // instead used as a "special handle" to (temporarily) represent them. The
681  // expression on init vals will be moved into scf.reduce and replaced with
682  // the block arguments when exiting the loop (see exitForLoop). This is
683  // needed as we cannot build the actual reduction block and get the actual
684  // reduction variable before users fill the parallel loop body.
685  for (int i = 0, e = reduc.size(); i < e; i++)
686  reduc[i] = parOp.getInitVals()[i];
687  loop = parOp;
688  } else {
689  scf::ForOp forOp = builder.create<scf::ForOp>(loc, lo, hi, step, reduc);
690  builder.setInsertionPointToStart(forOp.getBody());
691  iv = forOp.getInductionVar();
692 
693  // In-place update on the reduction variable vector.
694  assert(forOp.getNumRegionIterArgs() == reduc.size());
695  for (int i = 0, e = reduc.size(); i < e; i++)
696  reduc[i] = forOp.getRegionIterArg(i);
697  loop = forOp;
698  }
699  assert(loop && iv);
700 
701  Value crd;
702  if (isSparseCond) {
703  // For COO, the position is the same across consecutive levels.
704  /// FIXME: See the [CLARIFY_POSITS_LVL] note in the header.
705  posits[tid][lvl] = iv;
706  crd = genSparseCrd(builder, loc, tid, lvl);
707  } else {
708  // Dense tensor, the coordinate is the induction variable.
709  crd = iv;
710  }
711 
712  if (isSparseSlices[tid] && isSparseCond) {
713  // For sparse level slices, we need to filter out invalid coordinates that
714  // are not included in the slice.
715  SmallVector<Type> types;
716  for (Value red : reduc)
717  types.push_back(red.getType());
718 
719  auto [trans, pred] = genSliceLegitPredicate(builder, loc, crd, tid, lvl);
720  bool hasReduc = !types.empty();
721  scf::IfOp ifOp = builder.create<scf::IfOp>(loc, types, pred,
722  /*else*/ hasReduc);
723  if (hasReduc) {
724  // scf.for (a) -> v
725  // %s = scf.if (a) -> v
726  // user-generated code.
727  // else
728  // yield a
729  // yield %s
730  YIELD(ifOp.getResults());
731  builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
732  // On mismatch.
733  YIELD(reduc);
734  }
735  // Set the insertion point to matched branch.
736  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
737  crd = trans;
738  }
739 
740  assert(crd);
741  coords[tid][lvl] = crd;
742  return {loop, crd};
743 }
744 
745 Value LoopEmitter::genWhileLoopConditions(OpBuilder &builder, Location loc,
746  ValueRange ivs, TensorLvlCond cond) {
747  auto [tid, lvl] = unpackTensorLevel(cond.first);
748 
749  switch (cond.second) {
750  case LoopCondKind::SparseCond: {
751  assert(ivs.size() == 1);
752  // We used the first level bound as the bound for the collapsed set of levels.
753  return CMPI(ult, ivs.back(), highs[tid][lvl]);
754  }
755  case LoopCondKind::SparseSliceCond: {
756  assert(ivs.size() == 1);
757  return CMPI(ult, ivs.back(), highs[tid][lvl]);
758  }
759  case LoopCondKind::SparseAffineCond: {
760  assert(ivs.size() == 1);
761 
762  Value crdHi; // loop upper bound
763  {
764  OpBuilder::InsertionGuard guard(builder);
765  Operation *loop = builder.getInsertionBlock()->getParentOp();
766  // crdHi is a loop invariant, hoist the computation outside the loop.
767  if (llvm::isa_and_nonnull<scf::WhileOp>(loop))
768  builder.setInsertionPoint(loop);
769  auto [remSz, stride] = sliceMeta[tid][lvl].back();
770  assert(stride == 1 && "Not yet implemented");
771  crdHi = ADDI(getMostRecentSliceOnLvl(tid, lvl).offset, remSz);
772  }
773  assert(crdHi);
774  return genSparseReducedAffineCond(builder, loc,
775  coordinatesBuffers[tid][lvl], crdHi,
776  ivs[0], highs[tid][lvl]);
777  }
778  case LoopCondKind::SparseAffineUnRedCond: {
779  assert(ivs.size() == 3);
780  return ivs.front(); // isNonEmpty
781  }
782  default:
783  llvm_unreachable("Unhandled LoopCondKind");
784  }
785  llvm_unreachable("Unhandled LoopCondKind");
786 }
787 
788 std::optional<Value> LoopEmitter::genWhileLoopBody(OpBuilder &builder,
789  Location loc, ValueRange ivs,
790  TensorLvlCond cond) {
791  auto [tid, lvl] = unpackTensorLevel(cond.first);
792 
793  switch (cond.second) {
794  case LoopCondKind::SparseCond: {
795  // Updates position. For collapsed COO, the position is the same across
796  // consecutive levels.
797  posits[tid][lvl] = ivs.back();
798 
799  // Update coordinates.
800  coords[tid][lvl] = genSparseCrd(builder, loc, tid, lvl);
801  return std::nullopt;
802  }
803  case LoopCondKind::SparseSliceCond: {
804  assert(ivs.size() == 1);
805  posits[tid][lvl] = ivs.front();
806  Value sCrd = genSparseCrd(builder, loc, tid, lvl);
807  // Converts the coordinate loaded from the actual sparse tensor to the
808  // coordinates in the sparse slice.
809  auto [dCrd, pred] = genSliceLegitPredicate(builder, loc, sCrd, tid, lvl);
810  coords[tid][lvl] = dCrd;
811  return pred;
812  }
813  case LoopCondKind::SparseAffineCond: {
814  assert(ivs.size() == 1);
815  // Coord is the offset relative to its parents.
816  assert(sliceStack[tid].back().depth == 1 && "TODO: not yet implemented");
817  // Update c = absOffset[lvl][depth] - absOffset[lvl][depth - 1]
818  Value posit = ivs[0];
819  Value crdBuf = coordinatesBuffers[tid][lvl];
820  // We need to subtract the offset to get relative coordinates.
821  // TODO: Maybe assert relC >=0 during runtime in debug build?
822  Value absC = genIndexLoad(builder, loc, crdBuf, posit);
823  auto relC = SUBI(absC, getFinalSliceOnLvl(tid, lvl).offset);
824  posits[tid][lvl] = posit;
825  coords[tid][lvl] = relC;
826  return std::nullopt;
827  }
828  case LoopCondKind::SparseAffineUnRedCond: {
829  unsigned depth = sliceStack[tid].back().depth;
830  unsigned curStride = sliceMeta[tid][lvl][depth - 1].second;
831  assert(ivs.size() == 3);
832 
833  // Updates the current slice info
834  SliceInfo &sliceInfo = sliceStack[tid].back();
835  sliceInfo.isNonEmpty = ivs[0];
836  sliceInfo.minCrd = ivs[1];
837  sliceInfo.offset = ivs[2];
838 
839  // Crd (the value we use to coiterate) is the offset relative to its
840  // parents; we can use the absolute offset here because when depth == 1,
841  // absOffset[lvl][depth - 1] always equals zero.
842  // TODO: Update crd = absOffset[lvl][depth] - absOffset[lvl][depth - 1]
843  assert(depth == 1 && "TODO: not yet implemented");
844  Value crd = sliceInfo.offset;
845 
846  Value onStride = constantI1(builder, loc, true);
847  if (curStride != 1) {
848  Value strideVal = C_IDX(curStride);
849  Value rem = REMUI(crd, strideVal);
850  crd = DIVUI(crd, strideVal);
851  onStride = CMPI(eq, rem, C_IDX(0));
852  }
853  coords[tid][lvl] = crd;
854  // No extra check is needed before accessing the tensor level.
855  return onStride;
856  }
857  default:
858  llvm_unreachable("Unhandled LoopCondKind");
859  }
860  llvm_unreachable("Unhandled LoopCondKind");
861 }
862 
863 ValueRange LoopEmitter::genCheckedValue(OpBuilder &builder, Location loc,
864  Value pred, ValueRange curArgs,
865  TensorLvlCond cond) {
866  assert(isSparseCond(cond.second));
867  auto [tid, lvl] = unpackTensorLevel(cond.first);
868  if (isAffineIdxUnRedCond(cond.second)) {
869  unsigned depth = sliceStack[tid].back().depth;
870  unsigned curStride = sliceMeta[tid][lvl][depth - 1].second;
871  if (curStride == 1)
872  return curArgs;
873  // Build
874  // if (onStride) {
875  // yield curSlice
876  // } else {
877  // yield nxSlice.
878  //}
879  assert(curArgs.size() == 3);
880  auto ifOp = builder.create<scf::IfOp>(loc, curArgs.getTypes(), pred, true);
881  {
882  OpBuilder::InsertionGuard guard(builder);
883  // If the current slice is still on stride, yield the current arguments.
884  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
885 
886  YIELD(curArgs);
887  // If not all slices are legit, yield the updated value.
888  builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
889  auto [nonEmpty, minCrd, offset] =
890  genSliceNextInduction(builder, loc, tid, lvl);
891  SmallVector<Value> nxSlice{nonEmpty, minCrd, offset};
892  YIELD(nxSlice);
893  }
894  // If all slices are legit, start the user generated code.
895  return ifOp.getResults();
896  } else {
897  // Currently only the sparse slice condition needs an extra check.
898  assert(isSliceCond(cond.second) && isSparseCond(cond.second));
899  assert(curArgs.size() == 1);
900  Value nextPos = ADDI(curArgs.front(), C_IDX(1));
901  return SELECT(pred, curArgs.front(), nextPos)->getResults();
902  }
903  llvm_unreachable("unhandled case");
904 }
905 
906 std::pair<Operation *, Value> LoopEmitter::emitWhileLoopOverTensorsAtLvls(
907  OpBuilder &builder, Location loc, ArrayRef<TensorLvlCond> spConds,
908  MutableArrayRef<Value> reduc, bool needsUniv) {
909  // NOTE: the slice driven tensor-related reduction variable must
910  // appear before normal tensors.
911  assert(!spConds.empty());
912 
913  // The set of induction variables for the while loop.
914  SmallVector<Value> ivs;
915  // Segment sizes for induction variables used for different kinds of loop
916  // conditions.
917  SmallVector<unsigned> opSegSize;
918 
919  // Construct the while-loop with a parameter for each coordinate.
920  for (auto [tl, cKind] : spConds) {
921  auto [tid, lvl] = unpackTensorLevel(tl);
922  const auto lvlTp = lvlTypes[tid][lvl];
923  // Dense levels are handled by the shared universal index.
924  assert(!isDenseCond(cKind));
925  // Must be a recognizable sparse level.
926  assert(isCompressedLT(lvlTp) || isLooseCompressedLT(lvlTp) ||
927  isSingletonLT(lvlTp));
928  (void)lvlTp;
929 
930  unsigned prevSz = ivs.size();
931  if (isAffineIdxCond(cKind)) {
932  // TODO: Support view-based reshape on sparse levels with affine index
933  // expressions.
934  if (isAffineIdxUnRedCond(cKind)) {
935  SliceInfo &sliceInfo = sliceStack[tid].back();
936  // The order matters!
937  ivs.push_back(sliceInfo.isNonEmpty);
938  ivs.push_back(sliceInfo.minCrd);
939  ivs.push_back(sliceInfo.offset);
940  } else {
941  ivs.push_back(posits[tid][lvl]); // loop lower bound (pos low).
942  }
943  // We reduced one more dependency after entering the loop.
944  levelReducedDep[tid][lvl]++;
945  } else {
946  assert(dependentLvlMap[tid][lvl].empty());
947  const Value pos = posits[tid][lvl];
948  ivs.push_back(pos);
949  }
950  opSegSize.push_back(ivs.size() - prevSz);
951  }
952 
953  // The position where user-supplied reduction variable starts.
954  ivs.append(reduc.begin(), reduc.end());
955  // Update universal index.
956  if (needsUniv)
957  ivs.push_back(loopSeqStack.back().first);
958 
959  // Ensures all operands are valid.
960  assert(llvm::all_of(ivs, [](Value v) { return v != nullptr; }));
961  TypeRange types = ValueRange(ivs).getTypes();
962  auto whileOp = builder.create<scf::WhileOp>(loc, types, ivs);
963 
964  SmallVector<Location> locs(types.size(), loc);
965  Block *before = builder.createBlock(&whileOp.getBefore(), {}, types, locs);
966  Block *after = builder.createBlock(&whileOp.getAfter(), {}, types, locs);
967 
968  // Generates loop conditions.
969  builder.setInsertionPointToStart(before);
970  ValueRange bArgs = before->getArguments();
971  Value whileCond = nullptr; // bool values for loop condition.
972  for (auto [c, segSz] : llvm::zip_equal(spConds, opSegSize)) {
973  Value cv = genWhileLoopConditions(builder, loc, bArgs.take_front(segSz), c);
974  bArgs = bArgs.drop_front(segSz);
975  whileCond = !whileCond ? cv : ANDI(whileCond, cv);
976  }
977  // The remaining block arguments are user-provided reduction values and an
978  // optional universal index. Make sure their sizes match.
979  assert(bArgs.size() == reduc.size() + (needsUniv ? 1 : 0));
980  builder.create<scf::ConditionOp>(loc, whileCond, before->getArguments());
981 
982  // Generates loop body.
983  builder.setInsertionPointToStart(after);
984  ValueRange aArgs = after->getArguments();
985  // Since some LoopCondKind might need extra checks to filter out invalid
986  // iterations, we maintain another array to hold the iteration arguments to
987  // yield if the checks fail.
988  SmallVector<Value> nextArgs(aArgs.begin(), aArgs.end());
989  // A mutable alias for convenient slicing.
990  MutableArrayRef<Value> nextArgsRef = nextArgs;
991  Value extraPred = nullptr;
992  for (auto [c, segSz] : llvm::zip_equal(spConds, opSegSize)) {
993  ValueRange condArgs = aArgs.take_front(segSz);
994  auto pred = genWhileLoopBody(builder, loc, condArgs, c);
995  assert(pred.has_value() == isCondWithExtraCheck(c.second));
996  if (pred.has_value()) {
997  // We need all extra checks to pass.
998  extraPred = extraPred == nullptr ? *pred : ANDI(*pred, extraPred);
999  ValueRange nxArgs = genCheckedValue(builder, loc, *pred, condArgs, c);
1000  assert(nxArgs.size() == segSz);
1001  // Update the value for cases when some check fails.
1002  for (unsigned i = 0; i < segSz; i++) {
1003  nextArgsRef[i] = nxArgs[i];
1004  }
1005  }
1006  aArgs = aArgs.drop_front(segSz);
1007  nextArgsRef = nextArgsRef.drop_front(segSz);
1008  }
1009 
1010  if (extraPred) {
1011  auto ifOp = builder.create<scf::IfOp>(loc, types, extraPred, /*else*/ true);
1012  // Marks this special IfOp so that Sparsification does not finalize it.
1013  ifOp->setAttr(getLoopEmitterLoopAttrName(),
1014  StringAttr::get(builder.getContext(), "slice"));
1015  // Links the SSA chain outside the if statement.
1016  YIELD(ifOp->getResults());
1017 
1018  // If not all slices are legit, yield the updated value.
1019  builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
1020  YIELD(nextArgs);
1021 
1022  // If all slices are legit, start the user generated code.
1023  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
1024  }
1025 
1026  for (auto [tid, lvl] : unpackTensorLevelFromCondRange(spConds)) {
1027  // Generates segment high for non-unique level.
1028  if (!isUniqueLT(lvlTypes[tid][lvl])) {
1029  segHi[tid][lvl] = genSegmentHigh(builder, loc, tid, lvl, posits[tid][lvl],
1030  highs[tid][lvl]);
1031  }
1032  }
1033 
1034  // In-place update on reduction variable.
1035  assert(aArgs.size() == reduc.size() + (needsUniv ? 1 : 0));
1036  for (unsigned i = 0, e = reduc.size(); i < e; i++)
1037  reduc[i] = aArgs[i];
1038 
1039  Value min;
1040  // Finds the minimum coordinate
1041  if (!needsUniv) {
1042  for (auto [tid, lvl] : unpackTensorLevelFromCondRange(spConds)) {
1043  const auto lvlTp = lvlTypes[tid][lvl];
1044  if (isCompressedLT(lvlTp) || isSingletonLT(lvlTp) ||
1045  isLooseCompressedLT(lvlTp)) {
1046  const auto crd = coords[tid][lvl];
1047  if (min) {
1048  Value cmp = CMPI(ult, coords[tid][lvl], min);
1049  min = SELECT(cmp, coords[tid][lvl], min);
1050  } else {
1051  min = crd;
1052  }
1053  }
1054  }
1055  } else {
1056  assert(!min);
1057  // Otherwise, universal index is the minimal pos.
1058  min = whileOp.getAfterArguments().back();
1059  }
1060 
1061  return {whileOp, min};
1062 }
1063 
1064 bool LoopEmitter::shouldIteratedByForLoop(ArrayRef<TensorLvlCond> sparseConds,
1065  bool genDedup) {
1066  assert(llvm::all_of(sparseConds,
1067  [](TensorLvlCond c) { return isSparseCond(c.second); }));
1068 
1069  // If we need to co-iterate over two sparse tensors, we need a while loop
1070  if (sparseConds.size() > 1)
1071  return false;
1072 
1073  // We also need a while loop for levels with affine index expression and
1074  // non-unique levels when deduplication is required.
1075  if (sparseConds.size() == 1) {
1076  auto [tid, lvl] = unpackTensorLevel(sparseConds.back().first);
1077  return !isAffineIdxCond(sparseConds.back().second) &&
1078  !(genDedup && !isUniqueLT(lvlTypes[tid][lvl]));
1079  }
1080 
1081  return true;
1082 }
1083 
1084 Operation *LoopEmitter::enterCoIterationOverTensorsAtLvls(
1085  OpBuilder &builder, Location loc, ArrayRef<TensorLevel> tidLvls,
1086  MutableArrayRef<Value> reduc, bool tryParallel, bool genDedup,
1087  bool needsUniv) {
1088 #ifndef NDEBUG
1089  // Sanity checks.
1090  assert(!tidLvls.empty());
1091  for (auto [t, l] : unpackTensorLevelRange(tidLvls)) {
1092  assert(!coords[t][l] || // We cannot re-enter the same level
1093  !dependentLvlMap[t][l].empty()); // unless it is a slice-driver loop
1094  }
1095 #endif
1096  // TODO: support multiple return on parallel for?
1097  tryParallel = tryParallel && reduc.size() <= 1;
1098 
1098 
1099  SmallVector<TensorLvlCond> dnConds;
1100  SmallVector<TensorLvlCond> spConds;
1101  categorizeLoopCondition(tidLvls, dnConds, spConds);
1102 
1103  // Only when there is at least one sparse condition do we really need the
1104  // universal index.
1105  // TODO: Maybe we should instead require the merger to pass in a valid value
1106  // in the first place instead of adjusting it in LoopEmitter?
1107  needsUniv = !spConds.empty() && needsUniv;
1108  // The TensorLevel used for loop conditions.
1109  // If there is any sparse level, we need to use the sparse condition.
1110  // If all levels are dense, we can pick arbitrary one (dense slice-driven loop
1111  // can be generated using a simple ForOp as well).
1112  Operation *l = nullptr;
1113  Value iv = nullptr;
1114  SmallVector<SliceLoopInfo> sliceDrivenInfo;
1115  SmallVector<TensorLevel> trivialLvls;
1116 
1117  // Generates loops differently depending on whether we need a slice-driven
1118  // loop or a simple level traversal loop.
1119  if (shouldIteratedByForLoop(spConds, genDedup) && !needsUniv) {
1120  assert(spConds.size() <= 1);
1121  TensorLvlCond tlCond = spConds.empty() ? dnConds.front() : spConds.front();
1122  auto loopCondKind = tlCond.second;
1123  auto [tid, lvl] = unpackTensorLevel(tlCond.first);
1124  Value lo = isSparseCond(loopCondKind)
1125  ? posits[tid][lvl] // current offset
1126  : loopSeqStack.back().first; // universal index
1127  Value hi = highs[tid][lvl];
1128  if (isDenseCond(loopCondKind) && isAffineIdxCond(loopCondKind)) {
1129  bool unReduc = isAffineIdxUnRedCond(loopCondKind);
1130  assert(unReduc == !depFullyReduced(tid, lvl));
1131  unsigned depth = sliceStack[tid].back().depth;
1132  assert(depth >= 1);
1133  // The *next* slice size after reducing the current index variable.
1134  auto [nxSz, nxStride] = sliceMeta[tid][lvl][depth];
1135  // The *current* stride to reduce the current index variable.
1136  // E.g., for 2 * i, stride = 2.
1137  unsigned stride = sliceMeta[tid][lvl][depth - 1].second;
1138  hi = nxSz;
1139  if (unReduc) {
1140  // Adjust the for-loop `hi` for a dense slice-driven loop.
1141  hi = SUBI(lvlSizes[tid][lvl], hi);
1142  hi = ADDI(hi, C_IDX(1));
1143  hi = DIVUI(hi, C_IDX(stride));
1144  } else {
1145  // TODO: dilated convolution.
1146  assert(nxStride == 1 && "Not yet implemented.");
1147  }
1148  }
1149  std::tie(l, iv) = emitForLoopOverTensorAtLvl(builder, loc, tid, lvl, lo, hi,
1150  reduc, tryParallel);
1151  // A for-loop condition must be a trivial condition (a level without an
1152  // affine index expression).
1153  trivialLvls.push_back(tlCond.first);
1154  } else {
1155  for (auto [tl, cKind] : spConds) {
1156  if (isAffineIdxCond(cKind)) {
1157  auto [tid, lvl] = unpackTensorLevel(tl);
1158  bool unReduc = isAffineIdxUnRedCond(cKind);
1159  assert(unReduc == !depFullyReduced(tid, lvl));
1160  sliceDrivenInfo.emplace_back(tid, lvl, /*fullyReduced=*/!unReduc);
1161  } else {
1162  trivialLvls.push_back(tl);
1163  }
1164  }
1165 
1166  std::tie(l, iv) =
1167  emitWhileLoopOverTensorsAtLvls(builder, loc, spConds, reduc, needsUniv);
1168  }
1169 
1170  // Enter dense tensor levels.
1171  enterTensorsAtDenseLvls(builder, loc, dnConds, iv, sliceDrivenInfo);
1172  // NOTE: we can also prepare for next dim here in advance
1173 
1174  // Pushes the loop into stack.
1175  loopStack.emplace_back(trivialLvls, sliceDrivenInfo, l,
1176  builder.getInsertionBlock(), iv, loopTag);
1177  return l;
1178 }
1179 
1180 Operation *LoopEmitter::enterFilterLoopOverTensorAtLvl(
1181  OpBuilder &builder, Location loc, TensorId tid, Level lvl,
1182  AffineExpr affine, MutableArrayRef<Value> reduc) {
1183  assert(isValidLevel(tid, lvl));
1184  assert(!isa<AffineDimExpr>(affine) && !isDenseLT(lvlTypes[tid][lvl]));
1185  // We can not re-enter the same level.
1186  assert(!coords[tid][lvl]);
1187 
1188  // TODO: We should instead use a whileOp for filter loop to allow early
1189  // break when exceeding (for ordered levels).
1190  // TODO: There are many other potential opportunities that we might apply in
1191  // the future. E.g., we could use binary search to locate positions.
1192  const Value step = C_IDX(1);
1193  const Value pLo = posits[tid][lvl];
1194  const Value pHi = highs[tid][lvl];
1195  scf::ForOp forOp = builder.create<scf::ForOp>(loc, pLo, pHi, step, reduc);
1196 
1197  // In-place update on the reduction variable vector.
1198  assert(forOp.getNumRegionIterArgs() == reduc.size());
1199  for (int i = 0, e = reduc.size(); i < e; i++)
1200  reduc[i] = forOp.getRegionIterArg(i);
1201 
1202  builder.setInsertionPointToStart(forOp.getBody());
1203  // The induction variable gives the position.
1204  const Value pos = forOp.getInductionVar();
1205  posits[tid][lvl] = pos;
1206  // Generating a load on the coordinates array yields the crd.
1207  const Value mem = coordinatesBuffers[tid][lvl];
1208  const Value crd = genIndexLoad(builder, loc, mem, pos);
1209  coords[tid][lvl] = crd;
1210 
1211  // Generate an if-condition to filter out coordinates that are not
1212  // equal to the result of the affine expression.
1213  Value expected = genAffine(builder, loc, affine);
1214  auto pred = CMPI(eq, crd, expected);
1215  SmallVector<Type> types;
1216  for (Value red : reduc) {
1217  types.push_back(red.getType());
1218  }
1219 
1220  bool hasReduc = !types.empty();
1221  scf::IfOp ifOp =
1222  builder.create<scf::IfOp>(loc, types, pred, /*else*/ hasReduc);
1223  if (hasReduc) {
1224  // scf.for (a) -> v
1225  // %s = scf.if (a) -> v
1226  // user-generated code.
1227  // else
1228  // yield a
1229  // yield %s
1230  YIELD(ifOp.getResults());
1231  builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
1232  // On mismatch.
1233  YIELD(reduc);
1234  }
1235  // Set the insert point to matched branch.
1236  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
1237 
1238  // NOTE: we can also prepare for next lvl here in advance
1239  // Push the loop into stack
1240  loopStack.emplace_back(ArrayRef<TensorLevel>(makeTensorLevel(tid, lvl)),
1241  ArrayRef<SliceLoopInfo>(), forOp,
1242  builder.getInsertionBlock(), coords[tid][lvl],
1243  nullptr);
1244  return forOp;
1245 }
1246 
1247 void LoopEmitter::genDenseAffineAddress(OpBuilder &builder, Location loc,
1248  TensorLevel tidLvl,
1249  AffineExpr lvlExpr) {
1250  auto [tid, lvl] = unpackTensorLevel(tidLvl);
1251  assert(isDenseLT(lvlTypes[tid][lvl]));
1252  // For dense levels, the level-coordinate also serves as the position.
1253  Value lvlCrd = genAffine(builder, loc, lvlExpr);
1254  posits[tid][lvl] = genAddress(builder, loc, tid, lvl, lvlCrd);
1255 }
1256 
1257 void LoopEmitter::prepareLoopOverTensorAtLvl(OpBuilder &builder, Location loc,
1258  TensorId tid, Level lvl) {
1259  assert(isValidLevel(tid, lvl));
1260  const auto lvlTp = lvlTypes[tid][lvl];
1261 
1262  if (isDenseLT(lvlTp))
1263  return;
1264 
1265  const Value c0 = C_IDX(0);
1266  const Value c1 = C_IDX(1);
1267  const Value c2 = C_IDX(2);
1268  // Either the first level, or the previous level has been set.
1269  /// FIXME: See the [CLARIFY_POSITS_LVL] note in the header.
1270  assert(lvl == 0 || posits[tid][lvl - 1]);
1271  if (isCompressedLT(lvlTp) || isLooseCompressedLT(lvlTp)) {
1272  const Value mem = positionsBuffers[tid][lvl];
1273 
1274  Value pLo = lvl == 0 ? c0 : posits[tid][lvl - 1];
1275  if (isLooseCompressedLT(lvlTp))
1276  pLo = builder.create<arith::MulIOp>(loc, pLo, c2);
1277  posits[tid][lvl] = genIndexLoad(builder, loc, mem, pLo);
1278 
1279  const Value pHi = ADDI(pLo, c1);
1280  highs[tid][lvl] = genIndexLoad(builder, loc, mem, pHi);
1281  return;
1282  }
1283  if (isSingletonLT(lvlTp)) {
1284  const Value pLo = lvl == 0 ? c0 : posits[tid][lvl - 1];
1285  posits[tid][lvl] = pLo;
1286 
1287  // If we are coiterating non-unique levels, then use pHi=segHi;
1288  // otherwise use pHi=pLo+1.
1289  // NOTE: Just because the level is non-unique, that does not
1290  // guarantee that segHi is defined: because we only generate segHi
1291  // whenever coiterating, in order to improve code quality for the
1292  // non-coiterating cases.
1293  const auto parentSegHi = segHi[tid][lvl - 1];
1294  highs[tid][lvl] = (!isUniqueLT(lvlTypes[tid][lvl - 1]) && parentSegHi)
1295  ? parentSegHi
1296  : ADDI(pLo, c1);
1297  return;
1298  }
1299  if (is2OutOf4LT(lvlTp)) {
1300  const Value pLo = lvl == 0 ? c0 : posits[tid][lvl - 1];
1301  // Each 2:4 block has exactly two specified elements.
1302  posits[tid][lvl] = MULI(pLo, c2);
1303  highs[tid][lvl] = ADDI(posits[tid][lvl], c2);
1304  return;
1305  }
1306  llvm_unreachable("Unrecognized level-type!");
1307 }
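// For illustration (example values): for a compressed level with positions
// buffer pos = [0, 2, 5] and parent position 1, the code above sets
// posits[tid][lvl] = pos[1] = 2 and highs[tid][lvl] = pos[2] = 5, i.e., the
// entries of that segment live in the position range [2, 5).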
1308 
1309 void LoopEmitter::enterTensorsAtDenseLvls(
1310  OpBuilder &builder, Location loc, ArrayRef<TensorLvlCond> dnConds, Value iv,
1311  SmallVectorImpl<SliceLoopInfo> &sliceInfo) {
1312  for (auto [dnTidLvl, denseLoopCond] : dnConds) {
1313  auto [tid, lvl] = unpackTensorLevel(dnTidLvl);
1314  assert(isDenseLT(lvlTypes[tid][lvl]));
1315 
1316  if (isAffineIdxCond(denseLoopCond)) {
1317  // Pushes sliced levels to build correct LoopInfo.
1318  bool unReduc = isAffineIdxUnRedCond(denseLoopCond);
1319  SliceInfo &info = sliceStack[tid].back();
1320  // Pushes sliced dense loop info to tell LoopEmitter how to exit it.
1321  sliceInfo.emplace_back(tid, lvl, /*fullyReduced=*/!unReduc);
1322  // FIXME: The offset and position iterator need to be adjusted when the
1323  // slice is strided.
1324  if (unReduc) {
1325  assert(*info.slicedOnLvl == lvl);
1326  unsigned depth = sliceStack[tid].back().depth;
1327  assert(depth >= 1);
1328  unsigned stride = sliceMeta[tid][lvl][depth - 1].second;
1329  // Update the slice information as we enter the new loop.
1330  info.minCrd = info.offset = MULI(iv, C_IDX(stride));
1331  info.isNonEmpty = constantI1(builder, loc, true);
1332  } else {
1333  posits[tid][lvl] =
1334  genAddress(builder, loc, tid, lvl, ADDI(info.offset, iv));
1335  }
1336  levelReducedDep[tid][lvl]++;
1337  } else {
1338  // Skips the synthetic tensor
1339  if (isSynTensor(tid))
1340  continue;
1341  // A dense level with trivial index expression.
1342  assert(dependentLvlMap[tid][lvl].empty());
1343  auto enc = getSparseTensorEncoding(tensors[tid].getType());
1344  if (enc && !isSparseOutput(tid)) {
1345  bool validPos = lvl == 0 || posits[tid][lvl - 1];
1346  if (!validPos) {
1347  // We might not find the pos for the sparse output tensor as it is
1348  // unconditionally required by the sparsification.
1349  assert(isOutputTensor(tid));
1350  continue;
1351  }
1352  posits[tid][lvl] = genAddress(builder, loc, tid, lvl, iv);
1353  // NOTE: we can also prepare for next lvl here in advance
1354  }
1355  }
1356  }
1357 }
1358 
1359 void LoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc,
1360  MutableArrayRef<Value> reduc) {
1361  const LoopInfo &loopInfo = loopStack.back();
1362  for (auto [tid, lvl, reduced] : loopInfo.sliceDrivenInfo) {
1363  if (!reduced) {
1364  SliceInfo &info = sliceStack[tid].back();
1365  assert(isDenseLT(lvlTypes[tid][lvl]));
1366  assert(*info.slicedOnLvl == lvl);
1367  (void)reduced;
1368  // Resets slice pointers as the resolved slices are invalidated after we
1369  // move forward to the next slice.
1370  invalidateSliceIterIdx(rewriter, loc, tid, lvl);
1371  info.minCrd = info.offset = info.isNonEmpty = Value();
1372  } else {
1373  forwardsReducedSliceLevelTreeIt(rewriter, loc, tid, lvl,
1374  constantIndex(rewriter, loc, 1));
1375  }
1376  levelReducedDep[tid][lvl]--;
1377  }
1378  if (auto forOp = llvm::dyn_cast<scf::ForOp>(loopInfo.loop)) {
1379  if (!reduc.empty()) {
1380  assert(reduc.size() == forOp.getNumResults());
1381  rewriter.create<scf::YieldOp>(loc, reduc);
1382  }
1383  // Exit the loop.
1384  rewriter.setInsertionPointAfter(forOp);
1385  // In-place update reduction variables.
1386  for (unsigned i = 0, e = forOp.getResults().size(); i < e; i++)
1387  reduc[i] = forOp.getResult(i);
1388  } else {
1389  auto parOp = llvm::cast<scf::ParallelOp>(loopInfo.loop);
1390  if (!reduc.empty()) {
1391  assert(reduc.size() == parOp.getInitVals().size() && reduc.size() == 1);
1392  Operation *redExp = reduc.front().getDefiningOp();
1393  // Reduction expression should have no use.
1394  assert(redExp->getUses().empty());
1395  // This must be a binary operation.
1396  // NOTE: It is the user's responsibility to ensure that the operation is
1397  // commutative.
1398  assert(redExp->getNumOperands() == 2 && redExp->getNumResults() == 1);
1399 
1400  Value redVal = parOp.getInitVals().front();
1401  Value curVal;
1402  if (redExp->getOperand(0) == redVal)
1403  curVal = redExp->getOperand(1);
1404  else if (redExp->getOperand(1) == redVal)
1405  curVal = redExp->getOperand(0);
1406  // One of the operands must be the init value (which is also the
1407  // previous reduction value).
1408  assert(curVal);
1409 #ifndef NDEBUG
1410  // The reduction expression should be the only user of the reduction val
1411  // inside the parallel for.
1412  unsigned numUsers = 0;
1413  for (Operation *op : redVal.getUsers()) {
1414  if (op->getParentOp() == parOp)
1415  numUsers++;
1416  }
1417  assert(numUsers == 1);
1418 #endif // NDEBUG
1419 
1420  rewriter.setInsertionPointAfter(redExp);
1421  auto redOp = rewriter.create<scf::ReduceOp>(loc, curVal);
1422  // Attach to the reduction op.
1423  Block *redBlock = &redOp.getRegion().getBlocks().front();
1424  rewriter.setInsertionPointToEnd(redBlock);
1425  Operation *newRed = rewriter.clone(*redExp);
1426  // Replaces arguments of the reduction expression by using the block
1427  // arguments from scf.reduce.
1428  rewriter.updateRootInPlace(
1429  newRed, [&]() { newRed->setOperands(redBlock->getArguments()); });
1430  // Erases the out-dated reduction expression.
1431  rewriter.eraseOp(redExp);
1432  rewriter.setInsertionPointToEnd(redBlock);
1433  rewriter.create<scf::ReduceReturnOp>(loc, newRed->getResult(0));
1434  }
1435  rewriter.setInsertionPointAfter(parOp);
1436  // In-place update reduction variables.
1437  for (unsigned i = 0, e = parOp.getResults().size(); i < e; i++)
1438  reduc[i] = parOp.getResult(i);
1439  }
1440 
1441  // Finished iterating a tensor; clean up.
1442  // We only do the clean-up for for-loops, since while-loops do not
1443  // necessarily finish iterating over a sparse tensor.
1444  for (auto [tid, lvl] : unpackTensorLevelRange(loopInfo.trivialTidLvls)) {
1445  // Reset to null.
1446  coords[tid][lvl] = Value();
1447  posits[tid][lvl] = Value();
1448  // For a dense level, `high` is fixed, so it is not reset.
1449  if (!isDenseLT(lvlTypes[tid][lvl]))
1450  highs[tid][lvl] = Value();
1451  }
1452 }
1453 
1454 void LoopEmitter::forwardsReducedSliceLevelTreeIt(OpBuilder &builder,
1455  Location loc, TensorId tid,
1456  Level rootLvl, Value fcnt) {
1457  auto stt = getSparseTensorType(tensors[tid]);
1458 
1459  // Finds a [rootLvl, leafLvl) range in which all levels in between are
1460  // fully reduced (but not resolved). Since we forward an iterator at a
1461  // higher level of the tree, the subtree needs to be pruned.
1462  Level leafLvl = rootLvl + 1;
1463  while (leafLvl < stt.getLvlRank() && !dependentLvlMap[tid][leafLvl].empty() &&
1464  depFullyReduced(tid, leafLvl)) {
1465  leafLvl++;
1466  }
1467 
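  // For example, if rootLvl is followed by one fully reduced dense level of
  // size N and then a fully reduced compressed level, the dense level below
  // multiplies the forward count by N, and the compressed level is then
  // forwarded by bumping its slice position pointer by that count.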
1468  Level curLvl = rootLvl + 1;
1469  // Prunes all dense subtrees.
1470  while (curLvl < leafLvl && isDenseLT(lvlTypes[tid][curLvl])) {
1471  // One step forward in the parent level results in forwarding `slice.size`
1472  // steps in the child dense level.
1473  auto [size, stride] = sliceMeta[tid][curLvl].back();
1474  assert(stride == 1 && "Not yet implemented");
1475  fcnt = MULI(size, fcnt);
1476  curLvl++;
1477  }
1478 
1479  Value nxPosPtr = nullptr;
1480  if (curLvl < leafLvl) {
1481  assert(!isDenseLT(lvlTypes[tid][curLvl]));
1482  // This is the first compressed level; set up the position pointer for it.
1483  Value sPosBuf = slicePosBuffer[tid][curLvl].back();
1484  // One step forward in the parent level results in forwarding one `segment`
1485  // in the child sparse level.
1486  Value pPosPtr = loadSlicePosPtr(builder, loc, sPosBuf); // previous ptr
1487  Value cPosPtr = ADDI(fcnt, pPosPtr); // current ptr
1488  updateSlicePosPtr(builder, loc, sPosBuf, cPosPtr);
1489  // Loads the starting position pointer for the next level.
1490  nxPosPtr =
1491  loadSlicePos(builder, loc, sPosBuf, cPosPtr, SlicePosKind::kNext);
1492  curLvl++;
1493  }
1494 
1495  // TODO: This is not always needed, but we do it unconditionally for now
1496  // for simplicity.
1497  // It is only needed when `curLvl` is forwarded without traversing its child
1498  // level (e.g., the level is in a conjunctive lattice and got pruned), such
1499  // that the position pointer is not forwarded inside the loop.
1500  for (; curLvl < leafLvl; curLvl++) {
1501  assert(nxPosPtr);
1502  if (!isDenseLT(lvlTypes[tid][curLvl])) {
1503  Value sPosBuf = slicePosBuffer[tid][curLvl].back();
1504  updateSlicePosPtr(builder, loc, sPosBuf, nxPosPtr);
1505  nxPosPtr =
1506  loadSlicePos(builder, loc, sPosBuf, nxPosPtr, SlicePosKind::kNext);
1507  }
1508  }
1509 }
1510 
1511 void LoopEmitter::exitWhileLoop(OpBuilder &builder, Location loc,
1512  MutableArrayRef<Value> reduc) {
1513  const LoopInfo &loopInfo = loopStack.back();
1514  auto whileOp = llvm::cast<scf::WhileOp>(loopInfo.loop);
1515  Value iv = loopInfo.iv;
1516  Value one = C_IDX(1);
1517 
1518  // Finalize the induction. Note that the induction could be performed
1519  // in the individual if-branches to avoid re-evaluating the conditions.
1520  // However, that would result in a rather elaborate forest of yield
1521  // instructions during code generation. Moreover, performing the induction
1522  // after the if-statements more closely resembles code generated by TACO.
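  // Roughly (a sketch of the generated IR): for each co-iterated tensor level
  // we emit
  //   %cmp = arith.cmpi eq, %coord, %iv
  //   %new = arith.select %cmp, %pos_plus_one, %pos
  // and yield %new as that level's position for the next iteration.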
1523  unsigned o = 0;
1524  SmallVector<Value> operands;
1525  unsigned delta = 0;
1526  for (auto [tid, lvl, resolved] : loopInfo.sliceDrivenInfo) {
1527  // TODO: handle dense.
1528  assert(isCompressedLT(lvlTypes[tid][lvl]));
1529  levelReducedDep[tid][lvl]--;
1530  if (!resolved) {
1531  // TODO: support coiterating multiple slices
1532  assert(loopInfo.sliceDrivenInfo.size() == 1);
1533  auto [nxNonEmpty, nxMinCrd, nxAbsOffset] =
1534  genSliceNextInduction(builder, loc, tid, lvl);
1535  // Update while loop induction operands.
1536  operands.push_back(nxNonEmpty);
1537  operands.push_back(nxMinCrd);
1538  operands.push_back(nxAbsOffset);
1539 
1540  // Update the slice stack.
1541  SliceInfo &info = sliceStack[tid].back();
1542  info.isNonEmpty = whileOp.getResult(o++);
1543  info.minCrd = whileOp.getResult(o++);
1544  info.offset = whileOp.getResult(o++);
1545  continue;
1546  }
1547 
1548  Value forwarded = nullptr;
1549  if (loopInfo.trivialTidLvls.empty() &&
1550  loopInfo.sliceDrivenInfo.size() == 1) {
1551  // Forwards the position iterator.
1552  operands.push_back(ADDI(posits[tid][lvl], one));
1553  forwarded = constantI1(builder, loc, true);
1554  } else {
1555  const Value pos = posits[tid][lvl];
1556  const Value nxPos = ADDI(posits[tid][lvl], one);
1557  forwarded = CMPI(eq, coords[tid][lvl], iv);
1558  operands.push_back(SELECT(forwarded, nxPos, pos));
1559  }
1560  {
1561  OpBuilder::InsertionGuard guard(builder);
1562  auto ifOp = builder.create<scf::IfOp>(loc, TypeRange{}, forwarded,
1563  /*else=*/false);
1564  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
1565  forwardsReducedSliceLevelTreeIt(builder, loc, tid, lvl, one);
1566  }
1567  // The coordinate is invalid now.
1568  coords[tid][lvl] = nullptr;
1569 
1570  // Update the position iterator as we exit the while loop.
1571  posits[tid][lvl] = whileOp->getResult(o++);
1572  };
1573 
1574  for (auto [tid, lvl] : unpackTensorLevelRange(loopInfo.trivialTidLvls)) {
1575  const auto lvlTp = lvlTypes[tid][lvl];
1576  if (isCompressedLT(lvlTp) || isSingletonLT(lvlTp) ||
1577  isLooseCompressedLT(lvlTp)) {
1578  const Value crd = coords[tid][lvl];
1579  const Value pos = posits[tid][lvl];
1580  Value cmp = CMPI(eq, crd, iv);
1581  // If the loop contains a co-iteration over a non-unique level, we
1582  // fast-forward past all the duplicated coordinates by setting the position
1583  // to the segment high.
1584  Value add =
1585  !isUniqueLT(lvlTypes[tid][lvl]) ? segHi[tid][lvl] : ADDI(pos, one);
1586 
1587  operands.push_back(SELECT(cmp, add, pos));
1588  // Following loops continue iteration from the break point of the
1589  // current while loop.
1590  const Value newPos = whileOp->getResult(o++);
1591  // We need to define a new local variable for `tid` to avoid
1592  // warnings about "captured structured bindings are a C++20 extension".
1593  // FIXME(wrengr): define a helper function to capture this idiom!
1594  const TensorId newTid = tid;
1595  posits[newTid][lvl] = newPos;
1596 
1597  // The coordinate is invalid now.
1598  coords[tid][lvl] = nullptr;
1599  // The segment high is invalid now.
1600  segHi[tid][lvl] = nullptr;
1601  // highs remains unchanged.
1602  }
1603  }
1604 
1605  // Reduction value from users.
1606  for (auto &i : reduc) {
1607  operands.push_back(i);
1608  // In place update reduction variable.
1609  i = whileOp->getResult(o++);
1610  }
1611 
1612  // An (optional) universal index.
1613  if (operands.size() + delta < whileOp.getNumResults()) {
1614  assert(operands.size() + delta + 1 == whileOp.getNumResults());
1615  // The last one is the universal index.
1616  operands.push_back(ADDI(iv, one));
1617  // Update the loop starting point of the current loop sequence.
1618  loopSeqStack.back().first = whileOp->getResult(o++);
1619  }
1620 
1621  assert(o == operands.size() + delta);
1622  if (!operands.empty())
1623  YIELD(operands);
1624 
1625  builder.setInsertionPointAfter(whileOp);
1626 }
1627 
1628 void LoopEmitter::exitCurrentLoop(RewriterBase &rewriter, Location loc,
1629  MutableArrayRef<Value> reduc) {
1630  // Clean up the values; this helps us discover potential bugs at an
1631  // earlier stage (instead of silently using a wrong value).
1632  const LoopInfo &loopInfo = loopStack.back();
1633 
1634  // Sets the insertion point to the right position.
1635  rewriter.setInsertionPointToEnd(loopInfo.userCodeBlock);
1636  if (!loopInfo.userCodeBlock->empty() &&
1637  llvm::isa<scf::YieldOp>(&loopInfo.userCodeBlock->back())) {
1638  // scf::While/For inserts an implicit yield op when there are no loop
1639  // iter args. In this case, we need to insert the code before the yield.
1640  assert(loopInfo.userCodeBlock->back().getNumResults() == 0);
1641  rewriter.setInsertionPoint(&loopInfo.userCodeBlock->back());
1642  }
1643 
1644  if (llvm::isa<scf::WhileOp>(loopInfo.loop)) {
1645  exitWhileLoop(rewriter, loc, reduc);
1646  } else {
1647  exitForLoop(rewriter, loc, reduc);
1648  }
1649 
1650  assert(loopStack.size() == loopSeqStack.size());
1651  loopStack.pop_back();
1652 }
1653 
1654 //===----------------------------------------------------------------------===//
1655 // Slice-driven loop related methods.
1656 //===----------------------------------------------------------------------===//
1657 
1658 unsigned LoopEmitter::remDepOnLevel(TensorId tid, Level lvl) const {
1659  unsigned totalDependencies = dependentLvlMap[tid][lvl].size();
1660  if (totalDependencies != 0) {
1661  assert(totalDependencies >= 2);
1662  return totalDependencies - levelReducedDep[tid][lvl];
1663  }
1664  return totalDependencies;
1665 }
1666 
1667 const LoopEmitter::SliceInfo &LoopEmitter::getMostRecentSliceOnLvl(TensorId tid,
1668  Level lvl) {
1669  // Finds the most-recent slice using a reverse iteration.
1670  for (auto it = sliceStack[tid].rbegin(), ie = sliceStack[tid].rend(); it < ie;
1671  it++) {
1672  if (it->slicedOnLvl == lvl) { // the level matched
1673  return *it;
1674  }
1675  }
1676  llvm_unreachable("Failed to find sliceInfo");
1677 }
1678 
1679 // Generates a while loop to iterate over a sliced sparse level as follows.
1680 //
1681 // while(coords[loopLo] < offset + size) {
1682 // body_builder
1683 // loopLo ++;
1684 // }
1685 std::pair<Operation *, ValueRange> LoopEmitter::genSliceLvlTraverseLoop(
1686  OpBuilder &builder, Location loc, Value posLo, Value posHi, Value offset,
1687  Value size, TensorId tid, Level lvl, ValueRange userReduc,
1688  LoopBodyBuilder bodyBuilder) {
1689  Value c1 = C_IDX(1);
1690  auto [sliceSz, stride] = sliceMeta[tid][lvl].back();
1691  assert(stride == 1 && "Not yet implemented");
1692  Value sliceHi = ADDI(offset, sliceSz);
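  // For example, with offset = 2 and a slice size of 4 (hypothetical values),
  // sliceHi = 6: the while loop below keeps running as long as the position
  // stays below posHi and the coordinate at that position stays below 6.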
1693 
1694  SmallVector<Value> reduc{posLo}; // loop lower bounds
1695  const unsigned numMetaReduc = reduc.size();
1696 
1697  // Append user required reduction value.
1698  reduc.append(userReduc.begin(), userReduc.end());
1699  scf::WhileOp whileOp = builder.create<scf::WhileOp>(
1700  loc, ValueRange(reduc).getTypes(), reduc,
1701  /*beforeBuilder=*/
1702  [this, posHi, sliceHi, tid, lvl](OpBuilder &builder, Location loc,
1703  ValueRange args) {
1704  Value cond = genSparseReducedAffineCond(builder, loc,
1705  coordinatesBuffers[tid][lvl],
1706  sliceHi, args[0], posHi);
1707  // Continue if we have neither hit a break nor gone out of bounds.
1708  builder.create<scf::ConditionOp>(loc, cond, args);
1709  },
1710  /*afterBuilder=*/
1711  [c1, numMetaReduc, bodyBuilder](OpBuilder &builder, Location loc,
1712  ValueRange args) {
1713  Value iv = args[0];
1714  TypeRange types = args.drop_front(numMetaReduc).getTypes();
1715  // The coordinate must be in bounds, as guaranteed by the loop
1716  // condition. We generate a fake if operation here only to hide the
1717  // extra loop induction variables maintained by us from users, which
1718  // will be removed by a later optimization pass.
1719  auto ifOp = builder.create<scf::IfOp>(loc, types,
1720  constantI1(builder, loc, true),
1721  /*withElseBlock=*/!types.empty());
1722  {
1723  // Drops the reduction variables maintained by us (the loop metadata).
1724  SmallVector<Value> ifRet = args.drop_front(numMetaReduc);
1725  assert(ifRet.size() == args.size() - 1);
1726 
1727  OpBuilder::InsertionGuard guard(builder);
1728  // If coord >= sliceHi.
1729  if (!ifRet.empty()) {
1730  builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
1731  YIELD(ifRet);
1732  }
1733 
1734  // If coord < sliceHi.
1735  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
1736  // Delegates to users' callback.
1737  bodyBuilder(builder, loc, iv, ifRet);
1738  }
1739  // Marks this special ifOp so that sparsification does not finalize it.
1740  ifOp->setAttr(getLoopEmitterLoopAttrName(),
1741  StringAttr::get(builder.getContext(), "slice"));
1742  // Insertion point restored to after ifOp.
1743  SmallVector<Value> yields;
1744  // Increase induction variable.
1745  yields.push_back(ADDI(iv, c1));
1746  yields.append(ifOp.getResults().begin(), ifOp.getResults().end());
1747  YIELD(yields);
1748  });
1749 
1750  builder.setInsertionPointAfter(whileOp);
1751  return std::make_pair(whileOp, whileOp.getResults().drop_front(numMetaReduc));
1752 }
1753 
1754 // Generates a loop nest that traverses all the unresolved levels in between.
1755 //
1756 // for(int i = 0; i < slicePos.size(); i+=2) {
1757 // loopLo = slicePos[i];
1758 // loopHi = slicePos[i + 1];
1759 //
1760 // // Then the same loop generated by genSliceLvlTraverse above.
1761 // while (loopLo < loopHi) {
1762 // if (pos[loopLo] < sliceHi) {
1763 // bodyBuilder();
1764 // } else {
1765 // break;
1766 // }
1767 // loopLo ++;
1768 // }
1769 // }
1770 ValueRange LoopEmitter::genUnResolvedSliceTreeTraverse(
1771  OpBuilder &builder, Location loc, TensorId tid,
1772  ArrayRef<const SliceInfo *> unResLvls,
1773  std::optional<std::pair<TensorId, Level>> firstResLvl, ValueRange userReduc,
1774  LoopBodyBuilder bodyBuilder) {
1775 
1776  Value c0 = C_IDX(0), c1 = C_IDX(1);
1777  Value pos = c0;
1778  OpBuilder::InsertPoint ip;
1779  SmallVector<Value> innerArgs(userReduc.begin(), userReduc.end());
1780  scf::ForOp outerMost = nullptr; // the outermost loop.
1781 
1782  // Wraps the body builder and inserts an extra counting instruction at the end.
1783  auto wrapped = [bodyBuilder](OpBuilder &builder, Location loc, Value iv,
1784  MutableArrayRef<Value> reduc) {
1785  bodyBuilder(builder, loc, iv, reduc.drop_back());
1786  // Increments the counter.
1787  reduc.back() = ADDI(reduc.back(), C_IDX(1));
1788  };
1789 
1790  // FIXME: Need special handling when the previous unresolved slice is strided:
1791  // we probably need to filter out coordinates that are not on the stride.
1792  if (firstResLvl.has_value()) {
1793  // Overwrite position when the first level is fully resolved.
1794  pos = posits[firstResLvl->first][firstResLvl->second];
1795  ip = builder.saveInsertionPoint();
1796  } else {
1797  const SliceInfo &frontSlice = *unResLvls.back();
1798  Level firstLvl = *frontSlice.slicedOnLvl;
1799  if (!lvlFullyResolved(tid, firstLvl)) {
1800  if (isCompressedLT(lvlTypes[tid][firstLvl])) {
1801  // An extra counter that tracks how many segments there are in the child
1802  // compressed level.
1803  innerArgs.push_back(c0);
1804  // Overrides the user-provided builder.
1805  bodyBuilder = wrapped;
1806  unsigned depth = frontSlice.depth - 1;
1807  Value offset = frontSlice.offset;
1808  Value sPtrBuf = slicePosBuffer[tid][firstLvl][depth];
1809  Value mSz = loadSlicePosTupleNum(builder, loc, sPtrBuf);
1810  outerMost = builder.create<scf::ForOp>(
1811  loc, c0, mSz, c1, innerArgs,
1812  [this, tid, firstLvl, offset, sPtrBuf, &ip, &pos,
1813  &innerArgs](OpBuilder &builder, Location loc, Value iv,
1814  ValueRange iterArgs) {
1815  // generate traversal for each level.
1816  Value loopLo =
1817  loadSlicePos(builder, loc, sPtrBuf, iv, SlicePosKind::kLo);
1818  Value loopHi =
1819  loadSlicePos(builder, loc, sPtrBuf, iv, SlicePosKind::kHi);
1820  // We need to remember the starting index for the next level's
1821  // position, because the slice-driven loop breaks the level into
1822  // non-consecutive segments.
1823  updateSlicePos(builder, loc, sPtrBuf, iterArgs.back(), iv,
1824  SlicePosKind::kNext);
1825 
1826  auto [size, stride] = sliceMeta[tid][firstLvl].back();
1827  assert(stride == 1 && "Not yet implemented");
1828  ValueRange itArgs =
1829  genSliceLvlTraverseLoop(
1830  builder, loc, loopLo, loopHi, offset, size, tid, firstLvl,
1831  iterArgs,
1832  [&](OpBuilder &builder, Location, Value iv,
1833  MutableArrayRef<Value> reduc) {
1834  ip = builder.saveInsertionPoint();
1835  pos = iv;
1836  innerArgs.assign(reduc.begin(), reduc.end());
1837  })
1838  .second;
1839  YIELD(itArgs);
1840  });
1841  } else if (isDenseLT(lvlTypes[tid][firstLvl])) {
1842  assert(firstLvl == 0); // This must be the first level.
1843  Value lb = frontSlice.offset;
1844  auto [sliceSz, stride] =
1845  sliceMeta[tid][*frontSlice.slicedOnLvl][frontSlice.depth];
1846  assert(stride == 1 && "Not yet implemented");
1847  Value ub = ADDI(lb, sliceSz);
1848  outerMost = builder.create<scf::ForOp>(
1849  loc, lb, ub, c1, innerArgs,
1850  [&](OpBuilder &builder, Location loc, Value iv,
1851  ValueRange iterArgs) {
1852  ip = builder.saveInsertionPoint();
1853  pos = iv;
1854  innerArgs.assign(iterArgs.begin(), iterArgs.end());
1855  });
1856  }
1857  // We generated the loop for the first slice above, now remove it.
1858  unResLvls = unResLvls.drop_back();
1859  }
1860  }
1861  // Reset the insertion point into the loop body.
1862  builder.restoreInsertionPoint(ip);
1863  if (!unResLvls.empty()) {
1864  // Fills in the dense slice levels in between.
1865  SmallVector<Value> lbs, ubs, steps, lvlSzs;
1866  for (const SliceInfo *slice : llvm::reverse(unResLvls)) {
1867  Level sliceLvl = *slice->slicedOnLvl;
1868  assert(isDenseLT(lvlTypes[tid][sliceLvl]));
1869  Value offset = slice->offset;
1870  auto [sliceSz, stride] = sliceMeta[tid][sliceLvl][slice->depth];
1871  assert(stride == 1 && "Not yet implemented");
1872  lbs.push_back(offset);
1873  ubs.push_back(ADDI(offset, sliceSz));
1874  steps.push_back(c1);
1875  lvlSzs.push_back(lvlSizes[tid][sliceLvl]);
1876  }
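  // The nest below walks the dense slice windows [offset, offset + sliceSz)
  // of the remaining unresolved dense levels, linearizing the visited
  // coordinates into a single position (pos = pos * lvlSize + iv at each
  // level) before handing control to the user body.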
1877  auto denseNest =
1878  scf::buildLoopNest(builder, loc, lbs, ubs, steps, innerArgs,
1879  [&innerArgs, &lvlSzs, &pos, bodyBuilder](
1880  OpBuilder &builder, Location loc, ValueRange ivs,
1881  ValueRange iterArgs) -> scf::ValueVector {
1882  for (auto em : llvm::enumerate(ivs)) {
1883  // Linearizes the position:
1884  //   pos = (pos * lvlSize) + iv;
1885  pos = MULI(pos, lvlSzs[em.index()]);
1886  pos = ADDI(pos, em.value());
1887  }
1888  innerArgs.assign(iterArgs.begin(), iterArgs.end());
1889  // Generates user request loop body.
1890  bodyBuilder(builder, loc, pos, innerArgs);
1891  return innerArgs;
1892  });
1893 
1894  if (!outerMost) {
1895  // If the outermost loop has not been set, this is the outermost loop.
1896  outerMost = denseNest.loops.front();
1897  } else {
1898  // Otherwise we need to generate yield operations to link the SSA chain.
1899  YIELD(denseNest.results);
1900  }
1901  } else {
1902  assert(outerMost);
1903  // Generates user request loop body.
1904  bodyBuilder(builder, loc, pos, innerArgs);
1905  YIELD(innerArgs);
1906  }
1907  assert(outerMost);
1908  // Insert after current while operation.
1909  builder.setInsertionPointAfter(outerMost);
1910  return outerMost.getResults();
1911 }
1912 
1913 void LoopEmitter::genResolvedSliceBegin(OpBuilder &builder, Location loc,
1914  TensorId tid, Level lvl) {
1915  Value c0 = C_IDX(0), c1 = C_IDX(1);
1916  if (isDenseLT(lvlTypes[tid][lvl])) {
1917  // Dense slice begin is trivial.
1918  sliceStack[tid].emplace_back(/*minCoord=*/c0, /*offset=*/c0,
1919  /*nonEmpty=*/constantI1(builder, loc, true),
1920  lvl, /*depth=*/1);
1921  return;
1922  }
1923  auto [nxSz, stride] = sliceMeta[tid][lvl][1];
1924  assert(stride == 1 && "Not yet implemented");
1925  Value sPtrBuf = slicePosBuffer[tid][lvl][0];
1926  Value pHi, pLo;
1927  if (lvl == 0) {
1928  pLo = c0;
1929  pHi = genIndexLoad(builder, loc, positionsBuffers[tid][0], c1);
1930  } else {
1931  pLo = genIndexLoad(builder, loc, positionsBuffers[tid][lvl],
1932  posits[tid][lvl - 1]);
1933  pHi = genIndexLoad(builder, loc, positionsBuffers[tid][lvl],
1934  ADDI(posits[tid][lvl - 1], c1));
1935  }
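  // At this point [pLo, pHi) is the position range the slice starts from:
  // for lvl == 0 it covers the whole level, otherwise it is the segment
  // selected by the parent position.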
1936  // Fills out pIdxBuffer[tid][lvl][0] with [/*memSize =*/4, 0, pLo, pHi]
1937  updateSlicePosTupleNum(builder, loc, c1, sPtrBuf);
1938  updateSlicePosPtr(builder, loc, sPtrBuf, c0);
1939  updateSlicePos(builder, loc, sPtrBuf, pLo, c0, SlicePosKind::kLo);
1940  updateSlicePos(builder, loc, sPtrBuf, pHi, c0, SlicePosKind::kHi);
1941 
1942  // This is a non-empty tensor if pLo < pHi.
1943  Value isNonEmpty = CMPI(ult, pLo, pHi);
1944  // On an ordered level, the minimal coordinate must be the first one.
1945  // FIXME: Technically we should load the coordinate only when the slice is
1946  // non-empty; however, we assume that even for empty sparse tensors a
1947  // non-empty ptr/idx buffer is allocated for each level, so the load cannot
1948  // go out of bounds, and we thereby avoid generating an ifOp here.
1949  Value minCrd = genIndexLoad(builder, loc, coordinatesBuffers[tid][lvl], pLo);
1950 
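  // offsetFromMinCoord computes the absolute offset of the slice from minCrd;
  // judging from the analogous computation in genSliceNextInduction below,
  // this is roughly max(minCrd + 1 - size, 0) when the slice is non-empty
  // (an inference from this file, not a verified contract of the helper).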
1951  // FIXME: We need the relative offset with respect to the base slice.
1952  Value absOffset = offsetFromMinCoord(builder, loc, minCrd, nxSz, isNonEmpty);
1953  sliceStack[tid].emplace_back(minCrd, absOffset, isNonEmpty, lvl,
1954  /*depth=*/1);
1955 }
1956 
1957 // Fills in the slicePosBuffer before the slice-driven loop begins.
1958 // TODO: it can only handle all-compressed tensors.
1959 //
1960 // // Loop generated by `genUnResolvedSliceTreeTraverse`
1961 // for(int i = 0; i < slicePos.size(); i+=2) {
1962 // loopLo = slicePos[i];
1963 // loopHi = slicePos[i + 1];
1964 // minCrd = max;
1965 // while (loopLo < loopHi) {
1966 // if (pos[loopLo] < sliceHi) {
1967 // // bodyBuilder
1968 // slicePos[tid].push_back(pos[loopLo]);
1969 // slicePos[tid].push_back(pos[loopLo + 1]);
1970 // minCrd = min(minCrd, crd[pos[loopLo]]);
1971 // } else {
1972 // break;
1973 // }
1974 // loopLo ++;
1975 // }
1976 // }
1977 void LoopEmitter::genUnResolvedSliceBegin(OpBuilder &builder, Location loc,
1978  TensorId tid, Level lvl) {
1979  Value c0 = C_IDX(0), c1 = C_IDX(1);
1980  unsigned depth = levelReducedDep[tid][lvl];
1981  // The remaining slice size after reduction.
1982  Value remSz = sliceMeta[tid][lvl][depth + 1].first;
1983  // Dense slice begin is trivial
1984  if (isDenseLT(lvlTypes[tid][lvl])) {
1985  sliceStack[tid].emplace_back(c0, c0, constantI1(builder, loc, false), lvl,
1986  depth + 1);
1987  return;
1988  }
1989 
1990  assert(isCompressedLT(lvlTypes[tid][lvl]));
1991  // Unhandled Cases:
1992  //
1993  // 1st, lvl = prevSlicedLvl, i.e., t[d0 + d1 + d2,...] (more than one
1994  // variable needs to be reduced on the same level).
1995  //
1996  // 2nd, lvl > prevSliceLvl + 1, i.e., t[..., d2, d3 + d4] (having a
1997  // simple dim expression in between).
1998  assert(lvl == *sliceStack[tid].back().slicedOnLvl + 1);
1999 
2000  // Check slice stack integrity.
2001  assert(slicePosBuffer[tid][lvl - 1].size() == sliceStack[tid].back().depth);
2002 
2003  SmallVector<const SliceInfo *> unResSlices;
2004  std::optional<std::pair<TensorId, Level>> firstResLvl;
2005  for (Level curLvl = lvl; curLvl >= 1; curLvl--) {
2006  Level prevLvl = curLvl - 1;
2007  if (lvlFullyResolved(tid, prevLvl)) {
2008  firstResLvl = std::make_pair(tid, prevLvl);
2009  break;
2010  }
2011  unResSlices.push_back(&getMostRecentSliceOnLvl(tid, prevLvl));
2012  if (!isDenseLT(lvlTypes[tid][prevLvl])) {
2013  break;
2014  }
2015  }
2016 
2017  assert(!unResSlices.empty() &&
2018  !lvlFullyResolved(tid, *unResSlices.front()->slicedOnLvl));
2019 
2020  Value sPtrBuf = slicePosBuffer[tid][lvl].back();
2021  SmallVector<Value, 3> reduc = {
2022  constantI1(builder, loc, false), // isNonEmpty
2023  lvlSizes[tid][lvl], // minCoord
2024  c0, // memSize
2025  };
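  // The three reduction values threaded through the traversal are, in order:
  // whether any visited segment is non-empty, the minimum coordinate seen so
  // far, and the number of (pLo, pHi) tuples written to the position buffer.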
2026 
2027  ValueRange result = genUnResolvedSliceTreeTraverse(
2028  builder, loc, tid, unResSlices, firstResLvl, reduc,
2029  [this, c1, tid, lvl, sPtrBuf](OpBuilder &builder, Location loc, Value iv,
2030  MutableArrayRef<Value> reduc) {
2031  Value &nonEmpty = reduc[0];
2032  Value &minCrd = reduc[1];
2033  Value &curTupleCnt = reduc[2];
2034 
2035  Value pHi = ADDI(iv, c1);
2036  Value sPLo = genIndexLoad(builder, loc, positionsBuffers[tid][lvl], iv);
2037  Value sPHi =
2038  genIndexLoad(builder, loc, positionsBuffers[tid][lvl], pHi);
2039 
2040  // isNonEmpty = isNonEmpty || lvlNonEmpty, i.e., as long as there is
2041  // one non-empty lvl, the slice is non-empty.
2042  Value lvlNonEmpty = CMPI(ult, sPLo, sPHi);
2043  nonEmpty = builder.create<arith::OrIOp>(loc, lvlNonEmpty, nonEmpty);
2044 
2045  // Update the minimum coordinate.
2046  auto ifNonEmpty = builder.create<scf::IfOp>(loc, builder.getIndexType(),
2047  lvlNonEmpty, true);
2048  {
2049  // Generate Code as follows.
2050  //
2051  // if (nonEmpty) {
2052  // minCrd = min(minCrd, crd[pos[pLo]]);
2053  // }
2054  OpBuilder::InsertionGuard guard(builder);
2055  builder.setInsertionPointToStart(ifNonEmpty.thenBlock());
2056  Value curC =
2057  genIndexLoad(builder, loc, coordinatesBuffers[tid][lvl], sPLo);
2058  Value isSmaller = CMPI(ult, curC, minCrd);
2059  Value newMin = SELECT(isSmaller, curC, minCrd);
2060  YIELD(newMin);
2061  builder.setInsertionPointToStart(ifNonEmpty.elseBlock());
2062  YIELD(minCrd);
2063  }
2064  minCrd = ifNonEmpty.getResult(0);
2065  updateSlicePos(builder, loc, sPtrBuf, sPLo, curTupleCnt,
2066  SlicePosKind::kLo);
2067  updateSlicePos(builder, loc, sPtrBuf, sPHi, curTupleCnt,
2068  SlicePosKind::kHi);
2069  curTupleCnt = ADDI(curTupleCnt, C_IDX(1));
2070  });
2071 
2072  Value isNonEmpty = result[0];
2073  Value minCrd = result[1];
2074  // Two pieces of metadata: [memSize, idx].
2075  // TODO: Could use SSA values for these two pieces of metadata.
2076  updateSlicePosTupleNum(builder, loc, result[2], sPtrBuf);
2077  updateSlicePosPtr(builder, loc, sPtrBuf, c0);
2078  // FIXME: we need the relative offset with respect to the base slice.
2079  Value absOffset = offsetFromMinCoord(builder, loc, minCrd, remSz, isNonEmpty);
2080  sliceStack[tid].emplace_back(minCrd, absOffset, isNonEmpty, lvl, depth + 1);
2081 }
2082 
2083 bool LoopEmitter::genSliceBegin(OpBuilder &builder, Location loc, TensorId tid,
2084  Level lvl) {
2085  if (depFullyReduced(tid, lvl)) {
2086  // There is no need to prepare for a slice-driven loop on a dense level
2087  // after it is fully reduced.
2088  if (isDenseLT(lvlTypes[tid][lvl]))
2089  return true;
2090  // If the constraints on the tensor are fully resolved, we no longer need
2091  // to generate the slice begin; instead, we fall back to the TACO-based
2092  // algorithm to (co)iterate over the slice.
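  // Concretely, the cached [lo, hi) position range of the current tuple is
  // reloaded into posits/highs below, and the regular (co)iteration machinery
  // takes over from there.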
2093  Value sPosBuf = slicePosBuffer[tid][lvl].back();
2094  Value tupleIdx = loadSlicePosPtr(builder, loc, sPosBuf);
2095  posits[tid][lvl] =
2096  loadSlicePos(builder, loc, sPosBuf, tupleIdx, SlicePosKind::kLo);
2097  highs[tid][lvl] =
2098  loadSlicePos(builder, loc, sPosBuf, tupleIdx, SlicePosKind::kHi);
2099  return true;
2100  }
2101 
2102  // Only when the level is sorted can the next non-empty slice be computed
2103  // efficiently.
2104  const LevelType lvlType = lvlTypes[tid][lvl];
2105  assert(isOrderedLT(lvlType));
2106  if (isSingletonLT(lvlType)) {
2107  llvm_unreachable("TODO: dense level should be easy to support, while "
2108  "singleton level requires more efforts");
2109  }
2110 
2111  assert(!dependentLvlMap[tid][lvl].empty());
2112  assert(!sliceStack[tid].empty());
2113 
2114  const SliceInfo &sliceInfo = sliceStack[tid].back();
2115  auto baseEnc = getSparseTensorEncoding(tensors[tid].getType());
2116  if (baseEnc.isSlice())
2117  llvm_unreachable("TODO: not yet implemented");
2118 
2119  // Generates the caches required to quickly compute the next non-empty
2120  // slice with increasing offset for the slice-based loop.
2121  // We do not need a cache for dense levels.
2122  if (slicePosBuffer[tid][lvl][0] == nullptr && !isDenseLT(lvlType)) {
2123  OpBuilder::InsertionGuard guard(builder);
2124  // The buffer can be reused, and the size is loop invariant: it only
2125  // depends on the iteration graph's toposort.
2126  builder.setInsertionPointAfter(localInsertPos);
2127  Value tupleCnt = C_IDX(1);
2128  // Accumulates the size required to cache the pLo for the slice.
2129  // E.g., if we want to cache the pIdx for slice<d0xd1xf64> on the second
2130  // level, we need at most a memref<d0xindex>.
2131  // NOTE: this is apparently an over-approximation when the previous
2132  // level is compressed, and we could compute a precise memory size
2133  // inside the loops. But that would also require us to allocate/free
2134  // memory inside loops.
2135  // TODO: Maybe use allocaScopeOp inside the loop to resolve the issue?
2136  for (Level curLevel = lvl;
2137  curLevel >= 1 && !lvlFullyResolved(tid, curLevel - 1); curLevel--) {
2138  // We only handle cases when all the previously unresolved levels are
2139  // fully reduced.
2140  assert(depFullyReduced(tid, curLevel - 1));
2141  assert(!sliceMeta[tid][curLevel - 1].empty());
2142  auto [sz, stride] = sliceMeta[tid][curLevel - 1].back();
2143  assert(stride == 1 && "Not yet implemented");
2144  tupleCnt = MULI(tupleCnt, sz);
2145  }
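  // With the over-approximated tuple count computed above, every cache slot
  // of slicePosBuffer[tid][lvl] is backed by a freshly allocated position
  // buffer sized for tupleCnt tuples.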
2146  for (Value &cache : slicePosBuffer[tid][lvl])
2147  cache = allocSlicePosBuf(builder, loc, tupleCnt);
2148  }
2149 
2150  if (sliceInfo.isInitialTensor() ||
2151  (lvl >= 1 && lvlFullyResolved(tid, lvl - 1))) {
2152  // First level, or the previous level has been fully resolved.
2153  genResolvedSliceBegin(builder, loc, tid, lvl);
2154  } else {
2155  // The previous level has not been fully resolved.
2156  genUnResolvedSliceBegin(builder, loc, tid, lvl);
2157  }
2158  return false;
2159 }
2160 
2161 void LoopEmitter::invalidateSliceIterIdx(OpBuilder &builder, Location loc,
2162  TensorId tid, Level lvl) {
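  // Rewinds the slice position pointer to zero for every non-dense level up
  // to (and including) lvl that participates in an affine index expression,
  // so that the next traversal starts from the first cached tuple again.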
2163  for (unsigned i = 0; i <= lvl; i++) {
2164  if (!isDenseLT(lvlTypes[tid][i]) && !dependentLvlMap[tid][i].empty()) {
2165  updateSlicePosPtr(builder, loc, slicePosBuffer[tid][i].back(), C_IDX(0));
2166  }
2167  }
2168 }
2169 
2170 std::tuple<Value, Value, Value>
2171 LoopEmitter::genSliceNextInduction(OpBuilder &builder, Location loc,
2172  TensorId tid, Level lvl) {
2173  if (!isCompressedLT(lvlTypes[tid][lvl]))
2174  llvm_unreachable("TODO");
2175 
2176  // Otherwise, generate code to compute the next non-empty slice.
2177  Value c0 = C_IDX(0), c1 = C_IDX(1);
2178 
2179  SliceInfo &info = sliceStack[tid].back();
2180  assert(info.slicedOnLvl == lvl);
2181  //
2182  // We forward to the next non empty slice by
2183  // if (minCrd > offset) {
2184  // offset += 1
2185  // } else {
2186  // minCrd = nextMinInSlice();
2187  // offset = minCrd - size + 1;
2188  // }
2189  //
2190  // if (offset + size > parents.size)
2191  // isNonEmpty = false;
2192  //
2193  Value absOffset = info.offset;
2194  // Resets slice pointers, as the resolved slices are invalidated after
2195  // we move forward to the next slice.
2196  invalidateSliceIterIdx(builder, loc, tid, lvl);
2197 
2198  SmallVector<Value, 3> reduc = {info.minCrd, info.isNonEmpty, absOffset};
2199  Value sPtrBuf = slicePosBuffer[tid][lvl][info.depth - 1];
2200  Value fastPathP = CMPI(ugt, info.minCrd, absOffset);
2201  auto ifOp = builder.create<scf::IfOp>(loc, ValueRange(reduc).getTypes(),
2202  fastPathP, true);
2203  {
2204  OpBuilder::InsertionGuard guard(builder);
2205  // Take the fast path
2206  // if (minCrd > offset) {
2207  // return offset += 1
2208  // }
2209  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
2210  reduc[2] = ADDI(absOffset, c1);
2211  // Yield offset + 1.
2212  YIELD(reduc);
2213 
2214  // else /*minCrd == offset*/ {
2215  // for (i = 0; i < slicePos.size(); i+=kSliceIterWidth) {
2216  // if (crd[pos[slicePos[i]]] == minCrd) {
2217  // slicePos[i]++;
2218  // }
2219  // minCrd=min(minCrd, crd[pos[slicePos[i]]]);
2220  // }
2221  // offset = minCrd - size + 1;
2222  // }
2223  builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
2224  reduc[2] = absOffset; // restore value.
2225  Value mSz = loadSlicePosTupleNum(builder, loc, sPtrBuf); // memSize
2226  reduc[0] = lvlSizes[tid][lvl]; // next min coord
2227  reduc[1] = constantI1(builder, loc, false); // isNonEmpty
2228  auto loopArgs = static_cast<ValueRange>(reduc).drop_back();
2229  auto forOp = scf::buildLoopNest(
2230  builder, loc, c0, mSz, c1, loopArgs,
2231  [this, tid, lvl, c1, sPtrBuf,
2232  &info](OpBuilder &builder, Location loc, ValueRange ivs,
2233  ValueRange iterArgs) -> scf::ValueVector {
2234  Value curMinCrd = iterArgs[0];
2235  Value isNonEmpty = iterArgs[1];
2236 
2237  Type idxTp = builder.getIndexType();
2238  Value pLo = loadSlicePos(builder, loc, sPtrBuf, ivs.front(),
2239  SlicePosKind::kLo);
2240  Value pHi = loadSlicePos(builder, loc, sPtrBuf, ivs.front(),
2241  SlicePosKind::kHi);
2242  //
2243  // if (pLo < pHi) // Only loads when inbound.
2244  // coord = load[pLo]
2245  // if coord == minCrd
2246  // pLo += 1
2247  //
2248  // if (pLo < pHi)
2249  // curMinCrd = min(curMinCrd, load[pLo])
2250  //
2251  Value pred = CMPI(ult, pLo, pHi);
2252  auto advPLo = builder.create<scf::IfOp>(loc, idxTp, pred, true);
2253  /* if pLo < pHi */ {
2254  builder.setInsertionPointToStart(&advPLo.getThenRegion().front());
2255  // coord = load[pLo]
2256  Value coord =
2257  genIndexLoad(builder, loc, coordinatesBuffers[tid][lvl], pLo);
2258  Value pred = CMPI(eq, coord, info.minCrd);
2259  auto ifEqual = builder.create<scf::IfOp>(loc, idxTp, pred, true);
2260  /* if coord == minCrd */ {
2261  builder.setInsertionPointToStart(
2262  &ifEqual.getThenRegion().front());
2263  Value newPlo = ADDI(pLo, c1);
2264  // Updates the cache.
2265  updateSlicePos(builder, loc, sPtrBuf, newPlo, ivs.front(),
2266  SlicePosKind::kLo);
2267  YIELD(newPlo);
2268  }
2269  /* else coord != minCrd */ {
2270  builder.setInsertionPointToStart(
2271  &ifEqual.getElseRegion().front());
2272  YIELD(pLo);
2273  }
2274  builder.setInsertionPointAfter(ifEqual);
2275  YIELD(ifEqual.getResults());
2276  }
2277  /* else pLo >= pHi */ {
2278  builder.setInsertionPointToStart(&advPLo.getElseRegion().front());
2279  YIELD(pLo);
2280  }
2281 
2282  builder.setInsertionPointAfter(advPLo);
2283  pLo = advPLo.getResult(0);
2284  Value lvlNonEmpty = CMPI(ult, pLo, pHi);
2285  // Update minCrd.
2286  auto newMin =
2287  builder.create<scf::IfOp>(loc, idxTp, lvlNonEmpty, true);
2288  builder.setInsertionPointToStart(&newMin.getThenRegion().front());
2289  YIELD(genIndexLoad(builder, loc, coordinatesBuffers[tid][lvl], pLo));
2290 
2291  builder.setInsertionPointToStart(&newMin.getElseRegion().front());
2292  YIELD(curMinCrd);
2293  builder.setInsertionPointAfter(newMin);
2294 
2295  // isNonEmpty = isNonEmpty || lvlNonEmpty
2296  isNonEmpty =
2297  builder.create<arith::OrIOp>(loc, lvlNonEmpty, isNonEmpty);
2298  curMinCrd = builder.create<arith::SelectOp>(
2299  loc, CMPI(ult, newMin.getResult(0), curMinCrd),
2300  newMin.getResult(0), curMinCrd);
2301  return {curMinCrd, isNonEmpty};
2302  });
2303 
2304  builder.setInsertionPointAfter(forOp.loops.front());
2305  // minOffset = minCrd + 1 >= size ? minCrd + 1 - size : c0
2306  Value tmp = ADDI(forOp.results.front(), c1);
2307  auto [size, stride] = sliceMeta[tid][lvl][info.depth];
2308  assert(stride == 1 && "Not yet implemented");
2309  Value minOffset = SUBI(tmp, size);
2310  Value p = CMPI(uge, tmp, size);
2311  minOffset = SELECT(p, minOffset, c0);
2312 
2313  SmallVector<Value, 3> yields;
2314  yields.assign(forOp.results.begin(), forOp.results.end());
2315  yields.push_back(minOffset);
2316  YIELD(yields);
2317  }
2318 
2319  Value nextMinCrd = ifOp.getResults()[0];
2320  Value nextNonEmpty = ifOp.getResults()[1];
2321 
2322  // The next offset should be at least offset + 1.
2323  Value minOffset = ifOp.getResults()[2];
2324  Value nxOffset = ADDI(info.offset, c1);
2325  Value maxPred = CMPI(ugt, minOffset, nxOffset);
2326  Value nextAbsOffset = SELECT(maxPred, minOffset, nxOffset);
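  // In other words, the new offset is max(minOffset, offset + 1): we always
  // advance by at least one, but jump further when the recomputed minimum
  // coordinate allows it.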
2327 
2328  auto [size, stride] = sliceMeta[tid][lvl][info.depth];
2329  assert(stride == 1 && "Not yet implemented");
2330  Value sliceUB = ADDI(nextAbsOffset, size);
2331 
2332  // FIXME: this only works if there is only one parent.
2333  assert(info.depth - 1 == 0);
2334  // nextNonEmpty = nextNonEmpty && (slice upper bound <= parent upper bound).
2335  nextNonEmpty = ANDI(nextNonEmpty, CMPI(ule, sliceUB, lvlSizes[tid][lvl]));
2336 
2337  // FIXME: compute relative offset.
2338  assert(info.depth - 1 == 0);
2339  return std::make_tuple(nextNonEmpty, nextMinCrd, nextAbsOffset);
2340 }
2341 
2342 #undef CMPI
2343 #undef C_IDX
2344 #undef YIELD
2345 #undef ADDI
2346 #undef ANDI
2347 #undef SUBI
2348 #undef MULI
2349 #undef SELECT