doxygen/LoopEmitter_8cpp_source.html

 //===- LoopEmitter.cpp ----------------------------------------------------===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//


 #include "LoopEmitter.h"

 #include "CodegenUtils.h"


 #include "mlir/Dialect/Arith/IR/Arith.h"

 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"

 #include "mlir/Dialect/Linalg/IR/Linalg.h"

 #include "mlir/Dialect/Linalg/Utils/Utils.h"

 #include "mlir/Dialect/MemRef/IR/MemRef.h"

 #include "mlir/Dialect/SCF/IR/SCF.h"

 #include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h"

 #include "mlir/Dialect/Tensor/IR/Tensor.h"

 #include "mlir/Dialect/Vector/IR/VectorOps.h"


 using namespace mlir;

 using namespace mlir::sparse_tensor;


 //===----------------------------------------------------------------------===//

 // File local shorthand macros

 //===----------------------------------------------------------------------===//


 // Shorthand wrappers around `builder.create<...>` for common arith/scf ops.
 // Every macro expands to an expression that refers to variables named
 // `builder` and `loc`, so both must be visible at the point of use.
 // `CMPI` takes a bare `arith::CmpIPredicate` enumerator name as its first
 // argument and yields the result of the compare op; `C_IDX` forwards to
 // `constantIndex`; the remaining macros yield the created op itself.
 #define CMPI(p, l, r)                                                          \

   (builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::p, (l), (r))       \

        .getResult())


 #define C_IDX(v) (constantIndex(builder, loc, (v)))

 #define YIELD(vs) (builder.create<scf::YieldOp>(loc, (vs)))

 #define ADDI(lhs, rhs) (builder.create<arith::AddIOp>(loc, (lhs), (rhs)))

 #define ANDI(lhs, rhs) (builder.create<arith::AndIOp>(loc, (lhs), (rhs)))

 #define SUBI(lhs, rhs) (builder.create<arith::SubIOp>(loc, (lhs), (rhs)))

 #define MULI(lhs, rhs) (builder.create<arith::MulIOp>(loc, (lhs), (rhs)))

 #define REMUI(lhs, rhs) (builder.create<arith::RemUIOp>(loc, (lhs), (rhs)))

 #define DIVUI(lhs, rhs) (builder.create<arith::DivUIOp>(loc, (lhs), (rhs)))

 #define SELECT(c, l, r) (builder.create<arith::SelectOp>(loc, (c), (l), (r)))


 //===----------------------------------------------------------------------===//

 // Debugging utils

 //===----------------------------------------------------------------------===//


 #ifndef NDEBUG
 /// Debug-only helper (compiled out when NDEBUG is defined): casts `memref` to
 /// an unranked memref of index element type and emits a call to the function
 /// named `printMemrefInd` with the cast value as its only argument.
 LLVM_ATTRIBUTE_UNUSED static void dumpIndexMemRef(OpBuilder &builder,
                                                   Location loc, Value memref) {
   const auto unrankedTp =
       UnrankedMemRefType::get(builder.getIndexType(), 0);
   Value unranked = builder.create<memref::CastOp>(loc, unrankedTp, memref);
   createFuncCall(builder, loc, "printMemrefInd", TypeRange{},
                  ValueRange{unranked}, EmitCInterface::On);
 }
 #endif


 //===----------------------------------------------------------------------===//

 // File local helper functions.

 //===----------------------------------------------------------------------===//


 // For index reduction loops, since the tensors are sliced into non-contiguous

 // fragments, we need a triple [pLo, pHi, pPtr], in which the pair (pLo, pHi)

 // specifies the range of the fragment, and pPtr specifies the index of the

 // corresponding fragment in the child level (i.e., a pointer to the sliced

 // position array).

 /// Returns the slice offset of `tensor` for the dimension that
 /// `toDim(encoding, lvl)` yields, where `encoding` is the sparse tensor
 /// encoding of the tensor's type. The value is produced by
 /// `createOrFoldSliceOffsetOp`.
 static Value genSliceOffset(OpBuilder &builder, Location loc, Value tensor,
                             Level lvl) {
   const auto dim = toDim(getSparseTensorEncoding(tensor.getType()), lvl);
   return createOrFoldSliceOffsetOp(builder, loc, tensor, dim);
 }


 /// Returns the slice stride of `tensor` for the dimension that
 /// `toDim(encoding, lvl)` yields, where `encoding` is the sparse tensor
 /// encoding of the tensor's type. The value is produced by
 /// `createOrFoldSliceStrideOp`.
 static Value genSliceStride(OpBuilder &builder, Location loc, Value tensor,
                             Level lvl) {
   const auto dim = toDim(getSparseTensorEncoding(tensor.getType()), lvl);
   return createOrFoldSliceStrideOp(builder, loc, tensor, dim);
 }


 /// Returns true iff `attr` is a `FloatAttr` or an `IntegerAttr` whose value
 /// reports `isZero()`; any other attribute kind yields false.
 static bool isIntOrFPZero(Attribute attr) {
   // An attribute is at most one of the two kinds, so each kind can decide on
   // its own.
   if (auto fpAttr = llvm::dyn_cast<FloatAttr>(attr))
     return fpAttr.getValue().isZero();
   if (auto intAttr = llvm::dyn_cast<IntegerAttr>(attr))
     return intAttr.getValue().isZero();
   return false;
 }


 /// Turns an `OpFoldResult` into an SSA value: when `getConstantIntValue`
 /// finds a constant integer, a fresh index constant is created through
 /// `constantIndex`; otherwise `ofr` is cast to the `Value` it holds.
 static Value unFoldOpIntResult(OpBuilder &builder, Location loc,
                                OpFoldResult ofr) {
   const std::optional<int64_t> cst = getConstantIntValue(ofr);
   if (!cst.has_value())
     return cast<Value>(ofr);
   return constantIndex(builder, loc, *cst);
 }


 /// Returns the source of a `tensor.pad` that defines `t` when all of the
 /// following hold: `t` has a sparse tensor type with an encoding, the pad
 /// source carries the same encoding, that encoding is an identity, and the
 /// pad body yields a constant integer/floating-point zero. In every other
 /// case `t` is returned unchanged.
 static Value tryFoldTensors(Value t) {
   // TODO: this should be done through a folding pass after switching to
   // `sparse_tensor.iterate`-based sparsification.
   auto padOp = t.getDefiningOp<tensor::PadOp>();
   if (!padOp)
     return t;

   auto stt = tryGetSparseTensorType(t);
   if (!stt.has_value() || !stt->hasEncoding())
     return t;

   const bool sameEncoding =
       padOp.getSourceType().getEncoding() == stt->getEncoding();
   if (!sameEncoding || !stt->getEncoding().isIdentity())
     return t;

   // Try fusing padOp with zeros.
   Attribute padCst;
   const bool yieldsConstant =
       matchPattern(padOp.getBody()->getTerminator(),
                    m_Op<tensor::YieldOp>(m_Constant(&padCst)));
   if (yieldsConstant && isIntOrFPZero(padCst))
     return padOp.getSource();
   return t;
 }


 //===----------------------------------------------------------------------===//

 // Sparse tensor loop emitter class implementations

 //===----------------------------------------------------------------------===//


 /// Constructs a loop emitter by forwarding every argument to `initialize`.
 ///
 /// `emitStrategy` must be forwarded explicitly: `initialize` stores its own
 /// `emitStrategy` parameter into the member, so omitting it here would
 /// silently discard the strategy requested by the caller.
 LoopEmitter::LoopEmitter(ValueRange tensors, StringAttr loopTag, bool hasOutput,
                          bool isSparseOut, unsigned numLoops,
                          DependentLvlGetter dimGetter,
                          SparseEmitStrategy emitStrategy) {
   initialize(tensors, loopTag, hasOutput, isSparseOut, numLoops, dimGetter,
              emitStrategy);
 }


 /// Sets up the emitter's bookkeeping for the manifest tensors `ts` plus one
 /// trailing synthetic tensor (id == `ts.size()`).
 ///
 /// \param ts           the manifest tensors; copied into `tensors`.
 /// \param loopTag      stored as-is.
 /// \param hasOutput    stored as-is.
 /// \param isSparseOut  stored as-is.
 /// \param numLoops     capacity reserved for the loop stacks, size of
 ///                     `loopHighs`, and level rank of the synthetic tensor.
 /// \param dimGetter    optional callback returning the (loop, coefficient)
 ///                     dependencies of a (tensor, level); only queried for
 ///                     manifest tensors.
 /// \param emitStrategy stored as-is.
 void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput,
                              bool isSparseOut, unsigned numLoops,
                              DependentLvlGetter dimGetter,
                              SparseEmitStrategy emitStrategy) {
   // First initialize the top-level type of the fields.
   this->loopTag = loopTag;
   this->hasOutput = hasOutput;
   this->isSparseOut = isSparseOut;
   this->emitStrategy = emitStrategy;

   const unsigned numManifestTensors = ts.size();
   const unsigned synTensorId = numManifestTensors;
   const unsigned numTensors = numManifestTensors + 1;
   // tensors array (len == numManifestTensor).
   this->tensors.assign(ts.begin(), ts.end());
   // Arrays with len == numTensor.
   this->valBuffer.assign(numTensors, nullptr);
   this->lvls.resize(numTensors);
   this->iters.resize(numTensors);
   this->spIterVals.resize(numTensors);

   // The loop stacks grow as loops are entered; only reserve capacity here.
   this->loopStack.reserve(numLoops);
   this->loopSeqStack.reserve(numLoops);
   // One (initially null) upper bound per loop. This does not depend on the
   // tensor being processed, so it is set once here rather than once per
   // tensor inside the loop below.
   this->loopHighs.assign(numLoops, nullptr);

   // Index-reduction related fields.
   this->dependentLvlMap.assign(
       numTensors, std::vector<std::vector<std::pair<TensorLevel, unsigned>>>());
   this->sliceMeta.assign(
       numTensors, std::vector<std::vector<std::pair<Value, unsigned>>>());
   this->levelReducedDep.assign(numTensors, std::vector<unsigned>());

   // Initialize nested types of `TensorId`-indexed fields.
   for (TensorId tid = 0; tid < numTensors; tid++) {
     Level lvlRank;
     if (tid == synTensorId) {
       // Synthetic tensor (conceptually) is an all-dense tensor with rank equal
       // to the total number of loops (each level can potentially be mapped to
       // one of the loop being generated).
       lvlRank = numLoops;
     } else {
       const Value t = tensors[tid];
       // a scalar or 0-dimension tensors
       if (isZeroRankedTensorOrScalar(t.getType()))
         continue;

       auto rtp = getRankedTensorType(t);
       const SparseTensorType stt(rtp);
       lvlRank = stt.getLvlRank();
     }

     lvls[tid].resize(lvlRank);
     iters[tid].resize(lvlRank);
     spIterVals[tid].resize(lvlRank);

     // Slice-driven loops related initialization.
     levelReducedDep[tid].assign(lvlRank, 0);
     dependentLvlMap[tid].assign(
         lvlRank, std::vector<std::pair<TensorLevel, unsigned>>());
     sliceMeta[tid].assign(lvlRank, std::vector<std::pair<Value, unsigned>>());
     if (dimGetter && !isSynTensor(tid)) {
       for (Level l = 0; l < lvlRank; l++) {
         std::vector<std::pair<LoopId, unsigned>> deps = dimGetter(tid, l);
         // Sort the loop by order.
         llvm::sort(deps, llvm::less_first());

         dependentLvlMap[tid][l] = std::move(deps);
         unsigned depends = dependentLvlMap[tid][l].size();
         if (depends == 0)
           continue;
         sliceMeta[tid][l].reserve(depends);
       }
     }
   }
 }


 /// Creates the iterator for level `l` of tensor `t`.
 ///
 /// A simple iterator over `*lvls[t][l]` is always built first. It is then
 /// wrapped in
 ///  - a padded iterator, when `tryFoldTensors` folds the tensor (i.e. it is
 ///    defined by a foldable `tensor.pad`) and level `l` is among the padded
 ///    dims; or otherwise
 ///  - a sliced-level iterator, when the tensor's encoding is a slice.
 /// If neither applies, the simple iterator is returned as is.
 std::unique_ptr<SparseIterator>
 LoopEmitter::makeLevelIterator(OpBuilder &builder, Location loc, TensorId t,
                                Level l) {
   Value tensor = tensors[t];
   auto stt = getSparseTensorType(tensor);
   auto simpleIt = makeSimpleIterator(*lvls[t][l], emitStrategy);

   // `tryFoldTensors` returns a different value only when folding succeeded.
   const bool padFolded = tryFoldTensors(tensor) != tensor;
   if (padFolded) {
     auto padOp = tensor.getDefiningOp<tensor::PadOp>();
     assert(padOp);
     if (padOp.getPaddedDims().test(l)) {
       Value lowPad = unFoldOpIntResult(builder, loc, padOp.getMixedLowPad()[l]);
       Value highPad =
           unFoldOpIntResult(builder, loc, padOp.getMixedHighPad()[l]);
       return makePaddedIterator(std::move(simpleIt), lowPad, highPad,
                                 emitStrategy);
     }
   }

   const bool isSlice = stt.hasEncoding() && stt.getEncoding().isSlice();
   if (!isSlice)
     return simpleIt;

   Value sliceOffset = genSliceOffset(builder, loc, tensor, l);
   Value sliceStride = genSliceStride(builder, loc, tensor, l);
   return makeSlicedLevelIterator(std::move(simpleIt), sliceOffset, sliceStride,
                                  lvls[t][l]->getSize(), emitStrategy);
 }


 /// Emits the per-tensor setup code that must exist before any loop is built.
 ///
 /// Steps, in order:
 ///  1. For every ranked manifest tensor, materialize its values buffer into
 ///     `valBuffer` (dense: `bufferization.to_buffer`, optionally passed
 ///     through `updater` for the output tensor; sparse: `ToValuesOp`).
 ///  2. Stop here when the emit strategy is `kSparseIterator`.
 ///  3. If `synSetter` is provided, query it for every loop's upper bound and
 ///     create the synthetic tensor's levels and iterators.
 ///  4. For every ranked manifest tensor, create each level and, for levels
 ///     without loop dependencies, its level iterator.
 ///  5. Build the subsection iterators via `initSubSectIterator`.
 void LoopEmitter::initializeLoopEmit(
     OpBuilder &builder, Location loc, LoopEmitter::OutputUpdater updater,
     LoopEmitter::SynTensorBoundSetter synSetter) {

   // Step 1: the values buffer of every manifest tensor.
   for (TensorId tid = 0, e = getNumManifestTensors(); tid < e; ++tid) {
     // TODO: this should be done through a folding pass after switching to
     // `sparse_tensor.iterate`-based sparsification.
     const Value tensor = tryFoldTensors(tensors[tid]);
     const auto rtp = dyn_cast<RankedTensorType>(tensor.getType());
     // Only scalars are skipped; zero-ranked tensors still need to be
     // bufferized and (probably) filled with zeros by users.
     if (!rtp)
       continue;

     auto stt = getSparseTensorType(tensor);
     const auto shape = rtp.getShape();

     // Perform the required bufferization. Dense inputs materialize from the
     // input tensors. Sparse inputs use sparse primitives to obtain the values.
     // Delegates extra output initialization to clients.
     bool isOutput = isOutputTensor(tid);
     Type elementType = stt.getElementType();

     if (stt.hasEncoding()) {
       // Annotated sparse tensors. The value buffer is also needed for
       // all-dense annotated "sparse" tensors.
       valBuffer[tid] = builder.create<ToValuesOp>(loc, tensor);
       continue;
     }

     // Non-annotated dense tensors.
     BaseMemRefType denseTp = MemRefType::get(shape, elementType);

     // TODO: if we unconditionally use fully dynamic layout here, it breaks
     // some vectorization passes which requires static stride = 1.
     // Is it possible to call vectorization pass after bufferization?
     if (llvm::isa_and_nonnull<tensor::ExtractSliceOp>(tensor.getDefiningOp()))
       denseTp = bufferization::getMemRefTypeWithFullyDynamicLayout(rtp);

     Value denseVal =
         builder.create<bufferization::ToBufferOp>(loc, denseTp, tensor);
     // Dense outputs need special handling.
     if (isOutput && updater)
       denseVal = updater(builder, loc, denseVal, tensor);

     valBuffer[tid] = denseVal;
   }

   // Step 2: the sparse iterator values will only be available after the loop
   // is constructed.
   if (emitStrategy == SparseEmitStrategy::kSparseIterator)
     return;

   // Step 3: for the synthetic tensor, set the high bound of every loop by
   // calling the callback.
   if (synSetter) {
     const TensorId synId = getSynTensorId();
     for (unsigned loopIdx = 0, e = loopHighs.size(); loopIdx < e; ++loopIdx) {
       Value bound = loopHighs[loopIdx] = synSetter(builder, loc, loopIdx);
       auto [synLvl, synIt] =
           makeSynLevelAndIterator(bound, synId, loopIdx, emitStrategy);
       lvls[synId][loopIdx] = std::move(synLvl);
       iters[synId][loopIdx].emplace_back(std::move(synIt));
     }
   }

   // Step 4: for every manifest tensor:
   // * For every level:
   //   * get the positions and coordinates buffers
   //   * get/compute the level-size, which is also used as the upper-bound
   //     on positions.
   for (TensorId tid = 0, e = getNumManifestTensors(); tid < e; ++tid) {
     // TODO: this should be done through a folding pass after switching to
     // `sparse_tensor.iterate`-based sparsification.
     const Value tensor = tryFoldTensors(tensors[tid]);
     const auto rtp = dyn_cast<RankedTensorType>(tensor.getType());
     // Only scalars are skipped; zero-ranked tensors still need to be
     // bufferized and (probably) filled with zeros by users.
     if (!rtp)
       continue;

     auto stt = getSparseTensorType(tensor);
     const Level lvlRank = stt.getLvlRank();

     // Scan all levels of current tensor.
     for (Level lvl = 0; lvl < lvlRank; ++lvl) {
       // Find upper bound in current dimension.
       lvls[tid][lvl] = makeSparseTensorLevel(builder, loc, tensor, tid, lvl);
       // Levels with loop dependencies get their iterators from
       // `initSubSectIterator` instead.
       if (dependentLvlMap[tid][lvl].empty())
         iters[tid][lvl].emplace_back(makeLevelIterator(builder, loc, tid, lvl));
     }
     // NOTE: we can also prepare for 0 lvl here in advance, this will hoist
     // some loop preparation from tensor iteration, but will also (undesirably)
     // hoist the code outside if-conditions.
   }

   // Step 5.
   // TODO: avoid treating subsection iterator as a special case.
   initSubSectIterator(builder, loc);
 }


 /// Builds the subsection iterators for every ranked manifest tensor whose
 /// levels have loop dependencies recorded in `dependentLvlMap`, and appends
 /// them to `iters[t][lvl]`.
 ///
 /// For each tensor, the (loop, tensor, level) dependencies are collected and
 /// sorted by loop id. They are then processed in that order: each step pops
 /// one dependency of the level and creates either a non-empty-subsection
 /// iterator (dependencies still remain on that level) or a
 /// traverse-subsection iterator (the last dependency of that level).
 void LoopEmitter::initSubSectIterator(OpBuilder &builder, Location loc) {

   Value c0 = C_IDX(0);

   for (TensorId t = 0, e = tensors.size(); t < e; t++) {

     auto rtp = dyn_cast<RankedTensorType>(tensors[t].getType());

     // Non-ranked-tensor values (e.g. scalars) have no levels to iterate.
     if (!rtp)

       continue;


     Level lvlRank = SparseTensorType(rtp).getLvlRank();


     // Compute the dependency reduction order.

     // Working copy of the dependency map (the whole map is copied); only the
     // entries of tensor `t` are reversed and consumed below.
     auto remDepStack = dependentLvlMap;

     std::vector<std::tuple<LoopId, TensorId, Level>> depRedOrder;

     for (Level lvl = 0; lvl < lvlRank; lvl++) {

       // Reverse queue into a stack.

       std::reverse(remDepStack[t][lvl].begin(), remDepStack[t][lvl].end());

       for (auto [loop, coeff] : dependentLvlMap[t][lvl])

         depRedOrder.emplace_back(std::make_tuple(loop, t, lvl));

     }


     // No level of this tensor depends on any loop.
     if (depRedOrder.empty())

       continue;


     // Order the dependencies by their first tuple element (the loop id).
     llvm::sort(depRedOrder, llvm::less_first());


     SmallVector<SparseIterator *> lastIter(tensors.size(), nullptr);

     // NOTE: the binding `t` below shadows the outer loop variable; every entry
     // was created with the outer `t`, so both denote the same tensor.
     for (auto [loop, t, lvl] : depRedOrder) {

       std::pair<LoopId, unsigned> curDep = remDepStack[t][lvl].back();

       assert(curDep.first == loop);

       remDepStack[t][lvl].pop_back();


       auto lvlIt = makeLevelIterator(builder, loc, t, lvl);

       // Parent is the iterator most recently created for this tensor in this
       // loop, or, if there is none, the last iterator of the previous level
       // when that level has no loop dependencies.
       const SparseIterator *parent = lastIter[t];

       if (!parent && lvl > 0) {

         if (dependentLvlMap[t][lvl - 1].empty()) {

           parent = iters[t][lvl - 1].back().get();

         }

       }


       std::unique_ptr<SparseIterator> it;

       if (!remDepStack[t][lvl].empty()) {

         // Compute the subsection size.

         // Accumulates ((loopHighs[loop] - 1) * stride + 1) over every
         // dependency still remaining on this level.
         Value size = c0;

         // `loop` here is scoped to this inner for statement only.
         for (auto [loop, stride] : remDepStack[t][lvl]) {

           Value idxMax = SUBI(loopHighs[loop], C_IDX(1));

           size = ADDI(size, ADDI(MULI(idxMax, C_IDX(stride)), C_IDX(1)));

         }

         // `loop` below is again the binding of the enclosing range-for.
         it = makeNonEmptySubSectIterator(builder, loc, parent, loopHighs[loop],

                                          std::move(lvlIt), size, curDep.second,

                                          emitStrategy);

       } else {

         // Last dependency of this level: traverse the subsection described by
         // the iterator most recently appended to `iters[t][lvl]`.
         // NOTE(review): `parent` is dereferenced unconditionally here;
         // presumably it is never null on this path -- confirm.
         const SparseIterator &subSectIter = *iters[t][lvl].back();

         it = makeTraverseSubSectIterator(builder, loc, subSectIter, *parent,

                                          std::move(lvlIt), loopHighs[loop],

                                          curDep.second, emitStrategy);

       }

       lastIter[t] = it.get();

       iters[t][lvl].emplace_back(std::move(it));

     }

   }

 }


 /// Splits the current iterators of `tidLvls` into two output lists:
 /// random-accessible ones are appended to `raIters`, all others to `spIters`.
 /// Afterwards `spIters` is stably sorted by descending iterator kind.
 void LoopEmitter::categorizeIterators(
     ArrayRef<TensorLevel> tidLvls, SmallVectorImpl<SparseIterator *> &raIters,
     SmallVectorImpl<SparseIterator *> &spIters) {
   // Finds out the tensor level that we should use to generate loops. Among all
   // the tensor levels, there is at most one sparse tensor level.
   for (auto [tid, lvl] : unpackTensorLevelRange(tidLvls)) {
     SparseIterator *cur = &getCurIterator(tid, lvl);
     (cur->randomAccessible() ? raIters : spIters).push_back(cur);
   }

   // AffineUnRed > Affine > Slice > Trivial
   const auto higherKindFirst = [](auto lhs, auto rhs) {
     const auto lhsKind = static_cast<uint8_t>(lhs->kind);
     const auto rhsKind = static_cast<uint8_t>(rhs->kind);
     return lhsKind > rhsKind;
   };
   llvm::stable_sort(spIters, higherKindFirst);
 }


 /// Opens a new loop sequence over `tidLvls`.
 ///
 /// Unless the emit strategy is `kSparseIterator`, every (tensor, level) in
 /// `tidLvls` has its `levelReducedDep` counter incremented and is prepared
 /// via `prepareLoopOverTensorAtLvl`. A new entry holding a zero index
 /// constant and a copy of `tidLvls` is then pushed onto `loopSeqStack`.
 void LoopEmitter::enterNewLoopSeq(OpBuilder &builder, Location loc,
                                   ArrayRef<TensorLevel> tidLvls) {
   // TODO: sort
   assert(loopSeqStack.size() == loopStack.size());

   const bool iteratorBased =
       emitStrategy == SparseEmitStrategy::kSparseIterator;
   if (!iteratorBased) {
     // Prepares for all the tensors used in the current loop sequence.
     for (auto [tid, lvl] : unpackTensorLevelRange(tidLvls)) {
       ++levelReducedDep[tid][lvl];
       prepareLoopOverTensorAtLvl(builder, loc, tid, lvl);
     }
   }

   // Universal Index starts from 0.
   Value univIdx = C_IDX(0);
   loopSeqStack.emplace_back(univIdx, tidLvls.vec());
 }


 /// Closes the innermost loop sequence: decrements `levelReducedDep` for every
 /// (tensor, level) recorded in the top entry of `loopSeqStack`, then pops
 /// that entry. `builder` and `loc` are not used.
 void LoopEmitter::exitCurrentLoopSeq(OpBuilder &builder, Location loc) {
   assert(loopSeqStack.size() == loopStack.size() + 1);

   // Depending on whether the slice is resolved or not at current loop sequence,
   // end them in different ways.
   auto &finishedTidLvls = loopSeqStack.back().second;
   for (auto [tid, lvl] : unpackTensorLevelRange(finishedTidLvls)) {
     --levelReducedDep[tid][lvl];
   }

   loopSeqStack.pop_back();
 }


 /// Recursively emits index arithmetic for the affine expression `a`.
 ///
 /// Supported kinds: `DimId` (the induction variable stored in `loopStack` at
 /// the dim's position), `Constant` (an index constant), and `Add` / `Mul` of
 /// supported sub-expressions. Any other kind is unreachable.
 Value LoopEmitter::genAffine(OpBuilder &builder, Location loc, AffineExpr a) {
   const AffineExprKind kind = a.getKind();

   if (kind == AffineExprKind::DimId) {
     // FIXME: since the one callsite in Sparsification passes in a
     // level-expression, the `getPosition` must in fact be a `Dimension`.
     // However, elsewhere we have been lead to expect that `loopIdToOrd`
     // should be indexed by `LoopId`...
     const auto loopId = cast<AffineDimExpr>(a).getPosition();
     return loopStack[loopId].iv;
   }

   if (kind == AffineExprKind::Constant) {
     int64_t cst = cast<AffineConstantExpr>(a).getValue();
     return C_IDX(cst);
   }

   if (kind == AffineExprKind::Add || kind == AffineExprKind::Mul) {
     auto binOp = cast<AffineBinaryOpExpr>(a);
     // The operands are generated inside the op-creating call, exactly as a
     // direct nested call would.
     if (kind == AffineExprKind::Add)
       return ADDI(genAffine(builder, loc, binOp.getLHS()),
                   genAffine(builder, loc, binOp.getRHS()));
     return MULI(genAffine(builder, loc, binOp.getLHS()),
                 genAffine(builder, loc, binOp.getRHS()));
   }

   llvm_unreachable("unexpected affine subscript");
 }


 /// Emits a unit-step `scf.for` (or `scf.parallel` when `isParallel`) over the
 /// bounds produced by `iter.genForCond`, and moves the builder's insertion
 /// point to the start of the loop body.
 ///
 /// `reduc` is updated in place: with the region iter args for `scf.for`, or
 /// with the init values (used as temporary handles) for `scf.parallel`.
 ///
 /// \returns the loop operation and the coordinate for this iteration: the
 /// induction variable for random-accessible iterators (which are located at
 /// it), or the dereferenced coordinate otherwise.
 std::pair<Operation *, Value> LoopEmitter::emitForLoopOverTensorAtLvl(
     OpBuilder &builder, Location loc, SparseIterator &iter,
     MutableArrayRef<Value> reduc, bool isParallel) {

   // TODO: support dynamic slices.
   // Uses the first dimension here to build the loop bound (which is also the
   // biggest range).

   Value step = C_IDX(1);
   auto [lo, hi] = iter.genForCond(builder, loc);
   Operation *loop = nullptr;
   Value iv;
   const unsigned numReduc = reduc.size();

   if (!isParallel) {
     scf::ForOp forOp = builder.create<scf::ForOp>(loc, lo, hi, step, reduc);
     builder.setInsertionPointToStart(forOp.getBody());
     iv = forOp.getInductionVar();

     // In-place update on the reduction variable vector.
     assert(forOp.getNumRegionIterArgs() == reduc.size());
     for (unsigned idx = 0; idx < numReduc; ++idx)
       reduc[idx] = forOp.getRegionIterArg(idx);
     loop = forOp;
   } else {
     scf::ParallelOp parOp =
         builder.create<scf::ParallelOp>(loc, lo, hi, step, reduc);
     builder.setInsertionPointToStart(parOp.getBody());
     assert(parOp.getNumReductions() == reduc.size());
     iv = parOp.getInductionVars()[0];

     // In-place update on the reduction variable vector.
     // Note that the init vals is not the actual reduction variables but instead
     // used as a "special handle" to (temporarily) represent them. The
     // expression on init vals will be moved into scf.reduce and replaced with
     // the block arguments when exiting the loop (see exitForLoop). This is
     // needed as we can not build the actual reduction block and get the actual
     // reduction variable before users fill parallel loop body.
     for (unsigned idx = 0; idx < numReduc; ++idx)
       reduc[idx] = parOp.getInitVals()[idx];
     loop = parOp;
   }
   assert(loop && iv);

   Value crd = iv;
   if (iter.randomAccessible()) {
     // The induction variable is the coordinate itself.
     iter.locate(builder, loc, iv);
   } else {
     // The induction variable is a position; dereference it to a coordinate.
     iter.linkNewScope(iv);
     crd = iter.deref(builder, loc);
   }

   return {loop, crd};
 }


 /// Emits a co-iteration loop over `spIters` through `genCoIteration`. The
 /// universal index of the current loop sequence is passed along when
 /// `needsUniv` is set, and a null value otherwise.
 std::pair<Operation *, Value> LoopEmitter::emitWhileLoopOverTensorsAtLvls(
     OpBuilder &builder, Location loc, ArrayRef<SparseIterator *> spIters,
     MutableArrayRef<Value> reduc, bool needsUniv) {
   Value univIdx = nullptr;
   if (needsUniv)
     univIdx = loopSeqStack.back().first;
   return genCoIteration(builder, loc, spIters, reduc, univIdx);
 }


 /// Decides whether the loop over `spIters` can be emitted as a for loop:
 /// always with no sparse iterator, never with more than one, and with exactly
 /// one only if that iterator reports `iteratableByFor()`.
 bool LoopEmitter::shouldIteratedByForLoop(ArrayRef<SparseIterator *> spIters) {
   switch (spIters.size()) {
   case 0:
     return true;
   case 1:
     return spIters.front()->iteratableByFor();
   default:
     // If we need to co-iterate over two sparse tensors, we need a while loop
     return false;
   }
 }


 /// Enters case `caseIdx` of the `CoIterateOp` on top of the loop stack.
 ///
 /// Records `caseBit` in the op's case list, creates the entry block of the
 /// case region, and moves the builder into it. The innermost loop's induction
 /// variable, `reduc`, and the `spIterVals` of the loop's tensor levels are
 /// updated from the new block's arguments; levels not selected by `caseBit`
 /// get a null iterator value.
 ///
 /// \returns the region of the entered case.
 Region *LoopEmitter::enterCurrentCoIterationCase(OpBuilder &builder,
                                                  Location loc,
                                                  I64BitSet caseBit,
                                                  unsigned caseIdx,
                                                  MutableArrayRef<Value> reduc) {
   auto coIterOp = cast<CoIterateOp>(loopStack.back().loop);

   // Overwrite the `caseIdx`-th entry of the op's case attribute.
   SmallVector<Attribute> caseAttrs(
       coIterOp.getCases().getAsRange<Attribute>());
   caseAttrs[caseIdx] = builder.getI64IntegerAttr(caseBit);
   coIterOp.setCasesAttr(builder.getArrayAttr(caseAttrs));

   Region &caseRegion = coIterOp.getRegion(caseIdx);
   assert(caseRegion.getBlocks().empty() &&
          "re-initialize the same coiteration case region.");

   // Block argument layout:
   //   1. the used coordinates, all of index type;
   SmallVector<Type> blockArgTps(coIterOp.getCrdUsedLvls().count(),
                                 builder.getIndexType());
   //   2. the user-provided iteration arguments;
   TypeRange iterArgsTps = coIterOp.getInitArgs().getTypes();
   blockArgTps.append(iterArgsTps.begin(), iterArgsTps.end());
   //   3. the iterators that define the actual iteration space of this case.
   for (auto bit : caseBit.bits()) {
     auto spaceTp = cast<IterSpaceType>(coIterOp.getIterSpaces()[bit].getType());
     blockArgTps.push_back(spaceTp.getIteratorType());
   }
   SmallVector<Location> argLocs(blockArgTps.size(), loc);
   caseRegion.emplaceBlock().addArguments(blockArgTps, argLocs);

   // Entering the new region scope, updating the SSA chain.
   builder.setInsertionPointToStart(&caseRegion.front());
   // Update the coordinates.
   loopStack.back().iv = coIterOp.getCrds(caseIdx).front();
   // Updates loop iteration arguments.
   ValueRange iterArgs = coIterOp.getRegionIterArgs(caseIdx);
   llvm::copy(iterArgs, reduc.begin());
   // Updates sparse iterator values.
   ValueRange regionIters = coIterOp.getRegionIterators(caseIdx);
   ArrayRef<TensorLevel> tidLvls = loopStack.back().tidLvls;
   for (auto [idx, tl] : llvm::enumerate(unpackTensorLevelRange(tidLvls))) {
     Value iterVal = nullptr;
     if (caseBit[idx]) {
       // Consume the next iterator block argument for this level.
       iterVal = regionIters.front();
       regionIters = regionIters.drop_front();
     }
     spIterVals[tl.first][tl.second] = iterVal;
   }
   // Must have consumed all iterator SSA values.
   assert(regionIters.empty());
   return &caseRegion;
 }


 /// Emits the loop that (co-)iterates over the tensor levels `tidLvls` and
 /// pushes it onto `loopStack`.
 ///
 /// With the `kSparseIterator` strategy, a single tensor level produces an
 /// `IterateOp` (and the builder is moved into its body), while several levels
 /// produce a `CoIterateOp` with `numCases` cases. Otherwise either a for loop
 /// (possibly parallel) or a while loop is emitted, and every
 /// random-accessible iterator is located at the resulting induction value.
 ///
 /// `reduc` is updated in place by the emitted loop helpers.
 /// \returns the created loop operation.
 Operation *LoopEmitter::enterCoIterationOverTensorsAtLvls(
     OpBuilder &builder, Location loc, ArrayRef<TensorLevel> tidLvls,
     unsigned numCases, MutableArrayRef<Value> reduc, bool tryParallel,
     bool needsUniv) {
   // TODO: Argument `numCases` only used when generating iterator-based sparse
   // loops. Simplify the code upon feature complete.
   // TODO: handle coiteration with sparse iterator.
   if (emitStrategy == SparseEmitStrategy::kSparseIterator) {
     // Extracts the iteration space of level `lvl` of tensor `tid`; levels
     // below the first are extracted relative to the parent iterator value.
     auto extractSpace = [&](TensorId tid, Level lvl) -> ExtractIterSpaceOp {
       Value t = tensors[tid];
       if (lvl == 0)
         return builder.create<ExtractIterSpaceOp>(loc, t);
       return builder.create<ExtractIterSpaceOp>(loc, t,
                                                 spIterVals[tid][lvl - 1], lvl);
     };

     if (tidLvls.size() == 1) {
       auto [tid, lvl] = unpackTensorLevel(tidLvls.front());

       // Extract and iterate over the iteration space.
       ExtractIterSpaceOp spaceOp = extractSpace(tid, lvl);
       IterateOp iterOp =
           builder.create<IterateOp>(loc, spaceOp.getExtractedSpace(), reduc);
       spIterVals[tid][lvl] = iterOp.getIterator();

       // Update the reduction variables.
       llvm::copy(iterOp.getRegionIterArgs(), reduc.begin());
       // Set the insertion point to loop body.
       builder.setInsertionPointToStart(iterOp.getBody());
       loopStack.emplace_back(tidLvls, iterOp, builder.getInsertionBlock(),
                              iterOp.getCrds().front(), loopTag);
       return iterOp;
     }

     // CoIteration Loops.
     SmallVector<Value> spaces;
     for (auto [tid, lvl] : unpackTensorLevelRange(tidLvls)) {
       ExtractIterSpaceOp spaceOp = extractSpace(tid, lvl);
       spaces.push_back(spaceOp.getExtractedSpace());
     }
     auto coIterOp = builder.create<CoIterateOp>(loc, spaces, reduc, numCases);
     // The CoIterationOp does not have insertion block nor induction variable.
     // TODO: the `struct LoopInfo` should be simplified after full migration.
     loopStack.emplace_back(tidLvls, coIterOp, /*insertion block*/ nullptr,
                            /*induction variable*/ nullptr, loopTag);
     return coIterOp;
   }

   // TODO: support multiple return on parallel for?
   tryParallel = tryParallel && reduc.size() <= 1;

   SmallVector<SparseIterator *> raIters;
   SmallVector<SparseIterator *> spIters;
   categorizeIterators(tidLvls, raIters, spIters);

   // Only when there is at least one sparse conditions, do we really need the
   // universal index.
   // TODO: Maybe we should instead requires merger to pass in a valid value at
   // the first place instead of adjusting it in LoopEmitter?
   needsUniv = !spIters.empty() && needsUniv;

   // The TensorLevel used for loop conditions.
   // If there is any sparse level, we need to use the sparse condition.
   // If all levels are dense, we can pick arbitrary one (dense slice-driven loop
   // can be generated using a simple ForOp as well).
   Operation *loopOp = nullptr;
   Value iv = nullptr;
   SmallVector<TensorLevel> condTidLvls;
   const auto recordLevel = [&](const SparseIterator &it) {
     condTidLvls.push_back(makeTensorLevel(it.tid, it.lvl));
   };

   // Generates loops differently depending on whether we need a slice-driven
   // loop or a simple level traversal loop.
   const bool useForLoop = shouldIteratedByForLoop(spIters) && !needsUniv;
   if (useForLoop) {
     assert(spIters.size() <= 1);
     SparseIterator &it = spIters.empty() ? *raIters.front() : *spIters.front();
     auto loopAndIv =
         emitForLoopOverTensorAtLvl(builder, loc, it, reduc, tryParallel);
     loopOp = loopAndIv.first;
     iv = loopAndIv.second;
     recordLevel(it);
   } else {
     for (SparseIterator *it : spIters)
       recordLevel(*it);

     if (needsUniv) {
       for (SparseIterator *it : raIters)
         recordLevel(*it);
     }

     auto loopAndIv =
         emitWhileLoopOverTensorsAtLvls(builder, loc, spIters, reduc, needsUniv);
     loopOp = loopAndIv.first;
     iv = loopAndIv.second;
   }

   // Enter dense tensor levels.
   for (SparseIterator *denseIt : raIters)
     denseIt->locate(builder, loc, iv);

   // NOTE: we can also prepare for next dim here in advance
   // Pushes the loop into stack.
   loopStack.emplace_back(condTidLvls, loopOp, builder.getInsertionBlock(), iv,
                          loopTag);
   return loopOp;
 }


 /// Initializes the current iterator of `tidLvl` and positions it at the
 /// coordinate computed from `lvlExpr` via `genAffine`. The parent passed to
 /// `genInit` is the last iterator of the previous level, or null at level 0.
 /// The iterator is asserted to be trivial and random-accessible.
 void LoopEmitter::locateLvlAtAffineAddress(OpBuilder &builder, Location loc,
                                            TensorLevel tidLvl,
                                            AffineExpr lvlExpr) {
   auto [tid, lvl] = unpackTensorLevel(tidLvl);

   const SparseIterator *parent = nullptr;
   if (lvl != 0)
     parent = iters[tid][lvl - 1].back().get();

   SparseIterator &curIt = getCurIterator(tid, lvl);
   curIt.genInit(builder, loc, parent);

   assert(curIt.kind == IterKind::kTrivial && curIt.randomAccessible());
   Value lvlCrd = genAffine(builder, loc, lvlExpr);
   curIt.locate(builder, loc, lvlCrd);
 }


 /// Initializes the current iterator of (`tid`, `lvl`) before a loop over it is
 /// emitted, and locates it at coordinate 0 when it is random-accessible.
 void LoopEmitter::prepareLoopOverTensorAtLvl(OpBuilder &builder, Location loc,
                                              TensorId tid, Level lvl) {
   // A parent iterator is passed explicitly only when this is not the first
   // level and the level has no loop dependencies. The first level has no
   // parent, and a subsection-based iterator has already memorized its own.
   const bool passParent = lvl != 0 && dependentLvlMap[tid][lvl].empty();

   const SparseIterator *parent =
       passParent ? iters[tid][lvl - 1].back().get() : nullptr;
   SparseIterator &curIt = getCurIterator(tid, lvl);
   curIt.genInit(builder, loc, parent);

   // Locates the random accessible iterator to 0.
   if (curIt.randomAccessible())
     curIt.locate(builder, loc, C_IDX(0));
 }


 /// Finalizes the loop on top of `loopStack`, which must be an `IterateOp`
 /// (sparse-iterator strategy), an `scf.for`, or an `scf.parallel`.
 ///
 /// The loop's yield/reduce terminator is emitted from `reduc`, the insertion
 /// point is moved after the loop, and `reduc` is overwritten in place with the
 /// loop's results. For `scf.parallel`, the single binary reduction expression
 /// built on the init value is cloned into a new `scf.reduce` block and the
 /// original expression is erased.
 void LoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc,
                               MutableArrayRef<Value> reduc) {
   const LoopInfo &loopInfo = loopStack.back();

   // Case 1: iterator-based loop.
   if (emitStrategy == SparseEmitStrategy::kSparseIterator) {
     auto iterateOp = llvm::cast<IterateOp>(loopInfo.loop);
     assert(reduc.size() == iterateOp.getNumResults());
     rewriter.create<sparse_tensor::YieldOp>(loc, reduc);
     // Exit the loop.
     rewriter.setInsertionPointAfter(iterateOp);
     // In-place update reduction variables.
     llvm::copy(iterateOp.getResults(), reduc.begin());
     return;
   }

   // Case 2: sequential for loop.
   if (auto forOp = llvm::dyn_cast<scf::ForOp>(loopInfo.loop)) {
     if (!reduc.empty()) {
       assert(reduc.size() == forOp.getNumResults());
       rewriter.create<scf::YieldOp>(loc, reduc);
     }
     // Exit the loop.
     rewriter.setInsertionPointAfter(forOp);
     // In-place update reduction variables.
     llvm::copy(forOp.getResults(), reduc.begin());
     return;
   }

   // Case 3: parallel loop.
   auto parOp = llvm::cast<scf::ParallelOp>(loopInfo.loop);
   if (!reduc.empty()) {
     assert(reduc.size() == parOp.getInitVals().size() && reduc.size() == 1);
     Operation *reduceExpr = reduc.front().getDefiningOp();
     // Reduction expression should have no use.
     assert(reduceExpr->getUses().empty());
     // This must be a binary operation.
     // NOTE: This is users' responsibility to ensure the operation are
     // commutative.
     assert(reduceExpr->getNumOperands() == 2 &&
            reduceExpr->getNumResults() == 1);

     // One of the operands must be the init value (which is also the
     // previous reduction value); the other one is the value being reduced.
     Value initVal = parOp.getInitVals().front();
     Value reducedVal;
     if (reduceExpr->getOperand(0) == initVal)
       reducedVal = reduceExpr->getOperand(1);
     else if (reduceExpr->getOperand(1) == initVal)
       reducedVal = reduceExpr->getOperand(0);
     assert(reducedVal);
 #ifndef NDEBUG
     // The reduction expression should be the only user of the reduction val
     // inside the parallel for.
     unsigned usersInLoop = 0;
     for (Operation *user : initVal.getUsers())
       if (user->getParentOp() == parOp)
         ++usersInLoop;
     assert(usersInLoop == 1);
 #endif // NDEBUG

     rewriter.setInsertionPointAfter(reduceExpr);
     auto reduceOp = rewriter.create<scf::ReduceOp>(loc, reducedVal);
     // Attach to the reduction op.
     Block *reduceBlock = &reduceOp.getReductions().front().front();
     rewriter.setInsertionPointToEnd(reduceBlock);
     Operation *clonedExpr = rewriter.clone(*reduceExpr);
     // Replaces arguments of the reduction expression by using the block
     // arguments from scf.reduce.
     rewriter.modifyOpInPlace(clonedExpr, [&]() {
       clonedExpr->setOperands(reduceBlock->getArguments());
     });
     // Erases the out-dated reduction expression.
     rewriter.eraseOp(reduceExpr);
     rewriter.setInsertionPointToEnd(reduceBlock);
     rewriter.create<scf::ReduceReturnOp>(loc, clonedExpr->getResult(0));
   }
   rewriter.setInsertionPointAfter(parOp);
   // In-place update reduction variables.
   llvm::copy(parOp.getResults(), reduc.begin());
 }


 // Closes the innermost loop on `loopStack`, which must be an scf.while
 // operation: forwards the iterators of the loop, yields the new loop-carried
 // values, overwrites `reduc` in place with the matching loop results, and
 // moves the insertion point right behind the loop.
 void LoopEmitter::exitWhileLoop(OpBuilder &builder, Location loc,
                                 MutableArrayRef<Value> reduc) {
   const LoopInfo &curLoop = loopStack.back();
   auto loopOp = llvm::cast<scf::WhileOp>(curLoop.loop);
   Value iv = curLoop.iv;
   Value one = C_IDX(1);

   // Finalize the induction. The induction could instead be performed in the
   // individual if-branches, which avoids re-evaluating the conditions, but
   // that results in a rather elaborate forest of yield instructions during
   // code generation. Performing the induction after the if-statements also
   // more closely resembles code generated by TACO.
   SmallVector<Value> yieldVals;
   ValueRange pendingRes = loopOp.getResults();

   for (auto [tid, lvl] : unpackTensorLevelRange(curLoop.tidLvls)) {
     SparseIterator &iter = getCurIterator(tid, lvl);
     if (iter.randomAccessible()) {
       // Make sure the randomly accessible (dense) iterator is set to the
       // right position according to the universal index.
       Value uniIdx = loopOp.getResults().back();
       iter.locate(builder, loc, uniIdx);
       continue;
     }
     // Forward the sparse iterator when its coordinate equals the loop's
     // induction value.
     Value atIv = CMPI(eq, iter.getCrd(), iv);
     iter.forwardIf(builder, loc, atIv);
     ValueRange cursor = iter.getCursor();
     yieldVals.append(cursor.begin(), cursor.end());
     // Following loops continue iteration from the break point of the
     // current while loop.
     pendingRes = iter.linkNewScope(pendingRes);
   }

   // Reduction values from users: yield the current value and replace it, in
   // place, with the corresponding loop result.
   for (Value &red : reduc) {
     yieldVals.push_back(red);
     red = pendingRes.front();
     pendingRes = pendingRes.drop_front();
   }

   // An (optional) universal index.
   if (yieldVals.size() < loopOp.getNumResults()) {
     assert(yieldVals.size() + 1 == loopOp.getNumResults());
     // The last one is the universal index.
     yieldVals.push_back(ADDI(iv, one));
     // Update the loop starting point of the current loop sequence.
     loopSeqStack.back().first = loopOp->getResults().back();
   }

   if (!yieldVals.empty())
     YIELD(yieldVals);

   builder.setInsertionPointAfter(loopOp);
 }


 // Exits the innermost loop on `loopStack`: emits the loop terminator that
 // carries `reduc`, overwrites `reduc` in place with the loop results, and
 // pops the loop from `loopStack`.
 void LoopEmitter::exitCurrentLoop(RewriterBase &rewriter, Location loc,
                                   MutableArrayRef<Value> reduc) {
   // Clean up the values; this helps us discover potential bugs at an earlier
   // stage (instead of silently using a wrong value).
   const LoopInfo &curLoop = loopStack.back();

   if (emitStrategy == SparseEmitStrategy::kSparseIterator) {
     Operation *loopOp = curLoop.loop;
     // Only sparse_tensor.iterate gets a yield created here.
     if (isa<IterateOp>(loopOp))
       rewriter.create<sparse_tensor::YieldOp>(loc, reduc);

     // Leave the loop and hand its results back to the caller.
     rewriter.setInsertionPointAfter(loopOp);
     llvm::copy(loopOp->getResults(), reduc.begin());
     loopStack.pop_back();
     return;
   }

   // Sets the insertion point to the right position: the end of the user code
   // block, or right before its trailing yield when there is one.
   Block *body = curLoop.userCodeBlock;
   if (!body->empty() && llvm::isa<scf::YieldOp>(&body->back())) {
     // scf::While/For inserts an implicit yield op when there are no loop
     // iter args. In this case, the code has to be inserted before the yield.
     Operation &implicitYield = body->back();
     assert(implicitYield.getNumResults() == 0);
     rewriter.setInsertionPoint(&implicitYield);
   } else {
     rewriter.setInsertionPointToEnd(body);
   }

   // Dispatch on the kind of loop being closed.
   if (llvm::isa<scf::WhileOp>(curLoop.loop))
     exitWhileLoop(rewriter, loc, reduc);
   else
     exitForLoop(rewriter, loc, reduc);

   assert(loopStack.size() == loopSeqStack.size());
   loopStack.pop_back();
 }


 //===----------------------------------------------------------------------===//

 // Loop generation utils

 //===----------------------------------------------------------------------===//


 // Builds an scf.while loop that co-iterates over `spIters`. The loop carries
 // the cursor values of every iterator, the user reduction values in `reduc`
 // and, when `uniIdx` is set, a trailing universal index. On return the
 // builder is positioned inside the "after" block, `reduc` has been
 // overwritten in place with block arguments of that block, and the result is
 // the loop together with either the trailing universal-index block argument
 // or, when no universal index is used, the minimum coordinate computed over
 // all iterators.
 std::pair<Operation *, Value> sparse_tensor::genCoIteration(
     OpBuilder &builder, Location loc, ArrayRef<SparseIterator *> spIters,
     MutableArrayRef<Value> reduc, Value uniIdx, bool userReducFirst) {
   // NOTE: the slice driven tensor-related reduction variable must
   // appear before normal tensors.

   // The loop-carried values of the while loop.
   SmallVector<Value> loopArgs;
   auto appendReduc = [&]() { loopArgs.append(reduc.begin(), reduc.end()); };

   // TODO: remove the flag after full migration. Currently
   // `sparse_tensor.coiterate` operation (must) put user provided reduction
   // values at the front of the block list, while direct sparsification to scf
   // loops put them at the end.
   if (userReducFirst)
     appendReduc();

   // One group of cursor values per iterator.
   for (SparseIterator *iter : spIters) {
     ValueRange cursor = iter->getCursor();
     loopArgs.append(cursor.begin(), cursor.end());
   }

   if (!userReducFirst)
     appendReduc();

   // The universal index, if any, always comes last.
   if (uniIdx)
     loopArgs.push_back(uniIdx);

   // Ensures all operands are valid.
   assert(llvm::all_of(loopArgs, [](Value v) { return v != nullptr; }));
   TypeRange argTypes = ValueRange(loopArgs).getTypes();
   auto whileOp = builder.create<scf::WhileOp>(loc, argTypes, loopArgs);

   // Both regions take one block argument per loop-carried value.
   SmallVector<Location> argLocs(argTypes.size(), loc);
   Block *before =
       builder.createBlock(&whileOp.getBefore(), {}, argTypes, argLocs);
   Block *after =
       builder.createBlock(&whileOp.getAfter(), {}, argTypes, argLocs);

   // Generates loop conditions: the conjunction of the conditions of all
   // iterators.
   // NOTE(review): with `userReducFirst` the reduction values are the leading
   // block arguments, yet the iterators below are handed the leading
   // arguments too and the reductions are read from the remaining ones.
   // Confirm that this argument layout is handled as intended.
   builder.setInsertionPointToStart(before);
   ValueRange beforeArgs = before->getArguments();
   Value loopCond = nullptr; // bool value for the loop condition.

   for (SparseIterator *iter : spIters) {
     auto [iterCond, restArgs] = iter->genWhileCond(builder, loc, beforeArgs);
     if (loopCond)
       loopCond = ANDI(loopCond, iterCond);
     else
       loopCond = iterCond;
     beforeArgs = restArgs;
   }
   // The remaining block arguments are user-provided reduction values and an
   // optional universal index. Make sure their sizes match.
   assert(beforeArgs.size() == reduc.size() + (uniIdx ? 1 : 0));
   builder.create<scf::ConditionOp>(loc, loopCond, before->getArguments());

   // Generates loop body.
   builder.setInsertionPointToStart(after);
   ValueRange afterArgs = after->getArguments();

   for (SparseIterator *iter : spIters) {
     afterArgs = iter->linkNewScope(afterArgs);
     // Dereference the iterator to cache the coordinate.
     iter->deref(builder, loc);
   }

   // In-place update on reduction variables.
   for (unsigned idx = 0, numReduc = reduc.size(); idx != numReduc; ++idx)
     reduc[idx] = afterArgs[idx];

   Value minCrd;
   if (uniIdx) {
     // With a universal index, that index is the minimal position.
     minCrd = whileOp.getAfterArguments().back();
   } else {
     // Otherwise, finds the minimum coordinate among all iterators.
     for (SparseIterator *iter : spIters) {
       Value crd = iter->getCrd();
       if (!minCrd) {
         minCrd = crd;
         continue;
       }
       Value isSmaller = CMPI(ult, crd, minCrd);
       minCrd = SELECT(isSmaller, crd, minCrd);
     }
   }

   return {whileOp, minCrd};
 }


 // Undefine every file-local shorthand macro defined at the top of this file
 // (including REMUI and DIVUI) so that none of them leaks past this point.
 #undef CMPI
 #undef C_IDX
 #undef YIELD
 #undef ADDI
 #undef ANDI
 #undef SUBI
 #undef MULI
 #undef REMUI
 #undef DIVUI
 #undef SELECT

Bufferization.h

CodegenUtils.h

copy
static void copy(Location loc, Value dst, Value src, Value size, OpBuilder &builder)
Copies the given number of bytes from src to dst pointers.
Definition: ConvertLaunchFuncToLLVMCalls.cpp:71

Utils.h

genSliceStride
static Value genSliceStride(OpBuilder &builder, Location loc, Value tensor, Level lvl)
Definition: LoopEmitter.cpp:72

tryFoldTensors
static Value tryFoldTensors(Value t)
Definition: LoopEmitter.cpp:93

SUBI
#define SUBI(lhs, rhs)
Definition: LoopEmitter.cpp:37

MULI
#define MULI(lhs, rhs)
Definition: LoopEmitter.cpp:38

genSliceOffset
static Value genSliceOffset(OpBuilder &builder, Location loc, Value tensor, Level lvl)
Definition: LoopEmitter.cpp:66

C_IDX
#define C_IDX(v)
Definition: LoopEmitter.cpp:33

ANDI
#define ANDI(lhs, rhs)
Definition: LoopEmitter.cpp:36

CMPI
#define CMPI(p, l, r)
Definition: LoopEmitter.cpp:29

isIntOrFPZero
static bool isIntOrFPZero(Attribute attr)
Definition: LoopEmitter.cpp:78

dumpIndexMemRef
static LLVM_ATTRIBUTE_UNUSED void dumpIndexMemRef(OpBuilder &builder, Location loc, Value memref)
Definition: LoopEmitter.cpp:48

YIELD
#define YIELD(vs)
Definition: LoopEmitter.cpp:34

SELECT
#define SELECT(c, l, r)
Definition: LoopEmitter.cpp:41

ADDI
#define ADDI(lhs, rhs)
Definition: LoopEmitter.cpp:35

unFoldOpIntResult
static Value unFoldOpIntResult(OpBuilder &builder, Location loc, OpFoldResult ofr)
Definition: LoopEmitter.cpp:86

LoopEmitter.h

min
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Definition: PolynomialApproximation.cpp:206

SparseTensorType.h

VectorOps.h

llvm::ArrayRef
Definition: LLVM.h:48

llvm::MutableArrayRef
Definition: LLVM.h:62

llvm::SmallVectorImpl
Definition: LLVM.h:74

llvm::SmallVector
Definition: LLVM.h:72

llvm::function_ref
Definition: LLVM.h:90

mlir::AffineExpr
Base type for affine expression.
Definition: AffineExpr.h:68

mlir::AffineExpr::getKind
AffineExprKind getKind() const
Return the classification for this type.
Definition: AffineExpr.cpp:35

mlir::Attribute
Attributes are known-constant values of operations.
Definition: Attributes.h:25

mlir::BaseMemRefType
This class provides a shared interface for ranked and unranked memref types.
Definition: BuiltinTypes.h:104

mlir::Block
Block represents an ordered list of Operations.
Definition: Block.h:33

mlir::Block::addArguments
iterator_range< args_iterator > addArguments(TypeRange types, ArrayRef< Location > locs)
Add one argument to the argument list for each type specified in the list.
Definition: Block.cpp:162

mlir::Block::getArguments
BlockArgListType getArguments()
Definition: Block.h:87

mlir::Block::front
Operation & front()
Definition: Block.h:153

mlir::Builder::getI64IntegerAttr
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:110

mlir::Builder::getArrayAttr
ArrayAttr getArrayAttr(ArrayRef< Attribute > value)
Definition: Builders.cpp:264

mlir::Builder::getIndexType
IndexType getIndexType()
Definition: Builders.cpp:53

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76

mlir::OpBuilder
This class helps build Operations.
Definition: Builders.h:205

mlir::OpBuilder::createBlock
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes={}, ArrayRef< Location > locs={})
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Definition: Builders.cpp:428

mlir::OpBuilder::clone
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:551

mlir::OpBuilder::setInsertionPointToStart
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:429

mlir::OpBuilder::setInsertionPoint
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:396

mlir::OpBuilder::setInsertionPointToEnd
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Definition: Builders.h:434

mlir::OpBuilder::create
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:455

mlir::OpBuilder::setInsertionPointAfter
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
Definition: Builders.h:410

mlir::OpBuilder::getInsertionBlock
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Definition: Builders.h:440

mlir::OpFoldResult
This class represents a single result from folding an operation.
Definition: OpDefinition.h:271

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88

mlir::Operation::getOperand
Value getOperand(unsigned idx)
Definition: Operation.h:350

mlir::Operation::getResult
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407

mlir::Operation::getNumOperands
unsigned getNumOperands()
Definition: Operation.h:346

mlir::Operation::setOperands
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
Definition: Operation.cpp:237

mlir::Operation::getResults
result_range getResults()
Definition: Operation.h:415

mlir::Operation::getUses
use_range getUses()
Returns a range of all uses, which is useful for iterating over all uses.
Definition: Operation.h:846

mlir::Operation::getNumResults
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:404

mlir::Region
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26

mlir::Region::getBlocks
BlockListType & getBlocks()
Definition: Region.h:45

mlir::Region::front
Block & front()
Definition: Region.h:65

mlir::Region::emplaceBlock
Block & emplaceBlock()
Definition: Region.h:46

mlir::RewriterBase
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:358

mlir::RewriterBase::eraseOp
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
Definition: PatternMatch.cpp:157

mlir::RewriterBase::modifyOpInPlace
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
Definition: PatternMatch.h:593

mlir::TypeRange
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:37

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74

mlir::ValueRange
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387

mlir::ValueRange::getTypes
type_range getTypes() const

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96

mlir::Value::getType
Type getType() const
Return the type of this value.
Definition: Value.h:105

mlir::Value::getUsers
user_range getUsers() const
Definition: Value.h:218

mlir::Value::getDefiningOp
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20

mlir::sparse_tensor::I64BitSet
A simple wrapper to encode a bitset of (at most 64) levels, currently used by sparse_tensor....
Definition: SparseTensor.h:64

mlir::sparse_tensor::I64BitSet::bits
iterator_range< const_set_bits_iterator > bits() const
Definition: SparseTensor.h:75

mlir::sparse_tensor::LoopEmitter::exitCurrentLoop
void exitCurrentLoop(RewriterBase &rewriter, Location loc, MutableArrayRef< Value > reduc={})
Generates code to exit the current loop (e.g., generates yields, forwards loop induction variables,...
Definition: LoopEmitter.cpp:860

mlir::sparse_tensor::LoopEmitter::locateLvlAtAffineAddress
void locateLvlAtAffineAddress(OpBuilder &builder, Location loc, TensorLevel tidLvl, AffineExpr lvlExpr)
Emits the address for a dense level based on the value evaluated by the provided affine expression.
Definition: LoopEmitter.cpp:694

mlir::sparse_tensor::LoopEmitter::LoopEmitter
LoopEmitter()=default

mlir::sparse_tensor::LoopEmitter::enterNewLoopSeq
void enterNewLoopSeq(OpBuilder &builder, Location loc, ArrayRef< TensorLevel > tidLvls)
Enters a new loop sequence, the loops within the same sequence starts from the break points of previo...
Definition: LoopEmitter.cpp:413

mlir::sparse_tensor::LoopEmitter::genAffine
Value genAffine(OpBuilder &builder, Location loc, AffineExpr a)
Generates code to compute an affine expression whose variables are LoopIds (i.e., cast<AffineDimExpr>...
Definition: LoopEmitter.cpp:441

mlir::sparse_tensor::LoopEmitter::enterCurrentCoIterationCase
Region * enterCurrentCoIterationCase(OpBuilder &builder, Location loc, I64BitSet caseBit, unsigned caseIdx, MutableArrayRef< Value > reduc)
Definition: LoopEmitter.cpp:541

mlir::sparse_tensor::LoopEmitter::enterCoIterationOverTensorsAtLvls
Operation * enterCoIterationOverTensorsAtLvls(OpBuilder &builder, Location loc, ArrayRef< TensorLevel > tidLvls, unsigned numCases, MutableArrayRef< Value > reduc={}, bool isParallel=false, bool needsUniv=false)
Emits a co-iteration loop over a set of tensors.
Definition: LoopEmitter.cpp:594

mlir::sparse_tensor::LoopEmitter::makeTensorLevel
TensorLevel makeTensorLevel(TensorId t, Level l) const
Compresses a TensorId and Level into a TensorLevel.
Definition: LoopEmitter.h:203

mlir::sparse_tensor::LoopEmitter::getNumManifestTensors
unsigned getNumManifestTensors() const
Gets the total number of manifest tensors (excluding the synthetic tensor).
Definition: LoopEmitter.h:185

mlir::sparse_tensor::LoopEmitter::initialize
void initialize(ValueRange tensors, StringAttr loopTag=nullptr, bool hasOutput=false, bool isSparseOut=false, unsigned numLoops=0, DependentLvlGetter getter=nullptr, SparseEmitStrategy emitStrategy=SparseEmitStrategy::kFunctional)
Takes an array of input tensors, which the generated loops will iterate over.
Definition: LoopEmitter.cpp:123

mlir::sparse_tensor::LoopEmitter::unpackTensorLevel
std::pair< TensorId, Level > unpackTensorLevel(TensorLevel tidLvl) const
De-compresses a TensorLevel back to a pair of TensorId and Level.
Definition: LoopEmitter.h:208

mlir::sparse_tensor::LoopEmitter::unpackTensorLevelRange
auto unpackTensorLevelRange(ContainerTy &&c) const
Converts a range of TensorLevel to a range of std::pair<TensorId, Level>
Definition: LoopEmitter.h:215

mlir::sparse_tensor::LoopEmitter::initializeLoopEmit
void initializeLoopEmit(OpBuilder &builder, Location loc, OutputUpdater updater=nullptr, SynTensorBoundSetter synSetter=nullptr)
Starts a loop emitting session by generating all the buffers needed for iterating over the tensors.
Definition: LoopEmitter.cpp:231

mlir::sparse_tensor::LoopEmitter::exitCurrentLoopSeq
void exitCurrentLoopSeq(OpBuilder &builder, Location loc)
Exits the current loop sequence, this will reset universal index to 0.
Definition: LoopEmitter.cpp:430

mlir::sparse_tensor::LoopEmitter::getSynTensorId
TensorId getSynTensorId() const
Gets the TensorId for synthetic tensor.
Definition: LoopEmitter.h:194

mlir::sparse_tensor::SparseIterator
Helper class that generates loop conditions, etc, to traverse a sparse tensor level.
Definition: SparseTensorIterator.h:148

mlir::sparse_tensor::SparseIterator::genForCond
virtual std::pair< Value, Value > genForCond(OpBuilder &b, Location l)
Definition: SparseTensorIterator.h:285

mlir::sparse_tensor::SparseIterator::genInit
void genInit(OpBuilder &b, Location l, const SparseIterator *p)
Definition: SparseTensorIterator.cpp:980

mlir::sparse_tensor::SparseIterator::getCrd
Value getCrd() const
Definition: SparseTensorIterator.h:187

mlir::sparse_tensor::SparseIterator::lvl
const unsigned lvl
Definition: SparseTensorIterator.h:337

mlir::sparse_tensor::SparseIterator::locate
void locate(OpBuilder &b, Location l, Value crd)
Definition: SparseTensorIterator.cpp:1007

mlir::sparse_tensor::SparseIterator::forwardIf
virtual ValueRange forwardIf(OpBuilder &b, Location l, Value cond)
Definition: SparseTensorIterator.cpp:1045

mlir::sparse_tensor::SparseIterator::linkNewScope
ValueRange linkNewScope(ValueRange pos)
Definition: SparseTensorIterator.h:313

mlir::sparse_tensor::SparseIterator::tid
const unsigned tid
Definition: SparseTensorIterator.h:337

mlir::sparse_tensor::SparseIterator::getCursor
ValueRange getCursor() const
Definition: SparseTensorIterator.h:189

mlir::sparse_tensor::SparseIterator::deref
Value deref(OpBuilder &b, Location l)
Definition: SparseTensorIterator.cpp:1021

mlir::sparse_tensor::SparseIterator::randomAccessible
virtual bool randomAccessible() const =0

mlir::sparse_tensor::SparseIterator::kind
const IterKind kind
Definition: SparseTensorIterator.h:336

mlir::sparse_tensor::SparseIterator::genWhileCond
std::pair< Value, ValueRange > genWhileCond(OpBuilder &b, Location l, ValueRange vs)
Definition: SparseTensorIterator.h:292

mlir::sparse_tensor::SparseTensorType
A wrapper around RankedTensorType, which has three goals:
Definition: SparseTensorType.h:46

mlir::sparse_tensor::SparseTensorType::getLvlRank
Level getLvlRank() const
Returns the level-rank.
Definition: SparseTensorType.h:242

Arith.h

Linalg.h

MemRef.h

SCF.h

Tensor.h

mlir::bufferization::getMemRefTypeWithFullyDynamicLayout
BaseMemRefType getMemRefTypeWithFullyDynamicLayout(TensorType tensorType, Attribute memorySpace=nullptr)
Return a MemRef type with fully dynamic layout.
Definition: BufferizableOpInterface.cpp:821

mlir::detail::enumerate
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344

mlir::sparse_tensor
Definition: Enums.h:41

mlir::sparse_tensor::constantIndex
Value constantIndex(OpBuilder &builder, Location loc, int64_t i)
Generates a constant of index type.
Definition: CodegenUtils.h:331

mlir::sparse_tensor::toDim
Dimension toDim(SparseTensorEncodingAttr enc, Level l)
Convenience method to translate the given level to the corresponding dimension.
Definition: SparseTensorDialect.cpp:1182

mlir::sparse_tensor::makeSparseTensorLevel
std::unique_ptr< SparseTensorLevel > makeSparseTensorLevel(OpBuilder &b, Location l, Value t, unsigned tid, Level lvl)
Helper function to create a TensorLevel object from given tensor.
Definition: SparseTensorIterator.cpp:1579

mlir::sparse_tensor::TensorLevel
unsigned TensorLevel
Definition: LoopEmitter.h:26

mlir::sparse_tensor::makeTraverseSubSectIterator
std::unique_ptr< SparseIterator > makeTraverseSubSectIterator(OpBuilder &b, Location l, const SparseIterator &subsectIter, const SparseIterator &parent, std::unique_ptr< SparseIterator > &&wrap, Value loopBound, unsigned stride, SparseEmitStrategy strategy)
Helper function to create a SparseIterator object that iterates over a non-empty subsection created b...
Definition: SparseTensorIterator.cpp:1691

mlir::sparse_tensor::IterKind::kTrivial
@ kTrivial

mlir::sparse_tensor::Level
uint64_t Level
The type of level identifiers and level-ranks.
Definition: SparseTensor.h:42

mlir::sparse_tensor::tryGetSparseTensorType
std::optional< SparseTensorType > tryGetSparseTensorType(Value val)
Definition: SparseTensorType.h:377

mlir::sparse_tensor::getRankedTensorType
RankedTensorType getRankedTensorType(T &&t)
Convenience method to abbreviate casting getType().
Definition: SparseTensor.h:160

mlir::sparse_tensor::makeSynLevelAndIterator
std::pair< std::unique_ptr< SparseTensorLevel >, std::unique_ptr< SparseIterator > > makeSynLevelAndIterator(Value sz, unsigned tid, unsigned lvl, SparseEmitStrategy strategy)
Helper function to create a synthetic SparseIterator object that iterates over a dense space specifie...
Definition: SparseTensorIterator.cpp:1600

mlir::sparse_tensor::createOrFoldSliceStrideOp
Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim)
Generates code to retrieve the slice stride for the sparse tensor slice, return a constant if the offs...
Definition: CodegenUtils.cpp:566

mlir::sparse_tensor::getSparseTensorEncoding
SparseTensorEncodingAttr getSparseTensorEncoding(Type type)
Convenience method to get a sparse encoding attribute from a type.
Definition: SparseTensorDialect.cpp:1034

mlir::sparse_tensor::genCoIteration
std::pair< Operation *, Value > genCoIteration(OpBuilder &builder, Location loc, ArrayRef< SparseIterator * > iters, MutableArrayRef< Value > reduc, Value uniIdx, bool userReducFirst=false)
Definition: LoopEmitter.cpp:902

mlir::sparse_tensor::isZeroRankedTensorOrScalar
bool isZeroRankedTensorOrScalar(Type type)
Definition: CodegenUtils.h:412

mlir::sparse_tensor::makePaddedIterator
std::unique_ptr< SparseIterator > makePaddedIterator(std::unique_ptr< SparseIterator > &&sit, Value padLow, Value padHigh, SparseEmitStrategy strategy)
Helper function to create a SparseIterator object that iterates over a padded sparse level (the padde...
Definition: SparseTensorIterator.cpp:1655

mlir::sparse_tensor::getSparseTensorType
SparseTensorType getSparseTensorType(Value val)
Convenience methods to obtain a SparseTensorType from a Value.
Definition: SparseTensorType.h:374

mlir::sparse_tensor::makeSimpleIterator
std::unique_ptr< SparseIterator > makeSimpleIterator(OpBuilder &b, Location l, const SparseIterationSpace &iterSpace)
Helper function to create a simple SparseIterator object that iterates over the entire iteration space...
Definition: SparseTensorIterator.cpp:1609

mlir::sparse_tensor::createFuncCall
func::CallOp createFuncCall(OpBuilder &builder, Location loc, StringRef name, TypeRange resultType, ValueRange operands, EmitCInterface emitCInterface)
Creates a CallOp to the function reference returned by getFunc() in the builder's module.
Definition: CodegenUtils.cpp:343

mlir::sparse_tensor::makeSlicedLevelIterator
std::unique_ptr< SparseIterator > makeSlicedLevelIterator(std::unique_ptr< SparseIterator > &&sit, Value offset, Value stride, Value size, SparseEmitStrategy strategy)
Helper function to create a SparseIterator object that iterates over a sliced space,...
Definition: SparseTensorIterator.cpp:1644

mlir::sparse_tensor::createOrFoldSliceOffsetOp
Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim)
Generates code to retrieve the slice offset for the sparse tensor slice, return a constant if the off...
Definition: CodegenUtils.cpp:556

mlir::sparse_tensor::makeNonEmptySubSectIterator
std::unique_ptr< SparseIterator > makeNonEmptySubSectIterator(OpBuilder &b, Location l, const SparseIterator *parent, Value loopBound, std::unique_ptr< SparseIterator > &&delegate, Value size, unsigned stride, SparseEmitStrategy strategy)
Helper function to create a SparseIterator object that iterates over the non-empty subsections set.
Definition: SparseTensorIterator.cpp:1670

mlir::sparse_tensor::EmitCInterface::On
@ On

mlir::sparse_tensor::TensorId
unsigned TensorId
Tensor identifiers, chosen to be the BlockArgument::getArgNumber of the value passed to Merger::build...
Definition: Merger.h:35

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::matchPattern
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition: Matchers.h:490

mlir::getConstantIntValue
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
Definition: StaticValueUtils.cpp:115

mlir::getType
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:305

mlir::AffineExprKind::Mul
@ Mul
RHS of mul is always a constant or a symbolic expression.

mlir::AffineExprKind::DimId
@ DimId
Dimensional identifier.

mlir::AffineExprKind::Constant
@ Constant
Constant integer.

mlir::AffineExprKind::Add
@ Add

mlir::SparseEmitStrategy
SparseEmitStrategy
Defines a scope for reinterpret map pass.
Definition: Passes.h:52

mlir::SparseEmitStrategy::kSparseIterator
@ kSparseIterator

mlir::get
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
Definition: BytecodeImplementation.h:509

mlir::m_Constant
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition: Matchers.h:369