29 #define CMPI(p, l, r) \
30 (builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::p, (l), (r)) \
33 #define C_IDX(v) (constantIndex(builder, loc, (v)))
34 #define YIELD(vs) (builder.create<scf::YieldOp>(loc, (vs)))
35 #define ADDI(lhs, rhs) (builder.create<arith::AddIOp>(loc, (lhs), (rhs)))
36 #define ANDI(lhs, rhs) (builder.create<arith::AndIOp>(loc, (lhs), (rhs)))
37 #define SUBI(lhs, rhs) (builder.create<arith::SubIOp>(loc, (lhs), (rhs)))
38 #define MULI(lhs, rhs) (builder.create<arith::MulIOp>(loc, (lhs), (rhs)))
39 #define REMUI(lhs, rhs) (builder.create<arith::RemUIOp>(loc, (lhs), (rhs)))
40 #define DIVUI(lhs, rhs) (builder.create<arith::DivUIOp>(loc, (lhs), (rhs)))
41 #define SELECT(c, l, r) (builder.create<arith::SelectOp>(loc, (c), (l), (r)))
50 memref = builder.
create<memref::CastOp>(
83 bool isSparseOut,
unsigned numLoops,
86 initialize(tensors, loopTag, hasOutput, isSparseOut, numLoops, dimGetter);
90 bool isSparseOut,
unsigned numLoops,
94 this->loopTag = loopTag;
95 this->hasOutput = hasOutput;
96 this->isSparseOut = isSparseOut;
97 this->emitStrategy = emitStrategy;
99 const unsigned numManifestTensors = ts.size();
100 const unsigned synTensorId = numManifestTensors;
101 const unsigned numTensors = numManifestTensors + 1;
103 this->tensors.assign(ts.begin(), ts.end());
105 this->valBuffer.assign(numTensors,
nullptr);
106 this->lvls.resize(numTensors);
107 this->iters.resize(numTensors);
111 this->loopStack.reserve(numLoops);
112 this->loopSeqStack.reserve(numLoops);
115 this->dependentLvlMap.assign(
116 numTensors, std::vector<std::vector<std::pair<TensorLevel, unsigned>>>());
117 this->sliceMeta.assign(
118 numTensors, std::vector<std::vector<std::pair<Value, unsigned>>>());
119 this->levelReducedDep.assign(numTensors, std::vector<unsigned>());
122 for (
TensorId tid = 0; tid < numTensors; tid++) {
124 if (tid == synTensorId) {
130 const Value t = tensors[tid];
140 lvls[tid].resize(lvlRank);
141 iters[tid].resize(lvlRank);
142 loopHighs.assign(numLoops,
nullptr);
145 levelReducedDep[tid].assign(lvlRank, 0);
146 dependentLvlMap[tid].assign(
147 lvlRank, std::vector<std::pair<TensorLevel, unsigned>>());
148 sliceMeta[tid].assign(lvlRank, std::vector<std::pair<Value, unsigned>>());
149 if (dimGetter && !isSynTensor(tid)) {
150 for (
Level l = 0; l < lvlRank; l++) {
151 std::vector<std::pair<LoopId, unsigned>> deps = dimGetter(tid, l);
153 std::sort(deps.begin(), deps.end(),
154 [](
auto &lhs,
auto &rhs) { return lhs.first < rhs.first; });
156 dependentLvlMap[tid][l] = std::move(deps);
157 unsigned depends = dependentLvlMap[tid][l].size();
160 sliceMeta[tid][l].reserve(depends);
166 std::unique_ptr<SparseIterator>
171 if (stt.hasEncoding() && stt.getEncoding().isSlice()) {
175 std::move(it), offset, stride, lvls[t][l]->getSize(), emitStrategy);
187 for (
unsigned i = 0, e = loopHighs.size(); i < e; i++) {
188 Value sz = loopHighs[i] = synSetter(builder, loc, i);
190 lvls[synId][i] = std::move(stl);
191 iters[synId][i].emplace_back(std::move(it));
203 const Value tensor = tensors[t];
204 const auto rtp = dyn_cast<RankedTensorType>(tensor.
getType());
213 const Level lvlRank = stt.getLvlRank();
214 const auto shape = rtp.getShape();
217 for (
Level l = 0; l < stt.getLvlRank(); l++) {
218 if (stt.hasEncoding())
219 lvlSzs.push_back(builder.
create<LvlOp>(loc, tensor, l));
221 lvlSzs.push_back(builder.
create<tensor::DimOp>(loc, tensor, l));
225 for (
Level l = 0; l < lvlRank; l++) {
228 if (!dependentLvlMap[t][l].empty())
231 auto it = makeLevelIterator(builder, loc, t, l);
232 iters[t][l].emplace_back(std::move(it));
239 bool isOutput = isOutputTensor(t);
240 Type elementType = stt.getElementType();
241 if (!stt.hasEncoding()) {
248 if (llvm::isa_and_nonnull<tensor::ExtractSliceOp>(tensor.
getDefiningOp()))
252 builder.
create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
254 if (isOutput && updater)
255 denseVal = updater(builder, loc, denseVal, tensor);
257 valBuffer[t] = denseVal;
262 valBuffer[t] = builder.
create<ToValuesOp>(loc, tensor);
269 initSubSectIterator(builder, loc);
274 for (
TensorId t = 0, e = tensors.size(); t < e; t++) {
275 auto rtp = dyn_cast<RankedTensorType>(tensors[t].getType());
282 auto remDepStack = dependentLvlMap;
283 std::vector<std::tuple<LoopId, TensorId, Level>> depRedOrder;
284 for (
Level lvl = 0; lvl < lvlRank; lvl++) {
286 std::reverse(remDepStack[t][lvl].begin(), remDepStack[t][lvl].end());
287 for (
auto [loop, coeff] : dependentLvlMap[t][lvl])
288 depRedOrder.emplace_back(std::make_tuple(loop, t, lvl));
291 if (depRedOrder.empty())
294 std::sort(depRedOrder.begin(), depRedOrder.end(),
295 [](
auto &l,
auto &r) { return std::get<0>(l) < std::get<0>(r); });
298 for (
auto [loop, t, lvl] : depRedOrder) {
299 std::pair<LoopId, unsigned> curDep = remDepStack[t][lvl].back();
300 assert(curDep.first == loop);
301 remDepStack[t][lvl].pop_back();
303 auto lvlIt = makeLevelIterator(builder, loc, t, lvl);
305 if (!parent && lvl > 0) {
306 if (dependentLvlMap[t][lvl - 1].empty()) {
307 parent = iters[t][lvl - 1].back().get();
311 std::unique_ptr<SparseIterator> it;
312 if (!remDepStack[t][lvl].empty()) {
315 for (
auto [loop, stride] : remDepStack[t][lvl]) {
320 std::move(lvlIt), size, curDep.second,
325 std::move(lvlIt), loopHighs[loop],
326 curDep.second, emitStrategy);
328 lastIter[t] = it.get();
329 iters[t][lvl].emplace_back(std::move(it));
334 void LoopEmitter::categorizeIterators(
342 raIters.push_back(it);
344 spIters.push_back(it);
347 std::stable_sort(spIters.begin(), spIters.end(), [](
auto lhs,
auto rhs) {
349 return static_cast<uint8_t>(lhs->kind) > static_cast<uint8_t>(rhs->kind);
356 assert(loopSeqStack.size() == loopStack.size());
360 levelReducedDep[tid][lvl]++;
361 prepareLoopOverTensorAtLvl(builder, loc, tid, lvl);
365 loopSeqStack.emplace_back(
C_IDX(0), tidLvls.vec());
369 assert(loopSeqStack.size() == loopStack.size() + 1);
374 levelReducedDep[tid][lvl]--;
376 loopSeqStack.pop_back();
386 const auto loopId = cast<AffineDimExpr>(a).getPosition();
387 return loopStack[loopId].iv;
390 auto binOp = cast<AffineBinaryOpExpr>(a);
392 genAffine(builder, loc, binOp.getRHS()));
395 auto binOp = cast<AffineBinaryOpExpr>(a);
397 genAffine(builder, loc, binOp.getRHS()));
400 int64_t c = cast<AffineConstantExpr>(a).getValue();
404 llvm_unreachable(
"unexpected affine subscript");
408 std::pair<Operation *, Value> LoopEmitter::emitForLoopOverTensorAtLvl(
417 auto [lo, hi] = iter.
genForCond(builder, loc);
421 scf::ParallelOp parOp =
422 builder.
create<scf::ParallelOp>(loc, lo, hi, step, reduc);
424 assert(parOp.getNumReductions() == reduc.size());
425 iv = parOp.getInductionVars()[0];
434 for (
int i = 0, e = reduc.size(); i < e; i++)
435 reduc[i] = parOp.getInitVals()[i];
438 scf::ForOp forOp = builder.
create<scf::ForOp>(loc, lo, hi, step, reduc);
440 iv = forOp.getInductionVar();
443 assert(forOp.getNumRegionIterArgs() == reduc.size());
444 for (
int i = 0, e = reduc.size(); i < e; i++)
445 reduc[i] = forOp.getRegionIterArg(i);
453 crd = iter.
deref(builder, loc);
455 iter.
locate(builder, loc, iv);
461 std::pair<Operation *, Value> LoopEmitter::emitWhileLoopOverTensorsAtLvls(
473 ivs.append(itVals.begin(), itVals.end());
477 ivs.append(reduc.begin(), reduc.end());
480 ivs.push_back(loopSeqStack.back().first);
483 assert(llvm::all_of(ivs, [](
Value v) {
return v !=
nullptr; }));
485 auto whileOp = builder.
create<scf::WhileOp>(loc, types, ivs);
494 Value whileCond =
nullptr;
497 auto [cond, remArgs] = it->
genWhileCond(builder, loc, bArgs);
498 whileCond = !whileCond ? cond :
ANDI(whileCond, cond);
503 assert(bArgs.size() == reduc.size() + needsUniv ? 1 : 0);
517 it->
deref(builder, loc);
521 assert(aArgs.size() == reduc.size() + needsUniv ? 1 : 0);
522 for (
unsigned i = 0, e = reduc.size(); i < e; i++)
538 min = whileOp.getAfterArguments().back();
541 return {whileOp,
min};
546 if (spIters.size() > 1)
549 if (spIters.size() == 1)
550 return spIters.front()->iteratableByFor();
560 tryParallel = tryParallel && reduc.size() <= 1;
564 categorizeIterators(tidLvls, raIters, spIters);
570 needsUniv = !spIters.empty() && needsUniv;
581 if (shouldIteratedByForLoop(spIters) && !needsUniv) {
582 assert(spIters.size() <= 1);
583 SparseIterator &it = spIters.empty() ? *raIters.front() : *spIters.front();
585 emitForLoopOverTensorAtLvl(builder, loc, it, reduc, tryParallel);
588 for (
auto *it : spIters) {
593 for (
auto *it : raIters)
597 emitWhileLoopOverTensorsAtLvls(builder, loc, spIters, reduc, needsUniv);
602 it->
locate(builder, loc, iv);
616 lvl == 0 ? nullptr : iters[tid][lvl - 1].back().get();
617 auto &it = getCurIterator(tid, lvl);
618 it.
genInit(builder, loc, parent);
622 it.
locate(builder, loc, lvlCrd);
631 bool hasParent = lvl == 0 || !dependentLvlMap[tid][lvl].empty();
634 hasParent ? nullptr : iters[tid][lvl - 1].back().get();
635 auto &it = getCurIterator(tid, lvl);
636 it.
genInit(builder, loc, parent);
645 const LoopInfo &loopInfo = loopStack.back();
646 if (
auto forOp = llvm::dyn_cast<scf::ForOp>(loopInfo.loop)) {
647 if (!reduc.empty()) {
648 assert(reduc.size() == forOp.getNumResults());
649 rewriter.
create<scf::YieldOp>(loc, reduc);
654 for (
unsigned i = 0, e = forOp.getResults().size(); i < e; i++)
655 reduc[i] = forOp.getResult(i);
657 auto parOp = llvm::cast<scf::ParallelOp>(loopInfo.loop);
658 if (!reduc.empty()) {
659 assert(reduc.size() == parOp.getInitVals().size() && reduc.size() == 1);
660 Operation *redExp = reduc.front().getDefiningOp();
662 assert(redExp->
getUses().empty());
668 Value redVal = parOp.getInitVals().front();
680 unsigned numUsers = 0;
685 assert(numUsers == 1);
689 auto redOp = rewriter.
create<scf::ReduceOp>(loc, curVal);
691 Block *redBlock = &redOp.getReductions().
front().front();
697 newRed, [&]() { newRed->
setOperands(redBlock->getArguments()); });
705 for (
unsigned i = 0, e = parOp.getResults().size(); i < e; i++)
706 reduc[i] = parOp.getResult(i);
712 const LoopInfo &loopInfo = loopStack.back();
713 auto whileOp = llvm::cast<scf::WhileOp>(loopInfo.loop);
714 Value iv = loopInfo.iv;
739 Value uniIdx = whileOp.getResults().back();
740 it.
locate(builder, loc, uniIdx);
745 for (
auto &i : reduc) {
746 operands.push_back(i);
748 i = whileRes.front();
749 whileRes = whileRes.drop_front();
753 if (operands.size() < whileOp.getNumResults()) {
754 assert(operands.size() + 1 == whileOp.getNumResults());
756 operands.push_back(
ADDI(iv, one));
758 loopSeqStack.back().first = whileOp->getResults().back();
761 if (!operands.empty())
771 const LoopInfo &loopInfo = loopStack.back();
775 if (!loopInfo.userCodeBlock->empty() &&
776 llvm::isa<scf::YieldOp>(&loopInfo.userCodeBlock->back())) {
779 assert(loopInfo.userCodeBlock->back().getNumResults() == 0);
783 if (llvm::isa<scf::WhileOp>(loopInfo.loop)) {
784 exitWhileLoop(rewriter, loc, reduc);
786 exitForLoop(rewriter, loc, reduc);
789 assert(loopStack.size() == loopSeqStack.size());
790 loopStack.pop_back();
static Value genSliceStride(OpBuilder &builder, Location loc, Value tensor, Level lvl)
static Value genSliceOffset(OpBuilder &builder, Location loc, Value tensor, Level lvl)
static LLVM_ATTRIBUTE_UNUSED void dumpIndexMemRef(OpBuilder &builder, Location loc, Value memref)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Base type for an affine expression.
AffineExprKind getKind() const
Return the classification for this type.
This class provides a shared interface for ranked and unranked memref types.
Block represents an ordered list of Operations.
BlockArgListType getArguments()
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
This class helps build Operations.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes=std::nullopt, ArrayRef< Location > locs=std::nullopt)
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Operation is the basic unit of execution within MLIR.
Value getOperand(unsigned idx)
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
unsigned getNumOperands()
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
use_range getUses()
Returns a range of all uses, which is useful for iterating over all uses.
unsigned getNumResults()
Return the number of results held by this operation.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class provides an abstraction over the different types of ranges over Values.
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
user_range getUsers() const
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
void exitCurrentLoop(RewriterBase &rewriter, Location loc, MutableArrayRef< Value > reduc={})
Generates code to exit the current loop (e.g., generates yields, forwards loop induction variables,...
void locateLvlAtAffineAddress(OpBuilder &builder, Location loc, TensorLevel tidLvl, AffineExpr lvlExpr)
Emits the address for a dense level based on the value evaluated by the provided affine expression.
void enterNewLoopSeq(OpBuilder &builder, Location loc, ArrayRef< TensorLevel > tidLvls)
Enters a new loop sequence, the loops within the same sequence starts from the break points of previo...
Value genAffine(OpBuilder &builder, Location loc, AffineExpr a)
Generates code to compute an affine expression whose variables are LoopIds (i.e., a....
Operation * enterCoIterationOverTensorsAtLvls(OpBuilder &builder, Location loc, ArrayRef< TensorLevel > tidLvls, MutableArrayRef< Value > reduc={}, bool isParallel=false, bool needsUniv=false)
Emits a co-iteration loop over a set of tensors.
TensorLevel makeTensorLevel(TensorId t, Level l) const
Compresses a TensorId and Level into a TensorLevel.
unsigned getNumManifestTensors() const
Gets the total number of manifest tensors (excluding the synthetic tensor).
void initialize(ValueRange tensors, StringAttr loopTag=nullptr, bool hasOutput=false, bool isSparseOut=false, unsigned numLoops=0, DependentLvlGetter getter=nullptr, SparseEmitStrategy emitStrategy=SparseEmitStrategy::kFunctional)
Takes an array of input tensors, which the generated loops will iterate over.
std::pair< TensorId, Level > unpackTensorLevel(TensorLevel tidLvl) const
De-compresses a TensorLevel back to a pair of TensorId and Level.
auto unpackTensorLevelRange(ContainerTy &&c) const
Converts a range of TensorLevel to a range of std::pair<TensorId, Level>
void initializeLoopEmit(OpBuilder &builder, Location loc, OutputUpdater updater=nullptr, SynTensorBoundSetter synSetter=nullptr)
Starts a loop emitting session by generating all the buffers needed for iterating over the tensors.
void exitCurrentLoopSeq(OpBuilder &builder, Location loc)
Exits the current loop sequence, this will reset universal index to 0.
TensorId getSynTensorId() const
Gets the TensorId for synthetic tensor.
Helper class that generates loop conditions, etc., to traverse a sparse tensor level.
virtual std::pair< Value, Value > genForCond(OpBuilder &b, Location l)
void genInit(OpBuilder &b, Location l, const SparseIterator *p)
void locate(OpBuilder &b, Location l, Value crd)
virtual ValueRange forwardIf(OpBuilder &b, Location l, Value cond)
ValueRange linkNewScope(ValueRange pos)
ValueRange getCursor() const
Value deref(OpBuilder &b, Location l)
virtual bool randomAccessible() const =0
std::pair< Value, ValueRange > genWhileCond(OpBuilder &b, Location l, ValueRange vs)
A wrapper around RankedTensorType, which has three goals:
Level getLvlRank() const
Returns the level-rank.
BaseMemRefType getMemRefTypeWithFullyDynamicLayout(TensorType tensorType, Attribute memorySpace=nullptr)
Return a MemRef type with fully dynamic layout.
Dimension toDim(SparseTensorEncodingAttr enc, Level l)
Convenience method to translate the given level to the corresponding dimension.
std::unique_ptr< SparseIterator > makeTraverseSubSectIterator(OpBuilder &b, Location l, const SparseIterator &subsectIter, const SparseIterator &parent, std::unique_ptr< SparseIterator > &&wrap, Value loopBound, unsigned stride, SparseEmitStrategy strategy)
Helper function to create a SparseIterator object that iterate over a non-empty subsection created by...
uint64_t Level
The type of level identifiers and level-ranks.
RankedTensorType getRankedTensorType(T &&t)
Convenience method to abbreviate casting getType().
std::unique_ptr< SparseIterator > makeSimpleIterator(const SparseTensorLevel &stl, SparseEmitStrategy strategy)
Helper function to create a simple SparseIterator object that iterate over the SparseTensorLevel.
std::pair< std::unique_ptr< SparseTensorLevel >, std::unique_ptr< SparseIterator > > makeSynLevelAndIterator(Value sz, unsigned tid, unsigned lvl, SparseEmitStrategy strategy)
Helper function to create a synthetic SparseIterator object that iterate over a dense space specified...
Value createOrFoldSliceStrideOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim)
Generates code to retrieve the slice stride for the sparse tensor slice, return a constant if the stride is statically known.
SparseTensorEncodingAttr getSparseTensorEncoding(Type type)
Convenience method to get a sparse encoding attribute from a type.
bool isZeroRankedTensorOrScalar(Type type)
std::unique_ptr< SparseTensorLevel > makeSparseTensorLevel(OpBuilder &builder, Location loc, Value t, unsigned tid, Level l)
Helper function to create a TensorLevel object from given tensor.
SparseTensorType getSparseTensorType(Value val)
Convenience methods to obtain a SparseTensorType from a Value.
func::CallOp createFuncCall(OpBuilder &builder, Location loc, StringRef name, TypeRange resultType, ValueRange operands, EmitCInterface emitCInterface)
Creates a CallOp to the function reference returned by getFunc() in the builder's module.
std::unique_ptr< SparseIterator > makeSlicedLevelIterator(std::unique_ptr< SparseIterator > &&sit, Value offset, Value stride, Value size, SparseEmitStrategy strategy)
Helper function to create a SparseIterator object that iterate over a sliced space,...
Value createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, Value tensor, Dimension dim)
Generates code to retrieve the slice offset for the sparse tensor slice, return a constant if the off...
std::unique_ptr< SparseIterator > makeNonEmptySubSectIterator(OpBuilder &b, Location l, const SparseIterator *parent, Value loopBound, std::unique_ptr< SparseIterator > &&delegate, Value size, unsigned stride, SparseEmitStrategy strategy)
Helper function to create a SparseIterator object that iterate over the non-empty subsections set.
unsigned TensorId
Tensor identifiers, chosen to be the BlockArgument::getArgNumber of the value passed to Merger::build...
Include the generated interface declarations.
@ Mul
RHS of mul is always a constant or a symbolic expression.
@ DimId
Dimensional identifier.
@ Constant
Constant integer.
SparseEmitStrategy
Defines a scope for reinterpret map pass.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...