#define CMPI(p, l, r)                                                          \
  (builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::p, (l), (r))       \
       .getResult())

#define C_IDX(v) (constantIndex(builder, loc, (v)))
#define YIELD(vs) (builder.create<scf::YieldOp>(loc, (vs)))
#define ADDI(lhs, rhs) (builder.create<arith::AddIOp>(loc, (lhs), (rhs)))
#define ANDI(lhs, rhs) (builder.create<arith::AndIOp>(loc, (lhs), (rhs)))
#define SUBI(lhs, rhs) (builder.create<arith::SubIOp>(loc, (lhs), (rhs)))
#define MULI(lhs, rhs) (builder.create<arith::MulIOp>(loc, (lhs), (rhs)))
#define REMUI(lhs, rhs) (builder.create<arith::RemUIOp>(loc, (lhs), (rhs)))
#define DIVUI(lhs, rhs) (builder.create<arith::DivUIOp>(loc, (lhs), (rhs)))
#define SELECT(c, l, r) (builder.create<arith::SelectOp>(loc, (c), (l), (r)))
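// The macros above assume an `OpBuilder builder` and a `Location loc` in the
// enclosing scope; they abbreviate the arith/scf builder calls that recur
// throughout this file (e.g., ADDI(a, b) emits an arith.addi of two values).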
  memref = builder.create<memref::CastOp>(
      loc, UnrankedMemRefType::get(builder.getIndexType(), 0), memref);
  return ADDI(MULI(crd, stride), offset);
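// toSliceCrd: a coordinate relative to the slice maps back to the underlying
// tensor as tensorCrd = sliceCrd * stride + offset.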
  crd = SUBI(crd, offset);
  Value rem = REMUI(crd, stride);
  crd = DIVUI(crd, stride);
  return std::make_pair(crd, rem);
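// fromSliceCrd: the inverse mapping, sliceCrd = (tensorCrd - offset) / stride.
// The remainder is nonzero exactly when the tensor coordinate does not land on
// the slice's stride grid, which callers use to reject illegitimate coordinates.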
  return ifOp.getResult(0);
  builder.create<memref::StoreOp>(loc, pPtr, sPosBuf, C_IDX(1));
  builder.create<memref::StoreOp>(loc, num, sPosBuf, C_IDX(0));
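// The two stores above reflect the slice position buffer layout: index 0
// holds the number of stored position tuples (updateSlicePosTupleNum) and
// index 1 holds the current pointer into the buffer (updateSlicePosPtr).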
  llvm_unreachable("unexpected kind");
  builder.create<memref::StoreOp>(
      loc, pos, sPosBuf, getSlicePosIdx(builder, loc, sPosBuf, tupleIdx, posKind));
std::pair<Value, Value>
LoopEmitter::genSliceLegitPredicate(OpBuilder &builder, Location loc, Value crd,
                                    TensorId tid, Level lvl) {
  assert(isSparseSlices[tid]);
  Value slice = tensors[tid];
  Value offset = sliceOffsets[tid][lvl];
  Value stride = sliceStrides[tid][lvl];

  const auto [newCrd, crdRem] =
      fromSliceCrd(builder, loc, crd, offset, stride, slice, lvl);

  if (auto staticOffset = enc.getStaticLvlSliceOffset(lvl);
      !(staticOffset.has_value() && *staticOffset == 0)) {
    auto geOffset = CMPI(uge, crd, offset);
    conds.push_back(geOffset);
  }

  auto ltLength = CMPI(ult, newCrd, lvlSizes[tid][lvl]);
  conds.push_back(ltLength);

  if (auto staticStride = enc.getStaticLvlSliceStride(lvl);
      !(staticStride.has_value() && *staticStride == 1)) {
    auto fitStride = CMPI(eq, crdRem, C_IDX(0));
    conds.push_back(fitStride);
  }

  auto pred = conds.front();
  for (auto cond : ValueRange(conds).drop_front())
    pred = ANDI(pred, cond);

  return {newCrd, pred};
}
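// A coordinate is legitimate for the slice iff all the statically non-trivial
// conditions hold: crd >= offset, the translated coordinate lies within the
// slice length, and (crd - offset) is divisible by the stride.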
  Value pos = lvl == 0 ? C_IDX(0) : posits[tid][lvl - 1];

  if (isSparseSlices[tid])
    crd = toSliceCrd(builder, loc, crd, sliceOffsets[tid][lvl],
                     sliceStrides[tid][lvl], tensors[tid], lvl);
  const auto coordinates = coordinatesBuffers[tid][lvl];
  const auto sameCrd = genIndexLoad(builder, loc, coordinates, pLo);
  auto whileOp = builder.create<scf::WhileOp>(

  const auto pos = ivs[0];
  Value inBound = builder.create<arith::CmpIOp>(
      loc, arith::CmpIPredicate::ult, pos, pHi);
  auto ifInBound =
      builder.create<scf::IfOp>(loc, builder.getI1Type(), inBound, true);

  builder.setInsertionPointToStart(ifInBound.thenBlock());
  Value isSameCrd = builder.create<arith::CmpIOp>(
      loc, arith::CmpIPredicate::eq, crd, sameCrd);

  builder.setInsertionPointToStart(ifInBound.elseBlock());

  builder.create<scf::ConditionOp>(loc, ifInBound.getResults()[0], ivs);
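// genSegmentHigh: starting from pLo, the while-loop above keeps advancing the
// position as long as it stays below pHi and the loaded coordinate still
// equals sameCrd, i.e., it yields the first position past the current segment
// of duplicate coordinates on a non-unique level.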
  const Value mem = coordinatesBuffers[tid][lvl];

  const Value pos = posits[tid][lvl];
LoopEmitter::LoopEmitter(ValueRange tensors, StringAttr loopTag, bool hasOutput,
                         bool isSparseOut, unsigned numLoops,
                         DependentLvlGetter dimGetter) {
  initialize(tensors, loopTag, hasOutput, isSparseOut, numLoops, dimGetter);
}
void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput,
                             bool isSparseOut, unsigned numLoops,
                             DependentLvlGetter dimGetter) {
  this->loopTag = loopTag;
  this->hasOutput = hasOutput;
  this->isSparseOut = isSparseOut;

  const unsigned numManifestTensors = ts.size();
  const unsigned synTensorId = numManifestTensors;
  const unsigned numTensors = numManifestTensors + 1;

  this->tensors.assign(ts.begin(), ts.end());

  this->lvlTypes.assign(numTensors, std::vector<LevelType>());
  this->lvlSizes.assign(numTensors, std::vector<Value>());
  this->highs.assign(numTensors, std::vector<Value>());
  this->segHi.assign(numTensors, std::vector<Value>());
  this->posits.assign(numTensors, std::vector<Value>());
  this->coords.assign(numTensors, std::vector<Value>());
  this->positionsBuffers.assign(numTensors, std::vector<Value>());
  this->coordinatesBuffers.assign(numTensors, std::vector<Value>());
  this->valBuffer.assign(numTensors, nullptr);
  this->isSparseSlices.assign(numTensors, false);
  this->sliceOffsets.assign(numTensors, std::vector<Value>());
  this->sliceStrides.assign(numTensors, std::vector<Value>());

  this->loopStack.reserve(numLoops);
  this->loopSeqStack.reserve(numLoops);

  this->dependentLvlMap.assign(
      numTensors, std::vector<std::vector<std::pair<TensorLevel, unsigned>>>());
  this->slicePosBuffer.assign(numTensors, std::vector<std::vector<Value>>());
  this->sliceMeta.assign(
      numTensors, std::vector<std::vector<std::pair<Value, unsigned>>>());
  this->sliceStack.assign(numTensors, std::vector<SliceInfo>());
  this->levelReducedDep.assign(numTensors, std::vector<unsigned>());
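// Note the extra slot: numTensors = numManifestTensors + 1 appends one
// synthetic tensor (synTensorId) after the manifest tensors, so it can be
// addressed with the same per-tensor bookkeeping arrays as the real inputs.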
  for (TensorId tid = 0; tid < numTensors; tid++) {
    if (tid == synTensorId) {

    const Value t = tensors[tid];

    isSparseSlices[tid] = enc.isSlice();
    for (auto lvlTp : enc.getLvlTypes())
      lvlTypes[tid].push_back(lvlTp);

    lvlSizes[tid].assign(lvlRank, Value());
    highs[tid].assign(lvlRank, Value());
    segHi[tid].assign(lvlRank, Value());
    posits[tid].assign(lvlRank, Value());
    coords[tid].assign(lvlRank, Value());
    positionsBuffers[tid].assign(lvlRank, Value());
    coordinatesBuffers[tid].assign(lvlRank, Value());
    sliceOffsets[tid].assign(lvlRank, Value());
    sliceStrides[tid].assign(lvlRank, Value());

    levelReducedDep[tid].assign(lvlRank, 0);
    dependentLvlMap[tid].assign(
        lvlRank, std::vector<std::pair<TensorLevel, unsigned>>());
    slicePosBuffer[tid].assign(lvlRank, std::vector<Value>());
    sliceMeta[tid].assign(lvlRank, std::vector<std::pair<Value, unsigned>>());
    sliceStack[tid].emplace_back(Value(),

    if (dimGetter && !isSynTensor(tid)) {
      for (Level l = 0; l < lvlRank; l++) {
        dependentLvlMap[tid][l] = dimGetter(tid, l);
        unsigned depends = dependentLvlMap[tid][l].size();

        sliceMeta[tid][l].assign(depends, std::make_pair(nullptr, 0));

        slicePosBuffer[tid][l].assign(depends - 1, nullptr);

  for (unsigned i = 0, e = highs[getSynTensorId()].size(); i < e; i++)
    const Value tensor = tensors[t];
    const auto rtp = dyn_cast<RankedTensorType>(tensor.getType());

    const Level lvlRank = stt.getLvlRank();
    const auto shape = rtp.getShape();
    const Level cooStart = stt.getCOOStart();

    for (Level l = 0; l < stt.getLvlRank(); l++) {
      if (stt.hasEncoding())
        lvlSzs.push_back(builder.create<LvlOp>(loc, tensor, l));
      else
        lvlSzs.push_back(builder.create<tensor::DimOp>(loc, tensor, l));
    }
    for (Level l = 0; l < lvlRank; l++) {
      assert(!positionsBuffers[t][l] && !coordinatesBuffers[t][l] &&

      const auto lvlTp = lvlTypes[t][l];

        coordinatesBuffers[t][l] =
            genToCoordinates(builder, loc, tensor, l, cooStart);

        coordinatesBuffers[t][l] =
            genToCoordinates(builder, loc, tensor, l, cooStart);

      highs[t][l] = lvlSizes[t][l] = lvlSzs[l];
      if (isSparseSlices[t]) {

    bool isOutput = isOutputTensor(t);
    Type elementType = stt.getElementType();
    if (!stt.hasEncoding()) {

      if (llvm::isa_and_nonnull<tensor::ExtractSliceOp>(tensor.getDefiningOp()))

      Value denseVal =
          builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);

      if (isOutput && updater)
        denseVal = updater(builder, loc, denseVal, tensor);

      valBuffer[t] = denseVal;
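// Tensors without a sparse encoding are materialized as dense memrefs via
// bufferization.to_memref; for output tensors, the optional updater callback
// gets a chance to rewrite the buffer before it is recorded in valBuffer.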
  for (TensorId t = 0, e = tensors.size(); t < e; t++) {
    auto rtp = dyn_cast<RankedTensorType>(tensors[t].getType());

    for (Level lvl = 0; lvl < lvlRank; lvl++) {
      if (!dependentLvlMap[t][lvl].empty()) {
        const auto &depLvls = dependentLvlMap[t][lvl];
        assert(depLvls.size() == sliceMeta[t][lvl].size());

        for (int e = depLvls.size() - 1; e >= 0; e--) {

          unsigned stride = depLvls[e].second;
          Value stridedSize = lvlSizes[dt][dl];

          stridedSize = MULI(stridedSize, C_IDX(stride));
          size = ADDI(size, stridedSize);
          sliceMeta[t][lvl][e] = std::make_pair(size, stride);
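// sliceMeta[t][lvl][e] caches, per dependent level, the accumulated slice
// size together with its stride: walking the dependent levels back to front,
// each contributes its (stride-scaled) level size to the running size of the
// affine-indexed slice.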
void LoopEmitter::categorizeLoopCondition(
    ArrayRef<TensorLevel> tidLvls, SmallVectorImpl<TensorLvlCond> &dnConds,
    SmallVectorImpl<TensorLvlCond> &spConds) {

    assert(lvlTypes[t].size() > l);
    auto lvlType = lvlTypes[t][l];

    bool isSlice = isSparseSlices[t];
    bool isAffine = !dependentLvlMap[t][l].empty();
    bool isUnRedu = false;

    assert(!isSlice || !isAffine);

    if (!dependentLvlMap[t][l].empty())
      isUnRedu = !depFullyReduced(t, l);

    auto &dstVec = isSparse ? spConds : dnConds;
    dstVec.emplace_back(makeTensorLevel(t, l),
                        makeLoopCondKind(isSparse, isSlice, isAffine, isUnRedu));

  std::stable_sort(spConds.begin(), spConds.end(), [](auto lhs, auto rhs) {
    return static_cast<uint8_t>(lhs.second) > static_cast<uint8_t>(rhs.second);
  });
}
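// The stable sort orders the sparse conditions by decreasing LoopCondKind
// value, so the more involved kinds (affine, unreduced) come before the plain
// slice and trivial conditions when the loop is constructed.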
  assert(loopSeqStack.size() == loopStack.size());
  std::vector<std::tuple<TensorId, Level, bool>> slicedTids;

    if (!dependentLvlMap[tid][lvl].empty()) {
      bool fullyRed = genSliceBegin(builder, loc, tid, lvl);
      slicedTids.emplace_back(tid, lvl, fullyRed);
    } else if (!isSynTensor(tid)) {
      prepareLoopOverTensorAtLvl(builder, loc, tid, lvl);
    }

  loopSeqStack.emplace_back(C_IDX(0), std::move(slicedTids));
  assert(loopSeqStack.size() == loopStack.size() + 1);

  const auto &slicedTids = loopSeqStack.back().second;

  for (auto [tid, lvl, res] : slicedTids) {

      assert(sliceStack[tid].back().slicedOnLvl == lvl);
      sliceStack[tid].pop_back();

  loopSeqStack.pop_back();
  case AffineExprKind::DimId: {
    const auto loopId = cast<AffineDimExpr>(a).getPosition();
    return loopStack[loopId].iv;
  }
  case AffineExprKind::Add: {
    auto binOp = cast<AffineBinaryOpExpr>(a);
    return ADDI(genAffine(builder, loc, binOp.getLHS()),
                genAffine(builder, loc, binOp.getRHS()));
  }
  case AffineExprKind::Mul: {
    auto binOp = cast<AffineBinaryOpExpr>(a);
    return MULI(genAffine(builder, loc, binOp.getLHS()),
                genAffine(builder, loc, binOp.getRHS()));
  }
  case AffineExprKind::Constant: {
    int64_t c = cast<AffineConstantExpr>(a).getValue();
    return C_IDX(c);
  }
  default:
    llvm_unreachable("unexpected affine subscript");
std::pair<Operation *, Value> LoopEmitter::emitForLoopOverTensorAtLvl(
    OpBuilder &builder, Location loc, TensorId tid, Level lvl, Value lo,
    Value hi, MutableArrayRef<Value> reduc, bool isParallel) {

  if (isParallel) {
    scf::ParallelOp parOp =
        builder.create<scf::ParallelOp>(loc, lo, hi, step, reduc);

    assert(parOp.getNumReductions() == reduc.size());
    iv = parOp.getInductionVars()[0];

    for (int i = 0, e = reduc.size(); i < e; i++)
      reduc[i] = parOp.getInitVals()[i];
  } else {
    scf::ForOp forOp = builder.create<scf::ForOp>(loc, lo, hi, step, reduc);

    iv = forOp.getInductionVar();

    assert(forOp.getNumRegionIterArgs() == reduc.size());
    for (int i = 0, e = reduc.size(); i < e; i++)
      reduc[i] = forOp.getRegionIterArg(i);
  }
    posits[tid][lvl] = iv;
    crd = genSparseCrd(builder, loc, tid, lvl);

  if (isSparseSlices[tid] && isSparseCond) {

    for (Value red : reduc)
      types.push_back(red.getType());

    auto [trans, pred] = genSliceLegitPredicate(builder, loc, crd, tid, lvl);
    bool hasReduc = !types.empty();
    scf::IfOp ifOp = builder.create<scf::IfOp>(loc, types, pred,
                                               /*withElseRegion=*/hasReduc);

      YIELD(ifOp.getResults());

  coords[tid][lvl] = crd;
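// When iterating a sparse slice, each coordinate loaded inside the for loop
// must still pass genSliceLegitPredicate; the guarded scf.if runs the body
// only for legitimate coordinates and, when there are reduction values,
// threads them through an else branch.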
  switch (cond.second) {
  case LoopCondKind::SparseCond: {
    assert(ivs.size() == 1);

    return CMPI(ult, ivs.back(), highs[tid][lvl]);
  }
  case LoopCondKind::SparseSliceCond: {
    assert(ivs.size() == 1);
    return CMPI(ult, ivs.back(), highs[tid][lvl]);
  }
  case LoopCondKind::SparseAffineCond: {
    assert(ivs.size() == 1);

    if (llvm::isa_and_nonnull<scf::WhileOp>(loop))

    auto [remSz, stride] = sliceMeta[tid][lvl].back();
    assert(stride == 1 && "Not yet implemented");
    crdHi = ADDI(getMostRecentSliceOnLvl(tid, lvl).offset, remSz);

    return genSparseReducedAffineCond(builder, loc,
                                      coordinatesBuffers[tid][lvl], crdHi,
                                      ivs[0], highs[tid][lvl]);
  }
  case LoopCondKind::SparseAffineUnRedCond: {
    assert(ivs.size() == 3);

  }
  default:
    llvm_unreachable("Unhandled LoopCondKind");
  }
  llvm_unreachable("Unhandled LoopCondKind");
std::optional<Value> LoopEmitter::genWhileLoopBody(OpBuilder &builder,
                                                   Location loc, ValueRange ivs,
                                                   TensorLvlCond cond) {

  switch (cond.second) {
  case LoopCondKind::SparseCond: {

    posits[tid][lvl] = ivs.back();

    coords[tid][lvl] = genSparseCrd(builder, loc, tid, lvl);

  }
  case LoopCondKind::SparseSliceCond: {
    assert(ivs.size() == 1);
    posits[tid][lvl] = ivs.front();
    Value sCrd = genSparseCrd(builder, loc, tid, lvl);

    auto [dCrd, pred] = genSliceLegitPredicate(builder, loc, sCrd, tid, lvl);
    coords[tid][lvl] = dCrd;
    return pred;
  }
  case LoopCondKind::SparseAffineCond: {
    assert(ivs.size() == 1);

    assert(sliceStack[tid].back().depth == 1 && "TODO: not yet implemented");

    Value posit = ivs[0];
    Value crdBuf = coordinatesBuffers[tid][lvl];

    auto relC = SUBI(absC, getFinalSliceOnLvl(tid, lvl).offset);
    posits[tid][lvl] = posit;
    coords[tid][lvl] = relC;

  }
  case LoopCondKind::SparseAffineUnRedCond: {
    unsigned depth = sliceStack[tid].back().depth;
    unsigned curStride = sliceMeta[tid][lvl][depth - 1].second;
    assert(ivs.size() == 3);

    SliceInfo &sliceInfo = sliceStack[tid].back();
    sliceInfo.isNonEmpty = ivs[0];
    sliceInfo.minCrd = ivs[1];
    sliceInfo.offset = ivs[2];

    assert(depth == 1 && "TODO: not yet implemented");
    Value crd = sliceInfo.offset;

    if (curStride != 1) {

      crd = DIVUI(crd, strideVal);
    }

    coords[tid][lvl] = crd;

  }
  default:
    llvm_unreachable("Unhandled LoopCondKind");
  }
  llvm_unreachable("Unhandled LoopCondKind");
ValueRange LoopEmitter::genCheckedValue(OpBuilder &builder, Location loc,
                                        Value pred, ValueRange curArgs,
                                        TensorLvlCond cond) {
  assert(isSparseCond(cond.second));

  if (isAffineIdxUnRedCond(cond.second)) {
    unsigned depth = sliceStack[tid].back().depth;
    unsigned curStride = sliceMeta[tid][lvl][depth - 1].second;

    assert(curArgs.size() == 3);
    auto ifOp = builder.create<scf::IfOp>(loc, curArgs.getTypes(), pred, true);

    auto [nonEmpty, minCrd, offset] =
        genSliceNextInduction(builder, loc, tid, lvl);

    return ifOp.getResults();
  } else {
    assert(isSliceCond(cond.second) && isSparseCond(cond.second));
    assert(curArgs.size() == 1);

    return SELECT(pred, curArgs.front(), nextPos)->getResults();
  }
  llvm_unreachable("unhandled case");
std::pair<Operation *, Value> LoopEmitter::emitWhileLoopOverTensorsAtLvls(
    OpBuilder &builder, Location loc, ArrayRef<TensorLvlCond> spConds,
    MutableArrayRef<Value> reduc, bool needsUniv) {

  assert(!spConds.empty());

  for (auto [tl, cKind] : spConds) {

    const auto lvlTp = lvlTypes[tid][lvl];

    assert(!isDenseCond(cKind));

    unsigned prevSz = ivs.size();
    if (isAffineIdxCond(cKind)) {

      if (isAffineIdxUnRedCond(cKind)) {
        SliceInfo &sliceInfo = sliceStack[tid].back();

        ivs.push_back(sliceInfo.isNonEmpty);
        ivs.push_back(sliceInfo.minCrd);
        ivs.push_back(sliceInfo.offset);
      } else {
        ivs.push_back(posits[tid][lvl]);
      }

      levelReducedDep[tid][lvl]++;
    } else {
      assert(dependentLvlMap[tid][lvl].empty());
      const Value pos = posits[tid][lvl];

    }
    opSegSize.push_back(ivs.size() - prevSz);
  }

  ivs.append(reduc.begin(), reduc.end());

  if (needsUniv)
    ivs.push_back(loopSeqStack.back().first);

  assert(llvm::all_of(ivs, [](Value v) { return v != nullptr; }));

  auto whileOp = builder.create<scf::WhileOp>(loc, types, ivs);

  Value whileCond = nullptr;
  for (auto [c, segSz] : llvm::zip_equal(spConds, opSegSize)) {
    Value cv = genWhileLoopConditions(builder, loc, bArgs.take_front(segSz), c);
    bArgs = bArgs.drop_front(segSz);
    whileCond = !whileCond ? cv : ANDI(whileCond, cv);
  }
  assert(bArgs.size() == reduc.size() + (needsUniv ? 1 : 0));
  Value extraPred = nullptr;
  for (auto [c, segSz] : llvm::zip_equal(spConds, opSegSize)) {
    ValueRange condArgs = aArgs.take_front(segSz);
    auto pred = genWhileLoopBody(builder, loc, condArgs, c);
    assert(pred.has_value() == isCondWithExtraCheck(c.second));
    if (pred.has_value()) {

      extraPred = extraPred == nullptr ? *pred : ANDI(*pred, extraPred);
      ValueRange nxArgs = genCheckedValue(builder, loc, *pred, condArgs, c);
      assert(nxArgs.size() == segSz);

      for (unsigned i = 0; i < segSz; i++) {
        nextArgsRef[i] = nxArgs[i];
      }
    }
    aArgs = aArgs.drop_front(segSz);
    nextArgsRef = nextArgsRef.drop_front(segSz);
  }

  auto ifOp = builder.create<scf::IfOp>(loc, types, extraPred, true);

  YIELD(ifOp->getResults());

  segHi[tid][lvl] = genSegmentHigh(builder, loc, tid, lvl, posits[tid][lvl],
                                   highs[tid][lvl]);

  assert(aArgs.size() == reduc.size() + (needsUniv ? 1 : 0));
  for (unsigned i = 0, e = reduc.size(); i < e; i++)
    reduc[i] = aArgs[i];
    const auto lvlTp = lvlTypes[tid][lvl];

    const auto crd = coords[tid][lvl];

  min = whileOp.getAfterArguments().back();

  return {whileOp, min};
  assert(llvm::all_of(sparseConds,
                      [](TensorLvlCond c) { return isSparseCond(c.second); }));

  if (sparseConds.size() > 1)
    return false;

  if (sparseConds.size() == 1) {

    return !isAffineIdxCond(sparseConds.back().second) &&
           !(genDedup && !isUniqueLT(lvlTypes[tid][lvl]));
  }
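// A plain scf.for is only viable for a single sparse condition that is
// neither affine-index driven nor needs deduplication on a non-unique level;
// any other combination requires the co-iterating scf.while form.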
  assert(!tidLvls.empty());

    assert(!coords[t][l] || !dependentLvlMap[t][l].empty());

  tryParallel = tryParallel && reduc.size() <= 1;

  categorizeLoopCondition(tidLvls, dnConds, spConds);

  needsUniv = !spConds.empty() && needsUniv;

  if (shouldIteratedByForLoop(spConds, genDedup) && !needsUniv) {
    assert(spConds.size() <= 1);
    TensorLvlCond tlCond = spConds.empty() ? dnConds.front() : spConds.front();
    auto loopCondKind = tlCond.second;

    Value lo = isSparseCond(loopCondKind)
                   ? posits[tid][lvl]
                   : loopSeqStack.back().first;
    Value hi = highs[tid][lvl];
    if (isDenseCond(loopCondKind) && isAffineIdxCond(loopCondKind)) {
      bool unReduc = isAffineIdxUnRedCond(loopCondKind);
      assert(unReduc == !depFullyReduced(tid, lvl));
      unsigned depth = sliceStack[tid].back().depth;

      auto [nxSz, nxStride] = sliceMeta[tid][lvl][depth];

      unsigned stride = sliceMeta[tid][lvl][depth - 1].second;

      hi = SUBI(lvlSizes[tid][lvl], hi);

      assert(nxStride == 1 && "Not yet implemented.");
    }
    std::tie(l, iv) = emitForLoopOverTensorAtLvl(builder, loc, tid, lvl, lo, hi,
                                                 reduc, tryParallel);

    trivialLvls.push_back(tlCond.first);
  } else {
    for (auto [tl, cKind] : spConds) {
      if (isAffineIdxCond(cKind)) {

        bool unReduc = isAffineIdxUnRedCond(cKind);
        assert(unReduc == !depFullyReduced(tid, lvl));
        sliceDrivenInfo.emplace_back(tid, lvl, !unReduc);
      } else {
        trivialLvls.push_back(tl);
      }
    }

    emitWhileLoopOverTensorsAtLvls(builder, loc, spConds, reduc, needsUniv);
  }

  enterTensorsAtDenseLvls(builder, loc, dnConds, iv, sliceDrivenInfo);

  loopStack.emplace_back(trivialLvls, sliceDrivenInfo, l,
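// Dispatch: a single trivial condition (with no universal index) lowers to an
// scf.for, possibly an scf.parallel; any mix of affine or multiple sparse
// conditions lowers to an scf.while that co-iterates the sparse tensors,
// after which the dense levels are entered at the chosen coordinate.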
  assert(isValidLevel(tid, lvl));
  assert(!isa<AffineDimExpr>(affine) && !isDenseLT(lvlTypes[tid][lvl]));

  assert(!coords[tid][lvl]);

  const Value pLo = posits[tid][lvl];
  const Value pHi = highs[tid][lvl];
  scf::ForOp forOp = builder.create<scf::ForOp>(loc, pLo, pHi, step, reduc);

  assert(forOp.getNumRegionIterArgs() == reduc.size());
  for (int i = 0, e = reduc.size(); i < e; i++)
    reduc[i] = forOp.getRegionIterArg(i);

  const Value pos = forOp.getInductionVar();
  posits[tid][lvl] = pos;

  const Value mem = coordinatesBuffers[tid][lvl];

  coords[tid][lvl] = crd;

  auto pred = CMPI(eq, crd, expected);

  for (Value red : reduc) {
    types.push_back(red.getType());
  }

  bool hasReduc = !types.empty();
  scf::IfOp ifOp = builder.create<scf::IfOp>(loc, types, pred, hasReduc);

    YIELD(ifOp.getResults());
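// The filter loop scans every position in [pLo, pHi) and compares the stored
// coordinate against the value of the affine expression; the scf.if guard
// executes the loop body only on an exact match.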
  posits[tid][lvl] = genAddress(builder, loc, tid, lvl, lvlCrd);
  assert(isValidLevel(tid, lvl));
  const auto lvlTp = lvlTypes[tid][lvl];

  if (isCompressedLT(lvlTp) || isLooseCompressedLT(lvlTp)) {
    assert(lvl == 0 || posits[tid][lvl - 1]);

    const Value mem = positionsBuffers[tid][lvl];

    Value pLo = lvl == 0 ? c0 : posits[tid][lvl - 1];
    if (isLooseCompressedLT(lvlTp))
      pLo = builder.create<arith::MulIOp>(loc, pLo, c2);
    posits[tid][lvl] = genIndexLoad(builder, loc, mem, pLo);

    highs[tid][lvl] = genIndexLoad(builder, loc, mem, pHi);
    return;
  }
  if (isSingletonLT(lvlTp)) {
    const Value pLo = lvl == 0 ? c0 : posits[tid][lvl - 1];
    posits[tid][lvl] = pLo;

    const auto parentSegHi = segHi[tid][lvl - 1];
    highs[tid][lvl] = (!isUniqueLT(lvlTypes[tid][lvl - 1]) && parentSegHi)
                          ? parentSegHi
                          : ADDI(pLo, C_IDX(1));
    return;
  }
  if (is2OutOf4LT(lvlTp)) {
    const Value pLo = lvl == 0 ? c0 : posits[tid][lvl - 1];

    posits[tid][lvl] = MULI(pLo, c2);
    highs[tid][lvl] = ADDI(posits[tid][lvl], c2);
    return;
  }
  llvm_unreachable("Unrecognized level-type!");
void LoopEmitter::enterTensorsAtDenseLvls(
    OpBuilder &builder, Location loc, ArrayRef<TensorLvlCond> dnConds, Value iv,
    SmallVectorImpl<SliceLoopInfo> &sliceInfo) {
  for (auto [dnTidLvl, denseLoopCond] : dnConds) {

    if (isAffineIdxCond(denseLoopCond)) {

      bool unReduc = isAffineIdxUnRedCond(denseLoopCond);
      SliceInfo &info = sliceStack[tid].back();

      sliceInfo.emplace_back(tid, lvl, !unReduc);

      if (unReduc) {
        assert(*info.slicedOnLvl == lvl);
        unsigned depth = sliceStack[tid].back().depth;

        unsigned stride = sliceMeta[tid][lvl][depth - 1].second;

        info.minCrd = info.offset = MULI(iv, C_IDX(stride));
        info.isNonEmpty = constantI1(builder, loc, true);
      } else {
        posits[tid][lvl] =
            genAddress(builder, loc, tid, lvl, ADDI(info.offset, iv));

        levelReducedDep[tid][lvl]++;
      }
    } else {

      if (isSynTensor(tid))
        continue;

      assert(dependentLvlMap[tid][lvl].empty());

      if (enc && !isSparseOutput(tid)) {
        bool validPos = lvl == 0 || posits[tid][lvl - 1];

          assert(isOutputTensor(tid));

        posits[tid][lvl] = genAddress(builder, loc, tid, lvl, iv);
      }
    }
  }
}
  const LoopInfo &loopInfo = loopStack.back();
  for (auto [tid, lvl, reduced] : loopInfo.sliceDrivenInfo) {

    SliceInfo &info = sliceStack[tid].back();

    assert(*info.slicedOnLvl == lvl);

    invalidateSliceIterIdx(rewriter, loc, tid, lvl);
    info.minCrd = info.offset = info.isNonEmpty = Value();

    forwardsReducedSliceLevelTreeIt(rewriter, loc, tid, lvl,
                                    constantIndex(rewriter, loc, 1));

    levelReducedDep[tid][lvl]--;
  }
  if (auto forOp = llvm::dyn_cast<scf::ForOp>(loopInfo.loop)) {
    if (!reduc.empty()) {
      assert(reduc.size() == forOp.getNumResults());
      rewriter.create<scf::YieldOp>(loc, reduc);
    }

    for (unsigned i = 0, e = forOp.getResults().size(); i < e; i++)
      reduc[i] = forOp.getResult(i);
  } else {
    auto parOp = llvm::cast<scf::ParallelOp>(loopInfo.loop);
    if (!reduc.empty()) {
      assert(reduc.size() == parOp.getInitVals().size() && reduc.size() == 1);
      Operation *redExp = reduc.front().getDefiningOp();

      assert(redExp->getUses().empty());

      Value redVal = parOp.getInitVals().front();

      unsigned numUsers = 0;

      assert(numUsers == 1);

      auto redOp = rewriter.create<scf::ReduceOp>(loc, curVal);

      Block *redBlock = &redOp.getRegion().getBlocks().front();

      rewriter.updateRootInPlace(
          newRed, [&]() { newRed->setOperands(redBlock->getArguments()); });
    }

    for (unsigned i = 0, e = parOp.getResults().size(); i < e; i++)
      reduc[i] = parOp.getResult(i);
  }

  coords[tid][lvl] = Value();
  posits[tid][lvl] = Value();

  highs[tid][lvl] = Value();
void LoopEmitter::forwardsReducedSliceLevelTreeIt(OpBuilder &builder,
                                                  Location loc, TensorId tid,
                                                  Level rootLvl, Value fcnt) {

  Level leafLvl = rootLvl + 1;
  while (leafLvl < stt.getLvlRank() && !dependentLvlMap[tid][leafLvl].empty() &&
         depFullyReduced(tid, leafLvl)) {
    leafLvl++;
  }

  Level curLvl = rootLvl + 1;

  while (curLvl < leafLvl && isDenseLT(lvlTypes[tid][curLvl])) {

    auto [size, stride] = sliceMeta[tid][curLvl].back();
    assert(stride == 1 && "Not yet implemented");
    fcnt = MULI(size, fcnt);
    curLvl++;
  }

  Value nxPosPtr = nullptr;
  if (curLvl < leafLvl) {
    assert(!isDenseLT(lvlTypes[tid][curLvl]));

    Value sPosBuf = slicePosBuffer[tid][curLvl].back();
  }

  for (; curLvl < leafLvl; curLvl++) {

    if (!isDenseLT(lvlTypes[tid][curLvl])) {
      Value sPosBuf = slicePosBuffer[tid][curLvl].back();
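// This routine appears to forward the slice position state of all
// fully-reduced descendant levels: each intervening dense level scales the
// forward count by its slice size, and each sparse level advances the pointer
// stored in its slice position buffer accordingly.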
  const LoopInfo &loopInfo = loopStack.back();
  auto whileOp = llvm::cast<scf::WhileOp>(loopInfo.loop);
  Value iv = loopInfo.iv;

  for (auto [tid, lvl, resolved] : loopInfo.sliceDrivenInfo) {

      levelReducedDep[tid][lvl]--;

      assert(loopInfo.sliceDrivenInfo.size() == 1);
      auto [nxNonEmpty, nxMinCrd, nxAbsOffset] =
          genSliceNextInduction(builder, loc, tid, lvl);

      operands.push_back(nxNonEmpty);
      operands.push_back(nxMinCrd);
      operands.push_back(nxAbsOffset);

      SliceInfo &info = sliceStack[tid].back();
      info.isNonEmpty = whileOp.getResult(o++);
      info.minCrd = whileOp.getResult(o++);
      info.offset = whileOp.getResult(o++);

    Value forwarded = nullptr;
    if (loopInfo.trivialTidLvls.empty() &&
        loopInfo.sliceDrivenInfo.size() == 1) {

      operands.push_back(ADDI(posits[tid][lvl], one));
    } else {
      const Value pos = posits[tid][lvl];
      const Value nxPos = ADDI(posits[tid][lvl], one);
      forwarded = CMPI(eq, coords[tid][lvl], iv);
      operands.push_back(SELECT(forwarded, nxPos, pos));
    }

    forwardsReducedSliceLevelTreeIt(builder, loc, tid, lvl, one);

    coords[tid][lvl] = nullptr;

    posits[tid][lvl] = whileOp->getResult(o++);
  }

    const auto lvlTp = lvlTypes[tid][lvl];

    const Value crd = coords[tid][lvl];
    const Value pos = posits[tid][lvl];

    const Value add =
        !isUniqueLT(lvlTypes[tid][lvl]) ? segHi[tid][lvl] : ADDI(pos, one);

    operands.push_back(SELECT(cmp, add, pos));

    const Value newPos = whileOp->getResult(o++);

    posits[newTid][lvl] = newPos;

    coords[tid][lvl] = nullptr;

    segHi[tid][lvl] = nullptr;

  for (auto &i : reduc) {
    operands.push_back(i);

    i = whileOp->getResult(o++);
  }

  if (operands.size() + delta < whileOp.getNumResults()) {
    assert(operands.size() + delta + 1 == whileOp.getNumResults());

    operands.push_back(ADDI(iv, one));

    loopSeqStack.back().first = whileOp->getResult(o++);
  }

  assert(o == operands.size() + delta);
  if (!operands.empty())
    YIELD(operands);
  const LoopInfo &loopInfo = loopStack.back();

  if (!loopInfo.userCodeBlock->empty() &&
      llvm::isa<scf::YieldOp>(&loopInfo.userCodeBlock->back())) {

    assert(loopInfo.userCodeBlock->back().getNumResults() == 0);
  }

  if (llvm::isa<scf::WhileOp>(loopInfo.loop)) {
    exitWhileLoop(rewriter, loc, reduc);
  } else {
    exitForLoop(rewriter, loc, reduc);
  }

  assert(loopStack.size() == loopSeqStack.size());
  loopStack.pop_back();
unsigned LoopEmitter::remDepOnLevel(TensorId tid, Level lvl) const {
  unsigned totalDependencies = dependentLvlMap[tid][lvl].size();
  if (totalDependencies != 0) {
    assert(totalDependencies >= 2);
    return totalDependencies - levelReducedDep[tid][lvl];
  }
  return totalDependencies;
}
const LoopEmitter::SliceInfo &LoopEmitter::getMostRecentSliceOnLvl(TensorId tid,
                                                                   Level lvl) {
  for (auto it = sliceStack[tid].rbegin(), ie = sliceStack[tid].rend(); it < ie;
       it++) {
    if (it->slicedOnLvl == lvl) {
      return *it;
    }
  }
  llvm_unreachable("Failed to find sliceInfo");
}
std::pair<Operation *, ValueRange> LoopEmitter::genSliceLvlTraverseLoop(
    OpBuilder &builder, Location loc, Value posLo, Value posHi, Value offset,
    Value size, TensorId tid, Level lvl, ValueRange userReduc,
    LoopBodyBuilder bodyBuilder) {

  auto [sliceSz, stride] = sliceMeta[tid][lvl].back();
  assert(stride == 1 && "Not yet implemented");

  const unsigned numMetaReduc = reduc.size();

  reduc.append(userReduc.begin(), userReduc.end());
  scf::WhileOp whileOp = builder.create<scf::WhileOp>(

        Value cond = genSparseReducedAffineCond(builder, loc,
                                                coordinatesBuffers[tid][lvl],
                                                sliceHi, args[0], posHi);

        builder.create<scf::ConditionOp>(loc, cond, args);

  TypeRange types = args.drop_front(numMetaReduc).getTypes();

  auto ifOp = builder.create<scf::IfOp>(loc, types,

  assert(ifRet.size() == args.size() - 1);

  if (!ifRet.empty()) {
    builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
  }

  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());

  bodyBuilder(builder, loc, iv, ifRet);

  yields.push_back(ADDI(iv, c1));
  yields.append(ifOp.getResults().begin(), ifOp.getResults().end());

  builder.setInsertionPointAfter(whileOp);
  return std::make_pair(whileOp, whileOp.getResults().drop_front(numMetaReduc));
}
ValueRange LoopEmitter::genUnResolvedSliceTreeTraverse(
    OpBuilder &builder, Location loc, TensorId tid,
    ArrayRef<const SliceInfo *> unResLvls,
    std::optional<std::pair<TensorId, Level>> firstResLvl, ValueRange userReduc,
    LoopBodyBuilder bodyBuilder) {

  scf::ForOp outerMost = nullptr;

      bodyBuilder(builder, loc, iv, reduc.drop_back());

      reduc.back() = ADDI(reduc.back(), C_IDX(1));

  if (firstResLvl.has_value()) {

    pos = posits[firstResLvl->first][firstResLvl->second];

  } else {
    const SliceInfo &frontSlice = *unResLvls.back();
    Level firstLvl = *frontSlice.slicedOnLvl;
    if (!lvlFullyResolved(tid, firstLvl)) {

        innerArgs.push_back(c0);

        bodyBuilder = wrapped;
        unsigned depth = frontSlice.depth - 1;
        Value offset = frontSlice.offset;
        Value sPtrBuf = slicePosBuffer[tid][firstLvl][depth];

        outerMost = builder.create<scf::ForOp>(
            loc, c0, mSz, c1, innerArgs,
            [this, tid, firstLvl, offset, sPtrBuf, &ip, &pos,

              auto [size, stride] = sliceMeta[tid][firstLvl].back();
              assert(stride == 1 && "Not yet implemented");

              genSliceLvlTraverseLoop(
                  builder, loc, loopLo, loopHi, offset, size, tid, firstLvl,

              innerArgs.assign(reduc.begin(), reduc.end());

    } else if (isDenseLT(lvlTypes[tid][firstLvl])) {
      assert(firstLvl == 0);
      Value lb = frontSlice.offset;
      auto [sliceSz, stride] =
          sliceMeta[tid][*frontSlice.slicedOnLvl][frontSlice.depth];
      assert(stride == 1 && "Not yet implemented");

      outerMost = builder.create<scf::ForOp>(
          loc, lb, ub, c1, innerArgs,

            ip = builder.saveInsertionPoint();

            innerArgs.assign(iterArgs.begin(), iterArgs.end());

    }
    unResLvls = unResLvls.drop_back();
  }

  builder.restoreInsertionPoint(ip);
  if (!unResLvls.empty()) {

    for (const SliceInfo *slice : llvm::reverse(unResLvls)) {
      Level sliceLvl = *slice->slicedOnLvl;
      assert(isDenseLT(lvlTypes[tid][sliceLvl]));
      Value offset = slice->offset;
      auto [sliceSz, stride] = sliceMeta[tid][sliceLvl][slice->depth];
      assert(stride == 1 && "Not yet implemented");
      lbs.push_back(offset);
      ubs.push_back(ADDI(offset, sliceSz));
      steps.push_back(c1);
      lvlSzs.push_back(lvlSizes[tid][sliceLvl]);
    }
    auto denseNest = scf::buildLoopNest(
        builder, loc, lbs, ubs, steps, innerArgs,
        [&innerArgs, &lvlSzs, &pos, bodyBuilder](

            pos = MULI(pos, lvlSzs[em.index()]);
            pos = ADDI(pos, em.value());

          innerArgs.assign(iterArgs.begin(), iterArgs.end());

          bodyBuilder(builder, loc, pos, innerArgs);

    outerMost = denseNest.loops.front();

    YIELD(denseNest.results);
  } else {

    bodyBuilder(builder, loc, pos, innerArgs);
  }

  return outerMost.getResults();
}
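// The traversal first materializes an outer loop over the position tuples of
// the first unresolved sparse level (or the offset range of a dense level 0),
// then a dense scf loop nest over the remaining unresolved dense levels; the
// linearized position is folded as pos = pos * lvlSize + iv at each nesting
// step before bodyBuilder runs at the innermost point.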
  sliceStack[tid].emplace_back(c0, c0,

  auto [nxSz, stride] = sliceMeta[tid][lvl][1];
  assert(stride == 1 && "Not yet implemented");
  Value sPtrBuf = slicePosBuffer[tid][lvl][0];

    pHi = genIndexLoad(builder, loc, positionsBuffers[tid][0], c1);

    pLo = genIndexLoad(builder, loc, positionsBuffers[tid][lvl],
                       posits[tid][lvl - 1]);
    pHi = genIndexLoad(builder, loc, positionsBuffers[tid][lvl],
                       ADDI(posits[tid][lvl - 1], c1));

  sliceStack[tid].emplace_back(minCrd, absOffset, isNonEmpty, lvl,

  unsigned depth = levelReducedDep[tid][lvl];

  Value remSz = sliceMeta[tid][lvl][depth + 1].first;

  sliceStack[tid].emplace_back(c0, c0, constantI1(builder, loc, false), lvl,
  assert(lvl == *sliceStack[tid].back().slicedOnLvl + 1);

  assert(slicePosBuffer[tid][lvl - 1].size() == sliceStack[tid].back().depth);

  std::optional<std::pair<TensorId, Level>> firstResLvl;
  for (Level curLvl = lvl; curLvl >= 1; curLvl--) {
    Level prevLvl = curLvl - 1;
    if (lvlFullyResolved(tid, prevLvl)) {
      firstResLvl = std::make_pair(tid, prevLvl);
      break;
    }
    unResSlices.push_back(&getMostRecentSliceOnLvl(tid, prevLvl));
    if (!isDenseLT(lvlTypes[tid][prevLvl])) {
      break;
    }
  }

  assert(!unResSlices.empty() &&
         !lvlFullyResolved(tid, *unResSlices.front()->slicedOnLvl));

  Value sPtrBuf = slicePosBuffer[tid][lvl].back();

  ValueRange result = genUnResolvedSliceTreeTraverse(
      builder, loc, tid, unResSlices, firstResLvl, reduc,

        Value &nonEmpty = reduc[0];
        Value &minCrd = reduc[1];
        Value &curTupleCnt = reduc[2];

        Value sPHi =
            genIndexLoad(builder, loc, positionsBuffers[tid][lvl], pHi);

        Value lvlNonEmpty = CMPI(ult, sPLo, sPHi);
        nonEmpty = builder.create<arith::OrIOp>(loc, lvlNonEmpty, nonEmpty);

        Value curC =
            genIndexLoad(builder, loc, coordinatesBuffers[tid][lvl], sPLo);
        Value isSmaller = CMPI(ult, curC, minCrd);

        minCrd = ifNonEmpty.getResult(0);

        curTupleCnt = ADDI(curTupleCnt, C_IDX(1));

  Value isNonEmpty = result[0];
  Value minCrd = result[1];

  sliceStack[tid].emplace_back(minCrd, absOffset, isNonEmpty, lvl, depth + 1);
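// The tree traversal threads three reduction values through every visited
// tuple: whether any child position range is non-empty, the minimum
// coordinate seen so far, and the running tuple count used to index the
// slice position buffer.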
  if (depFullyReduced(tid, lvl)) {

    Value sPosBuf = slicePosBuffer[tid][lvl].back();
  const LevelType lvlType = lvlTypes[tid][lvl];

    llvm_unreachable("TODO: dense level should be easy to support, while "
                     "singleton level requires more efforts");

  assert(!dependentLvlMap[tid][lvl].empty());
  assert(!sliceStack[tid].empty());

  const SliceInfo &sliceInfo = sliceStack[tid].back();

  if (baseEnc.isSlice())
    llvm_unreachable("TODO: not yet implemented");

  if (slicePosBuffer[tid][lvl][0] == nullptr && !isDenseLT(lvlType)) {

    for (Level curLevel = lvl;
         curLevel >= 1 && !lvlFullyResolved(tid, curLevel - 1); curLevel--) {

      assert(depFullyReduced(tid, curLevel - 1));
      assert(!sliceMeta[tid][curLevel - 1].empty());
      auto [sz, stride] = sliceMeta[tid][curLevel - 1].back();
      assert(stride == 1 && "Not yet implemented");
      tupleCnt = MULI(tupleCnt, sz);
    }
    for (Value &cache : slicePosBuffer[tid][lvl])
      cache = allocSlicePosBuf(builder, loc, tupleCnt);
  }

  if (sliceInfo.isInitialTensor() ||
      (lvl >= 1 && lvlFullyResolved(tid, lvl - 1))) {

    genResolvedSliceBegin(builder, loc, tid, lvl);
  } else {

    genUnResolvedSliceBegin(builder, loc, tid, lvl);
  }
  for (unsigned i = 0; i <= lvl; i++) {
    if (!isDenseLT(lvlTypes[tid][i]) && !dependentLvlMap[tid][i].empty()) {
std::tuple<Value, Value, Value>
LoopEmitter::genSliceNextInduction(OpBuilder &builder, Location loc,
                                   TensorId tid, Level lvl) {
  if (!isCompressedLT(lvlTypes[tid][lvl]))
    llvm_unreachable("TODO");

  SliceInfo &info = sliceStack[tid].back();
  assert(info.slicedOnLvl == lvl);

  Value absOffset = info.offset;

  invalidateSliceIterIdx(builder, loc, tid, lvl);

  Value sPtrBuf = slicePosBuffer[tid][lvl][info.depth - 1];
  Value fastPathP = CMPI(ugt, info.minCrd, absOffset);

    reduc[2] = ADDI(absOffset, c1);

    reduc[2] = absOffset;

    reduc[0] = lvlSizes[tid][lvl];

    auto loopArgs = static_cast<ValueRange>(reduc).drop_back();
    auto forOp = scf::buildLoopNest(
        builder, loc, c0, mSz, c1, loopArgs,
        [this, tid, lvl, c1, sPtrBuf,

          Value curMinCrd = iterArgs[0];
          Value isNonEmpty = iterArgs[1];

          auto advPLo = builder.create<scf::IfOp>(loc, idxTp, pred, true);

            Value coord =
                genIndexLoad(builder, loc, coordinatesBuffers[tid][lvl], pLo);
            Value pred = CMPI(eq, coord, info.minCrd);
            auto ifEqual = builder.create<scf::IfOp>(loc, idxTp, pred, true);
            builder.setInsertionPointToStart(
                &ifEqual.getThenRegion().front());

            builder.setInsertionPointToStart(
                &ifEqual.getElseRegion().front());

            YIELD(ifEqual.getResults());

          pLo = advPLo.getResult(0);
          Value lvlNonEmpty = CMPI(ult, pLo, pHi);

          auto newMin =
              builder.create<scf::IfOp>(loc, idxTp, lvlNonEmpty, true);

          isNonEmpty =
              builder.create<arith::OrIOp>(loc, lvlNonEmpty, isNonEmpty);
          curMinCrd = builder.create<arith::SelectOp>(
              loc, CMPI(ult, newMin.getResult(0), curMinCrd),
              newMin.getResult(0), curMinCrd);
          return {curMinCrd, isNonEmpty};

    Value tmp = ADDI(forOp.results.front(), c1);
    auto [size, stride] = sliceMeta[tid][lvl][info.depth];
    assert(stride == 1 && "Not yet implemented");

    minOffset = SELECT(p, minOffset, c0);

    yields.assign(forOp.results.begin(), forOp.results.end());
    yields.push_back(minOffset);

  Value nextMinCrd = ifOp.getResults()[0];
  Value nextNonEmpty = ifOp.getResults()[1];

  Value minOffset = ifOp.getResults()[2];

  Value maxPred = CMPI(ugt, minOffset, nxOffset);
  Value nextAbsOffset = SELECT(maxPred, minOffset, nxOffset);

  auto [size, stride] = sliceMeta[tid][lvl][info.depth];
  assert(stride == 1 && "Not yet implemented");
  Value sliceUB = ADDI(nextAbsOffset, size);

  assert(info.depth - 1 == 0);
  nextNonEmpty = ANDI(nextNonEmpty, CMPI(ule, sliceUB, lvlSizes[tid][lvl]));

  assert(info.depth - 1 == 0);
  return std::make_tuple(nextNonEmpty, nextMinCrd, nextAbsOffset);
}
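// genSliceNextInduction: the next absolute offset is the maximum of the
// recomputed minimum offset and the forwarded offset, and the slice remains
// non-empty only while sliceUB = nextAbsOffset + size still fits within the
// level size.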