#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "hoist-padding"

#define DBGS() (dbgs() << '[' << DEBUG_TYPE << "] ")
static bool debugPrintLoopInShortForm(Operation *op) {
  AsmState state(op->getParentOfType<func::FuncOp>());
  (void)state;
  if (auto forOp = dyn_cast<scf::ForOp>(op)) {
    forOp.getInductionVar().printAsOperand(dbgs(), state);
    dbgs() << " @ " << forOp.getOperation();
    return true;
  }
  return false;
}

static void debugPrintBackwardSlice(SetVector<Operation *> &backwardSlice) {
  LLVM_DEBUG(llvm::interleaveComma(backwardSlice, DBGS() << "--backwardSlice:",
                                   [](Operation *op) {
                                     dbgs() << "\n";
                                     DBGS() << "----";
                                     if (debugPrintLoopInShortForm(op)) {
                                       dbgs() << "\n";
                                       return;
                                     }
                                     dbgs() << *op << "\n";
                                   });
             DBGS() << "\n";);
}
 
 
/// Return at most `nLevels` of immediately enclosing scf::ForOp loops.
static void
getAtMostNEnclosingLoops(tensor::PadOp padOp, int nLevels,
                         SmallVector<scf::ForOp> &reverseEnclosingLoops) {
  scf::ForOp outermostEnclosingForOp = nullptr;
  Operation *nextEnclosingOp = padOp->getParentOp();
  while (nLevels-- > 0 &&
         (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
    LLVM_DEBUG(DBGS() << "loops: ";
               debugPrintLoopInShortForm(outermostEnclosingForOp);
               dbgs() << "\n");
    reverseEnclosingLoops.push_back(outermostEnclosingForOp);
    nextEnclosingOp = outermostEnclosingForOp->getParentOp();
  }
}
 
 
/// Return the immediately enclosing scf::ForOp loops, walking outward until
/// `untilLoop` is reached.
static void
getEnclosingLoopsUntil(tensor::PadOp padOp, scf::ForOp untilLoop,
                       SmallVector<scf::ForOp> &reverseEnclosingLoops) {
  scf::ForOp outermostEnclosingForOp = nullptr;
  Operation *nextEnclosingOp = padOp->getParentOp();
  while (outermostEnclosingForOp != untilLoop &&
         (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
    LLVM_DEBUG(DBGS() << "loops: ";
               debugPrintLoopInShortForm(outermostEnclosingForOp);
               dbgs() << "\n");
    reverseEnclosingLoops.push_back(outermostEnclosingForOp);
    nextEnclosingOp = outermostEnclosingForOp->getParentOp();
  }
}
 
 
static void computeBackwardSlice(tensor::PadOp padOp,
                                 scf::ForOp outermostEnclosingForOp,
                                 SetVector<Operation *> &backwardSlice) {
  // Only consider ops dominated by the outermost enclosing loop and not
  // properly nested inside the pad op itself.
  DominanceInfo domInfo(outermostEnclosingForOp);
  BackwardSliceOptions sliceOptions;
  sliceOptions.filter = [&](Operation *op) {
    return domInfo.dominates(outermostEnclosingForOp, op) &&
           !padOp->isProperAncestor(op);
  };
  sliceOptions.inclusive = true;

  // First, add the ops required to compute the pad region.
  SetVector<Value> valuesDefinedAbove;
  getUsedValuesDefinedAbove(padOp.getRegion(), padOp.getRegion(),
                            valuesDefinedAbove);
  for (Value v : valuesDefinedAbove) {
    LogicalResult result = getBackwardSlice(v, &backwardSlice, sliceOptions);
    assert(result.succeeded() && "expected a backward slice");
    (void)result;
  }
  // Then, add the backward slice from `padOp` itself.
  LogicalResult result =
      getBackwardSlice(padOp.getOperation(), &backwardSlice, sliceOptions);
  assert(result.succeeded() && "expected a backward slice");
  (void)result;
}
 
 
/// Analysis driving the hoisting of a tensor::PadOp above enclosing
/// scf::ForOp loops.
struct HoistPaddingAnalysis {
  HoistPaddingAnalysis(tensor::PadOp padOp, int numLoops);
  HoistPaddingAnalysis(tensor::PadOp padOp, scf::ForOp outermostEnclosingForOp);

  bool isValid() { return valid.has_value() && valid.value(); }
  bool isInvalid() { return valid.has_value() && !valid.value(); }

  /// Footprint of the hoisted packed tensor, computed from the packing loops.
  SmallVector<Value> getHoistedPackedTensorSizes(RewriterBase &rewriter,
                                                 Location loc) const;

  /// Performs optional hoisting to enable hoist padding to occur.
  void enableHoistPadding(RewriterBase &rewriter);

  /// Finalizes the analysis; sets `valid`.
  void finalizeHoistPaddingAnalysis();

  /// Whether the analysis succeeded; nullopt until finalized.
  std::optional<bool> valid;

  /// The pad op to hoist.
  tensor::PadOp opToHoist;

  /// Immediately enclosing loops, from innermost to outermost.
  SmallVector<scf::ForOp> reverseEnclosingLoops;

  /// Drops ops from the backward slice that are not part of the index
  /// computation of `opToHoist`.
  LogicalResult dropNonIndexDependencies();

  /// Backward slice rooted at `opToHoist`.
  SetVector<Operation *> backwardSlice;

  /// The loop above which the pad op is hoisted.
  scf::ForOp outermostEnclosingForOp;

  /// The loops whose induction variables feed the pad's index computation;
  /// their span determines the footprint of the packed tensor.
  SmallVector<scf::ForOp> packingLoops;

  /// The tensor.extract_slice op that feeds the pad.
  tensor::ExtractSliceOp sliceOp;

  /// If non-null, the unique scf::ForOp that consumes `sliceOp`.
  scf::ForOp padConsumingForOp;
};
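
// Typical driving sequence, mirroring the callers below (buildPackingLoopNest
// and hoistPaddingOnTensors):
//
//   HoistPaddingAnalysis analysis(opToHoist, numLoops);
//   analysis.enableHoistPadding(rewriter);
//   analysis.finalizeHoistPaddingAnalysis();
//   if (!analysis.isValid())
//     return failure();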
 
HoistPaddingAnalysis::HoistPaddingAnalysis(tensor::PadOp padOp, int numLoops)
    : valid(std::nullopt), opToHoist(padOp) {
  getAtMostNEnclosingLoops(padOp, numLoops, reverseEnclosingLoops);
  if (reverseEnclosingLoops.empty()) {
    LLVM_DEBUG(DBGS() << "--No immediately enclosing loop -> Skip\n");
    valid = false;
    return;
  }
  outermostEnclosingForOp = reverseEnclosingLoops.back();
  sliceOp = opToHoist.getSource().getDefiningOp<tensor::ExtractSliceOp>();
  if (!sliceOp) {
    LLVM_DEBUG(DBGS() << "--Cannot find the extract slice op -> Skip\n");
    valid = false;
    return;
  }
}
 
HoistPaddingAnalysis::HoistPaddingAnalysis(tensor::PadOp padOp,
                                           scf::ForOp outermostEnclosingForOp)
    : valid(std::nullopt), opToHoist(padOp) {
  getEnclosingLoopsUntil(padOp, outermostEnclosingForOp,
                         reverseEnclosingLoops);
  if (reverseEnclosingLoops.empty()) {
    LLVM_DEBUG(DBGS() << "--No immediately enclosing loop -> Skip\n");
    valid = false;
    return;
  }
  this->outermostEnclosingForOp = reverseEnclosingLoops.back();
  if (this->outermostEnclosingForOp != outermostEnclosingForOp) {
    LLVM_DEBUG(DBGS() << "--Unexpected outermost enclosing loop -> Skip\n");
    valid = false;
    return;
  }
  sliceOp = opToHoist.getSource().getDefiningOp<tensor::ExtractSliceOp>();
  if (!sliceOp) {
    LLVM_DEBUG(DBGS() << "--Cannot find the extract slice op -> Skip\n");
    valid = false;
    return;
  }
}
 
void HoistPaddingAnalysis::enableHoistPadding(RewriterBase &rewriter) {
  if (isInvalid())
    return;
  // If the padded data is not yet available before entering the outermost
  // enclosing loop, try to hoist loop-invariant subsets out of it first.
  if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.getSource())) {
    outermostEnclosingForOp = cast<scf::ForOp>(
        hoistLoopInvariantSubsets(rewriter, outermostEnclosingForOp));
  }
}
 
void HoistPaddingAnalysis::finalizeHoistPaddingAnalysis() {
  if (isInvalid())
    return;

  if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.getSource())) {
    LLVM_DEBUG(DBGS() << "--outermostEnclosingForOp:\n"
                      << outermostEnclosingForOp << "\n"
                      << "--sliceOp: " << sliceOp << "\n"
                      << "--sliceOp.getSource(): " << sliceOp.getSource()
                      << "\n");
    LLVM_DEBUG(DBGS() << "----Source not defined outside of loops -> Skip\n");
    valid = false;
    return;
  }

  // Record the unique consuming loop of the slice, if any.
  if (sliceOp->hasOneUse()) {
    padConsumingForOp = dyn_cast<scf::ForOp>(*(sliceOp->getUsers().begin()));
  }

  // The padding value must be a constant; bail out otherwise.
  Value paddingValue = opToHoist.getConstantPaddingValue();
  if (!paddingValue ||
      !isa_and_nonnull<arith::ConstantOp>(paddingValue.getDefiningOp())) {
    LLVM_DEBUG(DBGS() << "Cannot find constant padding value -> Skip\n");
    valid = false;
    return;
  }

  computeBackwardSlice(opToHoist, outermostEnclosingForOp, backwardSlice);
  if (backwardSlice.size() <= 1) {
    valid = false;
    return;
  }

  debugPrintBackwardSlice(backwardSlice);
  // Remove all ops that are not part of the index computation of the pad.
  if (failed(dropNonIndexDependencies())) {
    LLVM_DEBUG(DBGS() << "--Cannot dropNonIndexDependencies -> Skip\n");
    valid = false;
    return;
  }
  debugPrintBackwardSlice(backwardSlice);

  // Keep only the loops of the filtered backward slice as packing loops,
  // from outermost to innermost.
  for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops))
    if (backwardSlice.contains(forOp))
      packingLoops.push_back(forOp);

  // TODO: support hoisting multiple loops through iter_args.
  if (packingLoops.size() > 1 && padConsumingForOp) {
    LLVM_DEBUG(DBGS() << "--Cannot hoist multiple loops through iter_args -> "
                         "Downgrade to 1 loop\n");
    packingLoops.resize(1);
  }

  valid = true;
}
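
// Illustrative outcome (names hypothetical): for enclosing loops (%i, %j, %k)
// where only %j and %k feed the pad's index computation, packingLoops holds
// the %j and %k loops. If the pad additionally flows into a consuming loop
// through an iter_arg, the downgrade above limits packing to a single loop.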
 
LogicalResult HoistPaddingAnalysis::dropNonIndexDependencies() {
  // Set of all values used for index computation.
  SetVector<Value> indexEdges;

  // Add all index operands of `operation` to `indexEdges`. An index operand
  // is an operand of type index.
  auto addIndexOperandsToIndexEdges = [&](Operation *operation) {
    for (Value operand : operation->getOperands())
      if (operand.getType().isIndex())
        indexEdges.insert(operand);
  };

  // Check if any result of `operation` is contained in `indexEdges`.
  auto hasIndexResult = [&](Operation *operation) {
    return llvm::any_of(operation->getResults(), [&](Value result) {
      return indexEdges.contains(result);
    });
  };

  // Walk the backward slice in reverse and keep only the ops that are either
  // the pad/slice themselves or part of their index computation; collect the
  // rest for removal.
  SetVector<Operation *> operationsToRemove;
  for (Operation *op : llvm::reverse(backwardSlice)) {
    // Seed the index edges with the index operands of the pad and slice ops.
    if (op == opToHoist || op == sliceOp) {
      addIndexOperandsToIndexEdges(op);
      continue;
    }
    // Keep loops whose induction variable feeds the index computation.
    if (auto forOp = dyn_cast<scf::ForOp>(op)) {
      if (!hasIndexResult(op) && indexEdges.contains(forOp.getInductionVar())) {
        addIndexOperandsToIndexEdges(op);
        continue;
      }
    }
    // Keep ops with an index result on an index edge and propagate to their
    // index operands.
    if (hasIndexResult(op)) {
      addIndexOperandsToIndexEdges(op);
      // Reject ops that mix index and non-index operands.
      if (llvm::any_of(op->getOperandTypes(),
                       [](Type type) { return !type.isIndex(); })) {
        LLVM_DEBUG(DBGS() << "Unsupported op with non index type operands: "
                          << op << " -> Skip\n");
        return failure();
      }
      // Reject ops with regions or memory effects.
      auto effectInterface = dyn_cast<MemoryEffectOpInterface>(op);
      bool hasMemoryEffect = effectInterface && !effectInterface.hasNoEffect();
      if (hasMemoryEffect || op->getNumRegions() != 0) {
        LLVM_DEBUG(DBGS() << "Unsupported op with region or memory effect: "
                          << op << " -> Skip\n");
        return failure();
      }
      continue;
    }
    // Remove all other ops, except cheap-to-keep constants.
    if (!isa<arith::ConstantOp>(op))
      operationsToRemove.insert(op);
  }
  backwardSlice.set_subtract(operationsToRemove);
  return success();
}
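
// Illustrative walk (not from this file): if the pad's offset is
//   %o = affine.apply affine_map<(d0) -> (d0 * 4)>(%i)
// then %o is an index edge; the affine.apply produces a result on that edge,
// so it is kept and its operand %i becomes an index edge in turn. A
// tensor-typed producer that feeds no index edge is dropped from the slice.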
 
SmallVector<Value>
HoistPaddingAnalysis::getHoistedPackedTensorSizes(RewriterBase &rewriter,
                                                  Location loc) const {
  SmallVector<Value> dynamicTensorSizes;

  // Upper-bound the packing loop lengths to size the packed tensor. Keep the
  // bound loop-independent by stopping the constraint walk at
  // affine.min/max/apply results.
  for (auto forOp : packingLoops) {
    FailureOr<OpFoldResult> loopUb = affine::reifyIndexValueBound(
        rewriter, loc, presburger::BoundType::UB, forOp.getUpperBound(),
        /*stopCondition=*/
        [&](Value v, std::optional<int64_t> d, ValueBoundsConstraintSet &cstr) {
          if (v == forOp.getUpperBound())
            return false;
          // Compute a bound that is independent of any affine op results.
          Operation *op = v.getDefiningOp();
          if (!op)
            return true;
          return !isa<affine::AffineMinOp, affine::AffineMaxOp,
                      affine::AffineApplyOp>(op);
        },
        /*closedUB=*/true);
    assert(succeeded(loopUb) && "could not get upper bound");

    // Compute the maximal packing loop length as (ub - lb).ceilDiv(step).
    AffineExpr lb, ub, step;
    bindDims(rewriter.getContext(), lb, ub);
    bindSymbols(rewriter.getContext(), step);
    Value res = rewriter.createOrFold<affine::AffineApplyOp>(
        loc, (ub - lb).ceilDiv(step),
        ValueRange{forOp.getLowerBound(),
                   getValueOrCreateConstantIndexOp(rewriter, loc, *loopUb),
                   cast<scf::ForOp>(forOp).getStep()});
    dynamicTensorSizes.push_back(res);
  }

  return dynamicTensorSizes;
}
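
// Worked example of the size computation above (illustrative numbers): for a
// packing loop with lb = 0, ub = 10, step = 4, the packed dimension gets
// (10 - 0) ceildiv 4 = 3 slots.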
 
/// Return the current iteration number in the loop (iv - lb).ceilDiv(step).
/// The returned Value is guaranteed not to depend on any loop comprised in
/// [`outer`, `forOp`].
static Value buildLoopIterationCount(RewriterBase &rewriter, scf::ForOp outer,
                                     scf::ForOp forOp) {
  MLIRContext *ctx = forOp->getContext();
  AffineExpr iv, lb, step;
  bindDims(ctx, iv, lb);
  bindSymbols(ctx, step);
  Value ivVal = forOp.getInductionVar(), lbVal = forOp.getLowerBound(),
        stepVal = forOp.getStep();
  auto loc = forOp->getLoc();
  return rewriter.createOrFold<affine::AffineApplyOp>(
      loc, (iv - lb).ceilDiv(step), ValueRange{ivVal, lbVal, stepVal});
}
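
// Worked example (illustrative): inside `scf.for %i = %c2 to %c10 step %c4`,
// at %i == 6 the iteration number is (6 - 2) ceildiv 4 = 1, i.e. the padded
// slice for this iteration lands in packed slot 1.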
 
 
static FailureOr<PackingResult> buildPackingLoopNestImpl(
    RewriterBase &rewriter, IRMapping &bvm, tensor::PadOp opToHoist,
    ArrayRef<int64_t> transposeVector, RankedTensorType transposedTensorType,
    tensor::EmptyOp emptyOp, const HoistPaddingAnalysis &analysis) {
  SmallVector<OpFoldResult> offsets, sizes, strides;
  SmallVector<Value> clonedLoopIvs, leadingHoistedPackedTensorIndexings;

  scf::ForOp outerLoop = analysis.outermostEnclosingForOp;

  Location loc = opToHoist->getLoc();
  RankedTensorType paddedTensorType = opToHoist.getResultType();
  int paddedRank = paddedTensorType.getRank();

  // Step 0. Map the iter args of enclosing loops back to their init values,
  // so cloned ops read the value that is live above the loop nest.
  BlockArgument bbArg = dyn_cast<BlockArgument>(opToHoist.getSource());
  while (bbArg) {
    auto forOp = dyn_cast<scf::ForOp>(bbArg.getOwner()->getParentOp());
    if (!forOp)
      break;
    if (forOp != outerLoop && !outerLoop->isAncestor(forOp))
      break;
    OpOperand &operand = *forOp.getTiedLoopInit(bbArg);
    bvm.map(bbArg, operand.get());
    bbArg = dyn_cast<BlockArgument>(operand.get());
  }
 
  // Step 1. Iteratively clone loops and push `hoistedPackedTensor` through
  // them as an iteration argument.
  Value hoistedPackedTensor = emptyOp.getResult();
  OpBuilder::InsertionGuard g(rewriter);
  for (Operation *op : analysis.backwardSlice) {
    // Specifically sit out in the extract_slice(hoistedPackedTensor) case:
    // this is the piece we seek to replace.
    if (auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(op)) {
      if (bvm.lookupOrDefault(sliceOp.getSource()) == hoistedPackedTensor) {
        LLVM_DEBUG(DBGS() << "--Skip: " << sliceOp << "\n");
        continue;
      }
    }

    // Clone all operations except loops.
    auto forOp = dyn_cast<scf::ForOp>(op);
    if (!forOp) {
      rewriter.clone(*op, bvm);
      continue;
    }

    // Create a packing loop that takes `hoistedPackedTensor` as iter arg.
    auto clonedForOp = scf::ForOp::create(
        rewriter, loc, bvm.lookupOrDefault(forOp.getLowerBound()),
        bvm.lookupOrDefault(forOp.getUpperBound()),
        bvm.lookupOrDefault(forOp.getStep()), hoistedPackedTensor,
        nullptr, forOp.getUnsignedCmp());

    // Map the induction var, region args and results to the cloned loop.
    bvm.map(forOp.getInductionVar(), clonedForOp.getInductionVar());
    bvm.map(forOp.getRegionIterArgs(), clonedForOp.getRegionIterArgs());
    bvm.map(forOp.getResults(), clonedForOp.getResults());
    assert(clonedForOp->getNumRegions() == 1);
    clonedLoopIvs.push_back(clonedForOp.getInductionVar());

    // Do not insert a guard here: we get deeper into the loop nest.
    rewriter.setInsertionPointToStart(&clonedForOp->getRegion(0).front());
    Value loopIndependentIterationCount =
        buildLoopIterationCount(rewriter, outerLoop, clonedForOp);

    // The analysis ensures the iteration count is loop independent.
    if (!loopIndependentIterationCount)
      llvm_unreachable("loop independence prerequisite not met");
    leadingHoistedPackedTensorIndexings.push_back(
        loopIndependentIterationCount);
    hoistedPackedTensor = clonedForOp.getRegionIterArgs().front();
  }
 
  // Step 2. Construct the (partially) hoisted compute slice offsets, sizes
  // and strides.
  int64_t nPackedLoops = clonedLoopIvs.size();
  // offsets = [leading loop iteration counts, 0 .. 0].
  offsets =
      SmallVector<OpFoldResult>{leadingHoistedPackedTensorIndexings.begin(),
                                leadingHoistedPackedTensorIndexings.end()};
  offsets.append(paddedRank, rewriter.getIndexAttr(0));
  // sizes = [1 .. 1, transposedShape]; dynamic sizes bail out for now.
  sizes = SmallVector<OpFoldResult>(nPackedLoops, rewriter.getIndexAttr(1));
  for (int64_t sz : transposedTensorType.getShape()) {
    if (ShapedType::isDynamic(sz))
      return failure();
    sizes.push_back(rewriter.getIndexAttr(sz));
  }
  // strides = [1 .. 1].
  strides = SmallVector<OpFoldResult>(nPackedLoops + paddedRank,
                                      rewriter.getIndexAttr(1));
 
  // Step 3. Optionally transpose the padded tensor.
  TransposeOp maybeTransposeOp;
  Value paddedTensor = bvm.lookup(opToHoist.getResult());
  if (!transposeVector.empty()) {
    Value outputTensor = tensor::ExtractSliceOp::create(
        rewriter, loc, transposedTensorType, hoistedPackedTensor, offsets,
        sizes, strides);
    maybeTransposeOp = linalg::TransposeOp::create(
        rewriter, loc, paddedTensor, outputTensor, transposeVector);
    paddedTensor = maybeTransposeOp.getResult()[0];
  }
 
  // Innermost tensor.insert_slice and yields are optional / need loops.
  if (nPackedLoops > 0) {
    // Step 4. Create an InsertSliceOp at the innermost loop level, inserting
    // the optionally transposed padded slice into the packed tensor.
    Value inserted = tensor::InsertSliceOp::create(rewriter, loc, paddedTensor,
                                                   hoistedPackedTensor, offsets,
                                                   sizes, strides);

    // Step 5. Iteratively pop the stack and propagate the yield.
    Value valueToYield = inserted;
    for (Value iv : llvm::reverse(clonedLoopIvs)) {
      auto forOp = scf::getForInductionVarOwner(iv);
      rewriter.setInsertionPointToEnd(&forOp.getRegion().front());
      scf::YieldOp::create(rewriter, loc, valueToYield);
      valueToYield = forOp.getResult(0);
    }
  }

  return PackingResult{
      offsets,
      sizes,
      strides,
      clonedLoopIvs,
      leadingHoistedPackedTensorIndexings,
      maybeTransposeOp,
      cast<tensor::PadOp>(bvm.lookup(opToHoist.getResult()).getDefiningOp())};
}
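
// A condensed sketch of the nest this builds (shapes and names illustrative,
// adapted from the hoistPaddingOnTensors documentation). Hoisting the pad by
// one loop turns
//
//   scf.for %i {
//     scf.for %j {
//       %st = tensor.extract_slice %t[...] : ... to tensor<?x?xf32>
//       %0 = tensor.pad %st ... : tensor<?x?xf32> to tensor<4x8xf32>
//       compute(%0)
//     }
//   }
//
// into
//
//   scf.for %i {
//     %packed = scf.for %j iter_args(%p = %empty) {
//       %st = tensor.extract_slice %t[...] : ... to tensor<?x?xf32>
//       %0 = tensor.pad %st ... : tensor<?x?xf32> to tensor<4x8xf32>
//       %jj = affine.apply ...   // normalized iteration count of %j
//       %1 = tensor.insert_slice %0 into %p[%jj, 0, 0][1, 4, 8][1, 1, 1]
//       scf.yield %1 : tensor<?x4x8xf32>
//     }
//     scf.for %j {
//       %jj = affine.apply ...
//       %st = tensor.extract_slice %packed[%jj, 0, 0][1, 4, 8][1, 1, 1]
//                : tensor<?x4x8xf32> to tensor<4x8xf32>
//       compute(%st)
//     }
//   }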
 
 
/// Compute the transposed type and the empty packed tensor, then delegate to
/// the overload above to build the packing loop nest.
static FailureOr<PackingResult>
buildPackingLoopNestImpl(RewriterBase &rewriter, IRMapping &bvm,
                         tensor::PadOp opToHoist,
                         ArrayRef<int64_t> transposeVector,
                         const HoistPaddingAnalysis &analysis) {
  int nPackedLoops = analysis.packingLoops.size();
  LLVM_DEBUG(DBGS() << "\n";
             DBGS() << "Func:\n"
                    << *opToHoist->getParentOfType<func::FuncOp>() << "\n";
             DBGS() << "Start hoisting above " << nPackedLoops << " loops\n");

  Location loc = opToHoist->getLoc();
  RankedTensorType paddedTensorType = opToHoist.getResultType();

  // Compute the type of the transposed padded tensor.
  FailureOr<RankedTensorType> transposedTensorType =
      tensor::computeTransposedType(paddedTensorType, transposeVector);
  if (failed(transposedTensorType)) {
    LLVM_DEBUG(DBGS() << "--Could not compute transposed type -> Skip\n");
    return failure();
  }

  // Create the packed tensor<?x?x..?xtransposedShape>.
  SmallVector<int64_t> packedShape(nPackedLoops, ShapedType::kDynamic);
  llvm::append_range(packedShape, transposedTensorType->getShape());
  auto hoistedPackedTensorType = RankedTensorType::get(
      packedShape, transposedTensorType->getElementType());

  // Set the insertion point right before the outer loop and start packing.
  scf::ForOp outerLoop = analysis.outermostEnclosingForOp;
  OpBuilder::InsertionGuard g(rewriter);
  rewriter.setInsertionPoint(outerLoop);
  SmallVector<Value> dynamicTensorSizes =
      analysis.getHoistedPackedTensorSizes(rewriter, loc);
  auto emptyOp = tensor::EmptyOp::create(
      rewriter, loc, hoistedPackedTensorType.getShape(),
      hoistedPackedTensorType.getElementType(), dynamicTensorSizes);

  return buildPackingLoopNestImpl(rewriter, bvm, opToHoist, transposeVector,
                                  *transposedTensorType, emptyOp, analysis);
}
 
 
/// Build the packing loop nest required to hoist `opToHoist` above
/// `outermostEnclosingForOp`.
FailureOr<PackingResult> mlir::linalg::buildPackingLoopNest(
    RewriterBase &rewriter, tensor::PadOp opToHoist,
    scf::ForOp outermostEnclosingForOp, ArrayRef<int64_t> transposeVector) {
  HoistPaddingAnalysis analysis(opToHoist, outermostEnclosingForOp);
  analysis.enableHoistPadding(rewriter);
  analysis.finalizeHoistPaddingAnalysis();
  if (!analysis.isValid()) {
    LLVM_DEBUG(DBGS() << "--Analysis failed -> Skip\n");
    return failure();
  }
  IRMapping bvm;
  return buildPackingLoopNestImpl(rewriter, bvm, opToHoist, transposeVector,
                                  analysis);
}
 
 
/// Return true if we can walk back the use-def chain from `extractSliceOp` to
/// `expectedSource`, going through destination-style op inits only.
static bool tracesBackToExpectedValue(tensor::ExtractSliceOp extractSliceOp,
                                      Value expectedSource) {
  LLVM_DEBUG(DBGS() << "Start tracesBackToExpectedValue on: " << extractSliceOp
                    << "\n");
  LLVM_DEBUG(DBGS() << "--with extractSlice: " << extractSliceOp << "\n");
  Value source = extractSliceOp.getSource();
  LLVM_DEBUG(DBGS() << "--with starting source: " << source << "\n");
  while (source && source != expectedSource) {
    auto destOp = source.getDefiningOp<DestinationStyleOpInterface>();
    if (!destOp)
      break;
    LLVM_DEBUG(DBGS() << "--step dest op: " << destOp << "\n");
    source = destOp.getDpsInitOperand(cast<OpResult>(source).getResultNumber())
                 ->get();
  }
  LLVM_DEBUG(DBGS() << "--final source: " << source << "\n");
  LLVM_DEBUG(DBGS() << "--expected source: " << expectedSource << "\n");
  return source == expectedSource;
}
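
// Illustrative chain (not from this file): with
//   %1 = tensor.insert_slice %x into %pad[...]   // destination-style, init
//   %e = tensor.extract_slice %1[...]
// the walk steps from %e's source through each destination-style op's init
// operand (%pad here) and succeeds iff it reaches `expectedSource`.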
 
 
/// If the original consumer of `outerSliceOp` was a `forOp` (i.e. through an
/// iter arg), propagate the `hoistedPackedTensor` value through the same
/// iter arg.
/// TODO: for multiple loops we need to track the use to the innermost loop.
static tensor::ExtractSliceOp
padThroughLoopIterArg(RewriterBase &rewriter, Value paddedValueBeforeHoisting,
                      Value hoistedPackedTensor,
                      tensor::ExtractSliceOp outerSliceOp, scf::ForOp forOp) {
  LLVM_DEBUG(DBGS() << "Start padThroughLoopIterArg on: " << forOp << "\n");
  LLVM_DEBUG(DBGS() << "--paddedValueBeforeHoisting: "
                    << paddedValueBeforeHoisting << "\n");

  // Find the single use of `outerSliceOp` within `forOp`.
  OpOperand *pUse = nullptr;
  for (OpOperand &use : outerSliceOp->getUses()) {
    if (use.getOwner() == forOp) {
      assert(!pUse && "Multiple slice uses in the for loop");
      pUse = &use;
    }
  }
  assert(pUse && "No slice use in the for loop");
  OpBuilder::InsertionGuard g(rewriter);
  rewriter.setInsertionPointAfter(hoistedPackedTensor.getDefiningOp());

  unsigned iterArgNumber = forOp.getTiedLoopResult(pUse).getResultNumber();
  auto yieldingExtractSliceOp = forOp.getYieldedValues()[iterArgNumber]
                                    .getDefiningOp<tensor::ExtractSliceOp>();
  if (!yieldingExtractSliceOp)
    return tensor::ExtractSliceOp();

  // Poor man's analysis: the yielded slice must trace back to the padded
  // value through destination-style inits only.
  if (!tracesBackToExpectedValue(yieldingExtractSliceOp,
                                 paddedValueBeforeHoisting))
    return tensor::ExtractSliceOp();

  SmallVector<Value> initArgs = forOp.getInitArgs();
  initArgs[iterArgNumber] = hoistedPackedTensor;
  SmallVector<Value> yieldOperands = llvm::to_vector(forOp.getYieldedValues());
  yieldOperands[iterArgNumber] = yieldingExtractSliceOp.getSource();
 
  int64_t numOriginalForOpResults = initArgs.size();
  LLVM_DEBUG(DBGS() << "numOriginalForOpResults: " << numOriginalForOpResults
                    << "\n");
  tensor::ExtractSliceOp extracted;
  {
    OpBuilder::InsertionGuard g(rewriter);
    rewriter.setInsertionPointAfter(forOp);
    extracted = tensor::ExtractSliceOp::create(
        rewriter, hoistedPackedTensor.getLoc(), hoistedPackedTensor,
        outerSliceOp.getMixedOffsets(), outerSliceOp.getMixedSizes(),
        outerSliceOp.getMixedStrides());
    rewriter.replaceAllUsesWith(forOp.getResult(iterArgNumber), extracted);
  }
  scf::ForOp newForOp = cast<scf::ForOp>(*forOp.replaceWithAdditionalYields(
      rewriter, initArgs, /*replaceInitOperandUsesInLoop=*/true,
      [&](OpBuilder &b, Location loc, ArrayRef<BlockArgument> newBBArgs) {
        return yieldOperands;
      }));

  LLVM_DEBUG(DBGS() << "newForOp results: " << newForOp.getNumResults()
                    << "\n");
  LLVM_DEBUG(DBGS() << "replace source of: " << extracted << "\n");
  LLVM_DEBUG(DBGS() << "with result #"
                    << numOriginalForOpResults + iterArgNumber
                    << " of forOp, giving us: " << extracted << "\n");
  rewriter.startOpModification(extracted);
  extracted.getSourceMutable().assign(
      newForOp.getResult(numOriginalForOpResults + iterArgNumber));
  rewriter.finalizeOpModification(extracted);

  LLVM_DEBUG(DBGS() << "replace uses of: " << paddedValueBeforeHoisting
                    << "\n");
  LLVM_DEBUG(DBGS() << "with region iter arg #"
                    << numOriginalForOpResults + iterArgNumber << "\n");
  rewriter.replaceAllUsesWith(
      paddedValueBeforeHoisting,
      newForOp.getRegionIterArg(numOriginalForOpResults + iterArgNumber));
  return extracted;
}
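
// Net effect, sketched (names illustrative): a consumer loop
//   %r = scf.for ... iter_args(%arg = %outerSlice) { ... }
// is rewritten to additionally carry the packed tensor
//   %r:2 = scf.for ... iter_args(%arg = %outerSlice, %p = %hoistedPackedTensor)
// the padded value inside the loop is replaced by the new region iter arg,
// and the slice materialized after the loop reads from the new loop result.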
 
 
/// Produce a tensor extracted from the packingResult. This can be used as a
/// replacement for `opToHoist` in callers.
static Value replaceByPackingResult(RewriterBase &rewriter,
                                    const IRMapping &bvm,
                                    tensor::PadOp opToHoist,
                                    RankedTensorType transposedTensorType,
                                    const HoistPaddingAnalysis &analysis,
                                    const PackingResult &packingResult) {
  // The replacement occurs under a single insertion point within the original
  // loop, just before `opToHoist`.
  OpBuilder::InsertionGuard g(rewriter);
  rewriter.setInsertionPoint(opToHoist);

  Location loc = opToHoist->getLoc();
  RankedTensorType paddedTensorType = opToHoist.getResultType();
  int paddedRank = paddedTensorType.getRank();

  int64_t nPackedLoops = packingResult.clonedLoopIvs.size();
  LLVM_DEBUG(DBGS() << "nPackedLoops: " << nPackedLoops << " loops\n");

  scf::ForOp outerLoop = analysis.outermostEnclosingForOp;
  ArrayRef<scf::ForOp> packingLoops = analysis.packingLoops;

  Value hoistedPackedTensor;
  SmallVector<Value> loopIterationCounts;
  SmallVector<OpFoldResult> offsets(nPackedLoops + paddedRank,
                                    rewriter.getIndexAttr(0));
  if (nPackedLoops > 0) {
    loopIterationCounts =
        llvm::to_vector<4>(llvm::map_range(packingLoops, [&](Operation *loop) {
          return buildLoopIterationCount(rewriter, outerLoop,
                                         cast<scf::ForOp>(loop));
        }));
    // The analysis ensures all loop iteration counts can be computed.
    if (llvm::any_of(loopIterationCounts, [](Value v) { return !v; }))
      llvm_unreachable("loop independence prerequisite not met");

    // offsets = [maybe_leading_ivs = loopIterationCounts, 0 .. 0].
    std::copy(loopIterationCounts.begin(), loopIterationCounts.end(),
              offsets.begin());
    hoistedPackedTensor =
        scf::getForInductionVarOwner(packingResult.clonedLoopIvs.front())
            ->getResult(0);
  } else {
    // If no loops were created, this is just hoisting without packing.
    hoistedPackedTensor = bvm.lookup(opToHoist.getResult());
  }

  LLVM_DEBUG(DBGS() << "hoistedPackedTensor: " << hoistedPackedTensor << "\n");

  // If the consumer of `sliceOp` was a `forOp`, propagate through iter args.
  scf::ForOp forOp = analysis.padConsumingForOp;
  if (forOp) {
    return padThroughLoopIterArg(rewriter, opToHoist, hoistedPackedTensor,
                                 analysis.sliceOp, forOp);
  }

  // offsets = [maybe_leading_ivs, 0 .. 0].
  // sizes = [1 .. 1, transposedShape].
  // strides = [1 .. 1].
  return tensor::ExtractSliceOp::create(
      rewriter, loc, transposedTensorType, hoistedPackedTensor, offsets,
      packingResult.sizes, packingResult.strides);
}
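
// Shape bookkeeping of the final extract, sketched: with two packing loops
// and a 4x8 padded tile, the packed tensor has type tensor<?x?x4x8xf32> and
// the replacement slice uses
//   offsets = [it0, it1, 0, 0], sizes = [1, 1, 4, 8], strides = [1, 1, 1, 1]
// where it0/it1 are the per-loop iteration numbers built above.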
 
 
/// Mechanically hoist padding operations on tensors by `numLoops` into a new,
/// generally larger tensor.
FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
    RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops,
    ArrayRef<int64_t> transposeVector, tensor::PadOp &hoistedOp,
    SmallVectorImpl<TransposeOp> &transposeOps) {
  LLVM_DEBUG(DBGS() << "\n"; DBGS() << " Try to hoist " << *(opToHoist) << "\n";
             DBGS() << " by " << numLoops << " loops\n");

  HoistPaddingAnalysis analysis(opToHoist, numLoops);
  analysis.enableHoistPadding(rewriter);
  analysis.finalizeHoistPaddingAnalysis();
  if (!analysis.isValid()) {
    LLVM_DEBUG(DBGS() << "--Analysis failed -> Skip\n");
    return failure();
  }

  // Construct the packing loop nest.
  IRMapping bvm;
  FailureOr<PackingResult> packingResult = buildPackingLoopNestImpl(
      rewriter, bvm, opToHoist, transposeVector, analysis);
  if (failed(packingResult)) {
    LLVM_DEBUG(DBGS() << "--buildPackingLoopNestImpl failed -> Skip\n");
    return failure();
  }

  if (!transposeVector.empty())
    transposeOps.push_back(packingResult->maybeTransposeOp);

  FailureOr<RankedTensorType> transposedTensorType =
      tensor::computeTransposedType(opToHoist.getResultType(), transposeVector);
  assert(succeeded(transposedTensorType) && "unexpected failure in type");

  // Now the packed tensor is ready: replace the original padding op by a
  // slice of the packed tensor.
  Value newResult = replaceByPackingResult(rewriter, bvm, opToHoist,
                                           *transposedTensorType,
                                           analysis, *packingResult);
 
  RankedTensorType paddedTensorType = opToHoist.getResultType();
  if (!transposeVector.empty()) {
    Location loc = opToHoist->getLoc();
    OpBuilder::InsertionGuard g(rewriter);
    rewriter.setInsertionPointAfter(newResult.getDefiningOp());
    // Transpose the packed tensor back to the original storage order.
    Value emptyTensor =
        tensor::EmptyOp::create(rewriter, loc, paddedTensorType.getShape(),
                                paddedTensorType.getElementType());
    TransposeOp unTransposeOp = linalg::TransposeOp::create(
        rewriter, loc, newResult, emptyTensor, transposeVector);
    newResult = unTransposeOp.getResult()[0];
    transposeOps.push_back(unTransposeOp);
  }

  LLVM_DEBUG(DBGS() << "newResult: " << newResult << "\n");
  LLVM_DEBUG(
      DBGS() << "After hoisting: "
             << newResult.getDefiningOp()->getParentOfType<func::FuncOp>()
             << "\n");

  // Make the newly cloned `opToHoist` available to the caller.
  hoistedOp = packingResult->hoistedPadOp;

  LLVM_DEBUG(DBGS() << "--SUCCESS\n");
  return newResult;
}
 
 
FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
    tensor::PadOp opToHoist, int64_t numLoops,
    ArrayRef<int64_t> transposeVector, tensor::PadOp &hoistedOp,
    SmallVectorImpl<TransposeOp> &transposeOps) {
  IRRewriter rewriter(opToHoist.getContext());
  return hoistPaddingOnTensors(rewriter, opToHoist, numLoops, transposeVector,
                               hoistedOp, transposeOps);
}
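
// Minimal calling sketch (assumes a `rewriter` and a `tensor::PadOp padOp`
// are in scope; names are illustrative):
//
//   tensor::PadOp hoistedOp;
//   SmallVector<linalg::TransposeOp> transposeOps;
//   FailureOr<Value> replacement = linalg::hoistPaddingOnTensors(
//       rewriter, padOp, /*numLoops=*/2, /*transposeVector=*/{}, hoistedOp,
//       transposeOps);
//   if (succeeded(replacement))
//     rewriter.replaceOp(padOp, *replacement);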
 
 