29#include "llvm/ADT/SmallVectorExtras.h"
30#include "llvm/Support/Debug.h"
34#define DEBUG_TYPE "hoist-padding"
36#define DBGS() (dbgs() << '[' << DEBUG_TYPE << "] ")
46 if (
auto forOp = dyn_cast<scf::ForOp>(op)) {
47 forOp.getInductionVar().printAsOperand(dbgs(), state);
48 dbgs() <<
" @ " << forOp.getOperation();
56 LLVM_DEBUG(llvm::interleaveComma(backwardSlice,
DBGS() <<
"--backwardSlice:",
64 dbgs() << *op <<
"\n";
76 scf::ForOp outermostEnclosingForOp =
nullptr;
78 while (nLevels-- > 0 &&
79 (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
80 LLVM_DEBUG(
DBGS() <<
"loops: ";
83 reverseEnclosingLoops.push_back(outermostEnclosingForOp);
84 nextEnclosingOp = outermostEnclosingForOp->
getParentOp();
95 scf::ForOp outermostEnclosingForOp =
nullptr;
97 while (outermostEnclosingForOp != untilLoop &&
98 (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
99 LLVM_DEBUG(
DBGS() <<
"loops: ";
102 reverseEnclosingLoops.push_back(outermostEnclosingForOp);
103 nextEnclosingOp = outermostEnclosingForOp->
getParentOp();
112 scf::ForOp outermostEnclosingForOp,
117 return domInfo.
dominates(outermostEnclosingForOp, op) &&
118 !padOp->isProperAncestor(op);
126 for (
Value v : valuesDefinedAbove) {
128 assert(
result.succeeded() &&
"expected a backward slice");
134 assert(
result.succeeded() &&
"expected a backward slice");
158struct HoistPaddingAnalysis {
159 HoistPaddingAnalysis(tensor::PadOp padOp,
int numLoops);
160 HoistPaddingAnalysis(tensor::PadOp padOp, scf::ForOp outermostEnclosingForOp);
162 bool isValid() {
return valid.has_value() && valid.value(); }
163 bool isInvalid() {
return valid.has_value() && !valid.value(); }
166 SmallVector<Value> getHoistedPackedTensorSizes(RewriterBase &rewriter,
183 void enableHoistPadding(RewriterBase &rewriter);
188 void finalizeHoistPaddingAnalysis();
192 std::optional<bool> valid;
195 tensor::PadOp opToHoist;
198 SmallVector<scf::ForOp> reverseEnclosingLoops;
224 LogicalResult dropNonIndexDependencies();
229 scf::ForOp outermostEnclosingForOp;
240 SmallVector<scf::ForOp> packingLoops;
243 tensor::ExtractSliceOp sliceOp;
246 scf::ForOp padConsumingForOp;
251HoistPaddingAnalysis::HoistPaddingAnalysis(tensor::PadOp padOp,
int numLoops)
252 : valid(std::nullopt), opToHoist(padOp) {
255 if (reverseEnclosingLoops.empty()) {
256 LLVM_DEBUG(
DBGS() <<
"--No immediately enclosing loop -> Skip\n");
260 outermostEnclosingForOp = reverseEnclosingLoops.back();
261 sliceOp = opToHoist.getSource().getDefiningOp<tensor::ExtractSliceOp>();
263 LLVM_DEBUG(
DBGS() <<
"--Cannot find the extract slice op -> Skip\n");
269HoistPaddingAnalysis::HoistPaddingAnalysis(tensor::PadOp padOp,
270 scf::ForOp outermostEnclosingForOp)
271 : valid(std::nullopt), opToHoist(padOp) {
274 reverseEnclosingLoops);
275 if (reverseEnclosingLoops.empty()) {
276 LLVM_DEBUG(
DBGS() <<
"--No immediately enclosing loop -> Skip\n");
280 this->outermostEnclosingForOp = reverseEnclosingLoops.back();
281 if (this->outermostEnclosingForOp != outermostEnclosingForOp) {
282 LLVM_DEBUG(
DBGS() <<
"--Unexpected outermost enclosing loop -> Skip\n");
286 sliceOp = opToHoist.getSource().getDefiningOp<tensor::ExtractSliceOp>();
288 LLVM_DEBUG(
DBGS() <<
"--Cannot find the extract slice op -> Skip\n");
294void HoistPaddingAnalysis::enableHoistPadding(
RewriterBase &rewriter) {
300 if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.getSource())) {
301 outermostEnclosingForOp = cast<scf::ForOp>(
306void HoistPaddingAnalysis::finalizeHoistPaddingAnalysis() {
310 if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.getSource())) {
311 LLVM_DEBUG(
DBGS() <<
"--outermostEnclosingForOp:\n"
312 << outermostEnclosingForOp <<
"\n"
313 <<
"--sliceOp: " << sliceOp <<
"\n"
314 <<
"--sliceOp.getSource(): " << sliceOp.getSource()
316 LLVM_DEBUG(
DBGS() <<
"----Source not defined outside of loops -> Skip\n");
320 if (sliceOp->hasOneUse()) {
321 padConsumingForOp = dyn_cast<scf::ForOp>(*(sliceOp->getUsers().begin()));
327 Value paddingValue = opToHoist.getConstantPaddingValue();
329 !isa_and_nonnull<arith::ConstantOp>(paddingValue.
getDefiningOp())) {
330 LLVM_DEBUG(
DBGS() <<
"Cannot find constant padding value -> Skip\n");
336 if (backwardSlice.size() <= 1) {
345 if (
failed(dropNonIndexDependencies())) {
346 LLVM_DEBUG(
DBGS() <<
"--Cannot dropNonIndexDependencies -> Skip\n");
358 for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops))
359 if (backwardSlice.contains(forOp))
360 packingLoops.push_back(forOp);
363 if (packingLoops.size() > 1 && padConsumingForOp) {
364 LLVM_DEBUG(
DBGS() <<
"--Cannot hoist multiple loops through iter_args -> "
365 "Downgrade to 1 loop\n");
366 packingLoops.resize(1);
376LogicalResult HoistPaddingAnalysis::dropNonIndexDependencies() {
382 auto addIndexOperandsToIndexEdges = [&](Operation *operation) {
383 for (Value operand : operation->getOperands())
384 if (operand.getType().isIndex())
385 indexEdges.insert(operand);
389 auto hasIndexResult = [&](Operation *operation) {
390 return llvm::any_of(operation->getResults(), [&](Value
result) {
391 return indexEdges.contains(result);
416 for (Operation *op : llvm::reverse(backwardSlice)) {
419 if (op == opToHoist || op == sliceOp) {
420 addIndexOperandsToIndexEdges(op);
425 if (
auto forOp = dyn_cast<scf::ForOp>(op)) {
426 if (!hasIndexResult(op) && indexEdges.contains(forOp.getInductionVar())) {
427 addIndexOperandsToIndexEdges(op);
433 if (hasIndexResult(op)) {
434 addIndexOperandsToIndexEdges(op);
436 if (llvm::any_of(op->getOperandTypes(),
437 [](Type type) { return !type.isIndex(); })) {
438 LLVM_DEBUG(
DBGS() <<
"Unsupported op with non index type operands: "
439 << op <<
" -> Skip\n");
443 auto effectInterface = dyn_cast<MemoryEffectOpInterface>(op);
444 bool hasMemoryEffect = effectInterface && !effectInterface.hasNoEffect();
445 if (hasMemoryEffect || op->getNumRegions() != 0) {
446 LLVM_DEBUG(
DBGS() <<
"Unsupported op with region or memory effect: "
447 << op <<
" -> Skip\n");
454 if (!isa<arith::ConstantOp>(op))
455 operationsToRemove.insert(op);
457 backwardSlice.set_subtract(operationsToRemove);
462HoistPaddingAnalysis::getHoistedPackedTensorSizes(RewriterBase &rewriter,
463 Location loc)
const {
464 SmallVector<Value> dynamicTensorSizes;
471 for (
auto forOp : packingLoops) {
473 FailureOr<OpFoldResult> loopUb = affine::reifyIndexValueBound(
474 rewriter, loc, presburger::BoundType::UB, forOp.getUpperBound(),
476 [&](Value v, std::optional<int64_t> d, ValueBoundsConstraintSet &cstr) {
477 if (v == forOp.getUpperBound())
480 Operation *op = v.getDefiningOp();
483 return !isa<affine::AffineMinOp, affine::AffineMaxOp,
484 affine::AffineApplyOp>(op);
487 assert(succeeded(loopUb) &&
"could not get upper bound");
494 AffineExpr lb, ub, step;
497 Value res = rewriter.
createOrFold<affine::AffineApplyOp>(
498 loc, (ub - lb).ceilDiv(step),
500 cast<scf::ForOp>(forOp).getStep()});
501 dynamicTensorSizes.push_back(res);
504 return dynamicTensorSizes;
528 Value ivVal = forOp.getInductionVar(), lbVal = forOp.getLowerBound(),
529 stepVal = forOp.getStep();
530 auto loc = forOp->
getLoc();
532 loc, (iv - lb).ceilDiv(step),
ValueRange{ivVal, lbVal, stepVal});
549 tensor::EmptyOp emptyOp,
const HoistPaddingAnalysis &analysis) {
553 scf::ForOp outerLoop = analysis.outermostEnclosingForOp;
556 RankedTensorType paddedTensorType = opToHoist.getResultType();
557 int paddedRank = paddedTensorType.getRank();
560 BlockArgument bbArg = dyn_cast<BlockArgument>(opToHoist.getSource());
565 if (forOp != outerLoop && !outerLoop->isAncestor(forOp))
567 OpOperand &operand = *forOp.getTiedLoopInit(bbArg);
568 bvm.
map(bbArg, operand.
get());
569 bbArg = dyn_cast<BlockArgument>(operand.
get());
573 Value hoistedPackedTensor = emptyOp.getResult();
575 for (
Operation *op : analysis.backwardSlice) {
578 if (
auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(op)) {
580 LLVM_DEBUG(
DBGS() <<
"--Skip: " << sliceOp <<
"\n");
586 auto forOp = dyn_cast<scf::ForOp>(op);
589 rewriter.
clone(*op, bvm);
595 auto clonedForOp = scf::ForOp::create(
599 nullptr, forOp.getUnsignedCmp());
602 bvm.
map(forOp.getInductionVar(), clonedForOp.getInductionVar());
603 bvm.
map(forOp.getRegionIterArgs(), clonedForOp.getRegionIterArgs());
604 bvm.
map(forOp.getResults(), clonedForOp.getResults());
605 assert(clonedForOp->getNumRegions() == 1);
606 clonedLoopIvs.push_back(clonedForOp.getInductionVar());
610 Value loopIndependentIterationCount =
614 if (!loopIndependentIterationCount)
615 llvm_unreachable(
"loop independence prerequisite not met");
616 leadingHoistedPackedTensorIndexings.push_back(
617 loopIndependentIterationCount);
618 hoistedPackedTensor = clonedForOp.getRegionIterArgs().front();
623 int64_t nPackedLoops = clonedLoopIvs.size();
627 leadingHoistedPackedTensorIndexings.end()};
631 for (
int64_t sz : transposedTensorType.getShape()) {
633 if (ShapedType::isDynamic(sz))
642 TransposeOp maybeTransposeOp;
643 Value paddedTensor = bvm.
lookup(opToHoist.getResult());
644 if (!transposeVector.empty()) {
645 Value outputTensor = tensor::ExtractSliceOp::create(
646 rewriter, loc, transposedTensorType, hoistedPackedTensor, offsets,
648 maybeTransposeOp = linalg::TransposeOp::create(
649 rewriter, loc, paddedTensor, outputTensor, transposeVector);
650 paddedTensor = maybeTransposeOp.getResult()[0];
654 if (nPackedLoops > 0) {
657 Value inserted = tensor::InsertSliceOp::create(rewriter, loc, paddedTensor,
658 hoistedPackedTensor, offsets,
663 for (
Value iv : llvm::reverse(clonedLoopIvs)) {
666 scf::YieldOp::create(rewriter, loc, valueToYield);
667 valueToYield = forOp.getResult(0);
676 leadingHoistedPackedTensorIndexings,
678 cast<tensor::PadOp>(bvm.
lookup(opToHoist.getResult()).getDefiningOp())};
688 int nPackedLoops = analysis.packingLoops.size();
689 LLVM_DEBUG(
DBGS() <<
"\n";
691 << *opToHoist->getParentOfType<func::FuncOp>() <<
"\n";
692 DBGS() <<
"Start hoisting above " << nPackedLoops <<
" loops\n");
695 RankedTensorType paddedTensorType = opToHoist.getResultType();
698 FailureOr<RankedTensorType> transposedTensorType =
700 if (failed(transposedTensorType)) {
701 LLVM_DEBUG(
DBGS() <<
"--Could not compute transposed type -> Skip\n");
708 llvm::append_range(packedShape, transposedTensorType->getShape());
709 auto hoistedPackedTensorType = RankedTensorType::get(
710 packedShape, transposedTensorType->getElementType());
713 scf::ForOp outerLoop = analysis.outermostEnclosingForOp;
717 analysis.getHoistedPackedTensorSizes(rewriter, loc);
718 auto emptyOp = tensor::EmptyOp::create(
719 rewriter, loc, hoistedPackedTensorType.getShape(),
720 hoistedPackedTensorType.getElementType(), dynamicTensorSizes);
723 *transposedTensorType, emptyOp, analysis);
732 HoistPaddingAnalysis analysis(opToHoist, outermostEnclosingForOp);
733 analysis.enableHoistPadding(rewriter);
734 analysis.finalizeHoistPaddingAnalysis();
735 if (!analysis.isValid()) {
736 LLVM_DEBUG(
DBGS() <<
"--Analysis failed -> Skip\n");
755 Value expectedSource) {
756 LLVM_DEBUG(
DBGS() <<
"Start tracesBackToExpectedValue on: " << extractSliceOp
758 LLVM_DEBUG(
DBGS() <<
"--with extractSlice: " << extractSliceOp <<
"\n");
759 Value source = extractSliceOp.getSource();
760 LLVM_DEBUG(
DBGS() <<
"--with starting source: " << source <<
"\n");
761 while (source && source != expectedSource) {
762 auto destOp = source.
getDefiningOp<DestinationStyleOpInterface>();
765 LLVM_DEBUG(
DBGS() <<
"--step dest op: " << destOp <<
"\n");
766 source = destOp.getDpsInitOperand(cast<OpResult>(source).getResultNumber())
769 LLVM_DEBUG(
DBGS() <<
"--final source: " << source <<
"\n");
770 LLVM_DEBUG(
DBGS() <<
"--expected source: " << expectedSource <<
"\n");
771 return source == expectedSource;
802static tensor::ExtractSliceOp
804 Value hoistedPackedTensor,
805 tensor::ExtractSliceOp outerSliceOp, scf::ForOp forOp) {
806 LLVM_DEBUG(
DBGS() <<
"Start padThroughLoopIterArg on: " << forOp <<
"\n");
807 LLVM_DEBUG(
DBGS() <<
"--paddedValueBeforeHoisting: "
808 << paddedValueBeforeHoisting <<
"\n");
810 for (
OpOperand &use : outerSliceOp->getUses()) {
811 if (use.getOwner() == forOp) {
812 assert(!pUse &&
"Multiple slice uses in the for loop");
816 assert(pUse &&
"No slice use in the for loop");
820 unsigned iterArgNumber = forOp.getTiedLoopResult(pUse).getResultNumber();
821 auto yieldingExtractSliceOp = forOp.getYieldedValues()[iterArgNumber]
822 .getDefiningOp<tensor::ExtractSliceOp>();
823 if (!yieldingExtractSliceOp)
824 return tensor::ExtractSliceOp();
830 paddedValueBeforeHoisting))
831 return tensor::ExtractSliceOp();
834 initArgs[iterArgNumber] = hoistedPackedTensor;
836 yieldOperands[iterArgNumber] = yieldingExtractSliceOp.getSource();
838 int64_t numOriginalForOpResults = initArgs.size();
839 LLVM_DEBUG(
DBGS() <<
"numOriginalForOpResults: " << numOriginalForOpResults
841 tensor::ExtractSliceOp extracted;
845 extracted = tensor::ExtractSliceOp::create(
846 rewriter, hoistedPackedTensor.
getLoc(), hoistedPackedTensor,
847 outerSliceOp.getMixedOffsets(), outerSliceOp.getMixedSizes(),
848 outerSliceOp.getMixedStrides());
851 scf::ForOp newForOp = cast<scf::ForOp>(*forOp.replaceWithAdditionalYields(
852 rewriter, initArgs,
true,
854 return yieldOperands;
857 LLVM_DEBUG(
DBGS() <<
"newForOp results: " << newForOp.getNumResults()
859 LLVM_DEBUG(
DBGS() <<
"replace source of: " << extracted <<
"\n");
860 LLVM_DEBUG(
DBGS() <<
"with result #"
861 << numOriginalForOpResults + iterArgNumber
862 <<
" of forOp, giving us: " << extracted <<
"\n");
864 extracted.getSourceMutable().assign(
865 newForOp.getResult(numOriginalForOpResults + iterArgNumber));
868 LLVM_DEBUG(
DBGS() <<
"replace uses of: " << paddedValueBeforeHoisting
870 LLVM_DEBUG(
DBGS() <<
"with region iter arg #"
871 << numOriginalForOpResults + iterArgNumber <<
"\n");
873 paddedValueBeforeHoisting,
874 newForOp.getRegionIterArg(numOriginalForOpResults + iterArgNumber));
883 tensor::PadOp opToHoist,
884 RankedTensorType transposedTensorType,
885 const HoistPaddingAnalysis &analysis,
893 RankedTensorType paddedTensorType = opToHoist.getResultType();
894 int paddedRank = paddedTensorType.getRank();
897 LLVM_DEBUG(
DBGS() <<
"nPackedLoops: " << nPackedLoops <<
" loops\n");
899 scf::ForOp outerLoop = analysis.outermostEnclosingForOp;
902 Value hoistedPackedTensor;
906 if (nPackedLoops > 0) {
907 loopIterationCounts =
908 llvm::map_to_vector<4>(packingLoops, [&](
Operation *loop) {
910 cast<scf::ForOp>(loop));
913 if (llvm ::any_of(loopIterationCounts, [](
Value v) {
return !v; }))
914 llvm_unreachable(
"loop independence prerequisite not met");
917 llvm::copy(loopIterationCounts, offsets.begin());
918 hoistedPackedTensor =
923 hoistedPackedTensor = bvm.
lookup(opToHoist.getResult());
926 LLVM_DEBUG(
DBGS() <<
"hoistedPackedTensor: " << hoistedPackedTensor <<
"\n");
929 scf::ForOp forOp = analysis.padConsumingForOp;
932 analysis.sliceOp, forOp);
938 return tensor::ExtractSliceOp::create(
939 rewriter, loc, transposedTensorType, hoistedPackedTensor, offsets,
947 LLVM_DEBUG(
DBGS() <<
"\n";
DBGS() <<
" Try to hoist " << *(opToHoist) <<
"\n";
948 DBGS() <<
" by " << numLoops <<
" loops\n");
950 HoistPaddingAnalysis analysis(opToHoist, numLoops);
951 analysis.enableHoistPadding(rewriter);
952 analysis.finalizeHoistPaddingAnalysis();
953 if (!analysis.isValid()) {
954 LLVM_DEBUG(
DBGS() <<
"--Analysis failed -> Skip\n");
961 rewriter, bvm, opToHoist, transposeVector, analysis);
962 if (failed(packingResult)) {
963 LLVM_DEBUG(
DBGS() <<
"--buildPackingLoopNestImpl failed -> Skip\n");
967 if (!transposeVector.empty())
968 transposeOps.push_back(packingResult->maybeTransposeOp);
970 FailureOr<RankedTensorType> transposedTensorType =
972 assert(succeeded(transposedTensorType) &&
"unexpected failure in type");
978 analysis, *packingResult);
981 RankedTensorType paddedTensorType = opToHoist.getResultType();
982 if (!transposeVector.empty()) {
987 tensor::EmptyOp::create(rewriter, loc, paddedTensorType.getShape(),
988 paddedTensorType.getElementType());
989 TransposeOp unTransposeOp = linalg::TransposeOp::create(
990 rewriter, loc, newResult, emptyTensor, transposeVector);
991 newResult = unTransposeOp.getResult()[0];
992 transposeOps.push_back(unTransposeOp);
995 LLVM_DEBUG(
DBGS() <<
"newResult: " << newResult <<
"\n");
997 DBGS() <<
"After hoisting: "
1002 hoistedOp = packingResult->hoistedPadOp;
1004 LLVM_DEBUG(
DBGS() <<
"--SUCCESS\n");
1009 tensor::PadOp opToHoist,
int64_t numLoops,
1014 hoistedOp, transposeOps);
static tensor::ExtractSliceOp padThroughLoopIterArg(RewriterBase &rewriter, Value paddedValueBeforeHoisting, Value hoistedPackedTensor, tensor::ExtractSliceOp outerSliceOp, scf::ForOp forOp)
If the original consumer of outerSliceOp was a forOp (i.e. the slice was carried through a loop iter_arg), rewrite the loop to thread the hoisted packed tensor through that iter_arg and return the tensor.extract_slice that now reads from the new loop result.
static Value buildLoopIterationCount(RewriterBase &rewriter, scf::ForOp outer, scf::ForOp forOp)
Return the current iteration number in the loop (iv - lb).ceilDiv(step).
static void getEnclosingLoopsUntil(tensor::PadOp padOp, scf::ForOp untilLoop, SmallVector< scf::ForOp > &reverseEnclosingLoops)
Return the immediately enclosing scf::ForOp loops, stopping once untilLoop is reached.
static bool debugPrintLoopInShortForm(Operation *op)
static bool tracesBackToExpectedValue(tensor::ExtractSliceOp extractSliceOp, Value expectedSource)
Return true if we can walk back the use-def chain from extractSliceOp to expectedSource, going exclusively through DestinationStyleOpInterface ops (following each result back to its tied DPS init operand).
static bool isDefinedOutsideOrConstant(scf::ForOp outer, Value v)
static FailureOr< PackingResult > buildPackingLoopNestImpl(RewriterBase &rewriter, IRMapping &bvm, tensor::PadOp opToHoist, ArrayRef< int64_t > transposeVector, RankedTensorType transposedTensorType, tensor::EmptyOp emptyOp, const HoistPaddingAnalysis &analysis)
static void computeBackwardSlice(tensor::PadOp padOp, scf::ForOp outermostEnclosingForOp, SetVector< Operation * > &backwardSlice)
static Value replaceByPackingResult(RewriterBase &rewriter, const IRMapping &bvm, tensor::PadOp opToHoist, RankedTensorType transposedTensorType, const HoistPaddingAnalysis &analysis, const PackingResult &packingResult)
Produce a tensor extracted from the packingResult.
static void debugPrintBackwardSlice(SetVector< Operation * > &backwardSlice)
static void getAtMostNEnclosingLoops(tensor::PadOp padOp, int nLevels, SmallVector< scf::ForOp > &reverseEnclosingLoops)
Return at most nLevels of immediately enclosing scf::ForOp loops.
If copies could not be generated due to yet-unimplemented cases, copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock specify the insertion points where the incoming and outgoing copies should be inserted (the insertion happens right before the insertion point). Since `begin` can itself be invalidated due to the memref rewriting done from this method, it is updated to a valid replacement.
Base type for affine expression.
This class provides management for the lifetime of the state used when printing the IR.
This class represents an argument of a Block.
Block * getOwner() const
Returns the block that owns this argument.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
IntegerAttr getIndexAttr(int64_t value)
MLIRContext * getContext() const
A class for computing basic dominance information.
bool dominates(Operation *a, Operation *b) const
Return true if operation A dominates operation B, i.e.
This is a utility class for mapping one set of IR entities to another.
auto lookupOrDefault(T from) const
Lookup a mapped value within the map.
auto lookup(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
IRValueT get() const
Return the current value being used by this operand.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold it, populating results with the (possibly folded) results.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents an operand of an operation.
Operation is the basic unit of execution within MLIR.
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation, or nullptr if this is a top-level operation.
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void finalizeOpModification(Operation *op)
This method is used to signal the end of an in-place modification of the given operation.
virtual void replaceAllUsesWith(Value from, Value to)
Find uses of from and replace them with to.
virtual void startOpModification(Operation *op)
This method is used to notify the rewriter that an in-place operation modification is about to happen...
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Location getLoc() const
Return the location of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
FailureOr< PackingResult > buildPackingLoopNest(RewriterBase &rewriter, tensor::PadOp opToHoist, scf::ForOp outermostEnclosingForOp, ArrayRef< int64_t > transposeVector)
Build the packing loop nest required to hoist opToHoist above outermostEnclosingForOp.
FailureOr< Value > hoistPaddingOnTensors(RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops, ArrayRef< int64_t > transposeVector, tensor::PadOp &hoistedOp, SmallVectorImpl< TransposeOp > &transposeOps)
Mechanically hoist padding operations on tensors by numLoops into a new, generally larger tensor.
ForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
FailureOr< RankedTensorType > computeTransposedType(RankedTensorType rankedTensorType, ArrayRef< int64_t > transposeVector)
Returns the transposed rankedTensorType if transposeVector is non-empty.
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
LogicalResult getBackwardSlice(Operation *op, SetVector< Operation * > *backwardSlice, const BackwardSliceOptions &options={})
Fills backwardSlice with the computed backward slice (i.e.
LoopLikeOpInterface hoistLoopInvariantSubsets(RewriterBase &rewriter, LoopLikeOpInterface loopLike)
Hoist loop-invariant tensor subsets (subset extraction and subset insertion ops) from loop-like ops.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .. sizeof...(exprs)].
llvm::SetVector< T, Vector, Set, N > SetVector
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .. sizeof...(exprs)].
void getUsedValuesDefinedAbove(Region ®ion, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region or its descendants.
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
bool inclusive
Include the top level op in the slice.
Helper struct to hold the results of building a packing loop nest.
SmallVector< OpFoldResult > strides
SmallVector< Value > clonedLoopIvs
SmallVector< OpFoldResult > sizes