#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "hoist-padding"

#define DBGS() (dbgs() << '[' << DEBUG_TYPE << "] ")
// In debugPrintLoopInShortForm(Operation *op): print an scf.for in terms of
// its induction variable.
if (auto forOp = dyn_cast<scf::ForOp>(op)) {
  forOp.getInductionVar().printAsOperand(dbgs(), state);
  dbgs() << " @ " << forOp.getOperation();
  return true;
}
return false;

// In debugPrintBackwardSlice(SetVector<Operation *> &backwardSlice):
LLVM_DEBUG(llvm::interleaveComma(
    backwardSlice, DBGS() << "--backwardSlice:", [](Operation *op) {
      // ... loops are printed in short form, all other ops in full ...
      dbgs() << *op << "\n";
    }));
// In getAtMostNEnclosingLoops(tensor::PadOp padOp, int nLevels,
//                             SmallVector<scf::ForOp> &reverseEnclosingLoops):
scf::ForOp outermostEnclosingForOp = nullptr;
Operation *nextEnclosingOp = padOp->getParentOp();
while (nLevels-- > 0 &&
       (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
  LLVM_DEBUG(DBGS() << "loops: ";
             debugPrintLoopInShortForm(outermostEnclosingForOp);
             dbgs() << "\n");
  reverseEnclosingLoops.push_back(outermostEnclosingForOp);
  nextEnclosingOp = outermostEnclosingForOp->getParentOp();
}

// In getEnclosingLoopsUntil(tensor::PadOp padOp, scf::ForOp untilLoop,
//                           SmallVector<scf::ForOp> &reverseEnclosingLoops):
scf::ForOp outermostEnclosingForOp = nullptr;
Operation *nextEnclosingOp = padOp->getParentOp();
while (outermostEnclosingForOp != untilLoop &&
       (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
  LLVM_DEBUG(DBGS() << "loops: ";
             debugPrintLoopInShortForm(outermostEnclosingForOp);
             dbgs() << "\n");
  reverseEnclosingLoops.push_back(outermostEnclosingForOp);
  nextEnclosingOp = outermostEnclosingForOp->getParentOp();
}
// In computeBackwardSlice(tensor::PadOp padOp,
//                         scf::ForOp outermostEnclosingForOp,
//                         SetVector<Operation *> &backwardSlice):
// the slice filter only keeps ops that dominate the outermost enclosing loop
// and are not nested within the pad op itself.
sliceOptions.inclusive = true;
sliceOptions.filter = [&](Operation *op) {
  return domInfo.dominates(outermostEnclosingForOp, op) &&
         !padOp->isProperAncestor(op);
};
// ...
// Compute the backward slice of every value used inside the pad region, then
// of the pad op itself.
for (Value v : valuesDefinedAbove) {
  LogicalResult result = getBackwardSlice(v, &backwardSlice, sliceOptions);
  assert(result.succeeded() && "expected a backward slice");
  (void)result;
}
LogicalResult result =
    getBackwardSlice(padOp.getOperation(), &backwardSlice, sliceOptions);
assert(result.succeeded() && "expected a backward slice");
(void)result;
/// Analysis preparing the hoisting of `opToHoist` above its enclosing
/// scf::ForOp loops.
struct HoistPaddingAnalysis {
  HoistPaddingAnalysis(tensor::PadOp padOp, int numLoops);
  HoistPaddingAnalysis(tensor::PadOp padOp, scf::ForOp outermostEnclosingForOp);

  bool isValid() { return valid.has_value() && valid.value(); }
  bool isInvalid() { return valid.has_value() && !valid.value(); }

  // ...
  void finalizeHoistPaddingAnalysis();

  /// Result of the analysis: std::nullopt until finalized, then true/false.
  std::optional<bool> valid;

  /// The pad op to hoist.
  tensor::PadOp opToHoist;

  // ...
  LogicalResult dropNonIndexDependencies();

  /// The outermost loop above which the pad op will be hoisted.
  scf::ForOp outermostEnclosingForOp;

  // ...
  /// The extract_slice op that feeds the pad op to hoist.
  tensor::ExtractSliceOp sliceOp;

  /// The loop consuming the slice through an iter_arg, if any.
  scf::ForOp padConsumingForOp;
};
HoistPaddingAnalysis::HoistPaddingAnalysis(tensor::PadOp padOp, int numLoops)
    : valid(std::nullopt), opToHoist(padOp) {
  getAtMostNEnclosingLoops(padOp, numLoops, reverseEnclosingLoops);
  if (reverseEnclosingLoops.empty()) {
    LLVM_DEBUG(DBGS() << "--No immediately enclosing loop -> Skip\n");
    valid = false;
    return;
  }
  outermostEnclosingForOp = reverseEnclosingLoops.back();
  sliceOp = opToHoist.getSource().getDefiningOp<tensor::ExtractSliceOp>();
  if (!sliceOp) {
    LLVM_DEBUG(DBGS() << "--Cannot find the extract slice op -> Skip\n");
    valid = false;
    return;
  }
}
HoistPaddingAnalysis::HoistPaddingAnalysis(tensor::PadOp padOp,
                                           scf::ForOp outermostEnclosingForOp)
    : valid(std::nullopt), opToHoist(padOp) {
  getEnclosingLoopsUntil(padOp, outermostEnclosingForOp,
                         reverseEnclosingLoops);
  if (reverseEnclosingLoops.empty()) {
    LLVM_DEBUG(DBGS() << "--No immediately enclosing loop -> Skip\n");
    valid = false;
    return;
  }
  this->outermostEnclosingForOp = reverseEnclosingLoops.back();
  if (this->outermostEnclosingForOp != outermostEnclosingForOp) {
    LLVM_DEBUG(DBGS() << "--Unexpected outermost enclosing loop -> Skip\n");
    valid = false;
    return;
  }
  sliceOp = opToHoist.getSource().getDefiningOp<tensor::ExtractSliceOp>();
  if (!sliceOp) {
    LLVM_DEBUG(DBGS() << "--Cannot find the extract slice op -> Skip\n");
    valid = false;
    return;
  }
}
void HoistPaddingAnalysis::enableHoistPadding(RewriterBase &rewriter) {
  // ...
  // If the padded data is not yet available before entering the outermost
  // enclosing loop, try to hoist loop-invariant subsets out of it first.
  if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.getSource())) {
    outermostEnclosingForOp = cast<scf::ForOp>(
        hoistLoopInvariantSubsets(rewriter, outermostEnclosingForOp));
  }
}
void HoistPaddingAnalysis::finalizeHoistPaddingAnalysis() {
  // ...
  if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.getSource())) {
    LLVM_DEBUG(DBGS() << "--outermostEnclosingForOp:\n"
                      << outermostEnclosingForOp << "\n"
                      << "--sliceOp: " << sliceOp << "\n"
                      << "--sliceOp.getSource(): " << sliceOp.getSource()
                      << "\n");
    LLVM_DEBUG(DBGS() << "----Source not defined outside of loops -> Skip\n");
    valid = false;
    return;
  }
  if (sliceOp->hasOneUse()) {
    padConsumingForOp = dyn_cast<scf::ForOp>(*(sliceOp->getUsers().begin()));
  }

  // The padding value must be a constant: hoisting arbitrary padding regions
  // would require cloning all dependencies captured by the region.
  Value paddingValue = opToHoist.getConstantPaddingValue();
  if (!paddingValue ||
      !isa_and_nonnull<arith::ConstantOp>(paddingValue.getDefiningOp())) {
    LLVM_DEBUG(DBGS() << "Cannot find constant padding value -> Skip\n");
    valid = false;
    return;
  }

  computeBackwardSlice(opToHoist, outermostEnclosingForOp, backwardSlice);
  if (backwardSlice.size() <= 1) {
    valid = false;
    return;
  }

  // Remove all ops in the backward slice that are not used to index the
  // padded tensor.
  if (failed(dropNonIndexDependencies())) {
    LLVM_DEBUG(DBGS() << "--Cannot dropNonIndexDependencies -> Skip\n");
    valid = false;
    return;
  }

  // Add only the loops of the filtered backward slice to the packing loops;
  // the other loops access the same data in every iteration and would only
  // increase the footprint of the packed tensor.
  for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops))
    if (backwardSlice.contains(forOp))
      packingLoops.push_back(forOp);

  // TODO: for multiple loops we need to track the use to the innermost loop.
  if (packingLoops.size() > 1 && padConsumingForOp) {
    LLVM_DEBUG(DBGS() << "--Cannot hoist multiple loops through iter_args -> "
                         "Downgrade to 1 loop\n");
    packingLoops.resize(1);
  }

  // The analysis is valid; hoisting can occur.
  valid = true;
}
LogicalResult HoistPaddingAnalysis::dropNonIndexDependencies() {
  // Set of all values used for index computation.
  SetVector<Value> indexEdges;

  // Add all index-typed operands of `operation` to `indexEdges`.
  auto addIndexOperandsToIndexEdges = [&](Operation *operation) {
    for (Value operand : operation->getOperands())
      if (operand.getType().isIndex())
        indexEdges.insert(operand);
  };

  // Check if any result of `operation` is contained in `indexEdges`.
  auto hasIndexResult = [&](Operation *operation) {
    return llvm::any_of(operation->getResults(), [&](Value result) {
      return indexEdges.contains(result);
    });
  };

  // Walk the use-def chains of index type starting from the index operands of
  // opToHoist and sliceOp, and drop every op that does not feed the index
  // computation from the backward slice.
  // ...
  SetVector<Operation *> operationsToRemove;
  for (Operation *op : llvm::reverse(backwardSlice)) {
    // Seed the index edges with the index operands of the pad and slice ops.
    if (op == opToHoist || op == sliceOp) {
      addIndexOperandsToIndexEdges(op);
      continue;
    }
    // Add the index operands of a loop if its induction variable is used for
    // index computation.
    if (auto forOp = dyn_cast<scf::ForOp>(op)) {
      if (!hasIndexResult(op) && indexEdges.contains(forOp.getInductionVar())) {
        addIndexOperandsToIndexEdges(op);
        continue;
      }
    }
    // Add the index operands of all other operations if at least one result
    // is used for index computation.
    if (hasIndexResult(op)) {
      addIndexOperandsToIndexEdges(op);
      // All remaining operands must have index type.
      if (llvm::any_of(op->getOperandTypes(),
                       [](Type type) { return !type.isIndex(); })) {
        LLVM_DEBUG(DBGS() << "Unsupported op with non index type operands: "
                          << op << " -> Skip\n");
        return failure();
      }
      // The remaining operations must not have regions or memory effects.
      auto effectInterface = dyn_cast<MemoryEffectOpInterface>(op);
      bool hasMemoryEffect = effectInterface && !effectInterface.hasNoEffect();
      if (hasMemoryEffect || op->getNumRegions() != 0) {
        LLVM_DEBUG(DBGS() << "Unsupported op with region or memory effect: "
                          << op << " -> Skip\n");
        return failure();
      }
      continue;
    }
    // Remove all other operations not used by the index computation, except
    // constants that may be used by the pad op itself.
    if (!isa<arith::ConstantOp>(op))
      operationsToRemove.insert(op);
  }
  backwardSlice.set_subtract(operationsToRemove);
  return success();
}
SmallVector<Value>
HoistPaddingAnalysis::getHoistedPackedTensorSizes(RewriterBase &rewriter,
                                                  Location loc) const {
  SmallVector<Value> dynamicTensorSizes;
  // Upper-bound each packing loop length so the packed tensor can be sized
  // independently of the enclosing loops.
  for (auto forOp : packingLoops) {
    FailureOr<OpFoldResult> loopUb = affine::reifyIndexValueBound(
        rewriter, loc, presburger::BoundType::UB, forOp.getUpperBound(),
        /*stopCondition=*/
        [&](Value v, std::optional<int64_t> d, ValueBoundsConstraintSet &cstr) {
          if (v == forOp.getUpperBound())
            return false;
          // Stop at values that are not results of affine min/max/apply ops.
          Operation *op = v.getDefiningOp();
          if (!op)
            return true;
          return !isa<affine::AffineMinOp, affine::AffineMaxOp,
                      affine::AffineApplyOp>(op);
        },
        /*closedUB=*/true);
    assert(succeeded(loopUb) && "could not get upper bound");
    Value ubVal = getValueOrCreateConstantIndexOp(rewriter, loc, *loopUb);
    // Packing loop length: (ub - lb).ceilDiv(step).
    AffineExpr lb, ub, step;
    bindDims(rewriter.getContext(), lb, ub);
    bindSymbols(rewriter.getContext(), step);
    Value res = rewriter.createOrFold<affine::AffineApplyOp>(
        loc, (ub - lb).ceilDiv(step),
        ValueRange{forOp.getLowerBound(), ubVal,
                   cast<scf::ForOp>(forOp).getStep()});
    dynamicTensorSizes.push_back(res);
  }
  return dynamicTensorSizes;
}
static Value buildLoopIterationCount(RewriterBase &rewriter, scf::ForOp outer,
                                     scf::ForOp forOp) {
  MLIRContext *ctx = forOp->getContext();
  AffineExpr iv, lb, step;
  bindDims(ctx, iv, lb);
  bindSymbols(ctx, step);
  Value ivVal = forOp.getInductionVar(), lbVal = forOp.getLowerBound(),
        stepVal = forOp.getStep();
  auto loc = forOp->getLoc();
  return rewriter.createOrFold<affine::AffineApplyOp>(
      loc, (iv - lb).ceilDiv(step), ValueRange{ivVal, lbVal, stepVal});
}
static FailureOr<PackingResult> buildPackingLoopNestImpl(
    RewriterBase &rewriter, IRMapping &bvm, tensor::PadOp opToHoist,
    ArrayRef<int64_t> transposeVector, RankedTensorType transposedTensorType,
    tensor::EmptyOp emptyOp, const HoistPaddingAnalysis &analysis) {
  // ...
  scf::ForOp outerLoop = analysis.outermostEnclosingForOp;

  Location loc = opToHoist->getLoc();
  RankedTensorType paddedTensorType = opToHoist.getResultType();
  int paddedRank = paddedTensorType.getRank();

  // Step 0. If the source of the pad is a block argument, map it through the
  // iter_args of its enclosing loops up to `outerLoop`.
  BlockArgument bbArg = dyn_cast<BlockArgument>(opToHoist.getSource());
  while (bbArg) {
    auto forOp = dyn_cast<scf::ForOp>(bbArg.getOwner()->getParentOp());
    if (!forOp)
      break;
    if (forOp != outerLoop && !outerLoop->isAncestor(forOp))
      break;
    OpOperand &operand = *forOp.getTiedLoopInit(bbArg);
    bvm.map(bbArg, operand.get());
    bbArg = dyn_cast<BlockArgument>(operand.get());
  }

  // Step 1. Iteratively clone the ops of the backward slice, rebuilding the
  // packing loop nest and threading the hoisted packed tensor through it.
  Value hoistedPackedTensor = emptyOp.getResult();
  SmallVector<Value> clonedLoopIvs, leadingHoistedPackedTensorIndexings;
  for (Operation *op : analysis.backwardSlice) {
    // Specifically sit out in the extract_slice(hoistedPackedTensor) case:
    // this is the piece we seek to replace.
    if (auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(op)) {
      if (bvm.lookupOrDefault(sliceOp.getSource()) == hoistedPackedTensor) {
        LLVM_DEBUG(DBGS() << "--Skip: " << sliceOp << "\n");
        continue;
      }
    }
    // Clone all operations except loops, which are rebuilt with an extra
    // iter_arg.
    auto forOp = dyn_cast<scf::ForOp>(op);
    if (!forOp) {
      rewriter.clone(*op, bvm);
      continue;
    }
    // Create a packing loop that takes `hoistedPackedTensor` as iteration
    // argument.
    auto clonedForOp = rewriter.create<scf::ForOp>(
        loc, bvm.lookupOrDefault(forOp.getLowerBound()),
        bvm.lookupOrDefault(forOp.getUpperBound()),
        bvm.lookupOrDefault(forOp.getStep()), hoistedPackedTensor);
    // Map the induction variable, region args and results to the cloned loop.
    bvm.map(forOp.getInductionVar(), clonedForOp.getInductionVar());
    bvm.map(forOp.getRegionIterArgs(), clonedForOp.getRegionIterArgs());
    bvm.map(forOp.getResults(), clonedForOp.getResults());
    assert(clonedForOp->getNumRegions() == 1);
    clonedLoopIvs.push_back(clonedForOp.getInductionVar());

    // Do not insert a guard here; we keep descending into the loop nest.
    rewriter.setInsertionPointToStart(&clonedForOp->getRegion(0).front());
    Value loopIndependentIterationCount =
        buildLoopIterationCount(rewriter, outerLoop, clonedForOp);

    // Assert the loop-independent iteration count is available.
    if (!loopIndependentIterationCount)
      llvm_unreachable("loop independence prerequisite not met");
    leadingHoistedPackedTensorIndexings.push_back(
        loopIndependentIterationCount);
    hoistedPackedTensor = clonedForOp.getRegionIterArgs().front();
  }

  // Step 2. Offsets, sizes and strides used to insert the padded slice into
  // the packed tensor and to extract it back.
  int64_t nPackedLoops = clonedLoopIvs.size();
  // offsets = [leading loop iteration counts, 0 .. 0].
  SmallVector<OpFoldResult> offsets{leadingHoistedPackedTensorIndexings.begin(),
                                    leadingHoistedPackedTensorIndexings.end()};
  offsets.append(paddedRank, rewriter.getIndexAttr(0));
  // sizes = [1 .. 1, transposedShape].
  SmallVector<OpFoldResult> sizes(nPackedLoops, rewriter.getIndexAttr(1));
  for (int64_t sz : transposedTensorType.getShape()) {
    // ...
    if (ShapedType::isDynamic(sz))
      return failure();
    sizes.push_back(rewriter.getIndexAttr(sz));
  }
  // strides = [1 .. 1].
  SmallVector<OpFoldResult> strides(nPackedLoops + paddedRank,
                                    rewriter.getIndexAttr(1));

  // Step 3. Optionally transpose the padded tensor before inserting it into
  // the packed tensor.
  TransposeOp maybeTransposeOp;
  Value paddedTensor = bvm.lookup(opToHoist.getResult());
  if (!transposeVector.empty()) {
    Value outputTensor = rewriter.create<tensor::ExtractSliceOp>(
        loc, transposedTensorType, hoistedPackedTensor, offsets, sizes,
        strides);
    maybeTransposeOp = rewriter.create<linalg::TransposeOp>(
        loc, paddedTensor, outputTensor, transposeVector);
    paddedTensor = maybeTransposeOp.getResult()[0];
  }

  // Innermost tensor.insert_slice and yields are only needed with loops.
  if (nPackedLoops > 0) {
    // ...
    Value inserted = rewriter.create<tensor::InsertSliceOp>(
        loc, paddedTensor, hoistedPackedTensor, offsets, sizes, strides);

    // Step 4. Iteratively yield the result through the cloned loops.
    Value valueToYield = inserted;
    for (Value iv : llvm::reverse(clonedLoopIvs)) {
      auto forOp = scf::getForInductionVarOwner(iv);
      rewriter.setInsertionPointToEnd(&forOp.getRegion().front());
      rewriter.create<scf::YieldOp>(loc, valueToYield);
      valueToYield = forOp.getResult(0);
    }
  }

  return PackingResult{
      offsets,
      sizes,
      strides,
      clonedLoopIvs,
      leadingHoistedPackedTensorIndexings,
      maybeTransposeOp,
      cast<tensor::PadOp>(bvm.lookup(opToHoist.getResult()).getDefiningOp())};
}
// In the buildPackingLoopNestImpl overload taking (rewriter, bvm, opToHoist,
// transposeVector, analysis), which creates the empty packed tensor and then
// delegates to the implementation above:
int nPackedLoops = analysis.packingLoops.size();
LLVM_DEBUG(DBGS() << "\n";
           DBGS() /* ... */
                  << *opToHoist->getParentOfType<func::FuncOp>() << "\n";
           DBGS() << "Start hoisting above " << nPackedLoops << " loops\n");

Location loc = opToHoist->getLoc();
RankedTensorType paddedTensorType = opToHoist.getResultType();

// Compute the type of the transposed padded tensor.
FailureOr<RankedTensorType> transposedTensorType =
    computeTransposedType(paddedTensorType, transposeVector);
if (failed(transposedTensorType)) {
  LLVM_DEBUG(DBGS() << "--Could not compute transposed type -> Skip\n");
  return failure();
}

// Create the packed tensor<?x?x..?xtransposedShape>.
SmallVector<int64_t> packedShape(nPackedLoops, ShapedType::kDynamic);
llvm::append_range(packedShape, transposedTensorType->getShape());
auto hoistedPackedTensorType = RankedTensorType::get(
    packedShape, transposedTensorType->getElementType());

// Set the insertion point right before the outer loop and start packing.
scf::ForOp outerLoop = analysis.outermostEnclosingForOp;
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(outerLoop);
SmallVector<Value> dynamicTensorSizes =
    analysis.getHoistedPackedTensorSizes(rewriter, loc);
auto emptyOp = rewriter.create<tensor::EmptyOp>(
    loc, hoistedPackedTensorType.getShape(),
    hoistedPackedTensorType.getElementType(), dynamicTensorSizes);

return buildPackingLoopNestImpl(rewriter, bvm, opToHoist, transposeVector,
                                *transposedTensorType, emptyOp, analysis);
// In buildPackingLoopNest(rewriter, opToHoist, outermostEnclosingForOp,
// transposeVector), the public entry point:
HoistPaddingAnalysis analysis(opToHoist, outermostEnclosingForOp);
analysis.enableHoistPadding(rewriter);
analysis.finalizeHoistPaddingAnalysis();
if (!analysis.isValid()) {
  LLVM_DEBUG(DBGS() << "--Analysis failed -> Skip\n");
  return failure();
}
// ...
/// Return true if we can walk back the use-def chain from `extractSliceOp` to
/// `expectedSource`, going through destination-style ops exclusively.
static bool tracesBackToExpectedValue(tensor::ExtractSliceOp extractSliceOp,
                                      Value expectedSource) {
  LLVM_DEBUG(DBGS() << "Start tracesBackToExpectedValue on: " << extractSliceOp
                    << "\n");
  LLVM_DEBUG(DBGS() << "--with extractSlice: " << extractSliceOp << "\n");
  Value source = extractSliceOp.getSource();
  LLVM_DEBUG(DBGS() << "--with starting source: " << source << "\n");
  while (source && source != expectedSource) {
    auto destOp =
        dyn_cast_or_null<DestinationStyleOpInterface>(source.getDefiningOp());
    if (!destOp)
      break;
    LLVM_DEBUG(DBGS() << "--step dest op: " << destOp << "\n");
    source = destOp.getDpsInitOperand(cast<OpResult>(source).getResultNumber())
                 ->get();
  }
  LLVM_DEBUG(DBGS() << "--final source: " << source << "\n");
  LLVM_DEBUG(DBGS() << "--expected source: " << expectedSource << "\n");
  return source == expectedSource;
}
/// If the original consumer of `outerSliceOp` was a `forOp` (i.e. the slice
/// is consumed through an iter_arg), propagate the hoisted packed tensor
/// through that iter_arg and rewrite the loop accordingly.
static tensor::ExtractSliceOp
padThroughLoopIterArg(RewriterBase &rewriter, Value paddedValueBeforeHoisting,
                      Value hoistedPackedTensor,
                      tensor::ExtractSliceOp outerSliceOp, scf::ForOp forOp) {
  LLVM_DEBUG(DBGS() << "Start padThroughLoopIterArg on: " << forOp << "\n");
  LLVM_DEBUG(DBGS() << "--paddedValueBeforeHoisting: "
                    << paddedValueBeforeHoisting << "\n");
  OpOperand *pUse = nullptr;
  for (OpOperand &use : outerSliceOp->getUses()) {
    if (use.getOwner() == forOp) {
      assert(!pUse && "Multiple slice uses in the for loop");
      pUse = &use;
    }
  }
  assert(pUse && "No slice use in the for loop");
  OpBuilder::InsertionGuard g(rewriter);
  rewriter.setInsertionPointAfter(hoistedPackedTensor.getDefiningOp());

  unsigned iterArgNumber = forOp.getTiedLoopResult(pUse).getResultNumber();
  auto yieldingExtractSliceOp = forOp.getYieldedValues()[iterArgNumber]
                                    .getDefiningOp<tensor::ExtractSliceOp>();
  if (!yieldingExtractSliceOp)
    return tensor::ExtractSliceOp();

  // Poor man's analysis: the value yielded in that iter_arg position must
  // trace back to the padded value before hoisting.
  if (!tracesBackToExpectedValue(yieldingExtractSliceOp,
                                 paddedValueBeforeHoisting))
    return tensor::ExtractSliceOp();

  SmallVector<Value> initArgs = forOp.getInitArgs();
  initArgs[iterArgNumber] = hoistedPackedTensor;
  SmallVector<Value> yieldOperands = llvm::to_vector(forOp.getYieldedValues());
  yieldOperands[iterArgNumber] = yieldingExtractSliceOp.getSource();

  int64_t numOriginalForOpResults = initArgs.size();
  LLVM_DEBUG(DBGS() << "numOriginalForOpResults: " << numOriginalForOpResults
                    << "\n");
  tensor::ExtractSliceOp extracted;
  {
    OpBuilder::InsertionGuard g(rewriter);
    rewriter.setInsertionPointAfter(forOp);
    extracted = rewriter.create<tensor::ExtractSliceOp>(
        hoistedPackedTensor.getLoc(), hoistedPackedTensor,
        outerSliceOp.getMixedOffsets(), outerSliceOp.getMixedSizes(),
        outerSliceOp.getMixedStrides());
    // ...
  }
  scf::ForOp newForOp = cast<scf::ForOp>(*forOp.replaceWithAdditionalYields(
      rewriter, initArgs, /*replaceInitOperandUsesInLoop=*/true,
      [&](OpBuilder &b, Location loc, ArrayRef<BlockArgument> newBBArgs) {
        return yieldOperands;
      }));

  LLVM_DEBUG(DBGS() << "newForOp results: " << newForOp.getNumResults()
                    << "\n");
  LLVM_DEBUG(DBGS() << "replace source of: " << extracted << "\n");
  LLVM_DEBUG(DBGS() << "with result #"
                    << numOriginalForOpResults + iterArgNumber
                    << " of forOp, giving us: " << extracted << "\n");
  rewriter.startOpModification(extracted);
  extracted.getSourceMutable().assign(
      newForOp.getResult(numOriginalForOpResults + iterArgNumber));
  rewriter.finalizeOpModification(extracted);

  LLVM_DEBUG(DBGS() << "replace uses of: " << paddedValueBeforeHoisting
                    << "\n");
  LLVM_DEBUG(DBGS() << "with region iter arg #"
                    << numOriginalForOpResults + iterArgNumber << "\n");
  rewriter.replaceAllUsesWith(
      paddedValueBeforeHoisting,
      newForOp.getRegionIterArg(numOriginalForOpResults + iterArgNumber));

  return extracted;
}
/// Produce a tensor extracted from the packing result, to be used as a
/// replacement for `opToHoist` by callers.
static Value replaceByPackingResult(RewriterBase &rewriter,
                                    const IRMapping &bvm,
                                    tensor::PadOp opToHoist,
                                    RankedTensorType transposedTensorType,
                                    const HoistPaddingAnalysis &analysis,
                                    const PackingResult &packingResult) {
  // ...
  Location loc = opToHoist->getLoc();
  RankedTensorType paddedTensorType = opToHoist.getResultType();
  int paddedRank = paddedTensorType.getRank();

  int64_t nPackedLoops = packingResult.clonedLoopIvs.size();
  LLVM_DEBUG(DBGS() << "nPackedLoops: " << nPackedLoops << " loops\n");

  scf::ForOp outerLoop = analysis.outermostEnclosingForOp;
  ArrayRef<scf::ForOp> packingLoops = analysis.packingLoops;

  Value hoistedPackedTensor;
  SmallVector<Value> loopIterationCounts;
  SmallVector<OpFoldResult> offsets(nPackedLoops + paddedRank,
                                    rewriter.getIndexAttr(0));
  if (nPackedLoops > 0) {
    loopIterationCounts =
        llvm::to_vector<4>(llvm::map_range(packingLoops, [&](Operation *loop) {
          return buildLoopIterationCount(rewriter, outerLoop,
                                         cast<scf::ForOp>(loop));
        }));
    // Assert all loop iteration counts can be computed.
    if (llvm::any_of(loopIterationCounts, [](Value v) { return !v; }))
      llvm_unreachable("loop independence prerequisite not met");

    // offsets = [leading loop iteration counts, 0 .. 0].
    std::copy(loopIterationCounts.begin(), loopIterationCounts.end(),
              offsets.begin());
    hoistedPackedTensor =
        scf::getForInductionVarOwner(packingResult.clonedLoopIvs.front())
            ->getResult(0);
  } else {
    // If no loops were created, this is just hoisting without packing.
    hoistedPackedTensor = bvm.lookup(opToHoist.getResult());
  }

  LLVM_DEBUG(DBGS() << "hoistedPackedTensor: " << hoistedPackedTensor << "\n");

  // If the consumer of the original pad was a loop, propagate the packed
  // tensor through that loop's iter_args.
  scf::ForOp forOp = analysis.padConsumingForOp;
  if (forOp)
    return padThroughLoopIterArg(rewriter, opToHoist, hoistedPackedTensor,
                                 analysis.sliceOp, forOp);

  // Otherwise, directly extract the padded slice from the packed tensor.
  return rewriter.create<tensor::ExtractSliceOp>(
      loc, transposedTensorType, hoistedPackedTensor, offsets,
      packingResult.sizes, packingResult.strides);
}
FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
    RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops,
    ArrayRef<int64_t> transposeVector, tensor::PadOp &hoistedOp,
    SmallVectorImpl<TransposeOp> &transposeOps) {
  LLVM_DEBUG(DBGS() << "\n"; DBGS() << " Try to hoist " << *(opToHoist) << "\n";
             DBGS() << " by " << numLoops << " loops\n");

  HoistPaddingAnalysis analysis(opToHoist, numLoops);
  analysis.enableHoistPadding(rewriter);
  analysis.finalizeHoistPaddingAnalysis();
  if (!analysis.isValid()) {
    LLVM_DEBUG(DBGS() << "--Analysis failed -> Skip\n");
    return failure();
  }

  // Construct the packing loop nest.
  IRMapping bvm;
  FailureOr<PackingResult> packingResult = buildPackingLoopNestImpl(
      rewriter, bvm, opToHoist, transposeVector, analysis);
  if (failed(packingResult)) {
    LLVM_DEBUG(DBGS() << "--buildPackingLoopNestImpl failed -> Skip\n");
    return failure();
  }

  if (!transposeVector.empty())
    transposeOps.push_back(packingResult->maybeTransposeOp);

  FailureOr<RankedTensorType> transposedTensorType =
      computeTransposedType(opToHoist.getResultType(), transposeVector);
  assert(succeeded(transposedTensorType) && "unexpected failure in type");

  // Now the packed tensor is ready, replace the original padding op by a
  // slice of the packed tensor.
  Value newResult = replaceByPackingResult(rewriter, bvm, opToHoist,
                                           *transposedTensorType, analysis,
                                           *packingResult);

  Location loc = opToHoist->getLoc();
  RankedTensorType paddedTensorType = opToHoist.getResultType();
  if (!transposeVector.empty()) {
    OpBuilder::InsertionGuard g(rewriter);
    rewriter.setInsertionPointAfter(newResult.getDefiningOp());
    // Transpose the packed tensor back to the original storage order.
    Value emptyTensor = rewriter.create<tensor::EmptyOp>(
        loc, paddedTensorType.getShape(), paddedTensorType.getElementType());
    TransposeOp unTransposeOp = rewriter.create<linalg::TransposeOp>(
        loc, newResult, emptyTensor, transposeVector);
    newResult = unTransposeOp.getResult()[0];
    transposeOps.push_back(unTransposeOp);
  }

  LLVM_DEBUG(DBGS() << "newResult: " << newResult << "\n");
  LLVM_DEBUG(
      DBGS() << "After hoisting: "
             << newResult.getDefiningOp()->getParentOfType<func::FuncOp>()
             << "\n");

  // Make the newly cloned `opToHoist` available to the caller.
  hoistedOp = packingResult->hoistedPadOp;

  LLVM_DEBUG(DBGS() << "--SUCCESS\n");
  return newResult;
}

/// Convenience overload that creates its own rewriter.
FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
    tensor::PadOp opToHoist, int64_t numLoops,
    ArrayRef<int64_t> transposeVector, tensor::PadOp &hoistedOp,
    SmallVectorImpl<TransposeOp> &transposeOps) {
  IRRewriter rewriter(opToHoist.getContext());
  return hoistPaddingOnTensors(rewriter, opToHoist, numLoops, transposeVector,
                               hoistedOp, transposeOps);
}
Static helpers defined in this file:

  static tensor::ExtractSliceOp padThroughLoopIterArg(RewriterBase &rewriter,
      Value paddedValueBeforeHoisting, Value hoistedPackedTensor,
      tensor::ExtractSliceOp outerSliceOp, scf::ForOp forOp)
    If the original consumer of outerSliceOp was a forOp (i.e. the slice is
    consumed through an iter_arg), propagate the hoisted packed tensor through
    that iter_arg.

  static Value buildLoopIterationCount(RewriterBase &rewriter, scf::ForOp outer,
      scf::ForOp forOp)
    Return the current iteration number in the loop, i.e. (iv - lb).ceilDiv(step).

  static void getEnclosingLoopsUntil(tensor::PadOp padOp, scf::ForOp untilLoop,
      SmallVector<scf::ForOp> &reverseEnclosingLoops)
    Return the immediately enclosing scf::ForOp loops, walking outwards from
    padOp until untilLoop is reached.

  static bool debugPrintLoopInShortForm(Operation *op)

  static bool tracesBackToExpectedValue(tensor::ExtractSliceOp extractSliceOp,
      Value expectedSource)
    Return true if we can walk back the use-def chain from extractSliceOp to
    expectedSource going through destination-style ops exclusively.

  static bool isDefinedOutsideOrConstant(scf::ForOp outer, Value v)

  static FailureOr<PackingResult> buildPackingLoopNestImpl(RewriterBase &rewriter,
      IRMapping &bvm, tensor::PadOp opToHoist, ArrayRef<int64_t> transposeVector,
      RankedTensorType transposedTensorType, tensor::EmptyOp emptyOp,
      const HoistPaddingAnalysis &analysis)

  static void computeBackwardSlice(tensor::PadOp padOp,
      scf::ForOp outermostEnclosingForOp, SetVector<Operation *> &backwardSlice)

  static Value replaceByPackingResult(RewriterBase &rewriter, const IRMapping &bvm,
      tensor::PadOp opToHoist, RankedTensorType transposedTensorType,
      const HoistPaddingAnalysis &analysis, const PackingResult &packingResult)
    Produce a tensor extracted from the packingResult.

  static void debugPrintBackwardSlice(SetVector<Operation *> &backwardSlice)

  static void getAtMostNEnclosingLoops(tensor::PadOp padOp, int nLevels,
      SmallVector<scf::ForOp> &reverseEnclosingLoops)
    Return at most nLevels of immediately enclosing scf::ForOp loops.
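As a worked instance of the (iv - lb).ceilDiv(step) iteration count above: a
loop with lower bound 4 and step 8 that is currently at induction value 20 is
in iteration (20 - 4) ceildiv 8 = 2. The following sketch (a hypothetical
helper that merely mirrors the buildLoopIterationCount pattern used in this
file) materializes that computation with an affine.apply:

// Sketch only: normalize an scf.for induction variable to its 0-based
// iteration number, e.g. lb = 4, step = 8, iv = 20 -> (20 - 4) ceildiv 8 = 2.
static Value iterationNumber(RewriterBase &rewriter, scf::ForOp forOp) {
  MLIRContext *ctx = forOp->getContext();
  AffineExpr iv, lb, step;
  bindDims(ctx, iv, lb);   // dims: induction variable, lower bound
  bindSymbols(ctx, step);  // symbol: loop step
  return rewriter.createOrFold<affine::AffineApplyOp>(
      forOp->getLoc(), (iv - lb).ceilDiv(step),
      ValueRange{forOp.getInductionVar(), forOp.getLowerBound(),
                 forOp.getStep()});
}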
Public entry points:

  FailureOr<PackingResult> buildPackingLoopNest(RewriterBase &rewriter,
      tensor::PadOp opToHoist, scf::ForOp outermostEnclosingForOp,
      ArrayRef<int64_t> transposeVector)
    Build the packing loop nest required to hoist opToHoist above
    outermostEnclosingForOp.

  FailureOr<Value> hoistPaddingOnTensors(RewriterBase &rewriter,
      tensor::PadOp opToHoist, int64_t numLoops, ArrayRef<int64_t> transposeVector,
      tensor::PadOp &hoistedOp, SmallVectorImpl<TransposeOp> &transposeOps)
    Mechanically hoist padding operations on tensors by numLoops into a new,
    generally larger tensor.
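For illustration, a minimal driver sketch (hypothetical, not part of this
file; it assumes the surrounding MLIR context and roughly mirrors how the
transform dialect drives this API) that hoists a pad op above up to two
enclosing loops and performs the final replacement itself:

// Hypothetical example driver, for illustration only.
static LogicalResult hoistPadExample(RewriterBase &rewriter,
                                     tensor::PadOp padOp) {
  tensor::PadOp hoistedPad;                       // set on success
  SmallVector<linalg::TransposeOp> transposeOps;  // stays empty: no transpose
  FailureOr<Value> replacement = linalg::hoistPaddingOnTensors(
      rewriter, padOp, /*numLoops=*/2, /*transposeVector=*/{}, hoistedPad,
      transposeOps);
  if (failed(replacement))
    return failure();
  // The caller replaces the original pad with the slice extracted from the
  // hoisted, packed tensor.
  rewriter.replaceOp(padOp, *replacement);
  return success();
}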