29 #include "llvm/Support/Debug.h"
33 #define DEBUG_TYPE "hoist-padding"
35 #define DBGS() (dbgs() << '[' << DEBUG_TYPE << "] ")
45 if (
auto forOp = dyn_cast<scf::ForOp>(op)) {
46 forOp.getInductionVar().printAsOperand(dbgs(), state);
47 dbgs() <<
" @ " << forOp.getOperation();
55 LLVM_DEBUG(llvm::interleaveComma(backwardSlice,
DBGS() <<
"--backwardSlice:",
63 dbgs() << *op <<
"\n";
75 scf::ForOp outermostEnclosingForOp =
nullptr;
77 while (nLevels-- > 0 &&
78 (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
79 LLVM_DEBUG(
DBGS() <<
"loops: ";
82 reverseEnclosingLoops.push_back(outermostEnclosingForOp);
83 nextEnclosingOp = outermostEnclosingForOp->
getParentOp();
94 scf::ForOp outermostEnclosingForOp =
nullptr;
96 while (outermostEnclosingForOp != untilLoop &&
97 (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
98 LLVM_DEBUG(
DBGS() <<
"loops: ";
101 reverseEnclosingLoops.push_back(outermostEnclosingForOp);
102 nextEnclosingOp = outermostEnclosingForOp->
getParentOp();
111 scf::ForOp outermostEnclosingForOp,
116 return domInfo.
dominates(outermostEnclosingForOp, op) &&
117 !padOp->isProperAncestor(op);
125 for (
Value v : valuesDefinedAbove) {
127 assert(result.succeeded() &&
"expected a backward slice");
131 LogicalResult result =
133 assert(result.succeeded() &&
"expected a backward slice");
157 struct HoistPaddingAnalysis {
158 HoistPaddingAnalysis(tensor::PadOp padOp,
int numLoops);
159 HoistPaddingAnalysis(tensor::PadOp padOp, scf::ForOp outermostEnclosingForOp);
161 bool isValid() {
return valid.has_value() && valid.value(); }
162 bool isInvalid() {
return valid.has_value() && !valid.value(); }
187 void finalizeHoistPaddingAnalysis();
191 std::optional<bool> valid;
194 tensor::PadOp opToHoist;
223 LogicalResult dropNonIndexDependencies();
228 scf::ForOp outermostEnclosingForOp;
242 tensor::ExtractSliceOp sliceOp;
245 scf::ForOp padConsumingForOp;
250 HoistPaddingAnalysis::HoistPaddingAnalysis(tensor::PadOp padOp,
int numLoops)
251 : valid(std::nullopt), opToHoist(padOp) {
254 if (reverseEnclosingLoops.empty()) {
255 LLVM_DEBUG(
DBGS() <<
"--No immediately enclosing loop -> Skip\n");
259 outermostEnclosingForOp = reverseEnclosingLoops.back();
260 sliceOp = opToHoist.getSource().getDefiningOp<tensor::ExtractSliceOp>();
262 LLVM_DEBUG(
DBGS() <<
"--Cannot find the extract slice op -> Skip\n");
268 HoistPaddingAnalysis::HoistPaddingAnalysis(tensor::PadOp padOp,
269 scf::ForOp outermostEnclosingForOp)
270 : valid(std::nullopt), opToHoist(padOp) {
273 reverseEnclosingLoops);
274 if (reverseEnclosingLoops.empty()) {
275 LLVM_DEBUG(
DBGS() <<
"--No immediately enclosing loop -> Skip\n");
279 this->outermostEnclosingForOp = reverseEnclosingLoops.back();
280 if (this->outermostEnclosingForOp != outermostEnclosingForOp) {
281 LLVM_DEBUG(
DBGS() <<
"--Unexpected outermost enclosing loop -> Skip\n");
285 sliceOp = opToHoist.getSource().getDefiningOp<tensor::ExtractSliceOp>();
287 LLVM_DEBUG(
DBGS() <<
"--Cannot find the extract slice op -> Skip\n");
293 void HoistPaddingAnalysis::enableHoistPadding(
RewriterBase &rewriter) {
299 if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.getSource())) {
300 outermostEnclosingForOp = cast<scf::ForOp>(
305 void HoistPaddingAnalysis::finalizeHoistPaddingAnalysis() {
309 if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.getSource())) {
310 LLVM_DEBUG(
DBGS() <<
"--outermostEnclosingForOp:\n"
311 << outermostEnclosingForOp <<
"\n"
312 <<
"--sliceOp: " << sliceOp <<
"\n"
313 <<
"--sliceOp.getSource(): " << sliceOp.getSource()
315 LLVM_DEBUG(
DBGS() <<
"----Source not defined outside of loops -> Skip\n");
319 if (sliceOp->hasOneUse()) {
320 padConsumingForOp = dyn_cast<scf::ForOp>(*(sliceOp->getUsers().begin()));
326 Value paddingValue = opToHoist.getConstantPaddingValue();
328 !isa_and_nonnull<arith::ConstantOp>(paddingValue.
getDefiningOp())) {
329 LLVM_DEBUG(
DBGS() <<
"Cannot find constant padding value -> Skip\n");
335 if (backwardSlice.size() <= 1) {
344 if (
failed(dropNonIndexDependencies())) {
345 LLVM_DEBUG(
DBGS() <<
"--Cannot dropNonIndexDependencies -> Skip\n");
357 for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops))
358 if (backwardSlice.contains(forOp))
359 packingLoops.push_back(forOp);
362 if (packingLoops.size() > 1 && padConsumingForOp) {
363 LLVM_DEBUG(
DBGS() <<
"--Cannot hoist multiple loops through iter_args -> "
364 "Downgrade to 1 loop\n");
365 packingLoops.resize(1);
375 LogicalResult HoistPaddingAnalysis::dropNonIndexDependencies() {
381 auto addIndexOperandsToIndexEdges = [&](
Operation *operation) {
382 for (
Value operand : operation->getOperands())
383 if (operand.getType().isIndex())
384 indexEdges.insert(operand);
388 auto hasIndexResult = [&](
Operation *operation) {
389 return llvm::any_of(operation->getResults(), [&](
Value result) {
390 return indexEdges.contains(result);
415 for (
Operation *op : llvm::reverse(backwardSlice)) {
418 if (op == opToHoist || op == sliceOp) {
419 addIndexOperandsToIndexEdges(op);
424 if (
auto forOp = dyn_cast<scf::ForOp>(op)) {
425 if (!hasIndexResult(op) && indexEdges.contains(forOp.getInductionVar())) {
426 addIndexOperandsToIndexEdges(op);
432 if (hasIndexResult(op)) {
433 addIndexOperandsToIndexEdges(op);
435 if (llvm::any_of(op->getOperandTypes(),
436 [](
Type type) { return !type.isIndex(); })) {
437 LLVM_DEBUG(
DBGS() <<
"Unsupported op with non index type operands: "
438 << op <<
" -> Skip\n");
442 auto effectInterface = dyn_cast<MemoryEffectOpInterface>(op);
443 bool hasMemoryEffect = effectInterface && !effectInterface.hasNoEffect();
444 if (hasMemoryEffect || op->getNumRegions() != 0) {
445 LLVM_DEBUG(
DBGS() <<
"Unsupported op with region or memory effect: "
446 << op <<
" -> Skip\n");
453 if (!isa<arith::ConstantOp>(op))
454 operationsToRemove.insert(op);
456 backwardSlice.set_subtract(operationsToRemove);
461 HoistPaddingAnalysis::getHoistedPackedTensorSizes(
RewriterBase &rewriter,
470 for (
auto forOp : packingLoops) {
473 rewriter, loc, presburger::BoundType::UB, forOp.getUpperBound(),
476 if (v == forOp.getUpperBound())
479 Operation *op = v.getDefiningOp();
482 return !isa<affine::AffineMinOp, affine::AffineMaxOp,
483 affine::AffineApplyOp>(op);
486 assert(succeeded(loopUb) &&
"could not get upper bound");
497 loc, (ub - lb).ceilDiv(step),
499 cast<scf::ForOp>(forOp).getStep()});
500 dynamicTensorSizes.push_back(res);
503 return dynamicTensorSizes;
527 Value ivVal = forOp.getInductionVar(), lbVal = forOp.getLowerBound(),
528 stepVal = forOp.getStep();
529 auto loc = forOp->
getLoc();
531 loc, (iv - lb).ceilDiv(step),
ValueRange{ivVal, lbVal, stepVal});
548 tensor::EmptyOp emptyOp,
const HoistPaddingAnalysis &
analysis) {
552 scf::ForOp outerLoop =
analysis.outermostEnclosingForOp;
555 RankedTensorType paddedTensorType = opToHoist.getResultType();
556 int paddedRank = paddedTensorType.getRank();
559 BlockArgument bbArg = dyn_cast<BlockArgument>(opToHoist.getSource());
564 if (forOp != outerLoop && !outerLoop->isAncestor(forOp))
566 OpOperand &operand = *forOp.getTiedLoopInit(bbArg);
567 bvm.
map(bbArg, operand.
get());
568 bbArg = dyn_cast<BlockArgument>(operand.
get());
572 Value hoistedPackedTensor = emptyOp.getResult();
577 if (
auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(op)) {
579 LLVM_DEBUG(
DBGS() <<
"--Skip: " << sliceOp <<
"\n");
585 auto forOp = dyn_cast<scf::ForOp>(op);
588 rewriter.
clone(*op, bvm);
594 auto clonedForOp = scf::ForOp::create(
598 nullptr, forOp.getUnsignedCmp());
601 bvm.
map(forOp.getInductionVar(), clonedForOp.getInductionVar());
602 bvm.
map(forOp.getRegionIterArgs(), clonedForOp.getRegionIterArgs());
603 bvm.
map(forOp.getResults(), clonedForOp.getResults());
604 assert(clonedForOp->getNumRegions() == 1);
605 clonedLoopIvs.push_back(clonedForOp.getInductionVar());
609 Value loopIndependentIterationCount =
613 if (!loopIndependentIterationCount)
614 llvm_unreachable(
"loop independence prerequisite not met");
615 leadingHoistedPackedTensorIndexings.push_back(
616 loopIndependentIterationCount);
617 hoistedPackedTensor = clonedForOp.getRegionIterArgs().front();
622 int64_t nPackedLoops = clonedLoopIvs.size();
626 leadingHoistedPackedTensorIndexings.end()};
630 for (int64_t sz : transposedTensorType.getShape()) {
632 if (ShapedType::isDynamic(sz))
641 TransposeOp maybeTransposeOp;
642 Value paddedTensor = bvm.
lookup(opToHoist.getResult());
643 if (!transposeVector.empty()) {
644 Value outputTensor = tensor::ExtractSliceOp::create(
645 rewriter, loc, transposedTensorType, hoistedPackedTensor, offsets,
647 maybeTransposeOp = linalg::TransposeOp::create(
648 rewriter, loc, paddedTensor, outputTensor, transposeVector);
649 paddedTensor = maybeTransposeOp.getResult()[0];
653 if (nPackedLoops > 0) {
656 Value inserted = tensor::InsertSliceOp::create(rewriter, loc, paddedTensor,
657 hoistedPackedTensor, offsets,
661 Value valueToYield = inserted;
662 for (
Value iv : llvm::reverse(clonedLoopIvs)) {
665 scf::YieldOp::create(rewriter, loc, valueToYield);
666 valueToYield = forOp.getResult(0);
675 leadingHoistedPackedTensorIndexings,
677 cast<tensor::PadOp>(bvm.
lookup(opToHoist.getResult()).getDefiningOp())};
687 int nPackedLoops =
analysis.packingLoops.size();
688 LLVM_DEBUG(
DBGS() <<
"\n";
690 << *opToHoist->getParentOfType<func::FuncOp>() <<
"\n";
691 DBGS() <<
"Start hoisting above " << nPackedLoops <<
" loops\n");
694 RankedTensorType paddedTensorType = opToHoist.getResultType();
697 FailureOr<RankedTensorType> transposedTensorType =
699 if (
failed(transposedTensorType)) {
700 LLVM_DEBUG(
DBGS() <<
"--Could not compute transposed type -> Skip\n");
707 llvm::append_range(packedShape, transposedTensorType->getShape());
709 packedShape, transposedTensorType->getElementType());
712 scf::ForOp outerLoop =
analysis.outermostEnclosingForOp;
716 analysis.getHoistedPackedTensorSizes(rewriter, loc);
717 auto emptyOp = tensor::EmptyOp::create(
718 rewriter, loc, hoistedPackedTensorType.getShape(),
719 hoistedPackedTensorType.getElementType(), dynamicTensorSizes);
722 *transposedTensorType, emptyOp,
analysis);
731 HoistPaddingAnalysis
analysis(opToHoist, outermostEnclosingForOp);
732 analysis.enableHoistPadding(rewriter);
733 analysis.finalizeHoistPaddingAnalysis();
735 LLVM_DEBUG(
DBGS() <<
"--Analysis failed -> Skip\n");
754 Value expectedSource) {
755 LLVM_DEBUG(
DBGS() <<
"Start tracesBackToExpectedValue on: " << extractSliceOp
757 LLVM_DEBUG(
DBGS() <<
"--with extractSlice: " << extractSliceOp <<
"\n");
758 Value source = extractSliceOp.getSource();
759 LLVM_DEBUG(
DBGS() <<
"--with starting source: " << source <<
"\n");
760 while (source && source != expectedSource) {
761 auto destOp = source.
getDefiningOp<DestinationStyleOpInterface>();
764 LLVM_DEBUG(
DBGS() <<
"--step dest op: " << destOp <<
"\n");
765 source = destOp.getDpsInitOperand(cast<OpResult>(source).getResultNumber())
768 LLVM_DEBUG(
DBGS() <<
"--final source: " << source <<
"\n");
769 LLVM_DEBUG(
DBGS() <<
"--expected source: " << expectedSource <<
"\n");
770 return source == expectedSource;
801 static tensor::ExtractSliceOp
803 Value hoistedPackedTensor,
804 tensor::ExtractSliceOp outerSliceOp, scf::ForOp forOp) {
805 LLVM_DEBUG(
DBGS() <<
"Start padThroughLoopIterArg on: " << forOp <<
"\n");
806 LLVM_DEBUG(
DBGS() <<
"--paddedValueBeforeHoisting: "
807 << paddedValueBeforeHoisting <<
"\n");
809 for (
OpOperand &use : outerSliceOp->getUses()) {
810 if (use.getOwner() == forOp) {
811 assert(!pUse &&
"Multiple slice uses in the for loop");
815 assert(pUse &&
"No slice use in the for loop");
819 unsigned iterArgNumber = forOp.getTiedLoopResult(pUse).getResultNumber();
820 auto yieldingExtractSliceOp = forOp.getYieldedValues()[iterArgNumber]
821 .getDefiningOp<tensor::ExtractSliceOp>();
822 if (!yieldingExtractSliceOp)
823 return tensor::ExtractSliceOp();
829 paddedValueBeforeHoisting))
830 return tensor::ExtractSliceOp();
833 initArgs[iterArgNumber] = hoistedPackedTensor;
835 yieldOperands[iterArgNumber] = yieldingExtractSliceOp.getSource();
837 int64_t numOriginalForOpResults = initArgs.size();
838 LLVM_DEBUG(
DBGS() <<
"numOriginalForOpResults: " << numOriginalForOpResults
840 tensor::ExtractSliceOp extracted;
844 extracted = tensor::ExtractSliceOp::create(
845 rewriter, hoistedPackedTensor.
getLoc(), hoistedPackedTensor,
846 outerSliceOp.getMixedOffsets(), outerSliceOp.getMixedSizes(),
847 outerSliceOp.getMixedStrides());
850 scf::ForOp newForOp = cast<scf::ForOp>(*forOp.replaceWithAdditionalYields(
851 rewriter, initArgs,
true,
853 return yieldOperands;
856 LLVM_DEBUG(
DBGS() <<
"newForOp results: " << newForOp.getNumResults()
858 LLVM_DEBUG(
DBGS() <<
"replace source of: " << extracted <<
"\n");
859 LLVM_DEBUG(
DBGS() <<
"with result #"
860 << numOriginalForOpResults + iterArgNumber
861 <<
" of forOp, giving us: " << extracted <<
"\n");
863 extracted.getSourceMutable().assign(
864 newForOp.getResult(numOriginalForOpResults + iterArgNumber));
867 LLVM_DEBUG(
DBGS() <<
"replace uses of: " << paddedValueBeforeHoisting
869 LLVM_DEBUG(
DBGS() <<
"with region iter arg #"
870 << numOriginalForOpResults + iterArgNumber <<
"\n");
872 paddedValueBeforeHoisting,
873 newForOp.getRegionIterArg(numOriginalForOpResults + iterArgNumber));
882 tensor::PadOp opToHoist,
883 RankedTensorType transposedTensorType,
884 const HoistPaddingAnalysis &
analysis,
892 RankedTensorType paddedTensorType = opToHoist.getResultType();
893 int paddedRank = paddedTensorType.getRank();
896 LLVM_DEBUG(
DBGS() <<
"nPackedLoops: " << nPackedLoops <<
" loops\n");
898 scf::ForOp outerLoop =
analysis.outermostEnclosingForOp;
901 Value hoistedPackedTensor;
905 if (nPackedLoops > 0) {
906 loopIterationCounts =
907 llvm::to_vector<4>(llvm::map_range(packingLoops, [&](
Operation *loop) {
909 cast<scf::ForOp>(loop));
912 if (llvm ::any_of(loopIterationCounts, [](
Value v) {
return !v; }))
913 llvm_unreachable(
"loop independence prerequisite not met");
916 std::copy(loopIterationCounts.begin(), loopIterationCounts.end(),
918 hoistedPackedTensor =
923 hoistedPackedTensor = bvm.
lookup(opToHoist.getResult());
926 LLVM_DEBUG(
DBGS() <<
"hoistedPackedTensor: " << hoistedPackedTensor <<
"\n");
929 scf::ForOp forOp =
analysis.padConsumingForOp;
938 return tensor::ExtractSliceOp::create(
939 rewriter, loc, transposedTensorType, hoistedPackedTensor, offsets,
944 RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops,
947 LLVM_DEBUG(
DBGS() <<
"\n";
DBGS() <<
" Try to hoist " << *(opToHoist) <<
"\n";
948 DBGS() <<
" by " << numLoops <<
" loops\n");
950 HoistPaddingAnalysis
analysis(opToHoist, numLoops);
951 analysis.enableHoistPadding(rewriter);
952 analysis.finalizeHoistPaddingAnalysis();
954 LLVM_DEBUG(
DBGS() <<
"--Analysis failed -> Skip\n");
961 rewriter, bvm, opToHoist, transposeVector,
analysis);
962 if (
failed(packingResult)) {
963 LLVM_DEBUG(
DBGS() <<
"--buildPackingLoopNestImpl failed -> Skip\n");
967 if (!transposeVector.empty())
968 transposeOps.push_back(packingResult->maybeTransposeOp);
970 FailureOr<RankedTensorType> transposedTensorType =
972 assert(succeeded(transposedTensorType) &&
"unexpected failure in type");
981 RankedTensorType paddedTensorType = opToHoist.getResultType();
982 if (!transposeVector.empty()) {
987 tensor::EmptyOp::create(rewriter, loc, paddedTensorType.getShape(),
988 paddedTensorType.getElementType());
989 TransposeOp unTransposeOp = linalg::TransposeOp::create(
990 rewriter, loc, newResult, emptyTensor, transposeVector);
991 newResult = unTransposeOp.getResult()[0];
992 transposeOps.push_back(unTransposeOp);
995 LLVM_DEBUG(
DBGS() <<
"newResult: " << newResult <<
"\n");
997 DBGS() <<
"After hoisting: "
1002 hoistedOp = packingResult->hoistedPadOp;
1004 LLVM_DEBUG(
DBGS() <<
"--SUCCESS\n");
1009 tensor::PadOp opToHoist, int64_t numLoops,
1014 hoistedOp, transposeOps);
static void copy(Location loc, Value dst, Value src, Value size, OpBuilder &builder)
Copies the given number of bytes from src to dst pointers.
static tensor::ExtractSliceOp padThroughLoopIterArg(RewriterBase &rewriter, Value paddedValueBeforeHoisting, Value hoistedPackedTensor, tensor::ExtractSliceOp outerSliceOp, scf::ForOp forOp)
If the original consumer of outerSliceOp was a forOp (i.e. through an iter arg), propagate the hoistedPackedTensor value through the same iter arg.
static Value buildLoopIterationCount(RewriterBase &rewriter, scf::ForOp outer, scf::ForOp forOp)
Return the current iteration number in the loop (iv - lb).ceilDiv(step).
static void getEnclosingLoopsUntil(tensor::PadOp padOp, scf::ForOp untilLoop, SmallVector< scf::ForOp > &reverseEnclosingLoops)
Return at most nLevels of immediately enclosing scf::ForOp loops.
static bool debugPrintLoopInShortForm(Operation *op)
static bool tracesBackToExpectedValue(tensor::ExtractSliceOp extractSliceOp, Value expectedSource)
Return true if we can walk back the use-def chain from extractSliceOp to expectedSource, going exclusively through DestinationStyleOpInterface init operands.
static bool isDefinedOutsideOrConstant(scf::ForOp outer, Value v)
static FailureOr< PackingResult > buildPackingLoopNestImpl(RewriterBase &rewriter, IRMapping &bvm, tensor::PadOp opToHoist, ArrayRef< int64_t > transposeVector, RankedTensorType transposedTensorType, tensor::EmptyOp emptyOp, const HoistPaddingAnalysis &analysis)
static void computeBackwardSlice(tensor::PadOp padOp, scf::ForOp outermostEnclosingForOp, SetVector< Operation * > &backwardSlice)
static Value replaceByPackingResult(RewriterBase &rewriter, const IRMapping &bvm, tensor::PadOp opToHoist, RankedTensorType transposedTensorType, const HoistPaddingAnalysis &analysis, const PackingResult &packingResult)
Produce a tensor extracted from the packingResult.
static void debugPrintBackwardSlice(SetVector< Operation * > &backwardSlice)
static void getAtMostNEnclosingLoops(tensor::PadOp padOp, int nLevels, SmallVector< scf::ForOp > &reverseEnclosingLoops)
Return at most nLevels of immediately enclosing scf::ForOp loops.
Base type for affine expression.
This class provides management for the lifetime of the state used when printing the IR.
This class represents an argument of a Block.
Block * getOwner() const
Returns the block that owns this argument.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
IntegerAttr getIndexAttr(int64_t value)
MLIRContext * getContext() const
A class for computing basic dominance information.
bool dominates(Operation *a, Operation *b) const
Return true if operation A dominates operation B, i.e.
This is a utility class for mapping one set of IR entities to another.
auto lookupOrDefault(T from) const
Lookup a mapped value within the map.
auto lookup(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
IRValueT get() const
Return the current value being used by this operand.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the operation using the map that is provided (leaving them alone if no entry is present).
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent insertions to go right after it.
This class represents an operand of an operation.
Operation is the basic unit of execution within MLIR.
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation, or nullptr if this is a top-level operation.
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to track mutations and create new operations.
void replaceAllUsesWith(Value from, Value to)
Find uses of from and replace them with to.
virtual void finalizeOpModification(Operation *op)
This method is used to signal the end of an in-place modification of the given operation.
virtual void startOpModification(Operation *op)
This method is used to notify the rewriter that an in-place operation modification is about to happen...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
A helper class to be used with ValueBoundsOpInterface.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Location getLoc() const
Return the location of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
FailureOr< OpFoldResult > reifyIndexValueBound(OpBuilder &b, Location loc, presburger::BoundType type, Value value, ValueBoundsConstraintSet::StopConditionFn stopCondition=nullptr, bool closedUB=false)
Reify a bound for the given index-typed value in terms of SSA values for which stopCondition is met.
void bindDims(MLIRContext *ctx)
void bindSymbols(MLIRContext *ctx)
FailureOr< PackingResult > buildPackingLoopNest(RewriterBase &rewriter, tensor::PadOp opToHoist, scf::ForOp outermostEnclosingForOp, ArrayRef< int64_t > transposeVector)
Build the packing loop nest required to hoist opToHoist above outermostEnclosingForOp.
FailureOr< Value > hoistPaddingOnTensors(RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops, ArrayRef< int64_t > transposeVector, tensor::PadOp &hoistedOp, SmallVectorImpl< TransposeOp > &transposeOps)
Mechanically hoist padding operations on tensors by numLoops into a new, generally larger tensor.
FailureOr< RankedTensorType > computeTransposedType(RankedTensorType rankedTensorType, ArrayRef< int64_t > transposeVector)
Returns the transposed rankedTensorType if transposeVector is non-empty.
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
LogicalResult getBackwardSlice(Operation *op, SetVector< Operation * > *backwardSlice, const BackwardSliceOptions &options={})
Fills backwardSlice with the computed backward slice (i.e.
LoopLikeOpInterface hoistLoopInvariantSubsets(RewriterBase &rewriter, LoopLikeOpInterface loopLike)
Hoist loop-invariant tensor subsets (subset extraction and subset insertion ops) from loop-like ops.
void getUsedValuesDefinedAbove(Region ®ion, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region or its descendants.
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
bool inclusive
Include the top level op in the slice.
Helper struct to hold the results of building a packing loop nest.
SmallVector< OpFoldResult > strides
SmallVector< Value > clonedLoopIvs
SmallVector< OpFoldResult > sizes