30 #include "llvm/Support/Debug.h"
34 #define DEBUG_TYPE "hoist-padding"
36 #define DBGS() (dbgs() << '[' << DEBUG_TYPE << "] ")
46 if (
auto forOp = dyn_cast<scf::ForOp>(op)) {
47 forOp.getInductionVar().printAsOperand(dbgs(), state);
48 dbgs() <<
" @ " << forOp.getOperation();
56 LLVM_DEBUG(llvm::interleaveComma(backwardSlice,
DBGS() <<
"--backwardSlice:",
64 dbgs() << *op <<
"\n";
76 scf::ForOp outermostEnclosingForOp =
nullptr;
78 while (nLevels-- > 0 &&
79 (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
80 LLVM_DEBUG(
DBGS() <<
"loops: ";
83 reverseEnclosingLoops.push_back(outermostEnclosingForOp);
84 nextEnclosingOp = outermostEnclosingForOp->
getParentOp();
95 scf::ForOp outermostEnclosingForOp =
nullptr;
97 while (outermostEnclosingForOp != untilLoop &&
98 (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
99 LLVM_DEBUG(
DBGS() <<
"loops: ";
102 reverseEnclosingLoops.push_back(outermostEnclosingForOp);
103 nextEnclosingOp = outermostEnclosingForOp->
getParentOp();
112 scf::ForOp outermostEnclosingForOp,
117 return domInfo.
dominates(outermostEnclosingForOp, op) &&
118 !padOp->isProperAncestor(op);
126 for (
Value v : valuesDefinedAbove) {
153 struct HoistPaddingAnalysis {
154 HoistPaddingAnalysis(tensor::PadOp padOp,
int numLoops);
155 HoistPaddingAnalysis(tensor::PadOp padOp, scf::ForOp outermostEnclosingForOp);
157 bool isValid() {
return valid.has_value() && valid.value(); }
158 bool isInvalid() {
return valid.has_value() && !valid.value(); }
183 void finalizeHoistPaddingAnalysis();
187 std::optional<bool> valid;
190 tensor::PadOp opToHoist;
219 LogicalResult dropNonIndexDependencies();
224 scf::ForOp outermostEnclosingForOp;
238 tensor::ExtractSliceOp sliceOp;
241 scf::ForOp padConsumingForOp;
246 HoistPaddingAnalysis::HoistPaddingAnalysis(tensor::PadOp padOp,
int numLoops)
247 : valid(std::nullopt), opToHoist(padOp) {
250 if (reverseEnclosingLoops.empty()) {
251 LLVM_DEBUG(
DBGS() <<
"--No immediately enclosing loop -> Skip\n");
255 outermostEnclosingForOp = reverseEnclosingLoops.back();
256 sliceOp = opToHoist.getSource().getDefiningOp<tensor::ExtractSliceOp>();
258 LLVM_DEBUG(
DBGS() <<
"--Cannot find the extract slice op -> Skip\n");
264 HoistPaddingAnalysis::HoistPaddingAnalysis(tensor::PadOp padOp,
265 scf::ForOp outermostEnclosingForOp)
266 : valid(std::nullopt), opToHoist(padOp) {
269 reverseEnclosingLoops);
270 if (reverseEnclosingLoops.empty()) {
271 LLVM_DEBUG(
DBGS() <<
"--No immediately enclosing loop -> Skip\n");
275 this->outermostEnclosingForOp = reverseEnclosingLoops.back();
276 if (this->outermostEnclosingForOp != outermostEnclosingForOp) {
277 LLVM_DEBUG(
DBGS() <<
"--Unexpected outermost enclosing loop -> Skip\n");
281 sliceOp = opToHoist.getSource().getDefiningOp<tensor::ExtractSliceOp>();
283 LLVM_DEBUG(
DBGS() <<
"--Cannot find the extract slice op -> Skip\n");
289 void HoistPaddingAnalysis::enableHoistPadding(
RewriterBase &rewriter) {
295 if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.getSource())) {
296 outermostEnclosingForOp = cast<scf::ForOp>(
301 void HoistPaddingAnalysis::finalizeHoistPaddingAnalysis() {
305 if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.getSource())) {
306 LLVM_DEBUG(
DBGS() <<
"--outermostEnclosingForOp:\n"
307 << outermostEnclosingForOp <<
"\n"
308 <<
"--sliceOp: " << sliceOp <<
"\n"
309 <<
"--sliceOp.getSource(): " << sliceOp.getSource()
311 LLVM_DEBUG(
DBGS() <<
"----Source not defined outside of loops -> Skip\n");
315 if (sliceOp->hasOneUse()) {
316 padConsumingForOp = dyn_cast<scf::ForOp>(*(sliceOp->getUsers().begin()));
322 Value paddingValue = opToHoist.getConstantPaddingValue();
324 !isa_and_nonnull<arith::ConstantOp>(paddingValue.
getDefiningOp())) {
325 LLVM_DEBUG(
DBGS() <<
"Cannot find constant padding value -> Skip\n");
331 if (backwardSlice.size() <= 1) {
340 if (failed(dropNonIndexDependencies())) {
341 LLVM_DEBUG(
DBGS() <<
"--Cannot dropNonIndexDependencies -> Skip\n");
353 for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops))
354 if (backwardSlice.contains(forOp))
355 packingLoops.push_back(forOp);
358 if (packingLoops.size() > 1 && padConsumingForOp) {
359 LLVM_DEBUG(
DBGS() <<
"--Cannot hoist multiple loops through iter_args -> "
360 "Downgrade to 1 loop\n");
361 packingLoops.resize(1);
371 LogicalResult HoistPaddingAnalysis::dropNonIndexDependencies() {
377 auto addIndexOperandsToIndexEdges = [&](
Operation *operation) {
378 for (
Value operand : operation->getOperands())
379 if (operand.getType().isIndex())
380 indexEdges.insert(operand);
384 auto hasIndexResult = [&](
Operation *operation) {
385 return llvm::any_of(operation->getResults(), [&](
Value result) {
386 return indexEdges.contains(result);
411 for (
Operation *op : llvm::reverse(backwardSlice)) {
414 if (op == opToHoist || op == sliceOp) {
415 addIndexOperandsToIndexEdges(op);
420 if (
auto forOp = dyn_cast<scf::ForOp>(op)) {
421 if (!hasIndexResult(op) && indexEdges.contains(forOp.getInductionVar())) {
422 addIndexOperandsToIndexEdges(op);
428 if (hasIndexResult(op)) {
429 addIndexOperandsToIndexEdges(op);
431 if (llvm::any_of(op->getOperandTypes(),
432 [](
Type type) { return !type.isIndex(); })) {
433 LLVM_DEBUG(
DBGS() <<
"Unsupported op with non index type operands: "
434 << op <<
" -> Skip\n");
438 auto effectInterface = dyn_cast<MemoryEffectOpInterface>(op);
439 bool hasMemoryEffect = effectInterface && !effectInterface.hasNoEffect();
440 if (hasMemoryEffect || op->getNumRegions() != 0) {
441 LLVM_DEBUG(
DBGS() <<
"Unsupported op with region or memory effect: "
442 << op <<
" -> Skip\n");
449 if (!isa<arith::ConstantOp>(op))
450 operationsToRemove.insert(op);
452 backwardSlice.set_subtract(operationsToRemove);
457 HoistPaddingAnalysis::getHoistedPackedTensorSizes(
RewriterBase &rewriter,
466 for (
auto forOp : packingLoops) {
469 rewriter, loc, presburger::BoundType::UB, forOp.getUpperBound(),
472 if (v == forOp.getUpperBound())
475 Operation *op = v.getDefiningOp();
478 return !isa<affine::AffineMinOp, affine::AffineMaxOp,
479 affine::AffineApplyOp>(op);
482 assert(succeeded(loopUb) &&
"could not get upper bound");
493 loc, (ub - lb).ceilDiv(step),
495 cast<scf::ForOp>(forOp).getStep()});
496 dynamicTensorSizes.push_back(res);
499 return dynamicTensorSizes;
523 Value ivVal = forOp.getInductionVar(), lbVal = forOp.getLowerBound(),
524 stepVal = forOp.getStep();
525 auto loc = forOp->
getLoc();
527 loc, (iv - lb).ceilDiv(step),
ValueRange{ivVal, lbVal, stepVal});
544 tensor::EmptyOp emptyOp,
const HoistPaddingAnalysis &analysis) {
548 scf::ForOp outerLoop = analysis.outermostEnclosingForOp;
551 RankedTensorType paddedTensorType = opToHoist.getResultType();
552 int paddedRank = paddedTensorType.getRank();
555 BlockArgument bbArg = dyn_cast<BlockArgument>(opToHoist.getSource());
560 if (forOp != outerLoop && !outerLoop->isAncestor(forOp))
562 OpOperand &operand = *forOp.getTiedLoopInit(bbArg);
563 bvm.
map(bbArg, operand.
get());
564 bbArg = dyn_cast<BlockArgument>(operand.
get());
568 Value hoistedPackedTensor = emptyOp.getResult();
570 for (
Operation *op : analysis.backwardSlice) {
573 if (
auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(op)) {
575 LLVM_DEBUG(
DBGS() <<
"--Skip: " << sliceOp <<
"\n");
581 auto forOp = dyn_cast<scf::ForOp>(op);
584 rewriter.
clone(*op, bvm);
590 auto clonedForOp = rewriter.
create<scf::ForOp>(
596 bvm.
map(forOp.getInductionVar(), clonedForOp.getInductionVar());
597 bvm.
map(forOp.getRegionIterArgs(), clonedForOp.getRegionIterArgs());
598 bvm.
map(forOp.getResults(), clonedForOp.getResults());
599 assert(clonedForOp->getNumRegions() == 1);
600 clonedLoopIvs.push_back(clonedForOp.getInductionVar());
604 Value loopIndependentIterationCount =
608 if (!loopIndependentIterationCount)
609 llvm_unreachable(
"loop independence prerequisite not met");
610 leadingHoistedPackedTensorIndexings.push_back(
611 loopIndependentIterationCount);
612 hoistedPackedTensor = clonedForOp.getRegionIterArgs().front();
617 int64_t nPackedLoops = clonedLoopIvs.size();
621 leadingHoistedPackedTensorIndexings.end()};
625 for (int64_t sz : transposedTensorType.getShape()) {
627 if (ShapedType::isDynamic(sz))
636 TransposeOp maybeTransposeOp;
637 Value paddedTensor = bvm.
lookup(opToHoist.getResult());
638 if (!transposeVector.empty()) {
639 Value outputTensor = rewriter.
create<tensor::ExtractSliceOp>(
640 loc, transposedTensorType, hoistedPackedTensor, offsets, sizes,
642 maybeTransposeOp = rewriter.
create<linalg::TransposeOp>(
643 loc, paddedTensor, outputTensor, transposeVector);
644 paddedTensor = maybeTransposeOp.
getResult()[0];
648 if (nPackedLoops > 0) {
651 Value inserted = rewriter.
create<tensor::InsertSliceOp>(
652 loc, paddedTensor, hoistedPackedTensor, offsets, sizes, strides);
655 Value valueToYield = inserted;
656 for (
Value iv : llvm::reverse(clonedLoopIvs)) {
659 rewriter.
create<scf::YieldOp>(loc, valueToYield);
669 leadingHoistedPackedTensorIndexings,
671 cast<tensor::PadOp>(bvm.
lookup(opToHoist.getResult()).getDefiningOp())};
681 int nPackedLoops = analysis.packingLoops.size();
682 LLVM_DEBUG(
DBGS() <<
"\n";
684 << *opToHoist->getParentOfType<func::FuncOp>() <<
"\n";
685 DBGS() <<
"Start hoisting above " << nPackedLoops <<
" loops\n");
688 RankedTensorType paddedTensorType = opToHoist.getResultType();
691 FailureOr<RankedTensorType> transposedTensorType =
693 if (failed(transposedTensorType)) {
694 LLVM_DEBUG(
DBGS() <<
"--Could not compute transposed type -> Skip\n");
701 llvm::append_range(packedShape, transposedTensorType->getShape());
703 packedShape, transposedTensorType->getElementType());
706 scf::ForOp outerLoop = analysis.outermostEnclosingForOp;
710 analysis.getHoistedPackedTensorSizes(rewriter, loc);
711 auto emptyOp = rewriter.
create<tensor::EmptyOp>(
712 loc, hoistedPackedTensorType.getShape(),
713 hoistedPackedTensorType.getElementType(), dynamicTensorSizes);
716 *transposedTensorType, emptyOp, analysis);
725 HoistPaddingAnalysis analysis(opToHoist, outermostEnclosingForOp);
726 analysis.enableHoistPadding(rewriter);
727 analysis.finalizeHoistPaddingAnalysis();
728 if (!analysis.isValid()) {
729 LLVM_DEBUG(
DBGS() <<
"--Analysis failed -> Skip\n");
748 Value expectedSource) {
749 LLVM_DEBUG(
DBGS() <<
"Start tracesBackToExpectedValue on: " << extractSliceOp
751 LLVM_DEBUG(
DBGS() <<
"--with extractSlice: " << extractSliceOp <<
"\n");
752 Value source = extractSliceOp.getSource();
753 LLVM_DEBUG(
DBGS() <<
"--with starting source: " << source <<
"\n");
754 while (source && source != expectedSource) {
756 dyn_cast_or_null<DestinationStyleOpInterface>(source.
getDefiningOp());
759 LLVM_DEBUG(
DBGS() <<
"--step dest op: " << destOp <<
"\n");
760 source = destOp.getDpsInitOperand(cast<OpResult>(source).getResultNumber())
763 LLVM_DEBUG(
DBGS() <<
"--final source: " << source <<
"\n");
764 LLVM_DEBUG(
DBGS() <<
"--expected source: " << expectedSource <<
"\n");
765 return source == expectedSource;
796 static tensor::ExtractSliceOp
798 Value hoistedPackedTensor,
799 tensor::ExtractSliceOp outerSliceOp, scf::ForOp forOp) {
800 LLVM_DEBUG(
DBGS() <<
"Start padThroughLoopIterArg on: " << forOp <<
"\n");
801 LLVM_DEBUG(
DBGS() <<
"--paddedValueBeforeHoisting: "
802 << paddedValueBeforeHoisting <<
"\n");
804 for (
OpOperand &use : outerSliceOp->getUses()) {
805 if (use.getOwner() == forOp) {
806 assert(!pUse &&
"Multiple slice uses in the for loop");
810 assert(pUse &&
"No slice use in the for loop");
814 unsigned iterArgNumber = forOp.getTiedLoopResult(pUse).getResultNumber();
815 auto yieldingExtractSliceOp = forOp.getYieldedValues()[iterArgNumber]
816 .getDefiningOp<tensor::ExtractSliceOp>();
817 if (!yieldingExtractSliceOp)
818 return tensor::ExtractSliceOp();
824 paddedValueBeforeHoisting))
825 return tensor::ExtractSliceOp();
828 initArgs[iterArgNumber] = hoistedPackedTensor;
830 yieldOperands[iterArgNumber] = yieldingExtractSliceOp.getSource();
832 int64_t numOriginalForOpResults = initArgs.size();
833 LLVM_DEBUG(
DBGS() <<
"numOriginalForOpResults: " << numOriginalForOpResults
835 tensor::ExtractSliceOp extracted;
839 extracted = rewriter.
create<tensor::ExtractSliceOp>(
840 hoistedPackedTensor.
getLoc(), hoistedPackedTensor,
841 outerSliceOp.getMixedOffsets(), outerSliceOp.getMixedSizes(),
842 outerSliceOp.getMixedStrides());
845 scf::ForOp newForOp = cast<scf::ForOp>(*forOp.replaceWithAdditionalYields(
846 rewriter, initArgs,
true,
848 return yieldOperands;
851 LLVM_DEBUG(
DBGS() <<
"newForOp results: " << newForOp.getNumResults()
853 LLVM_DEBUG(
DBGS() <<
"replace source of: " << extracted <<
"\n");
854 LLVM_DEBUG(
DBGS() <<
"with result #"
855 << numOriginalForOpResults + iterArgNumber
856 <<
" of forOp, giving us: " << extracted <<
"\n");
858 extracted.getSourceMutable().assign(
859 newForOp.getResult(numOriginalForOpResults + iterArgNumber));
862 LLVM_DEBUG(
DBGS() <<
"replace uses of: " << paddedValueBeforeHoisting
864 LLVM_DEBUG(
DBGS() <<
"with region iter arg #"
865 << numOriginalForOpResults + iterArgNumber <<
"\n");
867 paddedValueBeforeHoisting,
868 newForOp.getRegionIterArg(numOriginalForOpResults + iterArgNumber));
877 tensor::PadOp opToHoist,
878 RankedTensorType transposedTensorType,
879 const HoistPaddingAnalysis &analysis,
887 RankedTensorType paddedTensorType = opToHoist.getResultType();
888 int paddedRank = paddedTensorType.getRank();
891 LLVM_DEBUG(
DBGS() <<
"nPackedLoops: " << nPackedLoops <<
" loops\n");
893 scf::ForOp outerLoop = analysis.outermostEnclosingForOp;
896 Value hoistedPackedTensor;
900 if (nPackedLoops > 0) {
901 loopIterationCounts =
902 llvm::to_vector<4>(llvm::map_range(packingLoops, [&](
Operation *loop) {
904 cast<scf::ForOp>(loop));
907 if (llvm ::any_of(loopIterationCounts, [](
Value v) {
return !v; }))
908 llvm_unreachable(
"loop independence prerequisite not met");
911 std::copy(loopIterationCounts.begin(), loopIterationCounts.end(),
913 hoistedPackedTensor =
918 hoistedPackedTensor = bvm.
lookup(opToHoist.getResult());
921 LLVM_DEBUG(
DBGS() <<
"hoistedPackedTensor: " << hoistedPackedTensor <<
"\n");
924 scf::ForOp forOp = analysis.padConsumingForOp;
927 analysis.sliceOp, forOp);
933 return rewriter.
create<tensor::ExtractSliceOp>(
934 loc, transposedTensorType, hoistedPackedTensor, offsets,
939 RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops,
942 LLVM_DEBUG(
DBGS() <<
"\n";
DBGS() <<
" Try to hoist " << *(opToHoist) <<
"\n";
943 DBGS() <<
" by " << numLoops <<
" loops\n");
945 HoistPaddingAnalysis analysis(opToHoist, numLoops);
946 analysis.enableHoistPadding(rewriter);
947 analysis.finalizeHoistPaddingAnalysis();
948 if (!analysis.isValid()) {
949 LLVM_DEBUG(
DBGS() <<
"--Analysis failed -> Skip\n");
956 rewriter, bvm, opToHoist, transposeVector, analysis);
957 if (failed(packingResult)) {
958 LLVM_DEBUG(
DBGS() <<
"--buildPackingLoopNestImpl failed -> Skip\n");
962 if (!transposeVector.empty())
963 transposeOps.push_back(packingResult->maybeTransposeOp);
965 FailureOr<RankedTensorType> transposedTensorType =
967 assert(succeeded(transposedTensorType) &&
"unexpected failure in type");
973 analysis, *packingResult);
976 RankedTensorType paddedTensorType = opToHoist.getResultType();
977 if (!transposeVector.empty()) {
981 Value emptyTensor = rewriter.
create<tensor::EmptyOp>(
982 loc, paddedTensorType.getShape(), paddedTensorType.getElementType());
983 TransposeOp unTransposeOp = rewriter.
create<linalg::TransposeOp>(
984 loc, newResult, emptyTensor, transposeVector);
985 newResult = unTransposeOp.
getResult()[0];
986 transposeOps.push_back(unTransposeOp);
989 LLVM_DEBUG(
DBGS() <<
"newResult: " << newResult <<
"\n");
991 DBGS() <<
"After hoisting: "
996 hoistedOp = packingResult->hoistedPadOp;
998 LLVM_DEBUG(
DBGS() <<
"--SUCCESS\n");
1003 tensor::PadOp opToHoist, int64_t numLoops,
1008 hoistedOp, transposeOps);
static void copy(Location loc, Value dst, Value src, Value size, OpBuilder &builder)
Copies the given number of bytes from src to dst pointers.
static tensor::ExtractSliceOp padThroughLoopIterArg(RewriterBase &rewriter, Value paddedValueBeforeHoisting, Value hoistedPackedTensor, tensor::ExtractSliceOp outerSliceOp, scf::ForOp forOp)
If the original consumer of outerSliceOp was a forOp (i.e.
static Value buildLoopIterationCount(RewriterBase &rewriter, scf::ForOp outer, scf::ForOp forOp)
Return the current iteration number in the loop (iv - lb).ceilDiv(step).
static void getEnclosingLoopsUntil(tensor::PadOp padOp, scf::ForOp untilLoop, SmallVector< scf::ForOp > &reverseEnclosingLoops)
Return the immediately enclosing scf::ForOp loops, walking outwards until untilLoop has been collected or a parent that is not an scf::ForOp is reached.
static bool debugPrintLoopInShortForm(Operation *op)
static bool tracesBackToExpectedValue(tensor::ExtractSliceOp extractSliceOp, Value expectedSource)
Return true if we can walk back the use-def chain from extractSliceOp to expectedSource going through...
static bool isDefinedOutsideOrConstant(scf::ForOp outer, Value v)
static FailureOr< PackingResult > buildPackingLoopNestImpl(RewriterBase &rewriter, IRMapping &bvm, tensor::PadOp opToHoist, ArrayRef< int64_t > transposeVector, RankedTensorType transposedTensorType, tensor::EmptyOp emptyOp, const HoistPaddingAnalysis &analysis)
static void computeBackwardSlice(tensor::PadOp padOp, scf::ForOp outermostEnclosingForOp, SetVector< Operation * > &backwardSlice)
static Value replaceByPackingResult(RewriterBase &rewriter, const IRMapping &bvm, tensor::PadOp opToHoist, RankedTensorType transposedTensorType, const HoistPaddingAnalysis &analysis, const PackingResult &packingResult)
Produce a tensor extracted from the packingResult.
static void debugPrintBackwardSlice(SetVector< Operation * > &backwardSlice)
static void getAtMostNEnclosingLoops(tensor::PadOp padOp, int nLevels, SmallVector< scf::ForOp > &reverseEnclosingLoops)
Return at most nLevels of immediately enclosing scf::ForOp loops.
Base type for affine expression.
This class provides management for the lifetime of the state used when printing the IR.
This class represents an argument of a Block.
Block * getOwner() const
Returns the block that owns this argument.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
IntegerAttr getIndexAttr(int64_t value)
MLIRContext * getContext() const
A class for computing basic dominance information.
bool dominates(Operation *a, Operation *b) const
Return true if operation A dominates operation B, i.e.
This is a utility class for mapping one set of IR entities to another.
auto lookupOrDefault(T from) const
Lookup a mapped value within the map.
auto lookup(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
IRValueT get() const
Return the current value being used by this operand.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents an operand of an operation.
Operation is the basic unit of execution within MLIR.
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
void replaceAllUsesWith(Value from, Value to)
Find uses of from and replace them with to.
virtual void finalizeOpModification(Operation *op)
This method is used to signal the end of an in-place modification of the given operation.
virtual void startOpModification(Operation *op)
This method is used to notify the rewriter that an in-place operation modification is about to happen...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
A helper class to be used with ValueBoundsOpInterface.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Location getLoc() const
Return the location of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
FailureOr< OpFoldResult > reifyIndexValueBound(OpBuilder &b, Location loc, presburger::BoundType type, Value value, ValueBoundsConstraintSet::StopConditionFn stopCondition=nullptr, bool closedUB=false)
Reify a bound for the given index-typed value in terms of SSA values for which stopCondition is met.
void bindDims(MLIRContext *ctx)
void bindSymbols(MLIRContext *ctx)
FailureOr< PackingResult > buildPackingLoopNest(RewriterBase &rewriter, tensor::PadOp opToHoist, scf::ForOp outermostEnclosingForOp, ArrayRef< int64_t > transposeVector)
Build the packing loop nest required to hoist opToHoist above outermostEnclosingForOp.
FailureOr< Value > hoistPaddingOnTensors(RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops, ArrayRef< int64_t > transposeVector, tensor::PadOp &hoistedOp, SmallVectorImpl< TransposeOp > &transposeOps)
Mechanically hoist padding operations on tensors by numLoops into a new, generally larger tensor.
FailureOr< RankedTensorType > computeTransposedType(RankedTensorType rankedTensorType, ArrayRef< int64_t > transposeVector)
Returns the transposed rankedTensorType if transposeVector is non-empty.
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
LoopLikeOpInterface hoistLoopInvariantSubsets(RewriterBase &rewriter, LoopLikeOpInterface loopLike)
Hoist loop-invariant tensor subsets (subset extraction and subset insertion ops) from loop-like ops.
void getBackwardSlice(Operation *op, SetVector< Operation * > *backwardSlice, const BackwardSliceOptions &options={})
Fills backwardSlice with the computed backward slice (i.e.
void getUsedValuesDefinedAbove(Region &region, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region...
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
bool inclusive
Include the top level op in the slice.
Helper struct to hold the results of building a packing loop nest.
SmallVector< OpFoldResult > strides
SmallVector< Value > clonedLoopIvs
SmallVector< OpFoldResult > sizes