#include "llvm/Support/Debug.h"

using llvm::dbgs;

#define DEBUG_TYPE "hoist-padding"

#define DBGS() (dbgs() << '[' << DEBUG_TYPE << "] ")

using namespace mlir;
using namespace mlir::linalg;
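// Rough shape of the transformation implemented in this file (illustrative
// sketch only; names and exact syntax are simplified, not verbatim output).
// A tensor.pad recomputed on every iteration of an enclosing scf.for:
//
//   scf.for %i = %lb to %ub step %s {
//     %slice = tensor.extract_slice %t[%i, 0] ... : ... to tensor<?x?xf32>
//     %pad   = tensor.pad %slice ... : tensor<?x?xf32> to tensor<4x8xf32>
//     compute(%pad)
//   }
//
// is hoisted by packing all padded tiles into a larger tensor up front and
// replacing the pad with a rank-reducing extract_slice of that packed tensor,
// where %it is the loop's iteration count (%i - %lb) ceildiv %s:
//
//   %empty  = tensor.empty(%n) : tensor<?x4x8xf32>
//   %packed = scf.for %i ... iter_args(%acc = %empty) {
//     %slice = tensor.extract_slice %t[%i, 0] ...
//     %pad   = tensor.pad %slice ...
//     %ins   = tensor.insert_slice %pad into %acc[%it, 0, 0][1, 4, 8][1, 1, 1]
//     scf.yield %ins
//   }
//   scf.for %i ... {
//     %pad = tensor.extract_slice %packed[%it, 0, 0][1, 4, 8][1, 1, 1]
//            : tensor<?x4x8xf32> to tensor<4x8xf32>
//     compute(%pad)
//   }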
static bool debugPrintLoopInShortForm(Operation *op) {
  AsmState state(op->getParentOfType<func::FuncOp>());
  if (auto forOp = dyn_cast<scf::ForOp>(op)) {
    forOp.getInductionVar().printAsOperand(dbgs(), state);
    dbgs() << " @ " << forOp.getOperation();
    return true;
  }
  return false;
}
static void debugPrintBackwardSlice(SetVector<Operation *> &backwardSlice) {
  LLVM_DEBUG(llvm::interleaveComma(backwardSlice, DBGS() << "--backwardSlice:",
                                   [](Operation *op) {
                                     // ...
                                     dbgs() << *op << "\n";
                                   });
             DBGS() << "\n";);
}
/// Return at most `nLevels` of immediately enclosing scf::ForOp loops,
/// innermost first.
static void
getAtMostNEnclosingLoops(tensor::PadOp padOp, int nLevels,
                         SmallVector<scf::ForOp> &reverseEnclosingLoops) {
  scf::ForOp outermostEnclosingForOp = nullptr;
  Operation *nextEnclosingOp = padOp->getParentOp();
  while (nLevels-- > 0 &&
         (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
    LLVM_DEBUG(DBGS() << "loops: ";
               debugPrintLoopInShortForm(outermostEnclosingForOp);
               dbgs() << "\n");
    reverseEnclosingLoops.push_back(outermostEnclosingForOp);
    nextEnclosingOp = outermostEnclosingForOp->getParentOp();
  }
}
/// Return the immediately enclosing scf::ForOp loops, innermost first, walking
/// outwards until `untilLoop` (inclusive) has been collected.
static void
getEnclosingLoopsUntil(tensor::PadOp padOp, scf::ForOp untilLoop,
                       SmallVector<scf::ForOp> &reverseEnclosingLoops) {
  scf::ForOp outermostEnclosingForOp = nullptr;
  Operation *nextEnclosingOp = padOp->getParentOp();
  while (outermostEnclosingForOp != untilLoop &&
         (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
    LLVM_DEBUG(DBGS() << "loops: ";
               debugPrintLoopInShortForm(outermostEnclosingForOp);
               dbgs() << "\n");
    reverseEnclosingLoops.push_back(outermostEnclosingForOp);
    nextEnclosingOp = outermostEnclosingForOp->getParentOp();
  }
}
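// The two collectors above differ only in their stopping criterion: the first
// stops after `nLevels` loops, the second once `untilLoop` has been collected.
// Both also stop at the first parent that is not an scf.for.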
/// Compute the backward slice of `padOp`, keeping only ops that are dominated
/// by `outermostEnclosingForOp` and not nested inside `padOp` itself.
static void computeBackwardSlice(tensor::PadOp padOp,
                                 scf::ForOp outermostEnclosingForOp,
                                 SetVector<Operation *> &backwardSlice) {
  DominanceInfo domInfo(outermostEnclosingForOp);
  BackwardSliceOptions sliceOptions;
  sliceOptions.filter = [&](Operation *op) {
    return domInfo.dominates(outermostEnclosingForOp, op) &&
           !padOp->isProperAncestor(op);
  };
  sliceOptions.inclusive = true;
  // ... (also seed the slice with the values used inside the pad region)
  for (Value v : valuesDefinedAbove)
    getBackwardSlice(v, &backwardSlice, sliceOptions);
  getBackwardSlice(padOp.getOperation(), &backwardSlice, sliceOptions);
}
struct HoistPaddingAnalysis {
  HoistPaddingAnalysis(tensor::PadOp padOp, int numLoops);
  HoistPaddingAnalysis(tensor::PadOp padOp, scf::ForOp outermostEnclosingForOp);

  bool isValid() { return valid.has_value() && valid.value(); }
  bool isInvalid() { return valid.has_value() && !valid.value(); }

  // ... (getHoistedPackedTensorSizes and dropNonIndexDependencies declarations
  // elided; their definitions appear below)
  void enableHoistPadding(RewriterBase &rewriter);
  void finalizeHoistPaddingAnalysis();

  /// Encodes whether the analysis is valid and hoisting can proceed.
  std::optional<bool> valid;
  /// The tensor::PadOp to hoist.
  tensor::PadOp opToHoist;
  /// The immediately enclosing loops considered for hoisting, innermost first.
  SmallVector<scf::ForOp> reverseEnclosingLoops;
  /// The outermost loop above which `opToHoist` will be hoisted.
  scf::ForOp outermostEnclosingForOp;
  /// Backward slice rooted at `opToHoist` and dominated by
  /// `outermostEnclosingForOp`.
  SetVector<Operation *> backwardSlice;
  /// The loops along which the padded tiles get packed, outermost first.
  SmallVector<scf::ForOp> packingLoops;
  /// The tensor::ExtractSliceOp that feeds `opToHoist`.
  tensor::ExtractSliceOp sliceOp;
  /// The unique scf::ForOp consuming `sliceOp` through an iter_arg, if any.
  scf::ForOp padConsumingForOp;
};
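// Typical usage of the analysis (see buildPackingLoopNest and
// hoistPaddingOnTensors below): construct it, optionally call
// enableHoistPadding to hoist redundant subset ops first, then call
// finalizeHoistPaddingAnalysis and bail out unless isValid() returns true.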
HoistPaddingAnalysis::HoistPaddingAnalysis(tensor::PadOp padOp, int numLoops)
    : valid(std::nullopt), opToHoist(padOp) {
  // Get at most `numLoops` immediately enclosing loops.
  getAtMostNEnclosingLoops(opToHoist, numLoops, reverseEnclosingLoops);
  if (reverseEnclosingLoops.empty()) {
    LLVM_DEBUG(DBGS() << "--No immediately enclosing loop -> Skip\n");
    valid = false;
    return;
  }
  outermostEnclosingForOp = reverseEnclosingLoops.back();
  sliceOp = opToHoist.getSource().getDefiningOp<tensor::ExtractSliceOp>();
  if (!sliceOp) {
    LLVM_DEBUG(DBGS() << "--Cannot find the extract slice op -> Skip\n");
    valid = false;
    return;
  }
}
HoistPaddingAnalysis::HoistPaddingAnalysis(tensor::PadOp padOp,
                                           scf::ForOp outermostEnclosingForOp)
    : valid(std::nullopt), opToHoist(padOp) {
  // Get all enclosing loops up to and including `outermostEnclosingForOp`.
  getEnclosingLoopsUntil(opToHoist, outermostEnclosingForOp,
                         reverseEnclosingLoops);
  if (reverseEnclosingLoops.empty()) {
    LLVM_DEBUG(DBGS() << "--No immediately enclosing loop -> Skip\n");
    valid = false;
    return;
  }
  this->outermostEnclosingForOp = reverseEnclosingLoops.back();
  if (this->outermostEnclosingForOp != outermostEnclosingForOp) {
    LLVM_DEBUG(DBGS() << "--Unexpected outermost enclosing loop -> Skip\n");
    valid = false;
    return;
  }
  sliceOp = opToHoist.getSource().getDefiningOp<tensor::ExtractSliceOp>();
  if (!sliceOp) {
    LLVM_DEBUG(DBGS() << "--Cannot find the extract slice op -> Skip\n");
    valid = false;
    return;
  }
}
void HoistPaddingAnalysis::enableHoistPadding(RewriterBase &rewriter) {
  if (isInvalid())
    return;
  // If the padded source is not yet available above the outermost enclosing
  // loop, try to hoist redundant subset extract/insert ops out of it first.
  if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.getSource())) {
    outermostEnclosingForOp =
        hoistRedundantSubsetExtractInsert(rewriter, outermostEnclosingForOp);
  }
}
void HoistPaddingAnalysis::finalizeHoistPaddingAnalysis() {
  if (isInvalid())
    return;

  if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.getSource())) {
    LLVM_DEBUG(DBGS() << "--outermostEnclosingForOp:\n"
                      << outermostEnclosingForOp << "\n"
                      << "--sliceOp: " << sliceOp << "\n"
                      << "--sliceOp.getSource(): " << sliceOp.getSource()
                      << "\n");
    LLVM_DEBUG(DBGS() << "----Source not defined outside of loops -> Skip\n");
    valid = false;
    return;
  }
  if (sliceOp->hasOneUse()) {
    padConsumingForOp = dyn_cast<scf::ForOp>(*(sliceOp->getUsers().begin()));
  }

  // The pad region must depend on a constant only. Hoisting arbitrary padding
  // regions would require cloning all dependencies captured by the region.
  Value paddingValue = opToHoist.getConstantPaddingValue();
  if (!paddingValue ||
      !isa_and_nonnull<arith::ConstantOp>(paddingValue.getDefiningOp())) {
    LLVM_DEBUG(DBGS() << "Cannot find constant padding value -> Skip\n");
    valid = false;
    return;
  }

  computeBackwardSlice(opToHoist, outermostEnclosingForOp, backwardSlice);
  if (backwardSlice.size() <= 1) {
    valid = false;
    return;
  }

  debugPrintBackwardSlice(backwardSlice);
  // Keep only the ops needed to index the padded tensor.
  if (failed(dropNonIndexDependencies())) {
    LLVM_DEBUG(DBGS() << "--Cannot dropNonIndexDependencies -> Skip\n");
    valid = false;
    return;
  }
  debugPrintBackwardSlice(backwardSlice);

  // Only loops that remain in the filtered backward slice become packing
  // loops; the other loops access the same data on every iteration.
  for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops))
    if (backwardSlice.contains(forOp))
      packingLoops.push_back(forOp);

  // TODO: for multiple loops we need to track the use to the innermost loop.
  if (packingLoops.size() > 1 && padConsumingForOp) {
    LLVM_DEBUG(DBGS() << "--Cannot hoist multiple loops through iter_args -> "
                         "Downgrade to 1 loop\n");
    packingLoops.resize(1);
  }

  // The analysis is valid and hoisting can occur.
  valid = true;
}
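// In short, the analysis only declares itself valid when the slice source is
// available above the outermost loop, the pad uses a constant padding value,
// the backward slice is non-trivial, and every op kept in the slice is a pure
// index computation (or a loop/constant feeding one).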
LogicalResult HoistPaddingAnalysis::dropNonIndexDependencies() {
  // Set of all values used for index computation.
  SetVector<Value> indexEdges;

  // Add all index-typed operands of `operation` to `indexEdges`.
  auto addIndexOperandsToIndexEdges = [&](Operation *operation) {
    for (Value operand : operation->getOperands())
      if (operand.getType().isIndex())
        indexEdges.insert(operand);
  };

  // Check if any result of `operation` is used for index computation.
  auto hasIndexResult = [&](Operation *operation) {
    return llvm::any_of(operation->getResults(), [&](Value result) {
      return indexEdges.contains(result);
    });
  };

  // Starting from `opToHoist` and `sliceOp`, walk the index-typed use-def
  // edges in `backwardSlice` and mark every op that does not take part in the
  // index computation for removal.
  SetVector<Operation *> operationsToRemove;
  for (Operation *op : llvm::reverse(backwardSlice)) {
    // Seed the exploration with the index operands of `opToHoist`/`sliceOp`.
    if (op == opToHoist || op == sliceOp) {
      addIndexOperandsToIndexEdges(op);
      continue;
    }
    // Add the index operands of a loop if its induction variable is used for
    // index computation.
    if (auto forOp = dyn_cast<scf::ForOp>(op)) {
      if (!hasIndexResult(op) && indexEdges.contains(forOp.getInductionVar())) {
        addIndexOperandsToIndexEdges(op);
        continue;
      }
    }
    // Add the index operands of all other ops if at least one result is used
    // for index computation.
    if (hasIndexResult(op)) {
      addIndexOperandsToIndexEdges(op);
      // Check that the operands all have index type.
      if (llvm::any_of(op->getOperandTypes(),
                       [](Type type) { return !type.isIndex(); })) {
        LLVM_DEBUG(DBGS() << "Unsupported op with non index type operands: "
                          << op << " -> Skip\n");
        return failure();
      }
      // Check the op has no regions and no memory effects.
      auto effectInterface = dyn_cast<MemoryEffectOpInterface>(op);
      bool hasMemoryEffect = effectInterface && !effectInterface.hasNoEffect();
      if (hasMemoryEffect || op->getNumRegions() != 0) {
        LLVM_DEBUG(DBGS() << "Unsupported op with region or memory effect: "
                          << op << " -> Skip\n");
        return failure();
      }
      continue;
    }
    // Remove all other ops not used by the index computation, except for
    // constants that may be used by `opToHoist`.
    if (!isa<arith::ConstantOp>(op))
      operationsToRemove.insert(op);
  }
  backwardSlice.set_subtract(operationsToRemove);
  return success();
}
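// After this filtering, `backwardSlice` contains exactly the ops needed to
// recompute the padded tile's position: the pad, the slice, the packing loops,
// and the index arithmetic (plus constants) feeding them.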
SmallVector<Value>
HoistPaddingAnalysis::getHoistedPackedTensorSizes(RewriterBase &rewriter,
                                                  Location loc) const {
  SmallVector<Value> dynamicTensorSizes;

  // Upper-bound the packing loop lengths so the packed tensor sizes do not
  // depend on the enclosing loops.
  for (auto forOp : packingLoops) {
    // Compute an upper bound for the upper bound of `forOp`.
    FailureOr<OpFoldResult> loopUb = affine::reifyIndexValueBound(
        rewriter, loc, presburger::BoundType::UB, forOp.getUpperBound(),
        /*stopCondition=*/
        [&](Value v, std::optional<int64_t> d) {
          if (v == forOp.getUpperBound())
            return false;
          // Compute a bound that is independent of any affine op results.
          Operation *op = v.getDefiningOp();
          if (!op)
            return true;
          return !isa<affine::AffineMinOp, affine::AffineMaxOp,
                      affine::AffineApplyOp>(op);
        },
        /*closedUB=*/true);
    assert(succeeded(loopUb) && "could not get upper bound");
    Value ubVal = getValueOrCreateConstantIndexOp(rewriter, loc, *loopUb);

    // Compute the maximal packing loop length as (ub - lb).ceilDiv(step).
    AffineExpr lb, ub, step;
    bindDims(rewriter.getContext(), lb, ub);
    bindSymbols(rewriter.getContext(), step);
    Value res = rewriter.createOrFold<affine::AffineApplyOp>(
        loc, (ub - lb).ceilDiv(step),
        ValueRange{forOp.getLowerBound(), ubVal,
                   cast<scf::ForOp>(forOp).getStep()});
    dynamicTensorSizes.push_back(res);
  }

  return dynamicTensorSizes;
}
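// For a packing loop `scf.for %i = %lb to %ub step %s`, the corresponding
// leading dimension of the packed tensor is sized as (%ub - %lb) ceildiv %s,
// i.e. the maximal trip count, so the packed tensor's shape does not depend on
// the enclosing induction variables.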
/// Return the current iteration number in `forOp` as (iv - lb).ceilDiv(step).
static Value buildLoopIterationCount(RewriterBase &rewriter, scf::ForOp outer,
                                     scf::ForOp forOp) {
  // ...
  Value ivVal = forOp.getInductionVar(), lbVal = forOp.getLowerBound(),
        stepVal = forOp.getStep();
  auto loc = forOp->getLoc();
  // ... (materialize (iv - lb).ceilDiv(step) as an affine.apply on these)
}
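// The iteration count (iv - lb) ceildiv step is what indexes the leading,
// per-loop dimensions of the packed tensor: iteration k of a packing loop
// writes (and later reads) slice k along that dimension.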
/// Build the packing loop nest required to hoist `opToHoist` above
/// `outermostEnclosingForOp`. The loop nest is built just before
/// `outermostEnclosingForOp`.
static FailureOr<PackingResult> buildPackingLoopNestImpl(
    RewriterBase &rewriter, IRMapping &bvm, tensor::PadOp opToHoist,
    ArrayRef<int64_t> transposeVector, RankedTensorType transposedTensorType,
    tensor::EmptyOp emptyOp, const HoistPaddingAnalysis &analysis) {
  SmallVector<OpFoldResult> offsets, sizes, strides;
  SmallVector<Value> clonedLoopIvs, leadingHoistedPackedTensorIndexings;

  scf::ForOp outerLoop = analysis.outermostEnclosingForOp;

  Location loc = opToHoist->getLoc();
  RankedTensorType paddedTensorType = opToHoist.getResultType();
  int paddedRank = paddedTensorType.getRank();

  // Step 0. If the pad source comes through enclosing iter_args, map each such
  // block argument to the value it was initialized with outside the loop.
  BlockArgument bbArg = dyn_cast<BlockArgument>(opToHoist.getSource());
  while (bbArg) {
    auto forOp = dyn_cast<scf::ForOp>(bbArg.getOwner()->getParentOp());
    if (!forOp)
      break;
    if (forOp != outerLoop && !outerLoop->isAncestor(forOp))
      break;
    OpOperand &operand = forOp.getOpOperandForRegionIterArg(bbArg);
    bvm.map(bbArg, operand.get());
    bbArg = dyn_cast<BlockArgument>(operand.get());
  }

  // Step 1. Iteratively clone the ops of the backward slice, turning each
  // packing loop into a new scf.for that carries `hoistedPackedTensor` as an
  // iter_arg.
  Value hoistedPackedTensor = emptyOp.getResult();
  OpBuilder::InsertionGuard g(rewriter);
  for (Operation *op : analysis.backwardSlice) {
    // Specifically sit out in the extract_slice(hoistedPackedTensor) case:
    // this is the piece we seek to replace.
    if (auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(op)) {
      if (bvm.lookupOrDefault(sliceOp.getSource()) == hoistedPackedTensor) {
        LLVM_DEBUG(DBGS() << "--Skip: " << sliceOp << "\n");
        continue;
      }
    }

    // Clone all operations except loops, which require special handling.
    auto forOp = dyn_cast<scf::ForOp>(op);
    if (!forOp) {
      rewriter.clone(*op, bvm);
      continue;
    }

    // Create a packing loop that takes `hoistedPackedTensor` as iter_arg.
    auto clonedForOp = rewriter.create<scf::ForOp>(
        loc, bvm.lookupOrDefault(forOp.getLowerBound()),
        bvm.lookupOrDefault(forOp.getUpperBound()),
        bvm.lookupOrDefault(forOp.getStep()), hoistedPackedTensor);

    // Map the induction var, region args and results to the `clonedForOp`.
    bvm.map(forOp.getInductionVar(), clonedForOp.getInductionVar());
    bvm.map(forOp.getRegionIterArgs(), clonedForOp.getRegionIterArgs());
    bvm.map(forOp.getResults(), clonedForOp.getResults());
    assert(clonedForOp->getNumRegions() == 1);
    clonedLoopIvs.push_back(clonedForOp.getInductionVar());

    // Do not insert a guard here: we keep descending into the loop nest.
    rewriter.setInsertionPointToStart(&clonedForOp->getRegion(0).front());
    Value loopIndependentIterationCount =
        buildLoopIterationCount(rewriter, outerLoop, clonedForOp);

    // Assert the loop-independent iteration count can be computed.
    if (!loopIndependentIterationCount)
      llvm_unreachable("loop independence prerequisite not met");
    leadingHoistedPackedTensorIndexings.push_back(
        loopIndependentIterationCount);
    hoistedPackedTensor = clonedForOp.getRegionIterArgs().front();
  }

  // Step 2. Construct offsets, sizes and strides for the insert_slice into the
  // packed tensor:
  //   offsets = [loopIterationCounts, 0 .. 0]
  //   sizes   = [1 .. 1, transposedShape]
  //   strides = [1 .. 1]
  int64_t nPackedLoops = clonedLoopIvs.size();
  offsets =
      SmallVector<OpFoldResult>{leadingHoistedPackedTensorIndexings.begin(),
                                leadingHoistedPackedTensorIndexings.end()};
  offsets.append(paddedRank, rewriter.getIndexAttr(0));
  sizes = SmallVector<OpFoldResult>(nPackedLoops, rewriter.getIndexAttr(1));
  for (int64_t sz : transposedTensorType.getShape()) {
    // TODO: go grab dims when needed; atm tensor::PadOp yields a static shape.
    if (ShapedType::isDynamic(sz))
      return failure();
    sizes.push_back(rewriter.getIndexAttr(sz));
  }
  strides = SmallVector<OpFoldResult>(nPackedLoops + paddedRank,
                                      rewriter.getIndexAttr(1));

  // Step 3. Optionally transpose the padded tensor.
  GenericOp maybeTransposeOp;
  Value paddedTensor = bvm.lookup(opToHoist.getResult());
  if (!transposeVector.empty()) {
    Value outputTensor = rewriter.create<tensor::ExtractSliceOp>(
        loc, transposedTensorType, hoistedPackedTensor, offsets, sizes,
        strides);
    maybeTransposeOp = makeTransposeOp(rewriter, loc, paddedTensor,
                                       outputTensor, transposeVector);
    paddedTensor = maybeTransposeOp.getResult(0);
  }

  // The innermost insert_slice and the yields are only needed when packing
  // loops were actually created.
  if (nPackedLoops > 0) {
    // Step 4. Create an InsertSliceOp at the innermost loop level, inserting
    // the (optionally transposed) padded slice into the packed tensor.
    Value inserted = rewriter.create<tensor::InsertSliceOp>(
        loc, paddedTensor, hoistedPackedTensor, offsets, sizes, strides);

    // Step 5. Iteratively pop the stack of cloned loops and propagate the
    // yielded packed tensor outwards.
    Value valueToYield = inserted;
    for (Value iv : llvm::reverse(clonedLoopIvs)) {
      auto forOp = scf::getForInductionVarOwner(iv);
      rewriter.setInsertionPointToEnd(&forOp.getRegion().front());
      rewriter.create<scf::YieldOp>(loc, valueToYield);
      valueToYield = forOp.getResult(0);
    }
  }

  return PackingResult{
      offsets,
      sizes,
      strides,
      clonedLoopIvs,
      leadingHoistedPackedTensorIndexings,
      maybeTransposeOp,
      cast<tensor::PadOp>(bvm.lookup(opToHoist.getResult()).getDefiningOp())};
}
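// The PackingResult carries everything later steps need: the offsets / sizes /
// strides of a single packed tile, the cloned loop IVs, the per-loop iteration
// counts used as leading indices, the optional transpose, and the cloned pad
// (`hoistedPadOp`), which now lives above the original loops.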
/// Build the packing loop nest required to hoist `opToHoist` above
/// `outermostEnclosingForOp`, creating the packed tensor.empty destination.
static FailureOr<PackingResult> buildPackingLoopNestImpl(
    RewriterBase &rewriter, IRMapping &bvm, tensor::PadOp opToHoist,
    ArrayRef<int64_t> transposeVector, const HoistPaddingAnalysis &analysis) {
  // Update the actual number of loops, which may be smaller than requested.
  int nPackedLoops = analysis.packingLoops.size();
  LLVM_DEBUG(DBGS() << "\n";
             DBGS() << "Func:\n"
                    << *opToHoist->getParentOfType<func::FuncOp>() << "\n";
             DBGS() << "Start hoisting above " << nPackedLoops << " loops\n");

  Location loc = opToHoist->getLoc();
  RankedTensorType paddedTensorType = opToHoist.getResultType();

  // Compute the type of the transposed padded tensor.
  FailureOr<RankedTensorType> transposedTensorType =
      tensor::computeTransposedType(paddedTensorType, transposeVector);
  if (failed(transposedTensorType)) {
    LLVM_DEBUG(DBGS() << "--Could not compute transposed type -> Skip\n");
    return failure();
  }

  // Create the packed tensor<?x?x..?xtransposedShape>.
  SmallVector<int64_t> packedShape(nPackedLoops, ShapedType::kDynamic);
  llvm::append_range(packedShape, transposedTensorType->getShape());
  auto hoistedPackedTensorType = RankedTensorType::get(
      packedShape, transposedTensorType->getElementType());

  // Set the insertion point right before the outer loop and start packing.
  scf::ForOp outerLoop = analysis.outermostEnclosingForOp;
  OpBuilder::InsertionGuard g(rewriter);
  rewriter.setInsertionPoint(outerLoop);
  SmallVector<Value> dynamicTensorSizes =
      analysis.getHoistedPackedTensorSizes(rewriter, loc);
  auto emptyOp = rewriter.create<tensor::EmptyOp>(
      loc, hoistedPackedTensorType.getShape(),
      hoistedPackedTensorType.getElementType(), dynamicTensorSizes);

  return buildPackingLoopNestImpl(rewriter, bvm, opToHoist, transposeVector,
                                  *transposedTensorType, emptyOp, analysis);
}
/// Build the packing loop nest required to hoist `opToHoist` above
/// `outermostEnclosingForOp`.
FailureOr<PackingResult> mlir::linalg::detail::buildPackingLoopNest(
    RewriterBase &rewriter, tensor::PadOp opToHoist,
    scf::ForOp outermostEnclosingForOp, ArrayRef<int64_t> transposeVector) {
  HoistPaddingAnalysis analysis(opToHoist, outermostEnclosingForOp);
  analysis.enableHoistPadding(rewriter);
  analysis.finalizeHoistPaddingAnalysis();
  if (!analysis.isValid()) {
    LLVM_DEBUG(DBGS() << "--Analysis failed -> Skip\n");
    return failure();
  }
  IRMapping bvm;
  return buildPackingLoopNestImpl(rewriter, bvm, opToHoist, transposeVector,
                                  analysis);
}
/// Return true if we can walk back the use-def chain from `extractSliceOp` to
/// `expectedSource`, going only through DestinationStyleOpInterface inits.
static bool tracesBackToExpectedValue(tensor::ExtractSliceOp extractSliceOp,
                                      Value expectedSource) {
  LLVM_DEBUG(DBGS() << "Start tracesBackToExpectedValue on: " << extractSliceOp
                    << "\n");
  LLVM_DEBUG(DBGS() << "--with extractSlice: " << extractSliceOp << "\n");
  Value source = extractSliceOp.getSource();
  LLVM_DEBUG(DBGS() << "--with starting source: " << source << "\n");
  while (source && source != expectedSource) {
    auto destOp =
        dyn_cast_or_null<DestinationStyleOpInterface>(source.getDefiningOp());
    if (!destOp)
      break;
    LLVM_DEBUG(DBGS() << "--step dest op: " << destOp << "\n");
    source = destOp.getDpsInitOperand(cast<OpResult>(source).getResultNumber())
                 ->get();
  }
  LLVM_DEBUG(DBGS() << "--final source: " << source << "\n");
  LLVM_DEBUG(DBGS() << "--expected source: " << expectedSource << "\n");
  return source == expectedSource;
}
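// Example of the walk above: if the loop yields
//   %y = tensor.extract_slice %r ...
//   %r = linalg.generic ... outs(%init : tensor<...>)
// the traversal steps from %r to %init and keeps following destination (init)
// operands until it reaches `expectedSource` or a producer that does not
// implement DestinationStyleOpInterface.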
/// If the original consumer of `outerSliceOp` was a `forOp` (i.e. the padded
/// value was threaded through an iter_arg), propagate `hoistedPackedTensor`
/// through the same iter_arg and return the extract_slice of the new loop
/// result that replaces the padded value. Return a null op if no rewrite
/// happened.
static tensor::ExtractSliceOp
padThroughLoopIterArg(RewriterBase &rewriter, Value paddedValueBeforeHoisting,
                      Value hoistedPackedTensor,
                      tensor::ExtractSliceOp outerSliceOp, scf::ForOp forOp) {
  LLVM_DEBUG(DBGS() << "Start padThroughLoopIterArg on: " << forOp << "\n");
  LLVM_DEBUG(DBGS() << "--paddedValueBeforeHoisting: "
                    << paddedValueBeforeHoisting << "\n");

  // Find the unique use of `outerSliceOp` inside `forOp`.
  OpOperand *pUse = nullptr;
  for (OpOperand &use : outerSliceOp->getUses()) {
    if (use.getOwner() == forOp) {
      assert(!pUse && "Multiple slice uses in the for loop");
      pUse = &use;
    }
  }
  assert(pUse && "No slice use in the for loop");

  OpBuilder::InsertionGuard g(rewriter);
  rewriter.setInsertionPointAfter(hoistedPackedTensor.getDefiningOp());

  unsigned iterArgNumber = forOp.getResultForOpOperand(*pUse).getResultNumber();
  auto yieldOp = cast<scf::YieldOp>(forOp.getBody(0)->getTerminator());
  auto yieldingExtractSliceOp = yieldOp->getOperand(iterArgNumber)
                                    .getDefiningOp<tensor::ExtractSliceOp>();
  if (!yieldingExtractSliceOp)
    return tensor::ExtractSliceOp();

  // Poor man's analysis, sufficient to check that the yielded extract_slice
  // indeed traces back to the tensor.pad we are hoisting.
  if (!tracesBackToExpectedValue(yieldingExtractSliceOp,
                                 paddedValueBeforeHoisting))
    return tensor::ExtractSliceOp();

  SmallVector<Value> initArgs = llvm::to_vector(forOp.getInitArgs());
  initArgs[iterArgNumber] = hoistedPackedTensor;
  SmallVector<Value> yieldOperands = llvm::to_vector(yieldOp.getOperands());
  yieldOperands[iterArgNumber] = yieldingExtractSliceOp.getSource();

  int64_t numOriginalForOpResults = initArgs.size();
  LLVM_DEBUG(DBGS() << "numOriginalForOpResults: " << numOriginalForOpResults
                    << "\n");
  tensor::ExtractSliceOp extracted;
  {
    OpBuilder::InsertionGuard g(rewriter);
    rewriter.setInsertionPointAfter(forOp);
    extracted = rewriter.create<tensor::ExtractSliceOp>(
        hoistedPackedTensor.getLoc(), hoistedPackedTensor,
        outerSliceOp.getMixedOffsets(), outerSliceOp.getMixedSizes(),
        outerSliceOp.getMixedStrides());
    rewriter.replaceAllUsesWith(forOp.getResult(iterArgNumber), extracted);
  }
  scf::ForOp newForOp = replaceLoopWithNewYields(
      rewriter, forOp, initArgs,
      [&](OpBuilder &b, Location loc, ArrayRef<BlockArgument> newBBArgs) {
        return yieldOperands;
      });

  LLVM_DEBUG(DBGS() << "newForOp results: " << newForOp.getNumResults()
                    << "\n");
  LLVM_DEBUG(DBGS() << "replace source of: " << extracted << "\n");
  LLVM_DEBUG(DBGS() << "with result #"
                    << numOriginalForOpResults + iterArgNumber
                    << " of forOp, giving us: " << extracted << "\n");
  rewriter.startRootUpdate(extracted);
  extracted.getSourceMutable().assign(
      newForOp.getResult(numOriginalForOpResults + iterArgNumber));
  rewriter.finalizeRootUpdate(extracted);

  LLVM_DEBUG(DBGS() << "replace uses of: " << paddedValueBeforeHoisting
                    << "\n");
  LLVM_DEBUG(DBGS() << "with region iter arg #"
                    << numOriginalForOpResults + iterArgNumber << "\n");
  rewriter.replaceAllUsesWith(
      paddedValueBeforeHoisting,
      newForOp.getRegionIterArg(numOriginalForOpResults + iterArgNumber));

  return extracted;
}
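// Conceptually, the rewrite above turns (illustrative sketch, simplified; the
// actual implementation appends new iter_args/results instead of editing the
// existing ones in place):
//
//   %s = tensor.extract_slice ...                 // outerSliceOp
//   %f = scf.for ... iter_args(%arg0 = %s) {
//     %p = tensor.pad %arg0 ...                   // paddedValueBeforeHoisting
//     %1 = compute(%p)
//     %2 = tensor.extract_slice %1 ...
//     scf.yield %2
//   }
//
// into:
//
//   %s = tensor.extract_slice ...
//   %packed = ... hoisted packing of the padded tiles ...
//   %f = scf.for ... iter_args(%arg0 = %packed) {
//     %1 = compute(%arg0)
//     scf.yield %1
//   }
//   %2 = tensor.extract_slice %f ...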
/// Produce a tensor extracted from the packingResult. This can be used as a
/// replacement for `opToHoist` in callers.
static Value replaceByPackingResult(RewriterBase &rewriter,
                                    const IRMapping &bvm,
                                    tensor::PadOp opToHoist,
                                    RankedTensorType transposedTensorType,
                                    const HoistPaddingAnalysis &analysis,
                                    const PackingResult &packingResult) {
  // The replacement occurs under a single insertion point within the original
  // loop, just before `opToHoist`.
  OpBuilder::InsertionGuard g(rewriter);
  rewriter.setInsertionPoint(opToHoist);

  Location loc = opToHoist->getLoc();
  RankedTensorType paddedTensorType = opToHoist.getResultType();
  int paddedRank = paddedTensorType.getRank();

  int64_t nPackedLoops = packingResult.clonedLoopIvs.size();
  LLVM_DEBUG(DBGS() << "nPackedLoops: " << nPackedLoops << " loops\n");

  scf::ForOp outerLoop = analysis.outermostEnclosingForOp;
  ArrayRef<scf::ForOp> packingLoops = analysis.packingLoops;

  Value hoistedPackedTensor;
  SmallVector<Value> loopIterationCounts;
  SmallVector<OpFoldResult> offsets(nPackedLoops + paddedRank,
                                    rewriter.getIndexAttr(0));
  if (nPackedLoops > 0) {
    loopIterationCounts =
        llvm::to_vector<4>(llvm::map_range(packingLoops, [&](Operation *loop) {
          return buildLoopIterationCount(rewriter, outerLoop,
                                         cast<scf::ForOp>(loop));
        }));
    // Assert all loop iteration counts can be computed.
    if (llvm::any_of(loopIterationCounts, [](Value v) { return !v; }))
      llvm_unreachable("loop independence prerequisite not met");

    // offsets = [maybe_leading_ivs = originalLoopIvs, 0 .. 0].
    std::copy(loopIterationCounts.begin(), loopIterationCounts.end(),
              offsets.begin());
    hoistedPackedTensor =
        scf::getForInductionVarOwner(packingResult.clonedLoopIvs.front())
            ->getResult(0);
  } else {
    // If no loops were created, this is just hoisting without packing.
    hoistedPackedTensor = bvm.lookup(opToHoist.getResult());
  }

  LLVM_DEBUG(DBGS() << "hoistedPackedTensor: " << hoistedPackedTensor << "\n");

  // If the consumer of `sliceOp` was a forOp, propagate the packed tensor
  // through the same iter_arg.
  scf::ForOp forOp = analysis.padConsumingForOp;
  if (forOp) {
    return padThroughLoopIterArg(rewriter, opToHoist, hoistedPackedTensor,
                                 analysis.sliceOp, forOp);
  }

  // offsets = [maybe_leading_ivs, 0 .. 0].
  // sizes   = [1 .. 1, transposedShape] (from the packingResult).
  // strides = [1 .. 1].
  return rewriter.create<tensor::ExtractSliceOp>(
      loc, transposedTensorType, hoistedPackedTensor, offsets,
      packingResult.sizes, packingResult.strides);
}
FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
    RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops,
    ArrayRef<int64_t> transposeVector, tensor::PadOp &hoistedOp,
    SmallVectorImpl<GenericOp> &transposeOps) {
  LLVM_DEBUG(DBGS() << "\n"; DBGS() << " Try to hoist " << *(opToHoist) << "\n";
             DBGS() << " by " << numLoops << " loops\n");

  HoistPaddingAnalysis analysis(opToHoist, numLoops);
  analysis.enableHoistPadding(rewriter);
  analysis.finalizeHoistPaddingAnalysis();
  if (!analysis.isValid()) {
    LLVM_DEBUG(DBGS() << "--Analysis failed -> Skip\n");
    return failure();
  }

  // Construct the packing loop nest.
  IRMapping bvm;
  FailureOr<PackingResult> packingResult = buildPackingLoopNestImpl(
      rewriter, bvm, opToHoist, transposeVector, analysis);
  if (failed(packingResult)) {
    LLVM_DEBUG(DBGS() << "--buildPackingLoopNestImpl failed -> Skip\n");
    return failure();
  }

  if (!transposeVector.empty())
    transposeOps.push_back(packingResult->maybeTransposeOp);

  FailureOr<RankedTensorType> transposedTensorType =
      tensor::computeTransposedType(opToHoist.getResultType(), transposeVector);
  assert(succeeded(transposedTensorType) && "unexpected failure in type");

  // Now that the packed tensor is ready, replace the original padding op by a
  // 1x..x1 slice [originalLoopIvs, 0 .. 0][1 .. 1, paddedShape][1 .. 1].
  Value newResult = replaceByPackingResult(rewriter, bvm, opToHoist,
                                           *transposedTensorType, analysis,
                                           *packingResult);

  Location loc = opToHoist->getLoc();
  RankedTensorType paddedTensorType = opToHoist.getResultType();
  if (!transposeVector.empty()) {
    OpBuilder::InsertionGuard g(rewriter);
    rewriter.setInsertionPointAfter(newResult.getDefiningOp());
    // Transpose the packed tensor back to the original storage order.
    Value emptyTensor = rewriter.create<tensor::EmptyOp>(
        loc, paddedTensorType.getShape(), paddedTensorType.getElementType());
    GenericOp unTransposeOp =
        makeTransposeOp(rewriter, loc, newResult, emptyTensor, transposeVector);
    newResult = unTransposeOp.getResult(0);
    transposeOps.push_back(unTransposeOp);
  }

  LLVM_DEBUG(DBGS() << "newResult: " << newResult << "\n");
  LLVM_DEBUG(
      DBGS() << "After hoisting: "
             << newResult.getDefiningOp()->getParentOfType<func::FuncOp>()
             << "\n");

  // Make the newly cloned `opToHoist` available to the caller.
  hoistedOp = packingResult->hoistedPadOp;

  LLVM_DEBUG(DBGS() << "--SUCCESS\n");
  return newResult;
}
/// Entry point that does not take an external rewriter: create an IRRewriter
/// on the fly and forward to the implementation above.
FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
    tensor::PadOp opToHoist, int64_t numLoops,
    ArrayRef<int64_t> transposeVector, tensor::PadOp &hoistedOp,
    SmallVectorImpl<GenericOp> &transposeOps) {
  IRRewriter rewriter(opToHoist.getContext());
  return hoistPaddingOnTensors(rewriter, opToHoist, numLoops, transposeVector,
                               hoistedOp, transposeOps);
}
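// A hypothetical driver (illustration only; `funcOp`, the walk, and the choice
// of numLoops are assumptions, not part of this file) could look like:
//
//   SmallVector<tensor::PadOp> pads;
//   funcOp.walk([&](tensor::PadOp padOp) { pads.push_back(padOp); });
//   IRRewriter rewriter(funcOp.getContext());
//   for (tensor::PadOp padOp : pads) {
//     tensor::PadOp hoistedOp;
//     SmallVector<GenericOp> transposeOps;
//     FailureOr<Value> replacement = hoistPaddingOnTensors(
//         rewriter, padOp, /*numLoops=*/1, /*transposeVector=*/{}, hoistedOp,
//         transposeOps);
//     if (succeeded(replacement))
//       rewriter.replaceOp(padOp, *replacement);
//   }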