24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SetVector.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallVector.h"
44 bool replaceIterOperandsUsesInLoop) {
50 assert(loopNest.size() <= 10 &&
51 "exceeded recursion limit when yielding value from loop nest");
83 if (loopNest.size() == 1) {
85 cast<scf::ForOp>(*loopNest.back().replaceWithAdditionalYields(
86 rewriter, newIterOperands, replaceIterOperandsUsesInLoop,
88 return {innerMostLoop};
98 innerNewBBArgs, newYieldValuesFn,
99 replaceIterOperandsUsesInLoop);
100 return llvm::to_vector(llvm::map_range(
101 newLoopNest.front().getResults().take_back(innerNewBBArgs.size()),
104 scf::ForOp outerMostLoop =
105 cast<scf::ForOp>(*loopNest.front().replaceWithAdditionalYields(
106 rewriter, newIterOperands, replaceIterOperandsUsesInLoop, fn));
107 newLoopNest.insert(newLoopNest.begin(), outerMostLoop);
124 func::CallOp *callOp) {
125 assert(!funcName.empty() &&
"funcName cannot be empty");
139 ValueRange outlinedValues(captures.getArrayRef());
146 outlinedFuncArgTypes.push_back(arg.getType());
147 outlinedFuncArgLocs.push_back(arg.getLoc());
149 for (
Value value : outlinedValues) {
150 outlinedFuncArgTypes.push_back(value.getType());
151 outlinedFuncArgLocs.push_back(value.getLoc());
153 FunctionType outlinedFuncType =
157 rewriter.
create<func::FuncOp>(loc, funcName, outlinedFuncType);
158 Block *outlinedFuncBody = outlinedFunc.addEntryBlock();
163 auto outlinedFuncBlockArgs = outlinedFuncBody->getArguments();
168 originalBlock, outlinedFuncBody,
169 outlinedFuncBlockArgs.take_front(numOriginalBlockArguments));
179 ®ion, region.
begin(),
180 TypeRange{outlinedFuncArgTypes}.take_front(numOriginalBlockArguments),
182 .take_front(numOriginalBlockArguments));
187 llvm::append_range(callValues, newBlock->
getArguments());
188 llvm::append_range(callValues, outlinedValues);
189 auto call = rewriter.
create<func::CallOp>(loc, outlinedFunc, callValues);
198 rewriter.
clone(*originalTerminator, bvm);
199 rewriter.
eraseOp(originalTerminator);
204 for (
auto it : llvm::zip(outlinedValues, outlinedFuncBlockArgs.take_back(
205 outlinedValues.size()))) {
206 Value orig = std::get<0>(it);
207 Value repl = std::get<1>(it);
217 return outlinedFunc->isProperAncestor(opOperand.
getOwner());
225 func::FuncOp *thenFn, StringRef thenFnName,
226 func::FuncOp *elseFn, StringRef elseFnName) {
230 if (thenFn && !ifOp.getThenRegion().empty()) {
232 rewriter, loc, ifOp.getThenRegion(), thenFnName);
233 if (
failed(outlinedFuncOpOrFailure))
235 *thenFn = *outlinedFuncOpOrFailure;
237 if (elseFn && !ifOp.getElseRegion().empty()) {
239 rewriter, loc, ifOp.getElseRegion(), elseFnName);
240 if (
failed(outlinedFuncOpOrFailure))
242 *elseFn = *outlinedFuncOpOrFailure;
249 assert(rootOp !=
nullptr &&
"Root operation must not be a nullptr.");
250 bool rootEnclosesPloops =
false;
252 for (
Block &block : region.getBlocks()) {
255 rootEnclosesPloops |= enclosesPloops;
256 if (
auto ploop = dyn_cast<scf::ParallelOp>(op)) {
257 rootEnclosesPloops =
true;
261 result.push_back(ploop);
266 return rootEnclosesPloops;
274 assert(divisor > 0 &&
"expected positive divisor");
275 assert(dividend.
getType().
isIndex() &&
"expected index-typed value");
277 Value divisorMinusOneCst =
278 builder.
create<arith::ConstantIndexOp>(loc, divisor - 1);
279 Value divisorCst = builder.
create<arith::ConstantIndexOp>(loc, divisor);
280 Value sum = builder.
create<arith::AddIOp>(loc, dividend, divisorMinusOneCst);
281 return builder.
create<arith::DivUIOp>(loc, sum, divisorCst);
290 assert(dividend.
getType().
isIndex() &&
"expected index-typed value");
292 Value cstOne = builder.
create<arith::ConstantIndexOp>(loc, 1);
293 Value divisorMinusOne = builder.
create<arith::SubIOp>(loc, divisor, cstOne);
294 Value sum = builder.
create<arith::AddIOp>(loc, dividend, divisorMinusOne);
295 return builder.
create<arith::DivUIOp>(loc, sum, divisor);
303 Block *loopBodyBlock,
Value forOpIV, uint64_t unrollFactor,
321 for (
unsigned i = 1; i < unrollFactor; i++) {
325 operandMap.
map(iterArgs, lastYielded);
330 Value ivUnroll = ivRemapFn(i, forOpIV, builder);
331 operandMap.
map(forOpIV, ivUnroll);
335 for (
auto it = loopBodyBlock->
begin(); it != std::next(srcBlockEnd); it++) {
337 annotateFn(i, clonedOp, builder);
341 for (
unsigned i = 0, e = lastYielded.size(); i < e; i++)
342 lastYielded[i] = operandMap.
lookup(yieldedValues[i]);
347 for (
auto it = loopBodyBlock->
begin(); it != std::next(srcBlockEnd); it++)
348 annotateFn(0, &*it, builder);
356 scf::ForOp forOp, uint64_t unrollFactor,
358 assert(unrollFactor > 0 &&
"expected positive unroll factor");
361 if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
368 auto loc = forOp.getLoc();
369 Value step = forOp.getStep();
370 Value upperBoundUnrolled;
372 bool generateEpilogueLoop =
true;
377 if (lbCstOp && ubCstOp && stepCstOp) {
379 int64_t lbCst = lbCstOp.value();
380 int64_t ubCst = ubCstOp.value();
381 int64_t stepCst = stepCstOp.value();
382 assert(lbCst >= 0 && ubCst >= 0 && stepCst >= 0 &&
383 "expected positive loop bounds and step");
386 if (unrollFactor == 1) {
387 if (tripCount == 1 &&
failed(forOp.promoteIfSingleIteration(rewriter)))
392 int64_t tripCountEvenMultiple = tripCount - (tripCount % unrollFactor);
393 int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
394 int64_t stepUnrolledCst = stepCst * unrollFactor;
397 generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
398 if (generateEpilogueLoop)
400 loc, upperBoundUnrolledCst);
402 upperBoundUnrolled = forOp.getUpperBound();
405 stepUnrolled = stepCst == stepUnrolledCst
408 loc, stepUnrolledCst);
413 auto lowerBound = forOp.getLowerBound();
414 auto upperBound = forOp.getUpperBound();
416 boundsBuilder.
create<arith::SubIOp>(loc, upperBound, lowerBound);
418 Value unrollFactorCst =
421 boundsBuilder.
create<arith::RemSIOp>(loc, tripCount, unrollFactorCst);
423 Value tripCountEvenMultiple =
424 boundsBuilder.
create<arith::SubIOp>(loc, tripCount, tripCountRem);
426 upperBoundUnrolled = boundsBuilder.
create<arith::AddIOp>(
428 boundsBuilder.
create<arith::MulIOp>(loc, tripCountEvenMultiple, step));
431 boundsBuilder.
create<arith::MulIOp>(loc, step, unrollFactorCst);
435 if (generateEpilogueLoop) {
436 OpBuilder epilogueBuilder(forOp->getContext());
439 auto epilogueForOp = cast<scf::ForOp>(epilogueBuilder.
clone(*forOp));
440 epilogueForOp.setLowerBound(upperBoundUnrolled);
443 auto results = forOp.getResults();
444 auto epilogueResults = epilogueForOp.getResults();
446 for (
auto e : llvm::zip(results, epilogueResults)) {
447 std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
449 epilogueForOp->setOperands(epilogueForOp.getNumControlOperands(),
450 epilogueForOp.getInitArgs().size(), results);
451 (void)epilogueForOp.promoteIfSingleIteration(rewriter);
455 forOp.setUpperBound(upperBoundUnrolled);
456 forOp.setStep(stepUnrolled);
458 auto iterArgs =
ValueRange(forOp.getRegionIterArgs());
459 auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
462 forOp.getBody(), forOp.getInductionVar(), unrollFactor,
465 auto stride = b.create<arith::MulIOp>(
466 loc, step, b.create<arith::ConstantIndexOp>(loc, i));
467 return b.create<arith::AddIOp>(loc, iv, stride);
469 annotateFn, iterArgs, yieldedValues);
471 (void)forOp.promoteIfSingleIteration(rewriter);
481 Value inductionVar) {
484 bool isZeroBased =
false;
486 isZeroBased = ubCst.value() == 0;
488 bool isStepOne =
false;
490 isStepOne = stepCst.value() == 1;
495 if (isZeroBased && isStepOne)
496 return {lowerBound, upperBound,
499 Value diff = boundsBuilder.
create<arith::SubIOp>(loc, upperBound, lowerBound);
500 Value newUpperBound =
501 boundsBuilder.
create<arith::CeilDivSIOp>(loc, diff, step);
503 Value newLowerBound =
504 isZeroBased ? lowerBound
505 : boundsBuilder.
create<arith::ConstantIndexOp>(loc, 0);
507 isStepOne ? step : boundsBuilder.
create<arith::ConstantIndexOp>(loc, 1);
514 : insideLoopBuilder.
create<arith::MulIOp>(loc, inductionVar, step);
518 : insideLoopBuilder.
create<arith::AddIOp>(loc, scaled, lowerBound);
521 shifted.getDefiningOp()};
523 return {newLowerBound, newUpperBound,
536 static void normalizeLoop(scf::ForOp loop, scf::ForOp outer, scf::ForOp inner) {
539 auto loopPieces =
normalizeLoop(builder, innerBuilder, loop.getLoc(),
540 loop.getLowerBound(), loop.getUpperBound(),
541 loop.getStep(), loop.getInductionVar());
543 loop.setLowerBound(loopPieces.lowerBound);
544 loop.setUpperBound(loopPieces.upperBound);
545 loop.setStep(loopPieces.step);
549 if (loops.size() < 2)
552 scf::ForOp innermost = loops.back();
553 scf::ForOp outermost = loops.front();
557 for (
auto loop : loops)
564 Value upperBound = outermost.getUpperBound();
565 for (
auto loop : loops.drop_front())
567 builder.
create<arith::MulIOp>(loc, upperBound, loop.getUpperBound());
568 outermost.setUpperBound(upperBound);
580 Value previous = outermost.getInductionVar();
581 for (
unsigned i = 0, e = loops.size(); i < e; ++i) {
582 unsigned idx = loops.size() - i - 1;
584 previous = builder.
create<arith::DivSIOp>(loc, previous,
585 loops[idx + 1].getUpperBound());
587 Value iv = (i == e - 1) ? previous
588 : builder.
create<arith::RemSIOp>(
589 loc, previous, loops[idx].getUpperBound());
591 loops.back().getRegion());
596 scf::ForOp second = loops[1];
597 innermost.getBody()->back().erase();
598 outermost.getBody()->getOperations().splice(
600 innermost.getBody()->getOperations());
606 scf::ParallelOp loops,
ArrayRef<std::vector<unsigned>> combinedDimensions) {
611 auto sortedDimensions = llvm::to_vector<3>(combinedDimensions);
612 for (
auto &dims : sortedDimensions)
617 normalizedUpperBounds;
618 for (
unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
622 loops.getLowerBound()[i], loops.getUpperBound()[i],
623 loops.getStep()[i], loops.getBody()->getArgument(i));
625 normalizedLowerBounds.push_back(resultBounds.lowerBound);
626 normalizedUpperBounds.push_back(resultBounds.upperBound);
627 normalizedSteps.push_back(resultBounds.step);
634 for (
unsigned i = 0, e = sortedDimensions.size(); i < e; ++i) {
636 for (
auto idx : sortedDimensions[i]) {
637 newUpperBound = outsideBuilder.
create<arith::MulIOp>(
638 loc, newUpperBound, normalizedUpperBounds[idx]);
640 lowerBounds.push_back(cst0);
641 steps.push_back(cst1);
642 upperBounds.push_back(newUpperBound);
651 auto newPloop = outsideBuilder.
create<scf::ParallelOp>(
652 loc, lowerBounds, upperBounds, steps,
654 for (
unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
655 Value previous = ploopIVs[i];
656 unsigned numberCombinedDimensions = combinedDimensions[i].size();
658 for (
unsigned j = numberCombinedDimensions - 1;
j > 0; --
j) {
659 unsigned idx = combinedDimensions[i][
j];
662 Value iv = insideBuilder.create<arith::RemSIOp>(
663 loc, previous, normalizedUpperBounds[idx]);
669 previous = insideBuilder.create<arith::DivSIOp>(
670 loc, previous, normalizedUpperBounds[idx]);
674 unsigned idx = combinedDimensions[i][0];
676 previous, loops.getRegion());
681 loops.getBody()->back().
erase();
682 newPloop.getBody()->getOperations().splice(
684 loops.getBody()->getOperations());
697 return op != inner.getOperation();
702 for (
auto &op : outer.getBody()->without_terminator()) {
704 if (&op == inner.getOperation())
707 if (forwardSlice.count(&op) > 0) {
712 if (isa<scf::ForOp>(op))
725 toHoist.push_back(&op);
727 auto *outerForOp = outer.getOperation();
728 for (
auto *op : toHoist)
739 const Loops &interTile = tileLoops.first;
740 const Loops &intraTile = tileLoops.second;
741 auto size = interTile.size();
742 assert(size == intraTile.size());
745 for (
unsigned s = 1; s < size; ++s)
748 for (
unsigned s = 1; s < size; ++s)
758 template <
typename T>
762 for (
unsigned i = 0; i < maxLoops; ++i) {
763 forOps.push_back(rootForOp);
765 if (body.
begin() != std::prev(body.
end(), 2))
768 rootForOp = dyn_cast<T>(&body.
front());
776 auto originalStep = forOp.getStep();
777 auto iv = forOp.getInductionVar();
780 forOp.setStep(b.
create<arith::MulIOp>(forOp.getLoc(), originalStep, factor));
783 for (
auto t : targets) {
785 auto begin = t.getBody()->begin();
786 auto nOps = t.getBody()->getOperations().size();
790 Value stepped = b.
create<arith::AddIOp>(t.getLoc(), iv, forOp.getStep());
791 Value less = b.
create<arith::CmpIOp>(t.getLoc(), arith::CmpIPredicate::slt,
792 forOp.getUpperBound(), stepped);
793 Value ub = b.
create<arith::SelectOp>(t.getLoc(), less,
794 forOp.getUpperBound(), stepped);
797 auto newForOp = b.
create<scf::ForOp>(t.getLoc(), iv, ub, originalStep);
798 newForOp.getBody()->getOperations().splice(
799 newForOp.getBody()->getOperations().begin(),
800 t.getBody()->getOperations(), begin, std::next(begin, nOps - 1));
802 newForOp.getRegion());
804 innerLoops.push_back(newForOp);
812 template <
typename SizeType>
820 assert(res.size() == 1 &&
"Expected 1 inner forOp");
829 for (
auto it : llvm::zip(forOps, sizes)) {
830 auto step =
stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
832 currentTargets = step;
841 assert(loops.size() == 1);
842 res.push_back(loops[0]);
851 forOps.reserve(sizes.size());
853 if (forOps.size() < sizes.size())
854 sizes = sizes.take_front(forOps.size());
869 forOps.reserve(sizes.size());
871 if (forOps.size() < sizes.size())
872 sizes = sizes.take_front(forOps.size());
879 tileSizes.reserve(sizes.size());
880 for (
unsigned i = 0, e = sizes.size(); i < e; ++i) {
881 assert(sizes[i] > 0 &&
"expected strictly positive size for strip-mining");
883 auto forOp = forOps[i];
885 auto loc = forOp.getLoc();
886 Value diff = builder.
create<arith::SubIOp>(loc, forOp.getUpperBound(),
887 forOp.getLowerBound());
889 Value iterationsPerBlock =
891 tileSizes.push_back(iterationsPerBlock);
895 auto intraTile =
tile(forOps, tileSizes, forOps.back());
896 TileLoops tileLoops = std::make_pair(forOps, intraTile);
907 scf::ForallOp source,
909 unsigned numTargetOuts = target.getNumResults();
910 unsigned numSourceOuts = source.getNumResults();
917 fusedOuts.reserve(numTargetOuts + numSourceOuts);
918 fusedOuts.append(targetOuts.begin(), targetOuts.end());
919 fusedOuts.append(sourceOuts.begin(), sourceOuts.end());
923 scf::ForallOp fusedLoop = rewriter.
create<scf::ForallOp>(
924 source.getLoc(), source.getMixedLowerBound(), source.getMixedUpperBound(),
925 source.getMixedStep(), fusedOuts, source.getMapping());
929 fusedMapping.
map(target.getInductionVars(), fusedLoop.getInductionVars());
930 fusedMapping.map(source.getInductionVars(), fusedLoop.getInductionVars());
933 fusedMapping.map(target.getOutputBlockArguments(),
934 fusedLoop.getOutputBlockArguments().slice(0, numTargetOuts));
936 source.getOutputBlockArguments(),
937 fusedLoop.getOutputBlockArguments().slice(numTargetOuts, numSourceOuts));
941 for (
Operation &op : target.getBody()->without_terminator())
942 rewriter.
clone(op, fusedMapping);
943 for (
Operation &op : source.getBody()->without_terminator())
944 rewriter.
clone(op, fusedMapping);
947 scf::InParallelOp targetTerm = target.getTerminator();
948 scf::InParallelOp sourceTerm = source.getTerminator();
949 scf::InParallelOp fusedTerm = fusedLoop.getTerminator();
952 for (
Operation &op : targetTerm.getYieldingOps())
953 rewriter.
clone(op, fusedMapping);
954 for (
Operation &op : sourceTerm.getYieldingOps())
955 rewriter.
clone(op, fusedMapping);
959 fusedLoop.getResults().slice(0, numTargetOuts));
962 fusedLoop.getResults().slice(numTargetOuts, numSourceOuts));
static LogicalResult tryIsolateBands(const TileLoops &tileLoops)
static void getPerfectlyNestedLoopsImpl(SmallVectorImpl< T > &forOps, T rootForOp, unsigned maxLoops=std::numeric_limits< unsigned >::max())
Collect perfectly nested loops starting from rootForOp.
static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner)
static void generateUnrolledLoop(Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues)
Generates unrolled copies of scf::ForOp 'loopBodyBlock', with associated 'forOpIV' by 'unrollFactor',...
static Loops stripmineSink(scf::ForOp forOp, Value factor, ArrayRef< scf::ForOp > targets)
static LoopParams normalizeLoop(OpBuilder &boundsBuilder, OpBuilder &insideLoopBuilder, Location loc, Value lowerBound, Value upperBound, Value step, Value inductionVar)
Return the new lower bound, upper bound, and step in that order.
static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, int64_t divisor)
static llvm::ManagedStatic< PassManagerOptions > options
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
This class represents an argument of a Block.
Block represents an ordered list of Operations.
OpListType::iterator iterator
unsigned getNumArguments()
Operation * getTerminator()
Get the terminator operation of this block.
BlockArgListType getArguments()
MLIRContext * getContext() const
This class provides support for representing a failure result, or a valid value of type T.
This is a utility class for mapping one set of IR entities to another.
auto lookup(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
static OpBuilder atBlockBegin(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the first operation in the block but still ins...
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
static OpBuilder atBlockTerminator(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the block terminator.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes=std::nullopt, ArrayRef< Location > locs=std::nullopt)
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents an operand of an operation.
This is a value defined by a result of an operation.
This class implements the operand iterators for the Operation class.
Operation is the basic unit of execution within MLIR.
Operation * clone(IRMapping &mapper, CloneOptions options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation...
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
unsigned getNumRegions()
Returns the number of regions held by this operation.
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
operand_type_range getOperandTypes()
result_type_range getResultTypes()
operand_range getOperands()
Returns an iterator on the underlying Value's.
void moveBefore(Operation *existingOp)
Unlink this operation from its current block and insert it right before existingOp which may be in th...
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
void erase()
Remove this operation from its parent block and delete it.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
BlockArgListType getArguments()
ParentT getParentOfType()
Find the first parent operation of the given type, or nullptr if there is no ancestor operation.
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
void replaceAllUsesWith(Value from, Value to)
Find uses of from and replace them with to.
void mergeBlocks(Block *source, Block *dest, ValueRange argValues=std::nullopt)
Inline the operations of block 'source' into the end of block 'dest'.
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
This class provides an abstraction over the various different ranges of value types.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
void replaceUsesWithIf(Value newValue, function_ref< bool(OpOperand &)> shouldReplace)
Replace all uses of 'this' value with 'newValue' if the given callback returns true.
Type getType() const
Return the type of this value.
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use...
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Specialization of arith.constant op that returns an integer of index type.
Operation * getOwner() const
Return the owner of this operand.
SmallVector< SmallVector< AffineForOp, 8 >, 8 > tile(ArrayRef< AffineForOp > forOps, ArrayRef< uint64_t > sizes, ArrayRef< AffineForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
Include the generated interface declarations.
void getPerfectlyNestedLoops(SmallVectorImpl< scf::ForOp > &nestedLoops, scf::ForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
LogicalResult loopUnrollByFactor(scf::ForOp forOp, uint64_t unrollFactor, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr)
Unrolls this for operation by the specified unroll factor.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, func::FuncOp *thenFn, StringRef thenFnName, func::FuncOp *elseFn, StringRef elseFnName)
Outline the then and/or else regions of ifOp as follows:
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
SmallVector< scf::ForOp > replaceLoopNestWithNewYields(RewriterBase &rewriter, MutableArrayRef< scf::ForOp > loopNest, ValueRange newIterOperands, const NewYieldValuesFn &newYieldValuesFn, bool replaceIterOperandsUsesInLoop=true)
Update a perfectly nested loop nest to yield new values from the innermost loop and propagating it up...
void collapseParallelLoops(scf::ParallelOp loops, ArrayRef< std::vector< unsigned >> combinedDimensions)
Take the ParallelLoop and for each set of dimension indices, combine them into a single dimension.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
std::pair< Loops, Loops > TileLoops
int64_t ceilDiv(int64_t lhs, int64_t rhs)
Returns the result of MLIR's ceildiv operation on constants.
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
bool succeeded(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
std::function< SmallVector< Value >(OpBuilder &b, Location loc, ArrayRef< BlockArgument > newBbArgs)> NewYieldValuesFn
A function that returns the additional yielded values during replaceWithAdditionalYields.
Loops tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef< Value > sizes)
Tile a nest of scf::ForOp loops rooted at rootForOp with the given (parametric) sizes.
bool getInnermostParallelLoops(Operation *rootOp, SmallVectorImpl< scf::ParallelOp > &result)
Get a list of innermost parallel loops contained in rootOp.
void getUsedValuesDefinedAbove(Region &region, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region...
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
FailureOr< func::FuncOp > outlineSingleBlockRegion(RewriterBase &rewriter, Location loc, Region &region, StringRef funcName, func::CallOp *callOp=nullptr)
Outline a region with a single block into a new FuncOp.
scf::ForallOp fuseIndependentSiblingForallLoops(scf::ForallOp target, scf::ForallOp source, RewriterBase &rewriter)
Given two scf.forall loops, target and source, fuses target into source.
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef< int64_t > sizes)
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
void getForwardSlice(Operation *op, SetVector< Operation * > *forwardSlice, const ForwardSliceOptions &options={})
Fills forwardSlice with the computed forward slice (i.e.
This class represents an efficient way to signal success or failure.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.