MLIR
20.0.0git
|
#include "mlir/Dialect/SCF/Transforms/TileUsingInterface.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SCF/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Interfaces/DestinationStyleOpInterface.h"
#include "mlir/Interfaces/TilingInterface.h"
#include "mlir/Rewrite/FrozenRewritePatternSet.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include <optional>
Go to the source code of this file.
Macros | |
#define | DEBUG_TYPE "tile-using-interface" |
Typedefs | |
using | YieldTiledValuesFn = std::function< LogicalResult(RewriterBase &rewriter, Location loc, ValueRange ivs, ValueRange newBbArgs, SmallVector< Value > &tiledValues, SmallVector< SmallVector< OpFoldResult > > &resultOffsets, SmallVector< SmallVector< OpFoldResult > > &resultSizes)> |
A function that allows returning additional yielded values during yieldTiledValuesAndReplace . More... | |
Functions | |
static SmallVector< int64_t > | fillInterchangeVector (ArrayRef< int64_t > interchangeVector, size_t iterationDomainSize) |
Helper method to adjust the interchange vector to match the iteration domain. More... | |
static LogicalResult | verifyTileSizeOptions (RewriterBase &rewriter, Location loc, const scf::SCFTilingOptions &options) |
Verify the tile size options are set in a consistent manner. More... | |
static std::tuple< SmallVector< OpFoldResult >, SmallVector< OpFoldResult > > | getUserTileSizesAndNumThreads (RewriterBase &rewriter, TilingInterface op, ArrayRef< Range > iterationDomain, const scf::SCFTilingOptions &options) |
Method to instantiate the tile sizes and/or number of threads specified by the user. More... | |
static void | checkSafeToTileToForall (TilingInterface op, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > numThreads) |
Checks if any of the tiled loops are not parallel. More... | |
static bool | tileDividesIterationDomain (Range loopRange) |
Check if stride evenly divides the trip count size - offset . More... | |
static OpFoldResult | getBoundedTileSize (OpBuilder &b, Location loc, Range loopRange, OpFoldResult offset, OpFoldResult tileSize) |
Returns the bounded tile size given the current offset , loopRange and tileSize , i.e., min(tileSize, range.end() - offset) . More... | |
static bool | canOmitTileOffsetInBoundsCheck (OpFoldResult tileSize, OpFoldResult numThreads, OpFoldResult iterationSize) |
Returns true if the maximum tile offset tileSize * (numThreads - 1) is less than iterationSize . More... | |
static std::tuple< SmallVector< OpFoldResult >, SmallVector< OpFoldResult > > | getTileOffsetAndSizes (RewriterBase &rewriter, Location loc, ValueRange ivs, ArrayRef< Range > iterationDomain, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > numThreads) |
Compute the OpFoldResult s that represents the multi-dimensional offset s and size s of the tile of the iteration space that the innermost loop body of the generated tiled loops corresponds to. More... | |
static std::tuple< SmallVector< OpFoldResult >, SmallVector< OpFoldResult >, SmallVector< OpFoldResult > > | getLoopBounds (RewriterBase &rewriter, Location loc, ArrayRef< Range > loopRanges, ArrayRef< OpFoldResult > tileSizes) |
Function to return the bounds of the loops to be generated. More... | |
static Operation * | cloneOpAndUpdateDestinationArgs (RewriterBase &rewriter, Operation *op, ValueRange newDestArgs) |
Clones the operation and updates the destination if the operation implements the DestinationStyleOpInterface . More... | |
static LogicalResult | generateLoopNestUsingForOp (RewriterBase &rewriter, Location loc, ArrayRef< Range > loopRanges, ArrayRef< OpFoldResult > tileSizes, ValueRange destinationTensors, YieldTiledValuesFn yieldTiledValuesFn, SmallVector< LoopLikeOpInterface > &loops) |
Generate the tile-loop nest using scf.for operation. More... | |
static LogicalResult | generateLoopNestUsingForallOp (RewriterBase &rewriter, Location loc, ArrayRef< Range > loopRanges, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > numThreads, ArrayRef< Attribute > mappingVector, ValueRange destinationTensors, YieldTiledValuesFn tiledBodyFn, SmallVector< LoopLikeOpInterface > &loops) |
Generate the tile-loop nest using scf.forall operation. More... | |
static LogicalResult | generateLoopNest (RewriterBase &rewriter, Location loc, const scf::SCFTilingOptions &options, ArrayRef< Range > loopRanges, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > numThreads, ValueRange destinationTensors, YieldTiledValuesFn tiledBodyFn, SmallVector< LoopLikeOpInterface > &loops) |
Generate the tile-loop nest using the loop construct specified in options . More... | |
template<typename LoopType > | |
FailureOr< LoopLikeOpInterface > | yieldTiledValuesAndReplaceLoop (LoopType loopOp, RewriterBase &rewriter, ValueRange newInitOperands, YieldTiledValuesFn yieldTiledValuesFn) |
Append the specified additional newInitOperands operands to the loops existing init operands (or similar), and replace loopOp with the new loop that has the additional init operands. More... | |
template<> | |
FailureOr< LoopLikeOpInterface > | yieldTiledValuesAndReplaceLoop< scf::ForOp > (scf::ForOp loopOp, RewriterBase &rewriter, ValueRange newInitOperands, YieldTiledValuesFn yieldTiledValuesFn) |
Implementation of yieldTiledValuesAndReplaceLoop for scf.for . More... | |
template<> | |
FailureOr< LoopLikeOpInterface > | yieldTiledValuesAndReplaceLoop< scf::ForallOp > (scf::ForallOp loopOp, RewriterBase &rewriter, ValueRange newInitOperands, YieldTiledValuesFn yieldTiledValuesFn) |
Implementation of yieldTiledValuesAndReplaceLoop for scf.forall More... | |
FailureOr< LoopLikeOpInterface > | yieldTiledValuesAndReplaceLoop (LoopLikeOpInterface loopLikeOp, RewriterBase &rewriter, ValueRange newInitOperands, YieldTiledValuesFn yieldTiledValuesFn) |
Implementation of yieldTiledValuesAndReplaceLoop for LoopLikeOpInterface , that just dispatches to the implementation for each supported loop type. More... | |
static LogicalResult | addInitOperandsToLoopNest (RewriterBase &rewriter, MutableArrayRef< LoopLikeOpInterface > loops, ValueRange newInitValues, YieldTiledValuesFn getNewTiledYieldsFn) |
Method to add new init values to a loop nest. More... | |
static std::tuple< OpResult, std::optional< OpOperand * > > | getUntiledProducerFromSliceSource (OpOperand *source, ArrayRef< LoopLikeOpInterface > loops) |
Return the untiled producer whose slice is used in a tiled consumer. More... | |
static LogicalResult | checkAssumptionForFusingConsumer (tensor::InsertSliceOp candidateSliceOp) |
A utility function that checks whether the only use of the result of a tensor.insert_slice op is in a scf.yield op. More... | |
static FailureOr< OpOperand * > | getConsumerFromUses (Value val, Block *containingOpBlock) |
Fetches the OpOperand of the only user (and use) of the value val which implements TilingInterface and DestinationStyleOpInterface . More... | |
static SmallVector< scf::ForOp > | getPerfectlyNestedLoopsOutsideOf (scf::ForOp loop) |
Find the perfectly nested loops outside of the given loop (inclusive), sorted from outer to inner. More... | |
static FailureOr< OpOperand * > | getUntiledConsumerFromSlice (tensor::InsertSliceOp candidateSliceOp) |
Fetch the untiled consumer of a scf.for's result which is yielded by a tensor.insert_slice. More... | |
static FailureOr< OpOperand * > | getUntiledConsumerFromSlice (tensor::ParallelInsertSliceOp candidateSliceOp) |
Fetch the first untiled consumer of a scf.forall's result which is yielded by a tensor.parallel_insert_slice. More... | |
static LogicalResult | checkAssumptionForLoop (Operation *loopOp, Operation *consumerOp) |
This utility currently checks whether the loop either :-. More... | |
static FailureOr< OpOperand * > | getUntiledConsumerFromSlice (Operation *sliceOp) |
A utility to fetch an untiled consumer of tensor.insert_slice/tensor.parallel_insert_slice. More... | |
#define DEBUG_TYPE "tile-using-interface" |
Definition at line 33 of file TileUsingInterface.cpp.
using YieldTiledValuesFn = std::function<LogicalResult( RewriterBase &rewriter, Location loc, ValueRange ivs, ValueRange newBbArgs, SmallVector<Value> &tiledValues, SmallVector<SmallVector<OpFoldResult> > &resultOffsets, SmallVector<SmallVector<OpFoldResult> > &resultSizes)> |
A function that allows returning additional yielded values during yieldTiledValuesAndReplace.
- ivs: induction variable for the loop.
- newBbArgs: basic block arguments corresponding to newly added iter_args.
- tiledValues: the tiled values to return. Must be of the same size as newBbArgs; each element of this array is inserted into the corresponding element in newBbArgs.
- resultOffsets: is of the same size as tiledValues and represents the offsets to use when inserting the corresponding element from tiledValues into the element from newBbArgs.
- resultSizes: is of the same size as tiledValues and represents the size of the corresponding element from tiledValues inserted into the element from newBbArgs.

In case the method needs to return failure(), the method is expected to clean up any inserted operations.

Definition at line 365 of file TileUsingInterface.cpp.
|
static |
Method to add new init values to a loop nest.
Updates loops in-place with new loops that use the newInitValues. The outer loops are updated to yield the new result values of the inner loop. For the innermost loop, the callback getNewTiledYieldsFn is invoked to get the additional values to yield from the innermost loop.
Definition at line 716 of file TileUsingInterface.cpp.
References mlir::OpBuilder::create(), mlir::RewriterBase::mergeBlocks(), mlir::RewriterBase::replaceOp(), mlir::RewriterBase::replaceOpWithNewOp(), mlir::OpBuilder::setInsertionPoint(), and yieldTiledValuesAndReplaceLoop().
Referenced by mlir::scf::tileAndFuseConsumerOfSlice(), and mlir::scf::yieldReplacementForFusedProducer().
|
static |
Returns true if the maximum tile offset tileSize * (numThreads - 1) is less than iterationSize.
Definition at line 230 of file TileUsingInterface.cpp.
References mlir::getConstantIntValue().
Referenced by getTileOffsetAndSizes().
|
static |
A utility function that checks whether the only use of the result of a tensor.insert_slice op is in a scf.yield op.
Definition at line 1560 of file TileUsingInterface.cpp.
References mlir::Operation::getBlock(), mlir::Value::getDefiningOp(), mlir::detail::IROperandBase::getOwner(), and mlir::Value::getUses().
Referenced by getUntiledConsumerFromSlice().
This utility currently checks whether the loop either :-.
Definition at line 1709 of file TileUsingInterface.cpp.
References mlir::Operation::getBlock(), mlir::Operation::getNumResults(), mlir::Operation::getUsers(), and mlir::Operation::isBeforeInBlock().
Referenced by mlir::scf::tileAndFuseConsumerOfSlice().
|
static |
Checks if any of the tiled loops are not parallel.
Definition at line 156 of file TileUsingInterface.cpp.
|
static |
Clones the operation and updates the destination if the operation implements the DestinationStyleOpInterface
.
Definition at line 373 of file TileUsingInterface.cpp.
Referenced by mlir::scf::tileAndFuseProducerOfSlice().
|
static |
Helper method to adjust the interchange vector to match the iteration domain.
Definition at line 60 of file TileUsingInterface.cpp.
|
static |
Generate the tile-loop nest using the loop construct specified in options.
- options: Tiling options specified.
- loopRanges: specifies the lb, ub and step of the untiled iteration space.
- tileSizes: is the tile sizes to use. Zero represents untiled loops.
- destinationTensors: are the init values to use for the outermost loop.
- yieldTiledValuesFn: is called to generate the loop body of the innermost loop.
- loops: is an in-out parameter into which the generated loops are populated.

Definition at line 546 of file TileUsingInterface.cpp.
References mlir::scf::SCFTilingOptions::ForallOp, mlir::scf::SCFTilingOptions::ForOp, generateLoopNestUsingForallOp(), generateLoopNestUsingForOp(), mlir::isZeroIndex(), mlir::RewriterBase::notifyMatchFailure(), and options.
|
static |
Generate the tile-loop nest using scf.forall operation.
- loopRanges: specifies the lb, ub and step of the untiled iteration space.
- tileSizes: is the tile sizes to use. Zero represents untiled loops.
- destinationTensors: are the init values to use for the outermost loop.
- mappingVector: is the mapping attributes to use for loop construction. Can be empty.
- yieldTiledValuesFn: is called to generate the loop body of the innermost loop.
- loops: is an in-out parameter into which the generated loops are populated.

Definition at line 474 of file TileUsingInterface.cpp.
References mlir::OpBuilder::create(), mlir::Builder::getArrayAttr(), mlir::Builder::getIndexAttr(), getLoopBounds(), mlir::isConstantIntValue(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and mlir::OpBuilder::setInsertionPointToEnd().
Referenced by generateLoopNest().
|
static |
Generate the tile-loop nest using scf.for operation.
- loopRanges: specifies the lb, ub and step of the untiled iteration space.
- tileSizes: is the tile sizes to use. Zero represents untiled loops.
- destinationTensors: are the init values to use for the outermost loop.
- yieldTiledValuesFn: is called to generate the loop body of the innermost loop.
- loops: is an in-out parameter into which the generated loops are populated.

Definition at line 393 of file TileUsingInterface.cpp.
References mlir::OpBuilder::create(), mlir::Builder::getIndexAttr(), getLoopBounds(), mlir::getValueOrCreateConstantIndexOp(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPointToEnd().
Referenced by generateLoopNest().
|
static |
Returns the bounded tile size given the current offset
, loopRange
and tileSize
, i.e., min(tileSize, range.end() - offset)
.
Definition at line 205 of file TileUsingInterface.cpp.
References mlir::bindDims(), mlir::bindSymbols(), mlir::AffineMap::get(), mlir::getConstantIntValue(), mlir::Builder::getContext(), mlir::getValueOrCreateConstantIndexOp(), mlir::affine::makeComposedFoldedAffineMin(), mlir::Range::offset, mlir::Range::size, and tileDividesIterationDomain().
Referenced by getTileOffsetAndSizes().
Fetches the OpOperand of the only user (and use) of the value val
which implements TilingInterface
and DestinationStyleOpInterface
.
Returns failure otherwise.
Definition at line 1586 of file TileUsingInterface.cpp.
References mlir::Operation::getBlock(), and mlir::Value::getUses().
Referenced by getUntiledConsumerFromSlice().
|
static |
Function to return the bounds of the loops to be generated.
Definition at line 336 of file TileUsingInterface.cpp.
References mlir::isConstantIntValue().
Referenced by generateLoopNestUsingForallOp(), and generateLoopNestUsingForOp().
|
static |
Find the perfectly nested loops outside of the given loop (inclusive), sorted from outer to inner.
E.g.
This function will return three perfectly nested loops: %0 + %1 + %2, when target inner loop is %2.
Definition at line 1631 of file TileUsingInterface.cpp.
References mlir::Block::front(), mlir::Block::getTerminator(), and mlir::Block::without_terminator().
Referenced by getUntiledConsumerFromSlice(), and mlir::scf::tileAndFuseConsumerOfSlice().
|
static |
Compute the OpFoldResult
s that represents the multi-dimensional offset
s and size
s of the tile of the iteration space that the innermost loop body of the generated tiled loops corresponds to.
Definition at line 245 of file TileUsingInterface.cpp.
References mlir::bindDims(), mlir::bindSymbols(), canOmitTileOffsetInBoundsCheck(), mlir::getAsOpFoldResult(), getBoundedTileSize(), mlir::Builder::getContext(), mlir::Builder::getIndexAttr(), mlir::AffineMap::getMultiDimIdentityMap(), mlir::isConstantIntValue(), mlir::affine::makeComposedFoldedAffineApply(), mlir::affine::makeComposedFoldedAffineMax(), and mlir::affine::makeComposedFoldedAffineMin().
A utility to fetch an untiled consumer of tensor.insert_slice/tensor.parallel_insert_slice.
Definition at line 1729 of file TileUsingInterface.cpp.
References getUntiledConsumerFromSlice().
|
static |
Fetch the untiled consumer of a scf.for's result which is yielded by a tensor.insert_slice.
This function makes the following assumptions :
Definition at line 1662 of file TileUsingInterface.cpp.
References checkAssumptionForFusingConsumer(), getConsumerFromUses(), mlir::OpOperand::getOperandNumber(), mlir::Operation::getParentOp(), getPerfectlyNestedLoopsOutsideOf(), and mlir::Value::getUses().
Referenced by getUntiledConsumerFromSlice(), and mlir::scf::tileAndFuseConsumerOfSlice().
|
static |
Fetch the first untiled consumer of a scf.forall's result which is yielded by a tensor.parallel_insert_slice.
Definition at line 1683 of file TileUsingInterface.cpp.
References mlir::Operation::getBlock(), getConsumerFromUses(), and mlir::Operation::getParentOp().
|
static |
Return the untiled producer whose slice is used in a tiled consumer.
The method traverses the tile loop nest (loops) if needed, and returns the iter_args of the outermost loop that is encountered. Traversing the iter_args indicates that this is a destination operand of the consumer. If there was no loop traversal needed, the second value of the returned tuple is empty.
Definition at line 1060 of file TileUsingInterface.cpp.
References mlir::IROperand< DerivedT, IRValueT >::get().
Referenced by mlir::scf::tileAndFuseProducerOfSlice(), and mlir::scf::tileConsumerAndFuseProducersUsingSCF().
|
static |
Method to instantiate the tile sizes and/or number of threads specified by the user.
Definition at line 102 of file TileUsingInterface.cpp.
|
static |
Check if stride
evenly divides the trip count size - offset
.
Definition at line 190 of file TileUsingInterface.cpp.
References mlir::getConstantIntValue(), mlir::Range::offset, mlir::Range::size, and mlir::Range::stride.
Referenced by getBoundedTileSize().
|
static |
Verify the tile size options are set in a consistent manner.
Definition at line 78 of file TileUsingInterface.cpp.
References mlir::scf::SCFTilingOptions::ForallOp, mlir::isPermutationVector(), mlir::RewriterBase::notifyMatchFailure(), and options.
FailureOr<LoopLikeOpInterface> yieldTiledValuesAndReplaceLoop | ( | LoopLikeOpInterface | loopLikeOp, |
RewriterBase & | rewriter, | ||
ValueRange | newInitOperands, | ||
YieldTiledValuesFn | yieldTiledValuesFn | ||
) |
Implementation of yieldTiledValuesAndReplaceLoop
for LoopLikeOpInterface
, that just dispatches to the implementation for each supported loop type.
Definition at line 696 of file TileUsingInterface.cpp.
References mlir::RewriterBase::notifyMatchFailure(), and yieldTiledValuesAndReplaceLoop().
FailureOr<LoopLikeOpInterface> yieldTiledValuesAndReplaceLoop | ( | LoopType | loopOp, |
RewriterBase & | rewriter, | ||
ValueRange | newInitOperands, | ||
YieldTiledValuesFn | yieldTiledValuesFn | ||
) |
Append the specified additional newInitOperands
operands to the loops existing init
operands (or similar), and replace loopOp
with the new loop that has the additional init operands.
The loop body of this loop is moved over to the new loop. yieldTiledValuesFn
is called to get the new tiled values returned, and the offset and sizes at which the tiled value is inserted into the new region iter_args that correspond to the newly added init operands.
Definition at line 580 of file TileUsingInterface.cpp.
References mlir::RewriterBase::notifyMatchFailure().
Referenced by addInitOperandsToLoopNest(), and yieldTiledValuesAndReplaceLoop().
FailureOr<LoopLikeOpInterface> yieldTiledValuesAndReplaceLoop< scf::ForallOp > | ( | scf::ForallOp | loopOp, |
RewriterBase & | rewriter, | ||
ValueRange | newInitOperands, | ||
YieldTiledValuesFn | yieldTiledValuesFn | ||
) |
Implementation of yieldTiledValuesAndReplaceLoop
for scf.forall
Definition at line 580 of file TileUsingInterface.cpp.
FailureOr<LoopLikeOpInterface> yieldTiledValuesAndReplaceLoop< scf::ForOp > | ( | scf::ForOp | loopOp, |
RewriterBase & | rewriter, | ||
ValueRange | newInitOperands, | ||
YieldTiledValuesFn | yieldTiledValuesFn | ||
) |
Implementation of yieldTiledValuesAndReplaceLoop
for scf.for
.
Definition at line 580 of file TileUsingInterface.cpp.