29 #include "llvm/Support/CommandLine.h" 35 #define DEBUG_TYPE "linalg-tiling" 39 return cst.value() == 0;
54 for (
int idx = 0, e = tileSizes.size(), zerosCount = 0; idx < e; ++idx) {
55 if (
isZero(tileSizes[idx - zerosCount])) {
56 shapeSizes.erase(shapeSizes.begin() + idx - zerosCount);
57 tileSizes.erase(tileSizes.begin() + idx - zerosCount);
61 loopIndexToRangeIndex[idx] = idx - zerosCount;
66 for (
unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx)
67 res.push_back(
Range{b.create<arith::ConstantIndexOp>(loc, 0),
68 shapeSizes[idx], tileSizes[idx]});
69 return std::make_tuple(res, loopIndexToRangeIndex);
77 auto rangeIndex = loopIndexToRangeIndex.find(en.index());
78 if (rangeIndex == loopIndexToRangeIndex.end())
80 en.value() = ivs[rangeIndex->second];
89 tensor::ExtractSliceOp sliceOp,
Value source,
91 return b.
create<tensor::InsertSliceOp>(
92 loc, sliceOp.source().getType(), source, dest, sliceOp.offsets(),
93 sliceOp.sizes(), sliceOp.strides(), sliceOp.static_offsets(),
94 sliceOp.static_sizes(), sliceOp.static_strides());
97 template <
typename LoopTy>
101 auto nLoops = op.getNumLoops();
103 tileSizes = tileSizes.take_front(nLoops);
105 if (llvm::all_of(tileSizes,
isZero)) {
107 tiledOp.
op = cast<LinalgOp>(b.
clone(*op.getOperation()));
109 tiledOp.
op->result_end());
114 auto allShapeSizes = op.createFlatListOfOperandDims(b, op.getLoc());
115 AffineMap shapeSizesToLoopsMap = op.getShapesToLoopsMap();
116 if (!shapeSizesToLoopsMap)
122 b, op.getLoc(), shapeSizesToLoopsMap, allShapeSizes, tileSizes);
125 for (
const auto &attr :
126 enumerate(op.iterator_types().cast<ArrayAttr>().getValue())) {
127 if (loopIndexToRangeIndex.count(attr.index()))
128 iteratorTypes.push_back(attr.value());
132 auto invPermutationMap =
140 auto it = loopIndexToRangeIndex.find(pos);
141 if (it == loopIndexToRangeIndex.end())
143 interchangeVector.push_back(it->second);
149 assert(invPermutationMap);
151 interchangeVector.end());
159 auto tiledLoopBodyBuilder =
162 ivs.assign(localIvs.begin(), localIvs.end());
171 interchangedIvs.assign(ivs.begin(), ivs.end());
175 assert(operandValuesToUse.size() ==
176 static_cast<size_t>(op.getNumInputsAndOutputs()) &&
177 "expect the number of operands and inputs and outputs to match");
188 for (
OpOperand *opOperand : op.getOutputTensorOperands())
189 resultTensorTypes.push_back(
190 tiledOperands[opOperand->getOperandNumber()].getType());
192 res = op.clone(b, loc, resultTensorTypes, tiledOperands);
195 unsigned resultIdx = 0;
196 for (
OpOperand *opOperand : op.getOutputTensorOperands()) {
199 Value outputTensor = tiledOperands[opOperand->getOperandNumber()];
202 if (
auto sliceOp = outputTensor.
getDefiningOp<tensor::ExtractSliceOp>()) {
204 res->getResult(resultIdx),
207 tensorResults.push_back(res->getResult(resultIdx));
222 loops.reserve(ivs.size());
223 for (
auto iv : ivs) {
226 assert(loops.back() &&
"no owner found for induction variable!");
230 loops.push_back(
nullptr);
238 if ((outermostLoop = loop))
242 res, loops, outermostLoop ? outermostLoop->
getResults() : tensorResults};
245 template <
typename LoopTy>
257 auto nLoops = op.getNumLoops();
260 if (tileSizeVector.size() < nLoops) {
262 tileSizeVector.append(nLoops - tileSizeVector.size(), zero);
265 return tileLinalgOpImpl<LoopTy>(b, op, tileSizeVector,
options);
273 return tileLinalgOpImpl<scf::ForOp>(b, op,
options);
275 return tileLinalgOpImpl<scf::ParallelOp>(b, op,
options);
285 tensor::PadOp &newPadOp,
LoopNest &loopNest,
292 newPadOp = cast<tensor::PadOp>(builder.
clone(*op.getOperation()));
294 int64_t rank = op.getResultType().getRank();
299 tileSizes.append(rank - tileSizes.size(), zero);
301 TilingInterface tilingInterface =
302 dyn_cast<TilingInterface>(op.getOperation());
305 for (int64_t i = 0; i < rank; ++i) {
306 allDims.push_back(ranges[i].size);
307 if (!
isZero(tileSizes[i])) {
308 lbs.push_back(ranges[i].offset);
309 dims.push_back(ranges[i].size);
310 steps.push_back(tileSizes[i]);
315 tilingInterface.getDestinationOperands(builder);
317 builder, loc, lbs, dims, steps,
ValueRange(destOperand),
330 b, loc, newPadOp->getResult(0), tileSizes, map, offsets, allDims,
332 auto sliceOp = tiledOutput.
getDefiningOp<tensor::ExtractSliceOp>();
333 assert(sliceOp &&
"expected ExtractSliceOp");
353 tensor::PadOp newPadOp;
370 template <
typename... OpTypes>
371 class CanonicalizationPatternList;
374 class CanonicalizationPatternList<> {
379 template <
typename OpTy,
typename... OpTypes>
380 class CanonicalizationPatternList<OpTy, OpTypes...> {
383 OpTy::getCanonicalizationPatterns(patterns, patterns.
getContext());
384 CanonicalizationPatternList<OpTypes...>::insert(patterns);
399 AffineApplyOp::getCanonicalizationPatterns(patterns, ctx);
400 AffineForOp::getCanonicalizationPatterns(patterns, ctx);
401 AffineMinOp::getCanonicalizationPatterns(patterns, ctx);
402 AffineMaxOp::getCanonicalizationPatterns(patterns, ctx);
403 arith::ConstantIndexOp::getCanonicalizationPatterns(patterns, ctx);
405 memref::SubViewOp::getCanonicalizationPatterns(patterns, ctx);
406 memref::ViewOp::getCanonicalizationPatterns(patterns, ctx);
408 scf::ForOp::getCanonicalizationPatterns(patterns, ctx);
409 scf::ParallelOp::getCanonicalizationPatterns(patterns, ctx);
411 tensor::CastOp::getCanonicalizationPatterns(patterns, ctx);
412 tensor::ExtractSliceOp::getCanonicalizationPatterns(patterns, ctx);
413 tensor::InsertSliceOp::getCanonicalizationPatterns(patterns, ctx);
415 InitTensorOp::getCanonicalizationPatterns(patterns, ctx);
416 tensor::PadOp::getCanonicalizationPatterns(patterns, ctx);
417 ctx->getLoadedDialect<LinalgDialect>()->getCanonicalizationPatterns(patterns);
419 CanonicalizationPatternList<
421 #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" 430 StringAttr::get(ctx,
"tiled"));
433 #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" 434 >::insert(patterns, options, f);
435 patterns.
add<PadOpTilingPattern>(ctx,
options);
441 patterns.
add<PadOpTilingPattern>(ctx,
options);
454 struct LinalgTilingPass :
public LinalgTilingBase<LinalgTilingPass> {
455 LinalgTilingPass() =
default;
457 this->tileSizes = tileSizes;
459 this->loopTypeEnum = loopType;
462 void runOnOperation()
override {
463 func::FuncOp funcOp = getOperation();
469 .Default(loopTypeEnum);
480 funcOp.walk([](LinalgOp op) {
494 std::unique_ptr<OperationPass<func::FuncOp>>
497 return std::make_unique<LinalgTilingPass>(tileSizes, loopType);
Include the generated interface declarations.
AffineMap inversePermutation(AffineMap map)
Returns a map of codomain to domain dimensions such that the first codomain dimension for a particular...
MLIRContext * getContext() const
SmallVector< Value > computeTileSizes(OpBuilder &b, Location loc, ValueRange ivs, ValueRange tileSizes, ArrayRef< Value > sizeBounds)
Compute tile sizes, given a list of loop ivs, tileSizes and dimension sizes (sizeBounds).
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current IR...
Operation is a basic unit of execution within MLIR.
SmallVector< StringRef, 2 > distributionTypes
Specification markers of how to distribute the linalg.tiled_loop.
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
SmallVector< Value, 4 > tensorResults
TileSizeComputationFunction tileSizeComputationFunction
Computation function that returns the tile sizes for each operation.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Operation * clone(Operation &op, BlockAndValueMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value...
static AffineMap getPermutationMap(ArrayRef< unsigned > permutation, MLIRContext *context)
Returns an AffineMap representing a permutation.
std::vector< Value > ValueVector
An owning vector of values, handy to return from functions.
std::tuple< SmallVector< Range, 4 >, LoopIndexToRangeIndexMap > makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map, ValueRange allShapeSizes, ValueRange allTileSizes)
void populatePadTensorTilingPatterns(RewritePatternSet &patterns, const LinalgTilingOptions &options)
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Auxiliary range data structure to unpack the offset, size and stride operands into a list of triples...
LinalgTilingLoopType loopType
The type of tile loops to generate.
Block * getOwner() const
Returns the block that owns this argument.
std::unique_ptr< OperationPass< func::FuncOp > > createLinalgTilingPass(ArrayRef< int64_t > tileSizes={}, linalg::LinalgTilingLoopType loopType=linalg::LinalgTilingLoopType::Loops)
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ValueRange tileSizes, AffineMap map, ValueRange lbs, ValueRange ubs, ValueRange subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
This class represents an efficient way to signal success or failure.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
virtual void replaceOp(Operation *op, ValueRange newValues)
This method replaces the results of the operation with the specified list of values.
Optional< LinalgLoopDistributionOptions > distribution
When specified, specifies distribution of generated tile loops to processors.
static LogicalResult tilePadOp(RewriterBase &builder, tensor::PadOp op, tensor::PadOp &newPadOp, LoopNest &loopNest, const LinalgTilingOptions &options)
Generate a loop nest around a given tensor::PadOp (for tiling).
This class provides support for representing a failure result, or a valid value of type T...
static Value insertSliceIntoTensor(RewriterBase &b, Location loc, tensor::ExtractSliceOp sliceOp, Value source, Value dest)
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
static FailureOr< TiledLinalgOp > tileLinalgOpImpl(RewriterBase &b, LinalgOp op, ValueRange tileSizes, const LinalgTilingOptions &options)
SmallVector< Value, 4 > applyMapToValues(OpBuilder &b, Location loc, AffineMap map, ValueRange values)
Returns the values obtained by applying map to the list of values.
unsigned getNumResults() const
SmallVector< unsigned, 4 > interchangeVector
The interchange vector to reorder the tiled loops.
A multi-dimensional affine map. Affine maps are immutable, like Types, and they are uniqued...
static void applyExtractSliceOfPadTensorSwapPattern(func::FuncOp funcOp)
FailureOr< TiledLinalgOp > tileLinalgOp(RewriterBase &b, LinalgOp op, const LinalgTilingOptions &options)
This class represents an argument of a Block.
SmallVector< Value > computeTileOffsets(OpBuilder &b, Location loc, ValueRange ivs, ValueRange tileSizes)
Compute tile offsets, given a list of loop ivs and tileSizes.
void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns)
static void insertTilingPatterns(RewritePatternSet &patterns, const LinalgTilingOptions &options)
Populate the given list with patterns that apply Linalg tiling.
void populateSCFForLoopCanonicalizationPatterns(RewritePatternSet &patterns)
Populate patterns for canonicalizing operations inside SCF loop bodies.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e. ...
static bool isZero(Value v)
void transformIndexOps(RewriterBase &b, LinalgOp op, SmallVectorImpl< Value > &ivs, const LoopIndexToRangeIndexMap &loopIndexToRangeIndex)
All indices returned by IndexOp should be invariant with respect to tiling.
static llvm::ManagedStatic< PassManagerOptions > options
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
RAII guard to reset the insertion point of the builder when destroyed.
LinalgTilingLoopType
The type of loops to be generated during tiling.
SmallVector< Value, 4 > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ArrayRef< Value > valuesToTile, ValueRange ivs, ValueRange tileSizes, ArrayRef< Value > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder...
RewritePatternSet & add(ConstructorArg &&arg, ConstructorArgs &&... args)
Add an instance of each of the pattern types 'Ts' to the pattern list with the given arguments...
static void doit(OpBuilder &b, Location loc, ArrayRef< Range > loopRanges, LinalgOp linalgOp, ArrayRef< Attribute > iteratorTypes, function_ref< scf::ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilderFn, Optional< LinalgLoopDistributionOptions >=None, ArrayRef< StringRef > distributionTypes={})
LinalgTilingOptions & setLoopType(LinalgTilingLoopType lt)
Specialization of arith.constant op that returns an integer of index type.
Perform standalone tiling of a single LinalgOp by tileSizes.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
MLIRContext is the top-level object for a collection of MLIR operations.
This class represents an operand of an operation.
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx)
Canonicalization patterns relevant to apply after tiling patterns.
LogicalResult applyPatternsAndFoldGreedily(MutableArrayRef< Region > regions, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig())
Rewrite the regions of the specified operation, which must be isolated from above, by repeatedly applying the highest benefit patterns in a greedy work-list driven manner.
result_range getResults()
This class helps build Operations.
This class provides an abstraction over the different types of ranges over Values.
MLIRContext * getContext() const
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
void addTileLoopIvsToIndexOpResults(OpBuilder &b, LinalgOp tiledOp, ArrayRef< Value > ivs)
Add the tile loop induction variables ivs to the IndexOp results found in the body of the tiledOp to ...
LinalgTilingOptions & setTileSizes(const SmallVector< Value, 4 > &ts)
Set the tileSizeComputationFunction to return the values ts.