9 #ifndef MLIR_DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H
10 #define MLIR_DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H
26 #include "llvm/ADT/SmallBitVector.h"
27 #include "llvm/ADT/SmallSet.h"
30 namespace bufferization {
31 class BufferizeTypeConverter;
34 class FrozenRewritePatternSet;
38 struct LinalgElementwiseFusionOptions;
39 struct LinalgFusionOptions;
40 struct LinalgTilingOptions;
57 bool removeDeadArgsAndResults =
true);
104 std::function<SmallVector<ReassociationIndices>(linalg::GenericOp)>;
287 unsigned size = useFullTiles.size();
288 llvm::SmallBitVector tmp(size,
false);
289 for (
unsigned i = 0; i < size; ++i)
290 tmp[i] = useFullTiles[i];
371 bool vectorizeNDExtract =
false);
394 LinalgPromotionOptions
options);
400 bool vectorizeNDExtract =
false);
425 template <
typename T>
472 bool emitAssertions =
true);
492 std::optional<ArrayAttr> mapping);
499 std::optional<ArrayAttr> mapping);
540 std::optional<ArrayAttr> mapping = std::nullopt);
757 template <
typename Conv2DOp,
typename Conv1DOp>
771 extern template struct DownscaleSizeOneWindowed2DConvolution<Conv2DNhwcHwcfOp,
773 extern template struct DownscaleSizeOneWindowed2DConvolution<Conv2DNchwFchwOp,
998 using ControlFn = std::function<std::optional<bool>(tensor::ExtractSliceOp)>;
1034 bool useAlloc =
false);
1085 bool useAlloc =
false);
1137 bool useAlloc =
false);
1149 linalg::LinalgOp linalgOp,
1168 linalg::LinalgOp linalgOp, tensor::UnPackOp maybeUnPackOp,
static llvm::ManagedStatic< PassManagerOptions > options
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
The main mechanism for performing data layout queries.
This class provides support for representing a failure result, or a valid value of type T.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
This class represents a single result from folding an operation.
This class represents an operand of an operation.
Operation is the basic unit of execution within MLIR.
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very li...
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
void populateLinalgNamedOpConversionPatterns(RewritePatternSet &patterns)
Patterns to convert from one named op to another.
void populateMoveInitOperandsToInputPattern(RewritePatternSet &patterns)
A pattern that converts init operands to input operands.
FailureOr< LinalgLoops > linalgOpToLoops(PatternRewriter &rewriter, LinalgOp linalgOp)
Emit a loop nest of scf.for with the proper body for linalgOp.
FailureOr< GenericOp > generalizeNamedOp(RewriterBase &rewriter, LinalgOp namedOp)
Create a GenericOp from the given named operation namedOp and replace namedOp.
void populateSplitReductionPattern(RewritePatternSet &patterns, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
Patterns to apply splitReduction below.
void populateDataLayoutPropagationPatterns(RewritePatternSet &patterns)
Patterns to bubble up or down data layout ops across other operations.
void populateFuseTensorPadWithProducerLinalgOpPatterns(RewritePatternSet &patterns)
Pattern to fuse a tensor.pad operation with the producer of its source, if the producer is a linalg o...
void populateBubbleUpExtractSliceOpPatterns(RewritePatternSet &patterns)
Patterns that are used to bubble up extract slice op above linalg op.
void transformIndexOps(RewriterBase &b, LinalgOp op, SmallVectorImpl< Value > &ivs, const LoopIndexToRangeIndexMap &loopIndexToRangeIndex)
All indices returned by IndexOp should be invariant with respect to tiling.
std::function< std::optional< Value >(OpBuilder &b, memref::SubViewOp subView, ArrayRef< Value > boundingSubViewSize, DataLayout &layout)> AllocBufferCallbackFn
Callback function type used to perform the allocation for the promoted subView.
DenseMap< int, int > LoopIndexToRangeIndexMap
Creates a number of ranges equal to the number of non-zero in tileSizes.
void populateFoldUnitExtentDimsViaReshapesPatterns(RewritePatternSet &patterns)
Patterns to fold unit-extent dimensions in operands/results of linalg ops on tensors via reassociativ...
std::function< bool(OpOperand *fusedOperand)> ControlFusionFn
Function type which is used to control when to stop fusion.
LogicalResult vectorize(RewriterBase &rewriter, LinalgOp linalgOp, ArrayRef< int64_t > inputVectorSizes={}, bool vectorizeNDExtract=false)
Emit a suitable vector form for a Linalg op.
void populateFoldReshapeOpsByCollapsingPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlFoldingReshapes)
Patterns to fold an expanding tensor.expand_shape operation with its producer generic operation by co...
LinalgTilingLoopType
The type of loops to be generated during tiling.
FailureOr< Operation * > fuseElementwiseOps(RewriterBase &rewriter, OpOperand *fusedOperand)
Fuse two linalg.generic operations that have a producer-consumer relationship captured through fusedO...
std::function< LogicalResult(OpBuilder &b, Value buffer)> DeallocBufferCallbackFn
Callback function type used to deallocate the buffers used to hold the promoted subview.
void populatePadOpVectorizationPatterns(RewritePatternSet &patterns, PatternBenefit baseBenefit=1)
Populates patterns with patterns that vectorize tensor.pad.
void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns)
FailureOr< ForeachThreadTilingResult > tileToForeachThreadOp(RewriterBase &builder, TilingInterface op, ArrayRef< OpFoldResult > numThreads, std::optional< ArrayAttr > mapping)
void populateSparseTensorRewriting(RewritePatternSet &patterns)
Populate patterns that are only useful in the context of sparse tensors.
FailureOr< PromotionInfo > promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, memref::SubViewOp subView, const AllocBufferCallbackFn &allocationFn, DataLayout &layout)
void peelLoops(RewriterBase &rewriter, ArrayRef< scf::ForOp > loops)
Peel and canonicalize 'loops'.
FailureOr< SplitReductionResult > splitReduction(PatternRewriter &b, LinalgOp op, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
FailureOr< SmallVector< Value > > collapseGenericOpIterationDims(GenericOp genericOp, ArrayRef< ReassociationIndices > foldedIterationDims, RewriterBase &rewriter)
Collapses dimensions of linalg.generic operation.
void populateConvertToDestinationStylePatterns(RewritePatternSet &patterns)
Populate patterns that convert non-destination-style ops to destination style ops.
std::function< SmallVector< Value, 4 >(OpBuilder &, Operation *)> TileSizeComputationFunction
FailureOr< LinalgLoops > linalgOpToAffineLoops(PatternRewriter &rewriter, LinalgOp linalgOp)
Emit a loop nest of affine.for with the proper body for linalgOp.
LogicalResult vectorizeLinalgOpPrecondition(LinalgOp linalgOp, ArrayRef< int64_t > inputVectorSizes={}, bool vectorizeNDExtract=false)
Return success if the operation can be vectorized.
void populateElementwiseToLinalgConversionPatterns(RewritePatternSet &patterns)
Populate patterns that convert ElementwiseMappable ops to linalg parallel loops.
FailureOr< LinalgLoops > linalgOpToParallelLoops(PatternRewriter &rewriter, LinalgOp linalgOp)
Emit a loop nest of scf.parallel with the proper body for linalgOp.
std::tuple< SmallVector< Range, 4 >, LoopIndexToRangeIndexMap > makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > allShapeSizes, ArrayRef< OpFoldResult > allTileSizes)
LogicalResult promoteSubviewsPrecondition(Operation *op, LinalgPromotionOptions options)
Promote memref.subviews feeding linalg-on-buffers operations.
void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns, PatternBenefit benefit=1)
Linalg decompose convolutions patterns.
void populateConvolutionVectorizationPatterns(RewritePatternSet &patterns, PatternBenefit benefit=1)
Populate patterns for vectorizing low-D convolution ops.
LogicalResult vectorizeCopy(RewriterBase &builder, memref::CopyOp copyOp)
Emit a suitable vector form for a Copy op with fully static shape.
FailureOr< GenericOp > interchangeGenericOp(RewriterBase &rewriter, GenericOp genericOp, ArrayRef< unsigned > interchangeVector)
Interchange the iterator_types and iterator_maps dimensions and adapts the index accesses of op.
std::function< LogicalResult(PatternRewriter &, tensor::PadOp, Value)> OptimizeCopyFn
void populateCollapseDimensions(RewritePatternSet &patterns, const GetCollapsableDimensionsFn &controlCollapseDimensions)
Pattern to collapse dimensions in a linalg.generic op.
bool areElementwiseOpsFusable(OpOperand *fusedOperand)
Return true if two linalg.generic operations with producer/consumer relationship through fusedOperand...
FailureOr< StaticMultiSizeSpecification > computeStaticMultiTileSizes(LinalgOp op, unsigned dimension, int64_t targetSize, int64_t divisor)
void populateEraseUnusedOperandsAndResultsPatterns(RewritePatternSet &patterns)
Pattern to remove dead operands and results of linalg.generic operations.
void populatePadTensorTilingPatterns(RewritePatternSet &patterns, const LinalgTilingOptions &options)
FailureOr< SmallVector< Value > > rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad, ArrayRef< int64_t > paddingDimensions, ArrayRef< Attribute > paddingValues, ArrayRef< bool > packPaddings, LinalgOp &paddedOp)
Pad the iterator dimensions paddingDimensions of all opToPad operands to a static bounding box.
std::function< LogicalResult(OpBuilder &b, Value src, Value dst)> CopyCallbackFn
Callback function type used to insert copy from original subview to subview of the promoted region fo...
void populateDecomposeLinalgOpsPattern(RewritePatternSet &patterns, bool removeDeadArgsAndResults=true)
Populate patterns for splitting a LinalgOp with multiple statements within its payload into multiple ...
FailureOr< SplitReductionResult > splitReductionByScaling(PatternRewriter &b, LinalgOp op, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
Scaling-based implementation of the split reduction transformation.
void populateEraseUnnecessaryInputsPatterns(RewritePatternSet &patterns)
Patterns to promote inputs to outputs and remove unused inputs of linalg.generic ops.
FailureOr< TiledLinalgOp > tileLinalgOp(RewriterBase &b, LinalgOp op, const LinalgTilingOptions &options)
FailureOr< ForeachThreadTilingResult > tileToForeachThreadOpUsingTileSizes(RewriterBase &builder, TilingInterface op, ArrayRef< OpFoldResult > tileSizes, std::optional< ArrayAttr > mapping)
Same as tileToForeachThreadOp, but calculate the number of threads required using the given tileSizes...
void populateFoldReshapeOpsByExpansionPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlFoldingReshapes)
Patterns to fold an expanding (collapsing) tensor_reshape operation with its producer (consumer) gene...
FailureOr< ForeachThreadReductionTilingResult > tileReductionUsingForeachThread(RewriterBase &b, PartialReductionOpInterface op, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > tileSizes={}, std::optional< ArrayAttr > mapping=std::nullopt)
Method to tile a reduction to parallel iterations computing partial reductions.
void populateSwapExtractSliceWithFillPatterns(RewritePatternSet &patterns)
Adds patterns that waps tensor.extract_slice(linalg.fill(cst, init)) into linalg.fill(cst,...
void populateInlineConstantOperandsPatterns(RewritePatternSet &patterns)
Patterns that are used to inline constant operands into linalg generic ops.
FailureOr< LinalgOp > promoteSubViews(OpBuilder &b, LinalgOp op, const LinalgPromotionOptions &options)
Promote the subViews into a new buffer allocated at the insertion point b.
void populateConstantFoldLinalgOperations(RewritePatternSet &patterns, const ControlFusionFn &controlFn)
Patterns to constant fold Linalg operations.
std::function< SplitReductionOptions(LinalgOp op)> ControlSplitReductionFn
Function signature to control reduction splitting.
void populateLinalgNamedOpsGeneralizationPatterns(RewritePatternSet &patterns)
Linalg generalization patterns.
FailureOr< linalg::LinalgOp > pack(RewriterBase &rewriter, linalg::LinalgOp linalgOp, ArrayRef< OpFoldResult > packedSizes)
Implement packing of a single LinalgOp by packedSizes.
std::function< SmallVector< ReassociationIndices >(linalg::GenericOp)> GetCollapsableDimensionsFn
Function type to control generic op dimension collapsing.
std::optional< vector::CombiningKind > getCombinerOpKind(Operation *combinerOp)
Return vector::CombiningKind for the given op.
SmallVector< Value > peelLoop(RewriterBase &rewriter, Operation *op)
Try to peel and canonicalize loop op and return the new result.
RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx)
Canonicalization patterns relevant to apply after tiling patterns.
void populateFoldUnitExtentDimsViaSlicesPatterns(RewritePatternSet &patterns)
Patterns to fold unit-extent dimensions in operands/results of linalg ops on tensors via rank-reducin...
FailureOr< PackTransposeResult > packTranspose(RewriterBase &rewriter, tensor::PackOp packOp, linalg::LinalgOp linalgOp, tensor::UnPackOp maybeUnPackOp, ArrayRef< int64_t > outerPerm, ArrayRef< int64_t > innerPerm)
Transpose a single PackOp -> LinalgOp -> UnPackOp chain and return the transposed PackOp -> LinalgOp ...
std::pair< TilingInterface, TilingInterface > splitOp(RewriterBase &rewriter, TilingInterface op, unsigned dimension, OpFoldResult splitPoint)
Split the given op into two parts along the given iteration space dimension at the specified splitPoi...
void populateElementwiseOpsFusionPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlElementwiseOpFusion)
Patterns for fusing linalg operation on tensors.
FailureOr< MultiSizeSpecification > computeMultiTileSizes(OpBuilder &builder, LinalgOp op, unsigned dimension, OpFoldResult targetSize, OpFoldResult divisor, bool emitAssertions=true)
Emits the IR computing the multi-sized tiling specification with two tile sizes not exceeding targetS...
void populateExtractOpVectorizationPatterns(RewritePatternSet &patterns, PatternBenefit baseBenefit=1)
Include the generated interface declarations.
This class represents an efficient way to signal success or failure.
OpInterfaceRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting a...
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Vectorization pattern for memref::CopyOp.
LogicalResult matchAndRewrite(memref::CopyOp copyOp, PatternRewriter &rewriter) const override
Rewrites 2-D depthwise convolution ops with size-1 (w, kw) or (h, kh) dimensions into 1-D depthwise c...
FailureOr< DepthwiseConv1DNwcWcOp > returningMatchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp, PatternRewriter &rewriter) const
LogicalResult matchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp, PatternRewriter &rewriter) const override
DownscaleDepthwiseConv2DNhwcHwcOp(MLIRContext *context, PatternBenefit benefit=1)
Rewrites 2-D convolution ops with size-1 window dimensions into 1-D convolution ops.
LogicalResult matchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const override
FailureOr< Conv1DOp > returningMatchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const
Transformation information returned after reduction tiling.
scf::ForeachThreadOp loops
The scf.foreach_thread operation that iterate over the tiles.
Operation * mergeOp
The final reduction operation merging all the partial reductions.
Operation * initialOp
The op initializing the tensor used for partial reductions.
Operation * parallelTiledOp
The partial reduction tiled op generated.
Rewrite a TilingInterface op to a tiled scf.foreach_thread, applying tiling by numThreads.
Rewrites a tensor::PackOp into a sequence of tensor.pad + linalg.transpose + tensor....
LogicalResult matchAndRewrite(tensor::PackOp packOp, PatternRewriter &rewriter) const override
Rewrites a tensor::UnPackOp into a sequence of rank-reduced extract_slice op.
LogicalResult matchAndRewrite(tensor::UnPackOp unpackOp, PatternRewriter &rewriter) const override
Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and InsertSliceOp.
OptimizeCopyFn optimizeCopyFn
LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const override
Value createFillOrGenerateOp(PatternRewriter &rewriter, tensor::PadOp padOp, Value dest, const SmallVector< Value > &dynSizes) const
Filling dest using FillOp constant padding value if possible.
GeneralizePadOpPattern(MLIRContext *context, OptimizeCopyFn optimizeCopyFn=nullptr, PatternBenefit benefit=1)
Match and rewrite for the pattern:
LogicalResult matchAndRewrite(vector::TransferReadOp xferOp, PatternRewriter &rewriter) const override
TODO: use interfaces, side-effects and aliasing analysis as appropriate, when available.
Match and rewrite for the pattern:
LogicalResult matchAndRewrite(vector::TransferWriteOp xferOp, PatternRewriter &rewriter) const override
TODO: use interfaces, side-effects and aliasing analysis as appropriate, when available.
Linalg generalization pattern.
LogicalResult matchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const override
FailureOr< GenericOp > returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const
matchAndRewrite implementation that returns the significant transformed pieces of IR.
Options that allow distribution of loops generated in Linalg transforms to processors while generatin...
SmallVector< Attribute > paddingValues
A padding value for every operand.
SmallVector< bool > packPaddings
A flag for every operand to mark the PadOp as nofold which enables packing for statically shaped oper...
LinalgPaddingOptions & setPaddingDimensions(ArrayRef< int64_t > pd)
LinalgPaddingOptions & setTransposePaddings(ArrayRef< SmallVector< int64_t >> tp)
SmallVector< SmallVector< int64_t > > transposePaddings
A permutation vector for every operand used to transpose the packed PadOp results.
LinalgPaddingOptions & setPaddingValues(ArrayRef< Attribute > pv)
LinalgPaddingOptions & setPackPaddings(ArrayRef< bool > pp)
LinalgPaddingOptions & setHoistPaddings(ArrayRef< int64_t > hp)
SmallVector< int64_t > hoistPaddings
A number of loops to hoist the PadOp out for every operand.
SmallVector< int64_t > paddingDimensions
A list of iterator dimensions to pad.
LogicalResult matchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const override
FailureOr< LinalgOp > returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const
matchAndRewrite implementation that returns the significant transformed pieces of IR.
LinalgPaddingPattern(MLIRContext *context, LinalgPaddingOptions options=LinalgPaddingOptions(), PatternBenefit benefit=1)
Linalg padding pattern.
std::optional< LinalgLoopDistributionOptions > tileDistribution
When specified, specifies distribution of generated tile loops to processors.
LinalgTilingAndFusionOptions & setTileSizes(ArrayRef< int64_t > ts)
SmallVector< int64_t > tileInterchange
Tile interchange used to permute the tile loops.
LinalgTilingAndFusionOptions & setDistributionOptions(LinalgLoopDistributionOptions distributionOptions)
SmallVector< int64_t > tileSizes
Tile sizes used to tile the root operation.
LinalgTilingOptions & setLoopType(LinalgTilingLoopType lt)
LinalgTilingOptions & setDistributionTypes(ArrayRef< StringRef > types)
LinalgTilingOptions & setInterchange(ArrayRef< unsigned > interchange)
LinalgTilingLoopType loopType
The type of tile loops to generate.
LinalgTilingOptions & setTileSizeComputationFunction(TileSizeComputationFunction fun)
LinalgTilingOptions & setTileSizes(const SmallVector< Value, 4 > &ts)
Set the tileSizeComputationFunction to return the values ts.
LinalgTilingOptions & setPeeledLoops(ArrayRef< int64_t > loops)
SmallVector< int64_t > peeledLoops
Peel the specified loops.
LinalgTilingOptions & setDistributionOptions(LinalgLoopDistributionOptions distributionOptions)
SmallVector< unsigned, 4 > interchangeVector
The interchange vector to reorder the tiled loops.
TileSizeComputationFunction tileSizeComputationFunction
Computation function that returns the tile sizes for each operation.
LinalgTilingOptions & scalarizeDynamicDims()
Tile all dynamic dimensions by 1.
std::optional< LinalgLoopDistributionOptions > distribution
When specified, specifies distribution of generated tile loops to processors.
SmallVector< StringRef, 2 > distributionTypes
Specification markers of how to distribute the linalg.tiled_loop.
A description of a multi-size tiling comprising tile sizes and numbers of tiles, expressed as Values ...
Struct to hold the result of a packTranspose call.
tensor::PackOp transposedPackOp
linalg::LinalgOp transposedLinalgOp
tensor::UnPackOp transposedUnPackOp
Apply transformation to split the single linalg op reduction into a parallel and reduction dimension.
LinalgOp resultCombiningLinalgOp
Perform standalone tiling of a single LinalgOp by tileSizes.
SmallVector< Operation *, 8 > loops
SmallVector< Value, 4 > tensorResults
T lowTripCount
Number of tiles associated with each size.