9 #ifndef MLIR_DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H
10 #define MLIR_DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H
27 #include "llvm/ADT/SmallBitVector.h"
28 #include "llvm/ADT/SmallSet.h"
31 namespace bufferization {
33 class OneShotAnalysisState;
34 class BufferizationState;
40 enum class WinogradConv2DFmr : uint32_t;
54 enum class AllocOp { MemrefAlloc = 0, MemrefAlloca = 1 };
58 MaterializeInDestination = 0,
62 MemcpyOp memcpyOp = MemcpyOp::MaterializeInDestination;
67 bool bufferizeDestinationOnly =
false;
73 bool emitDealloc =
false;
95 tensor::PadOp padOp,
Attribute memorySpace = {},
118 const BufferizeToAllocationOptions &
options,
119 vector::MaskOp maskOp,
Attribute memorySpace = {},
120 Operation *insertionPoint =
nullptr);
129 const BufferizeToAllocationOptions &
options,
130 bufferization::AllocTensorOp allocTensorOp,
131 Attribute memorySpace = {},
132 Operation *insertionPoint =
nullptr);
151 const BufferizeToAllocationOptions &
options,
152 Operation *op, Attribute memorySpace = {},
153 Operation *insertionPoint =
nullptr);
181 RewriterBase &rewriter, Operation *op,
182 bufferization::OneShotAnalysisState &state);
307 assert(size &&
"expected non-null size");
314 std::pair<unsigned, unsigned>(operandIndex, dimIndex),
nullptr);
411 unsigned size = useFullTiles.size();
412 llvm::SmallBitVector tmp(size,
false);
413 for (
unsigned i = 0; i < size; ++i)
414 tmp[i] = useFullTiles[i];
511 bool vectorizeNDExtract =
false,
512 bool flatten1DDepthwiseConv =
false);
530 if (
auto genericOp = dyn_cast_or_null<GenericOp>(op)) {
531 return llvm::to_vector(llvm::seq<unsigned>(0, genericOp.getNumLoops()));
533 if (
auto padOp = dyn_cast_or_null<tensor::PadOp>(op)) {
534 return llvm::to_vector(
535 llvm::seq<unsigned>(0, padOp.getSourceType().getRank()));
548 const llvm::SmallDenseSet<unsigned> &droppedDims)>;
550 FailureOr<DropUnitDimsResult>
565 FailureOr<ElementwiseOpFusionResult>
629 std::function<FailureOr<SmallVector<OpFoldResult>>(
648 FailureOr<TilingInterface>
668 FailureOr<PackingResult>
670 scf::ForOp outermostEnclosingForOp,
726 tensor::PadOp &hoistedOp,
732 tensor::PadOp &hoistedOp,
820 GenericOp genericOp);
832 FailureOr<PromotionInfo>
834 bool useOriginalSubviewSize,
852 memref::SubViewOp subview,
865 memref::SubViewOp subview,
898 FailureOr<VectorizationResult>
902 bool vectorizeNDExtract =
false,
bool flatten1DDepthwiseConv =
false,
903 bool assumeDynamicDimsMatchVecSizes =
false,
904 bool createNamedContraction =
false);
907 LogicalResult
vectorizeCopy(RewriterBase &builder, memref::CopyOp copyOp);
937 template <
typename T>
945 template <
typename T>
995 FailureOr<MultiSizeSpecification>
998 bool emitAssertions =
true);
999 FailureOr<StaticMultiSizeSpecification>
1003 FailureOr<StaticContinuousTileSizeSpecification>
1005 unsigned targetSize);
1006 FailureOr<ContinuousTileSizeSpecification>
1009 bool emitAssertions);
1047 FailureOr<ForallReductionTilingResult>
1051 std::optional<ArrayAttr> mapping = std::nullopt);
1156 FailureOr<SplitReductionResult>
1159 bool useAlloc =
false);
1208 FailureOr<SplitReductionResult>
1211 bool useAlloc =
false);
1233 FailureOr<CollapseResult>
1246 linalg::PackOp packOp,
1247 bool lowerPadLikeWithInsertSlice =
true);
1257 FailureOr<LowerUnPackOpResult>
1259 bool lowerUnpadLikeWithExtractSlice =
true);
1270 FailureOr<PackResult>
pack(
RewriterBase &rewriter, linalg::LinalgOp linalgOp,
1287 FailureOr<PackTransposeResult>
1289 linalg::LinalgOp linalgOp, linalg::UnPackOp maybeUnPackOp,
1300 FailureOr<PackResult>
1339 std::function<std::optional<BlockPackMatmulOptions>(linalg::LinalgOp)>;
1362 FailureOr<PackResult>
1367 FailureOr<Operation *>
1369 tensor::FromElementsOp fromElementsOp);
1372 FailureOr<Operation *>
1374 tensor::GenerateOp generateOp);
1378 tensor::PadOp padOp);
1414 FailureOr<std::pair<Operation *, Operation *>>
1422 FailureOr<std::pair<Operation *, Operation *>>
1429 FailureOr<std::pair<Operation *, Operation *>>
1431 linalg::DepthwiseConv2DNhwcHwcOp convOp);
1438 FailureOr<std::pair<Operation *, Operation *>>
1444 linalg::Conv2DNhwcFhwcOp op);
1446 linalg::Conv2DNhwcFhwcQOp op);
1450 linalg::MatmulOp op,
1451 bool transposeLHS =
true);
1453 linalg::BatchMatmulOp op,
1454 bool transposeLHS =
true);
1460 linalg::Conv2DNhwcFhwcOp op,
1461 WinogradConv2DFmr fmr);
1474 FailureOr<Operation *>
1476 linalg::WinogradFilterTransformOp op);
1495 FailureOr<Operation *>
1497 linalg::WinogradInputTransformOp op);
1516 FailureOr<Operation *>
1518 linalg::WinogradOutputTransformOp op);
1526 RewriterBase &rewriter, linalg::GenericOp genericOp,
bool removeOutputs);
1536 template <
typename Conv2DOp,
typename Conv1DOp>
1550 extern template struct DownscaleSizeOneWindowed2DConvolution<Conv2DNhwcHwcfOp,
1552 extern template struct DownscaleSizeOneWindowed2DConvolution<Conv2DNchwFchwOp,
1563 FailureOr<DepthwiseConv1DNwcWcOp>
1600 FailureOr<GenericOp>
1614 FailureOr<GenericOp>
1795 using ControlFn = std::function<std::optional<bool>(tensor::ExtractSliceOp)>;
1880 bool removeDeadArgsAndResults =
true);
1938 std::function<SmallVector<ReassociationIndices>(linalg::LinalgOp)>;
1999 bool useAlloc =
false);
2003 bool transposeLHS =
true);
2011 WinogradConv2DFmr fmr);
static llvm::ManagedStatic< PassManagerOptions > options
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Attributes are known-constant values of operations.
The main mechanism for performing data layout queries.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
This class represents a single result from folding an operation.
This class represents an operand of an operation.
Operation is the basic unit of execution within MLIR.
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very li...
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
FailureOr< PackingResult > buildPackingLoopNest(RewriterBase &rewriter, tensor::PadOp opToHoist, scf::ForOp outermostEnclosingForOp, ArrayRef< int64_t > transposeVector)
Build the packing loop nest required to hoist opToHoist above outermostEnclosingForOp.
void populateMoveInitOperandsToInputPattern(RewritePatternSet &patterns)
A pattern that converts init operands to input operands.
void populateTransposeMatmulPatterns(RewritePatternSet &patterns, bool transposeLHS=true)
Patterns to convert Linalg matmul ops to transposed variants.
void populateContractionOpRankReducingPatterns(RewritePatternSet &patterns)
Adds patterns that reduce the rank of named contraction ops that have unit dimensions in the operand(...
LogicalResult rewriteAsPaddedOp(RewriterBase &rewriter, LinalgOp opToPad, const LinalgPaddingOptions &options, LinalgOp &paddedOp, SmallVector< Value > &replacements, SmallVector< tensor::PadOp > &padOps)
Pad the iterator dimensions options.paddingDimensions of all opToPad operands to a static bounding bo...
void populateSplitReductionPattern(RewritePatternSet &patterns, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
Patterns to apply splitReduction below.
void populateFuseTensorPadWithProducerLinalgOpPatterns(RewritePatternSet &patterns)
Pattern to fuse a tensor.pad operation with the producer of its source, if the producer is a linalg o...
FailureOr< std::pair< Operation *, Operation * > > rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcHwcfOp convOp)
Convert linalg.conv_2d_nhwc_hwcf into linalg.generic (for img2col packing) and linalg....
bool areDimSequencesPreserved(ArrayRef< AffineMap > maps, ArrayRef< ReassociationIndices > dimSequences)
Return true if all sequences of dimensions specified in dimSequences are contiguous in all the ranges...
bool hasVectorizationImpl(Operation *)
Return true if there's dedicated logic in the Linalg Vectorizer to vectorize this Op,...
void populateExtractSliceSinkingPatterns(RewritePatternSet &patterns, const ControlPropagationFn &controlPackUnPackPropagation)
Patterns to sink extract slice across other operations.
void populateBubbleUpExtractSliceOpPatterns(RewritePatternSet &patterns)
Patterns that are used to bubble up extract slice op above linalg op.
void transformIndexOps(RewriterBase &b, LinalgOp op, SmallVectorImpl< Value > &ivs, const LoopIndexToRangeIndexMap &loopIndexToRangeIndex)
All indices returned by IndexOp should be invariant with respect to tiling.
std::function< std::optional< Value >(OpBuilder &b, memref::SubViewOp subView, ArrayRef< Value > boundingSubViewSize, DataLayout &layout)> AllocBufferCallbackFn
Callback function type used to perform the allocation for the promoted subView.
void populateBlockPackMatmulPatterns(RewritePatternSet &patterns, const ControlBlockPackMatmulFn &controlFn)
Patterns to block pack Linalg matmul ops.
void populateConvertConv2DToImg2ColPatterns(RewritePatternSet &patterns)
Populates patterns to transform linalg.conv_2d_xxx operations into linalg.generic (for img2col packin...
FailureOr< Operation * > decomposeWinogradFilterTransformOp(RewriterBase &rewriter, linalg::WinogradFilterTransformOp op)
Rewrite linalg.winograd_filter_transform.
DenseMap< int, int > LoopIndexToRangeIndexMap
Creates a number of ranges equal to the number of non-zero in tileSizes.
std::optional< Value > allocateWorkgroupMemory(OpBuilder &builder, memref::SubViewOp subview, ArrayRef< Value > sizeBounds, DataLayout &)
Allocate the subview in the GPU workgroup memory.
FailureOr< PackTransposeResult > packTranspose(RewriterBase &rewriter, linalg::PackOp packOp, linalg::LinalgOp linalgOp, linalg::UnPackOp maybeUnPackOp, ArrayRef< int64_t > outerPerm, ArrayRef< int64_t > innerPerm)
Transpose a single PackOp -> LinalgOp -> UnPackOp chain and return the transposed PackOp -> LinalgOp ...
std::function< IndexingMapOpInterface(Location loc, OpBuilder &, IndexingMapOpInterface, ArrayRef< Value > newOperands, ArrayRef< AffineMap > newIndexingMaps, const llvm::SmallDenseSet< unsigned > &droppedDims)> DroppedUnitDimsBuilder
Value bufferizeToAllocation(RewriterBase &rewriter, const BufferizeToAllocationOptions &options, tensor::PadOp padOp, Attribute memorySpace={}, Operation *insertionPoint=nullptr)
Materialize a buffer allocation for the given tensor.pad op and lower the op to linalg....
FailureOr< VectorizationResult > vectorize(RewriterBase &rewriter, Operation *op, ArrayRef< int64_t > inputVectorSizes={}, ArrayRef< bool > inputScalableVecDims={}, bool vectorizeNDExtract=false, bool flatten1DDepthwiseConv=false, bool assumeDynamicDimsMatchVecSizes=false, bool createNamedContraction=false)
Returns a VectorizationResult containing the results of the vectorized op, or failure if the transfor...
std::function< bool(OpOperand *fusedOperand)> ControlFusionFn
Function type which is used to control when to stop fusion.
bool isDimSequencePreserved(AffineMap map, ReassociationIndicesRef dimSequence)
Return true if a given sequence of dimensions are contiguous in the range of the specified indexing m...
FailureOr< Value > hoistPaddingOnTensors(RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops, ArrayRef< int64_t > transposeVector, tensor::PadOp &hoistedOp, SmallVectorImpl< TransposeOp > &transposeOps)
Mechanically hoist padding operations on tensors by numLoops into a new, generally larger tensor.
SmallVector< OpFoldResult > computePaddedShape(RewriterBase &rewriter, TypedValue< RankedTensorType > v, AffineMap indexingMap, ArrayRef< OpFoldResult > indexingSizes, const PadTilingInterfaceOptions &options)
Helper function to compute the padded shape of the given value v of RankedTensorType given:
void populateDecomposeProjectedPermutationPatterns(RewritePatternSet &patterns)
Add patterns to make explicit broadcasts and transforms in the input operands of a genericOp.
FailureOr< LinalgOp > specializeGenericOp(RewriterBase &rewriter, GenericOp genericOp)
Create a namedOp from the given GenericOp and replace the GenericOp.
void populateFoldReshapeOpsByCollapsingPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlFoldingReshapes)
Patterns to fold an expanding tensor.expand_shape operation with its producer generic operation by co...
LinalgTilingLoopType
The type of loops to be generated during tiling.
FailureOr< LowerUnPackOpResult > lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp, bool lowerUnpadLikeWithExtractSlice=true)
Rewrite pack as empty + transpose + reshape + extract_slice.
std::function< LogicalResult(OpBuilder &b, Value buffer)> DeallocBufferCallbackFn
Callback function type used to deallocate the buffers used to hold the promoted subview.
void populateDataLayoutPropagationPatterns(RewritePatternSet &patterns, const ControlPropagationFn &controlPackUnPackPropagation)
Patterns to bubble up or down data layout ops across other operations.
void populatePadOpVectorizationPatterns(RewritePatternSet &patterns, PatternBenefit baseBenefit=1)
Populates patterns with patterns that vectorize tensor.pad.
void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns)
void populateLinalgFoldIntoElementwisePatterns(RewritePatternSet &patterns)
Populates patterns with patterns that fold operations like linalg.transform into elementwise op map.
LogicalResult deallocateGPUPrivateMemory(OpBuilder &, Value)
In case of GPU private memory there is no need to deallocate since the memory is freed when going out...
void populateSparseTensorRewriting(RewritePatternSet &patterns)
Populate patterns that are only useful in the context of sparse tensors.
FailureOr< Operation * > decomposeWinogradOutputTransformOp(RewriterBase &rewriter, linalg::WinogradOutputTransformOp op)
Rewrite linalg.winograd_output_transform.
void populateWinogradConv2DPatterns(RewritePatternSet &patterns, WinogradConv2DFmr fmr)
Patterns to apply Winograd Conv2D algorithm F(m x m, r x r).
FailureOr< ElementwiseOpFusionResult > fuseElementwiseOps(RewriterBase &rewriter, OpOperand *fusedOperand)
FailureOr< PromotionInfo > promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, memref::SubViewOp subView, bool useOriginalSubviewSize, const AllocBufferCallbackFn &allocationFn, DataLayout &layout)
llvm::SmallDenseSet< int > getPreservedProducerResults(GenericOp producer, GenericOp consumer, OpOperand *fusedOperand)
Returns a set of indices of the producer's results which would be preserved after the fusion.
std::optional< Value > allocateGPUPrivateMemory(OpBuilder &builder, memref::SubViewOp subview, ArrayRef< Value > sizeBounds, DataLayout &)
Allocate the subview in the GPU private memory.
void populateSimplifyDepthwiseConvPatterns(RewritePatternSet &patterns)
Patterns to simplify depthwise convolutions.
FailureOr< Operation * > rewriteInDestinationPassingStyle(RewriterBase &rewriter, tensor::FromElementsOp fromElementsOp)
Rewrite tensor.from_elements to linalg.generic.
FailureOr< PackResult > blockPackMatmul(RewriterBase &rewriter, linalg::LinalgOp linalgOp, const ControlBlockPackMatmulFn &controlPackMatmul)
Pack a matmul operation into blocked 4D layout.
void peelLoops(RewriterBase &rewriter, ArrayRef< scf::ForOp > loops)
Peel 'loops' and applies affine_min/max bounds simplification on the fly where relevant.
void populateConvertToDestinationStylePatterns(RewritePatternSet &patterns)
Populate patterns that convert non-destination-style ops to destination style ops.
FailureOr< Operation * > transposeConv2D(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp op)
Convert linalg.conv_2d_nhwc_fhwc(_q) to linalg.conv_2d_nhwc_hwcf(_q) by materializing transpose.
void populateFoldUnitExtentDimsPatterns(RewritePatternSet &patterns, ControlDropUnitDims &options)
Patterns to fold unit-extent dimensions in operands/results of linalg ops on tensors via reassociativ...
LogicalResult copyToWorkgroupMemory(OpBuilder &b, Value src, Value dst)
Create Memref copy operations and add gpu barrier guards before and after the copy operation to ensur...
std::function< SmallVector< Value, 4 >(OpBuilder &, Operation *)> TileSizeComputationFunction
std::function< LogicalResult(RewriterBase &, tensor::PadOp, Value)> OptimizeCopyFn
void populateElementwiseToLinalgConversionPatterns(RewritePatternSet &patterns)
Populate patterns that convert ElementwiseMappable ops to linalg parallel loops.
LogicalResult linalgOpAnchoredEmptyTensorEliminationStep(RewriterBase &rewriter, Operation *op, bufferization::OneShotAnalysisState &state)
Try to eliminate tensor::EmptyOps inside op that are anchored on a LinalgOp.
FailureOr< LinalgLoops > linalgOpToLoops(RewriterBase &rewriter, LinalgOp linalgOp)
Emit a loop nest of scf.for with the proper body for linalgOp.
FailureOr< GenericOp > generalizeNamedOp(RewriterBase &rewriter, LinalgOp linalgOp)
Create a GenericOp from the given named operation linalgOp and replace the given linalgOp.
std::tuple< SmallVector< Range, 4 >, LoopIndexToRangeIndexMap > makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > allShapeSizes, ArrayRef< OpFoldResult > allTileSizes)
FailureOr< Operation * > transposeBatchMatmul(RewriterBase &rewriter, linalg::BatchMatmulOp op, bool transposeLHS=true)
Pattern to replace.
LogicalResult promoteSubviewsPrecondition(Operation *op, LinalgPromotionOptions options)
Promote memref.subviews feeding linalg-on-buffers operations.
LogicalResult copyToGPUPrivateMemory(OpBuilder &b, Value src, Value dst)
Normal copy to between src and dst.
FailureOr< linalg::GenericOp > deduplicateOperandsAndRemoveDeadResults(RewriterBase &rewriter, linalg::GenericOp genericOp, bool removeOutputs)
Method to deduplicate operands and remove dead results of linalg.generic operations.
void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns, PatternBenefit benefit=1)
Linalg decompose convolutions patterns.
void populateDecomposeWinogradOpsPatterns(RewritePatternSet &patterns)
Patterns to decompose Winograd operators.
void populateConvolutionVectorizationPatterns(RewritePatternSet &patterns, PatternBenefit benefit=1)
Populate patterns for vectorizing low-D convolution ops.
std::function< bool(OpOperand *opOperand)> ControlFoldIntoPackUnpackFn
Function type which is used to control folding operations like tensor.pad and tensor....
FailureOr< Operation * > winogradConv2D(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp op, WinogradConv2DFmr fmr)
Convert linalg.conv_2d_nhwc_fhwc to Winograd Conv2D algorithm F(m x m, r x r).
LogicalResult vectorizeCopy(RewriterBase &builder, memref::CopyOp copyOp)
Emit a suitable vector form for a Copy op with fully static shape.
FailureOr< SmallVector< OpFoldResult > > computeIndexingMapOpInterfacePaddedShape(RewriterBase &rewriter, OpOperand &operandToPad, ArrayRef< Range > iterationDomain, const PadTilingInterfaceOptions &options)
Specific helper for Linalg ops.
LogicalResult vectorizeOpPrecondition(Operation *op, ArrayRef< int64_t > inputVectorSizes={}, ArrayRef< bool > inputScalableVecDims={}, bool vectorizeNDExtract=false, bool flatten1DDepthwiseConv=false)
Return success if the operation can be vectorized.
FailureOr< GenericOp > interchangeGenericOp(RewriterBase &rewriter, GenericOp genericOp, ArrayRef< unsigned > interchangeVector)
Interchange the iterator_types and iterator_maps dimensions and adapts the index accesses of op.
void populateCollapseDimensions(RewritePatternSet &patterns, const GetCollapsableDimensionsFn &controlCollapseDimensions)
Pattern to collapse dimensions in a linalg.generic op.
bool areElementwiseOpsFusable(OpOperand *fusedOperand)
Return true if two linalg.generic operations with producer/consumer relationship through fusedOperand...
FailureOr< StaticMultiSizeSpecification > computeStaticMultiTileSizes(LinalgOp op, unsigned dimension, int64_t targetSize, int64_t divisor)
FailureOr< LinalgLoops > linalgOpToAffineLoops(RewriterBase &rewriter, LinalgOp linalgOp)
Emit a loop nest of affine.for with the proper body for linalgOp.
void populateDecomposePackUnpackPatterns(RewritePatternSet &patterns)
Populates patterns to decompose linalg.pack and linalg.unpack Ops into e.g.
void populateEraseUnusedOperandsAndResultsPatterns(RewritePatternSet &patterns)
Pattern to remove dead operands and results of linalg.generic operations.
FailureOr< ContinuousTileSizeSpecification > computeContinuousTileSizes(OpBuilder &builder, TilingInterface op, unsigned dimension, OpFoldResult targetSize, bool emitAssertions)
FailureOr< StaticContinuousTileSizeSpecification > computeStaticContinuousTileSizes(LinalgOp op, unsigned dimension, unsigned targetSize)
std::function< LogicalResult(OpBuilder &b, Value src, Value dst)> CopyCallbackFn
Callback function type used to insert copy from original subview to subview of the promoted region fo...
FailureOr< SplitReductionResult > splitReduction(RewriterBase &b, LinalgOp op, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
void populateSimplifyPackAndUnpackPatterns(RewritePatternSet &patterns)
Populates patterns with patterns that simplify tensor.pack and tensor.unpack operations.
void populateFoldPackUnpackIntoTensorEmptyPatterns(RewritePatternSet &patterns)
Populates patterns with patterns that fold operations like linalg.pack and linalg....
FailureOr< LinalgOp > padAndHoistLinalgOp(RewriterBase &rewriter, LinalgOp linalgOp, const LinalgPaddingOptions &options)
Apply padding and hoisting to linalgOp according to the configuration specified in options.
void populateDecomposeLinalgOpsPattern(RewritePatternSet &patterns, bool removeDeadArgsAndResults=true)
Populate patterns for splitting a LinalgOp with multiple statements within its payload into multiple ...
std::function< bool(OpOperand *opOperand)> ControlPropagationFn
Function type which is used to control propagation of linalg.pack/unpack ops.
void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns, const ControlFoldIntoPackUnpackFn &controlFn=nullptr)
Populates patterns with patterns that fold operations like tensor.pad and tensor.extract_slice into t...
FailureOr< ForallReductionTilingResult > tileReductionUsingForall(RewriterBase &b, PartialReductionOpInterface op, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > tileSizes={}, std::optional< ArrayAttr > mapping=std::nullopt)
Method to tile a reduction to parallel iterations computing partial reductions.
FailureOr< PackResult > packMatmulGreedily(RewriterBase &rewriter, LinalgOp linalgOp, ArrayRef< OpFoldResult > mnkPackedSizes, ArrayRef< int64_t > mnkPaddedSizesNextMultipleOf, ArrayRef< int64_t > mnkOrder)
Pack a LinalgOp by greedily inferring matmul dimensions (m, n, k) where m and n are proper parallel d...
FailureOr< PackResult > pack(RewriterBase &rewriter, linalg::LinalgOp linalgOp, ArrayRef< OpFoldResult > packedSizes)
Implement packing of a single LinalgOp by packedSizes.
void populateEraseUnnecessaryInputsPatterns(RewritePatternSet &patterns)
Patterns to promote inputs to outputs and remove unused inputs of linalg.generic ops.
FailureOr< TiledLinalgOp > tileLinalgOp(RewriterBase &b, LinalgOp op, const LinalgTilingOptions &options)
std::function< SmallVector< ReassociationIndices >(linalg::LinalgOp)> GetCollapsableDimensionsFn
Function type to control generic op dimension collapsing.
std::function< FailureOr< SmallVector< OpFoldResult > >(RewriterBase &, OpOperand &, ArrayRef< Range >, const PadTilingInterfaceOptions &)> PadSizeComputationFunction
void populateFoldReshapeOpsByExpansionPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlFoldingReshapes)
Patterns to fold an expanding (collapsing) tensor_reshape operation with its producer (consumer) gene...
void populateSwapExtractSliceWithFillPatterns(RewritePatternSet &patterns)
Adds patterns that waps tensor.extract_slice(linalg.fill(cst, init)) into linalg.fill(cst,...
FailureOr< DropUnitDimsResult > dropUnitDims(RewriterBase &rewriter, IndexingMapOpInterface op, const DroppedUnitDimsBuilder &droppedUnitDimsBuilder, const ControlDropUnitDims &options)
void populateInlineConstantOperandsPatterns(RewritePatternSet &patterns)
Patterns that are used to inline constant operands into linalg generic ops.
FailureOr< LinalgOp > promoteSubViews(OpBuilder &b, LinalgOp op, const LinalgPromotionOptions &options)
Promote the subViews into a new buffer allocated at the insertion point b.
void populateConstantFoldLinalgOperations(RewritePatternSet &patterns, const ControlFusionFn &controlFn)
Patterns to constant fold Linalg operations.
std::function< SplitReductionOptions(LinalgOp op)> ControlSplitReductionFn
Function signature to control reduction splitting.
LogicalResult deallocateWorkgroupMemory(OpBuilder &, Value)
In case of GPU group memory there is no need to deallocate.
FailureOr< Operation * > transposeMatmul(RewriterBase &rewriter, linalg::MatmulOp op, bool transposeLHS=true)
Convert Linalg matmul ops to transposed variants.
void populateLinalgNamedOpsGeneralizationPatterns(RewritePatternSet &patterns)
Linalg generalization patterns.
void populateLinalgGenericOpsSpecializationPatterns(RewritePatternSet &patterns)
Populates patterns with patterns to convert linalg.generic ops to named ops where possible.
std::function< std::optional< BlockPackMatmulOptions >(linalg::LinalgOp)> ControlBlockPackMatmulFn
Function type which is used to control matmul packing.
void populateLinalgNamedToElementwisePatterns(RewritePatternSet &patterns)
Populates patterns that convert linalg named ops e.g.
enum WinogradConv2DFmr uint32_t std::optional< vector::CombiningKind > getCombinerOpKind(Operation *combinerOp)
Return vector::CombiningKind for the given op.
SmallVector< Value > peelLoop(RewriterBase &rewriter, Operation *op)
Try to peel and canonicalize loop op and return the new result.
RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx)
Canonicalization patterns relevant to apply after tiling patterns.
FailureOr< CollapseResult > collapseOpIterationDims(LinalgOp op, ArrayRef< ReassociationIndices > foldedIterationDims, RewriterBase &rewriter)
Collapses dimensions of linalg.generic/linalg.copy operation.
FailureOr< Operation * > decomposeWinogradInputTransformOp(RewriterBase &rewriter, linalg::WinogradInputTransformOp op)
Rewrite linalg.winograd_input_transform.
void populateDecomposePadPatterns(RewritePatternSet &patterns)
Populates patterns to decompose tensor.pad into e.g.
void populateFoldAddIntoDestPatterns(RewritePatternSet &patterns)
Pattern to replace linalg.add when destination passing on a contraction op suffices for achieving the...
std::pair< TilingInterface, TilingInterface > splitOp(RewriterBase &rewriter, TilingInterface op, unsigned dimension, OpFoldResult splitPoint)
Split the given op into two parts along the given iteration space dimension at the specified splitPoi...
void populateElementwiseOpsFusionPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlElementwiseOpFusion)
Patterns for fusing linalg operation on tensors.
FailureOr< SplitReductionResult > splitReductionByScaling(RewriterBase &b, LinalgOp op, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
Scaling-based implementation of the split reduction transformation.
FailureOr< MultiSizeSpecification > computeMultiTileSizes(OpBuilder &builder, LinalgOp op, unsigned dimension, OpFoldResult targetSize, OpFoldResult divisor, bool emitAssertions=true)
Emits the IR computing the multi-sized tiling specification with two tile sizes not exceeding targetS...
FailureOr< LowerPackResult > lowerPack(RewriterBase &rewriter, linalg::PackOp packOp, bool lowerPadLikeWithInsertSlice=true)
Rewrite pack as pad + reshape + transpose.
FailureOr< LinalgLoops > linalgOpToParallelLoops(RewriterBase &rewriter, LinalgOp linalgOp)
Emit a loop nest of scf.parallel with the proper body for linalgOp.
Include the generated interface declarations.
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
ArrayRef< int64_t > ReassociationIndicesRef
const FrozenRewritePatternSet & patterns
OpInterfaceRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting a...
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
SmallVector< int64_t, 3 > mnkOrder
Permutation of matmul (M, N, K) dimensions order.
SmallVector< int64_t, 3 > blockFactors
Minor block factors (mb, nb, kb) for packing relayout where mb, mn are the parallel dimensions and kb...
bool rhsTransposeOuterBlocks
Transpose RHS outer block layout [KB][NB] -> [NB][KB].
bool lhsTransposeInnerBlocks
Transpose LHS inner block layout [mb][kb] -> [kb][mb].
SmallVector< int64_t, 3 > mnkPaddedSizesNextMultipleOf
Next multiples of the packing sizes.
bool lhsTransposeOuterBlocks
Transpose LHS outer block layout [MB][KB] -> [KB][MB].
bool allowPadding
If true, allows packing of dimensions that only partially fit into the block factors.
bool rhsTransposeInnerBlocks
Transpose RHS inner block layout [kb][nb] -> [nb][kb].
SmallVector< Value > results
Transformation to drop unit-extent dimensions from linalg.generic operations.
RankReductionStrategy rankReductionStrategy
std::function< SmallVector< unsigned >(Operation *)> ControlFnTy
Vectorization pattern for memref::CopyOp.
LogicalResult matchAndRewrite(memref::CopyOp copyOp, PatternRewriter &rewriter) const override
Rewrites a linalg::PackOp into a sequence of:
LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override
Rewrites a linalg::UnPackOp into a sequence of rank-reduced.
LogicalResult matchAndRewrite(linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const override
Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and InsertSliceOp.
LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const override
Value createFillOrGenerateOp(RewriterBase &rewriter, tensor::PadOp padOp, Value dest, const SmallVector< Value > &dynSizes) const
Filling dest using FillOp constant padding value if possible.
DecomposePadOpPattern(MLIRContext *context, PatternBenefit benefit=1)
LogicalResult matchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const override
FailureOr< Conv1DOp > returningMatchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const
DownscaleConv2DOp(MLIRContext *context, PatternBenefit benefit=1)
Rewrites 2-D depthwise convolution ops with size-1 (w, kw) or (h, kh) dimensions into 1-D depthwise c...
FailureOr< DepthwiseConv1DNwcWcOp > returningMatchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp, PatternRewriter &rewriter) const
LogicalResult matchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp, PatternRewriter &rewriter) const override
DownscaleDepthwiseConv2DNhwcHwcOp(MLIRContext *context, PatternBenefit benefit=1)
Rewrites 2-D convolution ops with size-1 window dimensions into 1-D convolution ops.
LogicalResult matchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const override
FailureOr< Conv1DOp > returningMatchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const
IndexingMapOpInterface resultOp
SmallVector< Value > replacements
Fuse two linalg.generic operations that have a producer-consumer relationship captured through fusedO...
llvm::DenseMap< Value, Value > replacements
Transformation information returned after reduction tiling.
SmallVector< Operation * > mergeOps
The final reduction operation merging all the partial reductions.
SmallVector< Value > initialValues
Initial values used for partial reductions.
scf::ForallOp loops
The scf.forall operation that iterate over the tiles.
SmallVector< Operation * > parallelTiledOps
The partial reduction tiled op generated.
Match and rewrite for the pattern:
LogicalResult matchAndRewrite(vector::TransferReadOp xferOp, PatternRewriter &rewriter) const override
TODO: use interfaces, side-effects and aliasing analysis as appropriate, when available.
Match and rewrite for the pattern:
LogicalResult matchAndRewrite(vector::TransferWriteOp xferOp, PatternRewriter &rewriter) const override
TODO: use interfaces, side-effects and aliasing analysis as appropriate, when available.
Linalg generalization pattern.
LogicalResult matchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const override
FailureOr< GenericOp > returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const
matchAndRewrite implementation that returns the significant transformed pieces of IR.
Options that allow distribution of loops generated in Linalg transforms to processors while generatin...
SmallVector< Attribute > paddingValues
A padding value for every operand.
@ BufferizationMaterializeInDestination
LinalgPaddingOptions & setPadToMultipleOf(ArrayRef< int64_t > m)
DenseMap< std::pair< unsigned, unsigned >, OpFoldResult > sizeToPadTo
A mapping between an operand and shape dim, and a size for a padding dimension.
std::optional< SmallVector< int64_t > > padToMultipleOf
A list of multiples to which each padding dimension should be padded to.
OpFoldResult getSizeToPadTo(unsigned operandIndex, unsigned dimIndex) const
Given the operand index and shape dim it returns the size to pad to.
LinalgPaddingOptions & setNofoldFlags(ArrayRef< bool > pp)
LinalgPaddingOptions & setPaddingDimensions(ArrayRef< int64_t > pd)
LinalgPaddingOptions & setTransposePaddings(ArrayRef< SmallVector< int64_t >> tp)
SmallVector< SmallVector< int64_t > > transposePaddings
A permutation vector for every operand used to transpose the packed PadOp results.
LinalgPaddingOptions & setSizeToPadTo(unsigned operandIndex, unsigned dimIndex, OpFoldResult size)
LinalgPaddingOptions & setPaddingValues(ArrayRef< Attribute > pv)
SmallVector< bool > nofoldFlags
A flag for every operand to mark the PadOp as nofold which enables packing for statically shaped oper...
LinalgPaddingOptions & setCopyBackOp(CopyBackOp op)
LinalgPaddingOptions & setHoistPaddings(ArrayRef< int64_t > hp)
SmallVector< int64_t > hoistPaddings
A number of loops to hoist the PadOp out for every operand.
SmallVector< int64_t > paddingDimensions
A list of iterator dimensions to pad.
CopyBackOp copyBackOp
The op to be used for copying the padded result to the original destination tensor.
LogicalResult matchAndRewrite(GenericOp op, PatternRewriter &rewriter) const override
FailureOr< GenericOp > returningMatchAndRewrite(GenericOp op, PatternRewriter &rewriter) const
std::optional< LinalgLoopDistributionOptions > tileDistribution
When specified, specifies distribution of generated tile loops to processors.
LinalgTilingAndFusionOptions & setTileSizes(ArrayRef< int64_t > ts)
SmallVector< int64_t > tileInterchange
Tile interchange used to permute the tile loops.
LinalgTilingAndFusionOptions & setDistributionOptions(LinalgLoopDistributionOptions distributionOptions)
SmallVector< int64_t > tileSizes
Tile sizes used to tile the root operation.
LinalgTilingOptions & setLoopType(LinalgTilingLoopType lt)
LinalgTilingOptions & setDistributionTypes(ArrayRef< StringRef > types)
LinalgTilingOptions & setInterchange(ArrayRef< unsigned > interchange)
LinalgTilingLoopType loopType
The type of tile loops to generate.
LinalgTilingOptions & setTileSizeComputationFunction(TileSizeComputationFunction fun)
LinalgTilingOptions & setTileSizes(const SmallVector< Value, 4 > &ts)
Set the tileSizeComputationFunction to return the values ts.
LinalgTilingOptions & setPeeledLoops(ArrayRef< int64_t > loops)
SmallVector< int64_t > peeledLoops
Peel the specified loops.
LinalgTilingOptions & setDistributionOptions(LinalgLoopDistributionOptions distributionOptions)
SmallVector< unsigned, 4 > interchangeVector
The interchange vector to reorder the tiled loops.
TileSizeComputationFunction tileSizeComputationFunction
Computation function that returns the tile sizes for each operation.
LinalgTilingOptions & scalarizeDynamicDims()
Tile all dynamic dimensions by 1.
std::optional< LinalgLoopDistributionOptions > distribution
When specified, specifies distribution of generated tile loops to processors.
SmallVector< StringRef, 2 > distributionTypes
Specification markers of how to distribute the linalg.tiled_loop.
linalg::TransposeOp transposeOp
tensor::ExpandShapeOp expandShapeOp
tensor::ExtractSliceOp extractSliceOp
linalg::TransposeOp transposeOp
tensor::CollapseShapeOp collapseShapeOp
A description of a multi-size tiling comprising tile sizes and numbers of tiles, expressed as Values ...
Struct to hold the result of a pack call.
SmallVector< linalg::UnPackOp > unPackOps
linalg::LinalgOp packedLinalgOp
SmallVector< linalg::PackOp > packOps
Struct to hold the result of a packTranspose call.
linalg::PackOp transposedPackOp
linalg::LinalgOp transposedLinalgOp
linalg::UnPackOp transposedUnPackOp
PadTilingInterfaceOptions & setPaddingSizes(ArrayRef< OpFoldResult > m)
SmallVector< Attribute > paddingValues
A padding value for every operand.
PadTilingInterfaceOptions & setPadToMultipleOf(bool b)
bool padToMultipleOf
Pad iterator paddingDimension[i] to next multiple of paddingSizes[i] if true.
PadTilingInterfaceOptions & setPaddingValues(ArrayRef< Attribute > pv)
SmallVector< OpFoldResult > paddingSizes
A list of iterator dimensions sizes to pad to.
Apply transformation to split the single linalg op reduction into a parallel and reduction dimension.
LinalgOp resultCombiningLinalgOp
Perform standalone tiling of a single LinalgOp by tileSizes.
SmallVector< Operation *, 8 > loops
SmallVector< Value, 4 > tensorResults
Transformation information returned after vectorizing.
SmallVector< Value > replacements
Results of the vectorization transform to replace the original operation.
SmallVector< T > tileSizes
Tile sizes.
SmallVector< T > tripCounts
Number of tiles associated with each size.
T lowTripCount
Number of tiles associated with each size.
Helper struct to hold the results of building a packing loop nest.
SmallVector< OpFoldResult > strides
SmallVector< Value > leadingPackedTensorIndexings
SmallVector< Value > clonedLoopIvs
SmallVector< OpFoldResult > sizes
TransposeOp maybeTransposeOp
tensor::PadOp hoistedPadOp
SmallVector< OpFoldResult > offsets