9#ifndef MLIR_DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H
10#define MLIR_DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H
27#include "llvm/ADT/SmallBitVector.h"
33class BufferizationState;
39enum class WinogradConv2DFmr : uint32_t;
94 tensor::PadOp padOp,
Attribute memorySpace = {},
117 const BufferizeToAllocationOptions &
options,
118 vector::MaskOp maskOp,
Attribute memorySpace = {},
119 Operation *insertionPoint =
nullptr);
128 const BufferizeToAllocationOptions &
options,
129 bufferization::AllocTensorOp allocTensorOp,
130 Attribute memorySpace = {},
131 Operation *insertionPoint =
nullptr);
151 Operation *op, Attribute memorySpace = {},
152 Operation *insertionPoint =
nullptr);
180 RewriterBase &rewriter, Operation *op,
181 bufferization::OneShotAnalysisState &state);
306 assert(size &&
"expected non-null size");
313 std::pair<unsigned, unsigned>(operandIndex, dimIndex),
nullptr);
410 unsigned size = useFullTiles.size();
411 llvm::SmallBitVector tmp(size,
false);
412 for (
unsigned i = 0; i < size; ++i)
413 tmp[i] = useFullTiles[i];
510 bool vectorizeNDExtract =
false,
511 bool flatten1DDepthwiseConv =
false);
541 if (
auto genericOp = dyn_cast_or_null<GenericOp>(op)) {
542 return llvm::to_vector(llvm::seq<unsigned>(0, genericOp.getNumLoops()));
544 if (
auto padOp = dyn_cast_or_null<tensor::PadOp>(op)) {
545 return llvm::to_vector(
546 llvm::seq<unsigned>(0, padOp.getSourceType().getRank()));
573 return collapseValue(rewriter, loc, operand, targetShape, reassociation,
599 return expandValue(rewriter, loc,
result, origDest, reassociation, control);
608 static FailureOr<Value>
619 static FailureOr<Value>
632 const llvm::SmallDenseSet<unsigned> &droppedDims)>;
639FailureOr<DropUnitDimsResult>
660FailureOr<ElementwiseOpFusionResult>
724 std::function<FailureOr<SmallVector<OpFoldResult>>(
729FailureOr<SmallVector<OpFoldResult>>
755FailureOr<PadTilingInterfaceResult>
774FailureOr<PackingResult>
776 scf::ForOp outermostEnclosingForOp,
832 tensor::PadOp &hoistedOp,
838 tensor::PadOp &hoistedOp,
926 GenericOp genericOp);
938FailureOr<PromotionInfo>
940 bool useOriginalSubviewSize,
958 memref::SubViewOp subview,
971 memref::SubViewOp subview,
1004FailureOr<VectorizationResult>
1008 bool vectorizeNDExtract =
false,
bool flatten1DDepthwiseConv =
false,
1009 bool assumeDynamicDimsMatchVecSizes =
false,
1010 bool createNamedContraction =
false);
1043template <
typename T>
1051template <
typename T>
1101FailureOr<MultiSizeSpecification>
1104 bool emitAssertions =
true);
1105FailureOr<StaticMultiSizeSpecification>
1109FailureOr<StaticContinuousTileSizeSpecification>
1111 unsigned targetSize);
1112FailureOr<ContinuousTileSizeSpecification>
1115 bool emitAssertions);
1153FailureOr<ForallReductionTilingResult>
1157 std::optional<ArrayAttr> mapping = std::nullopt);
1262FailureOr<SplitReductionResult>
1265 bool useAlloc =
false);
1314FailureOr<SplitReductionResult>
1317 bool useAlloc =
false);
1339FailureOr<CollapseResult>
1352 linalg::PackOp packOp,
1353 bool lowerPadLikeWithInsertSlice =
true);
1363FailureOr<LowerUnPackOpResult>
1365 bool lowerUnpadLikeWithExtractSlice =
true);
1376FailureOr<PackResult>
pack(
RewriterBase &rewriter, linalg::LinalgOp linalgOp,
1393FailureOr<PackTransposeResult>
1395 linalg::LinalgOp linalgOp, linalg::UnPackOp maybeUnPackOp,
1406FailureOr<PackResult>
1445 std::function<std::optional<BlockPackMatmulOptions>(linalg::LinalgOp)>;
1468FailureOr<PackResult>
1473FailureOr<Operation *>
1475 tensor::FromElementsOp fromElementsOp);
1478FailureOr<Operation *>
1480 tensor::GenerateOp generateOp);
1484 tensor::PadOp padOp);
1520FailureOr<std::pair<Operation *, Operation *>>
1528FailureOr<std::pair<Operation *, Operation *>>
1535FailureOr<std::pair<Operation *, Operation *>>
1537 linalg::DepthwiseConv2DNhwcHwcOp convOp);
1544FailureOr<std::pair<Operation *, Operation *>>
1550 linalg::Conv2DNhwcFhwcOp op);
1552 linalg::Conv2DNhwcFhwcQOp op);
1556 linalg::MatmulOp op,
1557 bool transposeLHS =
true);
1559 linalg::BatchMatmulOp op,
1560 bool transposeLHS =
true);
1566 linalg::Conv2DNhwcFhwcOp op,
1567 WinogradConv2DFmr fmr);
1580FailureOr<Operation *>
1582 linalg::WinogradFilterTransformOp op);
1601FailureOr<Operation *>
1603 linalg::WinogradInputTransformOp op);
1622FailureOr<Operation *>
1624 linalg::WinogradOutputTransformOp op);
1632 RewriterBase &rewriter, linalg::GenericOp genericOp,
bool removeOutputs);
1642template <
typename Conv2DOp,
typename Conv1DOp>
1656extern template struct DownscaleSizeOneWindowed2DConvolution<Conv2DNhwcHwcfOp,
1658extern template struct DownscaleSizeOneWindowed2DConvolution<Conv2DNchwFchwOp,
1669 FailureOr<DepthwiseConv1DNwcWcOp>
1706 FailureOr<GenericOp>
1720 FailureOr<GenericOp>
1908 using ControlFn = std::function<std::optional<bool>(tensor::ExtractSliceOp)>;
1994 bool removeDeadArgsAndResults =
true);
2036 bool PoisonPaddingOk =
false);
2055 std::function<SmallVector<ReassociationIndices>(linalg::LinalgOp)>;
2122 bool useAlloc =
false);
2126 bool transposeLHS =
true);
2134 WinogradConv2DFmr fmr);
static llvm::ManagedStatic< PassManagerOptions > options
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Attributes are known-constant values of operations.
The main mechanism for performing data layout queries.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
This class represents a single result from folding an operation.
This class represents an operand of an operation.
Operation is the basic unit of execution within MLIR.
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very li...
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
State for analysis-enabled bufferization.
FailureOr< PackingResult > buildPackingLoopNest(RewriterBase &rewriter, tensor::PadOp opToHoist, scf::ForOp outermostEnclosingForOp, ArrayRef< int64_t > transposeVector)
Build the packing loop nest required to hoist opToHoist above outermostEnclosingForOp.
void populateDataLayoutPropagationPatterns(RewritePatternSet &patterns, const ControlPropagationFn &controlPackUnPackPropagation, bool PoisonPaddingOk=false)
Patterns to bubble up or down data layout ops across other operations.
void populateMoveInitOperandsToInputPattern(RewritePatternSet &patterns)
A pattern that converts init operands to input operands.
std::function< IndexingMapOpInterface( Location loc, OpBuilder &, IndexingMapOpInterface, ArrayRef< Value > newOperands, ArrayRef< AffineMap > newIndexingMaps, const llvm::SmallDenseSet< unsigned > &droppedDims)> DroppedUnitDimsBuilder
void populateTransposeMatmulPatterns(RewritePatternSet &patterns, bool transposeLHS=true)
Patterns to convert Linalg matmul ops to transposed variants.
void populateContractionOpRankReducingPatterns(RewritePatternSet &patterns)
Adds patterns that reduce the rank of named contraction ops that have unit dimensions in the operand(...
LogicalResult rewriteAsPaddedOp(RewriterBase &rewriter, LinalgOp opToPad, const LinalgPaddingOptions &options, LinalgOp &paddedOp, SmallVector< Value > &replacements, SmallVector< tensor::PadOp > &padOps)
Pad the iterator dimensions options.paddingDimensions of all opToPad operands to a static bounding bo...
void populateSplitReductionPattern(RewritePatternSet &patterns, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
Patterns to apply splitReduction below.
void populateFuseTensorPadWithProducerLinalgOpPatterns(RewritePatternSet &patterns)
Pattern to fuse a tensor.pad operation with the producer of its source, if the producer is a linalg o...
FailureOr< std::pair< Operation *, Operation * > > rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcHwcfOp convOp)
Convert linalg.conv_2d_nhwc_hwcf into linalg.generic (for img2col packing) and linalg....
bool areDimSequencesPreserved(ArrayRef< AffineMap > maps, ArrayRef< ReassociationIndices > dimSequences)
Return true if all sequences of dimensions specified in dimSequences are contiguous in all the ranges...
bool hasVectorizationImpl(Operation *)
Return true if there's dedicated logic in the Linalg Vectorizer to vectorize this Op,...
void populateExtractSliceSinkingPatterns(RewritePatternSet &patterns, const ControlPropagationFn &controlPackUnPackPropagation)
Patterns to sink extract slice across other operations.
void populateBubbleUpExtractSliceOpPatterns(RewritePatternSet &patterns)
Patterns that are used to bubble up extract slice op above linalg op.
SmallVector< Operation *, 4 > LinalgLoops
void transformIndexOps(RewriterBase &b, LinalgOp op, SmallVectorImpl< Value > &ivs, const LoopIndexToRangeIndexMap &loopIndexToRangeIndex)
All indices returned by IndexOp should be invariant with respect to tiling.
void populateBlockPackMatmulPatterns(RewritePatternSet &patterns, const ControlBlockPackMatmulFn &controlFn)
Patterns to block pack Linalg matmul ops.
void populateConvertConv2DToImg2ColPatterns(RewritePatternSet &patterns)
Populates patterns to transform linalg.conv_2d_xxx operations into linalg.generic (for img2col packin...
std::function< FailureOr< SmallVector< OpFoldResult > >( OpBuilder &, OpOperand &, ArrayRef< Range >, const PadTilingInterfaceOptions &)> PadSizeComputationFunction
FailureOr< Operation * > decomposeWinogradFilterTransformOp(RewriterBase &rewriter, linalg::WinogradFilterTransformOp op)
Rewrite linalg.winograd_filter_transform.
std::optional< Value > allocateWorkgroupMemory(OpBuilder &builder, memref::SubViewOp subview, ArrayRef< Value > sizeBounds, DataLayout &)
Allocate the subview in the GPU workgroup memory.
FailureOr< PackTransposeResult > packTranspose(RewriterBase &rewriter, linalg::PackOp packOp, linalg::LinalgOp linalgOp, linalg::UnPackOp maybeUnPackOp, ArrayRef< int64_t > outerPerm, ArrayRef< int64_t > innerPerm)
Transpose a single PackOp -> LinalgOp -> UnPackOp chain and return the transposed PackOp -> LinalgOp ...
Value bufferizeToAllocation(RewriterBase &rewriter, const BufferizeToAllocationOptions &options, tensor::PadOp padOp, Attribute memorySpace={}, Operation *insertionPoint=nullptr)
Materialize a buffer allocation for the given tensor.pad op and lower the op to linalg....
FailureOr< VectorizationResult > vectorize(RewriterBase &rewriter, Operation *op, ArrayRef< int64_t > inputVectorSizes={}, ArrayRef< bool > inputScalableVecDims={}, bool vectorizeNDExtract=false, bool flatten1DDepthwiseConv=false, bool assumeDynamicDimsMatchVecSizes=false, bool createNamedContraction=false)
Returns a VectorizationResult containing the results of the vectorized op, or failure if the transfor...
bool isDimSequencePreserved(AffineMap map, ReassociationIndicesRef dimSequence)
Return true if a given sequence of dimensions are contiguous in the range of the specified indexing m...
FailureOr< Value > hoistPaddingOnTensors(RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops, ArrayRef< int64_t > transposeVector, tensor::PadOp &hoistedOp, SmallVectorImpl< TransposeOp > &transposeOps)
Mechanically hoist padding operations on tensors by numLoops into a new, generally larger tensor.
void populateDecomposeProjectedPermutationPatterns(RewritePatternSet &patterns)
Add patterns to make explicit broadcasts and transforms in the input operands of a genericOp.
FailureOr< LinalgOp > specializeGenericOp(RewriterBase &rewriter, GenericOp genericOp)
Create a namedOp from the given GenericOp and replace the GenericOp.
void populateFoldReshapeOpsByCollapsingPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlFoldingReshapes)
Patterns to fold an expanding tensor.expand_shape operation with its producer generic operation by co...
LinalgTilingLoopType
The type of loops to be generated during tiling.
FailureOr< LowerUnPackOpResult > lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp, bool lowerUnpadLikeWithExtractSlice=true)
Rewrite pack as empty + transpose + reshape + extract_slice.
std::function< std::optional< BlockPackMatmulOptions >(linalg::LinalgOp)> ControlBlockPackMatmulFn
Function type which is used to control matmul packing.
void populatePadOpVectorizationPatterns(RewritePatternSet &patterns, PatternBenefit baseBenefit=1)
Populates patterns with patterns that vectorize tensor.pad.
void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns)
void populateLinalgFoldIntoElementwisePatterns(RewritePatternSet &patterns)
Populates patterns with patterns that fold operations like linalg.transform into elementwise op map.
LogicalResult deallocateGPUPrivateMemory(OpBuilder &, Value)
In case of GPU private memory there is no need to deallocate since the memory is freed when going out...
void populateSparseTensorRewriting(RewritePatternSet &patterns)
Populate patterns that are only useful in the context of sparse tensors.
FailureOr< Operation * > decomposeWinogradOutputTransformOp(RewriterBase &rewriter, linalg::WinogradOutputTransformOp op)
Rewrite linalg.winograd_output_transform.
void populateWinogradConv2DPatterns(RewritePatternSet &patterns, WinogradConv2DFmr fmr)
Patterns to apply Winograd Conv2D algorithm F(m x m, r x r).
FailureOr< ElementwiseOpFusionResult > fuseElementwiseOps(RewriterBase &rewriter, OpOperand *fusedOperand)
FailureOr< PromotionInfo > promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, memref::SubViewOp subView, bool useOriginalSubviewSize, const AllocBufferCallbackFn &allocationFn, DataLayout &layout)
llvm::SmallDenseSet< int > getPreservedProducerResults(GenericOp producer, GenericOp consumer, OpOperand *fusedOperand)
Returns a set of indices of the producer's results which would be preserved after the fusion.
std::function< SplitReductionOptions(LinalgOp op)> ControlSplitReductionFn
Function signature to control reduction splitting.
std::optional< Value > allocateGPUPrivateMemory(OpBuilder &builder, memref::SubViewOp subview, ArrayRef< Value > sizeBounds, DataLayout &)
Allocate the subview in the GPU private memory.
std::function< SmallVector< ReassociationIndices >(linalg::LinalgOp)> GetCollapsableDimensionsFn
Function type to control generic op dimension collapsing.
void populateSimplifyDepthwiseConvPatterns(RewritePatternSet &patterns)
Patterns to simplify depthwise convolutions.
FailureOr< Operation * > rewriteInDestinationPassingStyle(RewriterBase &rewriter, tensor::FromElementsOp fromElementsOp)
Rewrite tensor.from_elements to linalg.generic.
FailureOr< PackResult > blockPackMatmul(RewriterBase &rewriter, linalg::LinalgOp linalgOp, const ControlBlockPackMatmulFn &controlPackMatmul)
Pack a matmul operation into blocked 4D layout.
void peelLoops(RewriterBase &rewriter, ArrayRef< scf::ForOp > loops)
Peel 'loops' and applies affine_min/max bounds simplification on the fly where relevant.
SmallVector< OpFoldResult > computePaddedShape(OpBuilder &, TypedValue< RankedTensorType > v, AffineMap indexingMap, ArrayRef< OpFoldResult > indexingSizes, const PadTilingInterfaceOptions &options)
Helper function to compute the padded shape of the given value v of RankedTensorType given:
void populateConvertToDestinationStylePatterns(RewritePatternSet &patterns)
Populate patterns that convert non-destination-style ops to destination style ops.
FailureOr< Operation * > winogradConv2D(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp op, WinogradConv2DFmr fmr)
Convert linalg.conv_2d_nhwc_fhwc to Winograd Conv2D algorithm F(m x m, r x r).
FailureOr< Operation * > transposeConv2D(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp op)
Convert linalg.conv_2d_nhwc_fhwc(_q) to linalg.conv_2d_nhwc_hwcf(_q) by materializing transpose.
void populateFoldUnitExtentDimsPatterns(RewritePatternSet &patterns, ControlDropUnitDims &options)
Patterns to fold unit-extent dimensions in operands/results of linalg ops on tensors and memref.
std::function< SmallVector< Value, 4 >(OpBuilder &, Operation *)> TileSizeComputationFunction
LogicalResult copyToWorkgroupMemory(OpBuilder &b, Value src, Value dst)
Create Memref copy operations and add gpu barrier guards before and after the copy operation to ensur...
void populateElementwiseToLinalgConversionPatterns(RewritePatternSet &patterns)
Populate patterns that convert ElementwiseMappable ops to linalg parallel loops.
LogicalResult linalgOpAnchoredEmptyTensorEliminationStep(RewriterBase &rewriter, Operation *op, bufferization::OneShotAnalysisState &state)
Try to eliminate tensor::EmptyOps inside op that are anchored on a LinalgOp.
FailureOr< LinalgLoops > linalgOpToLoops(RewriterBase &rewriter, LinalgOp linalgOp)
Emit a loop nest of scf.for with the proper body for linalgOp.
FailureOr< GenericOp > generalizeNamedOp(RewriterBase &rewriter, LinalgOp linalgOp)
Create a GenericOp from the given named operation linalgOp and replace the given linalgOp.
std::tuple< SmallVector< Range, 4 >, LoopIndexToRangeIndexMap > makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > allShapeSizes, ArrayRef< OpFoldResult > allTileSizes)
FailureOr< Operation * > transposeBatchMatmul(RewriterBase &rewriter, linalg::BatchMatmulOp op, bool transposeLHS=true)
Pattern to replace.
LogicalResult promoteSubviewsPrecondition(Operation *op, LinalgPromotionOptions options)
Promote memref.subviews feeding linalg-on-buffers operations.
LogicalResult copyToGPUPrivateMemory(OpBuilder &b, Value src, Value dst)
Normal copy to between src and dst.
FailureOr< linalg::GenericOp > deduplicateOperandsAndRemoveDeadResults(RewriterBase &rewriter, linalg::GenericOp genericOp, bool removeOutputs)
Method to deduplicate operands and remove dead results of linalg.generic operations.
void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns, PatternBenefit benefit=1)
Linalg decompose convolutions patterns.
void populateDecomposeWinogradOpsPatterns(RewritePatternSet &patterns)
Patterns to decompose Winograd operators.
void populateConvolutionVectorizationPatterns(RewritePatternSet &patterns, PatternBenefit benefit=1)
Populate patterns for vectorizing low-D convolution ops.
LogicalResult vectorizeCopy(RewriterBase &builder, memref::CopyOp copyOp)
Emit a suitable vector form for a Copy op with fully static shape.
LogicalResult vectorizeOpPrecondition(Operation *op, ArrayRef< int64_t > inputVectorSizes={}, ArrayRef< bool > inputScalableVecDims={}, bool vectorizeNDExtract=false, bool flatten1DDepthwiseConv=false)
Return success if the operation can be vectorized.
FailureOr< GenericOp > interchangeGenericOp(RewriterBase &rewriter, GenericOp genericOp, ArrayRef< unsigned > interchangeVector)
Interchange the iterator_types and iterator_maps dimensions and adapts the index accesses of op.
void populateCollapseDimensions(RewritePatternSet &patterns, const GetCollapsableDimensionsFn &controlCollapseDimensions)
Pattern to collapse dimensions in a linalg.generic op.
bool areElementwiseOpsFusable(OpOperand *fusedOperand)
Return true if two linalg.generic operations with producer/consumer relationship through fusedOperand...
FailureOr< StaticMultiSizeSpecification > computeStaticMultiTileSizes(LinalgOp op, unsigned dimension, int64_t targetSize, int64_t divisor)
FailureOr< LinalgLoops > linalgOpToAffineLoops(RewriterBase &rewriter, LinalgOp linalgOp)
Emit a loop nest of affine.for with the proper body for linalgOp.
void populateDecomposePackUnpackPatterns(RewritePatternSet &patterns)
Populates patterns to decompose linalg.pack and linalg.unpack Ops into e.g.
std::function< std::optional< Value >( OpBuilder &b, memref::SubViewOp subView, ArrayRef< Value > boundingSubViewSize, DataLayout &layout)> AllocBufferCallbackFn
Callback function type used to perform the allocation for the promoted subView.
void populateEraseUnusedOperandsAndResultsPatterns(RewritePatternSet &patterns)
Pattern to remove dead operands and results of linalg.generic operations.
std::function< bool(OpOperand *fusedOperand)> ControlFusionFn
Function type which is used to control when to stop fusion.
FailureOr< ContinuousTileSizeSpecification > computeContinuousTileSizes(OpBuilder &builder, TilingInterface op, unsigned dimension, OpFoldResult targetSize, bool emitAssertions)
FailureOr< SmallVector< OpFoldResult > > computeIndexingMapOpInterfacePaddedShape(OpBuilder &, OpOperand &operandToPad, ArrayRef< Range > iterationDomain, const PadTilingInterfaceOptions &)
Specific helper for Linalg ops.
FailureOr< StaticContinuousTileSizeSpecification > computeStaticContinuousTileSizes(LinalgOp op, unsigned dimension, unsigned targetSize)
std::function< LogicalResult(RewriterBase &, tensor::PadOp, Value)> OptimizeCopyFn
FailureOr< SplitReductionResult > splitReduction(RewriterBase &b, LinalgOp op, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
void populateSimplifyPackAndUnpackPatterns(RewritePatternSet &patterns)
Populates patterns with patterns that simplify tensor.pack and tensor.unpack operations.
void populateFoldPackUnpackIntoTensorEmptyPatterns(RewritePatternSet &patterns)
Populates patterns with patterns that fold operations like linalg.pack and linalg....
FailureOr< LinalgOp > padAndHoistLinalgOp(RewriterBase &rewriter, LinalgOp linalgOp, const LinalgPaddingOptions &options)
Apply padding and hoisting to linalgOp according to the configuration specified in options.
void populateDecomposeLinalgOpsPattern(RewritePatternSet &patterns, bool removeDeadArgsAndResults=true)
Populate patterns for splitting a LinalgOp with multiple statements within its payload into multiple ...
void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns, const ControlFoldIntoPackUnpackFn &controlFn=nullptr)
Populates patterns with patterns that fold operations like tensor.pad and tensor.extract_slice into t...
FailureOr< ForallReductionTilingResult > tileReductionUsingForall(RewriterBase &b, PartialReductionOpInterface op, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > tileSizes={}, std::optional< ArrayAttr > mapping=std::nullopt)
Method to tile a reduction to parallel iterations computing partial reductions.
FailureOr< PackResult > packMatmulGreedily(RewriterBase &rewriter, LinalgOp linalgOp, ArrayRef< OpFoldResult > mnkPackedSizes, ArrayRef< int64_t > mnkPaddedSizesNextMultipleOf, ArrayRef< int64_t > mnkOrder)
Pack a LinalgOp by greedily inferring matmul dimensions (m, n, k) where m and n are proper parallel d...
std::function< LogicalResult(OpBuilder &b, Value buffer)> DeallocBufferCallbackFn
Callback function type used to deallocate the buffers used to hold the promoted subview.
FailureOr< PackResult > pack(RewriterBase &rewriter, linalg::LinalgOp linalgOp, ArrayRef< OpFoldResult > packedSizes)
Implement packing of a single LinalgOp by packedSizes.
void populateEraseUnnecessaryInputsPatterns(RewritePatternSet &patterns)
Patterns to promote inputs to outputs and remove unused inputs of linalg.generic ops.
std::function< bool(OpOperand *opOperand)> ControlFoldIntoPackUnpackFn
Function type which is used to control folding operations like tensor.pad and tensor....
FailureOr< TiledLinalgOp > tileLinalgOp(RewriterBase &b, LinalgOp op, const LinalgTilingOptions &options)
DenseMap< int, int > LoopIndexToRangeIndexMap
Creates a number of ranges equal to the number of non-zero in tileSizes.
void populateFoldReshapeOpsByExpansionPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlFoldingReshapes)
Patterns to fold an expanding (collapsing) tensor_reshape operation with its producer (consumer) gene...
std::function< bool(OpOperand *opOperand)> ControlPropagationFn
Function type which is used to control propagation of linalg.pack/unpack ops.
void populateSwapExtractSliceWithFillPatterns(RewritePatternSet &patterns)
Adds patterns that waps tensor.extract_slice(linalg.fill(cst, init)) into linalg.fill(cst,...
FailureOr< DropUnitDimsResult > dropUnitDims(RewriterBase &rewriter, IndexingMapOpInterface op, const DroppedUnitDimsBuilder &droppedUnitDimsBuilder, const ControlDropUnitDims &options)
Drop unit extent dimensions from the op and its operands.
void populateInlineConstantOperandsPatterns(RewritePatternSet &patterns)
Patterns that are used to inline constant operands into linalg generic ops.
FailureOr< LinalgOp > promoteSubViews(OpBuilder &b, LinalgOp op, const LinalgPromotionOptions &options)
Promote the subViews into a new buffer allocated at the insertion point b.
void populateConstantFoldLinalgOperations(RewritePatternSet &patterns, const ControlFusionFn &controlFn)
Patterns to constant fold Linalg operations.
LogicalResult deallocateWorkgroupMemory(OpBuilder &, Value)
In case of GPU group memory there is no need to deallocate.
FailureOr< Operation * > transposeMatmul(RewriterBase &rewriter, linalg::MatmulOp op, bool transposeLHS=true)
Convert Linalg matmul ops to transposed variants.
void populateLinalgNamedOpsGeneralizationPatterns(RewritePatternSet &patterns)
Linalg generalization patterns.
void populateLinalgGenericOpsSpecializationPatterns(RewritePatternSet &patterns)
Populates patterns with patterns to convert linalg.generic ops to named ops where possible.
void populateLinalgNamedToElementwisePatterns(RewritePatternSet &patterns)
Populates patterns that convert linalg named ops e.g.
std::optional< vector::CombiningKind > getCombinerOpKind(Operation *combinerOp)
Return vector::CombiningKind for the given op.
SmallVector< Value > peelLoop(RewriterBase &rewriter, Operation *op)
Try to peel and canonicalize loop op and return the new result.
std::function< LogicalResult(OpBuilder &b, Value src, Value dst)> CopyCallbackFn
Callback function type used to insert copy from original subview to subview of the promoted region fo...
RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx)
Canonicalization patterns relevant to apply after tiling patterns.
FailureOr< CollapseResult > collapseOpIterationDims(LinalgOp op, ArrayRef< ReassociationIndices > foldedIterationDims, RewriterBase &rewriter)
Collapses dimensions of linalg.generic/linalg.copy operation.
FailureOr< Operation * > decomposeWinogradInputTransformOp(RewriterBase &rewriter, linalg::WinogradInputTransformOp op)
Rewrite linalg.winograd_input_transform.
void populateDecomposePadPatterns(RewritePatternSet &patterns)
Populates patterns to decompose tensor.pad into e.g.
void populateFoldAddIntoDestPatterns(RewritePatternSet &patterns)
Pattern to replace linalg.add when destination passing on a contraction op suffices for achieving the...
std::pair< TilingInterface, TilingInterface > splitOp(RewriterBase &rewriter, TilingInterface op, unsigned dimension, OpFoldResult splitPoint)
Split the given op into two parts along the given iteration space dimension at the specified splitPoi...
void populateElementwiseOpsFusionPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlElementwiseOpFusion)
Patterns for fusing linalg operation on tensors.
void populateFoldUnitExtentDimsCanonicalizationPatterns(RewritePatternSet &patterns, ControlDropUnitDims &options)
Populates canonicalization patterns that simplify IR after folding unit-extent dimensions.
FailureOr< SplitReductionResult > splitReductionByScaling(RewriterBase &b, LinalgOp op, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
Scaling-based implementation of the split reduction transformation.
FailureOr< MultiSizeSpecification > computeMultiTileSizes(OpBuilder &builder, LinalgOp op, unsigned dimension, OpFoldResult targetSize, OpFoldResult divisor, bool emitAssertions=true)
Emits the IR computing the multi-sized tiling specification with two tile sizes not exceeding targetS...
FailureOr< LowerPackResult > lowerPack(RewriterBase &rewriter, linalg::PackOp packOp, bool lowerPadLikeWithInsertSlice=true)
Rewrite pack as pad + reshape + transpose.
FailureOr< LinalgLoops > linalgOpToParallelLoops(RewriterBase &rewriter, LinalgOp linalgOp)
Emit a loop nest of scf.parallel with the proper body for linalgOp.
Include the generated interface declarations.
ArrayRef< int64_t > ReassociationIndicesRef
llvm::DenseSet< ValueT, ValueInfoT > DenseSet
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
const FrozenRewritePatternSet & patterns
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
OpInterfaceRewritePattern(MLIRContext *context, PatternBenefit benefit=1)
OpRewritePattern(MLIRContext *context, PatternBenefit benefit=1, ArrayRef< StringRef > generatedNames={})
SmallVector< int64_t, 3 > mnkOrder
Permutation of matmul (M, N, K) dimensions order.
SmallVector< int64_t, 3 > blockFactors
Minor block factors (mb, nb, kb) for packing relayout where mb, mn are the parallel dimensions and kb...
bool rhsTransposeOuterBlocks
Transpose RHS outer block layout [KB][NB] -> [NB][KB].
bool lhsTransposeInnerBlocks
Transpose LHS inner block layout [mb][kb] -> [kb][mb].
SmallVector< int64_t, 3 > mnkPaddedSizesNextMultipleOf
Next multiples of the packing sizes.
bool lhsTransposeOuterBlocks
Transpose LHS outer block layout [MB][KB] -> [KB][MB].
bool allowPadding
If true, allows packing of dimensions that only partially fit into the block factors.
bool rhsTransposeInnerBlocks
Transpose RHS inner block layout [kb][nb] -> [nb][kb].
bool bufferizeDestinationOnly
If set to "true", only the destination tensor operands are bufferized to a new allocation (and wrappe...
@ MaterializeInDestination
bool emitDealloc
If set to "true", a memref.dealloc operation will be emitted for each allocated buffer.
SmallVector< Value > results
Transformation to drop unit-extent dimensions from linalg.generic operations.
std::function< FailureOr< Value >( RewriterBase &, Location, Value, Value, ArrayRef< ReassociationIndices >, const ControlDropUnitDims &)> ExpandFnTy
Instances of this type are used to control how result values are expanded into their original shape a...
std::function< FailureOr< Value >( RewriterBase &, Location, Value, ArrayRef< int64_t >, ArrayRef< ReassociationIndices >, const ControlDropUnitDims &)> CollapseFnTy
Instances of this type are used to control how operand values are collapsed after dropping unit exten...
ControlFnTy controlFn
Function to control which dimensions, if any, are to be considered for dropping unit extent dimension...
RankReductionStrategy rankReductionStrategy
ExpandFnTy expandFn
Function to control how results are expanded into their original shape after dropping unit extent dim...
CollapseFnTy collapseFn
Function to control how operands are collapsed into their new target shape after dropping unit extent...
std::function< SmallVector< unsigned >(Operation *)> ControlFnTy
Instances of this type are used to control which dimensions of an operand are considered for dropping...
Vectorization pattern for memref::CopyOp.
LogicalResult matchAndRewrite(memref::CopyOp copyOp, PatternRewriter &rewriter) const override
Rewrites a linalg::PackOp into a sequence of:
LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override
Rewrites a linalg::UnPackOp into a sequence of:
LogicalResult matchAndRewrite(linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const override
Value createFillOrGenerateOp(RewriterBase &rewriter, tensor::PadOp padOp, Value dest, const SmallVector< Value > &dynSizes) const
Filling dest using FillOp constant padding value if possible.
DecomposePadOpPattern(MLIRContext *context, PatternBenefit benefit=1)
LogicalResult matchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const override
FailureOr< Conv1DOp > returningMatchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const
DownscaleConv2DOp(MLIRContext *context, PatternBenefit benefit=1)
FailureOr< DepthwiseConv1DNwcWcOp > returningMatchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp, PatternRewriter &rewriter) const
LogicalResult matchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp, PatternRewriter &rewriter) const override
DownscaleDepthwiseConv2DNhwcHwcOp(MLIRContext *context, PatternBenefit benefit=1)
Rewrites 2-D convolution ops with size-1 window dimensions into 1-D convolution ops.
LogicalResult matchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const override
FailureOr< Conv1DOp > returningMatchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const
IndexingMapOpInterface resultOp
SmallVector< Value > replacements
Fuse two linalg.generic operations that have a producer-consumer relationship captured through fusedO...
llvm::DenseMap< Value, Value > replacements
Transformation information returned after reduction tiling.
SmallVector< Operation * > mergeOps
The final reduction operation merging all the partial reductions.
SmallVector< Value > initialValues
Initial values used for partial reductions.
scf::ForallOp loops
The scf.forall operation that iterate over the tiles.
SmallVector< Operation * > parallelTiledOps
The partial reduction tiled op generated.
Match and rewrite for the pattern:
LogicalResult matchAndRewrite(vector::TransferReadOp xferOp, PatternRewriter &rewriter) const override
Match and rewrite for the pattern:
LogicalResult matchAndRewrite(vector::TransferWriteOp xferOp, PatternRewriter &rewriter) const override
Linalg generalization pattern.
LogicalResult matchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const override
FailureOr< GenericOp > returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const
matchAndRewrite implementation that returns the significant transformed pieces of IR.
Options that allow distribution of loops generated in Linalg transforms to processors while generatin...
SmallVector< Attribute > paddingValues
A padding value for every operand.
@ BufferizationMaterializeInDestination
LinalgPaddingOptions & setSizeToPadTo(unsigned operandIndex, unsigned dimIndex, OpFoldResult size)
DenseMap< std::pair< unsigned, unsigned >, OpFoldResult > sizeToPadTo
A mapping between an operand and shape dim, and a size for a padding dimension.
std::optional< SmallVector< int64_t > > padToMultipleOf
A list of multiples to which each padding dimension should be padded to.
OpFoldResult getSizeToPadTo(unsigned operandIndex, unsigned dimIndex) const
Given the operand index and shape dim it returns the size to pad to.
SmallVector< SmallVector< int64_t > > transposePaddings
A permutation vector for every operand used to transpose the packed PadOp results.
LinalgPaddingOptions & setPaddingValues(ArrayRef< Attribute > pv)
LinalgPaddingOptions & setPadToMultipleOf(ArrayRef< int64_t > m)
LinalgPaddingOptions & setHoistPaddings(ArrayRef< int64_t > hp)
LinalgPaddingOptions & setCopyBackOp(CopyBackOp op)
SmallVector< bool > nofoldFlags
A flag for every operand to mark the PadOp as nofold which enables packing for statically shaped oper...
SmallVector< int64_t > hoistPaddings
A number of loops to hoist the PadOp out for every operand.
LinalgPaddingOptions & setTransposePaddings(ArrayRef< SmallVector< int64_t > > tp)
LinalgPaddingOptions & setPaddingDimensions(ArrayRef< int64_t > pd)
SmallVector< int64_t > paddingDimensions
A list of iterator dimensions to pad.
LinalgPaddingOptions & setNofoldFlags(ArrayRef< bool > pp)
CopyBackOp copyBackOp
The op to be used for copying the padded result to the original destination tensor.
LogicalResult matchAndRewrite(GenericOp op, PatternRewriter &rewriter) const override
FailureOr< GenericOp > returningMatchAndRewrite(GenericOp op, PatternRewriter &rewriter) const
std::optional< LinalgLoopDistributionOptions > tileDistribution
When specified, specifies distribution of generated tile loops to processors.
LinalgTilingAndFusionOptions & setDistributionOptions(LinalgLoopDistributionOptions distributionOptions)
LinalgTilingAndFusionOptions & setTileSizes(ArrayRef< int64_t > ts)
SmallVector< int64_t > tileInterchange
Tile interchange used to permute the tile loops.
SmallVector< int64_t > tileSizes
Tile sizes used to tile the root operation.
LinalgTilingOptions & setDistributionOptions(LinalgLoopDistributionOptions distributionOptions)
LinalgTilingOptions & setTileSizes(const SmallVector< Value, 4 > &ts)
Set the tileSizeComputationFunction to return the values ts.
LinalgTilingOptions & setTileSizeComputationFunction(TileSizeComputationFunction fun)
LinalgTilingLoopType loopType
The type of tile loops to generate.
LinalgTilingOptions & setInterchange(ArrayRef< unsigned > interchange)
SmallVector< int64_t > peeledLoops
Peel the specified loops.
LinalgTilingOptions & setLoopType(LinalgTilingLoopType lt)
SmallVector< unsigned, 4 > interchangeVector
The interchange vector to reorder the tiled loops.
LinalgTilingOptions & setDistributionTypes(ArrayRef< StringRef > types)
TileSizeComputationFunction tileSizeComputationFunction
Computation function that returns the tile sizes for each operation.
LinalgTilingOptions & scalarizeDynamicDims()
Tile all dynamic dimensions by 1.
std::optional< LinalgLoopDistributionOptions > distribution
When specified, specifies distribution of generated tile loops to processors.
SmallVector< StringRef, 2 > distributionTypes
Specification markers of how to distribute the linalg.tiled_loop.
LinalgTilingOptions & setPeeledLoops(ArrayRef< int64_t > loops)
linalg::TransposeOp transposeOp
tensor::ExpandShapeOp expandShapeOp
tensor::ExtractSliceOp extractSliceOp
linalg::TransposeOp transposeOp
tensor::CollapseShapeOp collapseShapeOp
A description of a multi-size tiling comprising tile sizes and numbers of tiles, expressed as Values ...
Struct to hold the result of a pack call.
SmallVector< linalg::UnPackOp > unPackOps
linalg::LinalgOp packedLinalgOp
SmallVector< linalg::PackOp > packOps
Struct to hold the result of a packTranspose call.
linalg::PackOp transposedPackOp
linalg::LinalgOp transposedLinalgOp
linalg::UnPackOp transposedUnPackOp
SmallVector< Attribute > paddingValues
A padding value for every operand.
PadTilingInterfaceOptions & setPaddingValues(ArrayRef< Attribute > pv)
PadTilingInterfaceOptions & setPadToMultipleOf(bool b)
PadTilingInterfaceOptions & setPaddingSizes(ArrayRef< OpFoldResult > m)
bool padToMultipleOf
Pad iterator paddingDimension[i] to next multiple of paddingSizes[i] if true.
SmallVector< OpFoldResult > paddingSizes
A list of iterator dimensions sizes to pad to.
Operations and values created in the process of padding a TilingInterface operation.
SmallVector< Value > replacements
Slices of the padded op's results, same types as toPad.
TilingInterface paddedOp
The padded op, a clone of toPad with padded operands.
SmallVector< tensor::PadOp > padOps
The operands of the padded op.
Apply transformation to split the single linalg op reduction into a parallel and reduction dimension.
LinalgOp resultCombiningLinalgOp
Perform standalone tiling of a single LinalgOp by tileSizes.
SmallVector< Operation *, 8 > loops
SmallVector< Value, 4 > tensorResults
Transformation information returned after vectorizing.
SmallVector< Value > replacements
Results of the vectorization transform to replace the original operation.
SmallVector< T > tileSizes
Tile sizes.
SmallVector< T > tripCounts
Number of tiles associated with each size.
T lowTripCount
Number of tiles associated with each size.
Helper struct to hold the results of building a packing loop nest.
SmallVector< OpFoldResult > strides
SmallVector< Value > leadingPackedTensorIndexings
SmallVector< Value > clonedLoopIvs
SmallVector< OpFoldResult > sizes
TransposeOp maybeTransposeOp
tensor::PadOp hoistedPadOp
SmallVector< OpFoldResult > offsets