doxygen/mlir_2Dialect_2Linalg_2Transforms_2Transforms_8h_source.html

 //===- Transforms.h - Linalg transformations as patterns --------*- C++ -*-===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//


 #ifndef MLIR_DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H

 #define MLIR_DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H


 #include <utility>


 #include "mlir/Conversion/VectorToSCF/VectorToSCF.h"

 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"

 #include "mlir/Dialect/Linalg/Utils/Utils.h"

 #include "mlir/Dialect/MemRef/IR/MemRef.h"

 #include "mlir/Dialect/SCF/Utils/Utils.h"

 #include "mlir/Dialect/Tensor/IR/Tensor.h"

 #include "mlir/Dialect/Utils/StaticValueUtils.h"

 #include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"

 #include "mlir/Dialect/X86Vector/Transforms.h"

 #include "mlir/IR/PatternMatch.h"

 #include "mlir/Interfaces/TilingInterface.h"

 #include "mlir/Transforms/DialectConversion.h"

 #include "llvm/ADT/SmallBitVector.h"

 #include "llvm/ADT/SmallSet.h"


 namespace mlir {

 namespace bufferization {

 class AllocTensorOp;

 class OneShotAnalysisState;

 class BufferizationState;

 } // namespace bufferization


 namespace linalg {


 class LinalgOp;


 //===----------------------------------------------------------------------===//

 // Utils.

 //===----------------------------------------------------------------------===//


 /// Return vector::CombiningKind for the given op.

 std::optional<vector::CombiningKind> getCombinerOpKind(Operation *combinerOp);


 //===----------------------------------------------------------------------===//

 // Bufferization-related transforms.

 //===----------------------------------------------------------------------===//


 struct BufferizeToAllocationOptions {

   enum class AllocOp { MemrefAlloc = 0, MemrefAlloca = 1 };

   AllocOp allocOp = AllocOp::MemrefAlloc;


   enum class MemcpyOp {

     MaterializeInDestination = 0,

     MemrefCopy = 1,

     LinalgCopy = 2

   };

   MemcpyOp memcpyOp = MemcpyOp::MaterializeInDestination;


   /// If set to "true", only the destination tensor operands are bufferized to

   /// a new allocation (and wrapped in "bufferization.to_tensor"), but not the

   /// targeted op itself.

   bool bufferizeDestinationOnly = false;


   /// If set to "true", a `memref.dealloc` operation will be emitted for each

   /// allocated buffer. Otherwise, the memory is leaked, which is useful if

   /// the buffer deallocation pipeline should be run after bufferization is

   /// done.

   bool emitDealloc = false;

 };


 /// Materialize a buffer allocation for the given tensor.pad op and lower the

 /// op to linalg.fill/linalg.generic + bufferization.materialize_in_destination.

 /// E.g.:

 ///

 /// %0 = tensor.pad low[%l] high[%h] %t ...

 ///

 /// is lowered to:

 ///

 /// %alloc = memref.alloc

 /// linalg.fill ... outs(%alloc)

 /// %subview = memref.subview %alloc [%l] [...] [1]

 /// bufferization.materialize_in_destination %t in %subview

 /// %0 = bufferization.to_tensor %alloc restrict writable

 ///

 /// In addition to rewriting the IR as shown above, this function returns the

 /// newly allocated buffer. The `insertionPoint` parameter can be used to

 /// specify a custom insertion point for the buffer allocation.

 Value bufferizeToAllocation(RewriterBase &rewriter,

                             const BufferizeToAllocationOptions &options,

                             tensor::PadOp padOp, Attribute memorySpace = {},

                             Operation *insertionPoint = nullptr);


 /// Materialize a buffer allocation for the given vector.mask op and bufferize

 /// the op, including its region. E.g.:

 ///

 /// %0 = vector.mask {

 ///   vector.transfer_write %v, %t : vector<16xf32>, tensor<?xf32>

 /// } : vector<16xi1> -> tensor<?xf32>

 ///

 /// is lowered to:

 ///

 /// %alloc = memref.alloc

 /// bufferization.materialize_in_destination %t in %subview

 /// vector.mask {

 ///   vector.transfer_write %arg0, %alloc : vector<16xf32>, memref<?xf32>

 /// } : vector<16xi1>

 /// %0 = bufferization.to_tensor %alloc restrict writable

 ///

 /// In addition to rewriting the IR as shown above, this function returns the

 /// newly allocated buffer. The `insertionPoint` parameter can be used to

 /// specify a custom insertion point for the buffer allocation.

 Value bufferizeToAllocation(RewriterBase &rewriter,

                             const BufferizeToAllocationOptions &options,

                             vector::MaskOp maskOp, Attribute memorySpace = {},

                             Operation *insertionPoint = nullptr);


 /// Materialize a buffer allocation for the given bufferization.alloc_tensor op

 /// and lower the op to memref.alloc + memref.tensor_store.

 ///

 /// In addition to rewriting the IR, this function returns the newly allocated

 /// buffer. The `insertionPoint` parameter can be used to specify a custom

 /// insertion point for the buffer allocation.

 Value bufferizeToAllocation(RewriterBase &rewriter,

                             const BufferizeToAllocationOptions &options,

                             bufferization::AllocTensorOp allocTensorOp,

                             Attribute memorySpace = {},

                             Operation *insertionPoint = nullptr);


 /// Bufferize the given op with tensor semantics and materialize the result in

 /// a newly allocated buffer.

 ///

 /// Only bufferizable ops that bufferize to a memory write or have an

 /// aliasing OpOperand (and do not themselves bufferize to an allocation) are

 /// supported. They are bufferized using their BufferizableOpInterface

 /// implementation.

 ///

 /// Selected ops that bufferize to an allocation (or need special handling) are

 /// also supported:

 /// - tensor.pad

 /// - vector.mask

 ///

 /// This function returns the newly allocated buffer. The `insertionPoint`

 /// parameter can be used to specify a custom insertion point for the buffer

 /// allocation.

 Value bufferizeToAllocation(RewriterBase &rewriter,

                             const BufferizeToAllocationOptions &options,

                             Operation *op, Attribute memorySpace = {},

                             Operation *insertionPoint = nullptr);


 /// Try to eliminate tensor::EmptyOps inside `op` that are anchored on a

 /// LinalgOp. This transforms looks for LinalgOps that have an unused output

 /// operand and an input operand that is rooted in a tensor::EmptyOp. The

 /// tensor::EmptyOp uses are replaced with the output operand and the two

 /// operands of the LinalgOp are swapped.

 ///

 /// Example:

 /// %0 = tensor.empty()

 /// %1 = linalg.matmul ins(...) outs(%0)

 /// %2 = linalg.generic ins(%1) outs(%dest) {

 ///   ^bb0(%in: f32, %out: f32):

 ///   // out not used

 /// }

 ///

 /// The IR is transformed as follows:

 /// %0 = tensor.empty()

 /// %1 = linalg.matmul ins(...) outs(%dest)

 /// %2 = linalg.generic ins(%0) outs(%1) {

 ///   ^bb0(%in: f32, %out: f32):

 ///   // Use %out instead of %in

 /// }

 ///

 /// The "ins" operand has no uses inside the body of the LinalgOp and can be

 /// folded away with existing cleanup patterns. Afterwards, the tensor::EmptyOp

 /// can also fold away.

 LogicalResult linalgOpAnchoredEmptyTensorEliminationStep(

     RewriterBase &rewriter, Operation *op,

     bufferization::OneShotAnalysisState &state);


 //===----------------------------------------------------------------------===//

 // Structs that configure the behavior of various transformations.

 //===----------------------------------------------------------------------===//


 using TileSizeComputationFunction =

     std::function<SmallVector<Value, 4>(OpBuilder &, Operation *)>;


 struct LinalgTilingOptions {

   /// Computation function that returns the tile sizes for each operation.

   /// Delayed construction of constant tile sizes should occur to interoperate

   /// with folding.

   TileSizeComputationFunction tileSizeComputationFunction = nullptr;


   LinalgTilingOptions &

   setTileSizeComputationFunction(TileSizeComputationFunction fun) {

     tileSizeComputationFunction = std::move(fun);

     return *this;

   }

   /// Set the `tileSizeComputationFunction` to return the values `ts`. The

   /// values must not fold away when tiling. Otherwise, use a more robust

   /// `tileSizeComputationFunction`.

   LinalgTilingOptions &setTileSizes(const SmallVector<Value, 4> &ts) {

     tileSizeComputationFunction = [=](OpBuilder &, Operation *) { return ts; };

     return *this;

   }

   /// Convenience function to set the `tileSizeComputationFunction` to a

   /// function that computes tile sizes at the point they are needed. Allows

   /// proper interaction with folding.

   LinalgTilingOptions &setTileSizes(ArrayRef<int64_t> ts);


   /// Tile all dynamic dimensions by 1. I.e., scalarize those dimensions.

   /// Note: `scalarizeDynamicDims` and `setTileSizes` cannot be used together.

   LinalgTilingOptions &scalarizeDynamicDims();


   /// The interchange vector to reorder the tiled loops.

   SmallVector<unsigned, 4> interchangeVector = {};


   LinalgTilingOptions &setInterchange(ArrayRef<unsigned> interchange) {

     interchangeVector.assign(interchange.begin(), interchange.end());

     return *this;

   }


   /// The type of tile loops to generate.

   LinalgTilingLoopType loopType = LinalgTilingLoopType::Loops;


   LinalgTilingOptions &setLoopType(LinalgTilingLoopType lt) {

     loopType = lt;

     return *this;

   }


   /// When specified, specifies distribution of generated tile loops to

   /// processors.

   std::optional<LinalgLoopDistributionOptions> distribution;


   LinalgTilingOptions &

   setDistributionOptions(LinalgLoopDistributionOptions distributionOptions) {

     distribution = std::move(distributionOptions);

     return *this;

   }


   /// Specification markers of how to distribute the `linalg.tiled_loop`.

   SmallVector<StringRef, 2> distributionTypes = {};


   LinalgTilingOptions &setDistributionTypes(ArrayRef<StringRef> types) {

     distributionTypes.assign(types.begin(), types.end());

     return *this;

   }


   /// Peel the specified loops.

   SmallVector<int64_t> peeledLoops;


   LinalgTilingOptions &setPeeledLoops(ArrayRef<int64_t> loops) {

     peeledLoops.clear();

     peeledLoops.append(loops.begin(), loops.end());

     return *this;

   }

 };


 struct LinalgTilingAndFusionOptions {

   /// Tile sizes used to tile the root operation.

   SmallVector<int64_t> tileSizes;

   LinalgTilingAndFusionOptions &setTileSizes(ArrayRef<int64_t> ts) {

     tileSizes.assign(ts.begin(), ts.end());

     return *this;

   }

   /// Tile interchange used to permute the tile loops.

   SmallVector<int64_t> tileInterchange;

   /// When specified, specifies distribution of generated tile loops to

   /// processors.

   std::optional<LinalgLoopDistributionOptions> tileDistribution;

   LinalgTilingAndFusionOptions &

   setDistributionOptions(LinalgLoopDistributionOptions distributionOptions) {

     tileDistribution = std::move(distributionOptions);

     return *this;

   }

 };


 struct LinalgPaddingOptions {

   /// A padding value for every operand.

   SmallVector<Attribute> paddingValues;

   LinalgPaddingOptions &setPaddingValues(ArrayRef<Attribute> pv) {

     paddingValues.assign(pv.begin(), pv.end());

     return *this;

   }

   /// A list of iterator dimensions to pad.

   SmallVector<int64_t> paddingDimensions;

   LinalgPaddingOptions &setPaddingDimensions(ArrayRef<int64_t> pd) {

     paddingDimensions.assign(pd.begin(), pd.end());

     return *this;

   }

   /// A list of multiples to which each padding dimension should be padded to.

   std::optional<SmallVector<int64_t>> padToMultipleOf;

   LinalgPaddingOptions &setPadToMultipleOf(ArrayRef<int64_t> m) {

     padToMultipleOf.emplace(m.begin(), m.end());

     return *this;

   }

   /// A flag for every operand to mark the PadOp as nofold which enables

   /// packing for statically shaped operands.

   SmallVector<bool> nofoldFlags;

   LinalgPaddingOptions &setNofoldFlags(ArrayRef<bool> pp) {

     nofoldFlags.assign(pp.begin(), pp.end());

     return *this;

   }

   /// A number of loops to hoist the PadOp out for every operand.

   SmallVector<int64_t> hoistPaddings;

   LinalgPaddingOptions &setHoistPaddings(ArrayRef<int64_t> hp) {

     hoistPaddings.assign(hp.begin(), hp.end());

     return *this;

   }

   /// A permutation vector for every operand used to transpose the packed

   /// PadOp results.

   SmallVector<SmallVector<int64_t>> transposePaddings;

   LinalgPaddingOptions &

   setTransposePaddings(ArrayRef<SmallVector<int64_t>> tp) {

     transposePaddings.assign(tp.begin(), tp.end());

     return *this;

   }

   enum class CopyBackOp : int8_t {

     None = 0,

     BufferizationMaterializeInDestination = 1,

     LinalgCopy = 2

   };

   /// The op to be used for copying the padded result to the original

   /// destination tensor.

   CopyBackOp copyBackOp = CopyBackOp::BufferizationMaterializeInDestination;

   LinalgPaddingOptions &setCopyBackOp(CopyBackOp op) {

     copyBackOp = op;

     return *this;

   }

 };


 /// Callback function type used to perform the allocation for the promoted

 /// `subView`. In `boundingSubViewsize` a best attempt is made to find the

 /// smallest constant value for the size of the buffer needed for each

 /// dimension. If that is not possible, contains the dynamic size of the

 /// subview. The call back should return the buffer to use.

 using AllocBufferCallbackFn = std::function<std::optional<Value>(

     OpBuilder &b, memref::SubViewOp subView,

     ArrayRef<Value> boundingSubViewSize, DataLayout &layout)>;


 /// Callback function type used to deallocate the buffers used to hold the

 /// promoted subview.

 using DeallocBufferCallbackFn =

     std::function<LogicalResult(OpBuilder &b, Value buffer)>;


 /// Callback function type used to insert copy from original subview to

 /// subview of the promoted region for the read operands/subview of promoted

 /// region to original subview for the results. The copy has to happen from

 /// `src` to `dst`.

 using CopyCallbackFn =

     std::function<LogicalResult(OpBuilder &b, Value src, Value dst)>;


 struct LinalgPromotionOptions {

   /// Indices of subViews to promote. If `std::nullopt`, try to promote all

   /// operands.

   std::optional<DenseSet<unsigned>> operandsToPromote;

   LinalgPromotionOptions &setOperandsToPromote(ArrayRef<int64_t> operands) {

     operandsToPromote = DenseSet<unsigned>();

     operandsToPromote->insert_range(operands);

     return *this;

   }

   /// If ith element of `useFullTiles` is true the full view should be used

   /// for the promoted buffer of the ith operand in `operandsToPromote`.

   /// Otherwise the partial view will be used. The decision is defaulted to

   /// `useFullTileBuffersDefault` when `useFullTileBuffers` is std::nullopt and

   /// for operands missing from `useFullTileBuffers`.

   std::optional<llvm::SmallBitVector> useFullTileBuffers;

   LinalgPromotionOptions &setUseFullTileBuffers(ArrayRef<bool> useFullTiles) {

     unsigned size = useFullTiles.size();

     llvm::SmallBitVector tmp(size, false);

     for (unsigned i = 0; i < size; ++i)

       tmp[i] = useFullTiles[i];

     useFullTileBuffers = tmp;

     return *this;

   }

   /// If true all operands unspecified by `useFullTileBuffers` will use the

   /// full view, otherwise the partial view.

   bool useFullTileBuffersDefault = false;

   LinalgPromotionOptions &setUseFullTileBuffersByDefault(bool use) {

     useFullTileBuffersDefault = use;

     return *this;

   }

   /// Alignment of promoted buffer. If `std::nullopt` do not specify alignment.

   std::optional<unsigned> alignment;

   LinalgPromotionOptions &setAlignment(unsigned align) {

     alignment = align;

     return *this;

   }

   /// Memory space of promoted buffer. If `std::nullopt` do not specify memory

   /// space.

   std::optional<Attribute> memorySpace;

   LinalgPromotionOptions &setMemorySpace(Attribute memorySpc) {

     memorySpace = memorySpc;

     return *this;

   }

   /// Use alloca with the default allocation scheme.

   bool useAlloca = false;

   LinalgPromotionOptions &setUseAlloca(bool use) {

     useAlloca = use;

     return *this;

   }

   /// Callback function to do the allocation of the promoted buffer. If

   /// std::nullopt, then the default allocation scheme of allocating a

   /// memref<?xi8> buffer followed by a view operation is used.

   std::optional<AllocBufferCallbackFn> allocationFn;

   std::optional<DeallocBufferCallbackFn> deallocationFn;

   LinalgPromotionOptions &

   setAllocationDeallocationFns(AllocBufferCallbackFn const &allocFn,

                                DeallocBufferCallbackFn const &deallocFn) {

     allocationFn = allocFn;

     deallocationFn = deallocFn;

     return *this;

   }

   /// Callback function to do the copy of data to and from the promoted

   /// subview. If std::nullopt then a memref.copy is used.

   std::optional<CopyCallbackFn> copyInFn;

   std::optional<CopyCallbackFn> copyOutFn;

   LinalgPromotionOptions &setCopyInOutFns(CopyCallbackFn const &copyIn,

                                           CopyCallbackFn const &copyOut) {

     copyInFn = copyIn;

     copyOutFn = copyOut;

     return *this;

   }

 };


 /// Split Reduction options.

 struct SplitReductionOptions {

   // Ratio used to split the reduction dimension.  If the ratio is <= 1,

   // nothing will be done.

   int64_t ratio = 0;

   // Index where the extra dimension is added to the intermediate tensor

   // shape.

   unsigned index = 0;

   // If the inner dimension after splitting is parallel or reduction.

   bool innerParallel = false;

 };


 /// Function signature to control reduction splitting. This returns

 /// `SplitReductionOptions`.

 // TODO: don't use unsigned unless doing bit manipulation.

 using ControlSplitReductionFn =

     std::function<SplitReductionOptions(LinalgOp op)>;


 //===----------------------------------------------------------------------===//

 // Preconditions that ensure the corresponding transformation succeeds and can

 // be applied as a rewrite pattern.

 //===----------------------------------------------------------------------===//


 /// Return true if two `linalg.generic` operations with producer/consumer

 /// relationship through `fusedOperand` can be fused using elementwise op

 /// fusion.

 bool areElementwiseOpsFusable(OpOperand *fusedOperand);


 /// Promote memref.subviews feeding linalg-on-buffers operations.

 LogicalResult promoteSubviewsPrecondition(Operation *op,

                                           LinalgPromotionOptions options);


 /// Return success if the operation can be vectorized.

 LogicalResult vectorizeOpPrecondition(Operation *op,

                                       ArrayRef<int64_t> inputVectorSizes = {},

                                       ArrayRef<bool> inputScalableVecDims = {},

                                       bool vectorizeNDExtract = false,

                                       bool flatten1DDepthwiseConv = false);


 //===----------------------------------------------------------------------===//

 // Transformations exposed as functional-style API calls.

 //===----------------------------------------------------------------------===//


 using LinalgLoops = SmallVector<Operation *, 4>;


 /// Transformation to drop unit-extent dimensions from `linalg.generic`

 /// operations.

 struct ControlDropUnitDims {

   enum class RankReductionStrategy { ReassociativeReshape, ExtractInsertSlice };


   RankReductionStrategy rankReductionStrategy =

       RankReductionStrategy::ReassociativeReshape;


   using ControlFnTy = std::function<SmallVector<unsigned>(Operation *)>;

   ControlFnTy controlFn = [](Operation *op) {

     if (auto genericOp = dyn_cast_or_null<GenericOp>(op)) {

       return llvm::to_vector(llvm::seq<unsigned>(0, genericOp.getNumLoops()));

     }

     if (auto padOp = dyn_cast_or_null<tensor::PadOp>(op)) {

       return llvm::to_vector(

           llvm::seq<unsigned>(0, padOp.getSourceType().getRank()));

     }

     return SmallVector<unsigned>{};

   };

 };

 struct DropUnitDimsResult {

   linalg::GenericOp resultOp;

   SmallVector<Value> replacements;

 };

 FailureOr<DropUnitDimsResult> dropUnitDims(RewriterBase &rewriter,

                                            GenericOp genericOp,

                                            const ControlDropUnitDims &options);


 /// Fuse two `linalg.generic` operations that have a producer-consumer

 /// relationship captured through `fusedOperand`. The method expects

 /// that `areElementwiseOpsFusable` returns true for the given `fusedOperand`.

 struct ElementwiseOpFusionResult {

   Operation *fusedOp;

   llvm::DenseMap<Value, Value> replacements;

 };

 FailureOr<ElementwiseOpFusionResult>

 fuseElementwiseOps(RewriterBase &rewriter, OpOperand *fusedOperand);


 /// Returns a set of indices of the producer's results which would

 /// be preserved after the fusion.

 /// * There is a chance that the implementation of the transformation does not

 /// agree with the result of this method. This function gives a prediction based

 /// on an optimized fusion.

 llvm::SmallDenseSet<int> getPreservedProducerResults(GenericOp producer,

                                                      GenericOp consumer,

                                                      OpOperand *fusedOperand);


 /// Try to peel and canonicalize loop `op` and return the new result.

 /// Also applies affine_min/max bounds simplification on the fly where relevant.

 // TODO: Add support for scf.parallel and affine.for loops.

 SmallVector<Value> peelLoop(RewriterBase &rewriter, Operation *op);


 /// Peel 'loops' and applies affine_min/max bounds simplification on the fly

 /// where relevant.

 void peelLoops(RewriterBase &rewriter, ArrayRef<scf::ForOp> loops);


 /// Pad the iterator dimensions `paddingDimensions` of all `opToPad` operands

 /// to a static bounding box. The original `opToPad` is cloned and operates on

 /// the padded tensors.

 ///

 /// * "options.padToMultipleOf" indicates that each padding dimension should be

 ///   padded to the specified multiple.

 /// * Use "options.paddingValues" and "options.nofoldFlags" to set padding

 ///   value and nofold attribute of the created tensor::PadOps, respectively.

 /// * The unpadded results (extracted slice of the cloned operation) are

 ///   returned via `replacements`.

 /// * The tensor::PadOps are returned via `padOps`.

 /// * "options.copyBackOp" specifies the op type for copying back the unpadded

 ///   result to the original destination tensor.

 LogicalResult rewriteAsPaddedOp(RewriterBase &rewriter, LinalgOp opToPad,

                                 const LinalgPaddingOptions &options,

                                 LinalgOp &paddedOp,

                                 SmallVector<Value> &replacements,

                                 SmallVector<tensor::PadOp> &padOps);


 namespace detail {


 /// Helper struct to hold the results of building a packing loop nest.

 struct PackingResult {

   SmallVector<OpFoldResult> offsets, sizes, strides;

   SmallVector<Value> clonedLoopIvs, leadingPackedTensorIndexings;

   TransposeOp maybeTransposeOp;

   tensor::PadOp hoistedPadOp;

 };


 /// Build the packing loop nest required to hoist `opToHoist` above

 /// `outermostEnclosingForOp`.

 /// The loop nest is built just before `outermostEnclosingForOp`.

 FailureOr<PackingResult>

 buildPackingLoopNest(RewriterBase &rewriter, tensor::PadOp opToHoist,

                      scf::ForOp outermostEnclosingForOp,

                      ArrayRef<int64_t> transposeVector);


 } // namespace detail


 /// Mechanically hoist padding operations on tensors by `numLoops` into a new,

 /// generally larger tensor. This achieves packing of multiple padding ops into

 /// a larger tensor. On success, `opToHoist` is replaced by the cloned version

 /// in the packing loop so the caller can continue reasoning about the padding

 /// operation. If `transposeVector` is non-empty, hoist padding introduces a

 /// TransposeOp to transpose the padded tensor before inserting it into the

 /// packed tensor. A `transposeVector` can change the storage order of the

 /// padded tensor but does not change the order of the pack or compute loops.

 ///

 /// TODO: In the future, we should consider rewriting as a linalg.pack after

 /// hoisting since this abstraction is now available.

 ///

 /// Example in pseudo-mlir:

 /// =======================

 ///

 /// If hoistPaddingOnTensors is called with `nLoops` = 2 on the following IR.

 /// ```

 ///    scf.for (%i, %j, %k)

 ///      %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>

 ///      %0 = tensor.pad %st0 low[0, 0] high[...] {

 ///      ^bb0( ... ):

 ///        linalg.yield %pad

 ///      } : tensor<?x?xf32> to tensor<4x8xf32>

 ///      compute(%0)

 /// ```

 ///

 /// IR resembling the following is produced:

 ///

 /// ```

 ///    scf.for (%i) {

 ///      %packed_init = tensor.empty range(%j) : tensor<?x4x8xf32>

 ///      %packed = scf.for (%k) iter_args(%p : %packed_init) {

 ///        %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>

 ///        %0 = tensor.pad %st0 low[0, 0] high[...] {

 ///        ^bb0( ... ):

 ///          linalg.yield %pad

 ///        } : tensor<?x?xf32> to tensor<4x8xf32>

 ///        %1 = tensor.insert_slice %0 ...

 ///            : tensor<4x8xf32> to tensor<?x4x8xf32>

 ///        scf.yield %1: tensor<?x4x8xf32>

 ///      } -> tensor<?x4x8xf32>

 ///      scf.for (%j, %k) {

 ///        %st0 = tensor.extract_slice %packed [%k, 0, 0][1, 4, 8][1, 1, 1] :

 ///                 tensor<?x4x8xf32> to tensor<4x8xf32>

 ///        compute(%st0)

 ///      }

 ///    }

 /// ```

 FailureOr<Value>

 hoistPaddingOnTensors(RewriterBase &rewriter, tensor::PadOp opToHoist,

                       int64_t numLoops, ArrayRef<int64_t> transposeVector,

                       tensor::PadOp &hoistedOp,

                       SmallVectorImpl<TransposeOp> &transposeOps);

 /// Calls into `hoistPaddingOnTensors` with a local IRRewriter.

 FailureOr<Value>

 hoistPaddingOnTensors(tensor::PadOp opToHoist, int64_t numLoops,

                       ArrayRef<int64_t> transposeVector,

                       tensor::PadOp &hoistedOp,

                       SmallVectorImpl<TransposeOp> &transposeOps);


 /// Apply padding and hoisting to `linalgOp` according to the configuration

 /// specified in `options`.

 FailureOr<LinalgOp> padAndHoistLinalgOp(RewriterBase &rewriter,

                                         LinalgOp linalgOp,

                                         const LinalgPaddingOptions &options);


 /// Split the given `op` into two parts along the given iteration space

 /// `dimension` at the specified `splitPoint`, and return the two parts.

 /// If the second part is statically known to be empty, do not create it

 /// and return nullptr instead. Error state is signalled by returning

 /// a pair of nullptrs.

 ///

 /// For example, the following op:

 ///

 ///   linalg.matmul ins(%0, %1 : tensor<128x32xf32>, tensor<32x64xf32>)

 ///                 outs(%2 : tensor<128x64xf32>)

 ///

 /// split along the first dimension at position 42 will result in:

 ///

 ///   %3 = tensor.extract_slice %0[0, 0][42, 32][1, 1]

 ///   %4 = tensor.extract_slice %2[0, 0][42, 64][1, 1]

 ///   %5 = linalg.matmul ins(%3, %1 : tensor<42x32xf32>, tensor<32x64xf32>)

 ///                      outs(%5 : tensor<42x64xf32>)

 ///   %6 = tensor.insert_slice %5 into %2[0, 0][42, 64][1, 1]

 ///

 ///   %7 = tensor.extract_slice %0[42, 0][86, 32][1, 1]

 ///   %8 = tensor.extract_slice %6[42, 0][86, 64][1, 1]

 ///   %9 = linalg.matmul ins(%7, %1 : tensor<86x32xf32>, tensor<32x64xf32>)

 ///                      outs(%8 : tensor<86x64xf32>)

 ///   tensor.insert_slice %5 into %6[42, 0][86, 64][1, 1]

 ///

 /// Note that there is no simplification other than constant propagation applied

 /// to slice extraction and insertion.

 std::pair<TilingInterface, TilingInterface> splitOp(RewriterBase &rewriter,

                                                     TilingInterface op,

                                                     unsigned dimension,

                                                     OpFoldResult splitPoint);


 /// Perform standalone tiling of a single LinalgOp by `tileSizes`.

 /// and permute the loop nest according to `interchangeVector`

 /// The permutation is expressed as a list of integers that specify

 /// the new ordering of the loop nest. The length of `interchangeVector`

 /// must be equal to the length of `tileSizes`.

 /// An empty vector is interpreted as the identity permutation and the

 /// transformation returns early.

 ///

 /// Return a struct containing the tiled loops in the specified order

 /// and the cloned op if successful, std::nullopt otherwise.

 ///

 /// E.g. the permutation `(i,j,k) -> (j,k,i)` is expressed by

 /// `interchangeVector = [1,2,0]`. All values in `interchangeVector` must be

 /// integers, in the range 0..`tileSizes.size()` without duplications

 /// (i.e. `[1,1,2]` is an invalid permutation).

 struct TiledLinalgOp {

   LinalgOp op;

   SmallVector<Operation *, 8> loops;

   SmallVector<Value, 4> tensorResults;

 };

 FailureOr<TiledLinalgOp> tileLinalgOp(RewriterBase &b, LinalgOp op,

                                       const LinalgTilingOptions &options);


 /// Interchange the `iterator_types` and `iterator_maps` dimensions and adapts

 /// the index accesses of `op`. This is an in-place transformation controlled

 /// by `interchangeVector`. An empty vector is interpreted as the identity

 /// permutation and the transformation returns early.

 ///

 /// E.g. the permutation `(i,j,k) -> (j,k,i)` is expressed with

 /// `interchangeVector = [1,2,0]`. All values in `interchangeVector` must be

 /// integers, in the range 0..`op.rank` without duplications

 /// (i.e. `[1,1,2]` is an invalid permutation).

 ///

 /// Return failure if the permutation is not valid.

 FailureOr<GenericOp> interchangeGenericOp(RewriterBase &rewriter,

                                           GenericOp genericOp,

                                           ArrayRef<unsigned> interchangeVector);


 /// Create a GenericOp from the given named operation `linalgOp` and replace

 /// the given `linalgOp`.

 /// Return failure if `linalgOp` is a GenericOp or misses a region builder.

 FailureOr<GenericOp> generalizeNamedOp(RewriterBase &rewriter,

                                        LinalgOp linalgOp);


 /// Create a namedOp from the given GenericOp and replace the GenericOp.

 /// Currently we can specialize only trivial linalg copy operations.

 FailureOr<LinalgOp> specializeGenericOp(RewriterBase &rewriter,

                                         GenericOp genericOp);


 /// Create a new buffer using the `allocationFn` provided. The size of this

 /// buffer is the smallest constant bounding size along each dimension that

 /// can be computed for the size of the result of `subView`. Returns the

 /// allocated buffer as `fullLocalView` and the view that matches the size of

 /// the result of subview operation as `partialLocalView`.

 struct PromotionInfo {

   Value fullLocalView;

   Value partialLocalView;

 };

 FailureOr<PromotionInfo>

 promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, memref::SubViewOp subView,

                           const AllocBufferCallbackFn &allocationFn,

                           DataLayout &layout);


 /// Promote the `subViews` into a new buffer allocated at the insertion point

 /// `b`. Promotion occurs in 3 steps:

 ///   1. Create a new buffer for a full tile (i.e. not clipped at the

 ///   boundary).

 ///   2. Take a full view on the buffer.

 ///   3. Take a partial slice of the full view in step 2. and copy into it.

 ///

 /// Return the modified linalg op (the modification happens in place) as well

 /// as all the copy ops created.

 FailureOr<LinalgOp> promoteSubViews(OpBuilder &b, LinalgOp op,

                                     const LinalgPromotionOptions &options);


 /// Allocate the subview in the GPU workgroup memory.

 std::optional<Value> allocateWorkgroupMemory(OpBuilder &builder,

                                              memref::SubViewOp subview,

                                              ArrayRef<Value> sizeBounds,

                                              DataLayout &);


 /// In case of GPU group memory there is no need to deallocate.

 LogicalResult deallocateWorkgroupMemory(OpBuilder &, Value /*buffer*/);


 /// Create Memref copy operations and add gpu barrier guards before and after

 /// the copy operation to ensure data integrity.

 LogicalResult copyToWorkgroupMemory(OpBuilder &b, Value src, Value dst);


 /// Allocate the subview in the GPU private memory.

 std::optional<Value> allocateGPUPrivateMemory(OpBuilder &builder,

                                               memref::SubViewOp subview,

                                               ArrayRef<Value> sizeBounds,

                                               DataLayout &);


 /// Normal copy to between src and dst.

 LogicalResult copyToGPUPrivateMemory(OpBuilder &b, Value src, Value dst);


 /// In case of GPU private memory there is no need to deallocate since the

 /// memory is freed when going outside of the scope.

 LogicalResult deallocateGPUPrivateMemory(OpBuilder &, Value /*buffer*/);


 /// Return true if there's dedicated logic in the Linalg Vectorizer to

 /// vectorize this Op, false otherwise.

 ///

 /// Note that this helper merely implements a very high level check and that the

 /// vectorizer also requires various additional pre-conditions to be met for it

 /// to work (these are checked by the vectorizer itself).

 bool hasVectorizationImpl(Operation *);


 /// Emit a suitable vector form for an operation. If provided,

 /// `inputVectorSizes` are used to vectorize this operation. `inputVectorSizes`

 /// must match the rank of the iteration space of the operation and the sizes

 /// must be smaller or equal than their counterpart interation space sizes, if

 /// static. `inputVectorShapes` also allows the vectorization of operations with

 /// dynamic shapes.

 LogicalResult vectorize(RewriterBase &rewriter, Operation *op,

                         ArrayRef<int64_t> inputVectorSizes = {},

                         ArrayRef<bool> inputScalableVecDims = {},

                         bool vectorizeNDExtract = false,

                         bool flatten1DDepthwiseConv = false);


 /// Emit a suitable vector form for a Copy op with fully static shape.

 LogicalResult vectorizeCopy(RewriterBase &builder, memref::CopyOp copyOp);


 /// Emit a loop nest of `scf.for` with the proper body for `linalgOp`.

 FailureOr<LinalgLoops> linalgOpToLoops(RewriterBase &rewriter,

                                        LinalgOp linalgOp);


 /// Emit a loop nest of `scf.parallel` with the proper body for `linalgOp`.

 FailureOr<LinalgLoops> linalgOpToParallelLoops(RewriterBase &rewriter,

                                                LinalgOp linalgOp);


 /// Emit a loop nest of `affine.for` with the proper body for `linalgOp`.

 FailureOr<LinalgLoops> linalgOpToAffineLoops(RewriterBase &rewriter,

                                              LinalgOp linalgOp);


 /// Creates a number of ranges equal to the number of non-zero in `tileSizes`.

 /// One for each loop of the LinalgOp that is tiled. The `tileSizes` argument

 /// has one entry per surrounding loop. It uses zero as the convention that a

 /// particular loop is not tiled. This convention simplifies implementations

 /// by avoiding affine map manipulations. The returned ranges correspond to

 /// the loop ranges, in the proper order, that are tiled and for which new

 /// loops will be created. Also the function returns a map from loop indices

 /// of the LinalgOp to the corresponding non-empty range indices of newly

 /// created loops.

 using LoopIndexToRangeIndexMap = DenseMap<int, int>;

 std::tuple<SmallVector<Range, 4>, LoopIndexToRangeIndexMap>

 makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map,

                     ArrayRef<OpFoldResult> allShapeSizes,

                     ArrayRef<OpFoldResult> allTileSizes);


 namespace detail {

 template <typename T>

 struct MultiSizeSpecificationBase {

   /// Tile sizes.

   T lowTileSize, highTileSize;

   /// Number of tiles associated with each size.

   T lowTripCount, highTripCount;

 };


 template <typename T>

 struct ContinuousTileSizeSpecificationBase {

   /// Tile sizes.

   SmallVector<T> tileSizes;

   /// Number of tiles associated with each size.

   SmallVector<T> tripCounts;

 };


 } // namespace detail


 /// A description of a multi-size tiling comprising tile sizes and numbers of

 /// tiles, expressed as Values which may or may not be constant. Multi-size

 /// currently means two-size.

 struct MultiSizeSpecification

     : public detail::MultiSizeSpecificationBase<Value> {};

 struct StaticMultiSizeSpecification

     : public detail::MultiSizeSpecificationBase<int64_t> {};


 struct ContinuousTileSizeSpecification

     : public detail::ContinuousTileSizeSpecificationBase<Value> {};

 struct StaticContinuousTileSizeSpecification

     : public detail::ContinuousTileSizeSpecificationBase<int64_t> {};


 /// Emits the IR computing the multi-sized tiling specification with two tile

 /// sizes not exceeding `targetSize`, each divisible by `sizeDivisor`, such

 /// that there exist numbers of tiles with these sizes that fully cover the

 /// given iteration space `dimension` of the structured `op`.

 ///

 /// The computation is as follows:

 ///

 ///   b = originalTripCount floordiv sizeDivisor

 ///   t = (targetSize + sizeDivisor - 1) floordiv sizeDivisor

 ///   d = (b + t - 1) floordiv t

 ///   s = (b floordiv d) * sizeDivisor

 ///   v = b % d

 ///   u = d - v

 ///

 /// where the tile sizes are `s` and `s` + `sizeDivisor`, and the numbers of

 /// the corresponding tiles are `u` and `v`, respectively.  Alternatively,

 ///

 ///   s * u + (s + sizeDivisor) * v == original size,

 ///   where s mod sizeDivisor = 0.

 ///

 /// Expects all values to be positive. In some cases with the target tile size

 /// sufficiently close to the dimension shape and non-unit divisor, it is

 /// impossible to compute such sizes. If `emitAssertion` is set, also emit the

 /// assertion that size computation succeeded.

 ///

 /// Returns the specification consisting of both tile values and the number of

 /// tiles of each size.

 FailureOr<MultiSizeSpecification>

 computeMultiTileSizes(OpBuilder &builder, LinalgOp op, unsigned dimension,

                       OpFoldResult targetSize, OpFoldResult divisor,

                       bool emitAssertions = true);

 FailureOr<StaticMultiSizeSpecification>

 computeStaticMultiTileSizes(LinalgOp op, unsigned dimension, int64_t targetSize,

                             int64_t divisor);


 FailureOr<StaticContinuousTileSizeSpecification>

 computeStaticContinuousTileSizes(LinalgOp op, unsigned dimension,

                                  unsigned targetSize);

 FailureOr<ContinuousTileSizeSpecification>

 computeContinuousTileSizes(OpBuilder &builder, TilingInterface op,

                            unsigned dimension, OpFoldResult targetSize,

                            bool emitAssertions);


 /// Transformation information returned after reduction tiling.

 struct ForallReductionTilingResult {

   /// The partial reduction tiled op generated.

   SmallVector<Operation *> parallelTiledOps;

   /// The final reduction operation merging all the partial reductions.

   SmallVector<Operation *> mergeOps;

   /// Initial values used for partial reductions.

   SmallVector<Value> initialValues;

   /// The `scf.forall` operation that iterate over the tiles.

   scf::ForallOp loops;

 };


 /// Method to tile a reduction to parallel iterations computing partial

 /// reductions. After the loop all the partial reduction are merged into a final

 /// reduction. For example for the following sequence

 ///

 /// ```mlir

 /// %0 = linalg.generic %in ["parallel", "reduction"]

 ///   : tensor<7x9xf32> -> tensor<7xf32>

 /// ```

 ///

 /// into:

 ///

 /// ```mlir

 /// %0 = linalg.fill ... : tensor<7x4xf32>

 /// %1 = scf.forall (%iv) in (%c4) shared_outs(%arg0 = %0)

 ///   -> (tensor<7x4xf32>) {

 ///   %2 = tensor.extract_slice %arg3 : tensor<7x4xf32> to tensor<7xf32>

 ///   %3 = tensor.extract_slice %in : tensor<7x9xf32> -> tensor<7x?xf32>

 ///   %4 = linalg.generic %2, %3 ["parallel", "reduction"]

 ///     : tensor<7x?xf32> -> tensor<7xf32>

 ///   %5 = tensor.insert_slice %3, %arg0[0, %iv] : tensor<7x4xf32>

 /// }

 /// %6 = linalg.generic %1 ["parallel", "reduction"]

 ///   : tensor<7x4xf32> -> tensor<7xf32>

 /// ```

 FailureOr<ForallReductionTilingResult>

 tileReductionUsingForall(RewriterBase &b, PartialReductionOpInterface op,

                          ArrayRef<OpFoldResult> numThreads,

                          ArrayRef<OpFoldResult> tileSizes = {},

                          std::optional<ArrayAttr> mapping = std::nullopt);


 /// All indices returned by IndexOp should be invariant with respect to

 /// tiling. Therefore, if an operation is tiled, we have to transform the

 /// indices accordingly, i.e. offset them by the values of the corresponding

 /// induction variables that are captured implicitly in the body of the op.

 ///

 /// Example. `linalg.generic` before tiling:

 ///

 /// #id_2d = (i, j) -> (i, j)

 /// #pointwise_2d_trait = {

 ///   indexing_maps = [#id_2d, #id_2d],

 ///   iterator_types = ["parallel", "parallel"]

 /// }

 /// linalg.generic #pointwise_2d_trait %operand, %result {

 ///   ^bb0(%operand_in: f32, %result_in: f32):

 ///     %i = linalg.index 0 : index

 ///     %j = linalg.index 1 : index

 ///     <some operations that use %i, %j>

 /// }: memref<50x100xf32>, memref<50x100xf32>

 ///

 /// After tiling pass with tiles sizes 10 and 25:

 ///

 /// #strided = (i, j)[s0, s1, s2] -> (i * s1 + s0 + j * s2)

 ///

 /// %c1 = arith.constant 1 : index

 /// %c0 = arith.constant 0 : index

 /// %c25 = arith.constant 25 : index

 /// %c10 = arith.constant 10 : index

 /// operand_dim_0 = dim %operand, 0 : memref<50x100xf32>

 /// operand_dim_1 = dim %operand, 1 : memref<50x100xf32>

 /// scf.for %k = %c0 to operand_dim_0 step %c10 {

 ///   scf.for %l = %c0 to operand_dim_1 step %c25 {

 ///     %4 = memref.subview %operand[%k, %l][%c10, %c25][%c1, %c1]

 ///       : memref<50x100xf32> to memref<?x?xf32, #strided>

 ///     %5 = memref.subview %result[%k, %l][%c10, %c25][%c1, %c1]

 ///       : memref<50x100xf32> to memref<?x?xf32, #strided>

 ///     linalg.generic pointwise_2d_trait %4, %5 {

 ///     ^bb0(%operand_in: f32, %result_in: f32):

 ///       %i = linalg.index 0 : index

 ///       %j = linalg.index 1 : index

 ///       // Indices `k` and `l` are implicitly captured in the body.

 ///       %transformed_i = arith.addi %i, %k : index // index `i` is offset by

 ///       %k %transformed_j = arith.addi %j, %l : index // index `j` is offset

 ///       by %l

 ///       // Every use of %i, %j is replaced with %transformed_i,

 ///       %transformed_j <some operations that use %transformed_i,

 ///       %transformed_j>

 ///     }: memref<?x?xf32, #strided>, memref<?x?xf32, #strided>

 ///   }

 /// }

 ///

 /// TODO: Investigate whether mixing implicit and explicit indices

 /// does not lead to losing information.

 void transformIndexOps(RewriterBase &b, LinalgOp op,

                        SmallVectorImpl<Value> &ivs,

                        const LoopIndexToRangeIndexMap &loopIndexToRangeIndex);


 /// Apply transformation to split the single linalg op reduction into a

 /// parallel and reduction dimension. Then create a new linalg.generic op

 /// doing the rest of the reduction. Return the new linalg op with an extra

 /// parallel dimension or failure if the transformation didn't happen.

 ///

 /// Example:

 /// ```

 ///  %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,

 ///                                        affine_map<(d0) -> ()>],

 ///       iterator_types = ["reduction"]}

 ///  ins(%in : tensor<32xf32>)

 ///  outs(%out : tensor<f32>) {

 ///  ^bb0(%arg1: f32, %arg2: f32):

 ///    %y = arith.addf %arg1, %arg2 : f32

 ///    linalg.yield %y : f32

 ///  } -> tensor<f32>

 /// ```

 /// To:

 /// ```

 ///  %cst = arith.constant 0.000000e+00 : f32

 ///  %0 = tensor.expand_shape %in [[0, 1]]: tensor<32xf32> into tensor<4x8xf32>

 ///  %1 = tensor.empty [4] : tensor<4xf32>

 ///  %2 = linalg.fill ins(%cst : f32)

 ///                   outs(%1 : tensor<4xf32>) -> tensor<4xf32>

 ///  %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,

 ///                                        affine_map<(d0, d1) -> (d0)>],

 ///    iterator_types = ["parallel", "reduction"]}

 ///    ins(%0 : tensor<4x8xf32>) outs(%2 : tensor<4xf32>) {

 ///    ^bb0(%arg3: f32, %arg5: f32):

 ///    %5 = arith.addf %arg3, %arg4 : f32

 ///    linalg.yield %5 : f32

 ///  } -> tensor<4xf32>

 /// %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,

 ///                                       affine_map<(d0) -> ()>],

 ///   iterator_types = ["reduction"]}

 ///   ins(%3 : tensor<4xf32>) outs(%out : tensor<f32>) {

 ///   ^bb0(%arg3: f32, %arg4: f32):

 ///   %5 = arith.addf %arg3, %arg4 : f32

 ///   linalg.yield %5 : f32

 /// } -> tensor<f32>

 /// ```

 struct SplitReductionResult {

   Operation *initOrAlloc;

   FillOp fillOp;

   LinalgOp splitLinalgOp;

   LinalgOp resultCombiningLinalgOp;

 };

 FailureOr<SplitReductionResult>

 splitReduction(RewriterBase &b, LinalgOp op,

                const ControlSplitReductionFn &controlSplitReductionFn,

                bool useAlloc = false);


 /// Scaling-based implementation of the split reduction transformation.

 /// Instead of introducing an ExpandShapeOp, this rewrites a reduction

 /// dimension `k` into `k * scale + kk`.

 ///

 /// Example:

 /// ```

 ///  %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>)

 ///    outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>

 /// ```

 ///

 /// Is transformed to:

 ///

 /// ```

 ///  #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d2 * 4 + d3)>

 ///  #map1 = affine_map<(d0, d1, d2, d3) -> (d2 * 4 + d3, d1)>

 ///  #map2 = affine_map<(d0, d1, d2, d3) -> (d2, d3)>

 ///  #map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>

 ///  #map4 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>

 ///  #map5 = affine_map<(d0, d1, d2) -> (d0, d1)>

 ///  %0 = tensor.empty [16, 32, 64] : tensor<16x32x64xf32>

 ///  %cst = arith.constant 0.000000e+00 : f32

 ///  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<16x32x64xf32>) ->

 ///     tensor<16x32x64xf32>

 ///  %2 = tensor.empty [64, 4] : tensor<64x4xi1>

 ///

 ///  %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3],

 ///    iterator_types = ["parallel", "parallel", "parallel", "reduction"]}

 ///    ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>,

 ///    tensor<64x4xi1>)

 ///   outs(%1 : tensor<16x32x64xf32>) {

 ///      ^bb0(%arg3: f32, %arg4: f32, %arg5: i1, %arg6: f32):

 ///        %5 = arith.mulf %arg3, %arg4 : f32

 ///        %6 = arith.addf %arg6, %5 : f32

 ///        linalg.yield %6 : f32

 ///  } -> tensor<16x32x64xf32>

 ///

 ///  %4 = linalg.generic {indexing_maps = [#map4, #map5],

 ///    iterator_types = ["parallel", "parallel", "reduction"]}

 //     ins(%3 : tensor<16x32x64xf32>)

 ///    outs(%C : tensor<16x32xf32>) {

 ///      ^bb0(%arg3: f32, %arg4: f32):

 ///        %5 = arith.addf %arg3, %arg4 : f32

 ///        linalg.yield %5 : f32

 ///  } -> tensor<16x32xf32>

 ///

 ///  return %4 : tensor<16x32xf32>

 /// ```

 FailureOr<SplitReductionResult>

 splitReductionByScaling(RewriterBase &b, LinalgOp op,

                         const ControlSplitReductionFn &controlSplitReductionFn,

                         bool useAlloc = false);


 /// Return `true`  if a given sequence of dimensions are contiguous in the

 /// range of the specified indexing map.

 bool isDimSequencePreserved(AffineMap map, ReassociationIndicesRef dimSequence);

 /// Return `true` if all sequences of dimensions specified in `dimSequences` are

 /// contiguous in all the ranges of the `maps`.

 bool areDimSequencesPreserved(ArrayRef<AffineMap> maps,

                               ArrayRef<ReassociationIndices> dimSequences);


 struct CollapseResult {

   SmallVector<Value> results;

   LinalgOp collapsedOp;

 };


 /// Collapses dimensions of linalg.generic/linalg.copy operation. A precondition

 /// to calling this method is that for each list in `foldedIterationDim`, the

 /// sequence of dimensions is contiguous in domains of all `indexing_maps` of

 /// the `linalgOp`. This can be checked using `areDimSequencePreserved` method.

 /// When valid, the method also collapses the operands of the op. Returns

 /// replacement values of the results of the original `linalgOp` by inserting

 /// reshapes to get back values of compatible types.

 FailureOr<CollapseResult>

 collapseOpIterationDims(LinalgOp op,

                         ArrayRef<ReassociationIndices> foldedIterationDims,

                         RewriterBase &rewriter);


 struct LowerPackResult {

   tensor::PadOp padOp;

   tensor::ExpandShapeOp expandShapeOp;

   linalg::TransposeOp transposeOp;

 };


 /// Rewrite pack as pad + reshape + transpose.

 FailureOr<LowerPackResult> lowerPack(RewriterBase &rewriter,

                                      linalg::PackOp packOp,

                                      bool lowerPadLikeWithInsertSlice = true);


 struct LowerUnPackOpResult {

   tensor::EmptyOp emptyOp;

   linalg::TransposeOp transposeOp;

   tensor::CollapseShapeOp collapseShapeOp;

   tensor::ExtractSliceOp extractSliceOp;

 };


 /// Rewrite pack as empty + transpose + reshape + extract_slice.

 FailureOr<LowerUnPackOpResult>

 lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,

             bool lowerUnpadLikeWithExtractSlice = true);


 /// Struct to hold the result of a `pack` call.

 struct PackResult {

   SmallVector<linalg::PackOp> packOps;

   linalg::LinalgOp packedLinalgOp;

   SmallVector<linalg::UnPackOp> unPackOps;

 };

 /// Implement packing of a single LinalgOp by `packedSizes`.

 /// There must be one packedSizes entry per `linalgOp` iterator.

 /// Return the packed Linalg op on success, failure otherwise.

 FailureOr<PackResult> pack(RewriterBase &rewriter, linalg::LinalgOp linalgOp,

                            ArrayRef<OpFoldResult> packedSizes);


 /// Struct to hold the result of a `packTranspose` call.

 struct PackTransposeResult {

   linalg::PackOp transposedPackOp;

   linalg::LinalgOp transposedLinalgOp;

   linalg::UnPackOp transposedUnPackOp;

 };

 /// Transpose a single PackOp -> LinalgOp -> UnPackOp chain and return the

 /// transposed PackOp -> LinalgOp -> UnPackOp chain after replacements.

 /// Return failure if either:

 ///   1. the `packOp` does not have the `linalgOp` as its unique use.

 ///   2. the `maybeUnPackOp`, if specified must be a consumer of the result tied

 ///      to the unique `packOp` use.

 ///   3. `outerPerm` (resp. `innerPerm`) must be valid permutations of

 ///      `packOp.getOuterDimsPerm` (resp. `packOp.getInnerDimsPerm`) or empty.

 FailureOr<PackTransposeResult>

 packTranspose(RewriterBase &rewriter, linalg::PackOp packOp,

               linalg::LinalgOp linalgOp, linalg::UnPackOp maybeUnPackOp,

               ArrayRef<int64_t> outerPerm, ArrayRef<int64_t> innerPerm);


 /// Pack a LinalgOp by greedily inferring matmul dimensions (m, n, k) where m

 /// and n are proper parallel dimensions and k is a proper reduction

 /// dimension. Packing occurs by rewriting the op as a linalg.generic and

 /// calling linalg::pack by `mnkPackedSizes`. The order of the packed

 /// dimensions is customizable: the `mnkOrder` is a permutation of {0, 1, 2}

 /// to reorder {m, n, k} into one of the 8 possible forms. The outer

 /// dimensions of the operands are not permuted at this time, this is left for

 /// future work.

 FailureOr<PackResult>

 packMatmulGreedily(RewriterBase &rewriter, LinalgOp linalgOp,

                    ArrayRef<OpFoldResult> mnkPackedSizes,

                    ArrayRef<int64_t> mnkPaddedSizesNextMultipleOf,

                    ArrayRef<int64_t> mnkOrder);


 struct BlockPackMatmulOptions {

   /// Minor block factors (mb, nb, kb) for packing relayout where mb, mn are

   /// the parallel dimensions and kb is the reduction dimension.

   SmallVector<int64_t, 3> blockFactors;


   /// If true, allows packing of dimensions that only partially fit into the

   /// block factors.

   bool allowPadding = true;


   /// Next multiples of the packing sizes.

   SmallVector<int64_t, 3> mnkPaddedSizesNextMultipleOf;


   /// Permutation of matmul (M, N, K) dimensions order.

   SmallVector<int64_t, 3> mnkOrder = {0, 1, 2};


   /// Transpose LHS outer block layout [MB][KB] -> [KB][MB].

   bool lhsTransposeOuterBlocks = false;


   /// Transpose LHS inner block layout [mb][kb] -> [kb][mb].

   bool lhsTransposeInnerBlocks = false;


   /// Transpose RHS outer block layout [KB][NB] -> [NB][KB].

   bool rhsTransposeOuterBlocks = true;


   /// Transpose RHS inner block layout [kb][nb] -> [nb][kb].

   bool rhsTransposeInnerBlocks = true;

 };


 /// Function type which is used to control matmul packing.

 /// It is expected to return valid packing configuration for each operation.

 /// Lack of packing options indicates that no valid configuration could be

 /// assigned and the operation will not be packed.

 using ControlBlockPackMatmulFn =

     std::function<std::optional<BlockPackMatmulOptions>(linalg::LinalgOp)>;


 /// Pack a matmul operation into blocked 4D layout.

 ///

 /// Relayout a matmul operation into blocked layout with two levels of

 /// subdivision:

 ///   - major 2D blocks - outer dimensions, consist of minor blocks

 ///   - minor 2D blocks - inner dimensions, consist of scalar elements

 ///

 /// A 2D matmul MxNxK gets reshaped into blocked 4D representation

 /// as: [MB][NB][mb][nb] += [MB][KB][mb][kb] * [NB][KB][nb][kb]

 /// where the (MB, NB, KB) dimensions represent the major blocks,

 /// and the (mb, nb, kb) are the minor blocks of their respective

 /// original 2D dimensions (M, N, K).

 ///

 /// Depending on the initial operands' data layout and the specified

 /// packing options, the major blocks dimensions might get transposed

 /// e.g., [MB][KB] -> [KB][MB]. The minor blocks can also be transposed

 /// e.g., [mb][kb] -> [kb][mb].

 /// Any present batch dimensions remain unchanged.

 /// The final result is unpacked back to the original shape.

 ///

 /// Return failure if no valid packing options are provided.

 FailureOr<PackResult>

 blockPackMatmul(RewriterBase &rewriter, linalg::LinalgOp linalgOp,

                 const ControlBlockPackMatmulFn &controlPackMatmul);


 /// Rewrite tensor.from_elements to linalg.generic.

 FailureOr<Operation *>

 rewriteInDestinationPassingStyle(RewriterBase &rewriter,

                                  tensor::FromElementsOp fromElementsOp);


 /// Rewrite tensor.generate to linalg.generic.

 FailureOr<Operation *>

 rewriteInDestinationPassingStyle(RewriterBase &rewriter,

                                  tensor::GenerateOp generateOp);


 /// Rewrite tensor.pad to linalg.generic + tensor.insert_slice.

 FailureOr<Operation *> rewriteInDestinationPassingStyle(RewriterBase &rewriter,

                                                         tensor::PadOp padOp);


 /// Convert linalg.conv_2d_nhwc_hwcf into linalg.generic (for img2col packing)

 /// and linalg.matmul.

 ///

 /// A convolution operation can be written as a matrix-matrix multiplication by

 /// unfolding the cross-correlation between input and filter and explicitly copy

 /// overlapped sliding window inputs.

 ///

 /// Consider 2D input X with single channel input and output and 2x2 filter W:

 /// [x(0, 0)  , x(0, 1)  , ...,   x(0, n)  ]

 /// [x(1, 0)  , x(1, 1)  , ...,   x(1, n)  ]

 /// [.        ,  .       ,.   ,      .     ]            [w(0, 0), w(0, 1)]

 /// [.        ,  .       , .  ,      .     ]    (conv)  [w(1, 0), w(1, 1)]

 /// [.        ,  .       ,   .,      .     ]

 /// [x(n-1, 0), x(n-1, 1), ..., x(n-1, n-1)]

 ///

 /// The packed input data (img2col) is a matrix with |rows| = output spatial

 /// size, |columns| = filter spatial size. To compute the output Y(i, j) we need

 /// to calculate the dot product between filter window at input X(x, y)) and the

 /// filter which will look like the following where r.h.s is the img2col matrix

 /// and l.h.s is the flattened filter:

 ///

 /// [x(0,0), x(0,1), x(1,0), x(1,1)]

 /// [x(0,1), x(1,1), x(0,2), x(1,2)] (matmul) [w(0,0), w(0,1), w(1,0), w(1,1)]

 /// [x(0,1), x(1,1), x(0,2), x(1,2)]

 /// [   .  ,    .  ,    .  ,    .  ]

 ///

 /// In general for 2D case with (N, H, W, C) input and (Kh, Kw, C, D) filter

 /// and output (N, Ho, Wo, D) the convolution is the following matrix-matrix

 /// multiplication (Ho x Wo, Kh x Kw x C) * (Kh x Kw x C, D) for each input in

 /// the N input. For the case where N > 1 its a batched matrix-matrix

 /// multiplication.

 ///

 /// On success, return both the operation that produces the img2col tensor and

 /// the final operation of the sequence that replaces the original convolution.

 FailureOr<std::pair<Operation *, Operation *>>

 rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcHwcfOp convOp);


 /// Same as the above but for Fhwc channel orderings in the filter. In this case

 /// the matrix multiplication is actually a row-wise dot-product rather than a

 /// row-column dot-product. This is to avoid transposing the filter matrix which

 /// would be required for a regular matrix multiplication to produce the correct

 /// output dimensions.

 FailureOr<std::pair<Operation *, Operation *>>

 rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp convOp);


 /// Similar to rewriteInIm2Col with linalg::Conv2DNhwcHwcfOp except there is no

 /// reduction among the input channels so each convolution can be a

 /// matrix-vector product and by transposing both input filter so channels are

 /// outer most the computation is a batched matrix-vector product.

 FailureOr<std::pair<Operation *, Operation *>>

 rewriteInIm2Col(RewriterBase &rewriter,

                 linalg::DepthwiseConv2DNhwcHwcOp convOp);


 /// Similar to rewriteInIm2Col with linalg::Conv2DNhwcHwcfOp except because the

 /// channels are to the left of the image shape dimensions, the position of the

 /// contraction dimension in the resulting matmul is reversed. This swaps the

 /// LHS and RHS of the matmul when compared with nhwc (i.e. (D, C x Kh x Kw) *

 /// (C x Kh x Kw, Ho x Wo))

 FailureOr<std::pair<Operation *, Operation *>>

 rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNchwFchwOp convOp);


 /// Convert linalg.conv_2d_nhwc_fhwc(_q) to linalg.conv_2d_nhwc_hwcf(_q) by

 /// materializing transpose.

 FailureOr<Operation *> transposeConv2D(RewriterBase &rewriter,

                                        linalg::Conv2DNhwcFhwcOp op);

 FailureOr<Operation *> transposeConv2D(RewriterBase &rewriter,

                                        linalg::Conv2DNhwcFhwcQOp op);


 /// Convert Linalg matmul ops to transposed variants.

 FailureOr<Operation *> transposeMatmul(RewriterBase &rewriter,

                                        linalg::MatmulOp op,

                                        bool transposeLHS = true);

 FailureOr<Operation *> transposeBatchMatmul(RewriterBase &rewriter,

                                             linalg::BatchMatmulOp op,

                                             bool transposeLHS = true);


 /// Convert linalg.conv_2d_nhwc_fhwc to Winograd Conv2D algorithm

 /// F(m x m, r x r). m is the dimension size of output and r is the dimension

 /// size of filter.

 FailureOr<Operation *> winogradConv2D(RewriterBase &rewriter,

                                       linalg::Conv2DNhwcFhwcOp op, int64_t m,

                                       int64_t r);


 /// Rewrite linalg.winograd_filter_transform. The data layout of the filter is

 /// FHWC. The transformation matrix is 2-dimension. We need to extract H x W

 /// from FHWC first. We generate 2 levels of loops to iterate on F and C. After

 /// the rewriting, we get

 ///

 /// scf.for %f = lo_f to hi_f step 1

 ///   scf.for %c = lo_c to hi_c step 1

 ///     %extracted = extract filter<h x w> from filter<f x h x w x c>

 ///     %ret = linalg.matmul G, %extracted

 ///     %ret = linalg.matmul %ret, GT

 ///     %inserted = insert %ret into filter<h x w x c x f>

 FailureOr<Operation *>

 decomposeWinogradFilterTransformOp(RewriterBase &rewriter,

                                    linalg::WinogradFilterTransformOp op);


 /// Rewrite linalg.winograd_input_transform. The data layout of the input is

 /// NHWC. The transformation matrix is 2-dimension. We need to extract H x W

 /// from NHWC first. We generate 4 levels of loops to iterate on N, C, tileH,

 /// and tileW. After the rewriting, we get

 ///

 /// scf.for %h = 0 to tileH step 1

 ///   scf.for %w = 0 to tileW step 1

 ///     scf.for %n = 0 to N step 1

 ///       scf.for %c = 0 to C step 1

 ///         %extracted = extract %extracted<alphaH x alphaW> from

 ///                              %input<N x H x W x C>

 ///                              at [%n, (%h x m), (%w x m), %c]

 ///         %ret = linalg.matmul BT, %extracted

 ///         %ret = linalg.matmul %ret, B

 ///         %inserted = insert %ret<alphaH x alphaW> into

 ///                            %output<alphaH x alphaW x tileH x tileW x N x C>

 ///                            at [0, 0, %h, %w, %n, %c]

 FailureOr<Operation *>

 decomposeWinogradInputTransformOp(RewriterBase &rewriter,

                                   linalg::WinogradInputTransformOp op);


 /// Rewrite linalg.winograd_output_transform. The data layout of the output is

 /// HWNF. The transformation matrix is 2-dimension. We need to extract H x W

 /// from HWNF first. We generate 4 levels of loops to iterate on N, F, tileH,

 /// and tileW. After the transformation, we get

 ///

 /// scf.for %h = 0 to tileH step 1

 ///   scf.for %w = 0 to tileW step 1

 ///     scf.for %n = 0 to N step 1

 ///       scf.for %f = 0 to F step 1

 ///         %extracted = extract %extracted<alphaH x alphaW> from

 ///                              %input<alphaH x alphaW x tileH x tileW x N x F>

 ///                              at [0, 0, %h, %w, %n, %f]

 ///         %ret = linalg.matmul AT, %extracted

 ///         %ret = linalg.matmul %ret, A

 ///         %inserted = insert %ret<alphaH x alphaW> into

 ///                            output<N x H x W x F>

 ///                            at [%n, (%h x m), (%w x m), %f]

 FailureOr<Operation *>

 decomposeWinogradOutputTransformOp(RewriterBase &rewriter,

                                    linalg::WinogradOutputTransformOp op);


 /// Method to deduplicate operands and remove dead results of `linalg.generic`

 /// operations. This is effectively DCE for a linalg.generic op. If there is

 /// deduplication of operands orremoval of results, replaces the `genericOp`

 /// with a new op and returns it. Returns the same operation if there is no

 /// deduplication/removal.

 FailureOr<linalg::GenericOp> deduplicateOperandsAndRemoveDeadResults(

     RewriterBase &rewriter, linalg::GenericOp genericOp, bool removeOutputs);


 //===----------------------------------------------------------------------===//

 // Rewrite patterns wrapping transformations.

 // TODO: every single such pattern should be a close to noop wrapper around a

 // functional-stye API call.

 //===----------------------------------------------------------------------===//


 /// Rewrites 2-D convolution ops with size-1 window dimensions into 1-D

 /// convolution ops.

 template <typename Conv2DOp, typename Conv1DOp>

 struct DownscaleSizeOneWindowed2DConvolution final

     : public OpRewritePattern<Conv2DOp> {

   using OpRewritePattern<Conv2DOp>::OpRewritePattern;


   FailureOr<Conv1DOp> returningMatchAndRewrite(Conv2DOp convOp,

                                                PatternRewriter &rewriter) const;


   LogicalResult matchAndRewrite(Conv2DOp convOp,

                                 PatternRewriter &rewriter) const override {

     return returningMatchAndRewrite(convOp, rewriter);

   }

 };


 extern template struct DownscaleSizeOneWindowed2DConvolution<Conv2DNhwcHwcfOp,

                                                              Conv1DNwcWcfOp>;

 extern template struct DownscaleSizeOneWindowed2DConvolution<Conv2DNchwFchwOp,

                                                              Conv1DNcwFcwOp>;


 /// Rewrites 2-D depthwise convolution ops with size-1 (w, kw) or (h, kh)

 /// dimensions into 1-D depthwise convolution ops.

 struct DownscaleDepthwiseConv2DNhwcHwcOp final

     : public OpRewritePattern<DepthwiseConv2DNhwcHwcOp> {

   DownscaleDepthwiseConv2DNhwcHwcOp(MLIRContext *context,

                                     PatternBenefit benefit = 1)

       : OpRewritePattern<DepthwiseConv2DNhwcHwcOp>(context, benefit) {}


   FailureOr<DepthwiseConv1DNwcWcOp>

   returningMatchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp,

                            PatternRewriter &rewriter) const;


   LogicalResult matchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp,

                                 PatternRewriter &rewriter) const override {

     return returningMatchAndRewrite(convOp, rewriter);

   }

 };


 struct DownscaleConv2DOp final : public OpRewritePattern<Conv2DOp> {

   DownscaleConv2DOp(MLIRContext *context, PatternBenefit benefit = 1)

       : OpRewritePattern<Conv2DOp>(context, benefit) {}


   FailureOr<Conv1DOp> returningMatchAndRewrite(Conv2DOp convOp,

                                                PatternRewriter &rewriter) const;


   LogicalResult matchAndRewrite(Conv2DOp convOp,

                                 PatternRewriter &rewriter) const override {

     return returningMatchAndRewrite(convOp, rewriter);

   }

 };


 ///

 /// Linalg generalization pattern.

 ///

 /// Apply the `generalization` transformation as a pattern.

 /// See `generalization` for more details.

 //

 // TODO: Automatic default pattern class that just unwraps a function

 // returning FailureOr<GenericOp>.

 struct LinalgGeneralizationPattern

     : public OpInterfaceRewritePattern<LinalgOp> {

   using OpInterfaceRewritePattern<LinalgOp>::OpInterfaceRewritePattern;


   /// `matchAndRewrite` implementation that returns the significant

   /// transformed pieces of IR.

   FailureOr<GenericOp>

   returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const {

     return generalizeNamedOp(rewriter, op);

   }


   LogicalResult matchAndRewrite(LinalgOp op,

                                 PatternRewriter &rewriter) const override {

     return returningMatchAndRewrite(op, rewriter);

   }

 };


 struct LinalgSpecializationPattern : public OpRewritePattern<GenericOp> {

   using OpRewritePattern<GenericOp>::OpRewritePattern;


   FailureOr<GenericOp>

   returningMatchAndRewrite(GenericOp op, PatternRewriter &rewriter) const {

     return specializeGenericOp(rewriter, op);

   }


   LogicalResult matchAndRewrite(GenericOp op,

                                 PatternRewriter &rewriter) const override {

     return returningMatchAndRewrite(op, rewriter);

   }

 };


 /// Vectorization pattern for memref::CopyOp.

 struct CopyVectorizationPattern : public OpRewritePattern<memref::CopyOp> {

   using OpRewritePattern<memref::CopyOp>::OpRewritePattern;


   LogicalResult matchAndRewrite(memref::CopyOp copyOp,

                                 PatternRewriter &rewriter) const override;

 };


 using OptimizeCopyFn =

     std::function<LogicalResult(RewriterBase &, tensor::PadOp, Value)>;


 /// Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and

 /// InsertSliceOp. For now, only constant padding values are supported.

 struct DecomposePadOpPattern : public OpRewritePattern<tensor::PadOp> {

   DecomposePadOpPattern(MLIRContext *context, PatternBenefit benefit = 1)

       : OpRewritePattern<tensor::PadOp>(context, benefit) {}

   LogicalResult matchAndRewrite(tensor::PadOp padOp,

                                 PatternRewriter &rewriter) const override;


 protected:

   Value createFillOrGenerateOp(RewriterBase &rewriter, tensor::PadOp padOp,

                                Value dest,

                                const SmallVector<Value> &dynSizes) const;

 };


 /// Rewrites a linalg::PackOp into a sequence of:

 ///   * tensor::PadOp + linalg::TransposeOp + tensor::EmptyOp +

 ///     tensor::InsertSliceOp ops.

 ///

 /// Requires that all the outer dims of the input linalg::PackOp are 1.

 ///

 /// Before:

 /// ```

 ///   %packed = linalg.pack %input

 ///     padding_value(%pad : f32)

 ///     inner_dims_pos = [1, 0]

 ///     inner_tiles = [2, %high]

 ///     into %output : tensor<5x1xf32> -> tensor<1x1x2x?xf32>

 /// ```

 ///

 /// After:

 /// ```

 ///   // PadOp

 ///   %padded = tensor.pad %arg0 low[0, 0] high[%0, 1] {

 ///     ^bb0(...):

 ///       tensor.yield %arg2 : f32

 ///   } : tensor<5x1xf32> to tensor<?x2xf32>

 ///   // EmptyOp + TransposeOp

 ///   %empty = tensor.empty(%arg3) : tensor<2x?xf32>

 ///   %transposed = linalg.transpose

 ///     ins(%extracted_slice : tensor<?x2xf32>)

 ///     outs(%empty : tensor<2x?xf32>)

 ///     permutation = [1, 0]

 ///   // InsertSliceOp

 ///   %inserted_slice = tensor.insert_slice %transposed

 ///     into %arg1[0, 0, 0, 0] [1, 1, 2, %tile_dim_1] [1, 1, 1, 1]

 ///     : tensor<2x?xf32> into tensor<1x1x2x?xf32>

 /// ```

 struct DecomposeOuterUnitDimsPackOpPattern

     : public OpRewritePattern<linalg::PackOp> {

   using OpRewritePattern<linalg::PackOp>::OpRewritePattern;

   LogicalResult matchAndRewrite(linalg::PackOp packOp,

                                 PatternRewriter &rewriter) const override;

 };


 /// Rewrites a linalg::UnPackOp into a sequence of rank-reduced

 ///   * tensor::ExtractSliceOp + linalg::TransposeOp + tensor::InsertSliceOp

 ///

 /// Requires that all the outer dims of the input linalg::PackOp are 1.

 ///

 /// Before:

 /// ```

 /// %packed = linalg.unpack %input

 ///   inner_dims_pos = [1, 0]

 ///   inner_tiles = [2, 8]

 ///   into %output : tensor<1x1x2x8xf32> -> tensor<5x1xf32>

 /// ```

 ///

 /// After:

 /// ```

 ///   // Rank-reduced extract to obtain the tile

 ///   %slice = tensor.extract_slice %arg0[0, 0, 0, 0] [1, 1, 2, 8] [1, 1, 1, 1]

 ///     : tensor<1x1x2x8xf32> to tensor<2x8xf32>

 ///   // EmptyOp + TransposeOp

 ///   %init = tensor.empty() : tensor<8x2xf32>

 ///   %transposed = linalg.transpose

 ///     ins(%extracted_slice : tensor<2x8xf32>)

 ///     outs(%0 : tensor<8x2xf32>) permutation = [1, 0]

 ///   // Extract a slice matching the specified output size

 ///   %result = tensor.extract_slice %transposed[0, 0] [5, 1] [1, 1]

 ///     : tensor<8x2xf32> to tensor<5x1xf32>

 /// ```

 struct DecomposeOuterUnitDimsUnPackOpPattern

     : public OpRewritePattern<linalg::UnPackOp> {

   using OpRewritePattern<linalg::UnPackOp>::OpRewritePattern;

   LogicalResult matchAndRewrite(linalg::UnPackOp unpackOp,

                                 PatternRewriter &rewriter) const override;

 };


 /// Match and rewrite for the pattern:

 /// ```

 ///    %alloc = ...

 ///    [optional] %view = memref.view %alloc ...

 ///    %subView = subview %allocOrView ...

 ///    [optional] linalg.fill(%allocOrView, %cst) ...

 ///    ...

 ///    memref.copy(%in, %subView) ...

 ///    vector.transfer_read %allocOrView[...], %cst ...

 /// ```

 /// into

 /// ```

 ///    [unchanged] %alloc = ...

 ///    [unchanged] [optional] %view = memref.view %alloc ...

 ///    [unchanged] [unchanged] %subView = subview %allocOrView ...

 ///    ...

 ///    vector.transfer_read %in[...], %cst ...

 /// ```

 /// Where there is no interleaved use between memref.copy and transfer_read as

 /// well as no interleaved use between linalg.fill and memref.copy (if

 /// linalg.fill is specified).

 /// This is a custom rewrite to forward partial reads (with optional fills) to

 /// vector.transfer_read.

 struct LinalgCopyVTRForwardingPattern

     : public OpRewritePattern<vector::TransferReadOp> {

   using OpRewritePattern<vector::TransferReadOp>::OpRewritePattern;


   LogicalResult matchAndRewrite(vector::TransferReadOp xferOp,

                                 PatternRewriter &rewriter) const override;

 };


 /// Match and rewrite for the pattern:

 /// ```

 ///    %alloc = ...

 ///    [optional] %view = memref.view %alloc ...

 ///    %subView = subview %allocOrView...

 ///    ...

 ///    vector.transfer_write %..., %allocOrView[...]

 ///    memref.copy(%subView, %out)

 /// ```

 /// into

 /// ```

 ///    [unchanged] %alloc = ...

 ///    [unchanged] [optional] %view = memref.view %alloc ...

 ///    [unchanged] %subView = subview %allocOrView...

 ///    ...

 ///    vector.transfer_write %..., %out[...]

 /// ```

 /// Where there is no interleaved use between transfer_write and memref.copy.

 /// This is a custom rewrite to forward partial writes to

 /// vector.transfer_write.

 struct LinalgCopyVTWForwardingPattern

     : public OpRewritePattern<vector::TransferWriteOp> {

   using OpRewritePattern<vector::TransferWriteOp>::OpRewritePattern;


   LogicalResult matchAndRewrite(vector::TransferWriteOp xferOp,

                                 PatternRewriter &rewriter) const override;

 };


 /// Rewrite extract_slice(tensor.pad(x)) into tensor.pad(extract_slice(x)).

 struct ExtractSliceOfPadTensorSwapPattern

     : public OpRewritePattern<tensor::ExtractSliceOp> {

   /// A function to control pattern application and rewrite logic.

   ///

   /// The function will be given the slice op and should return:

   /// -  std::nullopt: to fail the match and not apply the pattern;

   /// -  true: to apply the pattern with zero slice guard;

   /// - false: to apply the pattern without zero slice guard.

   ///

   /// See the documentation for tensor::bubbleUpPadSlice regarding zero slice

   /// guard.

   using ControlFn = std::function<std::optional<bool>(tensor::ExtractSliceOp)>;


   ExtractSliceOfPadTensorSwapPattern(MLIRContext *context,

                                      ControlFn controlFn = nullptr,

                                      PatternBenefit benefit = 1)

       : OpRewritePattern(context, benefit), controlFn(std::move(controlFn)) {}


   LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,

                                 PatternRewriter &rewriter) const override;


 private:

   ControlFn controlFn;

 };


 //===----------------------------------------------------------------------===//

 // Populate functions.

 //===----------------------------------------------------------------------===//


 /// Canonicalization patterns relevant to apply after tiling patterns. These

 /// are applied automatically by the tiling pass but need to be applied

 /// manually when tiling is called programmatically.

 RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx);

 void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns);


 /// Linalg generalization patterns


 /// Populates `patterns` with patterns to convert spec-generated named ops to

 /// linalg.generic ops.

 void populateLinalgNamedOpsGeneralizationPatterns(RewritePatternSet &patterns);


 /// Populates `patterns` with patterns to convert linalg.generic ops to named

 /// ops where possible. A linalg.generic can represent wide range and complex

 /// computations for which equivalent linalg named op may not exist e.g.

 /// linalg.generic that takes a tensor and computes a polynomial such as:

 ///     p(x) = an*x^n + ... + a1x + a0

 /// There is no equivalent named op to convert to. Many such cases exist.

 void populateLinalgGenericOpsSpecializationPatterns(

     RewritePatternSet &patterns);


 /// Populates `patterns` with patterns that fold operations like

 /// `linalg.transform` into elementwise op map.

 void populateLinalgFoldIntoElementwisePatterns(RewritePatternSet &patterns);


 /// Linalg decompose convolutions patterns


 /// Populates patterns to decompose high-D convolution ops into low-D ones.

 /// This is a step in progressive lowering for convolution ops, afterwards we

 /// can vectorize the low-D convolution ops.

 void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns,

                                           PatternBenefit benefit = 1);


 /// Populates patterns to decompose linalg.pack and linalg.unpack Ops into e.g.

 /// tensor.pad, linalg.transpose, tensor.{insert|extract}_slice. Require all

 /// outer dims to be unit.

 void populateDecomposePackUnpackPatterns(RewritePatternSet &patterns);


 /// Populates patterns to decompose tensor.pad into e.g.

 /// tensor.empty, linalg.fill, tensor.insert_slice.

 void populateDecomposePadPatterns(RewritePatternSet &patterns);


 /// Populates patterns to transform linalg.conv_2d_xxx operations into

 /// linalg.generic (for img2col packing) and linalg.matmul.

 /// \see rewriteInIm2Col for more details.

 void populateConvertConv2DToImg2ColPatterns(RewritePatternSet &patterns);


 /// Populates `patterns` with patterns that vectorize tensor.pad.

 /// These patterns are meant to apply in a complementary fashion. Benefits

 /// are used to encode a certain ordering of pattern application. To avoid

 /// scattering magic constants throughout the code base, the patterns must be

 /// added with this function. `baseBenefit` can be used to offset the benefit

 /// of all tensor::PadOp vectorization patterns by a certain value.

 void populatePadOpVectorizationPatterns(RewritePatternSet &patterns,

                                         PatternBenefit baseBenefit = 1);


 /// Populate patterns for splitting a `LinalgOp` with multiple statements within

 /// its payload into multiple `GenericOp` that have a single statement.

 /// The option `removeDeadArgsAndResults` adds patterns to remove dead arguments

 /// and results from the generated decomposed ops. This is default `true` since

 /// the core decomposition patterns relies on these clean up patterns. It is set

 /// to false only for testing purposes.

 void populateDecomposeLinalgOpsPattern(RewritePatternSet &patterns,

                                        bool removeDeadArgsAndResults = true);


 /// Populate patterns that convert non-destination-style ops to destination

 /// style ops.

 void populateConvertToDestinationStylePatterns(RewritePatternSet &patterns);


 /// Populate patterns for vectorizing low-D convolution ops. This is a step in

 /// progressive lowering for convolution ops, it assume high-D convolution ops

 /// were decomposed previously.

 void populateConvolutionVectorizationPatterns(RewritePatternSet &patterns,

                                               PatternBenefit benefit = 1);


 /// Populate patterns that convert `ElementwiseMappable` ops to linalg

 /// parallel loops.

 void populateElementwiseToLinalgConversionPatterns(RewritePatternSet &patterns);


 /// Populate patterns that are only useful in the context of sparse tensors.

 void populateSparseTensorRewriting(RewritePatternSet &patterns);


 /// Function type which is used to control when to stop fusion. It is expected

 /// that OpOperand is not modified in the callback. The OpOperand is not marked

 /// as const to allow callers to use non-const methods.

 using ControlFusionFn = std::function<bool(OpOperand *fusedOperand)>;


 /// Patterns for fusing linalg operation on tensors.


 /// Pattern to fuse `linalg.generic` -> `linalg.generic` operations

 /// when both operations are fusable elementwise operations.

 void populateElementwiseOpsFusionPatterns(

     RewritePatternSet &patterns,

     const ControlFusionFn &controlElementwiseOpFusion);


 /// Function type which is used to control propagation of linalg.pack/unpack

 /// ops.

 using ControlPropagationFn = std::function<bool(OpOperand *opOperand)>;


 /// Patterns to bubble up or down data layout ops across other operations.

 void populateDataLayoutPropagationPatterns(

     RewritePatternSet &patterns,

     const ControlPropagationFn &controlPackUnPackPropagation);


 /// Pattern to remove dead operands and results of `linalg.generic` operations.

 /// This is a pattern wrapper for `deduplicateOperandsAndRemoveDeadResults`.

 void populateEraseUnusedOperandsAndResultsPatterns(RewritePatternSet &patterns);


 /// Patterns to promote inputs to outputs and remove unused inputs of

 /// `linalg.generic` ops.

 void populateEraseUnnecessaryInputsPatterns(RewritePatternSet &patterns);


 /// Function type to control generic op dimension collapsing. It is expected

 /// to return an array of `ReassociationIndices` representing dimensions that

 /// should be merged.

 using GetCollapsableDimensionsFn =

     std::function<SmallVector<ReassociationIndices>(linalg::LinalgOp)>;


 /// Pattern to collapse dimensions in a linalg.generic op. This will collapse

 /// tensor operands when needed and expand back the result tensors.

 void populateCollapseDimensions(

     RewritePatternSet &patterns,

     const GetCollapsableDimensionsFn &controlCollapseDimensions);


 /// Patterns to fold an expanding (collapsing) tensor_reshape operation with its

 /// producer (consumer) generic operation by expanding the dimensionality of the

 /// loop in the generic op.

 void populateFoldReshapeOpsByExpansionPatterns(

     RewritePatternSet &patterns, const ControlFusionFn &controlFoldingReshapes);


 /// Patterns to fold an expanding tensor.expand_shape operation with its

 /// producer generic operation by collapsing the dimensions of the generic op.

 void populateFoldReshapeOpsByCollapsingPatterns(

     RewritePatternSet &patterns, const ControlFusionFn &controlFoldingReshapes);


 /// Patterns to constant fold Linalg operations.

 void populateConstantFoldLinalgOperations(RewritePatternSet &patterns,

                                           const ControlFusionFn &controlFn);


 /// Pattern to replace `linalg.add` when destination passing on a contraction op

 /// suffices for achieving the sum.

 void populateFoldAddIntoDestPatterns(RewritePatternSet &patterns);


 /// Pattern to fuse a `tensor.pad` operation with the producer of its source,

 /// if the producer is a `linalg` operation with all parallel iterator types.

 void populateFuseTensorPadWithProducerLinalgOpPatterns(

     RewritePatternSet &patterns);


 /// Patterns to convert from one named op to another. These can be seen as

 /// canonicalizations of named ops into another named op.

 void populateLinalgNamedOpConversionPatterns(RewritePatternSet &patterns);


 /// Patterns to fold unit-extent dimensions in operands/results of linalg ops on

 /// tensors via reassociative reshape ops.

 void populateFoldUnitExtentDimsPatterns(RewritePatternSet &patterns,

                                         ControlDropUnitDims &options);


 /// A pattern that converts init operands to input operands.

 void populateMoveInitOperandsToInputPattern(RewritePatternSet &patterns);


 /// Patterns that are used to inline constant operands into linalg generic ops.

 void populateInlineConstantOperandsPatterns(RewritePatternSet &patterns);


 /// Patterns that are used to bubble up extract slice op above linalg op.

 void populateBubbleUpExtractSliceOpPatterns(RewritePatternSet &patterns);


 /// Adds patterns that waps tensor.extract_slice(linalg.fill(%cst, %init)) into

 /// linalg.fill(%cst, tensor.extract_slice(%init)).

 void populateSwapExtractSliceWithFillPatterns(RewritePatternSet &patterns);


 /// Add patterns to make explicit broadcasts and transforms in the

 /// input operands of a genericOp.

 void populateDecomposeProjectedPermutationPatterns(RewritePatternSet &patterns);


 /// Patterns to apply `splitReduction` below.

 void populateSplitReductionPattern(

     RewritePatternSet &patterns,

     const ControlSplitReductionFn &controlSplitReductionFn,

     bool useAlloc = false);


 /// Patterns to convert Linalg matmul ops to transposed variants.

 void populateTransposeMatmulPatterns(RewritePatternSet &patterns,

                                      bool transposeLHS = true);


 /// Patterns to block pack Linalg matmul ops.

 void populateBlockPackMatmulPatterns(RewritePatternSet &patterns,

                                      const ControlBlockPackMatmulFn &controlFn);


 /// Patterns to apply Winograd Conv2D algorithm F(m x m, r x r).

 void populateWinogradConv2DPatterns(RewritePatternSet &patterns, int64_t m,

                                     int64_t r);


 /// Patterns to decompose Winograd operators.

 void populateDecomposeWinogradOpsPatterns(RewritePatternSet &patterns);


 /// Adds patterns that reduce the rank of named contraction ops that have

 /// unit dimensions in the operand(s) by converting to a sequence of

 /// `collapse_shape`,

 /// `<corresponding linalg named op>`, `expand_shape` (if on tensors).  For

 /// example a `linalg.batch_matmul` with unit batch size will convert to

 /// `linalg.matmul` and a `linalg.matvec` with with unit spatial dim in lhs will

 /// convert to a `linalg.dot`.

 void populateContractionOpRankReducingPatterns(RewritePatternSet &patterns);


 /// Populates `patterns` with patterns that fold operations like `tensor.pad`

 /// and `tensor.extract_slice` into `tensor.pack` and `tensor.unpack` operations

 /// respectively.

 void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns);


 /// Populates `patterns` with patterns that fold operations like `linalg.pack`

 /// and `linalg.unpack` into `tensor.empty`.

 void populateFoldPackUnpackIntoTensorEmptyPatterns(RewritePatternSet &patterns);


 /// Populates `patterns` with patterns that simplify `tensor.pack` and

 /// `tensor.unpack` operations.

 void populateSimplifyPackAndUnpackPatterns(RewritePatternSet &patterns);


 } // namespace linalg

 } // namespace mlir


 #endif // MLIR_DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H

Bufferization.h

DialectConversion.h

Utils.h

Utils.h

options
static llvm::ManagedStatic< PassManagerOptions > options
Definition: PassManagerOptions.cpp:89

PatternMatch.h

StaticValueUtils.h

TilingInterface.h

VectorToSCF.h

VectorTransforms.h

llvm::ArrayRef
Definition: LLVM.h:48

llvm::DenseMap< Value, Value >

llvm::DenseSet
Definition: LLVM.h:59

llvm::SmallVectorImpl
Definition: LLVM.h:74

llvm::SmallVector
Definition: LLVM.h:72

mlir::AffineMap
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:46

mlir::Attribute
Attributes are known-constant values of operations.
Definition: Attributes.h:25

mlir::DataLayout
The main mechanism for performing data layout queries.
Definition: DataLayoutInterfaces.h:220

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76

mlir::MLIRContext
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60

mlir::OpBuilder
This class helps build Operations.
Definition: Builders.h:205

mlir::OpFoldResult
This class represents a single result from folding an operation.
Definition: OpDefinition.h:271

mlir::OpOperand
This class represents an operand of an operation.
Definition: Value.h:257

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88

mlir::PatternBenefit
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very li...
Definition: PatternMatch.h:34

mlir::PatternRewriter
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
Definition: PatternMatch.h:749

mlir::RewritePatternSet
Definition: PatternMatch.h:772

mlir::RewriterBase
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:358

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96

MemRef.h

Tensor.h

Transforms.h

mlir::linalg::detail::buildPackingLoopNest
FailureOr< PackingResult > buildPackingLoopNest(RewriterBase &rewriter, tensor::PadOp opToHoist, scf::ForOp outermostEnclosingForOp, ArrayRef< int64_t > transposeVector)
Build the packing loop nest required to hoist opToHoist above outermostEnclosingForOp.
Definition: HoistPadding.cpp:727

mlir::linalg::populateLinalgNamedOpConversionPatterns
void populateLinalgNamedOpConversionPatterns(RewritePatternSet &patterns)
Patterns to convert from one named op to another.
Definition: NamedOpConversions.cpp:161

mlir::linalg::populateMoveInitOperandsToInputPattern
void populateMoveInitOperandsToInputPattern(RewritePatternSet &patterns)
A pattern that converts init operands to input operands.
Definition: DropUnitDims.cpp:811

mlir::linalg::populateTransposeMatmulPatterns
void populateTransposeMatmulPatterns(RewritePatternSet &patterns, bool transposeLHS=true)
Patterns to convert Linalg matmul ops to transposed variants.
Definition: TransposeMatmul.cpp:169

mlir::linalg::populateContractionOpRankReducingPatterns
void populateContractionOpRankReducingPatterns(RewritePatternSet &patterns)
Adds patterns that reduce the rank of named contraction ops that have unit dimensions in the operand(...
Definition: DropUnitDims.cpp:1073

mlir::linalg::rewriteAsPaddedOp
LogicalResult rewriteAsPaddedOp(RewriterBase &rewriter, LinalgOp opToPad, const LinalgPaddingOptions &options, LinalgOp &paddedOp, SmallVector< Value > &replacements, SmallVector< tensor::PadOp > &padOps)
Pad the iterator dimensions paddingDimensions of all opToPad operands to a static bounding box.
Definition: Padding.cpp:153

mlir::linalg::populateSplitReductionPattern
void populateSplitReductionPattern(RewritePatternSet &patterns, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
Patterns to apply splitReduction below.
Definition: SplitReduction.cpp:448

mlir::linalg::populateFuseTensorPadWithProducerLinalgOpPatterns
void populateFuseTensorPadWithProducerLinalgOpPatterns(RewritePatternSet &patterns)
Pattern to fuse a tensor.pad operation with the producer of its source, if the producer is a linalg o...
Definition: FusePadOpWithLinalgProducer.cpp:121

mlir::linalg::rewriteInIm2Col
FailureOr< std::pair< Operation *, Operation * > > rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcHwcfOp convOp)
Convert linalg.conv_2d_nhwc_hwcf into linalg.generic (for img2col packing) and linalg....
Definition: ConvertConv2DToImg2Col.cpp:79

mlir::linalg::areDimSequencesPreserved
bool areDimSequencesPreserved(ArrayRef< AffineMap > maps, ArrayRef< ReassociationIndices > dimSequences)
Return true if all sequences of dimensions specified in dimSequences are contiguous in all the ranges...
Definition: ElementwiseOpFusion.cpp:1241

mlir::linalg::hasVectorizationImpl
bool hasVectorizationImpl(Operation *)
Return true if there's dedicated logic in the Linalg Vectorizer to vectorize this Op,...
Definition: Vectorization.cpp:2520

mlir::linalg::populateBubbleUpExtractSliceOpPatterns
void populateBubbleUpExtractSliceOpPatterns(RewritePatternSet &patterns)
Patterns that are used to bubble up extract slice op above linalg op.
Definition: BubbleUpExtractSlice.cpp:134

mlir::linalg::transformIndexOps
void transformIndexOps(RewriterBase &b, LinalgOp op, SmallVectorImpl< Value > &ivs, const LoopIndexToRangeIndexMap &loopIndexToRangeIndex)
All indices returned by IndexOp should be invariant with respect to tiling.
Definition: Tiling.cpp:79

mlir::linalg::AllocBufferCallbackFn
std::function< std::optional< Value >(OpBuilder &b, memref::SubViewOp subView, ArrayRef< Value > boundingSubViewSize, DataLayout &layout)> AllocBufferCallbackFn
Callback function type used to perform the allocation for the promoted subView.
Definition: Transforms.h:340

mlir::linalg::populateBlockPackMatmulPatterns
void populateBlockPackMatmulPatterns(RewritePatternSet &patterns, const ControlBlockPackMatmulFn &controlFn)
Patterns to block pack Linalg matmul ops.
Definition: BlockPackMatmul.cpp:320

mlir::linalg::populateConvertConv2DToImg2ColPatterns
void populateConvertConv2DToImg2ColPatterns(RewritePatternSet &patterns)
Populates patterns to transform linalg.conv_2d_xxx operations into linalg.generic (for img2col packin...
Definition: ConvertConv2DToImg2Col.cpp:687

mlir::linalg::decomposeWinogradFilterTransformOp
FailureOr< Operation * > decomposeWinogradFilterTransformOp(RewriterBase &rewriter, linalg::WinogradFilterTransformOp op)
Rewrite linalg.winograd_filter_transform.
Definition: WinogradConv2D.cpp:1195

mlir::linalg::LoopIndexToRangeIndexMap
DenseMap< int, int > LoopIndexToRangeIndexMap
Creates a number of ranges equal to the number of non-zero in tileSizes.
Definition: Transforms.h:810

mlir::linalg::allocateWorkgroupMemory
std::optional< Value > allocateWorkgroupMemory(OpBuilder &builder, memref::SubViewOp subview, ArrayRef< Value > sizeBounds, DataLayout &)
Allocate the subview in the GPU workgroup memory.
Definition: Promotion.cpp:469

mlir::linalg::packTranspose
FailureOr< PackTransposeResult > packTranspose(RewriterBase &rewriter, linalg::PackOp packOp, linalg::LinalgOp linalgOp, linalg::UnPackOp maybeUnPackOp, ArrayRef< int64_t > outerPerm, ArrayRef< int64_t > innerPerm)
Transpose a single PackOp -> LinalgOp -> UnPackOp chain and return the transposed PackOp -> LinalgOp ...
Definition: Transforms.cpp:678

mlir::linalg::bufferizeToAllocation
Value bufferizeToAllocation(RewriterBase &rewriter, const BufferizeToAllocationOptions &options, tensor::PadOp padOp, Attribute memorySpace={}, Operation *insertionPoint=nullptr)
Materialize a buffer allocation for the given tensor.pad op and lower the op to linalg....

mlir::linalg::ControlFusionFn
std::function< bool(OpOperand *fusedOperand)> ControlFusionFn
Function type which is used to control when to stop fusion.
Definition: Transforms.h:1778

mlir::linalg::isDimSequencePreserved
bool isDimSequencePreserved(AffineMap map, ReassociationIndicesRef dimSequence)
Return true if a given sequence of dimensions are contiguous in the range of the specified indexing m...
Definition: ElementwiseOpFusion.cpp:1200

mlir::linalg::hoistPaddingOnTensors
FailureOr< Value > hoistPaddingOnTensors(RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops, ArrayRef< int64_t > transposeVector, tensor::PadOp &hoistedOp, SmallVectorImpl< TransposeOp > &transposeOps)
Mechanically hoist padding operations on tensors by numLoops into a new, generally larger tensor.
Definition: HoistPadding.cpp:943

mlir::linalg::populateDecomposeProjectedPermutationPatterns
void populateDecomposeProjectedPermutationPatterns(RewritePatternSet &patterns)
Add patterns to make explicit broadcasts and transforms in the input operands of a genericOp.
Definition: DecomposeGenericByUnfoldingPermutation.cpp:246

mlir::linalg::specializeGenericOp
FailureOr< LinalgOp > specializeGenericOp(RewriterBase &rewriter, GenericOp genericOp)
Create a namedOp from the given GenericOp and replace the GenericOp.
Definition: Specialize.cpp:260

mlir::linalg::populateFoldReshapeOpsByCollapsingPatterns
void populateFoldReshapeOpsByCollapsingPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlFoldingReshapes)
Patterns to fold an expanding tensor.expand_shape operation with its producer generic operation by co...
Definition: ElementwiseOpFusion.cpp:2239

mlir::linalg::LinalgTilingLoopType
LinalgTilingLoopType
The type of loops to be generated during tiling.
Definition: Utils.h:115

mlir::linalg::LinalgTilingLoopType::Loops
@ Loops

mlir::linalg::lowerUnPack
FailureOr< LowerUnPackOpResult > lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp, bool lowerUnpadLikeWithExtractSlice=true)
Rewrite pack as empty + transpose + reshape + extract_slice.
Definition: Transforms.cpp:359

mlir::linalg::DeallocBufferCallbackFn
std::function< LogicalResult(OpBuilder &b, Value buffer)> DeallocBufferCallbackFn
Callback function type used to deallocate the buffers used to hold the promoted subview.
Definition: Transforms.h:345

mlir::linalg::populateDataLayoutPropagationPatterns
void populateDataLayoutPropagationPatterns(RewritePatternSet &patterns, const ControlPropagationFn &controlPackUnPackPropagation)
Patterns to bubble up or down data layout ops across other operations.
Definition: DataLayoutPropagation.cpp:1230

mlir::linalg::populatePadOpVectorizationPatterns
void populatePadOpVectorizationPatterns(RewritePatternSet &patterns, PatternBenefit baseBenefit=1)
Populates patterns with patterns that vectorize tensor.pad.
Definition: Vectorization.cpp:3101

mlir::linalg::populateLinalgTilingCanonicalizationPatterns
void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns)
Definition: Tiling.cpp:861

mlir::linalg::populateLinalgFoldIntoElementwisePatterns
void populateLinalgFoldIntoElementwisePatterns(RewritePatternSet &patterns)
Populates patterns with patterns that fold operations like linalg.transform into elementwise op map.
Definition: FoldIntoElementwise.cpp:85

mlir::linalg::deallocateGPUPrivateMemory
LogicalResult deallocateGPUPrivateMemory(OpBuilder &, Value)
In case of GPU private memory there is no need to deallocate since the memory is freed when going out...
Definition: Promotion.cpp:510

mlir::linalg::populateSparseTensorRewriting
void populateSparseTensorRewriting(RewritePatternSet &patterns)
Populate patterns that are only useful in the context of sparse tensors.

mlir::linalg::decomposeWinogradOutputTransformOp
FailureOr< Operation * > decomposeWinogradOutputTransformOp(RewriterBase &rewriter, linalg::WinogradOutputTransformOp op)
Rewrite linalg.winograd_output_transform.
Definition: WinogradConv2D.cpp:1207

mlir::linalg::fuseElementwiseOps
FailureOr< ElementwiseOpFusionResult > fuseElementwiseOps(RewriterBase &rewriter, OpOperand *fusedOperand)
Definition: ElementwiseOpFusion.cpp:339

mlir::linalg::getPreservedProducerResults
llvm::SmallDenseSet< int > getPreservedProducerResults(GenericOp producer, GenericOp consumer, OpOperand *fusedOperand)
Returns a set of indices of the producer's results which would be preserved after the fusion.
Definition: ElementwiseOpFusion.cpp:112

mlir::linalg::promoteSubviewAsNewBuffer
FailureOr< PromotionInfo > promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, memref::SubViewOp subView, const AllocBufferCallbackFn &allocationFn, DataLayout &layout)
Definition: Promotion.cpp:238

mlir::linalg::allocateGPUPrivateMemory
std::optional< Value > allocateGPUPrivateMemory(OpBuilder &builder, memref::SubViewOp subview, ArrayRef< Value > sizeBounds, DataLayout &)
Allocate the subview in the GPU private memory.
Definition: Promotion.cpp:494

mlir::linalg::rewriteInDestinationPassingStyle
FailureOr< Operation * > rewriteInDestinationPassingStyle(RewriterBase &rewriter, tensor::FromElementsOp fromElementsOp)
Rewrite tensor.from_elements to linalg.generic.
Definition: ConvertToDestinationStyle.cpp:349

mlir::linalg::dropUnitDims
FailureOr< DropUnitDimsResult > dropUnitDims(RewriterBase &rewriter, GenericOp genericOp, const ControlDropUnitDims &options)
Definition: DropUnitDims.cpp:386

mlir::linalg::blockPackMatmul
FailureOr< PackResult > blockPackMatmul(RewriterBase &rewriter, linalg::LinalgOp linalgOp, const ControlBlockPackMatmulFn &controlPackMatmul)
Pack a matmul operation into blocked 4D layout.
Definition: BlockPackMatmul.cpp:139

mlir::linalg::peelLoops
void peelLoops(RewriterBase &rewriter, ArrayRef< scf::ForOp > loops)
Peel 'loops' and applies affine_min/max bounds simplification on the fly where relevant.
Definition: Transforms.cpp:76

mlir::linalg::winogradConv2D
FailureOr< Operation * > winogradConv2D(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp op, int64_t m, int64_t r)
Convert linalg.conv_2d_nhwc_fhwc to Winograd Conv2D algorithm F(m x m, r x r).
Definition: WinogradConv2D.cpp:1188

mlir::linalg::populateConvertToDestinationStylePatterns
void populateConvertToDestinationStylePatterns(RewritePatternSet &patterns)
Populate patterns that convert non-destination-style ops to destination style ops.
Definition: ConvertToDestinationStyle.cpp:613

mlir::linalg::transposeConv2D
FailureOr< Operation * > transposeConv2D(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp op)
Convert linalg.conv_2d_nhwc_fhwc(_q) to linalg.conv_2d_nhwc_hwcf(_q) by materializing transpose.
Definition: TransposeConv2D.cpp:127

mlir::linalg::populateFoldUnitExtentDimsPatterns
void populateFoldUnitExtentDimsPatterns(RewritePatternSet &patterns, ControlDropUnitDims &options)
Patterns to fold unit-extent dimensions in operands/results of linalg ops on tensors via reassociativ...
Definition: DropUnitDims.cpp:799

mlir::linalg::copyToWorkgroupMemory
LogicalResult copyToWorkgroupMemory(OpBuilder &b, Value src, Value dst)
Create Memref copy operations and add gpu barrier guards before and after the copy operation to ensur...
Definition: Promotion.cpp:485

mlir::linalg::TileSizeComputationFunction
std::function< SmallVector< Value, 4 >(OpBuilder &, Operation *)> TileSizeComputationFunction
Definition: Transforms.h:187

mlir::linalg::OptimizeCopyFn
std::function< LogicalResult(RewriterBase &, tensor::PadOp, Value)> OptimizeCopyFn
Definition: Transforms.h:1514

mlir::linalg::populateElementwiseToLinalgConversionPatterns
void populateElementwiseToLinalgConversionPatterns(RewritePatternSet &patterns)
Populate patterns that convert ElementwiseMappable ops to linalg parallel loops.
Definition: ElementwiseToLinalg.cpp:115

mlir::linalg::linalgOpAnchoredEmptyTensorEliminationStep
LogicalResult linalgOpAnchoredEmptyTensorEliminationStep(RewriterBase &rewriter, Operation *op, bufferization::OneShotAnalysisState &state)
Try to eliminate tensor::EmptyOps inside op that are anchored on a LinalgOp.
Definition: EliminateEmptyTensors.cpp:40

mlir::linalg::linalgOpToLoops
FailureOr< LinalgLoops > linalgOpToLoops(RewriterBase &rewriter, LinalgOp linalgOp)
Emit a loop nest of scf.for with the proper body for linalgOp.
Definition: Loops.cpp:368

mlir::linalg::vectorize
LogicalResult vectorize(RewriterBase &rewriter, Operation *op, ArrayRef< int64_t > inputVectorSizes={}, ArrayRef< bool > inputScalableVecDims={}, bool vectorizeNDExtract=false, bool flatten1DDepthwiseConv=false)
Emit a suitable vector form for an operation.
Definition: Vectorization.cpp:2531

mlir::linalg::generalizeNamedOp
FailureOr< GenericOp > generalizeNamedOp(RewriterBase &rewriter, LinalgOp linalgOp)
Create a GenericOp from the given named operation linalgOp and replace the given linalgOp.
Definition: Generalization.cpp:53

mlir::linalg::makeTiledLoopRanges
std::tuple< SmallVector< Range, 4 >, LoopIndexToRangeIndexMap > makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > allShapeSizes, ArrayRef< OpFoldResult > allTileSizes)
Definition: Tiling.cpp:50

mlir::linalg::transposeBatchMatmul
FailureOr< Operation * > transposeBatchMatmul(RewriterBase &rewriter, linalg::BatchMatmulOp op, bool transposeLHS=true)
Pattern to replace.
Definition: TransposeMatmul.cpp:88

mlir::linalg::promoteSubviewsPrecondition
LogicalResult promoteSubviewsPrecondition(Operation *op, LinalgPromotionOptions options)
Promote memref.subviews feeding linalg-on-buffers operations.
Definition: Promotion.cpp:398

mlir::linalg::copyToGPUPrivateMemory
LogicalResult copyToGPUPrivateMemory(OpBuilder &b, Value src, Value dst)
Normal copy to between src and dst.
Definition: Promotion.cpp:502

mlir::linalg::deduplicateOperandsAndRemoveDeadResults
FailureOr< linalg::GenericOp > deduplicateOperandsAndRemoveDeadResults(RewriterBase &rewriter, linalg::GenericOp genericOp, bool removeOutputs)
Method to deduplicate operands and remove dead results of linalg.generic operations.
Definition: EraseUnusedOperandsAndResults.cpp:231

mlir::linalg::populateDecomposeConvolutionPatterns
void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns, PatternBenefit benefit=1)
Linalg decompose convolutions patterns.
Definition: Transforms.cpp:1654

mlir::linalg::populateDecomposeWinogradOpsPatterns
void populateDecomposeWinogradOpsPatterns(RewritePatternSet &patterns)
Patterns to decompose Winograd operators.
Definition: WinogradConv2D.cpp:1219

mlir::linalg::populateConvolutionVectorizationPatterns
void populateConvolutionVectorizationPatterns(RewritePatternSet &patterns, PatternBenefit benefit=1)
Populate patterns for vectorizing low-D convolution ops.
Definition: Vectorization.cpp:4120

mlir::linalg::vectorizeCopy
LogicalResult vectorizeCopy(RewriterBase &builder, memref::CopyOp copyOp)
Emit a suitable vector form for a Copy op with fully static shape.
Definition: Vectorization.cpp:2625

mlir::linalg::vectorizeOpPrecondition
LogicalResult vectorizeOpPrecondition(Operation *op, ArrayRef< int64_t > inputVectorSizes={}, ArrayRef< bool > inputScalableVecDims={}, bool vectorizeNDExtract=false, bool flatten1DDepthwiseConv=false)
Return success if the operation can be vectorized.
Definition: Vectorization.cpp:2472

mlir::linalg::interchangeGenericOp
FailureOr< GenericOp > interchangeGenericOp(RewriterBase &rewriter, GenericOp genericOp, ArrayRef< unsigned > interchangeVector)
Interchange the iterator_types and iterator_maps dimensions and adapts the index accesses of op.
Definition: Interchange.cpp:50

mlir::linalg::populateCollapseDimensions
void populateCollapseDimensions(RewritePatternSet &patterns, const GetCollapsableDimensionsFn &controlCollapseDimensions)
Pattern to collapse dimensions in a linalg.generic op.
Definition: ElementwiseOpFusion.cpp:2261

mlir::linalg::areElementwiseOpsFusable
bool areElementwiseOpsFusable(OpOperand *fusedOperand)
Return true if two linalg.generic operations with producer/consumer relationship through fusedOperand...
Definition: ElementwiseOpFusion.cpp:139

mlir::linalg::computeStaticMultiTileSizes
FailureOr< StaticMultiSizeSpecification > computeStaticMultiTileSizes(LinalgOp op, unsigned dimension, int64_t targetSize, int64_t divisor)
Definition: Tiling.cpp:243

mlir::linalg::linalgOpToAffineLoops
FailureOr< LinalgLoops > linalgOpToAffineLoops(RewriterBase &rewriter, LinalgOp linalgOp)
Emit a loop nest of affine.for with the proper body for linalgOp.
Definition: Loops.cpp:363

mlir::linalg::populateDecomposePackUnpackPatterns
void populateDecomposePackUnpackPatterns(RewritePatternSet &patterns)
Populates patterns to decompose linalg.pack and linalg.unpack Ops into e.g.
Definition: Transforms.cpp:1675

mlir::linalg::populateEraseUnusedOperandsAndResultsPatterns
void populateEraseUnusedOperandsAndResultsPatterns(RewritePatternSet &patterns)
Pattern to remove dead operands and results of linalg.generic operations.
Definition: EraseUnusedOperandsAndResults.cpp:431

mlir::linalg::computeContinuousTileSizes
FailureOr< ContinuousTileSizeSpecification > computeContinuousTileSizes(OpBuilder &builder, TilingInterface op, unsigned dimension, OpFoldResult targetSize, bool emitAssertions)
Definition: Tiling.cpp:163

mlir::linalg::computeStaticContinuousTileSizes
FailureOr< StaticContinuousTileSizeSpecification > computeStaticContinuousTileSizes(LinalgOp op, unsigned dimension, unsigned targetSize)
Definition: Tiling.cpp:112

mlir::linalg::CopyCallbackFn
std::function< LogicalResult(OpBuilder &b, Value src, Value dst)> CopyCallbackFn
Callback function type used to insert copy from original subview to subview of the promoted region fo...
Definition: Transforms.h:352

mlir::linalg::splitReduction
FailureOr< SplitReductionResult > splitReduction(RewriterBase &b, LinalgOp op, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
Definition: SplitReduction.cpp:30

mlir::linalg::populateSimplifyPackAndUnpackPatterns
void populateSimplifyPackAndUnpackPatterns(RewritePatternSet &patterns)
Populates patterns with patterns that simplify tensor.pack and tensor.unpack operations.
Definition: PackAndUnpackPatterns.cpp:533

mlir::linalg::populateFoldPackUnpackIntoTensorEmptyPatterns
void populateFoldPackUnpackIntoTensorEmptyPatterns(RewritePatternSet &patterns)
Populates patterns with patterns that fold operations like linalg.pack and linalg....
Definition: PackAndUnpackPatterns.cpp:538

mlir::linalg::padAndHoistLinalgOp
FailureOr< LinalgOp > padAndHoistLinalgOp(RewriterBase &rewriter, LinalgOp linalgOp, const LinalgPaddingOptions &options)
Apply padding and hoisting to linalgOp according to the configuration specified in options.
Definition: Padding.cpp:265

mlir::linalg::populateDecomposeLinalgOpsPattern
void populateDecomposeLinalgOpsPattern(RewritePatternSet &patterns, bool removeDeadArgsAndResults=true)
Populate patterns for splitting a LinalgOp with multiple statements within its payload into multiple ...
Definition: DecomposeLinalgOps.cpp:384

mlir::linalg::ControlPropagationFn
std::function< bool(OpOperand *opOperand)> ControlPropagationFn
Function type which is used to control propagation of linalg.pack/unpack ops.
Definition: Transforms.h:1790

mlir::linalg::tileReductionUsingForall
FailureOr< ForallReductionTilingResult > tileReductionUsingForall(RewriterBase &b, PartialReductionOpInterface op, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > tileSizes={}, std::optional< ArrayAttr > mapping=std::nullopt)
Method to tile a reduction to parallel iterations computing partial reductions.
Definition: Tiling.cpp:595

mlir::linalg::packMatmulGreedily
FailureOr< PackResult > packMatmulGreedily(RewriterBase &rewriter, LinalgOp linalgOp, ArrayRef< OpFoldResult > mnkPackedSizes, ArrayRef< int64_t > mnkPaddedSizesNextMultipleOf, ArrayRef< int64_t > mnkOrder)
Pack a LinalgOp by greedily inferring matmul dimensions (m, n, k) where m and n are proper parallel d...
Definition: Transforms.cpp:769

mlir::linalg::pack
FailureOr< PackResult > pack(RewriterBase &rewriter, linalg::LinalgOp linalgOp, ArrayRef< OpFoldResult > packedSizes)
Implement packing of a single LinalgOp by packedSizes.
Definition: Transforms.cpp:481

mlir::linalg::populateEraseUnnecessaryInputsPatterns
void populateEraseUnnecessaryInputsPatterns(RewritePatternSet &patterns)
Patterns to promote inputs to outputs and remove unused inputs of linalg.generic ops.
Definition: EraseUnusedOperandsAndResults.cpp:438

mlir::linalg::tileLinalgOp
FailureOr< TiledLinalgOp > tileLinalgOp(RewriterBase &b, LinalgOp op, const LinalgTilingOptions &options)
Definition: Tiling.cpp:821

mlir::linalg::GetCollapsableDimensionsFn
std::function< SmallVector< ReassociationIndices >(linalg::LinalgOp)> GetCollapsableDimensionsFn
Function type to control generic op dimension collapsing.
Definition: Transforms.h:1809

mlir::linalg::populateFoldReshapeOpsByExpansionPatterns
void populateFoldReshapeOpsByExpansionPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlFoldingReshapes)
Patterns to fold an expanding (collapsing) tensor_reshape operation with its producer (consumer) gene...
Definition: ElementwiseOpFusion.cpp:2228

mlir::linalg::populateSwapExtractSliceWithFillPatterns
void populateSwapExtractSliceWithFillPatterns(RewritePatternSet &patterns)
Adds patterns that waps tensor.extract_slice(linalg.fill(cst, init)) into linalg.fill(cst,...
Definition: SwapExtractSliceWithFillPatterns.cpp:42

mlir::linalg::populateInlineConstantOperandsPatterns
void populateInlineConstantOperandsPatterns(RewritePatternSet &patterns)
Patterns that are used to inline constant operands into linalg generic ops.
Definition: InlineScalarOperands.cpp:98

mlir::linalg::promoteSubViews
FailureOr< LinalgOp > promoteSubViews(OpBuilder &b, LinalgOp op, const LinalgPromotionOptions &options)
Promote the subViews into a new buffer allocated at the insertion point b.
Definition: Promotion.cpp:420

mlir::linalg::populateConstantFoldLinalgOperations
void populateConstantFoldLinalgOperations(RewritePatternSet &patterns, const ControlFusionFn &controlFn)
Patterns to constant fold Linalg operations.
Definition: ConstantFold.cpp:306

mlir::linalg::ControlSplitReductionFn
std::function< SplitReductionOptions(LinalgOp op)> ControlSplitReductionFn
Function signature to control reduction splitting.
Definition: Transforms.h:443

mlir::linalg::deallocateWorkgroupMemory
LogicalResult deallocateWorkgroupMemory(OpBuilder &, Value)
In case of GPU group memory there is no need to deallocate.
Definition: Promotion.cpp:478

mlir::linalg::transposeMatmul
FailureOr< Operation * > transposeMatmul(RewriterBase &rewriter, linalg::MatmulOp op, bool transposeLHS=true)
Convert Linalg matmul ops to transposed variants.
Definition: TransposeMatmul.cpp:31

mlir::linalg::populateLinalgNamedOpsGeneralizationPatterns
void populateLinalgNamedOpsGeneralizationPatterns(RewritePatternSet &patterns)
Linalg generalization patterns.
Definition: Generalization.cpp:95

mlir::linalg::populateLinalgGenericOpsSpecializationPatterns
void populateLinalgGenericOpsSpecializationPatterns(RewritePatternSet &patterns)
Populates patterns with patterns to convert linalg.generic ops to named ops where possible.
Definition: Specialize.cpp:356

mlir::linalg::populateWinogradConv2DPatterns
void populateWinogradConv2DPatterns(RewritePatternSet &patterns, int64_t m, int64_t r)
Patterns to apply Winograd Conv2D algorithm F(m x m, r x r).
Definition: WinogradConv2D.cpp:1212

mlir::linalg::ControlBlockPackMatmulFn
std::function< std::optional< BlockPackMatmulOptions >(linalg::LinalgOp)> ControlBlockPackMatmulFn
Function type which is used to control matmul packing.
Definition: Transforms.h:1219

mlir::linalg::getCombinerOpKind
std::optional< vector::CombiningKind > getCombinerOpKind(Operation *combinerOp)
Return vector::CombiningKind for the given op.
Definition: Vectorization.cpp:575

mlir::linalg::peelLoop
SmallVector< Value > peelLoop(RewriterBase &rewriter, Operation *op)
Try to peel and canonicalize loop op and return the new result.
Definition: Transforms.cpp:60

mlir::linalg::getLinalgTilingCanonicalizationPatterns
RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx)
Canonicalization patterns relevant to apply after tiling patterns.
Definition: Tiling.cpp:855

mlir::linalg::collapseOpIterationDims
FailureOr< CollapseResult > collapseOpIterationDims(LinalgOp op, ArrayRef< ReassociationIndices > foldedIterationDims, RewriterBase &rewriter)
Collapses dimensions of linalg.generic/linalg.copy operation.
Definition: ElementwiseOpFusion.cpp:1711

mlir::linalg::decomposeWinogradInputTransformOp
FailureOr< Operation * > decomposeWinogradInputTransformOp(RewriterBase &rewriter, linalg::WinogradInputTransformOp op)
Rewrite linalg.winograd_input_transform.
Definition: WinogradConv2D.cpp:1201

mlir::linalg::populateDecomposePadPatterns
void populateDecomposePadPatterns(RewritePatternSet &patterns)
Populates patterns to decompose tensor.pad into e.g.
Definition: Transforms.cpp:1680

mlir::linalg::populateFoldAddIntoDestPatterns
void populateFoldAddIntoDestPatterns(RewritePatternSet &patterns)
Pattern to replace linalg.add when destination passing on a contraction op suffices for achieving the...
Definition: FoldAddIntoDest.cpp:147

mlir::linalg::splitOp
std::pair< TilingInterface, TilingInterface > splitOp(RewriterBase &rewriter, TilingInterface op, unsigned dimension, OpFoldResult splitPoint)
Split the given op into two parts along the given iteration space dimension at the specified splitPoi...
Definition: Split.cpp:67

mlir::linalg::populateFoldIntoPackAndUnpackPatterns
void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns)
Populates patterns with patterns that fold operations like tensor.pad and tensor.extract_slice into t...
Definition: PackAndUnpackPatterns.cpp:524

mlir::linalg::populateElementwiseOpsFusionPatterns
void populateElementwiseOpsFusionPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlElementwiseOpFusion)
Patterns for fusing linalg operation on tensors.
Definition: ElementwiseOpFusion.cpp:2250

mlir::linalg::splitReductionByScaling
FailureOr< SplitReductionResult > splitReductionByScaling(RewriterBase &b, LinalgOp op, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
Scaling-based implementation of the split reduction transformation.
Definition: SplitReduction.cpp:241

mlir::linalg::computeMultiTileSizes
FailureOr< MultiSizeSpecification > computeMultiTileSizes(OpBuilder &builder, LinalgOp op, unsigned dimension, OpFoldResult targetSize, OpFoldResult divisor, bool emitAssertions=true)
Emits the IR computing the multi-sized tiling specification with two tile sizes not exceeding targetS...
Definition: Tiling.cpp:269

mlir::linalg::lowerPack
FailureOr< LowerPackResult > lowerPack(RewriterBase &rewriter, linalg::PackOp packOp, bool lowerPadLikeWithInsertSlice=true)
Rewrite pack as pad + reshape + transpose.
Definition: Transforms.cpp:224

mlir::linalg::linalgOpToParallelLoops
FailureOr< LinalgLoops > linalgOpToParallelLoops(RewriterBase &rewriter, LinalgOp linalgOp)
Emit a loop nest of scf.parallel with the proper body for linalgOp.
Definition: Loops.cpp:375

mlir::lsp::CompletionItemKind::Value
@ Value

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::ReassociationIndicesRef
ArrayRef< int64_t > ReassociationIndicesRef
Definition: ReshapeOpsUtils.h:27

mlir::patterns
const FrozenRewritePatternSet & patterns
Definition: GreedyPatternRewriteDriver.h:283

mlir::OpInterfaceRewritePattern
OpInterfaceRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting a...
Definition: PatternMatch.h:330

mlir::OpRewritePattern
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Definition: PatternMatch.h:314

mlir::linalg::BlockPackMatmulOptions
Definition: Transforms.h:1186

mlir::linalg::BlockPackMatmulOptions::mnkOrder
SmallVector< int64_t, 3 > mnkOrder
Permutation of matmul (M, N, K) dimensions order.
Definition: Transforms.h:1199

mlir::linalg::BlockPackMatmulOptions::blockFactors
SmallVector< int64_t, 3 > blockFactors
Minor block factors (mb, nb, kb) for packing relayout where mb, mn are the parallel dimensions and kb...
Definition: Transforms.h:1189

mlir::linalg::BlockPackMatmulOptions::rhsTransposeOuterBlocks
bool rhsTransposeOuterBlocks
Transpose RHS outer block layout [KB][NB] -> [NB][KB].
Definition: Transforms.h:1208

mlir::linalg::BlockPackMatmulOptions::lhsTransposeInnerBlocks
bool lhsTransposeInnerBlocks
Transpose LHS inner block layout [mb][kb] -> [kb][mb].
Definition: Transforms.h:1205

mlir::linalg::BlockPackMatmulOptions::mnkPaddedSizesNextMultipleOf
SmallVector< int64_t, 3 > mnkPaddedSizesNextMultipleOf
Next multiples of the packing sizes.
Definition: Transforms.h:1196

mlir::linalg::BlockPackMatmulOptions::lhsTransposeOuterBlocks
bool lhsTransposeOuterBlocks
Transpose LHS outer block layout [MB][KB] -> [KB][MB].
Definition: Transforms.h:1202

mlir::linalg::BlockPackMatmulOptions::allowPadding
bool allowPadding
If true, allows packing of dimensions that only partially fit into the block factors.
Definition: Transforms.h:1193

mlir::linalg::BlockPackMatmulOptions::rhsTransposeInnerBlocks
bool rhsTransposeInnerBlocks
Transpose RHS inner block layout [kb][nb] -> [nb][kb].
Definition: Transforms.h:1211

mlir::linalg::BufferizeToAllocationOptions
Definition: Transforms.h:51

mlir::linalg::BufferizeToAllocationOptions::bufferizeDestinationOnly
bool bufferizeDestinationOnly
If set to "true", only the destination tensor operands are bufferized to a new allocation (and wrappe...
Definition: Transforms.h:65

mlir::linalg::BufferizeToAllocationOptions::allocOp
AllocOp allocOp
Definition: Transforms.h:53

mlir::linalg::BufferizeToAllocationOptions::MemcpyOp
MemcpyOp
Definition: Transforms.h:55

mlir::linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefCopy
@ MemrefCopy

mlir::linalg::BufferizeToAllocationOptions::MemcpyOp::LinalgCopy
@ LinalgCopy

mlir::linalg::BufferizeToAllocationOptions::MemcpyOp::MaterializeInDestination
@ MaterializeInDestination

mlir::linalg::BufferizeToAllocationOptions::AllocOp
AllocOp
Definition: Transforms.h:52

mlir::linalg::BufferizeToAllocationOptions::AllocOp::MemrefAlloc
@ MemrefAlloc

mlir::linalg::BufferizeToAllocationOptions::AllocOp::MemrefAlloca
@ MemrefAlloca

mlir::linalg::BufferizeToAllocationOptions::memcpyOp
MemcpyOp memcpyOp
Definition: Transforms.h:60

mlir::linalg::BufferizeToAllocationOptions::emitDealloc
bool emitDealloc
If set to "true", a memref.dealloc operation will be emitted for each allocated buffer.
Definition: Transforms.h:71

mlir::linalg::CollapseResult
Definition: Transforms.h:1101

mlir::linalg::CollapseResult::results
SmallVector< Value > results
Definition: Transforms.h:1102

mlir::linalg::CollapseResult::collapsedOp
LinalgOp collapsedOp
Definition: Transforms.h:1103

mlir::linalg::ContinuousTileSizeSpecification
Definition: Transforms.h:844

mlir::linalg::ControlDropUnitDims
Transformation to drop unit-extent dimensions from linalg.generic operations.
Definition: Transforms.h:474

mlir::linalg::ControlDropUnitDims::RankReductionStrategy
RankReductionStrategy
Definition: Transforms.h:475

mlir::linalg::ControlDropUnitDims::RankReductionStrategy::ReassociativeReshape
@ ReassociativeReshape

mlir::linalg::ControlDropUnitDims::RankReductionStrategy::ExtractInsertSlice
@ ExtractInsertSlice

mlir::linalg::ControlDropUnitDims::controlFn
ControlFnTy controlFn
Definition: Transforms.h:481

mlir::linalg::ControlDropUnitDims::rankReductionStrategy
RankReductionStrategy rankReductionStrategy
Definition: Transforms.h:477

mlir::linalg::ControlDropUnitDims::ControlFnTy
std::function< SmallVector< unsigned >(Operation *)> ControlFnTy
Definition: Transforms.h:480

mlir::linalg::CopyVectorizationPattern
Vectorization pattern for memref::CopyOp.
Definition: Transforms.h:1506

mlir::linalg::CopyVectorizationPattern::matchAndRewrite
LogicalResult matchAndRewrite(memref::CopyOp copyOp, PatternRewriter &rewriter) const override
Definition: Transforms.cpp:919

mlir::linalg::DecomposeOuterUnitDimsPackOpPattern
Rewrites a linalg::PackOp into a sequence of:
Definition: Transforms.h:1564

mlir::linalg::DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite
LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override
Definition: Transforms.cpp:1160

mlir::linalg::DecomposeOuterUnitDimsUnPackOpPattern
Rewrites a linalg::UnPackOp into a sequence of rank-reduced.
Definition: Transforms.h:1598

mlir::linalg::DecomposeOuterUnitDimsUnPackOpPattern::matchAndRewrite
LogicalResult matchAndRewrite(linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const override
Definition: Transforms.cpp:1261

mlir::linalg::DecomposePadOpPattern
Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and InsertSliceOp.
Definition: Transforms.h:1518

mlir::linalg::DecomposePadOpPattern::matchAndRewrite
LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const override
Definition: Transforms.cpp:947

mlir::linalg::DecomposePadOpPattern::createFillOrGenerateOp
Value createFillOrGenerateOp(RewriterBase &rewriter, tensor::PadOp padOp, Value dest, const SmallVector< Value > &dynSizes) const
Filling dest using FillOp constant padding value if possible.
Definition: Transforms.cpp:926

mlir::linalg::DecomposePadOpPattern::DecomposePadOpPattern
DecomposePadOpPattern(MLIRContext *context, PatternBenefit benefit=1)
Definition: Transforms.h:1519

mlir::linalg::DownscaleConv2DOp
Definition: Transforms.h:1453

mlir::linalg::DownscaleConv2DOp::matchAndRewrite
LogicalResult matchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const override
Definition: Transforms.h:1460

mlir::linalg::DownscaleConv2DOp::returningMatchAndRewrite
FailureOr< Conv1DOp > returningMatchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const
Definition: Transforms.cpp:1598

mlir::linalg::DownscaleConv2DOp::DownscaleConv2DOp
DownscaleConv2DOp(MLIRContext *context, PatternBenefit benefit=1)
Definition: Transforms.h:1454

mlir::linalg::DownscaleDepthwiseConv2DNhwcHwcOp
Rewrites 2-D depthwise convolution ops with size-1 (w, kw) or (h, kh) dimensions into 1-D depthwise c...
Definition: Transforms.h:1438

mlir::linalg::DownscaleDepthwiseConv2DNhwcHwcOp::returningMatchAndRewrite
FailureOr< DepthwiseConv1DNwcWcOp > returningMatchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp, PatternRewriter &rewriter) const
Definition: Transforms.cpp:1530

mlir::linalg::DownscaleDepthwiseConv2DNhwcHwcOp::matchAndRewrite
LogicalResult matchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp, PatternRewriter &rewriter) const override
Definition: Transforms.h:1447

mlir::linalg::DownscaleDepthwiseConv2DNhwcHwcOp::DownscaleDepthwiseConv2DNhwcHwcOp
DownscaleDepthwiseConv2DNhwcHwcOp(MLIRContext *context, PatternBenefit benefit=1)
Definition: Transforms.h:1439

mlir::linalg::DownscaleSizeOneWindowed2DConvolution
Rewrites 2-D convolution ops with size-1 window dimensions into 1-D convolution ops.
Definition: Transforms.h:1418

mlir::linalg::DownscaleSizeOneWindowed2DConvolution::matchAndRewrite
LogicalResult matchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const override
Definition: Transforms.h:1424

mlir::linalg::DownscaleSizeOneWindowed2DConvolution::returningMatchAndRewrite
FailureOr< Conv1DOp > returningMatchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const
Definition: Transforms.cpp:1407

mlir::linalg::DropUnitDimsResult
Definition: Transforms.h:492

mlir::linalg::DropUnitDimsResult::resultOp
linalg::GenericOp resultOp
Definition: Transforms.h:493

mlir::linalg::DropUnitDimsResult::replacements
SmallVector< Value > replacements
Definition: Transforms.h:494

mlir::linalg::ElementwiseOpFusionResult
Fuse two linalg.generic operations that have a producer-consumer relationship captured through fusedO...
Definition: Transforms.h:503

mlir::linalg::ElementwiseOpFusionResult::fusedOp
Operation * fusedOp
Definition: Transforms.h:504

mlir::linalg::ElementwiseOpFusionResult::replacements
llvm::DenseMap< Value, Value > replacements
Definition: Transforms.h:505

mlir::linalg::ExtractSliceOfPadTensorSwapPattern
Rewrite extract_slice(tensor.pad(x)) into tensor.pad(extract_slice(x)).
Definition: Transforms.h:1665

mlir::linalg::ExtractSliceOfPadTensorSwapPattern::ControlFn
std::function< std::optional< bool >(tensor::ExtractSliceOp)> ControlFn
A function to control pattern application and rewrite logic.
Definition: Transforms.h:1675

mlir::linalg::ExtractSliceOfPadTensorSwapPattern::matchAndRewrite
LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const override
Definition: Transforms.cpp:997

mlir::linalg::ExtractSliceOfPadTensorSwapPattern::ExtractSliceOfPadTensorSwapPattern
ExtractSliceOfPadTensorSwapPattern(MLIRContext *context, ControlFn controlFn=nullptr, PatternBenefit benefit=1)
Definition: Transforms.h:1677

mlir::linalg::ForallReductionTilingResult
Transformation information returned after reduction tiling.
Definition: Transforms.h:892

mlir::linalg::ForallReductionTilingResult::mergeOps
SmallVector< Operation * > mergeOps
The final reduction operation merging all the partial reductions.
Definition: Transforms.h:896

mlir::linalg::ForallReductionTilingResult::initialValues
SmallVector< Value > initialValues
Initial values used for partial reductions.
Definition: Transforms.h:898

mlir::linalg::ForallReductionTilingResult::loops
scf::ForallOp loops
The scf.forall operation that iterate over the tiles.
Definition: Transforms.h:900

mlir::linalg::ForallReductionTilingResult::parallelTiledOps
SmallVector< Operation * > parallelTiledOps
The partial reduction tiled op generated.
Definition: Transforms.h:894

mlir::linalg::LinalgCopyVTRForwardingPattern
Match and rewrite for the pattern:
Definition: Transforms.h:1628

mlir::linalg::LinalgCopyVTRForwardingPattern::matchAndRewrite
LogicalResult matchAndRewrite(vector::TransferReadOp xferOp, PatternRewriter &rewriter) const override
TODO: use interfaces, side-effects and aliasing analysis as appropriate, when available.
Definition: Vectorization.cpp:3157

mlir::linalg::LinalgCopyVTWForwardingPattern
Match and rewrite for the pattern:
Definition: Transforms.h:1656

mlir::linalg::LinalgCopyVTWForwardingPattern::matchAndRewrite
LogicalResult matchAndRewrite(vector::TransferWriteOp xferOp, PatternRewriter &rewriter) const override
TODO: use interfaces, side-effects and aliasing analysis as appropriate, when available.
Definition: Vectorization.cpp:3235

mlir::linalg::LinalgGeneralizationPattern
Linalg generalization pattern.
Definition: Transforms.h:1475

mlir::linalg::LinalgGeneralizationPattern::matchAndRewrite
LogicalResult matchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const override
Definition: Transforms.h:1485

mlir::linalg::LinalgGeneralizationPattern::returningMatchAndRewrite
FailureOr< GenericOp > returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const
matchAndRewrite implementation that returns the significant transformed pieces of IR.
Definition: Transforms.h:1481

mlir::linalg::LinalgLoopDistributionOptions
Options that allow distribution of loops generated in Linalg transforms to processors while generatin...
Definition: Utils.h:316

mlir::linalg::LinalgPaddingOptions
Definition: Transforms.h:279

mlir::linalg::LinalgPaddingOptions::paddingValues
SmallVector< Attribute > paddingValues
A padding value for every operand.
Definition: Transforms.h:281

mlir::linalg::LinalgPaddingOptions::CopyBackOp
CopyBackOp
Definition: Transforms.h:319

mlir::linalg::LinalgPaddingOptions::CopyBackOp::None
@ None

mlir::linalg::LinalgPaddingOptions::CopyBackOp::LinalgCopy
@ LinalgCopy

mlir::linalg::LinalgPaddingOptions::CopyBackOp::BufferizationMaterializeInDestination
@ BufferizationMaterializeInDestination

mlir::linalg::LinalgPaddingOptions::setPadToMultipleOf
LinalgPaddingOptions & setPadToMultipleOf(ArrayRef< int64_t > m)
Definition: Transforms.h:294

mlir::linalg::LinalgPaddingOptions::padToMultipleOf
std::optional< SmallVector< int64_t > > padToMultipleOf
A list of multiples to which each padding dimension should be padded to.
Definition: Transforms.h:293

mlir::linalg::LinalgPaddingOptions::setNofoldFlags
LinalgPaddingOptions & setNofoldFlags(ArrayRef< bool > pp)
Definition: Transforms.h:301

mlir::linalg::LinalgPaddingOptions::setPaddingDimensions
LinalgPaddingOptions & setPaddingDimensions(ArrayRef< int64_t > pd)
Definition: Transforms.h:288

mlir::linalg::LinalgPaddingOptions::setTransposePaddings
LinalgPaddingOptions & setTransposePaddings(ArrayRef< SmallVector< int64_t >> tp)
Definition: Transforms.h:315

mlir::linalg::LinalgPaddingOptions::transposePaddings
SmallVector< SmallVector< int64_t > > transposePaddings
A permutation vector for every operand used to transpose the packed PadOp results.
Definition: Transforms.h:313

mlir::linalg::LinalgPaddingOptions::setPaddingValues
LinalgPaddingOptions & setPaddingValues(ArrayRef< Attribute > pv)
Definition: Transforms.h:282

mlir::linalg::LinalgPaddingOptions::nofoldFlags
SmallVector< bool > nofoldFlags
A flag for every operand to mark the PadOp as nofold which enables packing for statically shaped oper...
Definition: Transforms.h:300

mlir::linalg::LinalgPaddingOptions::setCopyBackOp
LinalgPaddingOptions & setCopyBackOp(CopyBackOp op)
Definition: Transforms.h:327

mlir::linalg::LinalgPaddingOptions::setHoistPaddings
LinalgPaddingOptions & setHoistPaddings(ArrayRef< int64_t > hp)
Definition: Transforms.h:307

mlir::linalg::LinalgPaddingOptions::hoistPaddings
SmallVector< int64_t > hoistPaddings
A number of loops to hoist the PadOp out for every operand.
Definition: Transforms.h:306

mlir::linalg::LinalgPaddingOptions::paddingDimensions
SmallVector< int64_t > paddingDimensions
A list of iterator dimensions to pad.
Definition: Transforms.h:287

mlir::linalg::LinalgPaddingOptions::copyBackOp
CopyBackOp copyBackOp
The op to be used for copying the padded result to the original destination tensor.
Definition: Transforms.h:326

mlir::linalg::LinalgPromotionOptions
Definition: Transforms.h:354

mlir::linalg::LinalgPromotionOptions::alignment
std::optional< unsigned > alignment
Alignment of promoted buffer. If std::nullopt do not specify alignment.
Definition: Transforms.h:385

mlir::linalg::LinalgPromotionOptions::setUseFullTileBuffersByDefault
LinalgPromotionOptions & setUseFullTileBuffersByDefault(bool use)
Definition: Transforms.h:380

mlir::linalg::LinalgPromotionOptions::useAlloca
bool useAlloca
Use alloca with the default allocation scheme.
Definition: Transforms.h:398

mlir::linalg::LinalgPromotionOptions::setAlignment
LinalgPromotionOptions & setAlignment(unsigned align)
Definition: Transforms.h:386

mlir::linalg::LinalgPromotionOptions::memorySpace
std::optional< Attribute > memorySpace
Memory space of promoted buffer.
Definition: Transforms.h:392

mlir::linalg::LinalgPromotionOptions::copyOutFn
std::optional< CopyCallbackFn > copyOutFn
Definition: Transforms.h:418

mlir::linalg::LinalgPromotionOptions::copyInFn
std::optional< CopyCallbackFn > copyInFn
Callback function to do the copy of data to and from the promoted subview.
Definition: Transforms.h:417

mlir::linalg::LinalgPromotionOptions::setUseAlloca
LinalgPromotionOptions & setUseAlloca(bool use)
Definition: Transforms.h:399

mlir::linalg::LinalgPromotionOptions::operandsToPromote
std::optional< DenseSet< unsigned > > operandsToPromote
Indices of subViews to promote.
Definition: Transforms.h:357

mlir::linalg::LinalgPromotionOptions::setCopyInOutFns
LinalgPromotionOptions & setCopyInOutFns(CopyCallbackFn const &copyIn, CopyCallbackFn const &copyOut)
Definition: Transforms.h:419

mlir::linalg::LinalgPromotionOptions::setUseFullTileBuffers
LinalgPromotionOptions & setUseFullTileBuffers(ArrayRef< bool > useFullTiles)
Definition: Transforms.h:369

mlir::linalg::LinalgPromotionOptions::allocationFn
std::optional< AllocBufferCallbackFn > allocationFn
Callback function to do the allocation of the promoted buffer.
Definition: Transforms.h:406

mlir::linalg::LinalgPromotionOptions::useFullTileBuffersDefault
bool useFullTileBuffersDefault
If true all operands unspecified by useFullTileBuffers will use the full view, otherwise the partial ...
Definition: Transforms.h:379

mlir::linalg::LinalgPromotionOptions::deallocationFn
std::optional< DeallocBufferCallbackFn > deallocationFn
Definition: Transforms.h:407

mlir::linalg::LinalgPromotionOptions::setMemorySpace
LinalgPromotionOptions & setMemorySpace(Attribute memorySpc)
Definition: Transforms.h:393

mlir::linalg::LinalgPromotionOptions::setAllocationDeallocationFns
LinalgPromotionOptions & setAllocationDeallocationFns(AllocBufferCallbackFn const &allocFn, DeallocBufferCallbackFn const &deallocFn)
Definition: Transforms.h:409

mlir::linalg::LinalgPromotionOptions::useFullTileBuffers
std::optional< llvm::SmallBitVector > useFullTileBuffers
If ith element of useFullTiles is true the full view should be used for the promoted buffer of the it...
Definition: Transforms.h:368

mlir::linalg::LinalgPromotionOptions::setOperandsToPromote
LinalgPromotionOptions & setOperandsToPromote(ArrayRef< int64_t > operands)
Definition: Transforms.h:358

mlir::linalg::LinalgSpecializationPattern
Definition: Transforms.h:1491

mlir::linalg::LinalgSpecializationPattern::matchAndRewrite
LogicalResult matchAndRewrite(GenericOp op, PatternRewriter &rewriter) const override
Definition: Transforms.h:1499

mlir::linalg::LinalgSpecializationPattern::returningMatchAndRewrite
FailureOr< GenericOp > returningMatchAndRewrite(GenericOp op, PatternRewriter &rewriter) const
Definition: Transforms.h:1495

mlir::linalg::LinalgTilingAndFusionOptions
Definition: Transforms.h:260

mlir::linalg::LinalgTilingAndFusionOptions::tileDistribution
std::optional< LinalgLoopDistributionOptions > tileDistribution
When specified, specifies distribution of generated tile loops to processors.
Definition: Transforms.h:271

mlir::linalg::LinalgTilingAndFusionOptions::setTileSizes
LinalgTilingAndFusionOptions & setTileSizes(ArrayRef< int64_t > ts)
Definition: Transforms.h:263

mlir::linalg::LinalgTilingAndFusionOptions::tileInterchange
SmallVector< int64_t > tileInterchange
Tile interchange used to permute the tile loops.
Definition: Transforms.h:268

mlir::linalg::LinalgTilingAndFusionOptions::setDistributionOptions
LinalgTilingAndFusionOptions & setDistributionOptions(LinalgLoopDistributionOptions distributionOptions)
Definition: Transforms.h:273

mlir::linalg::LinalgTilingAndFusionOptions::tileSizes
SmallVector< int64_t > tileSizes
Tile sizes used to tile the root operation.
Definition: Transforms.h:262

mlir::linalg::LinalgTilingOptions
Definition: Transforms.h:189

mlir::linalg::LinalgTilingOptions::setLoopType
LinalgTilingOptions & setLoopType(LinalgTilingLoopType lt)
Definition: Transforms.h:227

mlir::linalg::LinalgTilingOptions::setDistributionTypes
LinalgTilingOptions & setDistributionTypes(ArrayRef< StringRef > types)
Definition: Transforms.h:245

mlir::linalg::LinalgTilingOptions::setInterchange
LinalgTilingOptions & setInterchange(ArrayRef< unsigned > interchange)
Definition: Transforms.h:219

mlir::linalg::LinalgTilingOptions::loopType
LinalgTilingLoopType loopType
The type of tile loops to generate.
Definition: Transforms.h:225

mlir::linalg::LinalgTilingOptions::setTileSizeComputationFunction
LinalgTilingOptions & setTileSizeComputationFunction(TileSizeComputationFunction fun)
Definition: Transforms.h:196

mlir::linalg::LinalgTilingOptions::setTileSizes
LinalgTilingOptions & setTileSizes(const SmallVector< Value, 4 > &ts)
Set the tileSizeComputationFunction to return the values ts.
Definition: Transforms.h:203

mlir::linalg::LinalgTilingOptions::setPeeledLoops
LinalgTilingOptions & setPeeledLoops(ArrayRef< int64_t > loops)
Definition: Transforms.h:253

mlir::linalg::LinalgTilingOptions::peeledLoops
SmallVector< int64_t > peeledLoops
Peel the specified loops.
Definition: Transforms.h:251

mlir::linalg::LinalgTilingOptions::setDistributionOptions
LinalgTilingOptions & setDistributionOptions(LinalgLoopDistributionOptions distributionOptions)
Definition: Transforms.h:237

mlir::linalg::LinalgTilingOptions::interchangeVector
SmallVector< unsigned, 4 > interchangeVector
The interchange vector to reorder the tiled loops.
Definition: Transforms.h:217

mlir::linalg::LinalgTilingOptions::tileSizeComputationFunction
TileSizeComputationFunction tileSizeComputationFunction
Computation function that returns the tile sizes for each operation.
Definition: Transforms.h:193

mlir::linalg::LinalgTilingOptions::scalarizeDynamicDims
LinalgTilingOptions & scalarizeDynamicDims()
Tile all dynamic dimensions by 1.

mlir::linalg::LinalgTilingOptions::distribution
std::optional< LinalgLoopDistributionOptions > distribution
When specified, specifies distribution of generated tile loops to processors.
Definition: Transforms.h:234

mlir::linalg::LinalgTilingOptions::distributionTypes
SmallVector< StringRef, 2 > distributionTypes
Specification markers of how to distribute the linalg.tiled_loop.
Definition: Transforms.h:243

mlir::linalg::LowerPackResult
Definition: Transforms.h:1118

mlir::linalg::LowerPackResult::padOp
tensor::PadOp padOp
Definition: Transforms.h:1119

mlir::linalg::LowerPackResult::transposeOp
linalg::TransposeOp transposeOp
Definition: Transforms.h:1121

mlir::linalg::LowerPackResult::expandShapeOp
tensor::ExpandShapeOp expandShapeOp
Definition: Transforms.h:1120

mlir::linalg::LowerUnPackOpResult
Definition: Transforms.h:1129

mlir::linalg::LowerUnPackOpResult::extractSliceOp
tensor::ExtractSliceOp extractSliceOp
Definition: Transforms.h:1133

mlir::linalg::LowerUnPackOpResult::transposeOp
linalg::TransposeOp transposeOp
Definition: Transforms.h:1131

mlir::linalg::LowerUnPackOpResult::emptyOp
tensor::EmptyOp emptyOp
Definition: Transforms.h:1130

mlir::linalg::LowerUnPackOpResult::collapseShapeOp
tensor::CollapseShapeOp collapseShapeOp
Definition: Transforms.h:1132

mlir::linalg::MultiSizeSpecification
A description of a multi-size tiling comprising tile sizes and numbers of tiles, expressed as Values ...
Definition: Transforms.h:839

mlir::linalg::PackResult
Struct to hold the result of a pack call.
Definition: Transforms.h:1142

mlir::linalg::PackResult::unPackOps
SmallVector< linalg::UnPackOp > unPackOps
Definition: Transforms.h:1145

mlir::linalg::PackResult::packedLinalgOp
linalg::LinalgOp packedLinalgOp
Definition: Transforms.h:1144

mlir::linalg::PackResult::packOps
SmallVector< linalg::PackOp > packOps
Definition: Transforms.h:1143

mlir::linalg::PackTransposeResult
Struct to hold the result of a packTranspose call.
Definition: Transforms.h:1154

mlir::linalg::PackTransposeResult::transposedPackOp
linalg::PackOp transposedPackOp
Definition: Transforms.h:1155

mlir::linalg::PackTransposeResult::transposedLinalgOp
linalg::LinalgOp transposedLinalgOp
Definition: Transforms.h:1156

mlir::linalg::PackTransposeResult::transposedUnPackOp
linalg::UnPackOp transposedUnPackOp
Definition: Transforms.h:1157

mlir::linalg::PromotionInfo
Create a new buffer using the allocationFn provided.
Definition: Transforms.h:719

mlir::linalg::PromotionInfo::fullLocalView
Value fullLocalView
Definition: Transforms.h:720

mlir::linalg::PromotionInfo::partialLocalView
Value partialLocalView
Definition: Transforms.h:721

mlir::linalg::SplitReductionOptions
Split Reduction options.
Definition: Transforms.h:428

mlir::linalg::SplitReductionOptions::ratio
int64_t ratio
Definition: Transforms.h:431

mlir::linalg::SplitReductionOptions::index
unsigned index
Definition: Transforms.h:434

mlir::linalg::SplitReductionOptions::innerParallel
bool innerParallel
Definition: Transforms.h:436

mlir::linalg::SplitReductionResult
Apply transformation to split the single linalg op reduction into a parallel and reduction dimension.
Definition: Transforms.h:1030

mlir::linalg::SplitReductionResult::splitLinalgOp
LinalgOp splitLinalgOp
Definition: Transforms.h:1033

mlir::linalg::SplitReductionResult::fillOp
FillOp fillOp
Definition: Transforms.h:1032

mlir::linalg::SplitReductionResult::initOrAlloc
Operation * initOrAlloc
Definition: Transforms.h:1031

mlir::linalg::SplitReductionResult::resultCombiningLinalgOp
LinalgOp resultCombiningLinalgOp
Definition: Transforms.h:1034

mlir::linalg::StaticContinuousTileSizeSpecification
Definition: Transforms.h:846

mlir::linalg::StaticMultiSizeSpecification
Definition: Transforms.h:841

mlir::linalg::TiledLinalgOp
Perform standalone tiling of a single LinalgOp by tileSizes.
Definition: Transforms.h:680

mlir::linalg::TiledLinalgOp::loops
SmallVector< Operation *, 8 > loops
Definition: Transforms.h:682

mlir::linalg::TiledLinalgOp::op
LinalgOp op
Definition: Transforms.h:681

mlir::linalg::TiledLinalgOp::tensorResults
SmallVector< Value, 4 > tensorResults
Definition: Transforms.h:683

mlir::linalg::detail::ContinuousTileSizeSpecificationBase
Definition: Transforms.h:826

mlir::linalg::detail::ContinuousTileSizeSpecificationBase::tileSizes
SmallVector< T > tileSizes
Tile sizes.
Definition: Transforms.h:828

mlir::linalg::detail::ContinuousTileSizeSpecificationBase::tripCounts
SmallVector< T > tripCounts
Number of tiles associated with each size.
Definition: Transforms.h:830

mlir::linalg::detail::MultiSizeSpecificationBase
Definition: Transforms.h:818

mlir::linalg::detail::MultiSizeSpecificationBase::lowTripCount
T lowTripCount
Number of tiles associated with each size.
Definition: Transforms.h:822

mlir::linalg::detail::MultiSizeSpecificationBase::lowTileSize
T lowTileSize
Tile sizes.
Definition: Transforms.h:820

mlir::linalg::detail::MultiSizeSpecificationBase::highTileSize
T highTileSize
Definition: Transforms.h:820

mlir::linalg::detail::MultiSizeSpecificationBase::highTripCount
T highTripCount
Definition: Transforms.h:822

mlir::linalg::detail::PackingResult
Helper struct to hold the results of building a packing loop nest.
Definition: Transforms.h:550

mlir::linalg::detail::PackingResult::strides
SmallVector< OpFoldResult > strides
Definition: Transforms.h:551

mlir::linalg::detail::PackingResult::leadingPackedTensorIndexings
SmallVector< Value > leadingPackedTensorIndexings
Definition: Transforms.h:552

mlir::linalg::detail::PackingResult::clonedLoopIvs
SmallVector< Value > clonedLoopIvs
Definition: Transforms.h:552

mlir::linalg::detail::PackingResult::sizes
SmallVector< OpFoldResult > sizes
Definition: Transforms.h:551

mlir::linalg::detail::PackingResult::maybeTransposeOp
TransposeOp maybeTransposeOp
Definition: Transforms.h:553

mlir::linalg::detail::PackingResult::hoistedPadOp
tensor::PadOp hoistedPadOp
Definition: Transforms.h:554

mlir::linalg::detail::PackingResult::offsets
SmallVector< OpFoldResult > offsets
Definition: Transforms.h:551