doxygen/FoldTensorSubsetOps_8cpp_source.html

 //===- FoldTensorSubsetOps.cpp - Fold tensor subset ops -------------------===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//

 //

 // Fold tensor subset ops with producer / consumers.

 //

 //===----------------------------------------------------------------------===//


 #include "mlir/Dialect/Affine/IR/AffineOps.h"

 #include "mlir/Dialect/Affine/ViewLikeInterfaceUtils.h"

 #include "mlir/Dialect/SCF/IR/SCF.h"

 #include "mlir/Dialect/Tensor/IR/Tensor.h"

 #include "mlir/Dialect/Tensor/Transforms/Passes.h"

 #include "mlir/Dialect/Tensor/Transforms/Transforms.h"

 #include "mlir/Dialect/Utils/IndexingUtils.h"

 #include "mlir/Dialect/Vector/IR/VectorOps.h"

 #include "mlir/Dialect/Vector/Utils/VectorUtils.h"

 #include "mlir/IR/AffineMap.h"

 #include "mlir/IR/BuiltinAttributes.h"

 #include "mlir/Interfaces/ValueBoundsOpInterface.h"

 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"

 #include "llvm/ADT/TypeSwitch.h"

 #include <type_traits>


 namespace mlir {

 namespace tensor {

 #define GEN_PASS_DEF_FOLDTENSORSUBSETOPSPASS

 #include "mlir/Dialect/Tensor/Transforms/Passes.h.inc"

 } // namespace tensor

 } // namespace mlir


 using namespace mlir;


 /// Returns `op.getBase()` for a vector.transfer_read. The fold pattern below
 /// inspects the defining op of this value to detect a tensor.extract_slice
 /// that can be folded into the read.
 static Value getTensorOperand(vector::TransferReadOp op) {

   return op.getBase();

 }


 /// Returns `op.getSource()` for a tensor.insert_slice, i.e. the value being
 /// inserted. The fold pattern below checks whether this value is produced by
 /// a vector.transfer_write.
 static Value getTensorOperand(tensor::InsertSliceOp op) {

   return op.getSource();

 }


 //===----------------------------------------------------------------------===//

 // Patterns

 //===----------------------------------------------------------------------===//


 namespace {

 /// Folds a tensor.extract_slice into a vector.transfer_read that reads from
 /// the slice result: the read is re-created on the slice's source tensor with
 /// indices and permutation map adjusted for the slice offsets and dropped
 /// dims.
 ///
 /// The pattern is a MaskableOpRewritePattern, so the rewrite hook also
 /// receives the masking op wrapping the read; `maskOp` is null when the read
 /// is not wrapped in one.

 class TransferReadOfExtractSliceOpFolder final

     : public vector::MaskableOpRewritePattern<vector::TransferReadOp> {

 public:

   // Inherit the base pattern constructors.
   using MaskableOpRewritePattern::MaskableOpRewritePattern;


   /// Returns the value produced by the folded read (wrapped in a mask
   /// operation when `maskOp` is non-null), or failure if the read does not
   /// match.
   FailureOr<mlir::Value>

   matchAndRewriteMaskableOp(vector::TransferReadOp readOp,

                             vector::MaskingOpInterface maskOp,

                             PatternRewriter &rewriter) const override;

 };


 /// Folds a tensor.insert_slice whose source is produced by a
 /// vector.transfer_write: the insert_slice is replaced by a transfer_write of
 /// the same vector directly into the insert_slice destination, with indices
 /// and permutation map adjusted for the slice offsets and dropped dims.

 class InsertSliceOfTransferWriteOpFolder final

     : public OpRewritePattern<tensor::InsertSliceOp> {

 public:

   // Inherit the base pattern constructors.
   using OpRewritePattern<tensor::InsertSliceOp>::OpRewritePattern;


   /// Matches on the insert_slice; fails unless its source is a
   /// transfer_write that passes the shared preconditions and covers the
   /// tensor it writes to.
   LogicalResult matchAndRewrite(tensor::InsertSliceOp insertSliceOp,

                                 PatternRewriter &rewriter) const override;


 private:

   /// Returns true only when the tensor written by `writeOp` has a static
   /// shape that is equal to the shape of the written vector. Dynamic shapes
   /// currently always return false.
   static bool

   doesTransferWriteCoverInsertSlice(vector::TransferWriteOp writeOp);

 };

 } // namespace


 /// Shared legality check for folding a tensor extract/insert slice op with a
 /// vector transfer op.
 ///
 /// Fails (reporting the reason on `xferOp` through `rewriter`) when the
 /// transfer has an out-of-bounds dimension, when the transfer carries a mask,
 /// or when the slice op has a non-unit stride. Succeeds otherwise.
 template <typename XferOp, typename ExtractOrInsertOp>
 static LogicalResult preconditionsFoldExtractOrInsertWithTransferOp(
     RewriterBase &rewriter, XferOp xferOp,
     ExtractOrInsertOp extractOrInsertSliceOp) {
   // Reject transfers with any out-of-bounds dimension.
   if (xferOp.hasOutOfBoundsDim())
     return rewriter.notifyMatchFailure(xferOp, "out of bounds transfer dim");

   // Reject transfers that carry a mask operand.
   if (xferOp.getMask())
     return rewriter.notifyMatchFailure(xferOp, "masked transfer");

   // Unit-stride slices are the only remaining requirement.
   if (extractOrInsertSliceOp.hasUnitStride())
     return success();

   return rewriter.notifyMatchFailure(
       xferOp, "non-1 stride insert/extract, requires keeping track of "
               "strides, this may result in needing to insert "
               "vector.insert_strided_slice/extract_strided_slice ops");
 }


 /// Rewrites a transfer_read of an extract_slice result into a transfer_read
 /// of the slice's source tensor.
 ///
 /// The read indices are resolved against the slice offsets/strides, and the
 /// permutation map is expanded to the rank of the slice source while
 /// projecting out the dims the slice dropped. If `maskOp` is non-null the new
 /// read is wrapped in a mask operation using `maskOp`'s mask.
 FailureOr<mlir::Value>
 TransferReadOfExtractSliceOpFolder::matchAndRewriteMaskableOp(
     vector::TransferReadOp readOp, vector::MaskingOpInterface maskOp,
     PatternRewriter &rewriter) const {
   // Only reads whose base comes from a tensor.extract_slice are handled.
   Value readBase = getTensorOperand(readOp);
   auto sliceOp = readBase.getDefiningOp<tensor::ExtractSliceOp>();
   if (!sliceOp)
     return rewriter.notifyMatchFailure(readOp, "not an extract_slice");

   if (failed(preconditionsFoldExtractOrInsertWithTransferOp(rewriter, readOp,
                                                             sliceOp)))
     return rewriter.notifyMatchFailure(readOp, "Failed preconditions");

   const Location loc = readOp.getLoc();
   const llvm::SmallBitVector droppedDims = sliceOp.getDroppedDims();

   // Translate the indices used on the slice into indices on its source.
   auto readIndices = readOp.getIndices();
   SmallVector<Value> sliceIndices(readIndices.begin(), readIndices.end());
   SmallVector<Value> sourceIndices;
   affine::resolveIndicesIntoOpWithOffsetsAndStrides(
       rewriter, loc, sliceOp.getMixedOffsets(), sliceOp.getMixedStrides(),
       droppedDims, sliceIndices, sourceIndices);

   // Expand the permutation map to the rank of the slice source.
   AffineMap expandedMap =
       expandDimsToRank(readOp.getPermutationMap(),
                        sliceOp.getSourceType().getRank(), droppedDims);

   Operation *foldedRead = rewriter.create<vector::TransferReadOp>(
       loc, readOp.getVectorType(), sliceOp.getSource(), sourceIndices,
       AffineMapAttr::get(expandedMap), readOp.getPadding(),
       /*mask=*/Value(), readOp.getInBoundsAttr());

   // Re-apply the enclosing mask, if there was one.
   if (maskOp)
     foldedRead =
         mlir::vector::maskOperation(rewriter, foldedRead, maskOp.getMask());
   return foldedRead->getResults()[0];
 }


 /// Replaces an insert_slice whose source is a transfer_write result with a
 /// transfer_write of the same vector directly into the insert_slice
 /// destination.
 ///
 /// The write indices are resolved against the insert_slice offsets/strides,
 /// and the permutation map is expanded to the destination rank while
 /// projecting out the dims dropped by the insert_slice.
 LogicalResult InsertSliceOfTransferWriteOpFolder::matchAndRewrite(
     tensor::InsertSliceOp insertSliceOp, PatternRewriter &rewriter) const {
   // Only insert_slice ops fed by a vector.transfer_write are handled.
   Value insertedValue = getTensorOperand(insertSliceOp);
   auto xferWrite = insertedValue.getDefiningOp<vector::TransferWriteOp>();
   if (!xferWrite)
     return rewriter.notifyMatchFailure(insertSliceOp, "not a transfer_write");

   // The precondition helper already reported the specific reason.
   if (failed(preconditionsFoldExtractOrInsertWithTransferOp(
           rewriter, xferWrite, insertSliceOp)))
     return failure();

   if (!doesTransferWriteCoverInsertSlice(xferWrite))
     return rewriter.notifyMatchFailure(
         insertSliceOp, "transfer_write does not cover insert_slice");

   const llvm::SmallBitVector droppedDims = insertSliceOp.getDroppedDims();

   // Translate the indices of the write into indices on the destination.
   auto writeIndices = xferWrite.getIndices();
   SmallVector<Value> sliceIndices(writeIndices.begin(), writeIndices.end());
   SmallVector<Value> destIndices;
   affine::resolveIndicesIntoOpWithOffsetsAndStrides(
       rewriter, xferWrite.getLoc(), insertSliceOp.getMixedOffsets(),
       insertSliceOp.getMixedStrides(), droppedDims, sliceIndices,
       destIndices);

   // Expand the permutation map to the rank of the destination tensor.
   AffineMap expandedMap =
       expandDimsToRank(xferWrite.getPermutationMap(),
                        insertSliceOp.getDestType().getRank(), droppedDims);

   rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
       insertSliceOp, xferWrite.getValue(), insertSliceOp.getDest(),
       destIndices, AffineMapAttr::get(expandedMap),
       xferWrite.getInBoundsAttr());
   return success();
 }


 /// Returns true when the tensor written by `writeOp` has a static shape that
 /// matches the shape of the written vector element for element. Any dynamic
 /// shape yields false.
 bool InsertSliceOfTransferWriteOpFolder::doesTransferWriteCoverInsertSlice(
     vector::TransferWriteOp writeOp) {
   auto writtenType = writeOp.getShapedType();

   // TODO: Use ValueBoundsConstraintSet for dynamic shapes.
   if (!writtenType.hasStaticShape())
     return false;

   return llvm::equal(writeOp.getVectorType().getShape(),
                      writtenType.getShape());
 }


 /// Folds an insert-slice-like op (`OpTy` is tensor.insert_slice or
 /// tensor.parallel_insert_slice) whose source is itself produced by a
 /// tensor.insert_slice into a single `OpTy` that inserts the inner source
 /// directly into the outer destination.
 ///
 /// The fold requires:
 ///   * unit strides on both ops, and
 ///   * for every dim not dropped by the outer op, the outer size to be equal
 ///     to the corresponding inner size (otherwise a copy would be needed).
 ///
 /// Offsets of the new op are obtained by resolving the inner offsets against
 /// the outer offsets/strides; sizes are resolved according to the dims
 /// dropped by the outer op.
 template <typename OpTy>
 struct InsertSliceOfInsertSliceFolder : public OpRewritePattern<OpTy> {
   using OpRewritePattern<OpTy>::OpRewritePattern;

   LogicalResult matchAndRewrite(OpTy insertSliceOp,
                                 PatternRewriter &rewriter) const override {
     auto sourceInsertSliceOp =
         insertSliceOp.getSource()
             .template getDefiningOp<tensor::InsertSliceOp>();
     if (!sourceInsertSliceOp)
       return failure();

     // TODO: relax unit stride assumption where possible.
     if (!insertSliceOp.hasUnitStride()) {
       return rewriter.notifyMatchFailure(insertSliceOp,
                                          "requires unit strides");
     }
     if (!sourceInsertSliceOp.hasUnitStride()) {
       return rewriter.notifyMatchFailure(sourceInsertSliceOp,
                                          "requires unit strides");
     }

     // Materialize the mixed sizes once: each `getMixedSizes()` call builds a
     // fresh vector, so calling it per loop iteration is wasted work.
     const SmallVector<OpFoldResult> outerSizes = insertSliceOp.getMixedSizes();
     const SmallVector<OpFoldResult> innerSizes =
         sourceInsertSliceOp.getMixedSizes();

     // Walk the outer (destination-rank) dims; `srcDim` tracks the matching
     // dim of the inner op, which only advances on non-dropped dims.
     int64_t srcDim = 0;
     llvm::SmallBitVector droppedDims = insertSliceOp.getDroppedDims();
     for (int64_t d = 0, e = insertSliceOp.getDestType().getRank(); d < e; ++d) {
       if (droppedDims[d])
         continue;
       if (outerSizes[d] != innerSizes[srcDim++]) {
         return rewriter.notifyMatchFailure(
             sourceInsertSliceOp,
             "requires matching sizes to fold, otherwise a copy is needed");
       }
     }

     // Resolve sizes according to dropped dims.
     SmallVector<OpFoldResult> resolvedSizes;
     // Note: the "insertSlice" case is symmetrical to the extract/subview case:
     // `insertSliceOp` is passed as the "source" and `sourceInsertSliceOp` is
     // passed as the destination to the helper function.
     affine::resolveSizesIntoOpWithSizes(outerSizes, innerSizes, droppedDims,
                                         resolvedSizes);

     // If we are inside an InParallel region, temporarily set the insertion
     // point outside: only tensor.parallel_insert_slice ops are allowed in
     // there. The check on `OpTy` is a compile-time property, hence
     // `if constexpr`.
     if constexpr (std::is_same_v<OpTy, tensor::ParallelInsertSliceOp>) {
       auto inParallelOp =
           insertSliceOp->template getParentOfType<scf::InParallelOp>();
       // Bail out before creating any IR if there is no scf.in_parallel
       // ancestor; setting the insertion point on a null op would crash.
       if (!inParallelOp) {
         return rewriter.notifyMatchFailure(
             insertSliceOp, "expected an scf.in_parallel parent");
       }
       rewriter.setInsertionPoint(inParallelOp);
     }

     // Resolve offsets according to source offsets and strides.
     SmallVector<Value> resolvedOffsets;
     // Note: the "insertSlice" case is symmetrical to the extract/subview case:
     // `insertSliceOp` is passed as the "source" and `sourceInsertSliceOp` is
     // passed as the destination to the helper function.
     affine::resolveIndicesIntoOpWithOffsetsAndStrides(
         rewriter, insertSliceOp.getLoc(), insertSliceOp.getMixedOffsets(),
         insertSliceOp.getMixedStrides(), droppedDims,
         sourceInsertSliceOp.getMixedOffsets(), resolvedOffsets);

     // Reset the insertion point.
     rewriter.setInsertionPoint(insertSliceOp);
     // Replace original op.
     rewriter.replaceOpWithNewOp<OpTy>(
         insertSliceOp, sourceInsertSliceOp.getSource(), insertSliceOp.getDest(),
         getAsOpFoldResult(resolvedOffsets), resolvedSizes,
         insertSliceOp.getMixedStrides());

     return success();
   }
 };


 /// Adds every fold pattern defined in this file to `patterns`: first the
 /// vector-transfer folds, then the insert-slice-of-insert-slice folds for
 /// both tensor.insert_slice and tensor.parallel_insert_slice.
 void tensor::populateFoldTensorSubsetOpPatterns(RewritePatternSet &patterns) {
   populateFoldTensorSubsetIntoVectorTransferPatterns(patterns);

   MLIRContext *ctx = patterns.getContext();
   patterns.add<InsertSliceOfInsertSliceFolder<tensor::InsertSliceOp>>(ctx);
   patterns.add<InsertSliceOfInsertSliceFolder<tensor::ParallelInsertSliceOp>>(
       ctx);
 }


 /// Adds the patterns that fold tensor.extract_slice into vector.transfer_read
 /// and vector.transfer_write into tensor.insert_slice to `patterns`.
 void tensor::populateFoldTensorSubsetIntoVectorTransferPatterns(
     RewritePatternSet &patterns) {
   MLIRContext *ctx = patterns.getContext();
   patterns.add<TransferReadOfExtractSliceOpFolder>(ctx);
   patterns.add<InsertSliceOfTransferWriteOpFolder>(ctx);
 }


 //===----------------------------------------------------------------------===//

 // Pass registration

 //===----------------------------------------------------------------------===//


 namespace {


 /// Pass that applies the tensor subset fold patterns of this file to the
 /// operation it runs on. The base class is the generated
 /// `FoldTensorSubsetOpsPassBase` pulled in from `Passes.h.inc` above.
 struct FoldTensorSubsetOpsPass final

     : public tensor::impl::FoldTensorSubsetOpsPassBase<

           FoldTensorSubsetOpsPass> {

   /// Populates the fold patterns and runs the greedy pattern driver.
   void runOnOperation() override;

 };


 } // namespace


 void FoldTensorSubsetOpsPass::runOnOperation() {

   RewritePatternSet patterns(&getContext());

   tensor::populateFoldTensorSubsetOpPatterns(patterns);

   (void)applyPatternsGreedily(getOperation(), std::move(patterns));

 }

AffineOps.h

Passes.h

getTensorOperand
static Value getTensorOperand(vector::TransferReadOp op)
Definition: FoldTensorSubsetOps.cpp:38

preconditionsFoldExtractOrInsertWithTransferOp
static LogicalResult preconditionsFoldExtractOrInsertWithTransferOp(RewriterBase &rewriter, XferOp xferOp, ExtractOrInsertOp extractOrInsertSliceOp)
Definition: FoldTensorSubsetOps.cpp:79

GreedyPatternRewriteDriver.h

getContext
static MLIRContext * getContext(OpFoldResult val)
Definition: IndexingUtils.cpp:296

IndexingUtils.h

ValueBoundsOpInterface.h

VectorOps.h

VectorUtils.h

ViewLikeInterfaceUtils.h

llvm::SmallVector
Definition: LLVM.h:72

mlir::OpBuilder::setInsertionPoint
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:396

mlir::OpBuilder::create
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:455

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88

mlir::Operation::getResults
result_range getResults()
Definition: Operation.h:415

mlir::PatternRewriter
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
Definition: PatternMatch.h:748

mlir::RewritePatternSet
Definition: PatternMatch.h:771

mlir::RewriterBase
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:358

mlir::RewriterBase::notifyMatchFailure
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition: PatternMatch.h:681

mlir::RewriterBase::replaceOpWithNewOp
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition: PatternMatch.h:500

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96

mlir::Value::getDefiningOp
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20

SCF.h

Tensor.h

Transforms.h

AffineMap.h

BuiltinAttributes.h

mlir::affine::resolveSizesIntoOpWithSizes
void resolveSizesIntoOpWithSizes(ArrayRef< OpFoldResult > sourceSizes, ArrayRef< OpFoldResult > destSizes, const llvm::SmallBitVector &rankReducedSourceDims, SmallVectorImpl< OpFoldResult > &resolvedSizes)
Given sourceSizes, destSizes and information about which dimensions are dropped by the source: rankRe...
Definition: ViewLikeInterfaceUtils.cpp:112

mlir::affine::resolveIndicesIntoOpWithOffsetsAndStrides
void resolveIndicesIntoOpWithOffsetsAndStrides(RewriterBase &rewriter, Location loc, ArrayRef< OpFoldResult > mixedSourceOffsets, ArrayRef< OpFoldResult > mixedSourceStrides, const llvm::SmallBitVector &rankReducedDims, ArrayRef< OpFoldResult > consumerIndices, SmallVectorImpl< Value > &resolvedIndices)
Given the 'consumerIndices' of a load/store operation operating on an op with offsets and strides,...
Definition: ViewLikeInterfaceUtils.cpp:80

mlir::tensor::populateFoldTensorSubsetOpPatterns
void populateFoldTensorSubsetOpPatterns(RewritePatternSet &patterns)
Appends patterns for folding tensor subset ops into consumer load/store ops into patterns.
Definition: FoldTensorSubsetOps.cpp:251

mlir::tensor::populateFoldTensorSubsetIntoVectorTransferPatterns
void populateFoldTensorSubsetIntoVectorTransferPatterns(RewritePatternSet &patterns)
Appends patterns for folding tensor subset ops into vector transfer ops.
Definition: FoldTensorSubsetOps.cpp:258

mlir::vector::maskOperation
Operation * maskOperation(OpBuilder &builder, Operation *maskableOp, Value mask, Value passthru=Value())
Creates a vector.mask operation around a maskable operation.

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::expandDimsToRank
AffineMap expandDimsToRank(AffineMap map, int64_t rank, const llvm::SmallBitVector &projectedDimensions)
Expand map to operate on rank dims while projecting out the dims in projectedDimensions.
Definition: AffineMap.cpp:952

mlir::applyPatternsGreedily
LogicalResult applyPatternsGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
Definition: GreedyPatternRewriteDriver.cpp:898

mlir::patterns
const FrozenRewritePatternSet & patterns
Definition: GreedyPatternRewriteDriver.h:283

mlir::get
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
Definition: BytecodeImplementation.h:509

mlir::getAsOpFoldResult
OpFoldResult getAsOpFoldResult(Value val)
Given a value, try to extract a constant Attribute.
Definition: StaticValueUtils.cpp:79

InsertSliceOfInsertSliceFolder
Definition: FoldTensorSubsetOps.cpp:178

InsertSliceOfInsertSliceFolder::matchAndRewrite
LogicalResult matchAndRewrite(OpTy insertSliceOp, PatternRewriter &rewriter) const override
Definition: FoldTensorSubsetOps.cpp:181

mlir::OpRewritePattern
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Definition: PatternMatch.h:314

mlir::vector::MaskableOpRewritePattern
A pattern for ops that implement MaskableOpInterface and that might be masked (i.e.
Definition: VectorUtils.h:161