VectorTransferOpTransforms.cpp
1 //===- VectorTransferOpTransforms.cpp - transfer op transforms ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions concerned with optimizing transfer_read and
10 // transfer_write ops.
11 //
12 //===----------------------------------------------------------------------===//
13 
22 #include "mlir/IR/BuiltinOps.h"
23 #include "mlir/IR/Dominance.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/Debug.h"
28 
29 #define DEBUG_TYPE "vector-transfer-opt"
30 
31 #define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
32 
33 using namespace mlir;
34 
35 /// Return the ancestor op in the region or nullptr if the region is not
36 /// an ancestor of the op.
37 static Operation *findAncestorOpInRegion(Region *region, Operation *op) {
38  for (; op != nullptr && op->getParentRegion() != region;
39  op = op->getParentOp())
40  ;
41  return op;
42 }
43 
44 namespace {
45 
46 class TransferOptimization {
47 public:
48  TransferOptimization(RewriterBase &rewriter, Operation *op)
49  : rewriter(rewriter), dominators(op), postDominators(op) {}
50  void deadStoreOp(vector::TransferWriteOp);
51  void storeToLoadForwarding(vector::TransferReadOp);
52  void removeDeadOp() {
53  for (Operation *op : opToErase)
54  rewriter.eraseOp(op);
55  opToErase.clear();
56  }
57 
58 private:
59  RewriterBase &rewriter;
60  bool isReachable(Operation *start, Operation *dest);
61  DominanceInfo dominators;
62  PostDominanceInfo postDominators;
63  std::vector<Operation *> opToErase;
64 };
65 
66 } // namespace
67 /// Return true if there is a path from start operation to dest operation,
68 /// otherwise return false. The operations have to be in the same region.
69 bool TransferOptimization::isReachable(Operation *start, Operation *dest) {
70  assert(start->getParentRegion() == dest->getParentRegion() &&
71  "This function only works for ops in the same region");
72  // Simple case where the start op dominates the destination.
73  if (dominators.dominates(start, dest))
74  return true;
75  Block *startBlock = start->getBlock();
76  Block *destBlock = dest->getBlock();
77  SmallVector<Block *, 32> worklist(startBlock->succ_begin(),
78  startBlock->succ_end());
79  llvm::SmallPtrSet<Block *, 32> visited;
80  while (!worklist.empty()) {
81  Block *bb = worklist.pop_back_val();
82  if (!visited.insert(bb).second)
83  continue;
84  if (dominators.dominates(bb, destBlock))
85  return true;
86  worklist.append(bb->succ_begin(), bb->succ_end());
87  }
88  return false;
89 }
90 
91 /// For a transfer_write to fully overwrite another transfer_write, it must:
92 /// 1. Access the same memref with the same indices and vector type.
93 /// 2. Post-dominate the other transfer_write operation.
94 /// If several candidates are available, one must be post-dominated by all the
95 /// others since they are all post-dominating the same transfer_write. We only
96 /// consider the transfer_write post-dominated by all the other candidates as
97 /// this will be the first transfer_write executed after the potentially dead
98 /// transfer_write.
99 /// If we find such an overwriting transfer_write, we know that the original
100 /// transfer_write is dead if all reads that can be reached from the potentially
101 /// dead transfer_write are dominated by the overwriting transfer_write.
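/// As an illustrative sketch (types, indices and attributes are assumed here,
/// not taken from a specific test), the first write below is dead because the
/// second one overwrites the same region, post-dominates it, and no read is
/// reachable in between:
///
///   vector.transfer_write %v0, %m[%c0, %c0] {in_bounds = [true, true]}
///       : vector<1x4xf32>, memref<4x8xf32>   // dead, erased
///   vector.transfer_write %v1, %m[%c0, %c0] {in_bounds = [true, true]}
///       : vector<1x4xf32>, memref<4x8xf32>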
102 void TransferOptimization::deadStoreOp(vector::TransferWriteOp write) {
103  LLVM_DEBUG(DBGS() << "Candidate for dead store: " << *write.getOperation()
104  << "\n");
105  llvm::SmallVector<Operation *, 8> blockingAccesses;
106  Operation *firstOverwriteCandidate = nullptr;
107  Value source = write.getSource();
108  // Skip subview ops.
109  while (auto subView = source.getDefiningOp<memref::SubViewOp>())
110  source = subView.getSource();
111  llvm::SmallVector<Operation *, 32> users(source.getUsers().begin(),
112  source.getUsers().end());
113  llvm::SmallDenseSet<Operation *, 32> processed;
114  while (!users.empty()) {
115  Operation *user = users.pop_back_val();
116  // If the user has already been processed skip.
117  if (!processed.insert(user).second)
118  continue;
119  if (auto subView = dyn_cast<memref::SubViewOp>(user)) {
120  users.append(subView->getUsers().begin(), subView->getUsers().end());
121  continue;
122  }
123  if (isMemoryEffectFree(user))
124  continue;
125  if (user == write.getOperation())
126  continue;
127  if (auto nextWrite = dyn_cast<vector::TransferWriteOp>(user)) {
128  // Check whether this candidate can overwrite the store.
129  if (write.getSource() == nextWrite.getSource() &&
130  checkSameValueWAW(nextWrite, write) &&
131  postDominators.postDominates(nextWrite, write)) {
132  if (firstOverwriteCandidate == nullptr ||
133  postDominators.postDominates(firstOverwriteCandidate, nextWrite))
134  firstOverwriteCandidate = nextWrite;
135  else
136  assert(
137  postDominators.postDominates(nextWrite, firstOverwriteCandidate));
138  continue;
139  }
140  }
141  if (auto transferOp = dyn_cast<VectorTransferOpInterface>(user)) {
142  // Don't need to consider disjoint accesses.
143  if (vector::isDisjointTransferSet(
144  cast<VectorTransferOpInterface>(write.getOperation()),
145  cast<VectorTransferOpInterface>(transferOp.getOperation()),
146  /*testDynamicValueUsingBounds=*/true))
147  continue;
148  }
149  blockingAccesses.push_back(user);
150  }
151  if (firstOverwriteCandidate == nullptr)
152  return;
153  Region *topRegion = firstOverwriteCandidate->getParentRegion();
154  Operation *writeAncestor = findAncestorOpInRegion(topRegion, write);
155  assert(writeAncestor &&
156  "write op should be recursively part of the top region");
157 
158  for (Operation *access : blockingAccesses) {
159  Operation *accessAncestor = findAncestorOpInRegion(topRegion, access);
160  // TODO: if the access and write have the same ancestor we could recurse in
161  // the region to know if the access is reachable with more precision.
162  if (accessAncestor == nullptr ||
163  !isReachable(writeAncestor, accessAncestor))
164  continue;
165  if (!dominators.dominates(firstOverwriteCandidate, accessAncestor)) {
166  LLVM_DEBUG(DBGS() << "Store may not be dead due to op: "
167  << *accessAncestor << "\n");
168  return;
169  }
170  }
171  LLVM_DEBUG(DBGS() << "Found dead store: " << *write.getOperation()
172  << " overwritten by: " << *firstOverwriteCandidate << "\n");
173  opToErase.push_back(write.getOperation());
174 }
175 
176 /// A transfer_write candidate for store-to-load forwarding must:
177 /// 1. Access the same memref with the same indices and vector type as the
178 /// transfer_read.
179 /// 2. Dominate the transfer_read operation.
180 /// If several candidates are available, one must be dominated by all the others
181 /// since they are all dominating the same transfer_read. We only consider the
182 /// transfer_write dominated by all the other candidates as this will be the
183 /// last transfer_write executed before the transfer_read.
184 /// If we found such a candidate we can do the forwarding if all the other
185 /// potentially aliasing ops that may reach the transfer_read are post-dominated
186 /// by the transfer_write.
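/// Illustrative sketch (assumed types and indices): the read below can be
/// replaced by the value %v written just before it, since the write dominates
/// the read, accesses the same indices with the same vector type, and no other
/// aliasing write can reach the read:
///
///   vector.transfer_write %v, %m[%c0, %c0] {in_bounds = [true, true]}
///       : vector<1x4xf32>, memref<4x8xf32>
///   %r = vector.transfer_read %m[%c0, %c0], %pad {in_bounds = [true, true]}
///       : memref<4x8xf32>, vector<1x4xf32>   // %r is replaced by %v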
187 void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) {
188  if (read.hasOutOfBoundsDim())
189  return;
190  LLVM_DEBUG(DBGS() << "Candidate for Forwarding: " << *read.getOperation()
191  << "\n");
192  SmallVector<Operation *, 8> blockingWrites;
193  vector::TransferWriteOp lastwrite = nullptr;
194  Value source = read.getSource();
195  // Skip subview ops.
196  while (auto subView = source.getDefiningOp<memref::SubViewOp>())
197  source = subView.getSource();
198  llvm::SmallVector<Operation *, 32> users(source.getUsers().begin(),
199  source.getUsers().end());
200  llvm::SmallDenseSet<Operation *, 32> processed;
201  while (!users.empty()) {
202  Operation *user = users.pop_back_val();
203  // If the user has already been processed skip.
204  if (!processed.insert(user).second)
205  continue;
206  if (auto subView = dyn_cast<memref::SubViewOp>(user)) {
207  users.append(subView->getUsers().begin(), subView->getUsers().end());
208  continue;
209  }
210  if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
211  users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
212  continue;
213  }
214  if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
215  continue;
216  if (auto write = dyn_cast<vector::TransferWriteOp>(user)) {
217  // If there is a write, but we can prove that it is disjoint we can ignore
218  // the write.
219  if (vector::isDisjointTransferSet(
220  cast<VectorTransferOpInterface>(write.getOperation()),
221  cast<VectorTransferOpInterface>(read.getOperation()),
222  /*testDynamicValueUsingBounds=*/true))
223  continue;
224  if (write.getSource() == read.getSource() &&
225  dominators.dominates(write, read) && checkSameValueRAW(write, read)) {
226  if (lastwrite == nullptr || dominators.dominates(lastwrite, write))
227  lastwrite = write;
228  else
229  assert(dominators.dominates(write, lastwrite));
230  continue;
231  }
232  }
233  blockingWrites.push_back(user);
234  }
235 
236  if (lastwrite == nullptr)
237  return;
238 
239  Region *topRegion = lastwrite->getParentRegion();
240  Operation *readAncestor = findAncestorOpInRegion(topRegion, read);
241  assert(readAncestor &&
242  "read op should be recursively part of the top region");
243 
244  for (Operation *write : blockingWrites) {
245  Operation *writeAncestor = findAncestorOpInRegion(topRegion, write);
246  // TODO: if the store and read have the same ancestor we could recurse in
247  // the region to know if the read is reachable with more precision.
248  if (writeAncestor == nullptr || !isReachable(writeAncestor, readAncestor))
249  continue;
250  if (!postDominators.postDominates(lastwrite, write)) {
251  LLVM_DEBUG(DBGS() << "Fail to do write to read forwarding due to op: "
252  << *write << "\n");
253  return;
254  }
255  }
256 
257  LLVM_DEBUG(DBGS() << "Forward value from " << *lastwrite.getOperation()
258  << " to: " << *read.getOperation() << "\n");
259  read.replaceAllUsesWith(lastwrite.getVector());
260  opToErase.push_back(read.getOperation());
261 }
262 
263 /// Returns a copy of `shape` without unit dims.
264 static SmallVector<int64_t> getReducedShape(ArrayRef<int64_t> shape) {
265  SmallVector<int64_t> reducedShape;
266  llvm::copy_if(shape, std::back_inserter(reducedShape),
267  [](int64_t dimSize) { return dimSize != 1; });
268  return reducedShape;
269 }
270 
271 /// Converts OpFoldResults to int64_t shape without unit dims.
272 static SmallVector<int64_t> getReducedShape(ArrayRef<OpFoldResult> mixedSizes) {
273  SmallVector<int64_t> reducedShape;
274  for (const auto size : mixedSizes) {
275  if (llvm::dyn_cast_if_present<Value>(size)) {
276  reducedShape.push_back(ShapedType::kDynamic);
277  continue;
278  }
279 
280  auto value = cast<IntegerAttr>(size.get<Attribute>()).getValue();
281  if (value == 1)
282  continue;
283  reducedShape.push_back(value.getSExtValue());
284  }
285  return reducedShape;
286 }
287 
288 /// Drops unit dimensions from the input MemRefType.
289 static MemRefType dropUnitDims(MemRefType inputType,
290  ArrayRef<OpFoldResult> offsets,
291  ArrayRef<OpFoldResult> sizes,
292  ArrayRef<OpFoldResult> strides) {
293  auto targetShape = getReducedShape(sizes);
294  Type rankReducedType = memref::SubViewOp::inferRankReducedResultType(
295  targetShape, inputType, offsets, sizes, strides);
296  return canonicalizeStridedLayout(cast<MemRefType>(rankReducedType));
297 }
298 
299 /// Creates a rank-reducing memref.subview op that drops unit dims from its
300 /// input, or returns the input unchanged if it has no unit dims.
301 static Value rankReducingSubviewDroppingUnitDims(PatternRewriter &rewriter,
302  mlir::Location loc,
303  Value input) {
304  MemRefType inputType = cast<MemRefType>(input.getType());
305  SmallVector<OpFoldResult> offsets(inputType.getRank(),
306  rewriter.getIndexAttr(0));
307  SmallVector<OpFoldResult> sizes = memref::getMixedSizes(rewriter, loc, input);
308  SmallVector<OpFoldResult> strides(inputType.getRank(),
309  rewriter.getIndexAttr(1));
310  MemRefType resultType = dropUnitDims(inputType, offsets, sizes, strides);
311 
312  if (canonicalizeStridedLayout(resultType) ==
313  canonicalizeStridedLayout(inputType))
314  return input;
315  return rewriter.create<memref::SubViewOp>(loc, resultType, input, offsets,
316  sizes, strides);
317 }
318 
319 /// Returns the number of dims that aren't unit dims.
320 static int getReducedRank(ArrayRef<int64_t> shape) {
321  return llvm::count_if(shape, [](int64_t dimSize) { return dimSize != 1; });
322 }
323 
324 /// Trims non-scalable unit (size-1) dimensions from `oldType` and returns the
325 /// resulting type.
326 static VectorType trimNonScalableUnitDims(VectorType oldType) {
327  SmallVector<int64_t> newShape;
328  SmallVector<bool> newScalableDims;
329  for (auto [dimIdx, dimSize] : llvm::enumerate(oldType.getShape())) {
330  if (dimSize == 1 && !oldType.getScalableDims()[dimIdx])
331  continue;
332  newShape.push_back(dimSize);
333  newScalableDims.push_back(oldType.getScalableDims()[dimIdx]);
334  }
335  return VectorType::get(newShape, oldType.getElementType(), newScalableDims);
336 }
337 
338 // Rewrites vector.create_mask 'op' to drop non-scalable unit (size-1) dimensions.
339 static FailureOr<Value>
340 createMaskDropNonScalableUnitDims(PatternRewriter &rewriter, Location loc,
341  vector::CreateMaskOp op) {
342  auto type = op.getType();
343  auto reducedType = trimNonScalableUnitDims(type);
344  if (reducedType.getRank() == type.getRank())
345  return failure();
346 
347  SmallVector<Value> reducedOperands;
348  for (auto [dim, dimIsScalable, operand] : llvm::zip_equal(
349  type.getShape(), type.getScalableDims(), op.getOperands())) {
350  if (dim == 1 && !dimIsScalable) {
351  // If the mask for the unit dim is not a constant of 1, do nothing.
352  auto constant = operand.getDefiningOp<arith::ConstantIndexOp>();
353  if (!constant || (constant.value() != 1))
354  return failure();
355  continue;
356  }
357  reducedOperands.push_back(operand);
358  }
359  return rewriter
360  .create<vector::CreateMaskOp>(loc, reducedType, reducedOperands)
361  .getResult();
362 }
363 
364 namespace {
365 
366 /// Rewrites `vector.transfer_read` ops where the source has unit dims, by
367 /// inserting a memref.subview dropping those unit dims. The vector shapes are
368 /// also reduced accordingly.
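/// Illustrative sketch (shapes assumed for exposition): a read of
/// vector<1x4x1xf32> from memref<1x4x1xf32> becomes, roughly,
///
///   %sv = memref.subview %m[0, 0, 0] [1, 4, 1] [1, 1, 1]
///             : memref<1x4x1xf32> to memref<4xf32>
///   %r  = vector.transfer_read %sv[%c0], %pad {in_bounds = [true]}
///             : memref<4xf32>, vector<4xf32>
///   %v  = vector.shape_cast %r : vector<4xf32> to vector<1x4x1xf32>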
369 class TransferReadDropUnitDimsPattern
370  : public OpRewritePattern<vector::TransferReadOp> {
371  using OpRewritePattern::OpRewritePattern;
372 
373  LogicalResult matchAndRewrite(vector::TransferReadOp transferReadOp,
374  PatternRewriter &rewriter) const override {
375  auto loc = transferReadOp.getLoc();
376  Value vector = transferReadOp.getVector();
377  VectorType vectorType = cast<VectorType>(vector.getType());
378  Value source = transferReadOp.getSource();
379  MemRefType sourceType = dyn_cast<MemRefType>(source.getType());
380  // TODO: support tensor types.
381  if (!sourceType)
382  return failure();
383  // TODO: generalize this pattern, relax the requirements here.
384  if (transferReadOp.hasOutOfBoundsDim())
385  return failure();
386  if (!transferReadOp.getPermutationMap().isMinorIdentity())
387  return failure();
388  // Check if the source shape can be further reduced.
389  int reducedRank = getReducedRank(sourceType.getShape());
390  if (reducedRank == sourceType.getRank())
391  return failure();
392  // Check if the reduced vector shape matches the reduced source shape.
393  // Otherwise, this case is not supported yet.
394  auto reducedVectorType = trimNonScalableUnitDims(vectorType);
395  if (reducedRank != reducedVectorType.getRank())
396  return failure();
397  if (llvm::any_of(transferReadOp.getIndices(), [](Value v) {
398  return getConstantIntValue(v) != static_cast<int64_t>(0);
399  }))
400  return failure();
401 
402  Value maskOp = transferReadOp.getMask();
403  if (maskOp) {
404  auto createMaskOp = maskOp.getDefiningOp<vector::CreateMaskOp>();
405  if (!createMaskOp)
406  return rewriter.notifyMatchFailure(
407  transferReadOp, "unsupported mask op, only 'vector.create_mask' is "
408  "currently supported");
409  FailureOr<Value> rankReducedCreateMask =
410  createMaskDropNonScalableUnitDims(rewriter, loc, createMaskOp);
411  if (failed(rankReducedCreateMask))
412  return failure();
413  maskOp = *rankReducedCreateMask;
414  }
415 
416  Value reducedShapeSource =
417  rankReducingSubviewDroppingUnitDims(rewriter, loc, source);
418  Value c0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
419  SmallVector<Value> zeros(reducedRank, c0);
420  auto identityMap = rewriter.getMultiDimIdentityMap(reducedRank);
421  SmallVector<bool> inBounds(reducedVectorType.getRank(), true);
422  auto newTransferReadOp = rewriter.create<vector::TransferReadOp>(
423  loc, reducedVectorType, reducedShapeSource, zeros, identityMap,
424  transferReadOp.getPadding(), maskOp,
425  rewriter.getBoolArrayAttr(inBounds));
426  auto shapeCast = rewriter.createOrFold<vector::ShapeCastOp>(
427  loc, vectorType, newTransferReadOp);
428  rewriter.replaceOp(transferReadOp, shapeCast);
429 
430  return success();
431  }
432 };
433 
434 /// Rewrites `vector.transfer_write` ops where the "source" (i.e. destination)
435 /// has unit dims, by inserting a `memref.subview` dropping those unit dims. The
436 /// vector shapes are also reduced accordingly.
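/// Illustrative sketch (shapes assumed): writing vector<1x4x1xf32> into
/// memref<1x4x1xf32> becomes, roughly,
///
///   %sv = memref.subview %m[0, 0, 0] [1, 4, 1] [1, 1, 1]
///             : memref<1x4x1xf32> to memref<4xf32>
///   %sc = vector.shape_cast %v : vector<1x4x1xf32> to vector<4xf32>
///   vector.transfer_write %sc, %sv[%c0] : vector<4xf32>, memref<4xf32>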
437 class TransferWriteDropUnitDimsPattern
438  : public OpRewritePattern<vector::TransferWriteOp> {
439  using OpRewritePattern::OpRewritePattern;
440 
441  LogicalResult matchAndRewrite(vector::TransferWriteOp transferWriteOp,
442  PatternRewriter &rewriter) const override {
443  auto loc = transferWriteOp.getLoc();
444  Value vector = transferWriteOp.getVector();
445  VectorType vectorType = cast<VectorType>(vector.getType());
446  Value source = transferWriteOp.getSource();
447  MemRefType sourceType = dyn_cast<MemRefType>(source.getType());
448  // TODO: support tensor type.
449  if (!sourceType || !sourceType.hasStaticShape())
450  return failure();
451  if (sourceType.getNumElements() != vectorType.getNumElements())
452  return failure();
453  // TODO: generalize this pattern, relax the requirements here.
454  if (transferWriteOp.hasOutOfBoundsDim())
455  return failure();
456  if (!transferWriteOp.getPermutationMap().isMinorIdentity())
457  return failure();
458  // Check if the destination shape can be further reduced.
459  int reducedRank = getReducedRank(sourceType.getShape());
460  if (reducedRank == sourceType.getRank())
461  return failure();
462  // Check if the reduced vector shape matches the reduced destination shape.
463  // Otherwise, this case is not supported yet.
464  int vectorReducedRank = getReducedRank(vectorType.getShape());
465  if (reducedRank != vectorReducedRank)
466  return failure();
467  if (llvm::any_of(transferWriteOp.getIndices(), [](Value v) {
468  return getConstantIntValue(v) != static_cast<int64_t>(0);
469  }))
470  return failure();
471  Value reducedShapeSource =
472  rankReducingSubviewDroppingUnitDims(rewriter, loc, source);
473  Value c0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
474  SmallVector<Value> zeros(reducedRank, c0);
475  auto identityMap = rewriter.getMultiDimIdentityMap(reducedRank);
476  VectorType reducedVectorType = VectorType::get(
477  getReducedShape(vectorType.getShape()), vectorType.getElementType());
478 
479  auto shapeCast = rewriter.createOrFold<vector::ShapeCastOp>(
480  loc, reducedVectorType, vector);
481  rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
482  transferWriteOp, shapeCast, reducedShapeSource, zeros, identityMap);
483 
484  return success();
485  }
486 };
487 
488 } // namespace
489 
490 /// Return true if the trailing dimensions of the memref type match the given
491 /// shape contiguously (row-major, innermost stride 1); otherwise return false.
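/// For example, a memref<2x3x4xf32> with the identity layout has strides
/// [12, 4, 1]; its two innermost dimensions match the target shape [3, 4]
/// (the flattened extents 4 and 12 equal the corresponding strides), but they
/// would not match [2, 4] or a non-contiguous (e.g. transposed) layout.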
492 static int64_t hasMatchingInnerContigousShape(MemRefType memrefType,
493  ArrayRef<int64_t> targetShape) {
494  auto shape = memrefType.getShape();
495  SmallVector<int64_t> strides;
496  int64_t offset;
497  if (!succeeded(getStridesAndOffset(memrefType, strides, offset)))
498  return false;
499  if (strides.back() != 1)
500  return false;
501  strides.pop_back();
502  int64_t flatDim = 1;
503  for (auto [targetDim, memrefDim, memrefStride] :
504  llvm::reverse(llvm::zip(targetShape, shape, strides))) {
505  flatDim *= memrefDim;
506  if (flatDim != memrefStride || targetDim != memrefDim)
507  return false;
508  }
509  return true;
510 }
511 
512 /// Creates a memref.collapse_shape collapsing all inner dimensions of the
513 /// input starting at `firstDimToCollapse`.
514 static Value collapseInnerDims(PatternRewriter &rewriter, mlir::Location loc,
515  Value input, int64_t firstDimToCollapse) {
516  ShapedType inputType = cast<ShapedType>(input.getType());
517  if (inputType.getRank() == 1)
518  return input;
519  SmallVector<ReassociationIndices> reassociation;
520  for (int64_t i = 0; i < firstDimToCollapse; ++i)
521  reassociation.push_back(ReassociationIndices{i});
522  ReassociationIndices collapsedIndices;
523  for (int64_t i = firstDimToCollapse; i < inputType.getRank(); ++i)
524  collapsedIndices.push_back(i);
525  reassociation.push_back(collapsedIndices);
526  return rewriter.create<memref::CollapseShapeOp>(loc, input, reassociation);
527 }
528 
529 /// Checks that the indices corresponding to dimensions starting at
530 /// `firstDimToCollapse` are constant 0, and writes to `outIndices`
531 /// the truncated indices where `firstDimToCollapse` is now the innermost dim.
532 static LogicalResult
533 checkAndCollapseInnerZeroIndices(ValueRange indices, int64_t firstDimToCollapse,
534  SmallVector<Value> &outIndices) {
535  int64_t rank = indices.size();
536  if (firstDimToCollapse >= rank)
537  return failure();
538  for (int64_t i = firstDimToCollapse; i < rank; ++i) {
539  std::optional<int64_t> cst = getConstantIntValue(indices[i]);
540  if (!cst || cst.value() != 0)
541  return failure();
542  }
543  outIndices = indices;
544  outIndices.resize(firstDimToCollapse + 1);
545  return success();
546 }
547 
548 namespace {
549 
550 /// Rewrites contiguous row-major vector.transfer_read ops by inserting
551 /// memref.collapse_shape on the source so that the resulting
552 /// vector.transfer_read has a 1D source. Requires the source shape to be
553 /// already reduced i.e. without unit dims.
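/// Illustrative sketch (shapes assumed; the source must be contiguous
/// row-major): a 2-D read such as
///
///   %v = vector.transfer_read %m[%c0, %c0], %pad {in_bounds = [true, true]}
///       : memref<4x8xf32>, vector<2x8xf32>
///
/// becomes a 1-D read of the collapsed memref followed by a shape_cast:
///
///   %cm = memref.collapse_shape %m [[0, 1]]
///       : memref<4x8xf32> into memref<32xf32>
///   %f  = vector.transfer_read %cm[%c0], %pad {in_bounds = [true]}
///       : memref<32xf32>, vector<16xf32>
///   %v  = vector.shape_cast %f : vector<16xf32> to vector<2x8xf32>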
554 class FlattenContiguousRowMajorTransferReadPattern
555  : public OpRewritePattern<vector::TransferReadOp> {
556  using OpRewritePattern::OpRewritePattern;
557 
558  LogicalResult matchAndRewrite(vector::TransferReadOp transferReadOp,
559  PatternRewriter &rewriter) const override {
560  auto loc = transferReadOp.getLoc();
561  Value vector = transferReadOp.getVector();
562  VectorType vectorType = cast<VectorType>(vector.getType());
563  Value source = transferReadOp.getSource();
564  MemRefType sourceType = dyn_cast<MemRefType>(source.getType());
565  // The contiguity check below needs strides, so only memref sources are supported.
566  if (!sourceType)
567  return failure();
568  if (vectorType.getRank() <= 1)
569  // Already 0D/1D, nothing to do.
570  return failure();
571  if (!hasMatchingInnerContigousShape(
572  sourceType,
573  vectorType.getShape().take_back(vectorType.getRank() - 1)))
574  return failure();
575  int64_t firstContiguousInnerDim =
576  sourceType.getRank() - vectorType.getRank();
577  // TODO: generalize this pattern, relax the requirements here.
578  if (transferReadOp.hasOutOfBoundsDim())
579  return failure();
580  if (!transferReadOp.getPermutationMap().isMinorIdentity())
581  return failure();
582  if (transferReadOp.getMask())
583  return failure();
584  SmallVector<Value> collapsedIndices;
585  if (failed(checkAndCollapseInnerZeroIndices(transferReadOp.getIndices(),
586  firstContiguousInnerDim,
587  collapsedIndices)))
588  return failure();
589  Value collapsedSource =
590  collapseInnerDims(rewriter, loc, source, firstContiguousInnerDim);
591  MemRefType collapsedSourceType =
592  dyn_cast<MemRefType>(collapsedSource.getType());
593  int64_t collapsedRank = collapsedSourceType.getRank();
594  assert(collapsedRank == firstContiguousInnerDim + 1);
595  SmallVector<AffineExpr, 1> dimExprs{
596  getAffineDimExpr(firstContiguousInnerDim, rewriter.getContext())};
597  auto collapsedMap =
598  AffineMap::get(collapsedRank, 0, dimExprs, rewriter.getContext());
599  VectorType flatVectorType = VectorType::get({vectorType.getNumElements()},
600  vectorType.getElementType());
601  vector::TransferReadOp flatRead = rewriter.create<vector::TransferReadOp>(
602  loc, flatVectorType, collapsedSource, collapsedIndices, collapsedMap);
603  flatRead.setInBoundsAttr(rewriter.getBoolArrayAttr({true}));
604  rewriter.replaceOpWithNewOp<vector::ShapeCastOp>(
605  transferReadOp, cast<VectorType>(vector.getType()), flatRead);
606  return success();
607  }
608 };
609 
610 /// Rewrites contiguous row-major vector.transfer_write ops by inserting
611 /// memref.collapse_shape on the source so that the resulting
612 /// vector.transfer_write has a 1D source. Requires the source shape to be
613 /// already reduced i.e. without unit dims.
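/// Illustrative sketch (shapes assumed, mirroring the read pattern above):
/// writing vector<2x8xf32> into memref<4x8xf32> at [%c0, %c0] becomes
///
///   %cm = memref.collapse_shape %m [[0, 1]]
///       : memref<4x8xf32> into memref<32xf32>
///   %f  = vector.shape_cast %v : vector<2x8xf32> to vector<16xf32>
///   vector.transfer_write %f, %cm[%c0] {in_bounds = [true]}
///       : vector<16xf32>, memref<32xf32>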
614 class FlattenContiguousRowMajorTransferWritePattern
615  : public OpRewritePattern<vector::TransferWriteOp> {
616  using OpRewritePattern::OpRewritePattern;
617 
618  LogicalResult matchAndRewrite(vector::TransferWriteOp transferWriteOp,
619  PatternRewriter &rewriter) const override {
620  auto loc = transferWriteOp.getLoc();
621  Value vector = transferWriteOp.getVector();
622  VectorType vectorType = cast<VectorType>(vector.getType());
623  Value source = transferWriteOp.getSource();
624  MemRefType sourceType = dyn_cast<MemRefType>(source.getType());
625  // The contiguity check below needs strides, so only memref sources are supported.
626  if (!sourceType)
627  return failure();
628  if (vectorType.getRank() <= 1)
629  // Already 0D/1D, nothing to do.
630  return failure();
631  if (!hasMatchingInnerContigousShape(
632  sourceType,
633  vectorType.getShape().take_back(vectorType.getRank() - 1)))
634  return failure();
635  int64_t firstContiguousInnerDim =
636  sourceType.getRank() - vectorType.getRank();
637  // TODO: generalize this pattern, relax the requirements here.
638  if (transferWriteOp.hasOutOfBoundsDim())
639  return failure();
640  if (!transferWriteOp.getPermutationMap().isMinorIdentity())
641  return failure();
642  if (transferWriteOp.getMask())
643  return failure();
644  SmallVector<Value> collapsedIndices;
645  if (failed(checkAndCollapseInnerZeroIndices(transferWriteOp.getIndices(),
646  firstContiguousInnerDim,
647  collapsedIndices)))
648  return failure();
649  Value collapsedSource =
650  collapseInnerDims(rewriter, loc, source, firstContiguousInnerDim);
651  MemRefType collapsedSourceType =
652  cast<MemRefType>(collapsedSource.getType());
653  int64_t collapsedRank = collapsedSourceType.getRank();
654  assert(collapsedRank == firstContiguousInnerDim + 1);
655  SmallVector<AffineExpr, 1> dimExprs{
656  getAffineDimExpr(firstContiguousInnerDim, rewriter.getContext())};
657  auto collapsedMap =
658  AffineMap::get(collapsedRank, 0, dimExprs, rewriter.getContext());
659  VectorType flatVectorType = VectorType::get({vectorType.getNumElements()},
660  vectorType.getElementType());
661  Value flatVector =
662  rewriter.create<vector::ShapeCastOp>(loc, flatVectorType, vector);
663  vector::TransferWriteOp flatWrite =
664  rewriter.create<vector::TransferWriteOp>(
665  loc, flatVector, collapsedSource, collapsedIndices, collapsedMap);
666  flatWrite.setInBoundsAttr(rewriter.getBoolArrayAttr({true}));
667  rewriter.eraseOp(transferWriteOp);
668  return success();
669  }
670 };
671 
672 /// Base class for `vector.extract/vector.extractelement(vector.transfer_read)`
673 /// to `memref.load` patterns. The `match` method is shared for both
674 /// `vector.extract` and `vector.extractelement`.
675 template <class VectorExtractOp>
676 class RewriteScalarExtractOfTransferReadBase
677  : public OpRewritePattern<VectorExtractOp> {
678  using Base = OpRewritePattern<VectorExtractOp>;
679 
680 public:
681  RewriteScalarExtractOfTransferReadBase(MLIRContext *context,
682  PatternBenefit benefit,
683  bool allowMultipleUses)
684  : Base::OpRewritePattern(context, benefit),
685  allowMultipleUses(allowMultipleUses) {}
686 
687  LogicalResult match(VectorExtractOp extractOp) const override {
688  auto xferOp =
689  extractOp.getVector().template getDefiningOp<vector::TransferReadOp>();
690  if (!xferOp)
691  return failure();
692  // Check that we are extracting a scalar and not a sub-vector.
693  if (isa<VectorType>(extractOp.getResult().getType()))
694  return failure();
695  // If multiple uses are not allowed, check if xfer has a single use.
696  if (!allowMultipleUses && !xferOp.getResult().hasOneUse())
697  return failure();
698  // If multiple uses are allowed, check if all the xfer uses are extract ops.
699  if (allowMultipleUses &&
700  !llvm::all_of(xferOp->getUses(), [](OpOperand &use) {
701  return isa<vector::ExtractOp, vector::ExtractElementOp>(
702  use.getOwner());
703  }))
704  return failure();
705  // Mask not supported.
706  if (xferOp.getMask())
707  return failure();
708  // Map not supported.
709  if (!xferOp.getPermutationMap().isMinorIdentity())
710  return failure();
711  // Cannot rewrite if the indices may be out of bounds.
712  if (xferOp.hasOutOfBoundsDim())
713  return failure();
714  return success();
715  }
716 
717 private:
718  bool allowMultipleUses;
719 };
720 
721 /// Rewrite `vector.extractelement(vector.transfer_read)` to `memref.load`.
722 ///
723 /// All the users of the transfer op must be either `vector.extractelement` or
724 /// `vector.extract` ops. If `allowMultipleUses` is set to true, rewrite
725 /// transfer ops with any number of users. Otherwise, rewrite only if the
726 /// extract op is the single user of the transfer op. Rewriting a single
727 /// vector load with multiple scalar loads may negatively affect performance.
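/// Illustrative sketch (types assumed; the index arithmetic is shown after
/// folding):
///
///   %v = vector.transfer_read %m[%i], %pad {in_bounds = [true]}
///       : memref<16xf32>, vector<4xf32>
///   %e = vector.extractelement %v[%c2 : index] : vector<4xf32>
///
/// becomes, roughly,
///
///   %idx = affine.apply affine_map<()[s0] -> (s0 + 2)>()[%i]
///   %e   = memref.load %m[%idx] : memref<16xf32>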
728 class RewriteScalarExtractElementOfTransferRead
729  : public RewriteScalarExtractOfTransferReadBase<vector::ExtractElementOp> {
730  using RewriteScalarExtractOfTransferReadBase::
731  RewriteScalarExtractOfTransferReadBase;
732 
733  void rewrite(vector::ExtractElementOp extractOp,
734  PatternRewriter &rewriter) const override {
735  // Construct scalar load.
736  auto loc = extractOp.getLoc();
737  auto xferOp = extractOp.getVector().getDefiningOp<vector::TransferReadOp>();
738  SmallVector<Value> newIndices(xferOp.getIndices().begin(),
739  xferOp.getIndices().end());
740  if (extractOp.getPosition()) {
741  AffineExpr sym0, sym1;
742  bindSymbols(extractOp.getContext(), sym0, sym1);
743  OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
744  rewriter, loc, sym0 + sym1,
745  {newIndices[newIndices.size() - 1], extractOp.getPosition()});
746  if (ofr.is<Value>()) {
747  newIndices[newIndices.size() - 1] = ofr.get<Value>();
748  } else {
749  newIndices[newIndices.size() - 1] =
750  rewriter.create<arith::ConstantIndexOp>(loc,
751  *getConstantIntValue(ofr));
752  }
753  }
754  if (isa<MemRefType>(xferOp.getSource().getType())) {
755  rewriter.replaceOpWithNewOp<memref::LoadOp>(extractOp, xferOp.getSource(),
756  newIndices);
757  } else {
758  rewriter.replaceOpWithNewOp<tensor::ExtractOp>(
759  extractOp, xferOp.getSource(), newIndices);
760  }
761  }
762 };
763 
764 /// Rewrite `vector.extractelement(vector.transfer_read)` to `memref.load`.
765 /// Rewrite `vector.extract(vector.transfer_read)` to `memref.load`.
766 ///
767 /// All the users of the transfer op must be either `vector.extractelement` or
768 /// `vector.extract` ops. If `allowMultipleUses` is set to true, rewrite
769 /// transfer ops with any number of users. Otherwise, rewrite only if the
770 /// extract op is the single user of the transfer op. Rewriting a single
771 /// vector load with multiple scalar loads may negatively affect performance.
772 class RewriteScalarExtractOfTransferRead
773  : public RewriteScalarExtractOfTransferReadBase<vector::ExtractOp> {
774  using RewriteScalarExtractOfTransferReadBase::
775  RewriteScalarExtractOfTransferReadBase;
776 
777  void rewrite(vector::ExtractOp extractOp,
778  PatternRewriter &rewriter) const override {
779  // Construct scalar load.
780  auto xferOp = extractOp.getVector().getDefiningOp<vector::TransferReadOp>();
781  SmallVector<Value> newIndices(xferOp.getIndices().begin(),
782  xferOp.getIndices().end());
783  for (auto [i, pos] : llvm::enumerate(extractOp.getMixedPosition())) {
784  assert(pos.is<Attribute>() && "Unexpected non-constant index");
785  int64_t offset = cast<IntegerAttr>(pos.get<Attribute>()).getInt();
786  int64_t idx = newIndices.size() - extractOp.getNumIndices() + i;
787  OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
788  rewriter, extractOp.getLoc(),
789  rewriter.getAffineSymbolExpr(0) + offset, {newIndices[idx]});
790  if (ofr.is<Value>()) {
791  newIndices[idx] = ofr.get<Value>();
792  } else {
793  newIndices[idx] = rewriter.create<arith::ConstantIndexOp>(
794  extractOp.getLoc(), *getConstantIntValue(ofr));
795  }
796  }
797  if (isa<MemRefType>(xferOp.getSource().getType())) {
798  rewriter.replaceOpWithNewOp<memref::LoadOp>(extractOp, xferOp.getSource(),
799  newIndices);
800  } else {
801  rewriter.replaceOpWithNewOp<tensor::ExtractOp>(
802  extractOp, xferOp.getSource(), newIndices);
803  }
804  }
805 };
806 
807 /// Rewrite transfer_writes of vectors of size 1 (e.g., vector<1x1xf32>)
808 /// to memref.store.
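/// Illustrative sketch (types assumed; vector.extract syntax abbreviated):
///
///   vector.transfer_write %v, %m[%i, %j] {in_bounds = [true, true]}
///       : vector<1x1xf32>, memref<4x4xf32>
///
/// becomes, roughly,
///
///   %s = vector.extract %v[0, 0] : f32 from vector<1x1xf32>
///   memref.store %s, %m[%i, %j] : memref<4x4xf32>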
809 class RewriteScalarWrite : public OpRewritePattern<vector::TransferWriteOp> {
810  using OpRewritePattern::OpRewritePattern;
811 
812  LogicalResult matchAndRewrite(vector::TransferWriteOp xferOp,
813  PatternRewriter &rewriter) const override {
814  // Must be a scalar write.
815  auto vecType = xferOp.getVectorType();
816  if (!llvm::all_of(vecType.getShape(), [](int64_t sz) { return sz == 1; }))
817  return failure();
818  // Mask not supported.
819  if (xferOp.getMask())
820  return failure();
821  // Map not supported.
822  if (!xferOp.getPermutationMap().isMinorIdentity())
823  return failure();
824  // Only float and integer element types are supported.
825  Value scalar;
826  if (vecType.getRank() == 0) {
827  // vector.extract does not support vector<f32> etc., so use
828  // vector.extractelement instead.
829  scalar = rewriter.create<vector::ExtractElementOp>(xferOp.getLoc(),
830  xferOp.getVector());
831  } else {
832  SmallVector<int64_t> pos(vecType.getRank(), 0);
833  scalar = rewriter.create<vector::ExtractOp>(xferOp.getLoc(),
834  xferOp.getVector(), pos);
835  }
836  // Construct a scalar store.
837  if (isa<MemRefType>(xferOp.getSource().getType())) {
838  rewriter.replaceOpWithNewOp<memref::StoreOp>(
839  xferOp, scalar, xferOp.getSource(), xferOp.getIndices());
840  } else {
841  rewriter.replaceOpWithNewOp<tensor::InsertOp>(
842  xferOp, scalar, xferOp.getSource(), xferOp.getIndices());
843  }
844  return success();
845  }
846 };
847 
848 } // namespace
849 
850 void mlir::vector::transferOpflowOpt(RewriterBase &rewriter,
851  Operation *rootOp) {
852  TransferOptimization opt(rewriter, rootOp);
853  // Run store to load forwarding first since it can expose more dead store
854  // opportunities.
855  rootOp->walk([&](vector::TransferReadOp read) {
856  if (isa<MemRefType>(read.getShapedType()))
857  opt.storeToLoadForwarding(read);
858  });
859  opt.removeDeadOp();
860  rootOp->walk([&](vector::TransferWriteOp write) {
861  if (isa<MemRefType>(write.getShapedType()))
862  opt.deadStoreOp(write);
863  });
864  opt.removeDeadOp();
865 }
866 
867 void mlir::vector::populateScalarVectorTransferLoweringPatterns(
868  RewritePatternSet &patterns, PatternBenefit benefit,
869  bool allowMultipleUses) {
870  patterns.add<RewriteScalarExtractElementOfTransferRead,
871  RewriteScalarExtractOfTransferRead>(patterns.getContext(),
872  benefit, allowMultipleUses);
873  patterns.add<RewriteScalarWrite>(patterns.getContext(), benefit);
874 }
875 
876 void mlir::vector::populateVectorTransferDropUnitDimsPatterns(
877  RewritePatternSet &patterns, PatternBenefit benefit) {
878  patterns
879  .add<TransferReadDropUnitDimsPattern, TransferWriteDropUnitDimsPattern>(
880  patterns.getContext(), benefit);
881  populateShapeCastFoldingPatterns(patterns, benefit);
882 }
883 
884 void mlir::vector::populateFlattenVectorTransferPatterns(
885  RewritePatternSet &patterns, PatternBenefit benefit) {
886  patterns.add<FlattenContiguousRowMajorTransferReadPattern,
887  FlattenContiguousRowMajorTransferWritePattern>(
888  patterns.getContext(), benefit);
889  populateShapeCastFoldingPatterns(patterns, benefit);
890 }
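// Typical usage from a pass (a minimal sketch; the pass boilerplate and the
// greedy driver invocation are assumptions, not part of this file):
//
//   RewritePatternSet patterns(op->getContext());
//   vector::populateVectorTransferDropUnitDimsPatterns(patterns);
//   vector::populateFlattenVectorTransferPatterns(patterns);
//   (void)applyPatternsAndFoldGreedily(op, std::move(patterns));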