#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"

using llvm::dbgs;

#define DEBUG_TYPE "linalg-hoisting"

#define DBGS() (dbgs() << '[' << DEBUG_TYPE << "] ")

using namespace mlir;
using namespace mlir::linalg;

/// A hoistable unit formed by a vector.transfer_write, together with the
/// optional tensor.insert_slice that inserts the written slice back into the
/// loop's iteration tensor.
struct HoistableWrite {
  vector::TransferWriteOp transferWriteOp;
  tensor::InsertSliceOp insertSliceOp;
};

/// A hoistable unit formed by a vector.transfer_read, together with the
/// optional tensor.extract_slice that produces the tensor it reads from.
struct HoistableRead {
  vector::TransferReadOp transferReadOp;
  tensor::ExtractSliceOp extractSliceOp;
};
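// Illustrative IR for a HoistableWrite with an insert_slice (a sketch, not
// from the original source; names, types and offsets are made up):
//
//   %w = vector.transfer_write %vec, %slice[%c0, %c0]
//       : vector<4x8xf32>, tensor<4x8xf32>
//   %y = tensor.insert_slice %w into %iterArg[%a, %b] [4, 8] [1, 1]
//       : tensor<4x8xf32> into tensor<?x?xf32>
//
// The matching HoistableRead is the transfer_read (plus extract_slice) that
// accesses the same slice at the same indices.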
/// Return true if op1 and op2 are the same constant or the same SSA value.
static bool isEqualOffsetSizeOrStride(OpFoldResult op1, OpFoldResult op2) {
  auto getConstantIntValue = [](OpFoldResult ofr) -> llvm::Optional<int64_t> {
    Attribute attr = ofr.dyn_cast<Attribute>();
    // If `ofr` wraps an SSA value, it only folds to a constant when produced
    // by an arith.constant.
    if (!attr && ofr.get<Value>().getDefiningOp<arith::ConstantOp>())
      attr = ofr.get<Value>().getDefiningOp<arith::ConstantOp>().getValue();
    if (auto intAttr = attr.dyn_cast_or_null<IntegerAttr>())
      return intAttr.getValue().getSExtValue();
    return llvm::None;
  };
  auto cst1 = getConstantIntValue(op1), cst2 = getConstantIntValue(op2);
  if (cst1 && cst2 && *cst1 == *cst2)
    return true;
  auto v1 = op1.dyn_cast<Value>(), v2 = op2.dyn_cast<Value>();
  return v1 && v2 && v1 == v2;
}
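// For instance (a sketch): an OpFoldResult wrapping the attribute `4 : index`
// and one wrapping the result of `%c4 = arith.constant 4 : index` compare
// equal; two distinct loop-variant SSA values never do.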
/// Return true if all offsets, sizes and strides are equal.
static bool sameOffsetsSizesAndStrides(tensor::ExtractSliceOp s,
                                       tensor::InsertSliceOp si) {
  if (s.getStaticOffsets().size() != si.getStaticOffsets().size())
    return false;
  if (s.getStaticSizes().size() != si.getStaticSizes().size())
    return false;
  if (s.getStaticStrides().size() != si.getStaticStrides().size())
    return false;
  for (auto it : llvm::zip(s.getMixedOffsets(), si.getMixedOffsets()))
    if (!isEqualOffsetSizeOrStride(std::get<0>(it), std::get<1>(it)))
      return false;
  for (auto it : llvm::zip(s.getMixedSizes(), si.getMixedSizes()))
    if (!isEqualOffsetSizeOrStride(std::get<0>(it), std::get<1>(it)))
      return false;
  for (auto it : llvm::zip(s.getMixedStrides(), si.getMixedStrides()))
    if (!isEqualOffsetSizeOrStride(std::get<0>(it), std::get<1>(it)))
      return false;
  return true;
}
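// Illustrative matching pair (a sketch with made-up values):
//
//   %e = tensor.extract_slice %t[%i, 0] [4, 8] [1, 1]
//       : tensor<?x?xf32> to tensor<4x8xf32>
//   %y = tensor.insert_slice %w into %t[%i, 0] [4, 8] [1, 1]
//       : tensor<4x8xf32> into tensor<?x?xf32>
//
// Offsets (%i, 0), sizes (4, 8) and strides (1, 1) all compare equal, so the
// two ops address the same tensor chunk.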
/// Look for a HoistableRead, among the given tensor's uses, that accesses the
/// same offsets as the HoistableWrite.
static HoistableRead findMatchingTransferRead(HoistableWrite write,
                                              Value srcTensor) {
  assert(write.transferWriteOp &&
         "expected hoistable write to have a .transfer_write");

  LLVM_DEBUG(DBGS() << "findMatchingTransferRead for: "
                    << *write.transferWriteOp.getOperation() << "\n");
  if (write.insertSliceOp)
    LLVM_DEBUG(DBGS() << "findMatchingTransferRead insertSliceOp: "
                      << *write.insertSliceOp.getOperation() << "\n");

  for (Operation *user : srcTensor.getUsers()) {
    LLVM_DEBUG(DBGS() << "findMatchingTransferRead inspect user: " << *user
                      << "\n");

    // If the HoistableWrite involves an insert_slice, a matching read must
    // go through a matching extract_slice.
    tensor::ExtractSliceOp sliceOp;
    Operation *maybeTransferReadUser = user;
    if (write.insertSliceOp) {
      sliceOp = dyn_cast<tensor::ExtractSliceOp>(user);
      if (!sliceOp || sliceOp.getResult().getType() !=
                          write.insertSliceOp.getSource().getType())
        continue;

      LLVM_DEBUG(DBGS() << "check whether sameOffsetsSizesAndStrides: "
                        << *sliceOp << " vs " << *write.insertSliceOp << "\n");
      if (!sameOffsetsSizesAndStrides(sliceOp, write.insertSliceOp))
        continue;

      LLVM_DEBUG(DBGS() << "sameOffsetsSizesAndStrides: SUCCESS\n");
      // The extract_slice is hoistable iff it has exactly two uses: the
      // transfer_write we want to hoist and one other user, which must turn
      // out to be the matching transfer_read.
      bool skip = false;
      Operation *otherUser = nullptr;
      for (Operation *u : sliceOp->getUsers()) {
        if (u == write.transferWriteOp)
          continue;
        if (otherUser) {
          skip = true;
          break;
        }
        otherUser = u;
      }
      if (skip || !otherUser)
        continue;
      maybeTransferReadUser = otherUser;
    }

    LLVM_DEBUG(DBGS() << "maybeTransferReadUser: " << *maybeTransferReadUser
                      << "\n");
    auto read = dyn_cast<vector::TransferReadOp>(maybeTransferReadUser);
    if (read && read.getIndices() == write.transferWriteOp.getIndices() &&
        read.getVectorType() == write.transferWriteOp.getVectorType())
      return HoistableRead{read, sliceOp};
  }
  return HoistableRead();
}
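// Illustrative match (a sketch with made-up names): a read unit that pairs
// with a write unit inserting at the same [%a, %b] offsets:
//
//   %e = tensor.extract_slice %iterArg[%a, %b] [4, 8] [1, 1]
//       : tensor<?x?xf32> to tensor<4x8xf32>
//   %v = vector.transfer_read %e[%c0, %c0], %pad
//       : tensor<4x8xf32>, vector<4x8xf32>
//
// Here the extract_slice has exactly two uses: the transfer_read above and
// the hoistable transfer_write.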
/// Check whether the chunk of data inserted by the HoistableWrite is read by
/// any op other than the HoistableRead candidate.
static bool tensorChunkAccessedByUnknownOp(HoistableWrite write,
                                           HoistableRead candidateRead,
                                           BlockArgument tensorArg) {
  // Make sure none of the other uses reads the part of the tensor modified
  // by the transfer_write.
  llvm::SmallVector<Value::use_range, 1> uses;
  uses.push_back(tensorArg.getUses());
  while (!uses.empty()) {
    for (OpOperand &use : uses.pop_back_val()) {
      Operation *user = use.getOwner();
      if (user == candidateRead.transferReadOp ||
          user == candidateRead.extractSliceOp ||
          user == write.transferWriteOp || user == write.insertSliceOp)
        continue;
      // Bail on any other slice op: a stronger analysis would be needed.
      if (isa<tensor::ExtractSliceOp, tensor::InsertSliceOp>(user))
        return true;
      // Follow transitive uses through a vector.transfer_write.
      if (auto writeUser = dyn_cast<vector::TransferWriteOp>(user)) {
        uses.push_back(writeUser->getResult(0).getUses());
        continue;
      }
      // Follow nested uses through an scf::ForOp: pass-through tensor
      // arguments may be left over from previous levels of hoisting.
      if (auto forUser = dyn_cast<scf::ForOp>(user)) {
        Value arg = forUser.getLoopBody().getArgument(
            use.getOperandNumber() - forUser.getNumControlOperands() +
            /*iv=*/1);
        uses.push_back(arg.getUses());
        continue;
      }
      // Follow a yield as long as it doesn't escape the original region.
      scf::YieldOp yieldUser = dyn_cast<scf::YieldOp>(user);
      if (yieldUser && write.transferWriteOp->getParentOp()->isAncestor(
                           yieldUser->getParentOp())) {
        Value ret = yieldUser->getParentOp()->getResult(use.getOperandNumber());
        uses.push_back(ret.getUses());
        continue;
      }
      // Any remaining user must be a transfer_read provably disjoint from
      // the write.
      auto read = dyn_cast<vector::TransferReadOp>(user);
      if (!read || !vector::isDisjointTransferIndices(
                       cast<VectorTransferOpInterface>(read.getOperation()),
                       cast<VectorTransferOpInterface>(
                           write.transferWriteOp.getOperation())))
        return true;
    }
  }
  return false;
}
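// Example of a disqualifying use (a sketch): any other read of the written
// chunk that cannot be proven disjoint, e.g.
//
//   %other = vector.transfer_read %iterArg[%a, %b], %pad
//       : tensor<?x?xf32>, vector<4x8xf32>
//
// makes this function return true and keeps the pair inside the loop.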
/// Return the forOp-invariant HoistableWrite that produces `yieldOperand`.
/// Return the null HoistableWrite() if it is not comprised of a
/// vector.transfer_write + optional tensor.insert_slice, or if any of the
/// indexings is `forOp`-dependent.
static HoistableWrite
getLoopInvariantTransferWriteOpDefining(scf::ForOp forOp,
                                        OpOperand &yieldOperand) {
  Value v = yieldOperand.get();
  if (auto write = v.getDefiningOp<vector::TransferWriteOp>()) {
    // Indexing must not depend on `forOp`.
    for (Value operand : write.getIndices())
      if (!forOp.isDefinedOutsideOfLoop(operand))
        return HoistableWrite();

    return HoistableWrite{write, nullptr};
  }

  if (auto insertSliceOp = v.getDefiningOp<tensor::InsertSliceOp>()) {
    // The inserted slice must come from a vector.transfer_write.
    auto write =
        insertSliceOp.getSource().getDefiningOp<vector::TransferWriteOp>();
    if (!write)
      return HoistableWrite();

    // The tensor inserted into must be a BBArg of `forOp` at the position
    // matching `yieldOperand`'s.
    auto bbArg = insertSliceOp.getDest().dyn_cast<BlockArgument>();
    if (!bbArg || bbArg.getOwner()->getParentOp() != forOp ||
        bbArg.getArgNumber() != /*numIvs=*/1 + yieldOperand.getOperandNumber())
      return HoistableWrite();

    // Indexing must not depend on `forOp`.
    for (Value operand : insertSliceOp->getOperands().drop_front(
             tensor::InsertSliceOp::getOffsetSizeAndStrideStartOperandIndex()))
      if (!forOp.isDefinedOutsideOfLoop(operand))
        return HoistableWrite();

    return HoistableWrite{write, insertSliceOp};
  }

  return HoistableWrite();
}
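// Illustrative qualifying yield (a sketch with made-up names): the value
// yielded for the iter_arg %t is produced by a write unit whose indexings are
// all defined above the loop:
//
//   %r = scf.for ... iter_args(%t = %init) -> (tensor<?x?xf32>) {
//     ...
//     %w = vector.transfer_write %vec, %e[%c0, %c0]
//         : vector<4x8xf32>, tensor<4x8xf32>
//     %y = tensor.insert_slice %w into %t[%a, %b] [4, 8] [1, 1]
//         : tensor<4x8xf32> into tensor<?x?xf32>
//     scf.yield %y : tensor<?x?xf32>
//   }
//
// with %a, %b and %c0 defined outside the loop.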
/// Mechanical hoisting of a matching HoistableRead / HoistableWrite pair.
static void hoistReadWrite(HoistableRead read, HoistableWrite write,
                           BlockArgument tensorBBArg) {
  scf::ForOp forOp = cast<scf::ForOp>(tensorBBArg.getOwner()->getParentOp());
  assert(read.transferReadOp && write.transferWriteOp &&
         "expected transfer_read and transfer_write ops to be set");
  assert(((read.extractSliceOp && write.insertSliceOp) ||
          (!read.extractSliceOp && !write.insertSliceOp)) &&
         "expected matching extract_slice / insert_slice");
  LLVM_DEBUG(DBGS() << "In forOp:\n"
                    << *forOp.getOperation()
                    << "\nHoist: " << *read.transferReadOp.getOperation()
                    << "\nHoist: " << *write.transferWriteOp.getOperation()
                    << "\nInvolving: " << tensorBBArg << "\n");

  // If a read slice is present, hoist it.
  if (read.extractSliceOp)
    forOp.moveOutOfLoop(read.extractSliceOp);

  // Hoist the transfer_read op.
  forOp.moveOutOfLoop(read.transferReadOp);

  // The BBArg list is [iv, iter_args...]: skip the induction variable.
  unsigned initArgNumber = tensorBBArg.getArgNumber() - /*numIvs=*/1;

  // Update the source tensor to read from the corresponding init arg.
  if (read.extractSliceOp)
    read.extractSliceOp.getSourceMutable().assign(
        forOp.getInitArgs()[initArgNumber]);
  else
    read.transferReadOp.getSourceMutable().assign(
        forOp.getInitArgs()[initArgNumber]);

  // Hoist the write (and insert_slice, if any) after the loop.
  if (write.insertSliceOp)
    write.insertSliceOp->moveAfter(forOp);
  write.transferWriteOp->moveAfter(forOp);

  // Update the yield to forward the unmodified tensor.
  auto yieldOp = cast<scf::YieldOp>(forOp.getRegion().front().getTerminator());
  if (write.insertSliceOp)
    yieldOp->setOperand(initArgNumber, write.insertSliceOp.getDest());
  else
    yieldOp->setOperand(initArgNumber, write.transferWriteOp.getSource());

  // Rewrite `forOp` so it also yields the vector that feeds the hoisted
  // write.
  OpBuilder b(read.transferReadOp);
  NewYieldValueFn yieldFn = [&](OpBuilder &b, Location loc,
                                ArrayRef<BlockArgument> newBBArgs) {
    return SmallVector<Value>{write.transferWriteOp.getVector()};
  };
  auto newForOp = replaceLoopWithNewYields(
      b, forOp, read.transferReadOp.getVector(), yieldFn);

  // The write has been hoisted: update its vector and tensor sources and
  // replace the loop result with the tensor now produced outside the loop.
  // If an insert_slice is present, it carries the tensor update.
  if (write.insertSliceOp) {
    newForOp.getResult(initArgNumber)
        .replaceAllUsesWith(write.insertSliceOp.getResult());
    write.transferWriteOp.getSourceMutable().assign(
        read.extractSliceOp.getResult());
    write.insertSliceOp.getDestMutable().assign(
        read.extractSliceOp.getSource());
  } else {
    newForOp.getResult(initArgNumber)
        .replaceAllUsesWith(write.transferWriteOp.getResult());
    write.transferWriteOp.getSourceMutable().assign(
        newForOp.getResult(initArgNumber));
  }

  // Always write the vector yielded by the new loop.
  write.transferWriteOp.getVectorMutable().assign(
      newForOp.getResults().back());
}
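// End-to-end effect of hoistReadWrite (a sketch, not from the original
// source; names and types are made up). Before:
//
//   %r = scf.for %i = %lb to %ub step %s iter_args(%t = %init)
//       -> (tensor<?x?xf32>) {
//     %e = tensor.extract_slice %t[%a, %b] [4, 8] [1, 1]
//     %v = vector.transfer_read %e[%c0, %c0], %pad
//     %v2 = "test.compute"(%v) : (vector<4x8xf32>) -> vector<4x8xf32>
//     %w = vector.transfer_write %v2, %e[%c0, %c0]
//     %y = tensor.insert_slice %w into %t[%a, %b] [4, 8] [1, 1]
//     scf.yield %y : tensor<?x?xf32>
//   }
//
// After (the loop now circulates the vector; read and write moved out):
//
//   %e = tensor.extract_slice %init[%a, %b] [4, 8] [1, 1]
//   %v = vector.transfer_read %e[%c0, %c0], %pad
//   %r:2 = scf.for %i = %lb to %ub step %s iter_args(%t = %init, %iv = %v)
//       -> (tensor<?x?xf32>, vector<4x8xf32>) {
//     %v2 = "test.compute"(%iv) : (vector<4x8xf32>) -> vector<4x8xf32>
//     scf.yield %t, %v2 : tensor<?x?xf32>, vector<4x8xf32>
//   }
//   %w = vector.transfer_write %r#1, %e[%c0, %c0]
//   %y = tensor.insert_slice %w into %init[%a, %b] [4, 8] [1, 1]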
/// Same behavior as hoistRedundantVectorTransfers but works on tensors
/// instead of buffers.
void mlir::linalg::hoistRedundantVectorTransfersOnTensor(func::FuncOp func) {
  bool changed = true;
  while (changed) {
    changed = false;
    func.walk([&](scf::ForOp forOp) {
      Operation *yield = forOp.getBody()->getTerminator();
      for (const auto &it : llvm::enumerate(forOp.getRegionIterArgs())) {
        OpOperand &ret = yield->getOpOperand(it.index());
        HoistableWrite write =
            getLoopInvariantTransferWriteOpDefining(forOp, ret);
        if (!write.transferWriteOp || !write.transferWriteOp->hasOneUse())
          continue;
        LLVM_DEBUG(dbgs() << "\n";
                   DBGS() << "Candidate write for hoisting: "
                          << *write.transferWriteOp.getOperation() << "\n");
        if (write.insertSliceOp)
          LLVM_DEBUG(DBGS() << "Candidate insert_slice for hoisting: "
                            << *write.insertSliceOp.getOperation() << "\n");
        if (llvm::any_of(write.transferWriteOp.getIndices(),
                         [&forOp](Value index) {
                           return !forOp.isDefinedOutsideOfLoop(index);
                         }))
          continue;
        // Find a matching candidate read.
        HoistableRead matchingRead =
            findMatchingTransferRead(write, it.value());
        // Make sure no other use reads the part of the tensor modified by
        // the transfer_write.
        if (!matchingRead.transferReadOp ||
            tensorChunkAccessedByUnknownOp(write, matchingRead, it.value()))
          continue;

        LLVM_DEBUG(DBGS() << "Start hoisting\n");
        hoistReadWrite(matchingRead, write, it.value());
        changed = true;
      }
    });
    // Fold the now-redundant loop arguments and yields before looking for
    // more hoisting opportunities.
    if (changed) {
      RewritePatternSet patterns(func->getContext());
      scf::ForOp::getCanonicalizationPatterns(patterns, func->getContext());
      (void)applyPatternsAndFoldGreedily(func, std::move(patterns));
    }
  }
}
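// Minimal usage sketch (an assumption, not part of this file): both entry
// points are typically driven from a pass over func.func. The pass class and
// its name below are made up for illustration:
//
//   #include "mlir/Dialect/Linalg/Transforms/Hoisting.h"
//
//   struct ExampleHoistingPass
//       : public PassWrapper<ExampleHoistingPass,
//                            OperationPass<func::FuncOp>> {
//     void runOnOperation() override {
//       linalg::hoistRedundantVectorTransfersOnTensor(getOperation());
//       linalg::hoistRedundantVectorTransfers(getOperation());
//     }
//   };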
/// Hoist vector.transfer_read / vector.transfer_write pairs on buffers out of
/// the immediately enclosing scf::ForOp, iteratively. Hoisting makes the loop
/// yield the vector value that previously transited through memory.
void mlir::linalg::hoistRedundantVectorTransfers(func::FuncOp func) {
  bool changed = true;
  while (changed) {
    changed = false;
    // First move loop-invariant ops out of their loops; ops cannot be moved
    // while the walk below is in progress.
    func.walk(
        [&](LoopLikeOpInterface loopLike) { moveLoopInvariantCode(loopLike); });

    func.walk([&](vector::TransferReadOp transferRead) {
      if (!transferRead.getShapedType().isa<MemRefType>())
        return WalkResult::advance();
      LLVM_DEBUG(DBGS() << "Candidate for hoisting: "
                        << *transferRead.getOperation() << "\n");
      auto loop = dyn_cast<scf::ForOp>(transferRead->getParentOp());
      LLVM_DEBUG(DBGS() << "Parent op: " << *transferRead->getParentOp()
                        << "\n");
      if (!loop)
        return WalkResult::advance();
      LLVM_DEBUG(DBGS() << "Candidate read: " << *transferRead.getOperation()
                        << "\n");

      // Find the last transfer_write in the forward slice of `transferRead`
      // that accesses the same memref.
      SetVector<Operation *> forwardSlice;
      getForwardSlice(transferRead.getOperation(), &forwardSlice);
      vector::TransferWriteOp transferWrite;
      for (auto *sliceOp : llvm::reverse(forwardSlice)) {
        auto candidateWrite = dyn_cast<vector::TransferWriteOp>(sliceOp);
        if (!candidateWrite ||
            candidateWrite.getSource() != transferRead.getSource())
          continue;
        transferWrite = candidateWrite;
      }

      // All operands of the transfer_read must be defined outside the loop.
      for (auto operand : transferRead.getOperands())
        if (!loop.isDefinedOutsideOfLoop(operand))
          return WalkResult::advance();
      // Only hoist transfer_read / transfer_write pairs for now.
      if (!transferWrite)
        return WalkResult::advance();
      LLVM_DEBUG(DBGS() << "Candidate: " << *transferWrite.getOperation()
                        << "\n");

      // The pair is hoistable only if read and write access the very same
      // location: same indices and same vector type.
      if (transferRead.getIndices() != transferWrite.getIndices() ||
          transferRead.getVectorType() != transferWrite.getVectorType())
        return WalkResult::advance();
      // The read must dominate the write it is paired with.
      DominanceInfo dom(loop);
      if (!dom.properlyDominates(transferRead.getOperation(), transferWrite))
        return WalkResult::advance();

      // Approximate aliasing: no other op in the loop may access the same
      // memref unless it is a transfer op on a statically disjoint slice.
      for (auto &use : transferRead.getSource().getUses()) {
        if (!loop->isAncestor(use.getOwner()))
          continue;
        if (use.getOwner() == transferRead.getOperation() ||
            use.getOwner() == transferWrite.getOperation())
          continue;
        if (auto transferWriteUse =
                dyn_cast<vector::TransferWriteOp>(use.getOwner())) {
          if (!vector::isDisjointTransferSet(
                  cast<VectorTransferOpInterface>(transferWrite.getOperation()),
                  cast<VectorTransferOpInterface>(
                      transferWriteUse.getOperation())))
            return WalkResult::advance();
        } else if (auto transferReadUse =
                       dyn_cast<vector::TransferReadOp>(use.getOwner())) {
          if (!vector::isDisjointTransferSet(
                  cast<VectorTransferOpInterface>(transferWrite.getOperation()),
                  cast<VectorTransferOpInterface>(
                      transferReadUse.getOperation())))
            return WalkResult::advance();
        } else {
          // Unknown use: cannot prove it doesn't alias with the pair.
          return WalkResult::advance();
        }
      }

      // Hoist the read before the loop and the write after it.
      loop.moveOutOfLoop(transferRead);
      transferWrite->moveAfter(loop);

      // Rewrite `loop` so it yields the vector carried from the hoisted read
      // to the hoisted write.
      OpBuilder b(transferRead);
      NewYieldValueFn yieldFn = [&](OpBuilder &b, Location loc,
                                    ArrayRef<BlockArgument> newBBArgs) {
        return SmallVector<Value>{transferWrite.getVector()};
      };
      auto newForOp = replaceLoopWithNewYields(
          b, loop, transferRead.getVector(), yieldFn);
      transferWrite.getVectorMutable().assign(newForOp.getResults().back());

      changed = true;
      loop.erase();
      // Interrupt and restart: erasing the loop invalidates the walk.
      return WalkResult::interrupt();
    });
  }
}
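// Illustrative before/after on buffers (a sketch with made-up names):
//
//   scf.for %i = %lb to %ub step %s {
//     %v = vector.transfer_read %A[%c0, %c0], %pad
//         : memref<?x?xf32>, vector<4xf32>
//     %v2 = "test.compute"(%v) : (vector<4xf32>) -> vector<4xf32>
//     vector.transfer_write %v2, %A[%c0, %c0]
//         : vector<4xf32>, memref<?x?xf32>
//   }
//
// becomes
//
//   %v0 = vector.transfer_read %A[%c0, %c0], %pad
//       : memref<?x?xf32>, vector<4xf32>
//   %r = scf.for %i = %lb to %ub step %s iter_args(%v = %v0)
//       -> (vector<4xf32>) {
//     %v2 = "test.compute"(%v) : (vector<4xf32>) -> vector<4xf32>
//     scf.yield %v2 : vector<4xf32>
//   }
//   vector.transfer_write %r, %A[%c0, %c0] : vector<4xf32>, memref<?x?xf32>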