24 #include "llvm/ADT/DenseMap.h" 25 #include "llvm/Support/Debug.h" 27 #define DEBUG_TYPE "affine-pipeline-data-transfer" 32 struct PipelineDataTransfer
33 :
public AffinePipelineDataTransferBase<PipelineDataTransfer> {
34 void runOnOperation()
override;
35 void runOnAffineForOp(AffineForOp forOp);
37 std::vector<AffineForOp> forOps;
44 std::unique_ptr<OperationPass<func::FuncOp>>
46 return std::make_unique<PipelineDataTransfer>();
53 assert((isa<AffineDmaStartOp, AffineDmaWaitOp>(dmaOp)));
54 if (
auto dmaStartOp = dyn_cast<AffineDmaStartOp>(dmaOp)) {
55 return dmaStartOp.getTagMemRefOperandIndex();
67 auto *forBody = forOp.getBody();
68 OpBuilder bInner(forBody, forBody->begin());
71 auto doubleShape = [&](MemRefType oldMemRefType) -> MemRefType {
76 std::copy(oldShape.begin(), oldShape.end(), newShape.begin() + 1);
80 auto oldMemRefType = oldMemRef.
getType().
cast<MemRefType>();
81 auto newMemRefType = doubleShape(oldMemRefType);
88 if (dim.value() == ShapedType::kDynamicSize)
89 allocOperands.push_back(bOuter.
createOrFold<memref::DimOp>(
90 forOp.getLoc(), oldMemRef, dim.index()));
95 forOp.getLoc(), newMemRefType, allocOperands);
98 auto d0 = bInner.getAffineDimExpr(0);
99 int64_t step = forOp.getStep();
102 auto ivModTwoOp = bInner.create<AffineApplyOp>(forOp.getLoc(), modTwoMap,
103 forOp.getInductionVar());
108 oldMemRef, newMemRef,
113 &*forOp.getBody()->begin()))) {
115 forOp.emitError(
"memref replacement for double buffering failed"));
121 bOuter.
create<memref::DeallocOp>(forOp.getLoc(), newMemRef);
127 void PipelineDataTransfer::runOnOperation() {
134 getOperation().
walk([&](AffineForOp forOp) { forOps.push_back(forOp); });
135 for (
auto forOp : forOps)
136 runOnAffineForOp(forOp);
147 for (
auto it = startIndices.begin(), wIt = waitIndices.begin(),
148 e = startIndices.end();
149 it != e; ++it, ++wIt) {
165 SmallVectorImpl<std::pair<Operation *, Operation *>> &startWaitPairs) {
169 for (
auto &op : *forOp.getBody()) {
171 if (dmaStartOp && dmaStartOp.isSrcMemorySpaceFaster())
172 outgoingDmaOps.push_back(dmaStartOp);
176 for (
auto &op : *forOp.getBody()) {
178 if (isa<AffineDmaWaitOp>(op)) {
179 dmaFinishInsts.push_back(&op);
188 if (!dmaStartOp.isDestMemorySpaceFaster())
194 auto *it = outgoingDmaOps.begin();
195 for (; it != outgoingDmaOps.end(); ++it) {
196 if (it->getDstMemRef() == dmaStartOp.getSrcMemRef())
199 if (it != outgoingDmaOps.end())
203 auto memref = dmaStartOp.getOperand(dmaStartOp.getFasterMemPos());
204 bool escapingUses =
false;
205 for (
auto *user : memref.getUsers()) {
207 if (isa<memref::DeallocOp>(user))
209 if (!forOp.getBody()->findAncestorOpInBlock(*user)) {
210 LLVM_DEBUG(llvm::dbgs()
211 <<
"can't pipeline: buffer is live out of loop\n";);
217 dmaStartInsts.push_back(&op);
221 for (
auto *dmaStartOp : dmaStartInsts) {
222 for (
auto *dmaFinishOp : dmaFinishInsts) {
224 cast<AffineDmaWaitOp>(dmaFinishOp))) {
225 startWaitPairs.push_back({dmaStartOp, dmaFinishOp});
235 void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
237 if (!mayBeConstTripCount) {
238 LLVM_DEBUG(forOp.emitRemark(
"won't pipeline due to unknown trip count"));
245 if (startWaitPairs.empty()) {
246 LLVM_DEBUG(forOp.emitRemark(
"No dma start/finish pairs\n"));
258 for (
auto &pair : startWaitPairs) {
259 auto *dmaStartOp = pair.first;
260 Value oldMemRef = dmaStartOp->getOperand(
261 cast<AffineDmaStartOp>(dmaStartOp).getFasterMemPos());
265 LLVM_DEBUG(llvm::dbgs()
266 <<
"double buffering failed for" << dmaStartOp <<
"\n";);
281 dyn_cast<memref::DeallocOp>(*oldMemRef.
user_begin())) {
290 for (
auto &pair : startWaitPairs) {
291 auto *dmaFinishOp = pair.second;
294 LLVM_DEBUG(llvm::dbgs() <<
"tag double buffering failed\n";);
304 dyn_cast<memref::DeallocOp>(*oldTagMemRef.
user_begin())) {
313 startWaitPairs.clear();
318 for (
auto &pair : startWaitPairs) {
319 auto *dmaStartOp = pair.first;
320 assert(isa<AffineDmaStartOp>(dmaStartOp));
321 instShiftMap[dmaStartOp] = 0;
325 if (!sliceOps.empty()) {
326 for (
auto sliceOp : sliceOps) {
327 instShiftMap[sliceOp.getOperation()] = 0;
335 for (
auto *op : affineApplyInsts) {
336 instShiftMap[op] = 0;
341 for (
auto &op : forOp.getBody()->without_terminator())
342 if (instShiftMap.find(&op) == instShiftMap.end())
343 instShiftMap[&op] = 1;
348 for (
auto &op : forOp.getBody()->without_terminator()) {
349 assert(instShiftMap.find(&op) != instShiftMap.end());
350 shifts[s++] = instShiftMap[&op];
361 LLVM_DEBUG(llvm::dbgs() <<
"Shifts invalid - unexpected\n";);
366 LLVM_DEBUG(llvm::dbgs() <<
"op body skewing failed - unexpected\n";);
Include the generated interface declarations.
LogicalResult affineForOpBodySkew(AffineForOp forOp, ArrayRef< uint64_t > shifts, bool unrollPrologueEpilogue=false)
Skew the operations in an affine.for's body with the specified operation-wise shifts.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Operation is a basic unit of execution within MLIR.
static unsigned getTagMemRefPos(Operation &dmaOp)
static void copy(Location loc, Value dst, Value src, Value size, OpBuilder &builder)
Copies the given number of bytes from src to dst pointers.
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value...
operand_range getTagIndices()
Returns the tag memref indices for this DMA operation.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
bool isOpwiseShiftValid(AffineForOp forOp, ArrayRef< uint64_t > shifts)
Checks whether SSA dominance would be violated if a for op's body operations are shifted by the specifi...
Value getTagMemRef()
Returns the Tag MemRef associated with the DMA operation being waited on.
std::enable_if< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT >::type walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one)...
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
static bool checkTagMatch(AffineDmaStartOp startOp, AffineDmaWaitOp waitOp)
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
IntegerAttr getI64IntegerAttr(int64_t value)
bool hasOneUse() const
Returns true if this value has exactly one use.
operand_range getTagIndices()
Returns the tag memref indices for this DMA operation.
user_iterator user_begin() const
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued...
AffineDmaWaitOp blocks until the completion of a DMA operation associated with the tag element 'tag[i...
Value getTagMemRef()
Returns the Tag MemRef for this DMA operation.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
static bool doubleBuffer(Value oldMemRef, AffineForOp forOp)
Doubles the buffer of the supplied memref on the specified 'affine.for' operation by adding a leading...
Type getType() const
Return the type of this value.
This is a builder type that keeps local references to arguments.
static void findMatchingStartFinishInsts(AffineForOp forOp, SmallVectorImpl< std::pair< Operation *, Operation *>> &startWaitPairs)
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
AffineDmaStartOp starts a non-blocking DMA operation that transfers data from a source memref to a de...
std::unique_ptr< OperationPass< func::FuncOp > > createPipelineDataTransferPass()
Creates a pass to pipeline explicit movement of data across levels of the memory hierarchy.
Optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, None otherwise.
void createAffineComputationSlice(Operation *opInst, SmallVectorImpl< AffineApplyOp > *sliceOps)
Given an operation, inserts one or more single result affine apply operations, results of which are e...
void getReachableAffineApplyOps(ArrayRef< Value > operands, SmallVectorImpl< Operation *> &affineApplyOps)
Returns in affineApplyOps, the sequence of those AffineApplyOp Operations that are reachable via a se...
Builder & setLayout(MemRefLayoutAttrInterface newLayout)
This class helps build Operations.
Builder & setShape(ArrayRef< int64_t > newShape)
LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef, ArrayRef< Value > extraIndices={}, AffineMap indexRemap=AffineMap(), ArrayRef< Value > extraOperands={}, ArrayRef< Value > symbolOperands={}, Operation *domOpFilter=nullptr, Operation *postDomOpFilter=nullptr, bool allowNonDereferencingOps=false, bool replaceInDeallocOp=false)
Replaces all "dereferencing" uses of oldMemRef with newMemRef while optionally remapping the old memr...