#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Debug.h"

#define GEN_PASS_DEF_AFFINEPIPELINEDATATRANSFER
#include "mlir/Dialect/Affine/Passes.h.inc"

#define DEBUG_TYPE "affine-pipeline-data-transfer"
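// This pass overlaps explicit data movement (affine.dma_start/affine.dma_wait)
// with computation in an 'affine.for' loop by double buffering the faster
// memory space buffers (and their tags) and software pipelining the loop body.
//
// Illustrative sketch only (hypothetical IR, not taken from this file): the
// pass targets loops of roughly this shape,
//
//   affine.for %i = 0 to 64 {
//     affine.dma_start %src[%i, %c0], %buf[%c0], %tag[%c0], %num :
//       memref<64x128xf32>, memref<128xf32, 1>, memref<1xi32, 2>
//     affine.dma_wait %tag[%c0], %num : memref<1xi32, 2>
//     "compute"(%buf) : (memref<128xf32, 1>) -> ()
//   }
//
// and rewrites them so that the DMA for iteration i+1 is issued while
// iteration i computes out of the other half of the doubled %buf.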
namespace {
struct PipelineDataTransfer
    : public affine::impl::AffinePipelineDataTransferBase<
          PipelineDataTransfer> {
  void runOnOperation() override;
  void runOnAffineForOp(AffineForOp forOp);

  std::vector<AffineForOp> forOps;
};
} // namespace
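/// Creates a pass to pipeline explicit movement of data across levels of the
/// memory hierarchy.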
std::unique_ptr<OperationPass<func::FuncOp>>
mlir::affine::createPipelineDataTransferPass() {
  return std::make_unique<PipelineDataTransfer>();
}
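// Returns the operand position of the tag memref for the given DMA start or
// DMA wait operation.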
static unsigned getTagMemRefPos(Operation &dmaOp) {
  assert((isa<AffineDmaStartOp, AffineDmaWaitOp>(dmaOp)));
  if (auto dmaStartOp = dyn_cast<AffineDmaStartOp>(dmaOp))
    return dmaStartOp.getTagMemRefOperandIndex();
  // For an affine.dma_wait, the tag memref is the first operand.
  return 0;
}
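/// Doubles the buffer of the supplied memref on the specified 'affine.for'
/// operation by adding a leading dimension of extent two, and replaces the
/// memref's uses inside the loop so that the two halves are selected by the
/// loop IV modulo 2. Returns false if the replacement fails.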
static bool doubleBuffer(Value oldMemRef, AffineForOp forOp) {
  auto *forBody = forOp.getBody();
  OpBuilder bInner(forBody, forBody->begin());

  // Doubles the shape with a leading dimension of extent two.
  auto doubleShape = [&](MemRefType oldMemRefType) -> MemRefType {
    auto oldShape = oldMemRefType.getShape();
    SmallVector<int64_t, 4> newShape(1 + oldMemRefType.getRank());
    newShape[0] = 2;
    std::copy(oldShape.begin(), oldShape.end(), newShape.begin() + 1);
    return MemRefType::Builder(oldMemRefType).setShape(newShape).setLayout({});
  };

  auto oldMemRefType = cast<MemRefType>(oldMemRef.getType());
  auto newMemRefType = doubleShape(oldMemRefType);

  // Allocate the double buffer right before 'forOp'; any dynamic dimensions
  // of the old memref become operands of the new alloc.
  OpBuilder bOuter(forOp);
  SmallVector<Value, 4> allocOperands;
  for (const auto &dim : llvm::enumerate(oldMemRefType.getShape())) {
    if (dim.value() == ShapedType::kDynamic)
      allocOperands.push_back(bOuter.createOrFold<memref::DimOp>(
          forOp.getLoc(), oldMemRef, dim.index()));
  }
  Value newMemRef = bOuter.create<memref::AllocOp>(
      forOp.getLoc(), newMemRefType, allocOperands);

  // Create an 'iv floordiv step mod 2' value to index the leading dimension.
  auto d0 = bInner.getAffineDimExpr(0);
  int64_t step = forOp.getStepAsInt();
  auto modTwoMap =
      AffineMap::get(/*dimCount=*/1, /*symbolCount=*/0, d0.floorDiv(step) % 2);
  auto ivModTwoOp = bInner.create<AffineApplyOp>(forOp.getLoc(), modTwoMap,
                                                 forOp.getInductionVar());

  // Replace all uses of the old memref, adding 'iv mod 2' as a leading index.
  if (failed(replaceAllMemRefUsesWith(
          oldMemRef, newMemRef,
          /*extraIndices=*/{ivModTwoOp},
          /*indexRemap=*/AffineMap(),
          /*extraOperands=*/{},
          /*symbolOperands=*/{},
          /*domOpFilter=*/&*forOp.getBody()->begin()))) {
    LLVM_DEBUG(
        forOp.emitError("memref replacement for double buffering failed"));
    ivModTwoOp.erase();
    return false;
  }
  // Deallocate the double buffer right after the loop.
  bOuter.setInsertionPointAfter(forOp);
  bOuter.create<memref::DeallocOp>(forOp.getLoc(), newMemRef);
  return true;
}
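// Collect all 'affine.for' ops first and then process them: the walk visits
// loops post-order (innermost first), so DMAs in inner loops are pipelined
// before any enclosing loop is transformed.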
void PipelineDataTransfer::runOnOperation() {
  forOps.clear();
  getOperation().walk([&](AffineForOp forOp) { forOps.push_back(forOp); });
  for (auto forOp : forOps)
    runOnAffineForOp(forOp);
}
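// Check if the tag memref and the tag indices of a DMA start operation match
// those of a DMA wait operation.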
static bool checkTagMatch(AffineDmaStartOp startOp, AffineDmaWaitOp waitOp) {
  if (startOp.getTagMemRef() != waitOp.getTagMemRef())
    return false;
  auto startIndices = startOp.getTagIndices();
  auto waitIndices = waitOp.getTagIndices();
  // Both index ranges refer to the same tag memref and thus have the same
  // size; check that the indices are pairwise identical.
  for (auto it = startIndices.begin(), wIt = waitIndices.begin(),
            e = startIndices.end();
       it != e; ++it, ++wIt) {
    if (*it != *wIt)
      return false;
  }
  return true;
}
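// Collects matching (affine.dma_start, affine.dma_wait) pairs from the body
// of 'forOp' that are safe candidates for overlapping with computation.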
static void findMatchingStartFinishInsts(
    AffineForOp forOp,
    SmallVectorImpl<std::pair<Operation *, Operation *>> &startWaitPairs) {
  // Collect outgoing DMA operations - needed to check for dependences below.
  SmallVector<AffineDmaStartOp, 4> outgoingDmaOps;
  for (auto &op : *forOp.getBody()) {
    auto dmaStartOp = dyn_cast<AffineDmaStartOp>(op);
    if (dmaStartOp && dmaStartOp.isSrcMemorySpaceFaster())
      outgoingDmaOps.push_back(dmaStartOp);
  }

  SmallVector<Operation *, 4> dmaStartInsts, dmaFinishInsts;
  for (auto &op : *forOp.getBody()) {
    // Collect DMA finish operations.
    if (isa<AffineDmaWaitOp>(op)) {
      dmaFinishInsts.push_back(&op);
      continue;
    }
    // Only incoming DMAs (into the faster memory space) are pipelined.
    auto dmaStartOp = dyn_cast<AffineDmaStartOp>(op);
    if (!dmaStartOp)
      continue;
    if (!dmaStartOp.isDestMemorySpaceFaster())
      continue;

    // Conservatively skip an incoming DMA whose source buffer is also the
    // destination of an outgoing DMA in this loop body.
    auto *it = outgoingDmaOps.begin();
    for (; it != outgoingDmaOps.end(); ++it) {
      if (it->getDstMemRef() == dmaStartOp.getSrcMemRef())
        break;
    }
    if (it != outgoingDmaOps.end())
      continue;

    // Only double buffer if the faster memory space buffer is not live out of
    // the loop (dealloc's outside the loop are fine).
    auto memref = dmaStartOp.getOperand(dmaStartOp.getFasterMemPos());
    bool escapingUses = false;
    for (auto *user : memref.getUsers()) {
      if (isa<memref::DeallocOp>(user))
        continue;
      if (!forOp.getBody()->findAncestorOpInBlock(*user)) {
        LLVM_DEBUG(llvm::dbgs()
                   << "can't pipeline: buffer is live out of loop\n";);
        escapingUses = true;
        break;
      }
    }
    if (!escapingUses)
      dmaStartInsts.push_back(&op);
  }

  // For each DMA start operation, look for a DMA wait with a matching tag.
  for (auto *dmaStartOp : dmaStartInsts) {
    for (auto *dmaFinishOp : dmaFinishInsts) {
      if (checkTagMatch(cast<AffineDmaStartOp>(dmaStartOp),
                        cast<AffineDmaWaitOp>(dmaFinishOp))) {
        startWaitPairs.push_back({dmaStartOp, dmaFinishOp});
        break;
      }
    }
  }
}
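// Pipelines the DMAs in 'forOp': double buffers the faster memory space
// buffers and their tag memrefs, assigns a shift of 0 to the DMA start
// operations (and the affine.apply ops feeding them) and a shift of 1 to
// everything else, then skews the loop body accordingly.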
void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
  auto mayBeConstTripCount = getConstantTripCount(forOp);
  if (!mayBeConstTripCount) {
    LLVM_DEBUG(forOp.emitRemark("won't pipeline due to unknown trip count"));
    return;
  }

  SmallVector<std::pair<Operation *, Operation *>, 4> startWaitPairs;
  findMatchingStartFinishInsts(forOp, startWaitPairs);
  if (startWaitPairs.empty()) {
    LLVM_DEBUG(forOp.emitRemark("No dma start/finish pairs\n"));
    return;
  }

  // Double buffer the faster memory space buffers used by the DMA starts.
  for (auto &pair : startWaitPairs) {
    auto *dmaStartOp = pair.first;
    Value oldMemRef = dmaStartOp->getOperand(
        cast<AffineDmaStartOp>(dmaStartOp).getFasterMemPos());
    if (!doubleBuffer(oldMemRef, forOp)) {
      LLVM_DEBUG(llvm::dbgs()
                 << "double buffering failed for" << dmaStartOp << "\n";);
      return;
    }
    // Erase the old buffer's alloc if it is now dead or only dealloc'ed.
    if (oldMemRef.use_empty()) {
      if (auto *allocOp = oldMemRef.getDefiningOp())
        allocOp->erase();
    } else if (oldMemRef.hasOneUse()) {
      if (auto dealloc =
              dyn_cast<memref::DeallocOp>(*oldMemRef.user_begin())) {
        dealloc.erase();
        oldMemRef.getDefiningOp()->erase();
      }
    }
  }

  // Double buffer the tag memrefs as well.
  for (auto &pair : startWaitPairs) {
    auto *dmaFinishOp = pair.second;
    Value oldTagMemRef =
        dmaFinishOp->getOperand(getTagMemRefPos(*dmaFinishOp));
    if (!doubleBuffer(oldTagMemRef, forOp)) {
      LLVM_DEBUG(llvm::dbgs() << "tag double buffering failed\n";);
      return;
    }
    // Erase the old tag's alloc if it is now dead or only dealloc'ed.
    if (oldTagMemRef.use_empty()) {
      if (auto *allocOp = oldTagMemRef.getDefiningOp())
        allocOp->erase();
    } else if (oldTagMemRef.hasOneUse()) {
      if (auto dealloc =
              dyn_cast<memref::DeallocOp>(*oldTagMemRef.user_begin())) {
        dealloc.erase();
        oldTagMemRef.getDefiningOp()->erase();
      }
    }
  }

  // Double buffering invalidated the old DMA start/wait pairs; recollect them.
  startWaitPairs.clear();
  findMatchingStartFinishInsts(forOp, startWaitPairs);

  // DMA start operations (and the affine.apply ops computing their operands)
  // are shifted by 0; all other operations are shifted by 1.
  DenseMap<Operation *, unsigned> instShiftMap;
  for (auto &pair : startWaitPairs) {
    auto *dmaStartOp = pair.first;
    assert(isa<AffineDmaStartOp>(dmaStartOp));
    instShiftMap[dmaStartOp] = 0;
    // Pull the DMA start's affine operand computation into its own slice so
    // it can be shifted along with the DMA start.
    SmallVector<AffineApplyOp, 4> sliceOps;
    createAffineComputationSlice(dmaStartOp, &sliceOps);
    if (!sliceOps.empty()) {
      for (auto sliceOp : sliceOps) {
        instShiftMap[sliceOp.getOperation()] = 0;
      }
    } else {
      // If a slice wasn't created, the reachable affine.apply ops from the
      // DMA start's operands are the ones that go with it.
      SmallVector<Operation *, 4> affineApplyInsts;
      SmallVector<Value, 4> operands(dmaStartOp->operand_begin(),
                                     dmaStartOp->operand_end());
      getReachableAffineApplyOps(operands, affineApplyInsts);
      for (auto *op : affineApplyInsts) {
        instShiftMap[op] = 0;
      }
    }
  }
  // Everything else is shifted by one.
  for (auto &op : forOp.getBody()->without_terminator())
    instShiftMap.try_emplace(&op, 1);

  // Gather the shifts in loop body order.
  std::vector<uint64_t> shifts(forOp.getBody()->getOperations().size());
  unsigned s = 0;
  for (auto &op : forOp.getBody()->without_terminator()) {
    assert(instShiftMap.contains(&op));
    shifts[s++] = instShiftMap[&op];
    // Tag operations with their shifts for debugging purposes.
    LLVM_DEBUG({
      OpBuilder b(&op);
      op.setAttr("shift", b.getI64IntegerAttr(shifts[s - 1]));
    });
  }

  if (!isOpwiseShiftValid(forOp, shifts)) {
    // The shifts would violate dependences.
    LLVM_DEBUG(llvm::dbgs() << "Shifts invalid - unexpected\n";);
    return;
  }

  if (failed(affineForOpBodySkew(forOp, shifts))) {
    LLVM_DEBUG(llvm::dbgs() << "op body skewing failed - unexpected\n";);
    return;
  }
}