24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/Support/Debug.h"
29 #define GEN_PASS_DEF_AFFINEPIPELINEDATATRANSFER
30 #include "mlir/Dialect/Affine/Passes.h.inc"
34 #define DEBUG_TYPE "affine-pipeline-data-transfer"
40 struct PipelineDataTransfer
41 :
public affine::impl::AffinePipelineDataTransferBase<
42 PipelineDataTransfer> {
43 void runOnOperation()
override;
44 void runOnAffineForOp(AffineForOp forOp);
46 std::vector<AffineForOp> forOps;
53 std::unique_ptr<OperationPass<func::FuncOp>>
55 return std::make_unique<PipelineDataTransfer>();
62 assert((isa<AffineDmaStartOp, AffineDmaWaitOp>(dmaOp)));
63 if (
auto dmaStartOp = dyn_cast<AffineDmaStartOp>(dmaOp)) {
64 return dmaStartOp.getTagMemRefOperandIndex();
76 auto *forBody = forOp.getBody();
77 OpBuilder bInner(forBody, forBody->begin());
80 auto doubleShape = [&](MemRefType oldMemRefType) -> MemRefType {
85 std::copy(oldShape.begin(), oldShape.end(), newShape.begin() + 1);
89 auto oldMemRefType = cast<MemRefType>(oldMemRef.
getType());
90 auto newMemRefType = doubleShape(oldMemRefType);
97 if (dim.value() == ShapedType::kDynamic)
98 allocOperands.push_back(bOuter.
createOrFold<memref::DimOp>(
99 forOp.getLoc(), oldMemRef, dim.index()));
103 Value newMemRef = memref::AllocOp::create(bOuter, forOp.getLoc(),
104 newMemRefType, allocOperands);
108 int64_t step = forOp.getStepAsInt();
111 auto ivModTwoOp = AffineApplyOp::create(bInner, forOp.getLoc(), modTwoMap,
112 forOp.getInductionVar());
116 auto userFilterFn = [&](
Operation *user) {
117 auto domInfo = std::make_unique<DominanceInfo>(
118 forOp->getParentOfType<FunctionOpInterface>());
119 return domInfo->dominates(&*forOp.getBody()->begin(), user);
125 {}, userFilterFn))) {
127 forOp.emitError(
"memref replacement for double buffering failed"));
133 memref::DeallocOp::create(bOuter, forOp.getLoc(), newMemRef);
139 void PipelineDataTransfer::runOnOperation() {
146 getOperation().walk([&](AffineForOp forOp) { forOps.push_back(forOp); });
147 for (
auto forOp : forOps)
148 runOnAffineForOp(forOp);
159 for (
auto it = startIndices.begin(), wIt = waitIndices.begin(),
160 e = startIndices.end();
161 it != e; ++it, ++wIt) {
177 SmallVectorImpl<std::pair<Operation *, Operation *>> &startWaitPairs) {
181 for (
auto &op : *forOp.getBody()) {
182 auto dmaStartOp = dyn_cast<AffineDmaStartOp>(op);
183 if (dmaStartOp && dmaStartOp.isSrcMemorySpaceFaster())
184 outgoingDmaOps.push_back(dmaStartOp);
188 for (
auto &op : *forOp.getBody()) {
190 if (isa<AffineDmaWaitOp>(op)) {
191 dmaFinishInsts.push_back(&op);
194 auto dmaStartOp = dyn_cast<AffineDmaStartOp>(op);
200 if (!dmaStartOp.isDestMemorySpaceFaster())
206 auto *it = outgoingDmaOps.begin();
207 for (; it != outgoingDmaOps.end(); ++it) {
208 if (it->getDstMemRef() == dmaStartOp.getSrcMemRef())
211 if (it != outgoingDmaOps.end())
215 auto memref = dmaStartOp.getOperand(dmaStartOp.getFasterMemPos());
216 bool escapingUses =
false;
217 for (
auto *user : memref.getUsers()) {
219 if (isa<memref::DeallocOp>(user))
221 if (!forOp.getBody()->findAncestorOpInBlock(*user)) {
222 LLVM_DEBUG(llvm::dbgs()
223 <<
"can't pipeline: buffer is live out of loop\n";);
229 dmaStartInsts.push_back(&op);
233 for (
auto *dmaStartOp : dmaStartInsts) {
234 for (
auto *dmaFinishOp : dmaFinishInsts) {
236 cast<AffineDmaWaitOp>(dmaFinishOp))) {
237 startWaitPairs.push_back({dmaStartOp, dmaFinishOp});
247 void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
249 if (!mayBeConstTripCount) {
250 LLVM_DEBUG(forOp.emitRemark(
"won't pipeline due to unknown trip count"));
257 if (startWaitPairs.empty()) {
258 LLVM_DEBUG(forOp.emitRemark(
"No dma start/finish pairs\n"));
270 for (
auto &pair : startWaitPairs) {
271 auto *dmaStartOp = pair.first;
272 Value oldMemRef = dmaStartOp->getOperand(
273 cast<AffineDmaStartOp>(dmaStartOp).getFasterMemPos());
277 LLVM_DEBUG(llvm::dbgs()
278 <<
"double buffering failed for" << dmaStartOp <<
"\n";);
293 dyn_cast<memref::DeallocOp>(*oldMemRef.
user_begin())) {
302 for (
auto &pair : startWaitPairs) {
303 auto *dmaFinishOp = pair.second;
306 LLVM_DEBUG(llvm::dbgs() <<
"tag double buffering failed\n";);
316 dyn_cast<memref::DeallocOp>(*oldTagMemRef.
user_begin())) {
325 startWaitPairs.clear();
330 for (
auto &pair : startWaitPairs) {
331 auto *dmaStartOp = pair.first;
332 assert(isa<AffineDmaStartOp>(dmaStartOp));
333 instShiftMap[dmaStartOp] = 0;
337 if (!sliceOps.empty()) {
338 for (
auto sliceOp : sliceOps) {
339 instShiftMap[sliceOp.getOperation()] = 0;
347 for (
auto *op : affineApplyInsts) {
348 instShiftMap[op] = 0;
353 for (
auto &op : forOp.getBody()->without_terminator())
354 instShiftMap.try_emplace(&op, 1);
359 for (
auto &op : forOp.getBody()->without_terminator()) {
360 assert(instShiftMap.contains(&op));
361 shifts[s++] = instShiftMap[&op];
366 op.setAttr(
"shift", b.getI64IntegerAttr(shifts[s - 1]));
372 LLVM_DEBUG(llvm::dbgs() <<
"Shifts invalid - unexpected\n";);
377 LLVM_DEBUG(llvm::dbgs() <<
"op body skewing failed - unexpected\n";);
static void copy(Location loc, Value dst, Value src, Value size, OpBuilder &builder)
Copies the given number of bytes from src to dst pointers.
static void findMatchingStartFinishInsts(AffineForOp forOp, SmallVectorImpl< std::pair< Operation *, Operation * >> &startWaitPairs)
static unsigned getTagMemRefPos(Operation &dmaOp)
static bool checkTagMatch(AffineDmaStartOp startOp, AffineDmaWaitOp waitOp)
static bool doubleBuffer(Value oldMemRef, AffineForOp forOp)
Doubles the buffer of the supplied memref on the specified 'affine.for' operation by adding a leading...
A multi-dimensional affine map. Affine maps are immutable, like Types, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
AffineExpr getAffineDimExpr(unsigned position)
This is a builder type that keeps local references to arguments.
Builder & setLayout(MemRefLayoutAttrInterface newLayout)
Builder & setShape(ArrayRef< int64_t > newShape)
This class helps build Operations.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
Operation is the basic unit of execution within MLIR.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
Type getType() const
Return the type of this value.
user_iterator user_begin() const
bool hasOneUse() const
Returns true if this value has exactly one use.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
AffineDmaStartOp starts a non-blocking DMA operation that transfers data from a source memref to a de...
Value getTagMemRef()
Returns the Tag MemRef for this DMA operation.
operand_range getTagIndices()
Returns the tag memref indices for this DMA operation.
AffineDmaWaitOp blocks until the completion of a DMA operation associated with the tag element 'tag[i...
Value getTagMemRef()
Returns the Tag MemRef associated with the DMA operation being waited on.
operand_range getTagIndices()
Returns the tag memref indices for this DMA operation.
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
LogicalResult affineForOpBodySkew(AffineForOp forOp, ArrayRef< uint64_t > shifts, bool unrollPrologueEpilogue=false)
Skew the operations in an affine.for's body with the specified operation-wise shifts.
void getReachableAffineApplyOps(ArrayRef< Value > operands, SmallVectorImpl< Operation * > &affineApplyOps)
Returns in affineApplyOps, the sequence of those AffineApplyOp Operations that are reachable via a se...
std::unique_ptr< OperationPass< func::FuncOp > > createPipelineDataTransferPass()
Creates a pass to pipeline explicit movement of data across levels of the memory hierarchy.
bool isOpwiseShiftValid(AffineForOp forOp, ArrayRef< uint64_t > shifts)
Checks whether SSA dominance would be violated if a for op's body operations are shifted by the specifi...
void createAffineComputationSlice(Operation *opInst, SmallVectorImpl< AffineApplyOp > *sliceOps)
Given an operation, inserts one or more single result affine apply operations, results of which are e...
LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef, ArrayRef< Value > extraIndices={}, AffineMap indexRemap=AffineMap(), ArrayRef< Value > extraOperands={}, ArrayRef< Value > symbolOperands={}, llvm::function_ref< bool(Operation *)> userFilterFn=nullptr, bool allowNonDereferencingOps=false, bool replaceInDeallocOp=false)
Replaces all "dereferencing" uses of oldMemRef with newMemRef while optionally remapping the old memr...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Include the generated interface declarations.