26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/Support/Debug.h"
31 #define GEN_PASS_DEF_AFFINEPIPELINEDATATRANSFER
32 #include "mlir/Dialect/Affine/Passes.h.inc"
36 #define DEBUG_TYPE "affine-pipeline-data-transfer"
42 struct PipelineDataTransfer
43 :
public affine::impl::AffinePipelineDataTransferBase<
44 PipelineDataTransfer> {
45 void runOnOperation()
override;
46 void runOnAffineForOp(AffineForOp forOp);
48 std::vector<AffineForOp> forOps;
55 std::unique_ptr<OperationPass<func::FuncOp>>
57 return std::make_unique<PipelineDataTransfer>();
64 assert((isa<AffineDmaStartOp, AffineDmaWaitOp>(dmaOp)));
65 if (
auto dmaStartOp = dyn_cast<AffineDmaStartOp>(dmaOp)) {
66 return dmaStartOp.getTagMemRefOperandIndex();
78 auto *forBody = forOp.getBody();
79 OpBuilder bInner(forBody, forBody->begin());
82 auto doubleShape = [&](MemRefType oldMemRefType) -> MemRefType {
87 std::copy(oldShape.begin(), oldShape.end(), newShape.begin() + 1);
91 auto oldMemRefType = cast<MemRefType>(oldMemRef.
getType());
92 auto newMemRefType = doubleShape(oldMemRefType);
99 if (dim.value() == ShapedType::kDynamic)
100 allocOperands.push_back(bOuter.
createOrFold<memref::DimOp>(
101 forOp.getLoc(), oldMemRef, dim.index()));
106 forOp.getLoc(), newMemRefType, allocOperands);
110 int64_t step = forOp.getStepAsInt();
113 auto ivModTwoOp = bInner.
create<AffineApplyOp>(forOp.getLoc(), modTwoMap,
114 forOp.getInductionVar());
119 oldMemRef, newMemRef,
124 &*forOp.getBody()->begin()))) {
126 forOp.emitError(
"memref replacement for double buffering failed"));
132 bOuter.
create<memref::DeallocOp>(forOp.getLoc(), newMemRef);
138 void PipelineDataTransfer::runOnOperation() {
145 getOperation().
walk([&](AffineForOp forOp) { forOps.push_back(forOp); });
146 for (
auto forOp : forOps)
147 runOnAffineForOp(forOp);
158 for (
auto it = startIndices.begin(), wIt = waitIndices.begin(),
159 e = startIndices.end();
160 it != e; ++it, ++wIt) {
176 SmallVectorImpl<std::pair<Operation *, Operation *>> &startWaitPairs) {
180 for (
auto &op : *forOp.getBody()) {
181 auto dmaStartOp = dyn_cast<AffineDmaStartOp>(op);
182 if (dmaStartOp && dmaStartOp.isSrcMemorySpaceFaster())
183 outgoingDmaOps.push_back(dmaStartOp);
187 for (
auto &op : *forOp.getBody()) {
189 if (isa<AffineDmaWaitOp>(op)) {
190 dmaFinishInsts.push_back(&op);
193 auto dmaStartOp = dyn_cast<AffineDmaStartOp>(op);
199 if (!dmaStartOp.isDestMemorySpaceFaster())
205 auto *it = outgoingDmaOps.begin();
206 for (; it != outgoingDmaOps.end(); ++it) {
207 if (it->getDstMemRef() == dmaStartOp.getSrcMemRef())
210 if (it != outgoingDmaOps.end())
214 auto memref = dmaStartOp.getOperand(dmaStartOp.getFasterMemPos());
215 bool escapingUses =
false;
216 for (
auto *user : memref.getUsers()) {
218 if (isa<memref::DeallocOp>(user))
220 if (!forOp.getBody()->findAncestorOpInBlock(*user)) {
221 LLVM_DEBUG(llvm::dbgs()
222 <<
"can't pipeline: buffer is live out of loop\n";);
228 dmaStartInsts.push_back(&op);
232 for (
auto *dmaStartOp : dmaStartInsts) {
233 for (
auto *dmaFinishOp : dmaFinishInsts) {
235 cast<AffineDmaWaitOp>(dmaFinishOp))) {
236 startWaitPairs.push_back({dmaStartOp, dmaFinishOp});
246 void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
248 if (!mayBeConstTripCount) {
249 LLVM_DEBUG(forOp.emitRemark(
"won't pipeline due to unknown trip count"));
256 if (startWaitPairs.empty()) {
257 LLVM_DEBUG(forOp.emitRemark(
"No dma start/finish pairs\n"));
269 for (
auto &pair : startWaitPairs) {
270 auto *dmaStartOp = pair.first;
271 Value oldMemRef = dmaStartOp->getOperand(
272 cast<AffineDmaStartOp>(dmaStartOp).getFasterMemPos());
276 LLVM_DEBUG(llvm::dbgs()
277 <<
"double buffering failed for" << dmaStartOp <<
"\n";);
292 dyn_cast<memref::DeallocOp>(*oldMemRef.
user_begin())) {
301 for (
auto &pair : startWaitPairs) {
302 auto *dmaFinishOp = pair.second;
305 LLVM_DEBUG(llvm::dbgs() <<
"tag double buffering failed\n";);
315 dyn_cast<memref::DeallocOp>(*oldTagMemRef.
user_begin())) {
324 startWaitPairs.clear();
329 for (
auto &pair : startWaitPairs) {
330 auto *dmaStartOp = pair.first;
331 assert(isa<AffineDmaStartOp>(dmaStartOp));
332 instShiftMap[dmaStartOp] = 0;
336 if (!sliceOps.empty()) {
337 for (
auto sliceOp : sliceOps) {
338 instShiftMap[sliceOp.getOperation()] = 0;
346 for (
auto *op : affineApplyInsts) {
347 instShiftMap[op] = 0;
352 for (
auto &op : forOp.getBody()->without_terminator())
353 if (!instShiftMap.contains(&op))
354 instShiftMap[&op] = 1;
359 for (
auto &op : forOp.getBody()->without_terminator()) {
360 assert(instShiftMap.contains(&op));
361 shifts[s++] = instShiftMap[&op];
366 op.
setAttr(
"shift", b.getI64IntegerAttr(shifts[s - 1]));
372 LLVM_DEBUG(llvm::dbgs() <<
"Shifts invalid - unexpected\n";);
377 LLVM_DEBUG(llvm::dbgs() <<
"op body skewing failed - unexpected\n";);
static void copy(Location loc, Value dst, Value src, Value size, OpBuilder &builder)
Copies the given number of bytes from src to dst pointers.
static void findMatchingStartFinishInsts(AffineForOp forOp, SmallVectorImpl< std::pair< Operation *, Operation * >> &startWaitPairs)
static unsigned getTagMemRefPos(Operation &dmaOp)
static bool checkTagMatch(AffineDmaStartOp startOp, AffineDmaWaitOp waitOp)
static bool doubleBuffer(Value oldMemRef, AffineForOp forOp)
Doubles the buffer of the supplied memref on the specified 'affine.for' operation by adding a leading...
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
AffineExpr getAffineDimExpr(unsigned position)
This is a builder type that keeps local references to arguments.
Builder & setLayout(MemRefLayoutAttrInterface newLayout)
Builder & setShape(ArrayRef< int64_t > newShape)
This class helps build Operations.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
Operation is the basic unit of execution within MLIR.
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
void setAttr(StringAttr name, Attribute value)
If an attribute exists with the specified name, change it to the new value.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
Type getType() const
Return the type of this value.
user_iterator user_begin() const
bool hasOneUse() const
Returns true if this value has exactly one use.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
AffineDmaStartOp starts a non-blocking DMA operation that transfers data from a source memref to a de...
Value getTagMemRef()
Returns the Tag MemRef for this DMA operation.
operand_range getTagIndices()
Returns the tag memref indices for this DMA operation.
AffineDmaWaitOp blocks until the completion of a DMA operation associated with the tag element 'tag[i...
Value getTagMemRef()
Returns the Tag MemRef associated with the DMA operation being waited on.
operand_range getTagIndices()
Returns the tag memref index for this DMA operation.
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
LogicalResult affineForOpBodySkew(AffineForOp forOp, ArrayRef< uint64_t > shifts, bool unrollPrologueEpilogue=false)
Skew the operations in an affine.for's body with the specified operation-wise shifts.
void getReachableAffineApplyOps(ArrayRef< Value > operands, SmallVectorImpl< Operation * > &affineApplyOps)
Returns in affineApplyOps, the sequence of those AffineApplyOp Operations that are reachable via a se...
std::unique_ptr< OperationPass< func::FuncOp > > createPipelineDataTransferPass()
Creates a pass to pipeline explicit movement of data across levels of the memory hierarchy.
bool isOpwiseShiftValid(AffineForOp forOp, ArrayRef< uint64_t > shifts)
Checks whether SSA dominance would be violated if a for op's body operations are shifted by the specifi...
void createAffineComputationSlice(Operation *opInst, SmallVectorImpl< AffineApplyOp > *sliceOps)
Given an operation, inserts one or more single result affine apply operations, results of which are e...
LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef, ArrayRef< Value > extraIndices={}, AffineMap indexRemap=AffineMap(), ArrayRef< Value > extraOperands={}, ArrayRef< Value > symbolOperands={}, Operation *domOpFilter=nullptr, Operation *postDomOpFilter=nullptr, bool allowNonDereferencingOps=false, bool replaceInDeallocOp=false)
Replaces all "dereferencing" uses of oldMemRef with newMemRef while optionally remapping the old memr...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Include the generated interface declarations.
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.