31 #include "llvm/ADT/MapVector.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
39 #define GEN_PASS_DEF_AFFINEDATACOPYGENERATION
40 #include "mlir/Dialect/Affine/Passes.h.inc"
44 #define DEBUG_TYPE "affine-data-copy-generate"
/// Pass object for affine data copy generation, derived from the generated
/// base class affine::impl::AffineDataCopyGenerationBase.
/// NOTE(review): this listing has gaps (the embedded original line numbers
/// skip), so the constructor's closing brace, any further member
/// declarations, and the end of the struct are not visible here.
62 struct AffineDataCopyGeneration
63 :
public affine::impl::AffineDataCopyGenerationBase<
64 AffineDataCopyGeneration> {
// Default construction: no option is assigned explicitly here.
65 AffineDataCopyGeneration() =
default;
// Explicit construction: each argument is stored into the member of the
// corresponding name; only the capacity is rescaled before being stored.
66 explicit AffineDataCopyGeneration(
unsigned slowMemorySpace,
67 unsigned fastMemorySpace,
68 unsigned tagMemorySpace,
69 int minDmaTransferSize,
70 uint64_t fastMemCapacityBytes) {
71 this->slowMemorySpace = slowMemorySpace;
72 this->fastMemorySpace = fastMemorySpace;
73 this->tagMemorySpace = tagMemorySpace;
74 this->minDmaTransferSize = minDmaTransferSize;
// The argument is in bytes; the stored value is the argument divided by 1024
// using integer division, so any remainder below 1024 bytes is dropped.
75 this->fastMemoryCapacity = fastMemCapacityBytes / 1024;
// Pass entry point, defined out of line.
78 void runOnOperation()
override;
// Starts out null; runOnOperation assigns it a constant index op of value 0.
82 Value zeroIndex =
nullptr;
/// Factory returning a newly allocated AffineDataCopyGeneration pass built
/// from explicit options; all five arguments are forwarded unchanged to the
/// pass constructor.
/// NOTE(review): the line carrying the function name is missing from this
/// listing; the parameter list matches the createAffineDataCopyGenerationPass
/// signature quoted in the reference text further down -- confirm.
90 std::unique_ptr<OperationPass<func::FuncOp>>
92 unsigned slowMemorySpace,
unsigned fastMemorySpace,
unsigned tagMemorySpace,
93 int minDmaTransferSize, uint64_t fastMemCapacityBytes) {
94 return std::make_unique<AffineDataCopyGeneration>(
95 slowMemorySpace, fastMemorySpace, tagMemorySpace, minDmaTransferSize,
96 fastMemCapacityBytes);
/// Factory returning a default-constructed AffineDataCopyGeneration pass.
/// NOTE(review): the function-name line and closing brace are missing from
/// this listing; presumably the zero-argument overload of the factory above
/// -- confirm against the full file.
98 std::unique_ptr<OperationPass<func::FuncOp>>
100 return std::make_unique<AffineDataCopyGeneration>();
/// Walks the operations of `block`, issuing copy-generation calls over ranges
/// of operations and recursing into the bodies of selected affine.for loops.
/// Operations recorded in `copyNests` are skipped.
/// NOTE(review): many original lines are missing from this listing (the
/// embedded line numbers skip), so the comments below describe only the
/// fragments that are visible.
107 void AffineDataCopyGeneration::runOnBlock(
Block *block,
// Capacity in bytes: one branch scales the stored fastMemoryCapacity by 1024,
// the other uses it unscaled. The selecting condition is on a line not shown.
112 uint64_t fastMemCapacityBytes =
114 ? fastMemoryCapacity * 1024
115 : fastMemoryCapacity;
// Tail of a brace-initialized aggregate; presumably the `copyOptions` value
// passed to the copy-generation calls below -- confirm.
117 fastMemorySpace, tagMemorySpace,
118 fastMemCapacityBytes};
// Predicate: true for an affine load, store, or for op that is not already
// recorded in copyNests.
131 return isa<AffineLoadOp, AffineStoreOp, AffineForOp>(op) &&
132 copyNests.count(&op) == 0;
// Main scan over the block's operations.
137 while (it != block->
end()) {
// Only affine.for ops that are not in copyNests are handled in this branch.
140 if ((forOp = dyn_cast<AffineForOp>(&*it)) && copyNests.count(forOp) == 0) {
// Tail of a call whose beginning is not shown; no memref filter is passed
// (std::nullopt).
143 std::nullopt, copyNests);
// True only when a footprint value is available and, converted to unsigned,
// it is strictly greater than fastMemCapacityBytes. How `footprint` is
// computed is on lines not shown.
146 auto exceedsCapacity = [&](AffineForOp forOp) {
147 std::optional<int64_t> footprint =
150 return (footprint.has_value() &&
151 static_cast<uint64_t
>(*footprint) > fastMemCapacityBytes);
// With skipNonUnitStrideLoops set, recurse exactly when the loop step is not
// 1; otherwise recurse exactly when the capacity check above reports true.
160 bool recurseInner = skipNonUnitStrideLoops ? forOp.getStep() != 1
161 : exceedsCapacity(forOp);
// Recursive descent into the loop body, sharing the same copyNests set.
165 runOnBlock(forOp.getBody(), copyNests);
// Tail of another call whose beginning is not shown.
177 std::nullopt, copyNests);
// Move curBegin to the first op after `it` that is an affine load/store/for
// and is not in copyNests (block->end() if there is none).
180 curBegin = std::find_if(std::next(it), block->
end(), [&](
Operation &op) {
181 return isa<AffineLoadOp, AffineStoreOp, AffineForOp>(op) &&
182 copyNests.count(&op) == 0;
// Invariant check: the op at `it` must not be one recorded in copyNests.
186 assert(copyNests.count(&*it) == 0 &&
187 "all copy nests generated should have been skipped above");
// Trailing range: entered only if curBegin has not reached the block end.
194 if (curBegin != block->
end()) {
// Tail of an assert whose condition is on a line not shown.
197 "can't be a terminator");
// The range end passed here is the iterator to the block's last operation.
200 std::prev(block->
end()), copyOptions,
201 std::nullopt, copyNests);
/// Pass entry point: obtains the function being processed, creates the shared
/// zero index constant, and runs runOnBlock over every block of the function.
/// NOTE(review): the declarations of `topBuilder`, `copyNests` and `copyOps`,
/// and everything after the block loop other than the fragments below, are on
/// lines missing from this listing.
205 void AffineDataCopyGeneration::runOnOperation() {
206 func::FuncOp f = getOperation();
// Constant index 0 created at the function's location and stored in the
// zeroIndex member.
208 zeroIndex = topBuilder.create<arith::ConstantIndexOp>(f.getLoc(), 0);
// Every block of the function is processed with the same copyNests set.
217 for (
auto &block : f)
218 runOnBlock(&block, copyNests);
// Per-op dispatch: affine.for ops take the first branch (its body is not
// shown); affine loads and stores are appended to copyOps.
227 if (
auto forOp = dyn_cast<AffineForOp>(op))
229 else if (isa<AffineLoadOp, AffineStoreOp>(op))
230 copyOps.push_back(op);
static MLIRContext * getContext(OpFoldResult val)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Block represents an ordered list of Operations.
This class represents a frozen set of patterns that can be processed by a pattern applicator.
This class allows control over how the GreedyPatternRewriteDriver works.
This class helps build Operations.
This class provides the API for ops that are known to be terminators.
Operation is the basic unit of execution within MLIR.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
LogicalResult promoteIfSingleIteration(AffineForOp forOp)
Promotes the loop body of an AffineForOp to its containing block if the loop was known to have a singl...
LogicalResult affineDataCopyGenerate(Block::iterator begin, Block::iterator end, const AffineCopyOptions &copyOptions, std::optional< Value > filterMemRef, DenseSet< Operation * > &copyNests)
Performs explicit copying for the contiguous sequence of operations in the block iterator range [‘beg...
std::unique_ptr< OperationPass< func::FuncOp > > createAffineDataCopyGenerationPass(unsigned slowMemorySpace, unsigned fastMemorySpace, unsigned tagMemorySpace=0, int minDmaTransferSize=1024, uint64_t fastMemCapacityBytes=std::numeric_limits< uint64_t >::max())
Performs packing (or explicit copying) of accessed memref regions into buffers in the specified faste...
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
Include the generated interface declarations.
const FrozenRewritePatternSet GreedyRewriteConfig config
LogicalResult applyOpPatternsGreedily(ArrayRef< Operation * > ops, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr, bool *allErased=nullptr)
Rewrite the specified ops by repeatedly applying the highest benefit patterns in a greedy worklist dr...
const FrozenRewritePatternSet & patterns
@ ExistingAndNewOps
Only pre-existing and newly created ops are processed.
Explicit copy / DMA generation options for mlir::affineDataCopyGenerate.