31 #include "llvm/ADT/MapVector.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
39 #define GEN_PASS_DEF_AFFINEDATACOPYGENERATION
40 #include "mlir/Dialect/Affine/Passes.h.inc"
44 #define DEBUG_TYPE "affine-data-copy-generate"
62 struct AffineDataCopyGeneration
63 :
public affine::impl::AffineDataCopyGenerationBase<
64 AffineDataCopyGeneration> {
65 AffineDataCopyGeneration() =
default;
66 explicit AffineDataCopyGeneration(
unsigned slowMemorySpace,
67 unsigned fastMemorySpace,
68 unsigned tagMemorySpace,
69 int minDmaTransferSize,
70 uint64_t fastMemCapacityBytes) {
71 this->slowMemorySpace = slowMemorySpace;
72 this->fastMemorySpace = fastMemorySpace;
73 this->tagMemorySpace = tagMemorySpace;
74 this->minDmaTransferSize = minDmaTransferSize;
75 this->fastMemoryCapacity = fastMemCapacityBytes / 1024;
78 void runOnOperation()
override;
82 Value zeroIndex =
nullptr;
90 std::unique_ptr<OperationPass<func::FuncOp>>
92 unsigned slowMemorySpace,
unsigned fastMemorySpace,
unsigned tagMemorySpace,
93 int minDmaTransferSize, uint64_t fastMemCapacityBytes) {
94 return std::make_unique<AffineDataCopyGeneration>(
95 slowMemorySpace, fastMemorySpace, tagMemorySpace, minDmaTransferSize,
96 fastMemCapacityBytes);
98 std::unique_ptr<OperationPass<func::FuncOp>>
100 return std::make_unique<AffineDataCopyGeneration>();
107 void AffineDataCopyGeneration::runOnBlock(
Block *block,
112 uint64_t fastMemCapacityBytes =
114 ? fastMemoryCapacity * 1024
115 : fastMemoryCapacity;
117 fastMemorySpace, tagMemorySpace,
118 fastMemCapacityBytes};
129 auto curBegin = llvm::find_if(*block, [&](
Operation &op) {
130 return isa<AffineLoadOp, AffineStoreOp, AffineForOp>(op) &&
131 copyNests.count(&op) == 0;
136 while (it != block->
end()) {
139 if ((forOp = dyn_cast<AffineForOp>(&*it)) && copyNests.count(forOp) == 0) {
142 std::nullopt, copyNests);
145 auto exceedsCapacity = [&](AffineForOp forOp) {
146 std::optional<int64_t> footprint =
149 return (footprint.has_value() &&
150 static_cast<uint64_t
>(*footprint) > fastMemCapacityBytes);
159 bool recurseInner = skipNonUnitStrideLoops ? forOp.getStep() != 1
160 : exceedsCapacity(forOp);
164 runOnBlock(forOp.getBody(), copyNests);
176 std::nullopt, copyNests);
179 curBegin = std::find_if(std::next(it), block->
end(), [&](
Operation &op) {
180 return isa<AffineLoadOp, AffineStoreOp, AffineForOp>(op) &&
181 copyNests.count(&op) == 0;
185 assert(copyNests.count(&*it) == 0 &&
186 "all copy nests generated should have been skipped above");
193 if (curBegin != block->
end()) {
196 "can't be a terminator");
199 std::prev(block->
end()), copyOptions,
200 std::nullopt, copyNests);
204 void AffineDataCopyGeneration::runOnOperation() {
205 func::FuncOp f = getOperation();
207 zeroIndex = topBuilder.create<arith::ConstantIndexOp>(f.getLoc(), 0);
216 for (
auto &block : f)
217 runOnBlock(&block, copyNests);
226 if (
auto forOp = dyn_cast<AffineForOp>(op))
228 else if (isa<AffineLoadOp, AffineStoreOp>(op))
229 copyOps.push_back(op);
240 copyOps, frozenPatterns,
static MLIRContext * getContext(OpFoldResult val)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Block represents an ordered list of Operations.
This class represents a frozen set of patterns that can be processed by a pattern applicator.
This class allows control over how the GreedyPatternRewriteDriver works.
GreedyRewriteConfig & setStrictness(GreedyRewriteStrictness mode)
This class helps build Operations.
This class provides the API for ops that are known to be terminators.
Operation is the basic unit of execution within MLIR.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
LogicalResult promoteIfSingleIteration(AffineForOp forOp)
Promotes the loop body of an AffineForOp to its containing block if the loop was known to have a single iteration.
LogicalResult affineDataCopyGenerate(Block::iterator begin, Block::iterator end, const AffineCopyOptions &copyOptions, std::optional< Value > filterMemRef, DenseSet< Operation * > &copyNests)
Performs explicit copying for the contiguous sequence of operations in the block iterator range [`begin`, `end`)...
std::unique_ptr< OperationPass< func::FuncOp > > createAffineDataCopyGenerationPass(unsigned slowMemorySpace, unsigned fastMemorySpace, unsigned tagMemorySpace=0, int minDmaTransferSize=1024, uint64_t fastMemCapacityBytes=std::numeric_limits< uint64_t >::max())
Performs packing (or explicit copying) of accessed memref regions into buffers in the specified faste...
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
Include the generated interface declarations.
LogicalResult applyOpPatternsGreedily(ArrayRef< Operation * > ops, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr, bool *allErased=nullptr)
Rewrite the specified ops by repeatedly applying the highest benefit patterns in a greedy worklist dr...
const FrozenRewritePatternSet & patterns
@ ExistingAndNewOps
Only pre-existing and newly created ops are processed.
Explicit copy / DMA generation options for mlir::affineDataCopyGenerate.