31 #include "llvm/ADT/MapVector.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
38 #define GEN_PASS_DEF_AFFINEDATACOPYGENERATION
39 #include "mlir/Dialect/Affine/Passes.h.inc"
42 #define DEBUG_TYPE "affine-data-copy-generate"
59 struct AffineDataCopyGeneration
60 :
public impl::AffineDataCopyGenerationBase<AffineDataCopyGeneration> {
61 AffineDataCopyGeneration() =
default;
62 explicit AffineDataCopyGeneration(
unsigned slowMemorySpace,
63 unsigned fastMemorySpace,
64 unsigned tagMemorySpace,
65 int minDmaTransferSize,
66 uint64_t fastMemCapacityBytes) {
67 this->slowMemorySpace = slowMemorySpace;
68 this->fastMemorySpace = fastMemorySpace;
69 this->tagMemorySpace = tagMemorySpace;
70 this->minDmaTransferSize = minDmaTransferSize;
71 this->fastMemoryCapacity = fastMemCapacityBytes / 1024;
74 void runOnOperation()
override;
78 Value zeroIndex =
nullptr;
87 std::unique_ptr<OperationPass<func::FuncOp>>
89 unsigned fastMemorySpace,
90 unsigned tagMemorySpace,
91 int minDmaTransferSize,
92 uint64_t fastMemCapacityBytes) {
93 return std::make_unique<AffineDataCopyGeneration>(
94 slowMemorySpace, fastMemorySpace, tagMemorySpace, minDmaTransferSize,
95 fastMemCapacityBytes);
97 std::unique_ptr<OperationPass<func::FuncOp>>
99 return std::make_unique<AffineDataCopyGeneration>();
106 void AffineDataCopyGeneration::runOnBlock(
Block *block,
111 uint64_t fastMemCapacityBytes =
113 ? fastMemoryCapacity * 1024
114 : fastMemoryCapacity;
116 fastMemorySpace, tagMemorySpace,
117 fastMemCapacityBytes};
133 return isa<AffineLoadOp, AffineStoreOp, AffineForOp>(op) &&
134 copyNests.count(&op) == 0;
139 while (it != block->
end()) {
142 if ((forOp = dyn_cast<AffineForOp>(&*it)) && copyNests.count(forOp) == 0) {
145 std::nullopt, copyNests);
148 auto exceedsCapacity = [&](AffineForOp forOp) {
149 std::optional<int64_t> footprint =
152 return (footprint.has_value() &&
153 static_cast<uint64_t
>(*footprint) > fastMemCapacityBytes);
162 bool recurseInner = skipNonUnitStrideLoops ? forOp.getStep() != 1
163 : exceedsCapacity(forOp);
167 runOnBlock(forOp.getBody(), copyNests);
179 std::nullopt, copyNests);
182 curBegin = std::find_if(std::next(it), block->
end(), [&](
Operation &op) {
183 return isa<AffineLoadOp, AffineStoreOp, AffineForOp>(op) &&
184 copyNests.count(&op) == 0;
188 assert(copyNests.count(&*it) == 0 &&
189 "all copy nests generated should have been skipped above");
196 if (curBegin != block->
end()) {
199 "can't be a terminator");
202 std::prev(block->
end()), copyOptions,
203 std::nullopt, copyNests);
207 void AffineDataCopyGeneration::runOnOperation() {
208 func::FuncOp f = getOperation();
210 zeroIndex = topBuilder.create<arith::ConstantIndexOp>(f.getLoc(), 0);
219 for (
auto &block : f)
220 runOnBlock(&block, copyNests);
229 if (
auto forOp = dyn_cast<AffineForOp>(op))
231 else if (isa<AffineLoadOp, AffineStoreOp>(op))
232 copyOps.push_back(op);
239 AffineLoadOp::getCanonicalizationPatterns(patterns, &getContext());
240 AffineStoreOp::getCanonicalizationPatterns(patterns, &getContext());
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Block represents an ordered list of Operations.
This class represents a frozen set of patterns that can be processed by a pattern applicator.
This class allows control over how the GreedyPatternRewriteDriver works.
GreedyRewriteStrictness strictMode
Strict mode can restrict the ops that are added to the worklist during the rewrite.
This class helps build Operations.
This class provides the API for ops that are known to be terminators.
Operation is the basic unit of execution within MLIR.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
This header declares functions that assist transformations in the MemRef dialect.
LogicalResult applyOpPatternsAndFold(ArrayRef< Operation * > ops, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr, bool *allErased=nullptr)
Applies the specified rewrite patterns on ops while also trying to fold these ops.
LogicalResult affineDataCopyGenerate(Block::iterator begin, Block::iterator end, const AffineCopyOptions &copyOptions, std::optional< Value > filterMemRef, DenseSet< Operation * > &copyNests)
Performs explicit copying for the contiguous sequence of operations in the block iterator range [‘beg...
LogicalResult promoteIfSingleIteration(AffineForOp forOp)
Promotes the loop body of an AffineForOp to its containing block if the loop was known to have a singl...
std::unique_ptr< OperationPass< func::FuncOp > > createAffineDataCopyGenerationPass(unsigned slowMemorySpace, unsigned fastMemorySpace, unsigned tagMemorySpace=0, int minDmaTransferSize=1024, uint64_t fastMemCapacityBytes=std::numeric_limits< uint64_t >::max())
Performs packing (or explicit copying) of accessed memref regions into buffers in the specified faste...
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
@ ExistingAndNewOps
Only pre-existing and newly created ops are processed.
Explicit copy / DMA generation options for mlir::affineDataCopyGenerate.