31 #include "llvm/Support/CommandLine.h"
37 #define GEN_PASS_DEF_AFFINEDATACOPYGENERATION
38 #include "mlir/Dialect/Affine/Passes.h.inc"
42 #define DEBUG_TYPE "affine-data-copy-generate"
60 struct AffineDataCopyGeneration
61 :
public affine::impl::AffineDataCopyGenerationBase<
62 AffineDataCopyGeneration> {
63 AffineDataCopyGeneration() =
default;
64 explicit AffineDataCopyGeneration(
unsigned slowMemorySpace,
65 unsigned fastMemorySpace,
66 unsigned tagMemorySpace,
67 int minDmaTransferSize,
68 uint64_t fastMemCapacityBytes) {
69 this->slowMemorySpace = slowMemorySpace;
70 this->fastMemorySpace = fastMemorySpace;
71 this->tagMemorySpace = tagMemorySpace;
72 this->minDmaTransferSize = minDmaTransferSize;
73 this->fastMemoryCapacity = fastMemCapacityBytes / 1024;
76 void runOnOperation()
override;
80 Value zeroIndex =
nullptr;
88 std::unique_ptr<OperationPass<func::FuncOp>>
90 unsigned slowMemorySpace,
unsigned fastMemorySpace,
unsigned tagMemorySpace,
91 int minDmaTransferSize, uint64_t fastMemCapacityBytes) {
92 return std::make_unique<AffineDataCopyGeneration>(
93 slowMemorySpace, fastMemorySpace, tagMemorySpace, minDmaTransferSize,
94 fastMemCapacityBytes);
96 std::unique_ptr<OperationPass<func::FuncOp>>
98 return std::make_unique<AffineDataCopyGeneration>();
105 void AffineDataCopyGeneration::runOnBlock(
Block *block,
110 uint64_t fastMemCapacityBytes =
112 ? fastMemoryCapacity * 1024
113 : fastMemoryCapacity;
115 fastMemorySpace, tagMemorySpace,
116 fastMemCapacityBytes};
127 auto curBegin = llvm::find_if(*block, [&](
Operation &op) {
128 return isa<AffineLoadOp, AffineStoreOp, AffineForOp>(op) &&
129 copyNests.count(&op) == 0;
134 while (it != block->
end()) {
137 if ((forOp = dyn_cast<AffineForOp>(&*it)) && copyNests.count(forOp) == 0) {
140 std::nullopt, copyNests);
143 auto exceedsCapacity = [&](AffineForOp forOp) {
144 std::optional<int64_t> footprint =
147 return (footprint.has_value() &&
148 static_cast<uint64_t
>(*footprint) > fastMemCapacityBytes);
157 bool recurseInner = skipNonUnitStrideLoops ? forOp.getStep() != 1
158 : exceedsCapacity(forOp);
162 runOnBlock(forOp.getBody(), copyNests);
174 std::nullopt, copyNests);
177 curBegin = std::find_if(std::next(it), block->
end(), [&](
Operation &op) {
178 return isa<AffineLoadOp, AffineStoreOp, AffineForOp>(op) &&
179 copyNests.count(&op) == 0;
183 assert(copyNests.count(&*it) == 0 &&
184 "all copy nests generated should have been skipped above");
191 if (curBegin != block->
end()) {
194 "can't be a terminator");
197 std::prev(block->
end()), copyOptions,
198 std::nullopt, copyNests);
202 void AffineDataCopyGeneration::runOnOperation() {
203 func::FuncOp f = getOperation();
214 for (
auto &block : f)
215 runOnBlock(&block, copyNests);
224 if (
auto forOp = dyn_cast<AffineForOp>(op))
226 else if (isa<AffineLoadOp, AffineStoreOp>(op))
227 copyOps.push_back(op);
238 copyOps, frozenPatterns,
static MLIRContext * getContext(OpFoldResult val)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Block represents an ordered list of Operations.
This class represents a frozen set of patterns that can be processed by a pattern applicator.
This class allows control over how the GreedyPatternRewriteDriver works.
GreedyRewriteConfig & setStrictness(GreedyRewriteStrictness mode)
This class helps build Operations.
This class provides the API for ops that are known to be terminators.
Operation is the basic unit of execution within MLIR.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
LogicalResult promoteIfSingleIteration(AffineForOp forOp)
Promotes the loop body of an AffineForOp to its containing block if the loop was known to have a singl...
LogicalResult affineDataCopyGenerate(Block::iterator begin, Block::iterator end, const AffineCopyOptions &copyOptions, std::optional< Value > filterMemRef, DenseSet< Operation * > &copyNests)
Performs explicit copying for the contiguous sequence of operations in the block iterator range [‘beg...
std::unique_ptr< OperationPass< func::FuncOp > > createAffineDataCopyGenerationPass(unsigned slowMemorySpace, unsigned fastMemorySpace, unsigned tagMemorySpace=0, int minDmaTransferSize=1024, uint64_t fastMemCapacityBytes=std::numeric_limits< uint64_t >::max())
Performs packing (or explicit copying) of accessed memref regions into buffers in the specified faste...
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
Include the generated interface declarations.
LogicalResult applyOpPatternsGreedily(ArrayRef< Operation * > ops, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr, bool *allErased=nullptr)
Rewrite the specified ops by repeatedly applying the highest benefit patterns in a greedy worklist dr...
const FrozenRewritePatternSet & patterns
@ ExistingAndNewOps
Only pre-existing and newly created ops are processed.
Explicit copy / DMA generation options for mlir::affineDataCopyGenerate.