26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
32 #define GEN_PASS_DEF_AFFINELOOPTILING
33 #include "mlir/Dialect/Affine/Passes.h.inc"
40 #define DEBUG_TYPE "affine-loop-tile"
45 struct LoopTiling :
public affine::impl::AffineLoopTilingBase<LoopTiling> {
46 LoopTiling() =
default;
47 explicit LoopTiling(uint64_t cacheSizeBytes,
bool avoidMaxMinBounds =
true)
48 : avoidMaxMinBounds(avoidMaxMinBounds) {
49 this->cacheSizeInKiB = cacheSizeBytes / 1024;
52 void runOnOperation()
override;
57 constexpr
static unsigned kDefaultTileSize = 4;
60 bool avoidMaxMinBounds =
true;
67 std::unique_ptr<OperationPass<func::FuncOp>>
69 return std::make_unique<LoopTiling>(cacheSizeBytes);
71 std::unique_ptr<OperationPass<func::FuncOp>>
73 return std::make_unique<LoopTiling>();
80 assert(band.size() == tileSizes->size() &&
"invalid tile size count");
81 for (
unsigned i = 0, e = band.size(); i < e; i++) {
82 unsigned &tSizeAdjusted = (*tileSizes)[i];
88 uint64_t constTripCount = *mayConst;
89 if (constTripCount > 1 && tSizeAdjusted > constTripCount / 2)
90 tSizeAdjusted = constTripCount / 2;
91 while (constTripCount % tSizeAdjusted != 0)
105 assert(!origLoops.empty() &&
"no original loops provided");
110 if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
111 loadAndStoreOps.push_back(op);
114 unsigned numOps = loadAndStoreOps.size();
115 unsigned numLoops = origLoops.size();
116 for (
unsigned d = 1; d <= numLoops + 1; ++d) {
117 for (
unsigned i = 0; i < numOps; ++i) {
120 for (
unsigned j = 0;
j < numOps; ++
j) {
126 srcAccess, dstAccess, d,
nullptr,
136 LLVM_DEBUG(llvm::dbgs() <<
"Checking whether tiling legality violated "
137 "for dependence at depth: "
138 << Twine(d) <<
" between:\n";);
142 if (depComp.lb.has_value() && depComp.ub.has_value() &&
143 *depComp.lb < *depComp.ub && *depComp.ub < 0) {
144 LLVM_DEBUG(llvm::dbgs()
145 <<
"Dependence component lb = " << Twine(*depComp.lb)
146 <<
" ub = " << Twine(*depComp.ub)
147 <<
" is negative at depth: " << Twine(d)
148 <<
" and thus violates the legality rule.\n");
172 tileSizes->assign(band.size(), tileSize);
177 if (!this->tileSizes.empty()) {
178 tileSizes->assign(this->tileSizes.begin(), this->tileSizes.end());
179 tileSizes->resize(band.size(), kDefaultTileSize);
182 tileSizes->resize(band.size());
185 AffineForOp rootForOp = band[0];
195 std::fill(tileSizes->begin(), tileSizes->end(),
196 LoopTiling::kDefaultTileSize);
197 if (avoidMaxMinBounds)
200 rootForOp.emitWarning(
"memory footprint unknown: using default tile "
201 "sizes adjusted to trip count divisors"));
206 uint64_t cacheSizeBytes = cacheSizeInKiB * 1024;
208 if (excessFactor <= 1) {
210 std::fill(tileSizes->begin(), tileSizes->end(), 1);
222 static_cast<unsigned>(floorl(std::pow(excessFactor, 1.0 / band.size())));
224 unsigned cumulProductOfTileSizes = 1;
225 for (
unsigned i = 0, e = band.size(); i < e; i++) {
227 (*tileSizes)[i] = tSize;
231 1U,
static_cast<unsigned>(excessFactor / cumulProductOfTileSizes));
232 cumulProductOfTileSizes *= (*tileSizes)[i];
234 if (avoidMaxMinBounds)
238 void LoopTiling::runOnOperation() {
240 std::vector<SmallVector<AffineForOp, 6>> bands;
244 for (
auto &band : bands) {
246 band.front().emitRemark(
"tiling code is illegal due to dependences");
253 getTileSizes(band, &tileSizes);
254 if (llvm::DebugFlag) {
255 auto diag = band[0].emitRemark(
"using tile sizes [");
256 for (
unsigned tSize : tileSizes)
257 diag << tSize <<
' ';
263 assert(!band.empty() &&
"guaranteed to succeed on empty bands");
264 LLVM_DEBUG(band.front()->emitRemark(
"loop tiling failed!\n"));
270 auto intraTileLoops =
273 assert(!intraTileLoops.empty() &&
274 "guaranteed to succeed on empty bands");
275 LLVM_DEBUG(intraTileLoops.front()->emitRemark(
276 "separation post tiling failed!\n"));
282 constexpr
unsigned LoopTiling::kDefaultTileSize;
static bool checkTilingLegality(MutableArrayRef< AffineForOp > origLoops)
Checks whether hyper-rectangular loop tiling of the nest represented by origLoops is valid.
static void adjustToDivisorsOfTripCounts(ArrayRef< AffineForOp > band, SmallVectorImpl< unsigned > *tileSizes)
Reduces each tile size to the largest divisor of the corresponding trip count (if the trip count is k...
static std::string diag(const llvm::Value &value)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Operation is the basic unit of execution within MLIR.
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
std::unique_ptr< OperationPass< func::FuncOp > > createLoopTilingPass(uint64_t cacheSizeBytes)
Creates a pass to perform tiling on loop nests.
bool hasDependence(DependenceResult result)
Utility function that returns true if the provided DependenceResult corresponds to a dependence resul...
LogicalResult tilePerfectlyNested(MutableArrayRef< AffineForOp > input, ArrayRef< unsigned > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops.
void getTileableBands(func::FuncOp f, std::vector< SmallVector< AffineForOp, 6 >> *bands)
Identify valid and profitable bands of loops to tile.
LogicalResult separateFullTiles(MutableArrayRef< AffineForOp > nest, SmallVectorImpl< AffineForOp > *fullTileNest=nullptr)
Separates full tiles from partial tiles for a perfect nest nest by generating a conditional guard tha...
llvm::TypeSize divideCeil(llvm::TypeSize numerator, uint64_t denominator)
Divides the known min value of the numerator by the denominator and rounds the result up to the next ...
Include the generated interface declarations.
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
Checks whether two accesses to the same memref access the same element.
Encapsulates a memref load or store access information.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.