23#include "llvm/Support/CommandLine.h"
24#include "llvm/Support/Debug.h"
29#define GEN_PASS_DEF_AFFINELOOPTILING
30#include "mlir/Dialect/Affine/Passes.h.inc"
37#define DEBUG_TYPE "affine-loop-tile"
43 LoopTiling() =
default;
44 explicit LoopTiling(uint64_t cacheSizeBytes,
bool avoidMaxMinBounds =
true)
45 : avoidMaxMinBounds(avoidMaxMinBounds) {
46 this->cacheSizeInKiB = cacheSizeBytes / 1024;
49 void runOnOperation()
override;
54 constexpr static unsigned kDefaultTileSize = 4;
57 bool avoidMaxMinBounds =
true;
68 for (AffineForOp forOp : f.getOps<AffineForOp>()) {
72 bands.push_back(band);
78std::unique_ptr<OperationPass<func::FuncOp>>
80 return std::make_unique<LoopTiling>(cacheSizeBytes);
82std::unique_ptr<OperationPass<func::FuncOp>>
84 return std::make_unique<LoopTiling>();
91 assert(band.size() == tileSizes->size() &&
"invalid tile size count");
92 for (
unsigned i = 0, e = band.size(); i < e; i++) {
93 unsigned &tSizeAdjusted = (*tileSizes)[i];
99 uint64_t constTripCount = *mayConst;
100 if (constTripCount > 1 && tSizeAdjusted > constTripCount / 2)
101 tSizeAdjusted = constTripCount / 2;
102 while (constTripCount % tSizeAdjusted != 0)
120 tileSizes->assign(band.size(), tileSize);
125 if (!this->tileSizes.empty()) {
126 tileSizes->assign(this->tileSizes.begin(), this->tileSizes.end());
127 tileSizes->resize(band.size(), kDefaultTileSize);
130 tileSizes->resize(band.size());
134 if (cacheSizeInKiB == 0) {
135 llvm::fill(*tileSizes, 1);
146 llvm::fill(*tileSizes, LoopTiling::kDefaultTileSize);
147 if (avoidMaxMinBounds)
150 AffineForOp rootForOp = band[0];
153 rootForOp.emitWarning(
"memory footprint unknown: using default tile "
154 "sizes adjusted to trip count divisors"));
159 uint64_t cacheSizeBytes = cacheSizeInKiB * 1024;
160 uint64_t excessFactor = llvm::divideCeil(*fp, cacheSizeBytes);
161 if (excessFactor <= 1) {
163 llvm::fill(*tileSizes, 1);
175 static_cast<unsigned>(floorl(std::pow(excessFactor, 1.0 / band.size())));
177 unsigned cumulProductOfTileSizes = 1;
178 for (
unsigned i = 0, e = band.size(); i < e; i++) {
180 (*tileSizes)[i] = tSize;
183 (*tileSizes)[i] = std::max(
184 1U,
static_cast<unsigned>(excessFactor / cumulProductOfTileSizes));
185 cumulProductOfTileSizes *= (*tileSizes)[i];
187 if (avoidMaxMinBounds)
191void LoopTiling::runOnOperation() {
193 std::vector<SmallVector<AffineForOp, 6>> bands;
197 for (
auto &band : bands) {
200 SmallVector<unsigned, 6> tileSizes;
202 if (llvm::DebugFlag) {
203 auto diag = band[0].emitRemark(
"using tile sizes [");
204 llvm::interleaveComma(tileSizes, llvm::dbgs());
207 SmallVector<AffineForOp, 6> tiledNest;
210 assert(!band.empty() &&
"guaranteed to succeed on empty bands");
211 LLVM_DEBUG(band.front()->emitRemark(
"loop tiling failed!\n"));
217 auto intraTileLoops =
218 MutableArrayRef<AffineForOp>(tiledNest).drop_front(band.size());
219 if (
failed(separateFullTiles(intraTileLoops))) {
220 assert(!intraTileLoops.empty() &&
221 "guaranteed to succeed on empty bands");
222 LLVM_DEBUG(intraTileLoops.front()->emitRemark(
223 "separation post tiling failed!"));
static SmallVector< Value > getTileSizes(Location loc, amx::TileType tType, RewriterBase &rewriter)
Maps the 2-dim vector shape to the two 16-bit tile sizes.
static void adjustToDivisorsOfTripCounts(ArrayRef< AffineForOp > band, SmallVectorImpl< unsigned > *tileSizes)
Reduces each tile size to the largest divisor of the corresponding trip count (if the trip count is k...
static void getTopLevelTileableBands(func::FuncOp f, std::vector< SmallVector< AffineForOp, 6 > > &bands)
Get bands of loops that are valid to tile from the top-level of f.
static std::string diag(const llvm::Value &value)
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
bool isTilingValid(ArrayRef< AffineForOp > loops)
Checks whether hyper-rectangular loop tiling of the nest represented by loops is valid.
void getPerfectlyNestedLoops(SmallVectorImpl< AffineForOp > &nestedLoops, AffineForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
LogicalResult tilePerfectlyNested(MutableArrayRef< AffineForOp > input, ArrayRef< unsigned > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops.
std::unique_ptr< OperationPass< func::FuncOp > > createLoopTilingPass()
Overload relying on pass options for initialization.
Include the generated interface declarations.