25 #include "llvm/Support/CommandLine.h" 26 #include "llvm/Support/Debug.h" 29 #define DEBUG_TYPE "affine-loop-tile" 34 struct LoopTiling :
public AffineLoopTilingBase<LoopTiling> {
35 LoopTiling() =
default;
36 explicit LoopTiling(uint64_t cacheSizeBytes,
bool avoidMaxMinBounds =
true)
37 : avoidMaxMinBounds(avoidMaxMinBounds) {
38 this->cacheSizeInKiB = cacheSizeBytes / 1024;
41 void runOnOperation()
override;
46 constexpr
static unsigned kDefaultTileSize = 4;
49 bool avoidMaxMinBounds =
true;
56 std::unique_ptr<OperationPass<func::FuncOp>>
58 return std::make_unique<LoopTiling>(cacheSizeBytes);
61 return std::make_unique<LoopTiling>();
68 assert(band.size() == tileSizes->size() &&
"invalid tile size count");
69 for (
unsigned i = 0, e = band.size(); i < e; i++) {
70 unsigned &tSizeAdjusted = (*tileSizes)[i];
76 uint64_t constTripCount = mayConst.getValue();
77 if (constTripCount > 1 && tSizeAdjusted > constTripCount / 2)
78 tSizeAdjusted = constTripCount / 2;
79 while (constTripCount % tSizeAdjusted != 0)
97 tileSizes->assign(band.size(), tileSize);
102 if (!this->tileSizes.empty()) {
103 tileSizes->assign(this->tileSizes.begin(), this->tileSizes.end());
104 tileSizes->resize(band.size(), kDefaultTileSize);
107 tileSizes->resize(band.size());
110 AffineForOp rootForOp = band[0];
120 std::fill(tileSizes->begin(), tileSizes->end(),
121 LoopTiling::kDefaultTileSize);
122 if (avoidMaxMinBounds)
125 rootForOp.emitWarning(
"memory footprint unknown: using default tile " 126 "sizes adjusted to trip count divisors"));
131 uint64_t cacheSizeBytes = cacheSizeInKiB * 1024;
132 uint64_t excessFactor = llvm::divideCeil(fp.getValue(), cacheSizeBytes);
133 if (excessFactor <= 1) {
135 std::fill(tileSizes->begin(), tileSizes->end(), 1);
147 static_cast<unsigned>(floorl(std::pow(excessFactor, 1.0 / band.size())));
149 unsigned cumulProductOfTileSizes = 1;
150 for (
unsigned i = 0, e = band.size(); i < e; i++) {
152 (*tileSizes)[i] = tSize;
156 1U, static_cast<unsigned>(excessFactor / cumulProductOfTileSizes));
157 cumulProductOfTileSizes *= (*tileSizes)[i];
159 if (avoidMaxMinBounds)
163 void LoopTiling::runOnOperation() {
165 std::vector<SmallVector<AffineForOp, 6>> bands;
169 for (
auto &band : bands) {
173 getTileSizes(band, &tileSizes);
174 if (llvm::DebugFlag) {
175 auto diag = band[0].emitRemark(
"using tile sizes [");
176 for (
unsigned tSize : tileSizes)
177 diag << tSize <<
' ';
183 assert(!band.empty() &&
"guaranteed to succeed on empty bands");
184 LLVM_DEBUG(band.front()->emitRemark(
"loop tiling failed!\n"));
190 auto intraTileLoops =
193 assert(!intraTileLoops.empty() &&
194 "guaranteed to succeed on empty bands");
195 LLVM_DEBUG(intraTileLoops.front()->emitRemark(
196 "separation post tiling failed!\n"));
202 constexpr
unsigned LoopTiling::kDefaultTileSize;
Include the generated interface declarations.
static std::string diag(llvm::Value &v)
Optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory space is unspecified, considers all memory spaces.
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
LogicalResult tilePerfectlyNested(MutableArrayRef< AffineForOp > input, ArrayRef< unsigned > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops.
std::unique_ptr< OperationPass< func::FuncOp > > createLoopTilingPass(uint64_t cacheSizeBytes)
Creates a pass to perform tiling on loop nests.
static void adjustToDivisorsOfTripCounts(ArrayRef< AffineForOp > band, SmallVectorImpl< unsigned > *tileSizes)
Reduces each tile size to the largest divisor of the corresponding trip count (if the trip count is known).
LogicalResult separateFullTiles(MutableArrayRef< AffineForOp > nest, SmallVectorImpl< AffineForOp > *fullTileNest=nullptr)
Separates full tiles from partial tiles for a perfect nest `nest` by generating a conditional guard that selects between the full tile version and the partial tile version using an AffineIfOp.
void getTileableBands(func::FuncOp f, std::vector< SmallVector< AffineForOp, 6 >> *bands)
Identify valid and profitable bands of loops to tile.
Optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, None otherwise.
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)