MLIR 22.0.0git
LoopTiling.cpp
Go to the documentation of this file.
1//===- LoopTiling.cpp --- Loop tiling pass ------------------------------*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a pass to tile affine loop nests.
10//
11//===----------------------------------------------------------------------===//
12
14
23#include "llvm/Support/CommandLine.h"
24#include "llvm/Support/Debug.h"
25#include <optional>
26
27namespace mlir {
28namespace affine {
29#define GEN_PASS_DEF_AFFINELOOPTILING
30#include "mlir/Dialect/Affine/Passes.h.inc"
31} // namespace affine
32} // namespace mlir
33
34using namespace mlir;
35using namespace mlir::affine;
36
37#define DEBUG_TYPE "affine-loop-tile"
38
39namespace {
40
41/// A pass to perform loop tiling on all suitable loop nests of a func op.
42struct LoopTiling : public affine::impl::AffineLoopTilingBase<LoopTiling> {
43 LoopTiling() = default;
44 explicit LoopTiling(uint64_t cacheSizeBytes, bool avoidMaxMinBounds = true)
45 : avoidMaxMinBounds(avoidMaxMinBounds) {
46 this->cacheSizeInKiB = cacheSizeBytes / 1024;
47 }
48
49 void runOnOperation() override;
51 SmallVectorImpl<unsigned> *tileSizes);
52
53 // Default tile size if nothing is provided.
54 constexpr static unsigned kDefaultTileSize = 4;
55
56 // If true, tile sizes are set to avoid max/min in bounds if possible.
57 bool avoidMaxMinBounds = true;
58};
59
60} // namespace
61
62/// Get bands of loops that are valid to tile from the top-level of `f`.
63static void
65 std::vector<SmallVector<AffineForOp, 6>> &bands) {
66 // Get maximal perfect nest of 'affine.for' ops starting from root
67 // (inclusive).
68 for (AffineForOp forOp : f.getOps<AffineForOp>()) {
70 getPerfectlyNestedLoops(band, forOp);
71 if (isTilingValid(band))
72 bands.push_back(band);
73 }
74}
75
76/// Creates a pass to perform loop tiling on all suitable loop nests of a
77/// Function.
78std::unique_ptr<OperationPass<func::FuncOp>>
79mlir::affine::createLoopTilingPass(uint64_t cacheSizeBytes) {
80 return std::make_unique<LoopTiling>(cacheSizeBytes);
81}
82std::unique_ptr<OperationPass<func::FuncOp>>
84 return std::make_unique<LoopTiling>();
85}
86
87/// Reduces each tile size to the largest divisor of the corresponding trip
88/// count (if the trip count is known).
90 SmallVectorImpl<unsigned> *tileSizes) {
91 assert(band.size() == tileSizes->size() && "invalid tile size count");
92 for (unsigned i = 0, e = band.size(); i < e; i++) {
93 unsigned &tSizeAdjusted = (*tileSizes)[i];
94 std::optional<uint64_t> mayConst = getConstantTripCount(band[i]);
95 if (!mayConst)
96 continue;
97 // Adjust the tile size to largest factor of the trip count less than
98 // tSize.
99 uint64_t constTripCount = *mayConst;
100 if (constTripCount > 1 && tSizeAdjusted > constTripCount / 2)
101 tSizeAdjusted = constTripCount / 2;
102 while (constTripCount % tSizeAdjusted != 0)
103 tSizeAdjusted--;
104 }
105}
106
107// Returns tile sizes to use. Checks CL options; if none are specified, sets it
108// based on a simple model that looks at the memory footprint and determines
109// tile sizes assuming identity accesses / 1:1 tile size proportional footprint
110// along each of the dimensions being tiled.
111// TODO: evolve this model. Tile size determination is a large area
112// to play with in general.
113void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
114 SmallVectorImpl<unsigned> *tileSizes) {
115 if (band.empty())
116 return;
117
118 // Use command-line tileSize for all loops if specified.
119 if (tileSize) {
120 tileSizes->assign(band.size(), tileSize);
121 return;
122 }
123
124 // Use supplied tile sizes and fill them with default tile size if it's short.
125 if (!this->tileSizes.empty()) {
126 tileSizes->assign(this->tileSizes.begin(), this->tileSizes.end());
127 tileSizes->resize(band.size(), kDefaultTileSize);
128 return;
129 }
130 tileSizes->resize(band.size());
131
132 // If the cache size is zero, set the minimum valid tile size. No good reason
133 // to pick another specific size over this.
134 if (cacheSizeInKiB == 0) {
135 llvm::fill(*tileSizes, 1);
136 return;
137 }
138
139 // Obtain memory footprint and set tile sizes so that a tile fits in
140 // the cache size. This is an approximation with the assumption that the
141 // footprint increases with the tile size linearly in that dimension (i.e.,
142 // assumes one-to-one access function).
143 std::optional<int64_t> fp = getMemoryFootprintBytes(band[0], 0);
144 if (!fp) {
145 // Fill with default tile sizes if footprint is unknown.
146 llvm::fill(*tileSizes, LoopTiling::kDefaultTileSize);
147 if (avoidMaxMinBounds)
148 adjustToDivisorsOfTripCounts(band, tileSizes);
149 // The first loop in the band.
150 AffineForOp rootForOp = band[0];
151 (void)rootForOp;
152 LLVM_DEBUG(
153 rootForOp.emitWarning("memory footprint unknown: using default tile "
154 "sizes adjusted to trip count divisors"));
155 return;
156 }
157
158 // Check how many times larger the cache size is when compared to footprint.
159 uint64_t cacheSizeBytes = cacheSizeInKiB * 1024;
160 uint64_t excessFactor = llvm::divideCeil(*fp, cacheSizeBytes);
161 if (excessFactor <= 1) {
162 // No need of any tiling - set tile size to 1.
163 llvm::fill(*tileSizes, 1);
164 return;
165 }
166
167 // Divide all loops equally in an attempt to reduce footprint.
168 // TODO: this is approximate. Ideally, obtain reuse factor /
169 // profitability along each dimension and weight tile sizes based on that as
170 // one possible approach. Or compute a polynomial in tile sizes and solve for
171 // it.
172
173 // For an n-d tileable band, compute the n^th root of the excess.
174 unsigned tSize =
175 static_cast<unsigned>(floorl(std::pow(excessFactor, 1.0 / band.size())));
176 // We'll keep a running product to determine the last tile size better.
177 unsigned cumulProductOfTileSizes = 1;
178 for (unsigned i = 0, e = band.size(); i < e; i++) {
179 if (i < e - 1)
180 (*tileSizes)[i] = tSize;
181 else
182 // Set last tile size to cover the balance.
183 (*tileSizes)[i] = std::max(
184 1U, static_cast<unsigned>(excessFactor / cumulProductOfTileSizes));
185 cumulProductOfTileSizes *= (*tileSizes)[i];
186 }
187 if (avoidMaxMinBounds)
188 adjustToDivisorsOfTripCounts(band, tileSizes);
189}
190
191void LoopTiling::runOnOperation() {
192 // Bands of loops to tile.
193 std::vector<SmallVector<AffineForOp, 6>> bands;
194 getTopLevelTileableBands(getOperation(), bands);
195
196 // Tile each band.
197 for (auto &band : bands) {
198 // Set up tile sizes; fill missing tile sizes at the end with default tile
199 // size or tileSize if one was provided.
200 SmallVector<unsigned, 6> tileSizes;
201 getTileSizes(band, &tileSizes);
202 if (llvm::DebugFlag) {
203 auto diag = band[0].emitRemark("using tile sizes [");
204 llvm::interleaveComma(tileSizes, llvm::dbgs());
205 diag << "]\n";
206 }
207 SmallVector<AffineForOp, 6> tiledNest;
208 if (failed(tilePerfectlyNested(band, tileSizes, &tiledNest))) {
209 // An empty band always succeeds.
210 assert(!band.empty() && "guaranteed to succeed on empty bands");
211 LLVM_DEBUG(band.front()->emitRemark("loop tiling failed!\n"));
212 continue;
213 }
214
215 // Separate full and partial tiles.
216 if (separate) {
217 auto intraTileLoops =
218 MutableArrayRef<AffineForOp>(tiledNest).drop_front(band.size());
219 if (failed(separateFullTiles(intraTileLoops))) {
220 assert(!intraTileLoops.empty() &&
221 "guaranteed to succeed on empty bands");
222 LLVM_DEBUG(intraTileLoops.front()->emitRemark(
223 "separation post tiling failed!"));
224 }
225 }
226 }
227}
static SmallVector< Value > getTileSizes(Location loc, amx::TileType tType, RewriterBase &rewriter)
Maps the 2-dim vector shape to the two 16-bit tile sizes.
static void adjustToDivisorsOfTripCounts(ArrayRef< AffineForOp > band, SmallVectorImpl< unsigned > *tileSizes)
Reduces each tile size to the largest divisor of the corresponding trip count (if the trip count is k...
static void getTopLevelTileableBands(func::FuncOp f, std::vector< SmallVector< AffineForOp, 6 > > &bands)
Get bands of loops that are valid to tile from the top-level of f.
static std::string diag(const llvm::Value &value)
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
bool isTilingValid(ArrayRef< AffineForOp > loops)
Checks whether hyper-rectangular loop tiling of the nest represented by loops is valid.
void getPerfectlyNestedLoops(SmallVectorImpl< AffineForOp > &nestedLoops, AffineForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
Definition Utils.cpp:2173
LogicalResult tilePerfectlyNested(MutableArrayRef< AffineForOp > input, ArrayRef< unsigned > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops.
std::unique_ptr< OperationPass< func::FuncOp > > createLoopTilingPass()
Overload relying on pass options for initialization.
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:561
Include the generated interface declarations.