MLIR  19.0.0git
LoopTiling.cpp
Go to the documentation of this file.
1 //===- LoopTiling.cpp --- Loop tiling pass ------------------------------*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a pass to tile loop nests.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 
24 #include "mlir/IR/Builders.h"
25 #include "mlir/IR/IRMapping.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include <optional>
29 
30 namespace mlir {
31 namespace affine {
32 #define GEN_PASS_DEF_AFFINELOOPTILING
33 #include "mlir/Dialect/Affine/Passes.h.inc"
34 } // namespace affine
35 } // namespace mlir
36 
37 using namespace mlir;
38 using namespace mlir::affine;
39 
40 #define DEBUG_TYPE "affine-loop-tile"
41 
42 namespace {
43 
44 /// A pass to perform loop tiling on all suitable loop nests of a Function.
45 struct LoopTiling : public affine::impl::AffineLoopTilingBase<LoopTiling> {
46  LoopTiling() = default;
47  explicit LoopTiling(uint64_t cacheSizeBytes, bool avoidMaxMinBounds = true)
48  : avoidMaxMinBounds(avoidMaxMinBounds) {
49  this->cacheSizeInKiB = cacheSizeBytes / 1024;
50  }
51 
52  void runOnOperation() override;
53  void getTileSizes(ArrayRef<AffineForOp> band,
54  SmallVectorImpl<unsigned> *tileSizes);
55 
56  // Default tile size if nothing is provided.
57  constexpr static unsigned kDefaultTileSize = 4;
58 
59  // If true, tile sizes are set to avoid max/min in bounds if possible.
60  bool avoidMaxMinBounds = true;
61 };
62 
63 } // namespace
64 
65 /// Creates a pass to perform loop tiling on all suitable loop nests of a
66 /// Function.
67 std::unique_ptr<OperationPass<func::FuncOp>>
68 mlir::affine::createLoopTilingPass(uint64_t cacheSizeBytes) {
69  return std::make_unique<LoopTiling>(cacheSizeBytes);
70 }
71 std::unique_ptr<OperationPass<func::FuncOp>>
73  return std::make_unique<LoopTiling>();
74 }
75 
76 /// Reduces each tile size to the largest divisor of the corresponding trip
77 /// count (if the trip count is known).
79  SmallVectorImpl<unsigned> *tileSizes) {
80  assert(band.size() == tileSizes->size() && "invalid tile size count");
81  for (unsigned i = 0, e = band.size(); i < e; i++) {
82  unsigned &tSizeAdjusted = (*tileSizes)[i];
83  std::optional<uint64_t> mayConst = getConstantTripCount(band[i]);
84  if (!mayConst)
85  continue;
86  // Adjust the tile size to largest factor of the trip count less than
87  // tSize.
88  uint64_t constTripCount = *mayConst;
89  if (constTripCount > 1 && tSizeAdjusted > constTripCount / 2)
90  tSizeAdjusted = constTripCount / 2;
91  while (constTripCount % tSizeAdjusted != 0)
92  tSizeAdjusted--;
93  }
94 }
95 
96 /// Checks whether hyper-rectangular loop tiling of the nest represented by
97 /// `origLoops` is valid. The validity condition is from Irigoin and Triolet,
98 /// which states that two tiles cannot depend on each other. We simplify such
99 /// condition to just checking whether there is any negative dependence
100 /// direction, since we have the prior knowledge that the tiling results will be
101 /// hyper-rectangles, which are scheduled in the lexicographically increasing
102 /// order on the vector of loop indices. This function will return failure when
103 /// any dependence component is negative along any of `origLoops`.
105  assert(!origLoops.empty() && "no original loops provided");
106 
107  // We first find out all dependences we intend to check.
108  SmallVector<Operation *, 8> loadAndStoreOps;
109  origLoops[0]->walk([&](Operation *op) {
110  if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
111  loadAndStoreOps.push_back(op);
112  });
113 
114  unsigned numOps = loadAndStoreOps.size();
115  unsigned numLoops = origLoops.size();
116  for (unsigned d = 1; d <= numLoops + 1; ++d) {
117  for (unsigned i = 0; i < numOps; ++i) {
118  Operation *srcOp = loadAndStoreOps[i];
119  MemRefAccess srcAccess(srcOp);
120  for (unsigned j = 0; j < numOps; ++j) {
121  Operation *dstOp = loadAndStoreOps[j];
122  MemRefAccess dstAccess(dstOp);
123 
126  srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
127  &depComps);
128 
129  // Skip if there is no dependence in this case.
130  if (!hasDependence(result))
131  continue;
132 
133  // Check whether there is any negative direction vector in the
134  // dependence components found above, which means that dependence is
135  // violated by the default hyper-rect tiling method.
136  LLVM_DEBUG(llvm::dbgs() << "Checking whether tiling legality violated "
137  "for dependence at depth: "
138  << Twine(d) << " between:\n";);
139  LLVM_DEBUG(srcAccess.opInst->dump(););
140  LLVM_DEBUG(dstAccess.opInst->dump(););
141  for (const DependenceComponent &depComp : depComps) {
142  if (depComp.lb.has_value() && depComp.ub.has_value() &&
143  *depComp.lb < *depComp.ub && *depComp.ub < 0) {
144  LLVM_DEBUG(llvm::dbgs()
145  << "Dependence component lb = " << Twine(*depComp.lb)
146  << " ub = " << Twine(*depComp.ub)
147  << " is negative at depth: " << Twine(d)
148  << " and thus violates the legality rule.\n");
149  return false;
150  }
151  }
152  }
153  }
154  }
155 
156  return true;
157 }
158 
159 // Returns tile sizes to use. Checks CL options; if none are specified, sets it
160 // based on a simple model that looks at the memory footprint and determines
161 // tile sizes assuming identity accesses / 1:1 tile size proportional footprint
162 // along each of the dimensions being tiled.
163 // TODO: evolve this model. Tile size determination is a large area
164 // to play with in general.
165 void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
166  SmallVectorImpl<unsigned> *tileSizes) {
167  if (band.empty())
168  return;
169 
170  // Use command-line tileSize for all loops if specified.
171  if (tileSize) {
172  tileSizes->assign(band.size(), tileSize);
173  return;
174  }
175 
176  // Use tileSizes and fill them with default tile size if it's short.
177  if (!this->tileSizes.empty()) {
178  tileSizes->assign(this->tileSizes.begin(), this->tileSizes.end());
179  tileSizes->resize(band.size(), kDefaultTileSize);
180  return;
181  }
182  tileSizes->resize(band.size());
183 
184  // The first loop in the band.
185  AffineForOp rootForOp = band[0];
186  (void)rootForOp;
187 
188  // Obtain memory footprint and set tile sizes so that a tile fits in
189  // the cache size. This is an approximation with the assumption that the
190  // footprint increases with the tile size linearly in that dimension (i.e.,
191  // assumes one-to-one access function).
192  std::optional<int64_t> fp = getMemoryFootprintBytes(band[0], 0);
193  if (!fp) {
194  // Fill with default tile sizes if footprint is unknown.
195  std::fill(tileSizes->begin(), tileSizes->end(),
196  LoopTiling::kDefaultTileSize);
197  if (avoidMaxMinBounds)
198  adjustToDivisorsOfTripCounts(band, tileSizes);
199  LLVM_DEBUG(
200  rootForOp.emitWarning("memory footprint unknown: using default tile "
201  "sizes adjusted to trip count divisors"));
202  return;
203  }
204 
205  // Check how many times larger the cache size is when compared to footprint.
206  uint64_t cacheSizeBytes = cacheSizeInKiB * 1024;
207  uint64_t excessFactor = llvm::divideCeil(*fp, cacheSizeBytes);
208  if (excessFactor <= 1) {
209  // No need of any tiling - set tile size to 1.
210  std::fill(tileSizes->begin(), tileSizes->end(), 1);
211  return;
212  }
213 
214  // Divide all loops equally in an attempt to reduce footprint.
215  // TODO: this is approximate. Ideally, obtain reuse factor /
216  // profitability along each dimension and weight tile sizes based on that as
217  // one possible approach. Or compute a polynomial in tile sizes and solve for
218  // it.
219 
220  // For an n-d tileable band, compute the n^th root of the excess.
221  unsigned tSize =
222  static_cast<unsigned>(floorl(std::pow(excessFactor, 1.0 / band.size())));
223  // We'll keep a running product to determine the last tile size better.
224  unsigned cumulProductOfTileSizes = 1;
225  for (unsigned i = 0, e = band.size(); i < e; i++) {
226  if (i < e - 1)
227  (*tileSizes)[i] = tSize;
228  else
229  // Set last tile size to cover the balance.
230  (*tileSizes)[i] = std::max(
231  1U, static_cast<unsigned>(excessFactor / cumulProductOfTileSizes));
232  cumulProductOfTileSizes *= (*tileSizes)[i];
233  }
234  if (avoidMaxMinBounds)
235  adjustToDivisorsOfTripCounts(band, tileSizes);
236 }
237 
238 void LoopTiling::runOnOperation() {
239  // Bands of loops to tile.
240  std::vector<SmallVector<AffineForOp, 6>> bands;
241  getTileableBands(getOperation(), &bands);
242 
243  // Tile each band.
244  for (auto &band : bands) {
245  if (!checkTilingLegality(band)) {
246  band.front().emitRemark("tiling code is illegal due to dependences");
247  continue;
248  }
249 
250  // Set up tile sizes; fill missing tile sizes at the end with default tile
251  // size or tileSize if one was provided.
252  SmallVector<unsigned, 6> tileSizes;
253  getTileSizes(band, &tileSizes);
254  if (llvm::DebugFlag) {
255  auto diag = band[0].emitRemark("using tile sizes [");
256  for (unsigned tSize : tileSizes)
257  diag << tSize << ' ';
258  diag << "]\n";
259  }
260  SmallVector<AffineForOp, 6> tiledNest;
261  if (failed(tilePerfectlyNested(band, tileSizes, &tiledNest))) {
262  // An empty band always succeeds.
263  assert(!band.empty() && "guaranteed to succeed on empty bands");
264  LLVM_DEBUG(band.front()->emitRemark("loop tiling failed!\n"));
265  continue;
266  }
267 
268  // Separate full and partial tiles.
269  if (separate) {
270  auto intraTileLoops =
271  MutableArrayRef<AffineForOp>(tiledNest).drop_front(band.size());
272  if (failed(separateFullTiles(intraTileLoops))) {
273  assert(!intraTileLoops.empty() &&
274  "guaranteed to succeed on empty bands");
275  LLVM_DEBUG(intraTileLoops.front()->emitRemark(
276  "separation post tiling failed!\n"));
277  }
278  }
279  }
280 }
281 
282 constexpr unsigned LoopTiling::kDefaultTileSize;
static bool checkTilingLegality(MutableArrayRef< AffineForOp > origLoops)
Checks whether hyper-rectangular loop tiling of the nest represented by origLoops is valid.
Definition: LoopTiling.cpp:104
static void adjustToDivisorsOfTripCounts(ArrayRef< AffineForOp > band, SmallVectorImpl< unsigned > *tileSizes)
Reduces each tile size to the largest divisor of the corresponding trip count (if the trip count is k...
Definition: LoopTiling.cpp:78
static std::string diag(const llvm::Value &value)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
Definition: Utils.cpp:1953
std::unique_ptr< OperationPass< func::FuncOp > > createLoopTilingPass(uint64_t cacheSizeBytes)
Creates a pass to perform tiling on loop nests.
Definition: LoopTiling.cpp:68
bool hasDependence(DependenceResult result)
Utility function that returns true if the provided DependenceResult corresponds to a dependence resul...
LogicalResult tilePerfectlyNested(MutableArrayRef< AffineForOp > input, ArrayRef< unsigned > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops.
Definition: LoopUtils.cpp:780
void getTileableBands(func::FuncOp f, std::vector< SmallVector< AffineForOp, 6 >> *bands)
Identify valid and profitable bands of loops to tile.
Definition: LoopUtils.cpp:881
LogicalResult separateFullTiles(MutableArrayRef< AffineForOp > nest, SmallVectorImpl< AffineForOp > *fullTileNest=nullptr)
Separates full tiles from partial tiles for a perfect nest nest by generating a conditional guard tha...
Definition: LoopUtils.cpp:2715
llvm::TypeSize divideCeil(llvm::TypeSize numerator, uint64_t denominator)
Divides the known min value of the numerator by the denominator and rounds the result up to the next ...
Include the generated interface declarations.
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
Definition: LogicalResult.h:72
Checks whether two accesses to the same memref access the same element.
Encapsulates a memref load or store access information.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.