MLIR  16.0.0git
Tiling.cpp
Go to the documentation of this file.
1 //===- Tiling.cpp - Implementation of linalg Tiling -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the linalg dialect Tiling pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 
25 #include "mlir/IR/AffineExpr.h"
26 #include "mlir/IR/AffineMap.h"
29 #include "llvm/Support/CommandLine.h"
30 #include <utility>
31 
32 namespace mlir {
33 #define GEN_PASS_DEF_LINALGTILINGPASS
34 #include "mlir/Dialect/Linalg/Passes.h.inc"
35 } // namespace mlir
36 
37 using namespace mlir;
38 using namespace mlir::linalg;
39 using namespace mlir::scf;
40 
41 #define DEBUG_TYPE "linalg-tiling"
42 
43 static bool isZero(OpFoldResult v) {
44  if (!v)
45  return false;
46  if (auto attr = v.dyn_cast<Attribute>()) {
47  IntegerAttr intAttr = attr.dyn_cast<IntegerAttr>();
48  return intAttr && intAttr.getValue().isZero();
49  }
50  if (auto cst = v.get<Value>().getDefiningOp<arith::ConstantIndexOp>())
51  return cst.value() == 0;
52  return false;
53 }
54 
55 std::tuple<SmallVector<Range, 4>, LoopIndexToRangeIndexMap>
57  ArrayRef<OpFoldResult> allShapeSizes,
58  ArrayRef<OpFoldResult> allTileSizes) {
59  assert(allTileSizes.size() == map.getNumResults());
60  // Apply `map` to get shape sizes in loop order.
61  SmallVector<OpFoldResult> shapeSizes =
62  makeComposedFoldedMultiResultAffineApply(b, loc, map, allShapeSizes);
63  SmallVector<OpFoldResult> tileSizes(allTileSizes.begin(), allTileSizes.end());
64 
65  // Traverse the tile sizes, which are in loop order, erase zeros everywhere.
66  LoopIndexToRangeIndexMap loopIndexToRangeIndex;
67  for (int idx = 0, e = tileSizes.size(), zerosCount = 0; idx < e; ++idx) {
68  if (isZero(tileSizes[idx - zerosCount])) {
69  shapeSizes.erase(shapeSizes.begin() + idx - zerosCount);
70  tileSizes.erase(tileSizes.begin() + idx - zerosCount);
71  ++zerosCount;
72  continue;
73  }
74  loopIndexToRangeIndex[idx] = idx - zerosCount;
75  }
76 
77  // Create a new range with the applied tile sizes.
79  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx)
80  res.push_back(Range{b.getIndexAttr(0), shapeSizes[idx], tileSizes[idx]});
81  return std::make_tuple(res, loopIndexToRangeIndex);
82 }
83 
85  RewriterBase &b, LinalgOp op, SmallVectorImpl<Value> &ivs,
86  const LoopIndexToRangeIndexMap &loopIndexToRangeIndex) {
87  SmallVector<Value> allIvs(op.getNumLoops(), nullptr);
88  for (auto &en : enumerate(allIvs)) {
89  auto rangeIndex = loopIndexToRangeIndex.find(en.index());
90  if (rangeIndex == loopIndexToRangeIndex.end())
91  continue;
92  en.value() = ivs[rangeIndex->second];
93  }
94  offsetIndices(b, op, getAsOpFoldResult(allIvs));
95 }
96 
97 /// Asserts that the given index-typed value is strictly positive. If the value
98 /// is an attribute, asserts at compile time, otherwise emits an assertion
99 /// checked at runtime.
102  if (auto attr = value.dyn_cast<Attribute>()) {
103  assert(attr.cast<IntegerAttr>().getValue().isStrictlyPositive() &&
104  "expected strictly positive tile size and divisor");
105  return;
106  }
107 
108  Value zero = b.create<arith::ConstantIndexOp>(0);
109  Value condition = b.create<arith::CmpIOp>(arith::CmpIPredicate::sgt,
110  value.get<Value>(), zero);
111  b.create<cf::AssertOp>(
112  condition,
113  b.getStringAttr("expected strictly positive tile size and divisor"));
114 }
115 
118  unsigned dimension, OpFoldResult targetSize,
119  OpFoldResult divisor, bool emitAssertions) {
120  // Bail out on dimension overflow.
121  if (dimension >= op.getNumLoops())
122  return failure();
123 
124  // The code below works only on values.
125  Location loc = op.getLoc();
126  ImplicitLocOpBuilder b(loc, builder);
127  if (emitAssertions) {
128  emitIsPositiveIndexAssertion(b, targetSize);
129  emitIsPositiveIndexAssertion(b, divisor);
130  }
131  Value targetSizeValue =
132  getValueOrCreateConstantIndexOp(builder, loc, targetSize);
133  Value divisorValue = getValueOrCreateConstantIndexOp(builder, loc, divisor);
134 
135  // Find the trip count of the iteration space dimension for which the tile
136  // sizes are computed.
137  SmallVector<OpFoldResult> allShapes =
138  op.createFlatListOfOperandDims(b, b.getLoc());
139  AffineMap shapesToLoops = op.getShapesToLoopsMap();
140  SmallVector<OpFoldResult> loopRanges =
141  makeComposedFoldedMultiResultAffineApply(b, op.getLoc(), shapesToLoops,
142  allShapes);
143  Value tripCount =
144  getValueOrCreateConstantIndexOp(b, op.getLoc(), loopRanges[dimension]);
145 
146  // Compute the tile sizes and the respective numbers of tiles.
150  auto apply = [&](AffineExpr expr, ValueRange values) -> Value {
151  return makeComposedAffineApply(b, b.getLoc(), expr, values);
152  };
153  Value a = apply(s0.floorDiv(s1), {tripCount, divisorValue});
154  Value t = apply((s0 + s1 - 1).floorDiv(s1), {targetSizeValue, divisorValue});
155  Value d = apply((s0 + s1 - 1).floorDiv(s1), {a, t});
156  Value s = apply(s0.floorDiv(s1) * s2, {a, d, divisorValue});
157  Value v = apply(s0 % s1, {a, d});
158  Value u = apply(s0 - s1, {d, v});
159 
161  spec.lowTileSize = s;
162  spec.highTileSize = apply(s0 + s1, {s, divisorValue});
163  spec.lowTripCount = u;
164  spec.highTripCount = v;
165 
166  // If requested, emit the check that the tile sizes are computed correctly.
167  // For example, for iteration dimension size of 15 and the target size 8 it is
168  // impossible to find two tile sizes both divisible by 8 that fully cover the
169  // original space dimension.
170  if (emitAssertions) {
171  AffineExpr s3 = builder.getAffineSymbolExpr(3);
172  Value coveredSize =
173  apply(s0 * s1 + s2 * s3, {spec.lowTileSize, spec.lowTripCount,
174  spec.highTileSize, spec.highTripCount});
175  Value equals = b.create<arith::CmpIOp>(arith::CmpIPredicate::eq,
176  coveredSize, tripCount);
177  b.create<cf::AssertOp>(
178  equals, builder.getStringAttr(
179  "could not compute dynamic multi-size tile shapes"));
180  }
181 
182  return spec;
183 }
184 
185 /// Returns true if the maximum tile offset `tileSize * numThreads-1` is less
186 /// than `iterationSize`.
188  OpFoldResult numThreads,
189  OpFoldResult iterationSize) {
190  Optional<int64_t> tileSizeConst = getConstantIntValue(tileSize);
191  Optional<int64_t> numThreadsConst = getConstantIntValue(numThreads);
192  Optional<int64_t> iterSizeConst = getConstantIntValue(iterationSize);
193  if (!tileSizeConst || !numThreadsConst || !iterSizeConst)
194  return false;
195  return *tileSizeConst * (*numThreadsConst - 1) < *iterSizeConst;
196 }
197 
198 /// Build an `affine_max` of all the `vals`.
200  ArrayRef<OpFoldResult> vals) {
202  b, loc, AffineMap::getMultiDimIdentityMap(vals.size(), loc.getContext()),
203  vals);
204 }
205 
206 /// Build an `affine_min` of all the `vals`.
208  ArrayRef<OpFoldResult> vals) {
210  b, loc, AffineMap::getMultiDimIdentityMap(vals.size(), loc.getContext()),
211  vals);
212 }
213 
214 /// Rewrite a TilingInterface `op` to a tiled `scf.foreach_thread`. The
215 /// tiling is specified by the number of tiles/threads `numThreads` and the
216 /// optional nominal tile size `nominalTileSizes`. If `nominalTilSizes` is
217 /// not specified, then it is derived from `numThreads` as `ceilDiv(dimSize[i],
218 /// numThreads[i])`. If non-empty, the `threadDimMapping` is added as an
219 /// attribute to the resulting `scf.foreach_thread`. A zero tile sizes indicate
220 /// that the dimension is not tiled, and can be thought of as tiling by the full
221 /// size of data.
222 /// It is the user's responsibility to ensure that `numThreads` is a valid
223 /// tiling specification (i.e. that only tiles parallel dimensions, e.g. in the
224 /// Linalg case). If `omitTileOffsetBoundsCheck` is true, then the function will
225 /// assume that `tileSize[i] * (numThread[i] -1) <= dimSize[i]` holds.
227  RewriterBase &b, TilingInterface op, ArrayRef<OpFoldResult> numThreads,
228  Optional<ArrayRef<OpFoldResult>> nominalTileSizes,
229  ArrayRef<int64_t> threadDimMapping, bool omitTileOffsetBoundsCheck) {
230  Location loc = op->getLoc();
232  SmallVector<Range> loopRanges = op.getIterationDomain(b);
233  if (loopRanges.empty())
234  return op->emitOpError("expected non-empty loop ranges");
235  auto hasStrideOne = [](Range r) { return !isConstantIntValue(r.stride, 1); };
236  if (llvm::any_of(loopRanges, hasStrideOne))
237  return op->emitOpError("only stride-1 supported atm");
238  auto dest = op.getDestinationOperands(b);
239 
240  SmallVector<OpFoldResult> nonZeroNumThreads =
241  llvm::to_vector(llvm::make_filter_range(numThreads, [](OpFoldResult ofr) {
242  return !isConstantIntValue(ofr, 0);
243  }));
244  SmallVector<Value> materializedNonZeroNumThreads =
245  llvm::to_vector(llvm::map_range(nonZeroNumThreads, [&](OpFoldResult ofr) {
246  return getValueOrCreateConstantIndexOp(b, loc, ofr);
247  }));
248 
249  Operation *tiledOp = nullptr;
250 
251  // Create the ForeachThreadOp. We don't use the lambda body-builder
252  // version because we require the use of RewriterBase in the body, so we
253  // manually move the insertion point to the body below.
254  scf::ForeachThreadOp foreachThreadOp = b.create<scf::ForeachThreadOp>(
255  loc, dest, ValueRange(materializedNonZeroNumThreads), threadDimMapping);
256 
257  // Fill out the ForeachThreadOp body.
258  b.setInsertionPointToStart(foreachThreadOp.getBody(0));
259  ValueRange threadIds = foreachThreadOp.getThreadIndices();
260  int64_t nLoops = loopRanges.size();
261  SmallVector<OpFoldResult> tiledOffsets, tiledSizes;
262  tiledOffsets.reserve(nLoops);
263  tiledSizes.reserve(nLoops);
264  for (unsigned loopIdx = 0, threadIdIdx = 0; loopIdx < nLoops; ++loopIdx) {
265  bool overflow = loopIdx >= numThreads.size();
266  bool isZero = !overflow && isConstantIntValue(numThreads[loopIdx], 0);
267  // Degenerate case: take the whole domain.
268  if (overflow || isZero) {
269  tiledOffsets.push_back(loopRanges[loopIdx].offset);
270  tiledSizes.push_back(loopRanges[loopIdx].size);
271  continue;
272  }
273 
274  // Tiled case: compute the offset and size.
275  AffineExpr i, j, m, n, o;
276  bindDims(b.getContext(), i, j);
277  bindSymbols(b.getContext(), m, n, o);
278  OpFoldResult size = loopRanges[loopIdx].size;
279  OpFoldResult offset = loopRanges[loopIdx].offset;
280  OpFoldResult threadId = threadIds[threadIdIdx];
281  // Symbolic fixed max size per thread.
282  // TODO: floor + 0/1 depending on case for better load-balancing.
283  OpFoldResult tileSizePerThread =
284  nominalTileSizes.has_value()
285  ? (*nominalTileSizes)[loopIdx]
287  b, loc, m.ceilDiv(n),
288  ArrayRef<OpFoldResult>{size, nonZeroNumThreads[threadIdIdx]});
289 
290  // Dynamic offset shifted by threadId * maxSizePerThread.
292  b, loc, i + j * m, {offset, threadId, tileSizePerThread});
293  // Dynamic upper-bound depending on the threadId.
294  OpFoldResult residualTileSize = makeComposedFoldedAffineApply(
295  b, loc, i + j * m - n,
296  {offset, nonZeroNumThreads[threadIdIdx], tileSizePerThread, size});
297  if (!isConstantIntValue(residualTileSize, 0)) {
298  OpFoldResult sizeMinusOffsetPerThread = makeComposedFoldedAffineApply(
299  b, loc, -i + m, {offsetPerThread, size});
300  tileSizePerThread =
301  buildMin(b, loc, {sizeMinusOffsetPerThread, tileSizePerThread});
302  }
303 
304  tiledOffsets.push_back(offsetPerThread);
305  // TODO: if tileSizePerThread <= 0 early exit.
306  if (!omitTileOffsetBoundsCheck &&
307  !canOmitTileOffsetInBoundsCheck(tileSizePerThread,
308  nonZeroNumThreads[threadIdIdx], size))
309  tileSizePerThread =
310  buildMax(b, loc, {b.getIndexAttr(0), tileSizePerThread});
311 
312  tiledSizes.push_back(tileSizePerThread);
313  ++threadIdIdx;
314  }
315 
316  // Clone the tileable op and update its destination operands to use the output
317  // bbArgs of the ForeachThreadOp.
318  ArrayRef<BlockArgument> destBbArgs =
319  foreachThreadOp.getOutputBlockArguments();
320  Operation *clonedOp = b.clone(*op.getOperation());
321  auto destinationStyleOp = dyn_cast<DestinationStyleOpInterface>(clonedOp);
322  if (destinationStyleOp) {
323  for (OpOperand *outOperand : destinationStyleOp.getOutputOperands()) {
324  auto it = llvm::find(dest, outOperand->get());
325  assert(it != dest.end() && "dest operand not found in dest");
326  unsigned destNum = std::distance(dest.begin(), it);
327  outOperand->set(destBbArgs[destNum]);
328  }
329  }
330 
331  // Tile the cloned op and delete the clone.
332  SmallVector<Operation *> tiledOps =
333  cast<TilingInterface>(clonedOp).getTiledImplementation(b, tiledOffsets,
334  tiledSizes);
335  b.eraseOp(clonedOp);
336  assert(tiledOps.size() == 1 && "expected a single produced tiled op");
337  tiledOp = tiledOps.front();
338 
339  auto tilingInterfaceOp = dyn_cast<TilingInterface>(tiledOp);
340  assert(tilingInterfaceOp && "Tiled op does not implement TilingInterface");
342  for (auto it : llvm::zip(llvm::seq(unsigned(0), unsigned(dest.size())),
343  tilingInterfaceOp->getResults(), destBbArgs)) {
344  b.setInsertionPoint(insertPt.getBlock(), insertPt.getPoint());
345  SmallVector<OpFoldResult> resultOffsets, resultSizes;
346  if (failed(op.getResultTilePosition(b, std::get<0>(it), tiledOffsets,
347  tiledSizes, resultOffsets,
348  resultSizes)))
349  return op->emitOpError("output offsets couldn't be calculated");
350  SmallVector<OpFoldResult> strides(resultSizes.size(), b.getIndexAttr(1));
351  b.setInsertionPointToEnd(foreachThreadOp.getTerminator().getBody());
352  b.create<tensor::ParallelInsertSliceOp>(loc, std::get<1>(it),
353  std::get<2>(it), resultOffsets,
354  resultSizes, strides);
355  }
356  return ForeachThreadTilingResult{foreachThreadOp, tiledOp};
357 }
358 
361  ArrayRef<OpFoldResult> numThreads,
362  ArrayRef<int64_t> threadDimMapping) {
363  return tileToForeachThreadOpImpl(b, op, numThreads, /*nominalTileSizes=*/None,
364  threadDimMapping,
365  /*omitTileOffsetBoundsCheck=*/false);
366 }
367 
370  RewriterBase &b, TilingInterface op, ArrayRef<OpFoldResult> tileSizes,
371  ArrayRef<int64_t> threadDimMapping) {
372  SmallVector<Range> loopRanges = op.getIterationDomain(b);
373  unsigned nLoops = loopRanges.size();
374  SmallVector<OpFoldResult> numThreads;
375  numThreads.reserve(nLoops);
376  AffineExpr s0, s1;
377  bindSymbols(b.getContext(), s0, s1);
378  AffineExpr divExpr = s0.ceilDiv(s1);
379  for (const auto &it : llvm::zip(tileSizes, loopRanges)) {
380  OpFoldResult numTiles = std::get<0>(it);
381  if (!isConstantIntValue(numTiles, 0))
383  b, op.getLoc(), divExpr, {std::get<1>(it).size, std::get<0>(it)});
384  numThreads.push_back(numTiles);
385  }
386  return tileToForeachThreadOpImpl(b, op, numThreads,
387  /*nominalTileSizes=*/tileSizes,
388  threadDimMapping,
389  /*omitTileOffsetBoundsCheck=*/true);
390 }
391 
392 // Insert a tile `source` into the destination tensor `dest`. The position at
393 // which the tile is inserted (as well as size of tile) is taken from a given
394 // ExtractSliceOp `sliceOp`.
396  tensor::ExtractSliceOp sliceOp, Value source,
397  Value dest) {
398  return b.create<tensor::InsertSliceOp>(
399  loc, sliceOp.getSource().getType(), source, dest, sliceOp.getOffsets(),
400  sliceOp.getSizes(), sliceOp.getStrides(), sliceOp.getStaticOffsets(),
401  sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
402 }
403 
404 template <typename LoopTy>
407  const LinalgTilingOptions &options) {
408  auto nLoops = op.getNumLoops();
409  // Initial tile sizes may be too big, only take the first nLoops.
410  tileSizes = tileSizes.take_front(nLoops);
411 
412  if (llvm::all_of(tileSizes, isZero)) {
413  TiledLinalgOp tiledOp;
414  tiledOp.op = cast<LinalgOp>(b.clone(*op.getOperation()));
415  tiledOp.tensorResults.assign(tiledOp.op->result_begin(),
416  tiledOp.op->result_end());
417  return tiledOp;
418  }
419 
420  // 1. Build the tiled loop ranges.
421  SmallVector<OpFoldResult> allShapeSizes =
422  op.createFlatListOfOperandDims(b, op.getLoc());
423  AffineMap shapeSizesToLoopsMap = op.getShapesToLoopsMap();
424  if (!shapeSizesToLoopsMap)
425  return failure();
426 
427  auto [loopRanges, loopIndexToRangeIndex] = makeTiledLoopRanges(
428  b, op.getLoc(), shapeSizesToLoopsMap, allShapeSizes, tileSizes);
429 
430  SmallVector<StringRef, 4> iteratorTypes;
431  for (const auto &attr : enumerate(op.getIteratorTypesArray())) {
432  if (loopIndexToRangeIndex.count(attr.index()))
433  iteratorTypes.push_back(attr.value());
434  }
435  // If interchangeVector is empty, use the identity. Build the permutation map
436  // otherwise.
437  auto invPermutationMap =
438  AffineMap::getMultiDimIdentityMap(tileSizes.size(), b.getContext());
439  if (!options.interchangeVector.empty()) {
440  // Based on the pruned iterations (due to zero tile size), recompute the
441  // interchange vector.
442  SmallVector<unsigned, 4> interchangeVector;
443  interchangeVector.reserve(options.interchangeVector.size());
444  for (auto pos : options.interchangeVector) {
445  auto it = loopIndexToRangeIndex.find(pos);
446  if (it == loopIndexToRangeIndex.end())
447  continue;
448  interchangeVector.push_back(it->second);
449  }
450  // Interchange vector is guaranteed to be a permutation,
451  // `inversePermutation` must succeed.
452  invPermutationMap = inversePermutation(
453  AffineMap::getPermutationMap(interchangeVector, b.getContext()));
454  assert(invPermutationMap);
455  SmallVector<int64_t> permutation(interchangeVector.begin(),
456  interchangeVector.end());
457  applyPermutationToVector(loopRanges, permutation);
458  applyPermutationToVector(iteratorTypes, permutation);
459  }
460 
461  // Handle distribution. Create a vector of the same size of loops that are to
462  // be tiled.
464  if (options.distribution) {
465  procInfo.resize(
466  iteratorTypes.size(),
468  // Collect loop ranges of tiled loopss, loops that are parallel.
469  SmallVector<Range> parallelLoopRanges;
470  for (const auto &iteratorType : llvm::enumerate(iteratorTypes)) {
471  if (!isParallelIterator(iteratorType.value()))
472  break;
473  parallelLoopRanges.push_back(loopRanges[iteratorType.index()]);
474  }
475  auto returnedProcInfo =
476  options.distribution->procInfo(b, op.getLoc(), parallelLoopRanges);
477  unsigned procIdIdx = 0;
478  // Update the distribution information for the loops.
479  for (const auto &iteratorType : llvm::enumerate(iteratorTypes)) {
480  if (!isParallelIterator(iteratorType.value()))
481  break;
482  procInfo[iteratorType.index()] = returnedProcInfo[procIdIdx++];
483  }
484  }
485 
486  // 2. Create the tiled loops.
487  LinalgOp res = op;
488  SmallVector<Value, 4> ivs, tensorResults;
489  auto tiledLoopBodyBuilder =
490  [&](OpBuilder &builder, Location loc, ValueRange localIvs,
491  ValueRange operandValuesToUse) -> scf::ValueVector {
492  ivs.assign(localIvs.begin(), localIvs.end());
493 
494  // When an `interchangeVector` is present, it has been applied to the
495  // loop ranges and the iterator types. Apply its inverse to the
496  // resulting loop `ivs` to match the op definition.
497  SmallVector<Value, 4> interchangedIvs;
498  if (!options.interchangeVector.empty())
499  interchangedIvs = applyMapToValues(b, loc, invPermutationMap, ivs);
500  else
501  interchangedIvs.assign(ivs.begin(), ivs.end());
502 
503  // Tile the `operandValuesToUse` that either match the `op` operands
504  // themselves or the tile loop arguments forwarding them.
505  assert(operandValuesToUse.size() ==
506  static_cast<size_t>(op.getNumInputsAndOutputs()) &&
507  "expect the number of operands and inputs and outputs to match");
508  SmallVector<Value> valuesToTile = operandValuesToUse;
509  SmallVector<OpFoldResult> sizeBounds =
510  makeComposedFoldedMultiResultAffineApply(b, loc, shapeSizesToLoopsMap,
511  allShapeSizes);
512  SmallVector<Value> tiledOperands = makeTiledShapes(
513  b, loc, op, valuesToTile, getAsOpFoldResult(interchangedIvs), tileSizes,
514  sizeBounds,
515  /*omitPartialTileCheck=*/false);
516 
517  SmallVector<Type> resultTensorTypes =
518  getTensorOutputTypes(op, tiledOperands);
519  res = op.clone(b, loc, resultTensorTypes, tiledOperands);
520  tensorResults =
521  insertSlicesBack(builder, loc, op, tiledOperands, res->getResults());
522  return scf::ValueVector(tensorResults.begin(), tensorResults.end());
523  };
524  GenerateLoopNest<LoopTy>::doit(b, op.getLoc(), loopRanges, op, iteratorTypes,
525  tiledLoopBodyBuilder, procInfo);
526 
527  // 3. Transform IndexOp results w.r.t. the tiling.
528  transformIndexOps(b, res, ivs, loopIndexToRangeIndex);
529 
530  // 4. Gather the newly created loops and return them with the new op.
532  loops.reserve(ivs.size());
533  for (auto iv : ivs) {
534  if (iv.isa<BlockArgument>()) {
535  loops.push_back(iv.cast<BlockArgument>().getOwner()->getParentOp());
536  assert(loops.back() && "no owner found for induction variable!");
537  } else {
538  // TODO: Instead of doing this, try to recover the ops used instead of the
539  // loop.
540  loops.push_back(nullptr);
541  }
542  }
543 
544  // 5. Get the tensor results from the outermost loop if available. Otherwise
545  // use the previously captured `tensorResults`.
546  Operation *outermostLoop = nullptr;
547  for (Operation *loop : loops)
548  if ((outermostLoop = loop))
549  break;
550 
551  return TiledLinalgOp{
552  res, loops, outermostLoop ? outermostLoop->getResults() : tensorResults};
553 }
554 
555 template <typename LoopTy>
557  RewriterBase &b, LinalgOp op, const LinalgTilingOptions &options) {
559  b.setInsertionPoint(op);
560 
561  if (!options.tileSizeComputationFunction)
562  return failure();
563 
564  // Enforce the convention that "tiling by zero" skips tiling a particular
565  // dimension. This convention is significantly simpler to handle instead of
566  // adjusting affine maps to account for missing dimensions.
567  auto nLoops = op.getNumLoops();
568  SmallVector<OpFoldResult> tileSizeVector =
570  if (tileSizeVector.size() < nLoops) {
571  tileSizeVector.append(nLoops - tileSizeVector.size(), b.getIndexAttr(0));
572  }
573 
574  return tileLinalgOpImpl<LoopTy>(b, op, tileSizeVector, options);
575 }
576 
579  const LinalgTilingOptions &options) {
580  switch (options.loopType) {
582  return tileLinalgOpImpl<scf::ForOp>(b, op, options);
584  return tileLinalgOpImpl<scf::ParallelOp>(b, op, options);
585  default:;
586  }
587  return failure();
588 }
589 
590 /// Generate a loop nest around a given tensor::PadOp (for tiling). `newPadOp`
591 /// and `loopNest` are output parameters that return the new (tiled)
592 /// tensor::PadOp and the loop nest.
593 static LogicalResult tilePadOp(RewriterBase &builder, tensor::PadOp op,
594  tensor::PadOp &newPadOp, LoopNest &loopNest,
595  const LinalgTilingOptions &options) {
596  Location loc = op.getLoc();
597  OpBuilder::InsertionGuard g(builder);
598  builder.setInsertionPoint(op);
599 
600  // Clone tensor::PadOp so that the existing op can be replaced more easily.
601  newPadOp = cast<tensor::PadOp>(builder.clone(*op.getOperation()));
602  // Get rank and tile sizes.
603  int64_t rank = op.getResultType().getRank();
604  SmallVector<OpFoldResult> tileSizes =
605  getAsOpFoldResult(options.tileSizeComputationFunction(builder, op));
606  // Normalize untiled padding dimensions to 0.
607  tileSizes.append(rank - tileSizes.size(), builder.getIndexAttr(0));
608  // Compute lower and upper bounds of the loop nest.
609  TilingInterface tilingInterface =
610  dyn_cast<TilingInterface>(op.getOperation());
611  SmallVector<Range> ranges = tilingInterface.getIterationDomain(builder);
612  SmallVector<Value> lbs, dims, steps;
614  for (int64_t i = 0; i < rank; ++i) {
615  allDims.push_back(ranges[i].size);
616  if (!isZero(tileSizes[i])) {
617  lbs.push_back(
618  getValueOrCreateConstantIndexOp(builder, loc, ranges[i].offset));
619  dims.push_back(
620  getValueOrCreateConstantIndexOp(builder, loc, ranges[i].size));
621  steps.push_back(
622  getValueOrCreateConstantIndexOp(builder, loc, tileSizes[i]));
623  }
624  }
625  // Generate loop nest: One loop per dimension.
626  SmallVector<Value> destOperand =
627  tilingInterface.getDestinationOperands(builder);
628  loopNest = mlir::scf::buildLoopNest(
629  builder, loc, lbs, /*ubs=*/dims, steps, ValueRange(destOperand),
630  [&](OpBuilder &b, Location loc, ValueRange localIvs,
631  ValueRange iterArgs) -> scf::ValueVector {
632  // Compute offsets and sizes of ExtractSliceOp.
633  SmallVector<Value> localIVVector = llvm::to_vector(localIvs);
635  b, loc, getAsOpFoldResult(localIVVector), tileSizes);
637  computeTileSizes(b, loc, tileSizes, allDims);
638  // Create ExtractSliceOp: Extract a tile from the tensor::PadOp.
639  // Note: The tensor::PadOp is located outside of the loop nest. It is
640  // later moved inside by ExtractSliceOfPadTensorSwapPattern.
641  auto map = AffineMap::getMultiDimIdentityMap(rank, b.getContext());
642  Value tiledOutput = makeTiledShape(
643  b, loc, newPadOp->getResult(0), tileSizes, map, offsets, allDims,
644  sizes, /*omitPartialTileCheck=*/false);
645  auto sliceOp = tiledOutput.getDefiningOp<tensor::ExtractSliceOp>();
646  assert(sliceOp && "expected ExtractSliceOp");
647  // Insert the tile into the output tensor.
648  Value yieldValue =
649  insertSliceIntoTensor(b, loc, sliceOp, sliceOp, iterArgs[0]);
650  return scf::ValueVector({yieldValue});
651  });
652  return success();
653 }
654 
655 namespace {
656 struct PadOpTilingPattern : public OpRewritePattern<tensor::PadOp> {
657  PadOpTilingPattern(MLIRContext *ctx, LinalgTilingOptions opt)
658  : OpRewritePattern<tensor::PadOp>(ctx), options(std::move(opt)) {}
659 
660  LogicalResult matchAndRewrite(tensor::PadOp op,
661  PatternRewriter &rewriter) const override {
663  return failure();
664  tensor::PadOp newPadOp;
665  LoopNest loopNest;
666  if (failed(tilePadOp(rewriter, op, newPadOp, loopNest, options)))
667  return failure();
669  rewriter.getUnitAttr());
670  // Replace all uses of the original tensor::PadOp.
671  rewriter.replaceOp(op, loopNest.getResults()[0]);
672  return success();
673  }
674 
676 };
677 } // namespace
678 
679 namespace {
680 /// Helper classes for type list expansion.
681 template <typename... OpTypes>
682 class CanonicalizationPatternList;
683 
684 template <>
685 class CanonicalizationPatternList<> {
686 public:
687  static void insert(RewritePatternSet &patterns) {}
688 };
689 
690 template <typename OpTy, typename... OpTypes>
691 class CanonicalizationPatternList<OpTy, OpTypes...> {
692 public:
693  static void insert(RewritePatternSet &patterns) {
694  OpTy::getCanonicalizationPatterns(patterns, patterns.getContext());
695  CanonicalizationPatternList<OpTypes...>::insert(patterns);
696  }
697 };
698 } // namespace
699 
702  RewritePatternSet patterns(ctx);
704  return patterns;
705 }
706 
708  RewritePatternSet &patterns) {
709  auto *ctx = patterns.getContext();
710  AffineApplyOp::getCanonicalizationPatterns(patterns, ctx);
711  AffineForOp::getCanonicalizationPatterns(patterns, ctx);
712  AffineMinOp::getCanonicalizationPatterns(patterns, ctx);
713  AffineMaxOp::getCanonicalizationPatterns(patterns, ctx);
714  arith::ConstantIndexOp::getCanonicalizationPatterns(patterns, ctx);
715 
716  memref::SubViewOp::getCanonicalizationPatterns(patterns, ctx);
717  memref::ViewOp::getCanonicalizationPatterns(patterns, ctx);
718 
719  scf::ForOp::getCanonicalizationPatterns(patterns, ctx);
720  scf::ParallelOp::getCanonicalizationPatterns(patterns, ctx);
721 
722  tensor::CastOp::getCanonicalizationPatterns(patterns, ctx);
723  tensor::EmptyOp::getCanonicalizationPatterns(patterns, ctx);
724  tensor::ExtractSliceOp::getCanonicalizationPatterns(patterns, ctx);
725  tensor::InsertSliceOp::getCanonicalizationPatterns(patterns, ctx);
726  tensor::PadOp::getCanonicalizationPatterns(patterns, ctx);
727  ctx->getLoadedDialect<LinalgDialect>()->getCanonicalizationPatterns(patterns);
728 
729  CanonicalizationPatternList<
730 #define GET_OP_LIST
731 #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
732  >::insert(patterns);
733 }
734 
735 /// Populate the given list with patterns that apply Linalg tiling.
737  const LinalgTilingOptions &options) {
738  auto *ctx = patterns.getContext();
740  StringAttr::get(ctx, "tiled"));
741  TilingPatterns<GenericOp,
742 #define GET_OP_LIST
743 #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
744  >::insert(patterns, options, f);
745  patterns.add<PadOpTilingPattern>(ctx, options);
746 }
747 
749  RewritePatternSet &patterns, const LinalgTilingOptions &options) {
750  auto *ctx = patterns.getContext();
751  patterns.add<PadOpTilingPattern>(ctx, options);
752 }
753 
754 static void applyExtractSliceOfPadTensorSwapPattern(func::FuncOp funcOp) {
755  MLIRContext *ctx = funcOp.getContext();
756  RewritePatternSet patterns(ctx);
757  patterns.add<ExtractSliceOfPadTensorSwapPattern>(patterns.getContext());
758  (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
761 }
762 
763 namespace {
764 struct LinalgTilingPass : public impl::LinalgTilingPassBase<LinalgTilingPass> {
765  LinalgTilingPass() = default;
766  LinalgTilingPass(ArrayRef<int64_t> tileSizes, LinalgTilingLoopType loopType) {
767  this->tileSizes = tileSizes;
768  this->loopType = "";
769  this->loopTypeEnum = loopType;
770  }
771 
772  void runOnOperation() override {
773  func::FuncOp funcOp = getOperation();
774  LinalgTilingLoopType type =
776  .Case("for", LinalgTilingLoopType::Loops)
777  .Case("affine", LinalgTilingLoopType::AffineLoops)
778  .Case("parallel", LinalgTilingLoopType::ParallelLoops)
779  .Default(loopTypeEnum);
780  auto options =
781  LinalgTilingOptions().setTileSizes(tileSizes).setLoopType(type);
782  MLIRContext *ctx = funcOp.getContext();
783  RewritePatternSet patterns(ctx);
784  insertTilingPatterns(patterns, options);
786  (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
789  // Drop the marker.
790  funcOp.walk([](LinalgOp op) {
792  });
793 
794  // Apply swap pattern after generating loop nest and running
795  // canonicalizations.
797  }
798 
799  LinalgTilingLoopType loopTypeEnum;
800 };
801 
802 } // namespace
803 
804 std::unique_ptr<OperationPass<func::FuncOp>>
806  linalg::LinalgTilingLoopType loopType) {
807  return std::make_unique<LinalgTilingPass>(tileSizes, loopType);
808 }
static void doit(OpBuilder &b, Location loc, ArrayRef< Range > loopRanges, LinalgOp linalgOp, ArrayRef< StringRef > iteratorTypes, function_ref< scf::ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilderFn, ArrayRef< linalg::ProcInfo > procInfo={})
Include the generated interface declarations.
Helper class to control application of linalg transformation patterns.
Definition: Transforms.h:365
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .. sizeof...(exprs)).
Definition: AffineExpr.h:343
std::tuple< SmallVector< Range, 4 >, LoopIndexToRangeIndexMap > makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > allShapeSizes, ArrayRef< OpFoldResult > allTileSizes)
Definition: Tiling.cpp:56
AffineMap inversePermutation(AffineMap map)
Returns a map of codomain to domain dimensions such that the first codomain dimension for a particula...
Definition: AffineMap.cpp:665
MLIRContext * getContext() const
Definition: Builders.h:54
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands...
Definition: AffineOps.cpp:1066
RewritePatternSet & add(ConstructorArg &&arg, ConstructorArgs &&...args)
Add an instance of each of the pattern types 'Ts' to the pattern list with the given arguments...
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
Definition: PatternMatch.h:600
static OpFoldResult buildMin(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > vals)
Build an affine_min of all the vals.
Definition: Tiling.cpp:207
Operation is a basic unit of execution within MLIR.
Definition: Operation.h:28
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
Definition: IndexingUtils.h:39
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:30
Callback function type used to get processor ID, and number of processors used for distribution for a...
Definition: Utils.h:360
SmallVector< Value, 4 > tensorResults
Definition: Transforms.h:175
Block::iterator getPoint() const
Definition: Builders.h:291
TileSizeComputationFunction tileSizeComputationFunction
Computation function that returns the tile sizes for each operation.
Definition: Transforms.h:614
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:349
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
This class represents a single result from folding an operation.
Definition: OpDefinition.h:239
Operation * clone(Operation &op, BlockAndValueMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:506
Rewrite a TilingInterface op to a tiled scf.foreach_thread, applying tiling by numThreads.
Definition: Transforms.h:484
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value...
Definition: LogicalResult.h:72
AffineExpr getAffineSymbolExpr(unsigned position)
Definition: Builders.cpp:327
static AffineMap getPermutationMap(ArrayRef< unsigned > permutation, MLIRContext *context)
Returns an AffineMap representing a permutation.
Definition: AffineMap.cpp:206
std::vector< Value > ValueVector
An owning vector of values, handy to return from functions.
Definition: SCF.h:63
Rewrite extract_slice(tensor.pad(x)) into tensor.pad(extract_slice(x)).
Definition: Transforms.h:1148
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ValueRange operands)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
Definition: AffineOps.cpp:957
void populatePadTensorTilingPatterns(RewritePatternSet &patterns, const LinalgTilingOptions &options)
Definition: Tiling.cpp:748
static FailureOr< TiledLinalgOp > tileLinalgOpImpl(RewriterBase &b, LinalgOp op, ArrayRef< OpFoldResult > tileSizes, const LinalgTilingOptions &options)
Definition: Tiling.cpp:406
static bool canOmitTileOffsetInBoundsCheck(OpFoldResult tileSize, OpFoldResult numThreads, OpFoldResult iterationSize)
Returns true if the maximum tile offset tileSize * numThreads-1 is less than iterationSize.
Definition: Tiling.cpp:187
static constexpr const bool value
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:48
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
LinalgTilingLoopType loopType
The type of tile loops to generate.
Definition: Transforms.h:646
OpFoldResult makeComposedFoldedAffineMax(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMaxOp that computes a maximum across the results of applying map to operands...
Definition: AffineOps.cpp:1072
Block * getOwner() const
Returns the block that owns this argument.
Definition: Value.h:309
std::unique_ptr< OperationPass< func::FuncOp > > createLinalgTilingPass(ArrayRef< int64_t > tileSizes={}, linalg::LinalgTilingLoopType loopType=linalg::LinalgTilingLoopType::Loops)
Definition: Tiling.cpp:805
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:418
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
int64_t floorDiv(int64_t lhs, int64_t rhs)
Returns the result of MLIR's floordiv operation on constants.
Definition: MathExtras.h:33
virtual void replaceOp(Operation *op, ValueRange newValues)
This method replaces the results of the operation with the specified list of values.
Optional< LinalgLoopDistributionOptions > distribution
When specified, specifies distribution of generated tile loops to processors.
Definition: Transforms.h:655
static LogicalResult tilePadOp(RewriterBase &builder, tensor::PadOp op, tensor::PadOp &newPadOp, LoopNest &loopNest, const LinalgTilingOptions &options)
Generate a loop nest around a given tensor::PadOp (for tiling).
Definition: Tiling.cpp:593
This class provides support for representing a failure result, or a valid value of type T...
Definition: LogicalResult.h:78
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted ...
Definition: Utils.cpp:916
UnitAttr getUnitAttr()
Definition: Builders.cpp:95
Attributes are known-constant values of operations.
Definition: Attributes.h:25
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:232
SmallVector< OpFoldResult > computeTileSizes(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds)
Computes tile sizes, given a list of tileSizes and dimension sizes (sizeBounds).
Definition: Utils.cpp:891
ResultRange getResults()
Definition: SCF.h:66
Base type for affine expression.
Definition: AffineExpr.h:68
static void emitIsPositiveIndexAssertion(ImplicitLocOpBuilder &b, OpFoldResult value)
Asserts that the given index-typed value is strictly positive.
Definition: Tiling.cpp:100
SmallVector< Value, 4 > applyMapToValues(OpBuilder &b, Location loc, AffineMap map, ValueRange values)
Returns the values obtained by applying map to the list of values.
Definition: AffineOps.cpp:1088
unsigned getNumResults() const
Definition: AffineMap.cpp:314
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder...
Definition: Utils.cpp:990
SmallVector< unsigned, 4 > interchangeVector
The interchange vector to reorder the tiled loops.
Definition: Transforms.h:638
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued...
Definition: AffineMap.h:42
static void applyExtractSliceOfPadTensorSwapPattern(func::FuncOp funcOp)
Definition: Tiling.cpp:754
Block * getBlock() const
Definition: Builders.h:290
InsertPoint saveInsertionPoint() const
Return a saved insertion point.
Definition: Builders.h:336
FailureOr< TiledLinalgOp > tileLinalgOp(RewriterBase &b, LinalgOp op, const LinalgTilingOptions &options)
Definition: Tiling.cpp:578
This class represents an argument of a Block.
Definition: Value.h:300
AffineExpr ceilDiv(uint64_t v) const
Definition: AffineExpr.cpp:807
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
Definition: AffineOps.cpp:1005
bool isConstantIntValue(OpFoldResult ofr, int64_t value)
Return true if ofr is constant integer equal to value.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.
void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns)
Definition: Tiling.cpp:707
static void insertTilingPatterns(RewritePatternSet &patterns, const LinalgTilingOptions &options)
Populate the given list with patterns that apply Linalg tiling.
Definition: Tiling.cpp:736
void populateSCFForLoopCanonicalizationPatterns(RewritePatternSet &patterns)
Populate patterns for canonicalizing operations inside SCF loop bodies.
OpFoldResult getAsOpFoldResult(Value val)
Given a value, try to extract a constant Attribute.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:85
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e.
Definition: SCF.cpp:505
void transformIndexOps(RewriterBase &b, LinalgOp op, SmallVectorImpl< Value > &ivs, const LoopIndexToRangeIndexMap &loopIndexToRangeIndex)
All indices returned by IndexOp should be invariant with respect to tiling.
Definition: Tiling.cpp:84
static llvm::ManagedStatic< PassManagerOptions > options
static Value insertSliceIntoTensor(OpBuilder &b, Location loc, tensor::ExtractSliceOp sliceOp, Value source, Value dest)
Definition: Tiling.cpp:395
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Definition: PatternMatch.h:355
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:382
SmallVector< OpFoldResult > computeTileOffsets(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes)
Computes tile offsets, given a list of loop ivs and tileSizes.
Definition: Utils.cpp:877
OpTy create(Args &&...args)
Create an operation of specific op type at the current insertion point and location.
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:299
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
Definition: Utils.cpp:907
MLIRContext * getContext() const
Return the context this location is uniqued in.
Definition: Location.h:58
LinalgTilingLoopType
The type of loops to be generated during tiling.
Definition: Utils.h:149
static const StringLiteral kLinalgTransformMarker
Definition: Transforms.h:355
This class represents a saved insertion point.
Definition: Builders.h:278
ImplicitLocOpBuilder maintains a &#39;current location&#39;, allowing use of the create<> method without spec...
LinalgTilingOptions & setLoopType(LinalgTilingLoopType lt)
Definition: Transforms.h:648
FailureOr< MultiSizeSpecification > computeMultiTileSizes(OpBuilder &builder, LinalgOp op, unsigned dimension, OpFoldResult targetSize, OpFoldResult divisor, bool emitAssertions=true)
Emits the IR computing the multi-sized tiling specification with two tile sizes not exceeding targetS...
Definition: Tiling.cpp:117
Specialization of arith.constant op that returns an integer of index type.
Definition: Arith.h:80
Perform standalone tiling of a single LinalgOp by tileSizes.
Definition: Transforms.h:172
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
SmallVector< OpFoldResult > makeComposedFoldedMultiResultAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Variant of makeComposedFoldedAffineApply suitable for multi-result maps.
Definition: AffineOps.cpp:1030
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:56
This class represents an operand of an operation.
Definition: Value.h:251
static OpFoldResult buildMax(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > vals)
Build an affine_max of all the vals.
Definition: Tiling.cpp:199
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Definition: Builders.h:387
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with &#39;numDims&#39; identity result dim exprs.
Definition: AffineMap.cpp:256
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
Definition: AffineExpr.h:336
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition: Utils.cpp:53
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
Definition: Utils.cpp:1011
FailureOr< ForeachThreadTilingResult > tileToForeachThreadOp(RewriterBase &builder, TilingInterface op, ArrayRef< OpFoldResult > numThreads, ArrayRef< int64_t > threadDimMapping={})
Definition: Tiling.cpp:360
RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx)
Canonicalization patterns relevant to apply after tiling patterns.
Definition: Tiling.cpp:701
Optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
LogicalResult applyPatternsAndFoldGreedily(MutableArrayRef< Region > regions, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig())
Rewrite the regions of the specified operation, which must be isolated from above, by repeatedly applying the highest benefit patterns in a greedy work-list driven manner.
result_range getResults()
Definition: Operation.h:332
This class helps build Operations.
Definition: Builders.h:197
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:345
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:105
FailureOr< ForeachThreadTilingResult > tileToForeachThreadOpUsingTileSizes(RewriterBase &builder, TilingInterface op, ArrayRef< OpFoldResult > tileSizes, ArrayRef< int64_t > threadDimMapping={})
Same as tileToForeachThreadOp, but calculate the number of threads required using the given tileSizes...
Definition: Tiling.cpp:369
A description of a multi-size tiling comprising tile sizes and numbers of tiles, expressed as Values ...
Definition: Transforms.h:436
static bool isZero(OpFoldResult v)
Definition: Tiling.cpp:43
StringAttr getStringAttr(const Twine &bytes)
Definition: Builders.cpp:239
bool isParallelIterator(StringRef iteratorType)
Check if iterator type has "parallel" semantics.
Definition: Utils.cpp:202
Location getLoc() const
Accessors for the implied location.
MLIRContext * getContext() const
Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
Definition: Utils.cpp:758
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:398
LinalgTilingOptions & setTileSizes(const SmallVector< Value, 4 > &ts)
Set the tileSizeComputationFunction to return the values ts.
Definition: Transforms.h:624
static FailureOr< ForeachThreadTilingResult > tileToForeachThreadOpImpl(RewriterBase &b, TilingInterface op, ArrayRef< OpFoldResult > numThreads, Optional< ArrayRef< OpFoldResult >> nominalTileSizes, ArrayRef< int64_t > threadDimMapping, bool omitTileOffsetBoundsCheck)
Rewrite a TilingInterface op to a tiled scf.foreach_thread.
Definition: Tiling.cpp:226