1 //===- Utils.cpp - Utilities to support the Linalg dialect ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements utilities for the Linalg dialect.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "mlir/Dialect/Linalg/Utils/Utils.h"
14 
30 #include "mlir/IR/AffineExpr.h"
31 #include "mlir/IR/AffineExprVisitor.h"
32 #include "mlir/IR/AffineMap.h"
33 #include "mlir/IR/Matchers.h"
35 #include "mlir/Pass/Pass.h"
36 #include "llvm/ADT/TypeSwitch.h"
37 #include "llvm/Support/Debug.h"
38 #include <optional>
39 
40 #define DEBUG_TYPE "linalg-utils"
41 
42 using namespace mlir;
43 using namespace presburger;
44 using namespace mlir::affine;
45 using namespace mlir::linalg;
46 using namespace mlir::scf;
47 
48 namespace {
49 
50 // Helper visitor to determine whether an AffineExpr is tiled.
51 // This is achieved by traversing every AffineDimExpr with position `pos` and
52 // checking whether the corresponding `tileSizes[pos]` is non-zero.
53 // This also enforces that only positive coefficients occur in multiplications.
54 //
55 // Example:
56 // `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0]
57 //
58 struct TileCheck : public AffineExprVisitor<TileCheck> {
59  TileCheck(ArrayRef<OpFoldResult> tileSizes) : tileSizes(tileSizes) {}
60 
61  void visitDimExpr(AffineDimExpr expr) {
62  isTiled |= !isZeroIndex(tileSizes[expr.getPosition()]);
63  }
64  void visitAffineBinaryOpExpr(AffineBinaryOpExpr expr) {
65  visit(expr.getLHS());
66  visit(expr.getRHS());
67  if (expr.getKind() == mlir::AffineExprKind::Mul)
68  assert(expr.getRHS().cast<AffineConstantExpr>().getValue() > 0 &&
69  "nonpositive multiplying coefficient");
70  }
71  bool isTiled = false;
72  ArrayRef<OpFoldResult> tileSizes;
73 };
74 
75 } // namespace
76 
77 static bool isTiled(AffineExpr expr, ArrayRef<OpFoldResult> tileSizes) {
78  if (!expr)
79  return false;
80  TileCheck t(tileSizes);
81  t.visit(expr);
82  return t.isTiled;
83 }
84 
85 // Checks whether `map` varies with respect to a non-zero `tileSize`.
86 static bool isTiled(AffineMap map, ArrayRef<OpFoldResult> tileSizes) {
87  if (!map)
88  return false;
89  for (unsigned r = 0; r < map.getNumResults(); ++r)
90  if (isTiled(map.getResult(r), tileSizes))
91  return true;
92  return false;
93 }
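As a hedged illustration of the two checks above (not part of the original file; it assumes an MLIRContext *ctx and an OpBuilder b are in scope), the example from the TileCheck comment behaves as follows:

  AffineExpr d0 = getAffineDimExpr(0, ctx);
  AffineExpr d1 = getAffineDimExpr(1, ctx);
  AffineExpr d3 = getAffineDimExpr(3, ctx);
  AffineExpr expr = d0 + 2 * d1 + d3;
  // Position 3 (d3) has a non-zero tile size, so the expression is tiled.
  SmallVector<OpFoldResult> tiled = {b.getIndexAttr(0), b.getIndexAttr(0),
                                     b.getIndexAttr(0), b.getIndexAttr(2)};
  assert(isTiled(expr, tiled));
  // Only position 2 (d2) is tiled, and `expr` never references d2.
  SmallVector<OpFoldResult> untiled = {b.getIndexAttr(0), b.getIndexAttr(0),
                                       b.getIndexAttr(2), b.getIndexAttr(0)};
  assert(!isTiled(expr, untiled));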
94 
95 std::optional<RegionMatcher::BinaryOpKind>
96 RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
97  auto &region = op.getRegion();
98  if (!llvm::hasSingleElement(region))
99  return std::nullopt;
100 
101  Block &block = region.front();
102  if (block.getNumArguments() != 2 ||
103  !block.getArgument(0).getType().isSignlessIntOrFloat() ||
104  !block.getArgument(1).getType().isSignlessIntOrFloat())
105  return std::nullopt;
106 
107  auto &ops = block.getOperations();
108  if (!llvm::hasSingleElement(block.without_terminator()))
109  return std::nullopt;
110 
111  using mlir::matchers::m_Val;
112  auto a = m_Val(block.getArgument(0));
113  auto b = m_Val(block.getArgument(1));
114 
115  auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
116  if (addPattern.match(&ops.back()))
117  return BinaryOpKind::IAdd;
118 
119  return std::nullopt;
120 }
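A minimal usage sketch (not from the original file; `genericOp` stands for a linalg.generic the caller has already located):

  if (std::optional<RegionMatcher::BinaryOpKind> kind =
          RegionMatcher::matchAsScalarBinaryOp(genericOp)) {
    // The body is a single arith.addi of the two block arguments followed by
    // linalg.yield, so the op can be treated as a scalar integer addition.
    if (*kind == RegionMatcher::BinaryOpKind::IAdd) {
      // ... caller-specific handling ...
    }
  }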
121 
122 /// Explicit instantiation of loop nest generator for different loop types.
123 template struct mlir::linalg::GenerateLoopNest<scf::ForOp>;
124 template struct mlir::linalg::GenerateLoopNest<scf::ParallelOp>;
125 template struct mlir::linalg::GenerateLoopNest<AffineForOp>;
126 
127 /// Given a list of subview ranges, extract individual values for lower, upper
128 /// bounds and steps and put them into the corresponding vectors.
129 static void unpackRanges(OpBuilder &builder, Location loc,
130  ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
131  SmallVectorImpl<Value> &ubs,
132  SmallVectorImpl<Value> &steps) {
133  for (Range range : ranges) {
134  lbs.emplace_back(
135  getValueOrCreateConstantIndexOp(builder, loc, range.offset));
136  ubs.emplace_back(getValueOrCreateConstantIndexOp(builder, loc, range.size));
137  steps.emplace_back(
138  getValueOrCreateConstantIndexOp(builder, loc, range.stride));
139  }
140 }
141 
142 //===----------------------------------------------------------------------===//
143 // General utilities
144 //===----------------------------------------------------------------------===//
145 
146 namespace mlir {
147 namespace linalg {
148 
149 bool allIndexingsAreProjectedPermutation(LinalgOp op) {
150  return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
151  return m.isProjectedPermutation(/*allowZeroInResults=*/true);
152  });
153 }
154 
155 bool hasOnlyScalarElementwiseOp(Region &r) {
156  if (!llvm::hasSingleElement(r))
157  return false;
158  for (Operation &op : r.front()) {
159  if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
160  linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
161  OpTrait::hasElementwiseMappableTraits(&op)) ||
162  llvm::any_of(op.getResultTypes(),
163  [](Type type) { return !type.isIntOrIndexOrFloat(); }))
164  return false;
165  }
166  return true;
167 }
168 
169 bool isElementwise(LinalgOp op) {
170  if (op.getNumLoops() != op.getNumParallelLoops())
171  return false;
172 
173  if (!allIndexingsAreProjectedPermutation(op))
174  return false;
175 
176  // TODO: relax the restrictions on indexing map.
177  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
178  if (!op.getMatchingIndexingMap(&opOperand).isPermutation())
179  return false;
180  }
181  return hasOnlyScalarElementwiseOp(op->getRegion(0));
182 }
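A small usage sketch (illustrative only; `funcOp` is a hypothetical func::FuncOp handle):

  SmallVector<linalg::LinalgOp> elementwiseOps;
  funcOp.walk([&](linalg::LinalgOp op) {
    if (linalg::isElementwise(op))
      elementwiseOps.push_back(op);
  });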
183 
184 bool isParallelIterator(utils::IteratorType iteratorType) {
185  return iteratorType == utils::IteratorType::parallel;
186 }
187 
188 bool isReductionIterator(utils::IteratorType iteratorType) {
189  return iteratorType == utils::IteratorType::reduction;
190 }
191 
192 Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
193  Value source, Value pad, bool nofold) {
194  // Exit if `source` is not defined by an ExtractSliceOp.
195  auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
196  if (!sliceOp)
197  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
198 
199  // Search the `source` use-def chain for padded LinalgOps.
200  Value current = sliceOp.getSource();
201  while (current) {
202  auto linalgOp = current.getDefiningOp<LinalgOp>();
203  if (!linalgOp)
204  break;
205  OpResult opResult = cast<OpResult>(current);
206  current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();
207  }
208  auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
209 
210  // Exit if the search fails to match a tensor::PadOp at the end of the matched
211  // LinalgOp sequence.
212  if (!padOp)
213  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
214 
215  // Exit if the padded result type does not match.
216  if (sliceOp.getSource().getType() != type)
217  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
218 
219  // Exit if the LinalgOps are not high padded.
220  if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
221  return getConstantIntValue(ofr) != static_cast<int64_t>(0);
222  }))
223  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
224 
225  // Exit if `padOpSliceOp`, which defines the slice used by
226  // `padOp`, is rank-reducing.
227  auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
228  if (!padOpSliceOp ||
229  sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
230  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
231 
232  // Exit if the sizes of `sliceOp` do not match the sizes of the slice
233  // padded by `padOp`.
234  if (llvm::any_of(
235  llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
236  [](std::tuple<OpFoldResult, OpFoldResult> it) {
237  return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
238  }))
239  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
240 
241  // Exit if the padding values do not match.
242  Attribute padOpPadAttr, padAttr;
243  Value padOpPad = padOp.getConstantPaddingValue();
244  if (!padOpPad || !matchPattern(padOpPad, m_Constant(&padOpPadAttr)) ||
245  !matchPattern(pad, m_Constant(&padAttr)) || padOpPadAttr != padAttr)
246  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
247 
248  // Return the padded result if the padding values and sizes match.
249  return sliceOp.getSource();
250 }
251 
252 GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
253  Value outputTensor,
254  ArrayRef<int64_t> transposeVector) {
255  auto resultTensorType = cast<RankedTensorType>(outputTensor.getType());
256  Type elementType = resultTensorType.getElementType();
257 
258  assert(isPermutationVector(transposeVector) &&
259  "expect transpose vector to be a permutation");
260  assert(transposeVector.size() ==
261  static_cast<size_t>(resultTensorType.getRank()) &&
262  "expect transpose vector size to match result tensor rank");
263 
264  // Compute the transpose and the identity indexing maps.
265  SmallVector<AffineMap> indexingMaps = {
266  inversePermutation(AffineMap::getPermutationMap(
267  SmallVector<unsigned>(transposeVector.begin(), transposeVector.end()),
268  b.getContext())),
269  AffineMap::getMultiDimIdentityMap(transposeVector.size(),
270  b.getContext())};
271  SmallVector<utils::IteratorType> iteratorTypes(transposeVector.size(),
272  utils::IteratorType::parallel);
273 
274  // Create a GenericOp to transpose `inputTensor` into `outputTensor`.
275  auto transposeOp =
276  b.create<GenericOp>(loc, resultTensorType, inputTensor, outputTensor,
277  indexingMaps, iteratorTypes);
278  Region &body = transposeOp.getRegion();
279  body.push_back(new Block());
280  body.front().addArguments({elementType, elementType}, {loc, loc});
281 
282  // Create the body of the transpose operation.
283  OpBuilder::InsertionGuard g(b);
284  b.setInsertionPointToEnd(&body.front());
285  b.create<YieldOp>(loc, transposeOp.getRegion().front().getArgument(0));
286  return transposeOp;
287 }
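A usage sketch (illustrative; `input` and `init` are hypothetical ranked tensor values with transposed shapes):

  // Swap the two dimensions of a ?x?xf32 tensor into a pre-allocated init.
  GenericOp transpose =
      makeTransposeOp(b, loc, /*inputTensor=*/input, /*outputTensor=*/init,
                      /*transposeVector=*/{1, 0});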
288 
289 GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to) {
290  auto memrefTypeTo = cast<MemRefType>(to.getType());
291 #ifndef NDEBUG
292  auto memrefTypeFrom = cast<MemRefType>(from.getType());
293  assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
294  "`from` and `to` memref must have the same rank");
295 #endif // NDEBUG
296 
297  AffineMap id =
298  AffineMap::getMultiDimIdentityMap(memrefTypeTo.getRank(), b.getContext());
299  SmallVector<utils::IteratorType> iteratorTypes(memrefTypeTo.getRank(),
300  utils::IteratorType::parallel);
301  return b.create<linalg::GenericOp>(
302  loc,
303  /*inputs=*/from,
304  /*outputs=*/to,
305  /*indexingMaps=*/llvm::ArrayRef({id, id}),
306  /*iteratorTypes=*/iteratorTypes,
307  [](OpBuilder &b, Location loc, ValueRange args) {
308  b.create<linalg::YieldOp>(loc, args.front());
309  });
310 }
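A usage sketch (illustrative; `src` and `dst` are hypothetical memref values of the same rank):

  GenericOp copy = makeMemRefCopyOp(b, loc, /*from=*/src, /*to=*/dst);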
311 
312 /// Specialization to build an scf "for" nest.
313 template <>
314 void GenerateLoopNest<scf::ForOp>::doit(
315  OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
316  ArrayRef<utils::IteratorType> iteratorTypes,
317  function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
318  ValueRange)>
319  bodyBuilderFn,
320  ArrayRef<linalg::ProcInfo> procInfo) {
321  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
322  "expected as many entries for proc info as number of loops, even if "
323  "they are null entries");
324  SmallVector<Value> iterArgInitValues;
325  if (!linalgOp.hasBufferSemantics())
326  llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
327  SmallVector<Value, 4> lbs, ubs, steps;
328  unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
329  LoopNest loopNest = mlir::scf::buildLoopNest(
330  b, loc, lbs, ubs, steps, iterArgInitValues,
331  [&](OpBuilder &b, Location loc, ValueRange ivs, ValueRange iterArgs) {
332  assert(iterArgs.size() == iterArgInitValues.size() &&
333  "expect the number of output tensors and iter args to match");
334  SmallVector<Value> operandValuesToUse = linalgOp->getOperands();
335  if (!iterArgs.empty()) {
336  operandValuesToUse = linalgOp.getDpsInputs();
337  operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
338  }
339  return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
340  });
341 
342  if (loopNest.loops.empty() || procInfo.empty())
343  return;
344 
345  // Filter out scf.for loops that were created out of parallel dimensions.
346  for (const auto &loop : llvm::enumerate(loopNest.loops)) {
347  if (procInfo[loop.index()].distributionMethod ==
348  DistributionMethod::Cyclic) {
349  mapLoopToProcessorIds(loop.value(), procInfo[loop.index()].procId,
350  procInfo[loop.index()].nprocs);
351  }
352  }
353 }
354 
355 /// Specialization to build affine "for" nest.
356 template <>
357 void GenerateLoopNest<AffineForOp>::doit(
358  OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
359  ArrayRef<utils::IteratorType> iteratorTypes,
360  function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
361  ValueRange)>
362  bodyBuilderFn,
363  ArrayRef<linalg::ProcInfo> /*procInfo*/) {
364  SmallVector<Value> iterArgInitValues;
365  if (!linalgOp.hasBufferSemantics())
366  llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
367  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
368  SmallVector<Value, 4> lbs, ubs, steps;
369  unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
370 
371  // Affine loops require constant steps.
372  SmallVector<int64_t, 4> constantSteps;
373  constantSteps.reserve(steps.size());
374  for (Value v : steps) {
375  auto constVal = getConstantIntValue(v);
376  assert(constVal.has_value() && "Affine loops require constant steps");
377  constantSteps.push_back(constVal.value());
378  }
379 
380  affine::buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
381  [&](OpBuilder &b, Location loc, ValueRange ivs) {
382  bodyBuilderFn(b, loc, ivs,
383  linalgOp->getOperands());
384  });
385 }
386 
387 /// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`.
388 void updateBoundsForCyclicDistribution(OpBuilder &b, Location loc, Value procId,
389  Value nprocs, Value &lb, Value &ub,
390  Value &step) {
391  AffineExpr d0, d1;
392  bindDims(b.getContext(), d0, d1);
393  AffineExpr s0 = getAffineSymbolExpr(0, b.getContext());
394  lb =
395  affine::makeComposedAffineApply(b, loc, d0 + d1 * s0, {lb, procId, step});
396  step = affine::makeComposedAffineApply(b, loc, d0 * s0, {nprocs, step});
397 }
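A worked instance of the two composed applies above (illustrative numbers only, not from the original file):

  // With lb = 0, step = 1, nprocs = 4 and procId = p, the maps produce
  //   lb'   = lb + procId * step = p
  //   step' = nprocs * step      = 4
  // so processor p visits iterations p, p + 4, p + 8, ... (cyclic distribution).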
398 
399 /// Generates a loop nest consisting of scf.parallel and scf.for, depending
400 /// on the `iteratorTypes`. Consecutive parallel loops create a single
401 /// scf.parallel operation; each sequential loop creates a new scf.for
402 /// operation. The body of the innermost loop is populated by
403 /// `bodyBuilderFn` that accepts a range of induction variables for all
404 /// loops. `ivStorage` is used to store the partial list of induction
405 /// variables.
406 // TODO: this function can be made iterative instead. However, it
407 // will have at most as many recursive calls as nested loops, which rarely
408 // exceeds 10.
409 static void generateParallelLoopNest(
410  OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs,
411  ValueRange steps, ArrayRef<utils::IteratorType> iteratorTypes,
412  ArrayRef<linalg::ProcInfo> procInfo,
413  function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilderFn,
414  SmallVectorImpl<Value> &ivStorage) {
415  assert(lbs.size() == ubs.size());
416  assert(lbs.size() == steps.size());
417  assert(lbs.size() == iteratorTypes.size());
418  assert(procInfo.empty() || (lbs.size() == procInfo.size()));
419 
420  // If there are no (more) loops to be generated, generate the body and be
421  // done with it.
422  if (iteratorTypes.empty()) {
423  bodyBuilderFn(b, loc, ivStorage);
424  return;
425  }
426 
427  // If there are no outer parallel loops, generate one sequential loop and
428  // recurse.
429  if (!isParallelIterator(iteratorTypes.front())) {
430  LoopNest singleLoop = buildLoopNest(
431  b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
432  [&](OpBuilder &b, Location loc, ValueRange ivs) {
433  ivStorage.append(ivs.begin(), ivs.end());
434  generateParallelLoopNest(
435  b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
436  iteratorTypes.drop_front(),
437  procInfo.empty() ? procInfo : procInfo.drop_front(),
438  bodyBuilderFn, ivStorage);
439  });
440  return;
441  }
442 
443  unsigned nLoops = iteratorTypes.size();
444  unsigned numProcessed = 0;
445  DistributionMethod distributionMethod = DistributionMethod::None;
446  if (procInfo.empty()) {
447  numProcessed = nLoops - iteratorTypes.drop_while(isParallelIterator).size();
448  } else {
449  distributionMethod = procInfo.front().distributionMethod;
450  numProcessed =
451  nLoops - procInfo
452  .drop_while([&](linalg::ProcInfo p) {
453  return p.distributionMethod == distributionMethod;
454  })
455  .size();
456  }
457 
458  auto remainderProcInfo =
459  procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
460  switch (distributionMethod) {
461  case DistributionMethod::None: {
462  // Generate a single parallel loop-nest operation for all outermost
463  // parallel loops and recurse.
464  b.create<scf::ParallelOp>(
465  loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
466  steps.take_front(numProcessed),
467  [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
468  ivStorage.append(localIvs.begin(), localIvs.end());
469  generateParallelLoopNest(
470  nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
471  ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
472  iteratorTypes.drop_front(numProcessed), remainderProcInfo,
473  bodyBuilderFn, ivStorage);
474  });
475  return;
476  }
477  case DistributionMethod::Cyclic: {
478  // Generate a single parallel loop-nest operation for all outermost
479  // parallel loops and recurse.
480  b.create<scf::ParallelOp>(
481  loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
482  steps.take_front(numProcessed),
483  [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
484  ivStorage.append(localIvs.begin(), localIvs.end());
485  generateParallelLoopNest(
486  nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
487  ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
488  iteratorTypes.drop_front(numProcessed), remainderProcInfo,
489  bodyBuilderFn, ivStorage);
490  });
491  return;
492  }
493  case DistributionMethod::CyclicNumProcsGeNumIters: {
494  // Check (for the processed loops) that the iteration is in-bounds.
495  ArithBuilder ab(b, loc);
496  Value cond = ab.slt(lbs[0], ubs[0]);
497  for (unsigned i = 1; i < numProcessed; ++i)
498  cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
499  ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
500  b.create<scf::IfOp>(loc, cond, [&](OpBuilder &b, Location loc) {
501  generateParallelLoopNest(b, loc, lbs.drop_front(numProcessed),
502  ubs.drop_front(numProcessed),
503  steps.drop_front(numProcessed),
504  iteratorTypes.drop_front(numProcessed),
505  remainderProcInfo, bodyBuilderFn, ivStorage);
506  b.create<scf::YieldOp>(loc, ValueRange{});
507  });
508  return;
509  }
510  case DistributionMethod::CyclicNumProcsEqNumIters:
511  // No check/loops needed here. Set the `%iv` to be the `%lb` and proceed
512  // with inner loop generation.
513  ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
514  generateParallelLoopNest(
515  b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
516  steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
517  remainderProcInfo, bodyBuilderFn, ivStorage);
518  return;
519  }
520 }
521 
522 /// Specialization for generating a mix of parallel and sequential scf loops.
523 template <>
524 void GenerateLoopNest<scf::ParallelOp>::doit(
525  OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
526  ArrayRef<utils::IteratorType> iteratorTypes,
527  function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
528  ValueRange)>
529  bodyBuilderFn,
530  ArrayRef<linalg::ProcInfo> procInfo) {
531  SmallVector<Value> iterArgInitValues;
532  if (!linalgOp.hasBufferSemantics())
533  llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
534  assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
535  // This function may be passed more iterator types than ranges.
536  assert(iteratorTypes.size() >= loopRanges.size() &&
537  "expected iterator type for all ranges");
538  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
539  "expected proc information for all loops when present");
540  iteratorTypes = iteratorTypes.take_front(loopRanges.size());
541  SmallVector<Value, 8> lbsStorage, ubsStorage, stepsStorage, ivs;
542  unsigned numLoops = iteratorTypes.size();
543  ivs.reserve(numLoops);
544  lbsStorage.reserve(numLoops);
545  ubsStorage.reserve(numLoops);
546  stepsStorage.reserve(numLoops);
547 
548  // Get the loop lb, ub, and step.
549  unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
550 
551  // Modify the lb, ub, and step based on the distribution options.
552  for (const auto &it : llvm::enumerate(procInfo)) {
553  if (it.value().distributionMethod != linalg::DistributionMethod::None) {
554  updateBoundsForCyclicDistribution(
555  b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
556  ubsStorage[it.index()], stepsStorage[it.index()]);
557  }
558  }
559  ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
560  generateParallelLoopNest(
561  b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
562  [&](OpBuilder &b, Location loc, ValueRange ivs) {
563  bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
564  },
565  ivs);
566 
567  assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
568 }
569 
570 static Value materializeTiledShape(OpBuilder &builder, Location loc,
571  Value valueToTile,
572  const SliceParameters &sliceParams) {
573  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
574  auto *sliceOp = TypeSwitch<ShapedType, Operation *>(shapedType)
575  .Case([&](MemRefType) {
576  return builder.create<memref::SubViewOp>(
577  loc, valueToTile, sliceParams.offsets,
578  sliceParams.sizes, sliceParams.strides);
579  })
580  .Case([&](RankedTensorType) {
581  return builder.create<tensor::ExtractSliceOp>(
582  loc, valueToTile, sliceParams.offsets,
583  sliceParams.sizes, sliceParams.strides);
584  })
585  .Default([](ShapedType) -> Operation * {
586  llvm_unreachable("Unexpected shaped type");
587  });
588  return sliceOp->getResult(0);
589 }
590 
591 Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
592  ArrayRef<OpFoldResult> tileSizes, AffineMap map,
593  ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
594  ArrayRef<OpFoldResult> subShapeSizes,
595  bool omitPartialTileCheck) {
596  SliceParameters sliceParams =
597  computeSliceParameters(builder, loc, valueToTile, tileSizes, map, lbs,
598  ubs, subShapeSizes, omitPartialTileCheck);
599  return materializeTiledShape(builder, loc, valueToTile, sliceParams);
600 }
601 
602 SliceParameters
603 computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
604  ArrayRef<OpFoldResult> tileSizes, AffineMap map,
605  ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
606  ArrayRef<OpFoldResult> subShapeSizes,
607  bool omitPartialTileCheck) {
608  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
609  assert(shapedType && "only shaped types can be tiled");
610  ArrayRef<int64_t> shape = shapedType.getShape();
611  int64_t rank = shapedType.getRank();
612 
613  // Compute offsets/sizes/strides for the tile.
614  SliceParameters sliceParams;
615  sliceParams.offsets.reserve(rank);
616  sliceParams.sizes.reserve(rank);
617  sliceParams.strides.reserve(rank);
618  for (unsigned r = 0; r < rank; ++r) {
619  LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
620  if (!isTiled(map.getSubMap({r}), tileSizes)) {
621  sliceParams.offsets.push_back(builder.getIndexAttr(0));
622  OpFoldResult dim = createFoldedDimOp(builder, loc, valueToTile, r);
623  sliceParams.sizes.push_back(dim);
624  sliceParams.strides.push_back(builder.getIndexAttr(1));
625  LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
626  continue;
627  }
628  LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");
629 
630  // Tiling creates a new slice at the proper index; the slice step is 1
631  // (i.e. the op does not subsample, stepping occurs in the loop).
632  auto m = map.getSubMap({r});
633  LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
634  IRRewriter rewriter(builder);
635  OpFoldResult offset = makeComposedFoldedAffineApply(rewriter, loc, m, lbs);
636  sliceParams.offsets.push_back(offset);
637  OpFoldResult closedIntSize =
638  makeComposedFoldedAffineApply(rewriter, loc, m, subShapeSizes);
639  // Resulting size needs to be made half open interval again.
640  AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext());
641  OpFoldResult size =
642  makeComposedFoldedAffineApply(rewriter, loc, s0 + 1, closedIntSize);
643  LLVM_DEBUG(llvm::dbgs()
644  << "computeSliceParameters: raw size: " << size << "\n");
645  LLVM_DEBUG(llvm::dbgs()
646  << "computeSliceParameters: new offset: " << offset << "\n");
647  sliceParams.strides.push_back(builder.getIndexAttr(1));
648 
649  if (omitPartialTileCheck) {
650  // We statically know that the partial/boundary tile condition is
651  // unnecessary.
652  LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
653  sliceParams.sizes.push_back(size);
654  continue;
655  }
656 
657  // The size of the subview / extract_slice should be trimmed to avoid
658  // out-of-bounds accesses, unless:
659  // a. We statically know the subshape size divides the shape size evenly.
660  // b. The subshape size is 1. According to the way the loops are set up,
661  // tensors with "0" dimensions would never be constructed.
662  int64_t shapeSize = shape[r];
663  std::optional<int64_t> sizeCst = getConstantIntValue(size);
664  auto hasTileSizeOne = sizeCst && *sizeCst == 1;
665  auto dividesEvenly = sizeCst && !ShapedType::isDynamic(shapeSize) &&
666  ((shapeSize % *sizeCst) == 0);
667  if (!hasTileSizeOne && !dividesEvenly) {
668  LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
669  << ", size: " << size
670  << ": make sure in bound with affine.min\n");
671 
672  AffineExpr dim0, dim1, dim2;
673  bindDims(builder.getContext(), dim0, dim1, dim2);
674 
675  // Get the dimension size for this dimension. We need to first compute
676  // the max index and then add one. This is important because for
677  // convolution ops, the input window dimension has an affine map of the
678  // form `(d0 * s0 + d1)`, where `d0`/`d1` is an output/filter window
679  // dimension and `s0` is the stride. Directly using the dimension size of
680  // the output/filter window dimensions would give an incorrect result.
681  AffineMap minusOneMap =
682  AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 - 1}})
683  .front();
684  AffineMap plusOneMap =
685  AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 + 1}})
686  .front();
687  SmallVector<OpFoldResult> maxIndices =
688  llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) {
689  return makeComposedFoldedAffineApply(rewriter, loc, minusOneMap,
690  {ub});
691  }));
692  OpFoldResult maxIndex =
693  makeComposedFoldedAffineApply(rewriter, loc, m, maxIndices);
694  OpFoldResult d =
695  makeComposedFoldedAffineApply(rewriter, loc, plusOneMap, {maxIndex});
696 
697  // Compute min(dim - offset, size) to avoid out-of-bounds accesses.
698  AffineMap minMap = AffineMap::inferFromExprList(
699  {ArrayRef<AffineExpr>{dim1 - dim2, dim0}})
700  .front();
701  size =
702  makeComposedFoldedAffineMin(rewriter, loc, minMap, {size, d, offset});
703  }
704  LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
705  sliceParams.sizes.push_back(size);
706  }
707  return sliceParams;
708 }
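A worked instance of the per-dimension computation above (illustrative only; assumes the submap for dimension r is simply (dr)):

  // offset = apply((dr), lbs)                = lbs[r]
  // size   = apply(s0 + 1, subShapeSizes[r]) = tileSizes[r]   (interval reopened)
  // stride = 1
  // Unless omitPartialTileCheck is set, `size` is then clamped to
  //   min(size, dimSize - offset)
  // where dimSize is recovered as maxIndex + 1 through the same submap.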
709 
710 SmallVector<OpFoldResult> computeTileOffsets(OpBuilder &b, Location loc,
711  ArrayRef<OpFoldResult> ivs,
712  ArrayRef<OpFoldResult> tileSizes) {
713  SmallVector<OpFoldResult> offsets;
714  for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
715  LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
716  bool isTiled = !isZeroIndex(tileSizes[idx]);
717  offsets.push_back(isTiled ? ivs[idxIvs++] : b.getIndexAttr(0));
718  LLVM_DEBUG(llvm::dbgs()
719  << "computeTileOffsets: " << offsets.back() << "\n");
720  }
721  return offsets;
722 }
723 
724 SmallVector<OpFoldResult> computeTileSizes(OpBuilder &b, Location loc,
725  ArrayRef<OpFoldResult> tileSizes,
726  ArrayRef<OpFoldResult> sizeBounds) {
727  SmallVector<OpFoldResult> sizes;
728  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
729  bool isTiled = !isZeroIndex(tileSizes[idx]);
730  // Before composing, we need to make range a closed interval.
731  OpFoldResult size = isTiled ? tileSizes[idx] : sizeBounds[idx];
732  AffineExpr d0 = getAffineDimExpr(0, b.getContext());
733  IRRewriter rewriter(b);
734  sizes.push_back(makeComposedFoldedAffineApply(rewriter, loc, d0 - 1, size));
735  LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
736  }
737  return sizes;
738 }
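An illustrative evaluation (not from the original file):

  // tileSizes = {0, 8} and sizeBounds = {%d0, %d1} yield sizes = {%d0 - 1, 7}:
  // each entry is the closed-interval form of the tile (or full) extent, which
  // computeSliceParameters later reopens with "+ 1" after composing it with
  // the per-dimension submaps.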
739 
740 SmallVector<Type> getTensorOutputTypes(LinalgOp op, ValueRange operands) {
741  if (op.hasBufferSemantics())
742  return {};
743  return llvm::to_vector(
744  llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
745  return operands[opOperand.getOperandNumber()].getType();
746  }));
747 }
748 
749 SmallVector<Value> insertSlicesBack(OpBuilder &builder, Location loc,
750  LinalgOp op, ValueRange operands,
751  ValueRange results) {
752  if (op.hasBufferSemantics())
753  return {};
754  SmallVector<Value> tensorResults;
755  tensorResults.reserve(results.size());
756  // Insert an insert_slice for each output tensor.
757  unsigned resultIdx = 0;
758  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
759  // TODO: use an interface/adaptor to avoid leaking position in
760  // `tiledOperands`.
761  Value outputTensor = operands[opOperand.getOperandNumber()];
762  if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
763  Value inserted = builder.create<tensor::InsertSliceOp>(
764  loc, sliceOp.getSource().getType(), results[resultIdx],
765  sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
766  sliceOp.getStrides(), sliceOp.getStaticOffsets(),
767  sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
768  tensorResults.push_back(inserted);
769  } else {
770  tensorResults.push_back(results[resultIdx]);
771  }
772  ++resultIdx;
773  }
774  return tensorResults;
775 }
776 
777 SmallVector<std::optional<SliceParameters>>
778 computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp,
779  ValueRange valuesToTile, ArrayRef<OpFoldResult> ivs,
780  ArrayRef<OpFoldResult> tileSizes,
781  ArrayRef<OpFoldResult> sizeBounds,
782  bool omitPartialTileCheck) {
783  assert(ivs.size() == static_cast<size_t>(llvm::count_if(
784  llvm::make_range(tileSizes.begin(), tileSizes.end()),
785  [](OpFoldResult v) { return !isZeroIndex(v); })) &&
786  "expected as many ivs as non-zero sizes");
787 
788  // Construct (potentially temporary) mins and maxes on which to apply maps
789  // that define tile subshapes.
790  SmallVector<OpFoldResult> lbs =
791  computeTileOffsets(builder, loc, ivs, tileSizes);
792  SmallVector<OpFoldResult> subShapeSizes =
793  computeTileSizes(builder, loc, tileSizes, sizeBounds);
794 
795  assert(static_cast<int64_t>(valuesToTile.size()) <=
796  linalgOp->getNumOperands() &&
797  "more values to tile than operands.");
798  SmallVector<std::optional<SliceParameters>> allSliceParams;
799  allSliceParams.reserve(valuesToTile.size());
800  for (auto [opOperand, val] :
801  llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
802  Value shapedOp = val;
803  LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
804  AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
805  // Use `opOperand` as is if it is not tiled and not an output tensor. Having
806  // an extract/insert slice pair for all output tensors simplifies follow up
807  // transformations such as padding and bufferization since the
808  // extract/insert slice pairs make the accessed iteration argument
809  // subdomains explicit.
810 
811  Type operandType = opOperand.get().getType();
812  if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
813  linalgOp.isDpsInit(&opOperand))) {
814  allSliceParams.push_back(std::nullopt);
815  LLVM_DEBUG(llvm::dbgs()
816  << ": not tiled: use shape: " << operandType << "\n");
817  continue;
818  }
819  LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
820 
821  allSliceParams.push_back(computeSliceParameters(
822  builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
823  omitPartialTileCheck));
824  }
825 
826  return allSliceParams;
827 }
828 
829 SmallVector<Value> makeTiledShapes(OpBuilder &builder, Location loc,
830  LinalgOp linalgOp, ValueRange valuesToTile,
831  ArrayRef<OpFoldResult> ivs,
832  ArrayRef<OpFoldResult> tileSizes,
833  ArrayRef<OpFoldResult> sizeBounds,
834  bool omitPartialTileCheck) {
835  SmallVector<std::optional<SliceParameters>> allSliceParameter =
836  computeAllSliceParameters(builder, loc, linalgOp, valuesToTile, ivs,
837  tileSizes, sizeBounds, omitPartialTileCheck);
838  SmallVector<Value> tiledShapes;
839  for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
840  Value valueToTile = std::get<0>(item);
841  std::optional<SliceParameters> sliceParams = std::get<1>(item);
842  tiledShapes.push_back(
843  sliceParams.has_value()
844  ? materializeTiledShape(builder, loc, valueToTile, *sliceParams)
845  : valueToTile);
846  }
847  return tiledShapes;
848 }
849 
850 void offsetIndices(OpBuilder &b, LinalgOp linalgOp,
851  ArrayRef<OpFoldResult> offsets) {
852  IRRewriter rewriter(b);
853  offsetIndices(rewriter, linalgOp, offsets);
854 }
855 
856 void offsetIndices(RewriterBase &b, LinalgOp linalgOp,
857  ArrayRef<OpFoldResult> offsets) {
858  if (!linalgOp.hasIndexSemantics())
859  return;
860 
861  for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
862  if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
863  continue;
864  OpBuilder::InsertionGuard guard(b);
865  b.setInsertionPointAfter(indexOp);
866  AffineExpr index, offset;
867  bindDims(b.getContext(), index, offset);
868  OpFoldResult applied = affine::makeComposedFoldedAffineApply(
869  b, indexOp.getLoc(), index + offset,
870  {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
871  Value materialized =
872  getValueOrCreateConstantIndexOp(b, indexOp.getLoc(), applied);
873  b.replaceOpWithIf(indexOp, materialized, [&](OpOperand &use) {
874  return use.getOwner() != materialized.getDefiningOp();
875  });
876  }
877 }
878 
879 /// Get the reassociation maps to fold the result of an extract_slice (or
880 /// source of an insert_slice) operation with given offsets and sizes to its
881 /// rank-reduced version. This is only done for the cases where the size is 1
882 /// and offset is 0. Strictly speaking the offset 0 is not required in general,
883 /// but non-zero offsets are not handled by SPIR-V backend at this point (and
884 /// potentially cannot be handled).
885 std::optional<SmallVector<ReassociationIndices>>
886 getReassociationMapForFoldingUnitDims(ArrayRef<OpFoldResult> mixedSizes) {
887  SmallVector<ReassociationIndices> reassociation;
888  ReassociationIndices curr;
889  for (const auto &it : llvm::enumerate(mixedSizes)) {
890  auto dim = it.index();
891  auto size = it.value();
892  curr.push_back(dim);
893  auto attr = llvm::dyn_cast_if_present<Attribute>(size);
894  if (attr && cast<IntegerAttr>(attr).getInt() == 1)
895  continue;
896  reassociation.emplace_back(ReassociationIndices{});
897  std::swap(reassociation.back(), curr);
898  }
899  // When the reassociation is not empty, fold the remaining unit
900  // dimensions into the last dimension. If the reassociation built so far
901  // is empty, leave it empty; this folds everything to a rank-0 tensor.
902  if (!curr.empty() && !reassociation.empty())
903  reassociation.back().append(curr.begin(), curr.end());
904  return reassociation;
905 }
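A few illustrative evaluations (not from the original file):

  // mixedSizes = [1, 4, 1, 8] -> [[0, 1], [2, 3]]  (unit dims folded forward)
  // mixedSizes = [4, 1]       -> [[0, 1]]          (trailing unit dim appended)
  // mixedSizes = [1, 1]       -> []                (folds to a rank-0 value)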
906 
907 } // namespace linalg
908 } // namespace mlir