//===- Utils.cpp - Utilities to support the Linalg dialect ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities for the Linalg dialect.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Linalg/Utils/Utils.h"

#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/LoopUtils.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/Utils/Utils.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Matchers.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include <optional>

#define DEBUG_TYPE "linalg-utils"

using namespace mlir;
using namespace presburger;
using namespace mlir::affine;
using namespace mlir::linalg;
using namespace mlir::scf;

namespace {

// Helper visitor to determine whether an AffineExpr is tiled.
// This is achieved by traversing every AffineDimExpr with position `pos` and
// checking whether the corresponding `tileSizes[pos]` is non-zero.
// This also enforces only positive coefficients occur in multiplications.
//
// Example:
//   `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0]
//
struct TileCheck : public AffineExprVisitor<TileCheck> {
  TileCheck(ArrayRef<OpFoldResult> tileSizes) : tileSizes(tileSizes) {}

  void visitDimExpr(AffineDimExpr expr) {
    isTiled |= !isZeroInteger(tileSizes[expr.getPosition()]);
  }
  void visitAffineBinaryOpExpr(AffineBinaryOpExpr expr) {
    visit(expr.getLHS());
    visit(expr.getRHS());
    if (expr.getKind() == mlir::AffineExprKind::Mul)
      assert(cast<AffineConstantExpr>(expr.getRHS()).getValue() > 0 &&
             "nonpositive multiplying coefficient");
  }
  bool isTiled = false;
  ArrayRef<OpFoldResult> tileSizes;
};

} // namespace

static bool isTiled(AffineExpr expr, ArrayRef<OpFoldResult> tileSizes) {
  if (!expr)
    return false;
  TileCheck t(tileSizes);
  t.visit(expr);
  return t.isTiled;
}

// Checks whether the `map` varies with respect to a non-zero `tileSize`.
static bool isTiled(AffineMap map, ArrayRef<OpFoldResult> tileSizes) {
  if (!map)
    return false;
  for (unsigned r = 0; r < map.getNumResults(); ++r)
    if (isTiled(map.getResult(r), tileSizes))
      return true;
  return false;
}

std::optional<RegionMatcher::BinaryOpKind>
RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
  auto &region = op.getRegion();
  if (!region.hasOneBlock())
    return std::nullopt;

  Block &block = region.front();
  if (block.getNumArguments() != 2 ||
      !block.getArgument(0).getType().isSignlessIntOrFloat() ||
      !block.getArgument(1).getType().isSignlessIntOrFloat())
    return std::nullopt;

  auto &ops = block.getOperations();
  if (!llvm::hasSingleElement(block.without_terminator()))
    return std::nullopt;

  using mlir::matchers::m_Val;
  auto a = m_Val(block.getArgument(0));
  auto b = m_Val(block.getArgument(1));

  auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
  if (addPattern.match(&ops.back()))
    return BinaryOpKind::IAdd;

  return std::nullopt;
}

/// Explicit instantiation of loop nest generator for different loop types.
template struct mlir::linalg::GenerateLoopNest<scf::ForOp>;
template struct mlir::linalg::GenerateLoopNest<scf::ParallelOp>;
template struct mlir::linalg::GenerateLoopNest<AffineForOp>;

/// Given a list of subview ranges, extract individual values for lower, upper
/// bounds and steps and put them into the corresponding vectors.
static void unpackRanges(OpBuilder &builder, Location loc,
                         ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
                         SmallVectorImpl<Value> &ubs,
                         SmallVectorImpl<Value> &steps) {
  for (Range range : ranges) {
    lbs.emplace_back(
        getValueOrCreateConstantIndexOp(builder, loc, range.offset));
    ubs.emplace_back(getValueOrCreateConstantIndexOp(builder, loc, range.size));
    steps.emplace_back(
        getValueOrCreateConstantIndexOp(builder, loc, range.stride));
  }
}

//===----------------------------------------------------------------------===//
// General utilities
//===----------------------------------------------------------------------===//
//
/// The permutation can be obtained from two permutations:
///   a) Compute the permutation vector to move the last `numPackedDims` into
///      the `innerPosDims` of a shape of rank `rank`.
///   b) Compute the permutation vector to move outer dims if the
///      `outerPerm` parameter is not empty.
/// Apply (b) permutation on (a) permutation to get the final permutation.
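///
/// E.g. (illustrative), with `rank` = 5, `innerDimsPos` = {1} and an empty
/// `outerPerm`: (a) moves the single trailing packed dim (position 4) to
/// position 2, right after its source dim, giving [0, 1, 4, 2, 3]; (b) is
/// then the identity.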
static SmallVector<int64_t>
computePackUnPackPerm(int64_t rank, ArrayRef<int64_t> &innerDimsPos,
                      ArrayRef<int64_t> &outerPerm,
                      PackingMetadata &packingMetadata) {
  int64_t numPackedDims = innerDimsPos.size();
  auto lastDims =
      llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
  packingMetadata = computePackingMetadata(rank, innerDimsPos);
  SmallVector<int64_t> innerPositionsPerm =
      computePermutationVector(rank, lastDims, packingMetadata.insertPositions);

  SmallVector<int64_t> outerPos = packingMetadata.outerPositions;
  if (!outerPerm.empty())
    applyPermutationToVector(outerPos, outerPerm);
  SmallVector<int64_t> outerPositionPerm =
      computePermutationVector(rank, packingMetadata.outerPositions, outerPos);

  SmallVector<int64_t> packInverseDestPermutation = innerPositionsPerm;
  applyPermutationToVector(packInverseDestPermutation, outerPositionPerm);
  return packInverseDestPermutation;
}

namespace mlir {
namespace linalg {

SmallVector<int64_t> getPackInverseDestPerm(PackOp packOp) {

  PackingMetadata pMetadata;
  int64_t packedRank = packOp.getDestType().getRank();
  ArrayRef<int64_t> innerDimPos = packOp.getInnerDimsPos();
  ArrayRef<int64_t> outerPerm = packOp.getOuterDimsPerm();
  SmallVector<int64_t> packInvDestPerm =
      computePackUnPackPerm(packedRank, innerDimPos, outerPerm, pMetadata);
  return packInvDestPerm;
}

SmallVector<int64_t> getUnPackInverseSrcPerm(UnPackOp unpackOp) {
  PackingMetadata metadata;
  return getUnPackInverseSrcPerm(unpackOp, metadata);
}

SmallVector<int64_t> getUnPackInverseSrcPerm(UnPackOp unpackOp,
                                             PackingMetadata &metadata) {
  int64_t unpackRank = unpackOp.getSourceType().getRank();
  ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
  ArrayRef<int64_t> outerPerm = unpackOp.getOuterDimsPerm();
  SmallVector<int64_t> unpackInvSrcPerm =
      computePackUnPackPerm(unpackRank, innerDimPos, outerPerm, metadata);
  return unpackInvSrcPerm;
}

bool allIndexingsAreProjectedPermutation(LinalgOp op) {
  return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
    return m.isProjectedPermutation(/*allowZeroInResults=*/true);
  });
}

bool hasOnlyScalarElementwiseOp(Region &r) {
  if (!r.hasOneBlock())
    return false;
  for (Operation &op : r.front()) {
    if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
              linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
          OpTrait::hasElementwiseMappableTraits(&op)) ||
        llvm::any_of(op.getResultTypes(),
                     [](Type type) { return !type.isIntOrIndexOrFloat(); }))
      return false;
  }
  return true;
}

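// Note: the check below treats an op as elementwise only if every loop is
// parallel, every indexing map is a projected permutation, each output map is
// a full permutation, and the body contains only scalar elementwise ops; e.g.
// a scalar add generic qualifies, while matmul does not (it has a reduction
// loop).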
bool isElementwise(LinalgOp op) {
  if (op.getNumLoops() != op.getNumParallelLoops())
    return false;

  if (!allIndexingsAreProjectedPermutation(op))
    return false;

  // TODO: relax the restrictions on indexing map.
  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
    if (!op.getMatchingIndexingMap(&opOperand).isPermutation())
      return false;
  }
  return hasOnlyScalarElementwiseOp(op->getRegion(0));
}

bool isParallelIterator(utils::IteratorType iteratorType) {
  return iteratorType == utils::IteratorType::parallel;
}

bool isReductionIterator(utils::IteratorType iteratorType) {
  return iteratorType == utils::IteratorType::reduction;
}

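/// Create a tensor::PadOp that pads `source` to the shape of `type` with the
/// given `pad` value. When `source` is an extract_slice of a chain of padded
/// LinalgOps whose padding value and sizes match, reuse the existing padded
/// value instead of creating a new pad.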
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
                            Value source, Value pad, bool nofold,
                            ValueRange typeDynDims) {
  // Exit if `source` is not defined by an ExtractSliceOp.
  auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
  if (!sliceOp)
    return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
                                   typeDynDims);

  // Search the `source` use-def chain for padded LinalgOps.
  Value current = sliceOp.getSource();
  while (current) {
    auto linalgOp = current.getDefiningOp<LinalgOp>();
    if (!linalgOp)
      break;
    OpResult opResult = cast<OpResult>(current);
    current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();
  }
  auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;

  // Exit if the search fails to match a tensor::PadOp at the end of the
  // matched LinalgOp sequence.
  if (!padOp)
    return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
                                   typeDynDims);

  // Exit if the padded result type does not match.
  if (sliceOp.getSource().getType() != type)
    return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
                                   typeDynDims);

  // Exit if the LinalgOps are not high padded.
  if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
        return getConstantIntValue(ofr) != static_cast<int64_t>(0);
      }))
    return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
                                   typeDynDims);

  // Exit if `padOpSliceOp`, which defines the slice used by
  // `padOp`, is rank-reducing.
  auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
  if (!padOpSliceOp ||
      sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
    return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
                                   typeDynDims);

  // Exit if the sizes of `sliceOp` do not match the sizes of the slice padded
  // by `padOp`.
  if (llvm::any_of(
          llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
          [](std::tuple<OpFoldResult, OpFoldResult> it) {
            return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
          }))
    return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
                                   typeDynDims);

  // Exit if the padding values do not match.
  Attribute padOpPadAttr, padAttr;
  Value padOpPad = padOp.getConstantPaddingValue();
  if (!padOpPad || !matchPattern(padOpPad, m_Constant(&padOpPadAttr)) ||
      !matchPattern(pad, m_Constant(&padAttr)) || padOpPadAttr != padAttr)
    return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
                                   typeDynDims);

  // Return the padded result if the padding values and sizes match.
  return sliceOp.getSource();
}

GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to) {
  auto memrefTypeTo = cast<MemRefType>(to.getType());
#ifndef NDEBUG
  auto memrefTypeFrom = cast<MemRefType>(from.getType());
  assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
         "`from` and `to` memref must have the same rank");
#endif // NDEBUG

  AffineMap id =
      AffineMap::getMultiDimIdentityMap(memrefTypeTo.getRank(), b.getContext());
  SmallVector<utils::IteratorType> iteratorTypes(memrefTypeTo.getRank(),
                                                 utils::IteratorType::parallel);
  return linalg::GenericOp::create(
      b, loc,
      /*inputs=*/from,
      /*outputs=*/to,
      /*indexingMaps=*/llvm::ArrayRef({id, id}),
      /*iteratorTypes=*/iteratorTypes,
      [](OpBuilder &b, Location loc, ValueRange args) {
        linalg::YieldOp::create(b, loc, args.front());
      });
}

/// Specialization to build an scf "for" nest.
template <>
void GenerateLoopNest<scf::ForOp>::doit(
    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
    ArrayRef<utils::IteratorType> iteratorTypes,
    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
                                  ValueRange)>
        bodyBuilderFn,
    ArrayRef<linalg::ProcInfo> procInfo) {
  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
         "expected as many entries for proc info as number of loops, even if "
         "they are null entries");
  SmallVector<Value> iterArgInitValues;
  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
  SmallVector<Value, 4> lbs, ubs, steps;
  unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
  LoopNest loopNest = mlir::scf::buildLoopNest(
      b, loc, lbs, ubs, steps, iterArgInitValues,
      [&](OpBuilder &b, Location loc, ValueRange ivs, ValueRange iterArgs) {
        assert(iterArgs.size() == iterArgInitValues.size() &&
               "expect the number of output tensors and iter args to match");
        SmallVector<Value> operandValuesToUse = linalgOp->getOperands();
        if (!iterArgs.empty()) {
          operandValuesToUse = linalgOp.getDpsInputs();
          operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
        }
        return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
      });

  if (loopNest.loops.empty() || procInfo.empty())
    return;

  // Filter out scf.for loops that were created out of parallel dimensions.
  for (const auto &loop : llvm::enumerate(loopNest.loops)) {
    if (procInfo[loop.index()].distributionMethod ==
        DistributionMethod::Cyclic) {
      mapLoopToProcessorIds(loop.value(), procInfo[loop.index()].procId,
                            procInfo[loop.index()].nprocs);
    }
  }
}

/// Specialization to build affine "for" nest.
template <>
void GenerateLoopNest<AffineForOp>::doit(
    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
    ArrayRef<utils::IteratorType> iteratorTypes,
    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
                                  ValueRange)>
        bodyBuilderFn,
    ArrayRef<linalg::ProcInfo> /*procInfo*/) {
  SmallVector<Value> iterArgInitValues;
  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
  SmallVector<Value, 4> lbs, ubs, steps;
  unpackRanges(b, loc, loopRanges, lbs, ubs, steps);

  // Affine loops require constant steps.
  SmallVector<int64_t, 4> constantSteps;
  constantSteps.reserve(steps.size());
  for (Value v : steps) {
    auto constVal = getConstantIntValue(v);
    assert(constVal.has_value() && "Affine loops require constant steps");
    constantSteps.push_back(constVal.value());
  }

  affine::buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
                              [&](OpBuilder &b, Location loc, ValueRange ivs) {
                                bodyBuilderFn(b, loc, ivs,
                                              linalgOp->getOperands());
                              });
}

/// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`.
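/// With cyclic distribution this rewrites the bounds so that processor
/// `procId` starts at `lb + procId * step` and strides by `nprocs * step`,
/// i.e. it visits every `nprocs`-th iteration of the original loop.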
void updateBoundsForCyclicDistribution(OpBuilder &b, Location loc, Value procId,
                                       Value nprocs, Value &lb, Value &ub,
                                       Value &step) {
  AffineExpr d0, d1;
  bindDims(b.getContext(), d0, d1);
  AffineExpr s0 = getAffineSymbolExpr(0, b.getContext());
  lb =
      affine::makeComposedAffineApply(b, loc, d0 + d1 * s0, {lb, procId, step});
  step = affine::makeComposedAffineApply(b, loc, d0 * s0, {nprocs, step});
}

/// Generates a loop nest consisting of scf.parallel and scf.for, depending
/// on the `iteratorTypes`. Consecutive parallel loops create a single
/// scf.parallel operation; each sequential loop creates a new scf.for
/// operation. The body of the innermost loop is populated by
/// `bodyBuilderFn` that accepts a range of induction variables for all
/// loops. `ivStorage` is used to store the partial list of induction
/// variables.
// TODO: this function can be made iterative instead. However, it
// will have at most as many recursive calls as nested loops, which rarely
// exceeds 10.
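//
// E.g., for iteratorTypes = [parallel, parallel, reduction, parallel] and no
// distribution, this emits one scf.parallel over the two leading dims, an
// scf.for for the reduction dim, and a nested scf.parallel for the trailing
// parallel dim.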
static void generateParallelLoopNest(
    OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs,
    ValueRange steps, ArrayRef<utils::IteratorType> iteratorTypes,
    ArrayRef<linalg::ProcInfo> procInfo,
    function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilderFn,
    SmallVectorImpl<Value> &ivStorage) {
  assert(lbs.size() == ubs.size());
  assert(lbs.size() == steps.size());
  assert(lbs.size() == iteratorTypes.size());
  assert(procInfo.empty() || (lbs.size() == procInfo.size()));

  // If there are no (more) loops to be generated, generate the body and be
  // done with it.
  if (iteratorTypes.empty()) {
    bodyBuilderFn(b, loc, ivStorage);
    return;
  }

  // If there are no outer parallel loops, generate one sequential loop and
  // recurse.
  if (!isParallelIterator(iteratorTypes.front())) {
    LoopNest singleLoop = buildLoopNest(
        b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
        [&](OpBuilder &b, Location loc, ValueRange ivs) {
          ivStorage.append(ivs.begin(), ivs.end());
          generateParallelLoopNest(
              b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
              iteratorTypes.drop_front(),
              procInfo.empty() ? procInfo : procInfo.drop_front(),
              bodyBuilderFn, ivStorage);
        });
    return;
  }

  unsigned nLoops = iteratorTypes.size();
  unsigned numProcessed = 0;
  DistributionMethod distributionMethod = DistributionMethod::None;
  if (procInfo.empty()) {
    numProcessed = nLoops - iteratorTypes.drop_while(isParallelIterator).size();
  } else {
    distributionMethod = procInfo.front().distributionMethod;
    numProcessed =
        nLoops - procInfo
                     .drop_while([&](linalg::ProcInfo p) {
                       return p.distributionMethod == distributionMethod;
                     })
                     .size();
  }

  auto remainderProcInfo =
      procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
  switch (distributionMethod) {
  case DistributionMethod::None: {
    // Generate a single parallel loop-nest operation for all outermost
    // parallel loops and recurse.
    scf::ParallelOp::create(
        b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),
        [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
          ivStorage.append(localIvs.begin(), localIvs.end());
          generateParallelLoopNest(
              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), remainderProcInfo,
              bodyBuilderFn, ivStorage);
        });
    return;
  }
  case DistributionMethod::Cyclic: {
    // Generate a single parallel loop-nest operation for all outermost
    // parallel loops and recurse.
    scf::ParallelOp::create(
        b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),
        [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
          ivStorage.append(localIvs.begin(), localIvs.end());
          generateParallelLoopNest(
              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), remainderProcInfo,
              bodyBuilderFn, ivStorage);
        });
    return;
  }
  case DistributionMethod::CyclicNumProcsGeNumIters: {
    // Check (for the processed loops) that the iteration is in-bounds.
    ArithBuilder ab(b, loc);
    Value cond = ab.slt(lbs[0], ubs[0]);
    for (unsigned i = 1; i < numProcessed; ++i)
      cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
    scf::IfOp::create(b, loc, cond, [&](OpBuilder &b, Location loc) {
      generateParallelLoopNest(b, loc, lbs.drop_front(numProcessed),
                               ubs.drop_front(numProcessed),
                               steps.drop_front(numProcessed),
                               iteratorTypes.drop_front(numProcessed),
                               remainderProcInfo, bodyBuilderFn, ivStorage);
      scf::YieldOp::create(b, loc, ValueRange{});
    });
    return;
  }
  case DistributionMethod::CyclicNumProcsEqNumIters:
    // No check/loops needed here. Set the `%iv` to be the `%lb` and proceed
    // with inner loop generation.
    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
    generateParallelLoopNest(
        b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
        steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
        remainderProcInfo, bodyBuilderFn, ivStorage);
    return;
  }
}

/// Specialization for generating a mix of parallel and sequential scf loops.
template <>
void GenerateLoopNest<scf::ParallelOp>::doit(
    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
    ArrayRef<utils::IteratorType> iteratorTypes,
    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
                                  ValueRange)>
        bodyBuilderFn,
    ArrayRef<linalg::ProcInfo> procInfo) {
  SmallVector<Value> iterArgInitValues;
  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
  assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
  // This function may be passed more iterator types than ranges.
  assert(iteratorTypes.size() >= loopRanges.size() &&
         "expected iterator type for all ranges");
  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
         "expected proc information for all loops when present");
  iteratorTypes = iteratorTypes.take_front(loopRanges.size());
  SmallVector<Value, 8> lbsStorage, ubsStorage, stepsStorage, ivs;
  unsigned numLoops = iteratorTypes.size();
  ivs.reserve(numLoops);
  lbsStorage.reserve(numLoops);
  ubsStorage.reserve(numLoops);
  stepsStorage.reserve(numLoops);

  // Get the loop lb, ub, and step.
  unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);

  // Modify the lb, ub, and step based on the distribution options.
  for (const auto &it : llvm::enumerate(procInfo)) {
    if (it.value().distributionMethod != linalg::DistributionMethod::None) {
      updateBoundsForCyclicDistribution(
          b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
          ubsStorage[it.index()], stepsStorage[it.index()]);
    }
  }
  ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
  generateParallelLoopNest(
      b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
      [&](OpBuilder &b, Location loc, ValueRange ivs) {
        bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
      },
      ivs);

  assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
}

static Operation *materializeTiledShape(OpBuilder &builder, Location loc,
                                        Value valueToTile,
                                        const SliceParameters &sliceParams) {
  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
  auto *sliceOp = TypeSwitch<ShapedType, Operation *>(shapedType)
                      .Case([&](MemRefType) {
                        return memref::SubViewOp::create(
                            builder, loc, valueToTile, sliceParams.offsets,
                            sliceParams.sizes, sliceParams.strides);
                      })
                      .Case([&](RankedTensorType) {
                        return tensor::ExtractSliceOp::create(
                            builder, loc, valueToTile, sliceParams.offsets,
                            sliceParams.sizes, sliceParams.strides);
                      })
                      .Default([](ShapedType) -> Operation * {
                        llvm_unreachable("Unexpected shaped type");
                      });
  return sliceOp;
}

Operation *makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
                          ArrayRef<OpFoldResult> tileSizes, AffineMap map,
                          ArrayRef<OpFoldResult> lbs,
                          ArrayRef<OpFoldResult> ubs,
                          ArrayRef<OpFoldResult> subShapeSizes,
                          bool omitPartialTileCheck) {
  SliceParameters sliceParams =
      computeSliceParameters(builder, loc, valueToTile, tileSizes, map, lbs,
                             ubs, subShapeSizes, omitPartialTileCheck);
  return materializeTiledShape(builder, loc, valueToTile, sliceParams);
}

SliceParameters
computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
                       ArrayRef<OpFoldResult> tileSizes, AffineMap map,
                       ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
                       ArrayRef<OpFoldResult> subShapeSizes,
                       bool omitPartialTileCheck) {
  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
  assert(shapedType && "only shaped types can be tiled");
  ArrayRef<int64_t> shape = shapedType.getShape();
  int64_t rank = shapedType.getRank();

  // Compute offsets/sizes/strides for the tile.
  SliceParameters sliceParams;
  sliceParams.offsets.reserve(rank);
  sliceParams.sizes.reserve(rank);
  sliceParams.strides.reserve(rank);
  for (unsigned r = 0; r < rank; ++r) {
    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
    if (!isTiled(map.getSubMap({r}), tileSizes)) {
      sliceParams.offsets.push_back(builder.getIndexAttr(0));
      OpFoldResult dim = createFoldedDimOp(builder, loc, valueToTile, r);
      sliceParams.sizes.push_back(dim);
      sliceParams.strides.push_back(builder.getIndexAttr(1));
      LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
      continue;
    }
    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");

    // Tiling creates a new slice at the proper index, the slice step is 1
    // (i.e. the op does not subsample, stepping occurs in the loop).
    auto m = map.getSubMap({r});
    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
    IRRewriter rewriter(builder);
    // The offset of the slice is m(lbs) - m(0).
    SmallVector<Attribute> zeros(lbs.size(), rewriter.getIndexAttr(0));
    SmallVector<Attribute> mAtZero;
    [[maybe_unused]] auto res = m.constantFold(zeros, mAtZero);
    assert(succeeded(res) && "affine_map must be evaluatable (not symbols)");
    int64_t mAtZeroInt =
        cast<IntegerAttr>(mAtZero[0]).getValue().getSExtValue();
    OpFoldResult offset = makeComposedFoldedAffineApply(
        rewriter, loc, m.getResult(0) - mAtZeroInt, lbs);
    sliceParams.offsets.push_back(offset);

    OpFoldResult closedIntSize =
        makeComposedFoldedAffineApply(rewriter, loc, m, subShapeSizes);
    // Resulting size needs to be made half open interval again.
    AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext());
    OpFoldResult size =
        makeComposedFoldedAffineApply(rewriter, loc, s0 + 1, closedIntSize);
    LLVM_DEBUG(llvm::dbgs()
               << "computeSliceParameters: raw size: " << size << "\n");
    LLVM_DEBUG(llvm::dbgs()
               << "computeSliceParameters: new offset: " << offset << "\n");
    sliceParams.strides.push_back(builder.getIndexAttr(1));

    if (omitPartialTileCheck) {
      // We statically know that the partial/boundary tile condition is
      // unnecessary.
      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
      sliceParams.sizes.push_back(size);
      continue;
    }

    // The size of the subview / extract_slice should be trimmed to avoid
    // out-of-bounds accesses, unless:
    // a. We statically know the subshape size divides the shape size evenly.
    // b. The subshape size is 1. According to the way the loops are set up,
    //    tensors with "0" dimensions would never be constructed.
    int64_t shapeSize = shape[r];
    std::optional<int64_t> sizeCst = getConstantIntValue(size);
    auto hasTileSizeOne = sizeCst == 1;
    auto dividesEvenly = sizeCst && ShapedType::isStatic(shapeSize) &&
                         ((shapeSize % *sizeCst) == 0);
    if (!hasTileSizeOne && !dividesEvenly) {
      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
                              << ", size: " << size
                              << ": make sure in bound with affine.min\n");

      AffineExpr dim0, dim1, dim2;
      MLIRContext *context = builder.getContext();
      bindDims(context, dim0, dim1, dim2);

      // Get the dimension size for this dimension. We need to first calculate
      // the max index and then plus one. This is important because for
      // convolution ops, we have its input window dimension's affine map of
      // the form `(d0 * s0 + d1)`, where `d0`/`d1` is an output/filter window
      // dimension and `s0` is the stride. Directly using the dimension size of
      // the output/filter window dimensions would cause incorrect calculation.
      AffineMap minusOneMap = AffineMap::inferFromExprList(
                                  {ArrayRef<AffineExpr>{dim0 - 1}}, context)
                                  .front();
      AffineMap plusOneMap = AffineMap::inferFromExprList(
                                 {ArrayRef<AffineExpr>{dim0 + 1}}, context)
                                 .front();
      SmallVector<OpFoldResult> maxIndices =
          llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) {
            return makeComposedFoldedAffineApply(rewriter, loc, minusOneMap,
                                                 {ub});
          }));
      OpFoldResult maxIndex =
          makeComposedFoldedAffineApply(rewriter, loc, m, maxIndices);
      OpFoldResult d =
          makeComposedFoldedAffineApply(rewriter, loc, plusOneMap, {maxIndex});

      // Compute min(dim - offset, size) to avoid out-of-bounds accesses.
      AffineMap minMap = AffineMap::inferFromExprList(
                             {ArrayRef<AffineExpr>{dim1 - dim2, dim0}}, context)
                             .front();
      size =
          makeComposedFoldedAffineMin(rewriter, loc, minMap, {size, d, offset});
    }
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
    sliceParams.sizes.push_back(size);
  }
  return sliceParams;
}

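/// For example, with `ivs = {%iv0}` and `tileSizes = {0, 8}`, only the second
/// dimension is tiled, so the computed offsets are `{0, %iv0}`.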
SmallVector<OpFoldResult> computeTileOffsets(OpBuilder &b, Location loc,
                                             ArrayRef<OpFoldResult> ivs,
                                             ArrayRef<OpFoldResult> tileSizes) {
  SmallVector<OpFoldResult> offsets;
  for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
    bool isTiled = !isZeroInteger(tileSizes[idx]);
    offsets.push_back(isTiled ? ivs[idxIvs++] : b.getIndexAttr(0));
    LLVM_DEBUG(llvm::dbgs()
               << "computeTileOffsets: " << offsets.back() << "\n");
  }
  return offsets;
}

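/// For example, with `tileSizes = {0, 8}` and `sizeBounds = {%d0, %d1}`, the
/// computed (closed-interval) sizes are `{%d0 - 1, 7}`.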
SmallVector<OpFoldResult> computeTileSizes(OpBuilder &b, Location loc,
                                           ArrayRef<OpFoldResult> tileSizes,
                                           ArrayRef<OpFoldResult> sizeBounds) {
  SmallVector<OpFoldResult> sizes;
  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
    bool isTiled = !isZeroInteger(tileSizes[idx]);
    // Before composing, we need to make range a closed interval.
    OpFoldResult size = isTiled ? tileSizes[idx] : sizeBounds[idx];
    AffineExpr d0 = getAffineDimExpr(0, b.getContext());
    IRRewriter rewriter(b);
    sizes.push_back(makeComposedFoldedAffineApply(rewriter, loc, d0 - 1, size));
    LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
  }
  return sizes;
}

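/// Returns the list of tensor output types produced when the given structured
/// operation `op` is applied to the given `operands`.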
SmallVector<Type> getTensorOutputTypes(LinalgOp op, ValueRange operands) {
  if (op.hasPureBufferSemantics())
    return {};
  return llvm::to_vector(
      llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
        return operands[opOperand.getOperandNumber()].getType();
      }));
}

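/// Creates insert_slice ops that insert `results` back into the larger
/// tensors they were originally extracted from (via extract_slice) before
/// being passed as `operands` to the structured op.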
SmallVector<Value> insertSlicesBack(OpBuilder &builder, Location loc,
                                    LinalgOp op, ValueRange operands,
                                    ValueRange results) {
  if (op.hasPureBufferSemantics())
    return {};
  SmallVector<Value> tensorResults;
  tensorResults.reserve(results.size());
  // Insert an insert_slice for each output tensor.
  unsigned resultIdx = 0;
  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
    // TODO: use an interface/adaptor to avoid leaking position in
    // `tiledOperands`.
    Value outputTensor = operands[opOperand.getOperandNumber()];
    if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
      Value inserted = tensor::InsertSliceOp::create(
          builder, loc, sliceOp.getSource().getType(), results[resultIdx],
          sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
          sliceOp.getStrides(), sliceOp.getStaticOffsets(),
          sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
      tensorResults.push_back(inserted);
    } else {
      tensorResults.push_back(results[resultIdx]);
    }
    ++resultIdx;
  }
  return tensorResults;
}

SmallVector<std::optional<SliceParameters>>
computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp,
                          ValueRange valuesToTile, ArrayRef<OpFoldResult> ivs,
                          ArrayRef<OpFoldResult> tileSizes,
                          ArrayRef<OpFoldResult> sizeBounds,
                          bool omitPartialTileCheck) {
  assert(ivs.size() == static_cast<size_t>(llvm::count_if(
                           llvm::make_range(tileSizes.begin(), tileSizes.end()),
                           [](OpFoldResult v) { return !isZeroInteger(v); })) &&
         "expected as many ivs as non-zero sizes");

  // Construct (potentially temporary) mins and maxes on which to apply maps
  // that define tile subshapes.
  SmallVector<OpFoldResult> lbs =
      computeTileOffsets(builder, loc, ivs, tileSizes);
  SmallVector<OpFoldResult> subShapeSizes =
      computeTileSizes(builder, loc, tileSizes, sizeBounds);

  assert(static_cast<int64_t>(valuesToTile.size()) <=
             linalgOp->getNumOperands() &&
         "more values to tile than operands.");
  SmallVector<std::optional<SliceParameters>> allSliceParams;
  allSliceParams.reserve(valuesToTile.size());
  for (auto [opOperand, val] :
       llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
    Value shapedOp = val;
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
    AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
    // Use `opOperand` as is if it is not tiled and not an output tensor. Having
    // an extract/insert slice pair for all output tensors simplifies follow up
    // transformations such as padding and bufferization since the
    // extract/insert slice pairs make the accessed iteration argument
    // subdomains explicit.

    Type operandType = opOperand.get().getType();
    if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
                                      linalgOp.isDpsInit(&opOperand))) {
      allSliceParams.push_back(std::nullopt);
      LLVM_DEBUG(llvm::dbgs()
                 << ": not tiled: use shape: " << operandType << "\n");
      continue;
    }
    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");

    allSliceParams.push_back(computeSliceParameters(
        builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
        omitPartialTileCheck));
  }

  return allSliceParams;
}

SmallVector<Value> makeTiledShapes(OpBuilder &builder, Location loc,
                                   LinalgOp linalgOp, ValueRange valuesToTile,
                                   ArrayRef<OpFoldResult> ivs,
                                   ArrayRef<OpFoldResult> tileSizes,
                                   ArrayRef<OpFoldResult> sizeBounds,
                                   bool omitPartialTileCheck) {
  SmallVector<std::optional<SliceParameters>> allSliceParameter =
      computeAllSliceParameters(builder, loc, linalgOp, valuesToTile, ivs,
                                tileSizes, sizeBounds, omitPartialTileCheck);
  SmallVector<Value> tiledShapes;
  for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
    Value valueToTile = std::get<0>(item);
    std::optional<SliceParameters> sliceParams = std::get<1>(item);
    tiledShapes.push_back(
        sliceParams.has_value()
            ? materializeTiledShape(builder, loc, valueToTile, *sliceParams)
                  ->getResult(0)
            : valueToTile);
  }
  return tiledShapes;
}

void offsetIndices(OpBuilder &b, LinalgOp linalgOp,
                   ArrayRef<OpFoldResult> offsets) {
  IRRewriter rewriter(b);
  offsetIndices(rewriter, linalgOp, offsets);
}

void offsetIndices(RewriterBase &b, LinalgOp linalgOp,
                   ArrayRef<OpFoldResult> offsets) {
  if (!linalgOp.hasIndexSemantics())
    return;

  for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
    if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
      continue;
    OpBuilder::InsertionGuard guard(b);
    b.setInsertionPointAfter(indexOp);
    AffineExpr index, offset;
    bindDims(b.getContext(), index, offset);
    OpFoldResult applied = makeComposedFoldedAffineApply(
        b, indexOp.getLoc(), index + offset,
        {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
    Value materialized =
        getValueOrCreateConstantIndexOp(b, indexOp.getLoc(), applied);
    b.replaceUsesWithIf(indexOp, materialized, [&](OpOperand &use) {
      return use.getOwner() != materialized.getDefiningOp();
    });
  }
}

/// Get the reassociation maps to fold the result of an extract_slice (or
/// source of an insert_slice) operation with given offsets and sizes to its
/// rank-reduced version. This is only done for the cases where the size is 1
/// and the offset is 0. Strictly speaking the offset 0 is not required in
/// general, but non-zero offsets are not handled by the SPIR-V backend at this
/// point (and potentially cannot be handled).
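///
/// For example, `mixedSizes = {1, %d0, 1, 4}` yields the reassociation
/// `[[0, 1], [2, 3]]`, folding each unit dimension into the non-unit
/// dimension that follows it.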
std::optional<SmallVector<ReassociationIndices>>
getReassociationMapForFoldingUnitDims(ArrayRef<OpFoldResult> mixedSizes) {
  SmallVector<ReassociationIndices> reassociation;
  ReassociationIndices curr;
  for (const auto &it : llvm::enumerate(mixedSizes)) {
    auto dim = it.index();
    auto size = it.value();
    curr.push_back(dim);
    auto attr = llvm::dyn_cast_if_present<Attribute>(size);
    if (attr && cast<IntegerAttr>(attr).getInt() == 1)
      continue;
    reassociation.emplace_back(ReassociationIndices{});
    std::swap(reassociation.back(), curr);
  }
  // When the reassociations are not empty, then fold the remaining
  // unit-dimensions into the last dimension. If the reassociations so far are
  // empty, then leave it empty. This will fold everything to a rank-0 tensor.
  if (!curr.empty() && !reassociation.empty())
    reassociation.back().append(curr.begin(), curr.end());
  return reassociation;
}

} // namespace linalg
} // namespace mlir