1 //===- Utils.cpp - Utilities to support the Linalg dialect ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements utilities for the Linalg dialect.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "mlir/Dialect/Linalg/Utils/Utils.h"
14 
15 #include "mlir/Analysis/SliceAnalysis.h"
16 #include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
17 #include "mlir/Dialect/Affine/IR/AffineOps.h"
18 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
19 #include "mlir/Dialect/Affine/LoopUtils.h"
20 #include "mlir/Dialect/Arith/IR/Arith.h"
21 #include "mlir/Dialect/Arith/Utils/Utils.h"
22 #include "mlir/Dialect/Func/IR/FuncOps.h"
23 #include "mlir/Dialect/Linalg/IR/Linalg.h"
24 #include "mlir/Dialect/MemRef/IR/MemRef.h"
25 #include "mlir/Dialect/SCF/IR/SCF.h"
26 #include "mlir/Dialect/Tensor/IR/Tensor.h"
27 #include "mlir/Dialect/Tensor/Utils/Utils.h"
28 #include "mlir/Dialect/Utils/IndexingUtils.h"
29 #include "mlir/Dialect/Utils/StaticValueUtils.h"
30 #include "mlir/IR/AffineExpr.h"
31 #include "mlir/IR/AffineExprVisitor.h"
32 #include "mlir/IR/AffineMap.h"
33 #include "mlir/IR/Matchers.h"
35 #include "mlir/Pass/Pass.h"
36 #include "llvm/ADT/TypeSwitch.h"
37 #include "llvm/Support/Debug.h"
38 #include <optional>
39 
40 #define DEBUG_TYPE "linalg-utils"
41 
42 using namespace mlir;
43 using namespace presburger;
44 using namespace mlir::affine;
45 using namespace mlir::linalg;
46 using namespace mlir::scf;
47 
48 namespace {
49 
50 // Helper visitor to determine whether an AffineExpr is tiled.
51 // This is achieved by traversing every AffineDimExpr with position `pos` and
52 // checking whether the corresponding `tileSizes[pos]` is non-zero.
53 // This also enforces that only positive coefficients occur in multiplications.
54 //
55 // Example:
56 // `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0]
57 //
58 struct TileCheck : public AffineExprVisitor<TileCheck> {
59  TileCheck(ArrayRef<OpFoldResult> tileSizes) : tileSizes(tileSizes) {}
60 
61  void visitDimExpr(AffineDimExpr expr) {
62  isTiled |= !isZeroInteger(tileSizes[expr.getPosition()]);
63  }
64  void visitAffineBinaryOpExpr(AffineBinaryOpExpr expr) {
65  visit(expr.getLHS());
66  visit(expr.getRHS());
67  if (expr.getKind() == mlir::AffineExprKind::Mul)
68  assert(cast<AffineConstantExpr>(expr.getRHS()).getValue() > 0 &&
69  "nonpositive multiplying coefficient");
70  }
71  bool isTiled = false;
72  ArrayRef<OpFoldResult> tileSizes;
73 };
74 
75 } // namespace
76 
77 static bool isTiled(AffineExpr expr, ArrayRef<OpFoldResult> tileSizes) {
78  if (!expr)
79  return false;
80  TileCheck t(tileSizes);
81  t.visit(expr);
82  return t.isTiled;
83 }
84 
85 // Checks whether `map` varies with respect to a non-zero `tileSize`.
86 static bool isTiled(AffineMap map, ArrayRef<OpFoldResult> tileSizes) {
87  if (!map)
88  return false;
89  for (unsigned r = 0; r < map.getNumResults(); ++r)
90  if (isTiled(map.getResult(r), tileSizes))
91  return true;
92  return false;
93 }
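// Illustrative example (hypothetical map and tile sizes): for a map
// (d0, d1) -> (d0 * 2 + d1) with tileSizes = [0, 4], the single result
// expression references d1, whose tile size is non-zero, so
// isTiled(map, tileSizes) returns true; with tileSizes = [0, 0] it
// returns false.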
94 
95 std::optional<RegionMatcher::BinaryOpKind>
96 RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
97  auto &region = op.getRegion();
98  if (!llvm::hasSingleElement(region))
99  return std::nullopt;
100 
101  Block &block = region.front();
102  if (block.getNumArguments() != 2 ||
103  !block.getArgument(0).getType().isSignlessIntOrFloat() ||
104  !block.getArgument(1).getType().isSignlessIntOrFloat())
105  return std::nullopt;
106 
107  auto &ops = block.getOperations();
108  if (!llvm::hasSingleElement(block.without_terminator()))
109  return std::nullopt;
110 
111  using mlir::matchers::m_Val;
112  auto a = m_Val(block.getArgument(0));
113  auto b = m_Val(block.getArgument(1));
114 
115  auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
116  if (addPattern.match(&ops.back()))
117  return BinaryOpKind::IAdd;
118 
119  return std::nullopt;
120 }
121 
122 /// Explicit instantiation of loop nest generator for different loop types.
123 template struct mlir::linalg::GenerateLoopNest<scf::ForOp>;
124 template struct mlir::linalg::GenerateLoopNest<scf::ParallelOp>;
125 template struct mlir::linalg::GenerateLoopNest<AffineForOp>;
126 
127 /// Given a list of subview ranges, extract individual values for lower, upper
128 /// bounds and steps and put them into the corresponding vectors.
129 static void unpackRanges(OpBuilder &builder, Location loc,
130  ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
131  SmallVectorImpl<Value> &ubs,
132  SmallVectorImpl<Value> &steps) {
133  for (Range range : ranges) {
134  lbs.emplace_back(
135  getValueOrCreateConstantIndexOp(builder, loc, range.offset));
136  ubs.emplace_back(getValueOrCreateConstantIndexOp(builder, loc, range.size));
137  steps.emplace_back(
138  getValueOrCreateConstantIndexOp(builder, loc, range.stride));
139  }
140 }
141 
142 //===----------------------------------------------------------------------===//
143 // General utilities
144 //===----------------------------------------------------------------------===//
145 //
146 /// The permutation can be obtained from two permutations:
147 /// a) Compute the permutation vector to move the last `numPackedDims` into
148 /// the `innerPosDims` of a shape of rank `rank`.
149 /// b) Compute the permutation vector to move outer dims if the
150 /// `outerPerm` parameter is not empty.
151 /// Apply (b) permutation on (a) permutation to get the final permutation.
152 static SmallVector<int64_t>
153 computePackUnPackPerm(int64_t rank, ArrayRef<int64_t> &innerDimsPos,
154  ArrayRef<int64_t> &outerPerm,
155  PackingMetadata &packingMetadata) {
156  int64_t numPackedDims = innerDimsPos.size();
157  auto lastDims =
158  llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
159  packingMetadata = computePackingMetadata(rank, innerDimsPos);
160  SmallVector<int64_t> innerPositionsPerm =
161  computePermutationVector(rank, lastDims, packingMetadata.insertPositions);
162 
163  SmallVector<int64_t> outerPos = packingMetadata.outerPositions;
164  if (!outerPerm.empty())
165  applyPermutationToVector(outerPos, outerPerm);
166  SmallVector<int64_t> outerPositionPerm =
167  computePermutationVector(rank, packingMetadata.outerPositions, outerPos);
168 
169  SmallVector<int64_t> packInverseDestPermutation = innerPositionsPerm;
170  applyPermutationToVector(packInverseDestPermutation, outerPositionPerm);
171  return packInverseDestPermutation;
172 }
173 
174 namespace mlir {
175 namespace linalg {
176 
177 SmallVector<int64_t> getPackInverseDestPerm(PackOp packOp) {
178 
179  PackingMetadata pMetadata;
180  int64_t packedRank = packOp.getDestType().getRank();
181  ArrayRef<int64_t> innerDimPos = packOp.getInnerDimsPos();
182  ArrayRef<int64_t> outerPerm = packOp.getOuterDimsPerm();
183  SmallVector<int64_t> packInvDestPerm =
184  computePackUnPackPerm(packedRank, innerDimPos, outerPerm, pMetadata);
185  return packInvDestPerm;
186 }
187 
188 SmallVector<int64_t> getUnPackInverseSrcPerm(UnPackOp unpackOp) {
189  PackingMetadata metadata;
190  return getUnPackInverseSrcPerm(unpackOp, metadata);
191 }
192 
193 SmallVector<int64_t> getUnPackInverseSrcPerm(UnPackOp unpackOp,
194  PackingMetadata &metadata) {
195  int64_t unpackRank = unpackOp.getSourceType().getRank();
196  ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
197  ArrayRef<int64_t> outerPerm = unpackOp.getOuterDimsPerm();
198  SmallVector<int64_t> unpackInvSrcPerm =
199  computePackUnPackPerm(unpackRank, innerDimPos, outerPerm, metadata);
200  return unpackInvSrcPerm;
201 }
202 
203 bool allIndexingsAreProjectedPermutation(LinalgOp op) {
204  return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
205  return m.isProjectedPermutation(/*allowZeroInResults=*/true);
206  });
207 }
208 
209 bool hasOnlyScalarElementwiseOp(Region &r) {
210  if (!llvm::hasSingleElement(r))
211  return false;
212  for (Operation &op : r.front()) {
213  if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
214  linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
215  OpTrait::hasElementwiseMappableTraits(&op)) ||
216  llvm::any_of(op.getResultTypes(),
217  [](Type type) { return !type.isIntOrIndexOrFloat(); }))
218  return false;
219  }
220  return true;
221 }
222 
223 bool isElementwise(LinalgOp op) {
224  if (op.getNumLoops() != op.getNumParallelLoops())
225  return false;
226 
227  if (!allIndexingsAreProjectedPermutation(op))
228  return false;
229 
230  // TODO: relax the restrictions on indexing map.
231  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
232  if (!op.getMatchingIndexingMap(&opOperand).isPermutation())
233  return false;
234  }
235  return hasOnlyScalarElementwiseOp(op->getRegion(0));
236 }
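// Illustrative example (hypothetical IR): with
// #id = affine_map<(d0, d1) -> (d0, d1)>, the following generic is
// elementwise: all iterators are parallel, every indexing map is a
// (projected) permutation, and the body contains only scalar arith ops.
//   %r = linalg.generic {indexing_maps = [#id, #id, #id],
//                        iterator_types = ["parallel", "parallel"]}
//       ins(%a, %b : tensor<8x8xf32>, tensor<8x8xf32>)
//       outs(%init : tensor<8x8xf32>) {
//     ^bb0(%x: f32, %y: f32, %out: f32):
//       %s = arith.addf %x, %y : f32
//       linalg.yield %s : f32
//   } -> tensor<8x8xf32>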
237 
238 bool isParallelIterator(utils::IteratorType iteratorType) {
239  return iteratorType == utils::IteratorType::parallel;
240 }
241 
242 bool isReductionIterator(utils::IteratorType iteratorType) {
243  return iteratorType == utils::IteratorType::reduction;
244 }
245 
246 Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
247  Value source, Value pad, bool nofold,
248  ValueRange typeDynDims) {
249  // Exit if `source` is not defined by an ExtractSliceOp.
250  auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
251  if (!sliceOp)
252  return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
253  typeDynDims);
254 
255  // Search the `source` use-def chain for padded LinalgOps.
256  Value current = sliceOp.getSource();
257  while (current) {
258  auto linalgOp = current.getDefiningOp<LinalgOp>();
259  if (!linalgOp)
260  break;
261  OpResult opResult = cast<OpResult>(current);
262  current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();
263  }
264  auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
265 
266  // Exit if the search fails to match a tensor::PadOp at the end of the matched
267  // LinalgOp sequence.
268  if (!padOp)
269  return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
270  typeDynDims);
271 
272  // Exit if the padded result type does not match.
273  if (sliceOp.getSource().getType() != type)
274  return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
275  typeDynDims);
276 
277  // Exit if the LinalgOps are not high padded.
278  if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
279  return getConstantIntValue(ofr) != static_cast<int64_t>(0);
280  }))
281  return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
282  typeDynDims);
283 
284  // Exit if `padOpSliceOp`, which defines the slice used by
285  // `padOp`, is rank-reducing.
286  auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
287  if (!padOpSliceOp ||
288  sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
289  return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
290  typeDynDims);
291 
292  // Exit if the sizes of `sliceOp` do not match the sizes of the slice padded
293  // by `padOp`.
294  if (llvm::any_of(
295  llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
296  [](std::tuple<OpFoldResult, OpFoldResult> it) {
297  return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
298  }))
299  return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
300  typeDynDims);
301 
302  // Exit if the padding values do not match.
303  Attribute padOpPadAttr, padAttr;
304  Value padOpPad = padOp.getConstantPaddingValue();
305  if (!padOpPad || !matchPattern(padOpPad, m_Constant(&padOpPadAttr)) ||
306  !matchPattern(pad, m_Constant(&padAttr)) || padOpPadAttr != padAttr)
307  return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
308  typeDynDims);
309 
310  // Return the padded result if the padding values and sizes match.
311  return sliceOp.getSource();
312 }
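// Illustrative use-def chain this routine matches (hypothetical IR, all
// names are assumptions): `source` is an extract_slice of a LinalgOp whose
// init chain ends in an already high-padded tensor of the requested type:
//   %s    = tensor.extract_slice %t ...           // padOpSliceOp
//   %pad  = tensor.pad %s low[0, 0] high[%h0, %h1] { ... }
//   %fill = linalg.fill ins(%cst : f32) outs(%pad : tensor<8x16xf32>) -> tensor<8x16xf32>
//   %src  = tensor.extract_slice %fill ...        // sliceOp defining `source`
// When the slice sizes and the padding value match, %fill is returned and
// no second tensor.pad is created.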
313 
314 GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to) {
315  auto memrefTypeTo = cast<MemRefType>(to.getType());
316 #ifndef NDEBUG
317  auto memrefTypeFrom = cast<MemRefType>(from.getType());
318  assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
319  "`from` and `to` memref must have the same rank");
320 #endif // NDEBUG
321 
322  AffineMap id =
323  AffineMap::getMultiDimIdentityMap(memrefTypeTo.getRank(), b.getContext());
324  SmallVector<utils::IteratorType> iteratorTypes(memrefTypeTo.getRank(),
325  utils::IteratorType::parallel);
326  return b.create<linalg::GenericOp>(
327  loc,
328  /*inputs=*/from,
329  /*outputs=*/to,
330  /*indexingMaps=*/llvm::ArrayRef({id, id}),
331  /*iteratorTypes=*/iteratorTypes,
332  [](OpBuilder &b, Location loc, ValueRange args) {
333  b.create<linalg::YieldOp>(loc, args.front());
334  });
335 }
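// Sketch of the IR this builder produces for a 2-D memref (illustrative,
// element type and shapes are assumptions):
//   linalg.generic {indexing_maps = [affine_map<(i, j) -> (i, j)>,
//                                    affine_map<(i, j) -> (i, j)>],
//                   iterator_types = ["parallel", "parallel"]}
//       ins(%from : memref<?x?xf32>) outs(%to : memref<?x?xf32>) {
//   ^bb0(%in: f32, %out: f32):
//     linalg.yield %in : f32
//   }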
336 
337 /// Specialization to build an scf "for" nest.
338 template <>
339 void GenerateLoopNest<scf::ForOp>::doit(
340  OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
341  ArrayRef<utils::IteratorType> iteratorTypes,
342  function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
343  ValueRange)>
344  bodyBuilderFn,
345  ArrayRef<linalg::ProcInfo> procInfo) {
346  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
347  "expected as many entries for proc info as number of loops, even if "
348  "they are null entries");
349  SmallVector<Value> iterArgInitValues;
350  if (!linalgOp.hasPureBufferSemantics())
351  llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
352  SmallVector<Value, 4> lbs, ubs, steps;
353  unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
354  LoopNest loopNest = mlir::scf::buildLoopNest(
355  b, loc, lbs, ubs, steps, iterArgInitValues,
356  [&](OpBuilder &b, Location loc, ValueRange ivs, ValueRange iterArgs) {
357  assert(iterArgs.size() == iterArgInitValues.size() &&
358  "expect the number of output tensors and iter args to match");
359  SmallVector<Value> operandValuesToUse = linalgOp->getOperands();
360  if (!iterArgs.empty()) {
361  operandValuesToUse = linalgOp.getDpsInputs();
362  operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
363  }
364  return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
365  });
366 
367  if (loopNest.loops.empty() || procInfo.empty())
368  return;
369 
370  // Filter out scf.for loops that were created out of parallel dimensions.
371  for (const auto &loop : llvm::enumerate(loopNest.loops)) {
372  if (procInfo[loop.index()].distributionMethod ==
373  DistributionMethod::Cyclic) {
374  mapLoopToProcessorIds(loop.value(), procInfo[loop.index()].procId,
375  procInfo[loop.index()].nprocs);
376  }
377  }
378 }
379 
380 /// Specialization to build affine "for" nest.
381 template <>
382 void GenerateLoopNest<AffineForOp>::doit(
383  OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
384  ArrayRef<utils::IteratorType> iteratorTypes,
385  function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
386  ValueRange)>
387  bodyBuilderFn,
388  ArrayRef<linalg::ProcInfo> /*procInfo*/) {
389  SmallVector<Value> iterArgInitValues;
390  if (!linalgOp.hasPureBufferSemantics())
391  llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
392  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
393  SmallVector<Value, 4> lbs, ubs, steps;
394  unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
395 
396  // Affine loops require constant steps.
397  SmallVector<int64_t, 4> constantSteps;
398  constantSteps.reserve(steps.size());
399  for (Value v : steps) {
400  auto constVal = getConstantIntValue(v);
401  assert(constVal.has_value() && "Affine loops require constant steps");
402  constantSteps.push_back(constVal.value());
403  }
404 
405  affine::buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
406  [&](OpBuilder &b, Location loc, ValueRange ivs) {
407  bodyBuilderFn(b, loc, ivs,
408  linalgOp->getOperands());
409  });
410 }
411 
412 /// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`.
413 void updateBoundsForCyclicDistribution(OpBuilder &b, Location loc, Value procId,
414  Value nprocs, Value &lb, Value &ub,
415  Value &step) {
416  AffineExpr d0, d1;
417  bindDims(b.getContext(), d0, d1);
418  AffineExpr s0 = getAffineSymbolExpr(0, b.getContext());
419  lb =
420  affine::makeComposedAffineApply(b, loc, d0 + d1 * s0, {lb, procId, step});
421  step = affine::makeComposedAffineApply(b, loc, d0 * s0, {nprocs, step});
422 }
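// Worked example (follows from the affine expressions above): for a loop
// `for i = lb to ub step s` distributed over processors p = 0..P-1, each
// processor runs `for i = lb + p * s to ub step P * s`, i.e. iterations are
// assigned to processors in a cyclic fashion.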
423 
424 /// Generates a loop nest consisting of scf.parallel and scf.for, depending
425 /// on the `iteratorTypes.` Consecutive parallel loops create a single
426 /// scf.parallel operation; each sequential loop creates a new scf.for
427 /// operation. The body of the innermost loop is populated by
428 /// `bodyBuilderFn` that accepts a range of induction variables for all
429 /// loops. `ivStorage` is used to store the partial list of induction
430 /// variables.
431 // TODO: this function can be made iterative instead. However, it
432 // will have at most as many recursive calls as nested loops, which rarely
433 // exceeds 10.
434 static void generateParallelLoopNest(
435  OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs,
436  ValueRange steps, ArrayRef<utils::IteratorType> iteratorTypes,
437  ArrayRef<linalg::ProcInfo> procInfo,
438  function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilderFn,
439  SmallVectorImpl<Value> &ivStorage) {
440  assert(lbs.size() == ubs.size());
441  assert(lbs.size() == steps.size());
442  assert(lbs.size() == iteratorTypes.size());
443  assert(procInfo.empty() || (lbs.size() == procInfo.size()));
444 
445  // If there are no (more) loops to be generated, generate the body and be
446  // done with it.
447  if (iteratorTypes.empty()) {
448  bodyBuilderFn(b, loc, ivStorage);
449  return;
450  }
451 
452  // If there are no outer parallel loops, generate one sequential loop and
453  // recurse.
454  if (!isParallelIterator(iteratorTypes.front())) {
455  LoopNest singleLoop = buildLoopNest(
456  b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
457  [&](OpBuilder &b, Location loc, ValueRange ivs) {
458  ivStorage.append(ivs.begin(), ivs.end());
459  generateParallelLoopNest(
460  b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
461  iteratorTypes.drop_front(),
462  procInfo.empty() ? procInfo : procInfo.drop_front(),
463  bodyBuilderFn, ivStorage);
464  });
465  return;
466  }
467 
468  unsigned nLoops = iteratorTypes.size();
469  unsigned numProcessed = 0;
470  DistributionMethod distributionMethod = DistributionMethod::None;
471  if (procInfo.empty()) {
472  numProcessed = nLoops - iteratorTypes.drop_while(isParallelIterator).size();
473  } else {
474  distributionMethod = procInfo.front().distributionMethod;
475  numProcessed =
476  nLoops - procInfo
477  .drop_while([&](linalg::ProcInfo p) {
478  return p.distributionMethod == distributionMethod;
479  })
480  .size();
481  }
482 
483  auto remainderProcInfo =
484  procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
485  switch (distributionMethod) {
486  case DistributionMethod::None: {
487  // Generate a single parallel loop-nest operation for all outermost
488  // parallel loops and recurse.
489  b.create<scf::ParallelOp>(
490  loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
491  steps.take_front(numProcessed),
492  [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
493  ivStorage.append(localIvs.begin(), localIvs.end());
494  generateParallelLoopNest(
495  nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
496  ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
497  iteratorTypes.drop_front(numProcessed), remainderProcInfo,
498  bodyBuilderFn, ivStorage);
499  });
500  return;
501  }
502  case DistributionMethod::Cyclic: {
503  // Generate a single parallel loop-nest operation for all outermost
504  // parallel loops and recurse.
505  b.create<scf::ParallelOp>(
506  loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
507  steps.take_front(numProcessed),
508  [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
509  ivStorage.append(localIvs.begin(), localIvs.end());
510  generateParallelLoopNest(
511  nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
512  ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
513  iteratorTypes.drop_front(numProcessed), remainderProcInfo,
514  bodyBuilderFn, ivStorage);
515  });
516  return;
517  }
518  case DistributionMethod::CyclicNumProcsGeNumIters: {
519  // Check (for the processed loops) that the iteration is in-bounds.
520  ArithBuilder ab(b, loc);
521  Value cond = ab.slt(lbs[0], ubs[0]);
522  for (unsigned i = 1; i < numProcessed; ++i)
523  cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
524  ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
525  b.create<scf::IfOp>(loc, cond, [&](OpBuilder &b, Location loc) {
526  generateParallelLoopNest(b, loc, lbs.drop_front(numProcessed),
527  ubs.drop_front(numProcessed),
528  steps.drop_front(numProcessed),
529  iteratorTypes.drop_front(numProcessed),
530  remainderProcInfo, bodyBuilderFn, ivStorage);
531  b.create<scf::YieldOp>(loc, ValueRange{});
532  });
533  return;
534  }
535  case DistributionMethod::CyclicNumProcsEqNumIters:
536  // No check/loops needed here. Set the `%iv` to be the `%lb` and proceed
537  // with inner loop generation.
538  ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
539  generateParallelLoopNest(
540  b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
541  steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
542  remainderProcInfo, bodyBuilderFn, ivStorage);
543  return;
544  }
545 }
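// Illustrative shape of the generated nest (no distribution): for
// iteratorTypes = [parallel, parallel, reduction, parallel] this produces
//   scf.parallel (%i0, %i1) = ... {   // leading run of parallel loops
//     scf.for %i2 = ... {             // sequential (reduction) loop
//       scf.parallel (%i3) = ... {    // trailing parallel loop
//         <body>
//       }
//     }
//   }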
546 
547 /// Specialization for generating a mix of parallel and sequential scf loops.
548 template <>
549 void GenerateLoopNest<scf::ParallelOp>::doit(
550  OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
551  ArrayRef<utils::IteratorType> iteratorTypes,
552  function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
553  ValueRange)>
554  bodyBuilderFn,
555  ArrayRef<linalg::ProcInfo> procInfo) {
556  SmallVector<Value> iterArgInitValues;
557  if (!linalgOp.hasPureBufferSemantics())
558  llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
559  assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
560  // This function may be passed more iterator types than ranges.
561  assert(iteratorTypes.size() >= loopRanges.size() &&
562  "expected iterator type for all ranges");
563  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
564  "expected proc information for all loops when present");
565  iteratorTypes = iteratorTypes.take_front(loopRanges.size());
566  SmallVector<Value, 8> lbsStorage, ubsStorage, stepsStorage, ivs;
567  unsigned numLoops = iteratorTypes.size();
568  ivs.reserve(numLoops);
569  lbsStorage.reserve(numLoops);
570  ubsStorage.reserve(numLoops);
571  stepsStorage.reserve(numLoops);
572 
573  // Get the loop lb, ub, and step.
574  unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
575 
576  // Modify the lb, ub, and step based on the distribution options.
577  for (const auto &it : llvm::enumerate(procInfo)) {
578  if (it.value().distributionMethod != linalg::DistributionMethod::None) {
579  updateBoundsForCyclicDistribution(
580  b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
581  ubsStorage[it.index()], stepsStorage[it.index()]);
582  }
583  }
584  ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
585  generateParallelLoopNest(
586  b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
587  [&](OpBuilder &b, Location loc, ValueRange ivs) {
588  bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
589  },
590  ivs);
591 
592  assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
593 }
594 
595 static Operation *materializeTiledShape(OpBuilder &builder, Location loc,
596  Value valueToTile,
597  const SliceParameters &sliceParams) {
598  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
599  auto *sliceOp = TypeSwitch<ShapedType, Operation *>(shapedType)
600  .Case([&](MemRefType) {
601  return builder.create<memref::SubViewOp>(
602  loc, valueToTile, sliceParams.offsets,
603  sliceParams.sizes, sliceParams.strides);
604  })
605  .Case([&](RankedTensorType) {
606  return builder.create<tensor::ExtractSliceOp>(
607  loc, valueToTile, sliceParams.offsets,
608  sliceParams.sizes, sliceParams.strides);
609  })
610  .Default([](ShapedType) -> Operation * {
611  llvm_unreachable("Unexpected shaped type");
612  });
613  return sliceOp;
614 }
615 
616 Operation *makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
617  ArrayRef<OpFoldResult> tileSizes, AffineMap map,
618  ArrayRef<OpFoldResult> lbs,
619  ArrayRef<OpFoldResult> ubs,
620  ArrayRef<OpFoldResult> subShapeSizes,
621  bool omitPartialTileCheck) {
622  SliceParameters sliceParams =
623  computeSliceParameters(builder, loc, valueToTile, tileSizes, map, lbs,
624  ubs, subShapeSizes, omitPartialTileCheck);
625  return materializeTiledShape(builder, loc, valueToTile, sliceParams);
626 }
627 
628 SliceParameters
629 computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
630  ArrayRef<OpFoldResult> tileSizes, AffineMap map,
631  ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
632  ArrayRef<OpFoldResult> subShapeSizes,
633  bool omitPartialTileCheck) {
634  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
635  assert(shapedType && "only shaped types can be tiled");
636  ArrayRef<int64_t> shape = shapedType.getShape();
637  int64_t rank = shapedType.getRank();
638 
639  // Compute offsets/sizes/strides for the tile.
640  SliceParameters sliceParams;
641  sliceParams.offsets.reserve(rank);
642  sliceParams.sizes.reserve(rank);
643  sliceParams.strides.reserve(rank);
644  for (unsigned r = 0; r < rank; ++r) {
645  LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
646  if (!isTiled(map.getSubMap({r}), tileSizes)) {
647  sliceParams.offsets.push_back(builder.getIndexAttr(0));
648  OpFoldResult dim = createFoldedDimOp(builder, loc, valueToTile, r);
649  sliceParams.sizes.push_back(dim);
650  sliceParams.strides.push_back(builder.getIndexAttr(1));
651  LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
652  continue;
653  }
654  LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");
655 
656  // Tiling creates a new slice at the proper index, the slice step is 1
657  // (i.e. the op does not subsample, stepping occurs in the loop).
658  auto m = map.getSubMap({r});
659  LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
660  IRRewriter rewriter(builder);
661  // The offset of the slice is m(lbs) - m(0).
662  SmallVector<Attribute> zeros(lbs.size(), rewriter.getIndexAttr(0));
663  SmallVector<Attribute> mAtZero;
664  [[maybe_unused]] auto res = m.constantFold(zeros, mAtZero);
665  assert(succeeded(res) && "affine_map must be evaluatable (not symbols)");
666  int64_t mAtZeroInt =
667  cast<IntegerAttr>(mAtZero[0]).getValue().getSExtValue();
668  OpFoldResult offset = makeComposedFoldedAffineApply(
669  rewriter, loc, m.getResult(0) - mAtZeroInt, lbs);
670  sliceParams.offsets.push_back(offset);
671 
672  OpFoldResult closedIntSize =
673  makeComposedFoldedAffineApply(rewriter, loc, m, subShapeSizes);
674  // Resulting size needs to be made half open interval again.
675  AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext());
676  OpFoldResult size =
677  makeComposedFoldedAffineApply(rewriter, loc, s0 + 1, closedIntSize);
678  LLVM_DEBUG(llvm::dbgs()
679  << "computeSliceParameters: raw size: " << size << "\n");
680  LLVM_DEBUG(llvm::dbgs()
681  << "computeSliceParameters: new offset: " << offset << "\n");
682  sliceParams.strides.push_back(builder.getIndexAttr(1));
683 
684  if (omitPartialTileCheck) {
685  // We statically know that the partial/boundary tile condition is
686  // unnecessary.
687  LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
688  sliceParams.sizes.push_back(size);
689  continue;
690  }
691 
692  // The size of the subview / extract_slice should be trimmed to avoid
693  // out-of-bounds accesses, unless:
694  // a. We statically know the subshape size divides the shape size evenly.
695  // b. The subshape size is 1. According to the way the loops are set up,
696  // tensors with "0" dimensions would never be constructed.
697  int64_t shapeSize = shape[r];
698  std::optional<int64_t> sizeCst = getConstantIntValue(size);
699  auto hasTileSizeOne = sizeCst == 1;
700  auto dividesEvenly = sizeCst && !ShapedType::isDynamic(shapeSize) &&
701  ((shapeSize % *sizeCst) == 0);
702  if (!hasTileSizeOne && !dividesEvenly) {
703  LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
704  << ", size: " << size
705  << ": make sure in bound with affine.min\n");
706 
707  AffineExpr dim0, dim1, dim2;
708  MLIRContext *context = builder.getContext();
709  bindDims(context, dim0, dim1, dim2);
710 
711  // Get the dimension size for this dimension. We need to first calculate
712  // the max index and then plus one. This is important because for
713  // convolution ops, we have its input window dimension's affine map of the
714  // form `(d0 * s0 + d1)`, where `d0`/`d1` is an output/filter window
715  // dimension and `s0` is the stride. Directly using the dimension size of
716  // the output/filter window dimensions would cause an incorrect calculation.
717  AffineMap minusOneMap = AffineMap::inferFromExprList(
718  {ArrayRef<AffineExpr>{dim0 - 1}}, context)
719  .front();
720  AffineMap plusOneMap = AffineMap::inferFromExprList(
721  {ArrayRef<AffineExpr>{dim0 + 1}}, context)
722  .front();
723  SmallVector<OpFoldResult> maxIndices =
724  llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) {
725  return makeComposedFoldedAffineApply(rewriter, loc, minusOneMap,
726  {ub});
727  }));
728  OpFoldResult maxIndex =
729  makeComposedFoldedAffineApply(rewriter, loc, m, maxIndices);
730  OpFoldResult d =
731  makeComposedFoldedAffineApply(rewriter, loc, plusOneMap, {maxIndex});
732 
733  // Compute min(dim - offset, size) to avoid out-of-bounds accesses.
734  AffineMap minMap = AffineMap::inferFromExprList(
735  {ArrayRef<AffineExpr>{dim1 - dim2, dim0}}, context)
736  .front();
737  size =
738  makeComposedFoldedAffineMin(rewriter, loc, minMap, {size, d, offset});
739  }
740  LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
741  sliceParams.sizes.push_back(size);
742  }
743  return sliceParams;
744 }
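// Worked example (hypothetical submap and tile sizes): for a
// convolution-style submap m = (d0, d1) -> (d0 * 2 + d1) (stride 2),
// lbs = (%iv0, %iv1) and tile sizes (4, 3), the closed-interval
// subShapeSizes are (3, 2), so
//   offset = m(lbs) - m(0)        = 2 * %iv0 + %iv1
//   size   = m(subShapeSizes) + 1 = 2 * 3 + 2 + 1 = 9
// i.e. a 4-wide output tile convolved with a 3-wide filter tile reads a
// 9-wide input window, which is then bounded with affine.min unless the
// partial-tile check is omitted or provably unnecessary.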
745 
746 SmallVector<OpFoldResult> computeTileOffsets(OpBuilder &b, Location loc,
747  ArrayRef<OpFoldResult> ivs,
748  ArrayRef<OpFoldResult> tileSizes) {
749  SmallVector<OpFoldResult> offsets;
750  for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
751  LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
752  bool isTiled = !isZeroInteger(tileSizes[idx]);
753  offsets.push_back(isTiled ? ivs[idxIvs++] : b.getIndexAttr(0));
754  LLVM_DEBUG(llvm::dbgs()
755  << "computeTileOffsets: " << offsets.back() << "\n");
756  }
757  return offsets;
758 }
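// Illustrative example (hypothetical values): with tileSizes = [0, 8, 4]
// and ivs = [%i, %j], the untiled dimension gets a zero offset and the
// tiled dimensions consume the ivs in order, producing
// offsets = [0, %i, %j].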
759 
760 SmallVector<OpFoldResult> computeTileSizes(OpBuilder &b, Location loc,
761  ArrayRef<OpFoldResult> tileSizes,
762  ArrayRef<OpFoldResult> sizeBounds) {
763  SmallVector<OpFoldResult> sizes;
764  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
765  bool isTiled = !isZeroInteger(tileSizes[idx]);
766  // Before composing, we need to make range a closed interval.
767  OpFoldResult size = isTiled ? tileSizes[idx] : sizeBounds[idx];
768  AffineExpr d0 = getAffineDimExpr(0, b.getContext());
769  IRRewriter rewriter(b);
770  sizes.push_back(makeComposedFoldedAffineApply(rewriter, loc, d0 - 1, size));
771  LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
772  }
773  return sizes;
774 }
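// Illustrative example (hypothetical values): with tileSizes = [0, 8] and
// sizeBounds = [%d0, %d1], the result is the closed-interval sizes
// [%d0 - 1, 7]: untiled dimensions use their full size bound, tiled ones
// their tile size, each reduced by one.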
775 
776 SmallVector<Type> getTensorOutputTypes(LinalgOp op, ValueRange operands) {
777  if (op.hasPureBufferSemantics())
778  return {};
779  return llvm::to_vector(
780  llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
781  return operands[opOperand.getOperandNumber()].getType();
782  }));
783 }
784 
785 SmallVector<Value> insertSlicesBack(OpBuilder &builder, Location loc,
786  LinalgOp op, ValueRange operands,
787  ValueRange results) {
788  if (op.hasPureBufferSemantics())
789  return {};
790  SmallVector<Value> tensorResults;
791  tensorResults.reserve(results.size());
792  // Insert an insert_slice for each output tensor.
793  unsigned resultIdx = 0;
794  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
795  // TODO: use an interface/adaptor to avoid leaking position in
796  // `tiledOperands`.
797  Value outputTensor = operands[opOperand.getOperandNumber()];
798  if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
799  Value inserted = builder.create<tensor::InsertSliceOp>(
800  loc, sliceOp.getSource().getType(), results[resultIdx],
801  sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
802  sliceOp.getStrides(), sliceOp.getStaticOffsets(),
803  sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
804  tensorResults.push_back(inserted);
805  } else {
806  tensorResults.push_back(results[resultIdx]);
807  }
808  ++resultIdx;
809  }
810  return tensorResults;
811 }
812 
813 SmallVector<std::optional<SliceParameters>>
814 computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp,
815  ValueRange valuesToTile, ArrayRef<OpFoldResult> ivs,
816  ArrayRef<OpFoldResult> tileSizes,
817  ArrayRef<OpFoldResult> sizeBounds,
818  bool omitPartialTileCheck) {
819  assert(ivs.size() == static_cast<size_t>(llvm::count_if(
820  llvm::make_range(tileSizes.begin(), tileSizes.end()),
821  [](OpFoldResult v) { return !isZeroInteger(v); })) &&
822  "expected as many ivs as non-zero sizes");
823 
824  // Construct (potentially temporary) mins and maxes on which to apply maps
825  // that define tile subshapes.
826  SmallVector<OpFoldResult> lbs =
827  computeTileOffsets(builder, loc, ivs, tileSizes);
828  SmallVector<OpFoldResult> subShapeSizes =
829  computeTileSizes(builder, loc, tileSizes, sizeBounds);
830 
831  assert(static_cast<int64_t>(valuesToTile.size()) <=
832  linalgOp->getNumOperands() &&
833  "more value to tile than operands.");
834  SmallVector<std::optional<SliceParameters>> allSliceParams;
835  allSliceParams.reserve(valuesToTile.size());
836  for (auto [opOperand, val] :
837  llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
838  Value shapedOp = val;
839  LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
840  AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
841  // Use `opOperand` as is if it is not tiled and not an output tensor. Having
842  // an extract/insert slice pair for all output tensors simplifies follow up
843  // transformations such as padding and bufferization since the
844  // extract/insert slice pairs make the accessed iteration argument
845  // subdomains explicit.
846 
847  Type operandType = opOperand.get().getType();
848  if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
849  linalgOp.isDpsInit(&opOperand))) {
850  allSliceParams.push_back(std::nullopt);
851  LLVM_DEBUG(llvm::dbgs()
852  << ": not tiled: use shape: " << operandType << "\n");
853  continue;
854  }
855  LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
856 
857  allSliceParams.push_back(computeSliceParameters(
858  builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
859  omitPartialTileCheck));
860  }
861 
862  return allSliceParams;
863 }
864 
865 SmallVector<Value> makeTiledShapes(OpBuilder &builder, Location loc,
866  LinalgOp linalgOp, ValueRange valuesToTile,
867  ArrayRef<OpFoldResult> ivs,
868  ArrayRef<OpFoldResult> tileSizes,
869  ArrayRef<OpFoldResult> sizeBounds,
870  bool omitPartialTileCheck) {
871  SmallVector<std::optional<SliceParameters>> allSliceParameter =
872  computeAllSliceParameters(builder, loc, linalgOp, valuesToTile, ivs,
873  tileSizes, sizeBounds, omitPartialTileCheck);
874  SmallVector<Value> tiledShapes;
875  for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
876  Value valueToTile = std::get<0>(item);
877  std::optional<SliceParameters> sliceParams = std::get<1>(item);
878  tiledShapes.push_back(
879  sliceParams.has_value()
880  ? materializeTiledShape(builder, loc, valueToTile, *sliceParams)
881  ->getResult(0)
882  : valueToTile);
883  }
884  return tiledShapes;
885 }
886 
887 void offsetIndices(OpBuilder &b, LinalgOp linalgOp,
888  ArrayRef<OpFoldResult> offsets) {
889  IRRewriter rewriter(b);
890  offsetIndices(rewriter, linalgOp, offsets);
891 }
892 
893 void offsetIndices(RewriterBase &b, LinalgOp linalgOp,
894  ArrayRef<OpFoldResult> offsets) {
895  if (!linalgOp.hasIndexSemantics())
896  return;
897 
898  for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
899  if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
900  continue;
901  OpBuilder::InsertionGuard guard(b);
902  b.setInsertionPointAfter(indexOp);
903  AffineExpr index, offset;
904  bindDims(b.getContext(), index, offset);
905  OpFoldResult applied = makeComposedFoldedAffineApply(
906  b, indexOp.getLoc(), index + offset,
907  {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
908  Value materialized =
909  getValueOrCreateConstantIndexOp(b, indexOp.getLoc(), applied);
910  b.replaceUsesWithIf(indexOp, materialized, [&](OpOperand &use) {
911  return use.getOwner() != materialized.getDefiningOp();
912  });
913  }
914 }
915 
916 /// Get the reassociation maps to fold the result of an extract_slice (or the
917 /// source of an insert_slice) operation with the given offsets and sizes into
918 /// its rank-reduced version. This is only done for the cases where the size is
919 /// 1 and the offset is 0. Strictly speaking the offset 0 is not required in
920 /// general, but non-zero offsets are not handled by the SPIR-V backend at this
921 /// point (and potentially cannot be handled).
922 std::optional<SmallVector<ReassociationIndices>>
923 getReassociationMapForFoldingUnitDims(ArrayRef<OpFoldResult> mixedSizes) {
924  SmallVector<ReassociationIndices> reassociation;
925  ReassociationIndices curr;
926  for (const auto &it : llvm::enumerate(mixedSizes)) {
927  auto dim = it.index();
928  auto size = it.value();
929  curr.push_back(dim);
930  auto attr = llvm::dyn_cast_if_present<Attribute>(size);
931  if (attr && cast<IntegerAttr>(attr).getInt() == 1)
932  continue;
933  reassociation.emplace_back(ReassociationIndices{});
934  std::swap(reassociation.back(), curr);
935  }
936  // When the reassociation is not empty, fold the remaining unit dimensions
937  // into the last group. If the reassociation built so far is empty, leave it
938  // empty; this folds everything into a rank-0 tensor.
939  if (!curr.empty() && !reassociation.empty())
940  reassociation.back().append(curr.begin(), curr.end());
941  return reassociation;
942 }
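// Illustrative example (hypothetical sizes): for mixedSizes = [1, 64, 1, 16]
// each unit dimension is folded into the following non-unit dimension,
// yielding the reassociation [[0, 1], [2, 3]]; for mixedSizes = [1, 1] no
// group is ever started, so the result is empty and everything folds into a
// rank-0 tensor.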
943 
944 } // namespace linalg
945 } // namespace mlir