Utils.cpp
1 //===- Utils.cpp - Utilities to support the Linalg dialect ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements utilities for the Linalg dialect.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "mlir/Dialect/Linalg/Utils/Utils.h"
14 
15 #include "mlir/Analysis/SliceAnalysis.h"
16 #include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
17 #include "mlir/Dialect/Affine/IR/AffineOps.h"
18 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
19 #include "mlir/Dialect/Affine/LoopUtils.h"
20 #include "mlir/Dialect/Arith/IR/Arith.h"
21 #include "mlir/Dialect/Arith/Utils/Utils.h"
22 #include "mlir/Dialect/Func/IR/FuncOps.h"
23 #include "mlir/Dialect/Linalg/IR/Linalg.h"
24 #include "mlir/Dialect/MemRef/IR/MemRef.h"
25 #include "mlir/Dialect/SCF/IR/SCF.h"
26 #include "mlir/Dialect/Tensor/IR/Tensor.h"
27 #include "mlir/Dialect/Tensor/Utils/Utils.h"
28 #include "mlir/Dialect/Utils/IndexingUtils.h"
29 #include "mlir/Dialect/Utils/StaticValueUtils.h"
30 #include "mlir/IR/AffineExpr.h"
31 #include "mlir/IR/AffineExprVisitor.h"
32 #include "mlir/IR/AffineMap.h"
33 #include "mlir/IR/Matchers.h"
35 #include "mlir/Pass/Pass.h"
36 #include "llvm/ADT/TypeSwitch.h"
37 #include "llvm/Support/Debug.h"
38 
39 #define DEBUG_TYPE "linalg-utils"
40 
41 using namespace mlir;
42 using namespace presburger;
43 using namespace mlir::linalg;
44 using namespace mlir::scf;
45 
46 static bool isZero(OpFoldResult v) {
47  if (!v)
48  return false;
49  if (auto attr = v.dyn_cast<Attribute>()) {
50  IntegerAttr intAttr = attr.dyn_cast<IntegerAttr>();
51  return intAttr && intAttr.getValue().isZero();
52  }
53  if (auto cst = v.get<Value>().getDefiningOp<arith::ConstantIndexOp>())
54  return cst.value() == 0;
55  return false;
56 }
57 
58 namespace {
59 
60 // Helper visitor to determine whether an AffineExpr is tiled.
61 // This is achieved by traversing every AffineDimExpr with position `pos` and
62 // checking whether the corresponding `tileSizes[pos]` is non-zero.
63 // This also enforces that only positive coefficients occur in multiplications.
64 //
65 // Example:
66 // `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0]
67 //
68 struct TileCheck : public AffineExprVisitor<TileCheck> {
69  TileCheck(ArrayRef<OpFoldResult> tileSizes) : tileSizes(tileSizes) {}
70 
71  void visitDimExpr(AffineDimExpr expr) {
72  isTiled |= !isZero(tileSizes[expr.getPosition()]);
73  }
74  void visitAffineBinaryOpExpr(AffineBinaryOpExpr expr) {
75  visit(expr.getLHS());
76  visit(expr.getRHS());
77  if (expr.getKind() == mlir::AffineExprKind::Mul)
78  assert(expr.getRHS().cast<AffineConstantExpr>().getValue() > 0 &&
79  "nonpositive multiplying coefficient");
80  }
81  bool isTiled = false;
82  ArrayRef<OpFoldResult> tileSizes;
83 };
84 
85 } // namespace
86 
87 static bool isTiled(AffineExpr expr, ArrayRef<OpFoldResult> tileSizes) {
88  if (!expr)
89  return false;
90  TileCheck t(tileSizes);
91  t.visit(expr);
92  return t.isTiled;
93 }
94 
95 // Checks whether the `map` varies with respect to a non-zero `tileSize`.
96 static bool isTiled(AffineMap map, ArrayRef<OpFoldResult> tileSizes) {
97  if (!map)
98  return false;
99  for (unsigned r = 0; r < map.getNumResults(); ++r)
100  if (isTiled(map.getResult(r), tileSizes))
101  return true;
102  return false;
103 }
104 
105 Optional<RegionMatcher::BinaryOpKind>
106 RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
107  auto &region = op.getRegion();
108  if (!llvm::hasSingleElement(region))
109  return llvm::None;
110 
111  Block &block = region.front();
112  if (block.getNumArguments() != 2 ||
113  !block.getArgument(0).getType().isSignlessIntOrFloat() ||
114  !block.getArgument(1).getType().isSignlessIntOrFloat())
115  return llvm::None;
116 
117  auto &ops = block.getOperations();
118  if (!llvm::hasSingleElement(block.without_terminator()))
119  return llvm::None;
120 
121  using mlir::matchers::m_Val;
122  auto a = m_Val(block.getArgument(0));
123  auto b = m_Val(block.getArgument(1));
124 
125  auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
126  if (addPattern.match(&ops.back()))
127  return BinaryOpKind::IAdd;
128 
129  return llvm::None;
130 }
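// For illustration, the kind of region this matcher accepts looks like:
//   ^bb0(%a: i32, %b: i32):
//     %sum = arith.addi %a, %b : i32
//     linalg.yield %sum : i32
// Such a block (two signless int/float arguments, a single arith.addi feeding
// linalg.yield) is reported as BinaryOpKind::IAdd.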
131 
132 /// Explicit instantiation of loop nest generator for different loop types.
133 template struct mlir::linalg::GenerateLoopNest<scf::ForOp>;
134 template struct mlir::linalg::GenerateLoopNest<scf::ParallelOp>;
135 template struct mlir::linalg::GenerateLoopNest<AffineForOp>;
136 
137 /// Given a list of subview ranges, extract individual values for lower, upper
138 /// bounds and steps and put them into the corresponding vectors.
139 static void unpackRanges(OpBuilder &builder, Location loc,
140  ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
141  SmallVectorImpl<Value> &ubs,
142  SmallVectorImpl<Value> &steps) {
143  for (Range range : ranges) {
144  lbs.emplace_back(
145  getValueOrCreateConstantIndexOp(builder, loc, range.offset));
146  ubs.emplace_back(getValueOrCreateConstantIndexOp(builder, loc, range.size));
147  steps.emplace_back(
148  getValueOrCreateConstantIndexOp(builder, loc, range.stride));
149  }
150 }
151 
152 namespace mlir {
153 namespace linalg {
154 
155 bool allIndexingsAreProjectedPermutation(LinalgOp op) {
156  return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
157  return m.isProjectedPermutation(/*allowZeroInResults=*/true);
158  });
159 }
160 
161 bool hasOnlyScalarElementwiseOp(Region &r) {
162  if (!llvm::hasSingleElement(r))
163  return false;
164  for (Operation &op : r.front()) {
165  if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
166  linalg::YieldOp, linalg::IndexOp>(op) ||
167  OpTrait::hasElementwiseMappableTraits(&op)) ||
168  llvm::any_of(op.getResultTypes(),
169  [](Type type) { return !type.isIntOrIndexOrFloat(); }))
170  return false;
171  }
172  return true;
173 }
174 
175 bool isElementwise(LinalgOp op) {
176  if (op.getNumLoops() != op.getNumParallelLoops())
177  return false;
178 
179  if (!allIndexingsAreProjectedPermutation(op))
180  return false;
181 
182  // TODO: relax the restrictions on indexing map.
183  for (OpOperand *opOperand : op.getDpsInitOperands()) {
184  if (!op.getMatchingIndexingMap(opOperand).isPermutation())
185  return false;
186  }
187  return hasOnlyScalarElementwiseOp(op->getRegion(0));
188 }
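// For illustration, an op such as
//   #id = affine_map<(d0, d1) -> (d0, d1)>
//   linalg.generic {indexing_maps = [#id, #id, #id],
//                   iterator_types = ["parallel", "parallel"]}
//       ins(%a, %b : tensor<4x8xf32>, tensor<4x8xf32>)
//       outs(%c : tensor<4x8xf32>) {
//     ^bb0(%x: f32, %y: f32, %z: f32):
//       %0 = arith.addf %x, %y : f32
//       linalg.yield %0 : f32
//   }
// is elementwise, while linalg.matmul is not (it carries a reduction iterator).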
189 
190 bool isParallelIterator(utils::IteratorType iteratorType) {
191  return iteratorType == utils::IteratorType::parallel;
192 }
193 
194 bool isReductionIterator(utils::IteratorType iteratorType) {
195  return iteratorType == utils::IteratorType::reduction;
196 }
197 
198 /// Helper function that creates a memref::DimOp or tensor::DimOp depending on
199 /// the type of `source`.
200 Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim) {
201  if (source.getType().isa<UnrankedMemRefType, MemRefType>())
202  return b.createOrFold<memref::DimOp>(loc, source, dim);
203  if (source.getType().isa<UnrankedTensorType, RankedTensorType>())
204  return b.createOrFold<tensor::DimOp>(loc, source, dim);
205  llvm_unreachable("Expected MemRefType or TensorType");
206 }
207 
208 static OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value source,
209  int64_t dim) {
210  auto shapedType = source.getType().cast<ShapedType>();
211  if (!shapedType.hasRank() || shapedType.isDynamicDim(dim))
212  return createOrFoldDimOp(b, loc, source, dim);
213  return b.getIndexAttr(shapedType.getDimSize(dim));
214 }
215 
216 /// Given an operation, retrieves the value of each dynamic dimension through
217 /// constructing the necessary DimOp operators.
218 SmallVector<Value, 4> getDynOperands(Location loc, Value val, OpBuilder &b) {
219  SmallVector<Value, 4> dynOperands;
220  auto shapedType = val.getType().cast<ShapedType>();
221  for (const auto &dim : llvm::enumerate(shapedType.getShape())) {
222  if (dim.value() == ShapedType::kDynamic)
223  dynOperands.push_back(createOrFoldDimOp(b, loc, val, dim.index()));
224  }
225  return dynOperands;
226 }
227 
228 void getUpperBoundForIndex(Value value, AffineMap &boundMap,
229  SmallVectorImpl<Value> &boundOperands,
230  bool constantRequired) {
231  // Initialize `boundMap` and `boundOperands` to the identity returning
232  // `value`. This combination is the default result of the method if no
233  // simplification is possible.
234  assert(value.getType().isIndex() && "expect value to have index type");
235  boundMap = AffineMap::getMultiDimIdentityMap(1, value.getContext());
236  boundOperands.assign({value});
237  canonicalizeMapAndOperands(&boundMap, &boundOperands);
238 
239  // Continue only if there is an affine index computation to simplify.
240  Operation *definingOp = value.getDefiningOp();
241  if (!definingOp || !isa<AffineApplyOp, AffineMinOp>(definingOp))
242  return;
243 
244  // Get the backward slice containing the affine index computation.
245  SetVector<Operation *> backwardSlice;
246  getBackwardSlice(definingOp, &backwardSlice, [](Operation *op) {
247  return isa<AffineApplyOp, AffineMinOp>(op);
248  });
249  backwardSlice.insert(definingOp);
250 
251  // Setup a system of affine constraints that describe the index computation.
252  FlatAffineValueConstraints constraints;
253 
254  // Helper to find or create an identifier for the given value.
255  auto findOrCreateId = [&](Value value) {
256  if (!constraints.containsVar(value)) {
257  constraints.appendDimVar(value);
258  return true;
259  }
260  unsigned pos;
261  constraints.findVar(value, &pos);
262  return pos < constraints.getNumDimVars();
263  };
264  // Helper to get the position for the given value.
265  auto getPosition = [&](Value value) {
266  unsigned pos;
267  bool exists = constraints.findVar(value, &pos);
268  (void)exists;
269  assert(exists && "expect to find the identifier");
270  return pos;
271  };
272 
273  // Add the affine operations in `backwardSlice` to the constraints.
274  for (Operation *op : llvm::reverse(backwardSlice)) {
275  // Add an identifier for all op results and operands.
276  if (!(llvm::all_of(op->getResults(), findOrCreateId) &&
277  llvm::all_of(op->getOperands(), findOrCreateId)))
278  return;
279 
280  // Add AffineApplyOps to the constraints.
281  if (auto applyOp = dyn_cast<AffineApplyOp>(op)) {
282  AffineMap map = constraints.computeAlignedMap(applyOp.getAffineMap(),
283  applyOp.getOperands());
284  if (failed(constraints.addBound(IntegerPolyhedron::EQ,
285  getPosition(applyOp.getResult()), map)))
286  return;
287  continue;
288  }
289  // Add AffineMinOps to the constraints.
290  auto minOp = cast<AffineMinOp>(op);
291  AffineMap map = constraints.computeAlignedMap(minOp.getAffineMap(),
292  minOp.getOperands());
293  if (failed(constraints.addBound(IntegerPolyhedron::UB,
294  getPosition(minOp.getResult()), map,
295  /*isClosedBound=*/true)))
296  return;
297  }
298 
299  // Obtain an upper bound for the affine index computation by projecting out
300  // all temporary results and expressing the upper bound for `value` in terms
301  // of the terminals of the index computation.
302  unsigned pos = getPosition(value);
303  if (constantRequired) {
304  auto ubConst = constraints.getConstantBound64(
305  FlatAffineValueConstraints::BoundType::UB, pos);
306  if (!ubConst)
307  return;
308 
309  boundMap = AffineMap::getConstantMap(*ubConst, value.getContext());
310  return;
311  }
312 
313  SmallVector<AffineMap> lowerBounds(1), upperBounds(1);
314  constraints.getSliceBounds(pos, 1, value.getContext(), &lowerBounds,
315  &upperBounds,
316  /*getClosedUB=*/true);
317  // Verify `upperBounds[0]` is valid and has at least one result.
318  if (!upperBounds[0] || upperBounds[0].getNumResults() == 0)
319  return;
320 
321  // Set `boundMap` and `boundOperands` to the computed upper bound.
322  boundMap = upperBounds[0];
323  constraints.getAllValues(&boundOperands);
324  erase_value(boundOperands, value);
325  canonicalizeMapAndOperands(&boundMap, &boundOperands);
326 }
327 
328 FailureOr<int64_t> getConstantUpperBoundForIndex(Value value) {
329  // Compute an upper bound for `value`.
330  AffineMap boundMap;
331  SmallVector<Value> boundOperands;
332  getUpperBoundForIndex(value, boundMap, boundOperands,
333  /*constantRequired=*/true);
334 
335  // Search the results of `boundMap` for constant upper bounds.
336  SmallVector<int64_t> constantBounds;
337  for (AffineExpr result : boundMap.getResults())
338  if (auto constExpr = result.dyn_cast<AffineConstantExpr>())
339  constantBounds.push_back(constExpr.getValue());
340 
341  // Return the minimal upper bound or failure if none is found.
342  if (constantBounds.empty())
343  return failure();
344  return *std::min_element(constantBounds.begin(), constantBounds.end());
345 }
346 
347 Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
348  Value source, Value pad, bool nofold) {
349  // Exit if `source` is not defined by an ExtractSliceOp.
350  auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
351  if (!sliceOp)
352  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
353 
354  // Search the `source` use-def chain for padded LinalgOps.
355  Value current = sliceOp.getSource();
356  while (current) {
357  auto linalgOp = current.getDefiningOp<LinalgOp>();
358  if (!linalgOp)
359  break;
360  OpResult opResult = current.cast<OpResult>();
361  current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();
362  }
363  auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
364 
365  // Exit if the search fails to match a tensor::PadOp at the end of the matched
366  // LinalgOp sequence.
367  if (!padOp)
368  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
369 
370  // Exit if the padded result type does not match.
371  if (sliceOp.getSource().getType() != type)
372  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
373 
374  // Exit if the LinalgOps are not high padded.
375  if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
376  return getConstantIntValue(ofr) != static_cast<int64_t>(0);
377  }))
378  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
379 
380  // Exit if `padOpSliceOp`, which defines the slice used by
381  // `padOp`, is rank-reducing.
382  auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
383  if (!padOpSliceOp ||
384  sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
385  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
386 
387  // Exit if the sizes of the dynamic sizes of `sliceOp` do not match the size
388  // of the slice padded by `padOp`.
389  if (llvm::any_of(
390  llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
391  [](std::tuple<OpFoldResult, OpFoldResult> it) {
392  return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
393  }))
394  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
395 
396  // Exit if the padding values do not match.
397  Attribute padOpPadAttr, padAttr;
398  Value padOpPad = padOp.getConstantPaddingValue();
399  if (!padOpPad || !matchPattern(padOpPad, m_Constant(&padOpPadAttr)) ||
400  !matchPattern(pad, m_Constant(&padAttr)) || padOpPadAttr != padAttr)
401  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
402 
403  // Return the padded result if the padding values and sizes match.
404  return sliceOp.getSource();
405 }
406 
407 GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
408  Value outputTensor,
409  ArrayRef<int64_t> transposeVector) {
410  auto resultTensorType = outputTensor.getType().cast<RankedTensorType>();
411  Type elementType = resultTensorType.getElementType();
412 
413  assert(isPermutationVector(transposeVector) &&
414  "expect transpose vector to be a permutation");
415  assert(transposeVector.size() ==
416  static_cast<size_t>(resultTensorType.getRank()) &&
417  "expect transpose vector size to match result tensor rank");
418 
419  // Compute the transpose and the identity indexing maps.
420  SmallVector<AffineMap> indexingMaps = {
421  inversePermutation(AffineMap::getPermutationMap(
422  SmallVector<unsigned>(transposeVector.begin(), transposeVector.end()),
423  b.getContext())),
424  AffineMap::getMultiDimIdentityMap(transposeVector.size(),
425  b.getContext())};
426  SmallVector<utils::IteratorType> iteratorTypes(transposeVector.size(),
427  utils::IteratorType::parallel);
428 
429  // Create a GenericOp to transpose `inputTensor` into `outputTensor`.
430  auto transposeOp =
431  b.create<GenericOp>(loc, resultTensorType, inputTensor, outputTensor,
432  indexingMaps, iteratorTypes);
433  Region &body = transposeOp.getRegion();
434  body.push_back(new Block());
435  body.front().addArguments({elementType, elementType}, {loc, loc});
436 
437  // Create the body of the transpose operation.
438  OpBuilder::InsertionGuard g(b);
439  b.setInsertionPointToEnd(&body.front());
440  b.create<YieldOp>(loc, transposeOp.getRegion().front().getArgument(0));
441  return transposeOp;
442 }
443 
444 GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to) {
445  auto memrefTypeTo = to.getType().cast<MemRefType>();
446 #ifndef NDEBUG
447  auto memrefTypeFrom = from.getType().cast<MemRefType>();
448  assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
449  "`from` and `to` memref must have the same rank");
450 #endif // NDEBUG
451 
452  AffineMap id =
453  AffineMap::getMultiDimIdentityMap(memrefTypeTo.getRank(), b.getContext());
454  SmallVector<utils::IteratorType> iteratorTypes(memrefTypeTo.getRank(),
455  utils::IteratorType::parallel);
456  return b.create<linalg::GenericOp>(
457  loc,
458  /*inputs=*/from,
459  /*outputs=*/to,
460  /*indexingMaps=*/llvm::makeArrayRef({id, id}),
461  /*iteratorTypes=*/iteratorTypes,
462  [](OpBuilder &b, Location loc, ValueRange args) {
463  b.create<linalg::YieldOp>(loc, args.front());
464  });
465 }
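// For illustration, for 2-D memrefs the builder above produces roughly:
//   #id = affine_map<(d0, d1) -> (d0, d1)>
//   linalg.generic {indexing_maps = [#id, #id],
//                   iterator_types = ["parallel", "parallel"]}
//       ins(%from : memref<?x?xf32>) outs(%to : memref<?x?xf32>) {
//     ^bb0(%in: f32, %out: f32):
//       linalg.yield %in : f32
//   }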
466 
467 /// Specialization to build an scf "for" nest.
468 template <>
469 void GenerateLoopNest<scf::ForOp>::doit(
470  OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
471  ArrayRef<utils::IteratorType> iteratorTypes,
472  function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
473  ValueRange)>
474  bodyBuilderFn,
475  ArrayRef<linalg::ProcInfo> procInfo) {
476  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
477  "expected as many entries for proc info as number of loops, even if "
478  "they are null entries");
479  SmallVector<Value> iterArgInitValues = linalgOp.hasBufferSemantics()
480  ? SmallVector<Value>{}
481  : linalgOp.getDpsInitOperands();
482 
483  SmallVector<Value, 4> lbs, ubs, steps;
484  unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
485  LoopNest loopNest = mlir::scf::buildLoopNest(
486  b, loc, lbs, ubs, steps, iterArgInitValues,
487  [&](OpBuilder &b, Location loc, ValueRange ivs, ValueRange iterArgs) {
488  assert(iterArgs.size() == iterArgInitValues.size() &&
489  "expect the number of output tensors and iter args to match");
490  SmallVector<Value> operandValuesToUse = linalgOp->getOperands();
491  if (!iterArgs.empty()) {
492  operandValuesToUse = linalgOp.getDpsInputOperands();
493  operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
494  }
495  return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
496  });
497 
498  if (loopNest.loops.empty() || procInfo.empty())
499  return;
500 
501  // Filter out scf.for loops that were created out of parallel dimensions.
502  for (const auto &loop : llvm::enumerate(loopNest.loops)) {
503  if (procInfo[loop.index()].distributionMethod ==
504  DistributionMethod::Cyclic) {
505  mapLoopToProcessorIds(loop.value(), procInfo[loop.index()].procId,
506  procInfo[loop.index()].nprocs);
507  }
508  }
509 }
510 
511 /// Specialization to build affine "for" nest.
512 template <>
513 void GenerateLoopNest<AffineForOp>::doit(
514  OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
515  ArrayRef<utils::IteratorType> iteratorTypes,
516  function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
517  ValueRange)>
518  bodyBuilderFn,
519  ArrayRef<linalg::ProcInfo> /*procInfo*/) {
520  SmallVector<Value> iterArgInitValues = linalgOp.hasBufferSemantics()
521  ? SmallVector<Value>{}
522  : linalgOp.getDpsInitOperands();
523  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
524  SmallVector<Value, 4> lbs, ubs, steps;
525  unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
526 
527  // Affine loops require constant steps.
528  SmallVector<int64_t, 4> constantSteps;
529  constantSteps.reserve(steps.size());
530  for (Value v : steps) {
531  auto op = v.getDefiningOp<arith::ConstantIndexOp>();
532  assert(op && "Affine loops require constant steps");
533  constantSteps.push_back(op.value());
534  }
535 
536  mlir::buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
537  [&](OpBuilder &b, Location loc, ValueRange ivs) {
538  bodyBuilderFn(b, loc, ivs,
539  linalgOp->getOperands());
540  });
541 }
542 
543 /// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`.
544 void updateBoundsForCyclicDistribution(OpBuilder &b, Location loc, Value procId,
545  Value nprocs, Value &lb, Value &ub,
546  Value &step) {
547  AffineExpr d0, d1;
548  bindDims(b.getContext(), d0, d1);
549  AffineExpr s0 = getAffineSymbolExpr(0, b.getContext());
550  lb = makeComposedAffineApply(b, loc, d0 + d1 * s0, {lb, procId, step});
551  step = makeComposedAffineApply(b, loc, d0 * s0, {nprocs, step});
552 }
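// For illustration: with lb = 0, step = 1, and nprocs = 4, processor procId = 2
// ends up with lb = 2 and step = 4, i.e. it visits iterations 2, 6, 10, ...,
// a cyclic distribution of the original iteration space over the processors.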
553 
554 /// Generates a loop nest consisting of scf.parallel and scf.for, depending
555 /// on the `iteratorTypes.` Consecutive parallel loops create a single
556 /// scf.parallel operation; each sequential loop creates a new scf.for
557 /// operation. The body of the innermost loop is populated by
558 /// `bodyBuilderFn` that accepts a range of induction variables for all
559 /// loops. `ivStorage` is used to store the partial list of induction
560 /// variables.
561 // TODO: this function can be made iterative instead. However, it
562 // will have at most as many recursive calls as nested loops, which rarely
563 // exceeds 10.
564 static void generateParallelLoopNest(
565  OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs,
566  ValueRange steps, ArrayRef<utils::IteratorType> iteratorTypes,
567  ArrayRef<linalg::ProcInfo> procInfo,
568  function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilderFn,
569  SmallVectorImpl<Value> &ivStorage) {
570  assert(lbs.size() == ubs.size());
571  assert(lbs.size() == steps.size());
572  assert(lbs.size() == iteratorTypes.size());
573  assert(procInfo.empty() || (lbs.size() == procInfo.size()));
574 
575  // If there are no (more) loops to be generated, generate the body and be
576  // done with it.
577  if (iteratorTypes.empty()) {
578  bodyBuilderFn(b, loc, ivStorage);
579  return;
580  }
581 
582  // If there are no outer parallel loops, generate one sequential loop and
583  // recurse.
584  if (!isParallelIterator(iteratorTypes.front())) {
585  LoopNest singleLoop = buildLoopNest(
586  b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
587  [&](OpBuilder &b, Location loc, ValueRange ivs) {
588  ivStorage.append(ivs.begin(), ivs.end());
589  generateParallelLoopNest(
590  b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
591  iteratorTypes.drop_front(),
592  procInfo.empty() ? procInfo : procInfo.drop_front(),
593  bodyBuilderFn, ivStorage);
594  });
595  return;
596  }
597 
598  unsigned nLoops = iteratorTypes.size();
599  unsigned numProcessed = 0;
600  DistributionMethod distributionMethod = DistributionMethod::None;
601  if (procInfo.empty()) {
602  numProcessed = nLoops - iteratorTypes.drop_while(isParallelIterator).size();
603  } else {
604  distributionMethod = procInfo.front().distributionMethod;
605  numProcessed =
606  nLoops - procInfo
607  .drop_while([&](linalg::ProcInfo p) {
608  return p.distributionMethod == distributionMethod;
609  })
610  .size();
611  }
612 
613  auto remainderProcInfo =
614  procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
615  switch (distributionMethod) {
616  case DistributionMethod::None: {
617  // Generate a single parallel loop-nest operation for all outermost
618  // parallel loops and recurse.
619  b.create<scf::ParallelOp>(
620  loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
621  steps.take_front(numProcessed),
622  [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
623  ivStorage.append(localIvs.begin(), localIvs.end());
624  generateParallelLoopNest(
625  nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
626  ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
627  iteratorTypes.drop_front(numProcessed), remainderProcInfo,
628  bodyBuilderFn, ivStorage);
629  });
630  return;
631  }
632  case DistributionMethod::Cyclic: {
633  // Generate a single parallel loop-nest operation for all outermost
634  // parallel loops and recurse.
635  b.create<scf::ParallelOp>(
636  loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
637  steps.take_front(numProcessed),
638  [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
639  ivStorage.append(localIvs.begin(), localIvs.end());
640  generateParallelLoopNest(
641  nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
642  ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
643  iteratorTypes.drop_front(numProcessed), remainderProcInfo,
644  bodyBuilderFn, ivStorage);
645  });
646  return;
647  }
648  case DistributionMethod::CyclicNumProcsGeNumIters: {
649  // Check (for the processed loops) that the iteration is in-bounds.
650  ArithBuilder ab(b, loc);
651  Value cond = ab.slt(lbs[0], ubs[0]);
652  for (unsigned i = 1; i < numProcessed; ++i)
653  cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
654  ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
655  b.create<scf::IfOp>(loc, cond, [&](OpBuilder &b, Location loc) {
656  generateParallelLoopNest(b, loc, lbs.drop_front(numProcessed),
657  ubs.drop_front(numProcessed),
658  steps.drop_front(numProcessed),
659  iteratorTypes.drop_front(numProcessed),
660  remainderProcInfo, bodyBuilderFn, ivStorage);
661  b.create<scf::YieldOp>(loc, ValueRange{});
662  });
663  return;
664  }
665  case DistributionMethod::CyclicNumProcsEqNumIters:
666  // No check/loops needed here. Set the `%iv` to be the `%lb` and proceed
667  // with inner loop generation.
668  ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
669  generateParallelLoopNest(
670  b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
671  steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
672  remainderProcInfo, bodyBuilderFn, ivStorage);
673  return;
674  }
675 }
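// For illustration: with iteratorTypes = [parallel, parallel, reduction] and no
// distribution info, the two leading parallel dimensions become a single 2-D
// scf.parallel op whose body contains one scf.for for the trailing reduction
// dimension.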
676 
677 /// Specialization for generating a mix of parallel and sequential scf loops.
678 template <>
679 void GenerateLoopNest<scf::ParallelOp>::doit(
680  OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
681  ArrayRef<utils::IteratorType> iteratorTypes,
682  function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
683  ValueRange)>
684  bodyBuilderFn,
685  ArrayRef<linalg::ProcInfo> procInfo) {
686  SmallVector<Value> iterArgInitValues = linalgOp.hasBufferSemantics()
687  ? SmallVector<Value>{}
688  : linalgOp.getDpsInitOperands();
689  assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
690  // This function may be passed more iterator types than ranges.
691  assert(iteratorTypes.size() >= loopRanges.size() &&
692  "expected iterator type for all ranges");
693  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
694  "expected proc information for all loops when present");
695  iteratorTypes = iteratorTypes.take_front(loopRanges.size());
696  SmallVector<Value, 8> lbsStorage, ubsStorage, stepsStorage, ivs;
697  unsigned numLoops = iteratorTypes.size();
698  ivs.reserve(numLoops);
699  lbsStorage.reserve(numLoops);
700  ubsStorage.reserve(numLoops);
701  stepsStorage.reserve(numLoops);
702 
703  // Get the loop lb, ub, and step.
704  unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
705 
706  // Modify the lb, ub, and step based on the distribution options.
707  for (const auto &it : llvm::enumerate(procInfo)) {
708  if (it.value().distributionMethod != linalg::DistributionMethod::None) {
709  updateBoundsForCyclicDistribution(
710  b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
711  ubsStorage[it.index()], stepsStorage[it.index()]);
712  }
713  }
714  ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
715  generateParallelLoopNest(
716  b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
717  [&](OpBuilder &b, Location loc, ValueRange ivs) {
718  bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
719  },
720  ivs);
721 
722  assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
723 }
724 
725 static Value materializeTiledShape(OpBuilder &builder, Location loc,
726  Value valueToTile,
727  const SliceParameters &sliceParams) {
728  auto shapedType = valueToTile.getType().dyn_cast<ShapedType>();
729  auto *sliceOp = TypeSwitch<ShapedType, Operation *>(shapedType)
730  .Case([&](MemRefType) {
731  return builder.create<memref::SubViewOp>(
732  loc, valueToTile, sliceParams.offsets,
733  sliceParams.sizes, sliceParams.strides);
734  })
735  .Case([&](RankedTensorType) {
736  return builder.create<tensor::ExtractSliceOp>(
737  loc, valueToTile, sliceParams.offsets,
738  sliceParams.sizes, sliceParams.strides);
739  })
740  .Default([](ShapedType) -> Operation * {
741  llvm_unreachable("Unexpected shaped type");
742  });
743  return sliceOp->getResult(0);
744 }
745 
746 Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
747  ArrayRef<OpFoldResult> tileSizes, AffineMap map,
748  ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
749  ArrayRef<OpFoldResult> subShapeSizes,
750  bool omitPartialTileCheck) {
751  SliceParameters sliceParams =
752  computeSliceParameters(builder, loc, valueToTile, tileSizes, map, lbs,
753  ubs, subShapeSizes, omitPartialTileCheck);
754  return materializeTiledShape(builder, loc, valueToTile, sliceParams);
755 }
756 
757 SliceParameters
758 computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
759  ArrayRef<OpFoldResult> tileSizes, AffineMap map,
760  ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
761  ArrayRef<OpFoldResult> subShapeSizes,
762  bool omitPartialTileCheck) {
763  auto shapedType = valueToTile.getType().dyn_cast<ShapedType>();
764  assert(shapedType && "only shaped types can be tiled");
765  ArrayRef<int64_t> shape = shapedType.getShape();
766  int64_t rank = shapedType.getRank();
767 
768  // Compute offsets/sizes/strides for the tile.
769  SliceParameters sliceParams;
770  sliceParams.offsets.reserve(rank);
771  sliceParams.sizes.reserve(rank);
772  sliceParams.strides.reserve(rank);
773  for (unsigned r = 0; r < rank; ++r) {
774  LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
775  if (!isTiled(map.getSubMap({r}), tileSizes)) {
776  sliceParams.offsets.push_back(builder.getIndexAttr(0));
777  OpFoldResult dim = createFoldedDimOp(builder, loc, valueToTile, r);
778  sliceParams.sizes.push_back(dim);
779  sliceParams.strides.push_back(builder.getIndexAttr(1));
780  LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
781  continue;
782  }
783  LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");
784 
785  // Tiling creates a new slice at the proper index, the slice step is 1
786  // (i.e. the op does not subsample, stepping occurs in the loop).
787  auto m = map.getSubMap({r});
788  LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
789  IRRewriter rewriter(builder);
790  OpFoldResult offset = makeComposedFoldedAffineApply(rewriter, loc, m, lbs);
791  sliceParams.offsets.push_back(offset);
792  OpFoldResult closedIntSize =
793  makeComposedFoldedAffineApply(rewriter, loc, m, subShapeSizes);
794  // Resulting size needs to be made half open interval again.
795  AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext());
796  OpFoldResult size =
797  makeComposedFoldedAffineApply(rewriter, loc, s0 + 1, closedIntSize);
798  LLVM_DEBUG(llvm::dbgs()
799  << "computeSliceParameters: raw size: " << size << "\n");
800  LLVM_DEBUG(llvm::dbgs()
801  << "computeSliceParameters: new offset: " << offset << "\n");
802  sliceParams.strides.push_back(builder.getIndexAttr(1));
803 
804  if (omitPartialTileCheck) {
805  // We statically know that the partial/boundary tile condition is
806  // unnecessary.
807  LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
808  sliceParams.sizes.push_back(size);
809  continue;
810  }
811 
812  // The size of the subview / extract_slice should be trimmed to avoid
813  // out-of-bounds accesses, unless:
814  // a. We statically know the subshape size divides the shape size evenly.
815  // b. The subshape size is 1. According to the way the loops are set up,
816  // tensors with "0" dimensions would never be constructed.
817  int64_t shapeSize = shape[r];
818  Optional<int64_t> sizeCst = getConstantIntValue(size);
819  auto hasTileSizeOne = sizeCst && *sizeCst == 1;
820  auto dividesEvenly = sizeCst && !ShapedType::isDynamic(shapeSize) &&
821  ((shapeSize % *sizeCst) == 0);
822  if (!hasTileSizeOne && !dividesEvenly) {
823  LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
824  << ", size: " << size
825  << ": make sure in bound with affine.min\n");
826 
827  AffineExpr dim0, dim1, dim2;
828  bindDims(builder.getContext(), dim0, dim1, dim2);
829 
830  // Get the dimension size for this dimension. We need to first calculate
831  // the max index and then add one. This is important because for
832  // convolution ops, the input window dimension's affine map has the
833  // form `(d0 * s0 + d1)`, where `d0`/`d1` is an output/filter window
834  // dimension and `s0` is the stride. Directly using the dimension size
835  // of the output/filter window dimensions would cause incorrect results.
836  AffineMap minusOneMap =
837  AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 - 1}})
838  .front();
839  AffineMap plusOneMap =
840  AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 + 1}})
841  .front();
842  SmallVector<OpFoldResult> maxIndices =
843  llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) {
844  return makeComposedFoldedAffineApply(rewriter, loc, minusOneMap,
845  {ub});
846  }));
847  OpFoldResult maxIndex =
848  makeComposedFoldedAffineApply(rewriter, loc, m, maxIndices);
849  OpFoldResult d =
850  makeComposedFoldedAffineApply(rewriter, loc, plusOneMap, {maxIndex});
851 
852  // Compute min(dim - offset, size) to avoid out-of-bounds accesses.
853  AffineMap minMap = AffineMap::inferFromExprList(
854  {ArrayRef<AffineExpr>{dim1 - dim2, dim0}})
855  .front();
856  size =
857  makeComposedFoldedAffineMin(rewriter, loc, minMap, {size, d, offset});
858  }
859  LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
860  sliceParams.sizes.push_back(size);
861  }
862  return sliceParams;
863 }
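// For illustration of the partial-tile clamping above: tiling a dimension of
// size 10 with tile size 4 yields offsets 0, 4, 8 and sizes
// min(4, 10 - offset) = 4, 4, 2, so the last slice is shrunk instead of
// reading out of bounds.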
864 
865 SmallVector<OpFoldResult> computeTileOffsets(OpBuilder &b, Location loc,
866  ArrayRef<OpFoldResult> ivs,
867  ArrayRef<OpFoldResult> tileSizes) {
868  SmallVector<OpFoldResult> offsets;
869  for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
870  LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
871  bool isTiled = !isZero(tileSizes[idx]);
872  offsets.push_back(isTiled ? ivs[idxIvs++] : b.getIndexAttr(0));
873  LLVM_DEBUG(llvm::dbgs()
874  << "computeTileOffsets: " << offsets.back() << "\n");
875  }
876  return offsets;
877 }
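// For illustration: tileSizes = [4, 0, 8] with ivs = [%i, %k] produces
// offsets = [%i, 0, %k]; untiled dimensions (tile size 0) always start at 0.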
878 
879 SmallVector<OpFoldResult> computeTileSizes(OpBuilder &b, Location loc,
880  ArrayRef<OpFoldResult> tileSizes,
881  ArrayRef<OpFoldResult> sizeBounds) {
882  SmallVector<OpFoldResult> sizes;
883  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
884  bool isTiled = !isZero(tileSizes[idx]);
885  // Before composing, we need to make range a closed interval.
886  OpFoldResult size = isTiled ? tileSizes[idx] : sizeBounds[idx];
887  AffineExpr d0 = getAffineDimExpr(0, b.getContext());
888  IRRewriter rewriter(b);
889  sizes.push_back(makeComposedFoldedAffineApply(rewriter, loc, d0 - 1, size));
890  LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
891  }
892  return sizes;
893 }
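// For illustration: tileSizes = [4, 0] with sizeBounds = [%M, 10] produces
// sizes = [3, 9], i.e. closed-interval extents (tile size - 1 for tiled
// dimensions, dimension bound - 1 otherwise).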
894 
895 SmallVector<Type> getTensorOutputTypes(LinalgOp op, ValueRange operands) {
896  if (op.hasBufferSemantics())
897  return {};
898  return llvm::to_vector(
899  llvm::map_range(op.getDpsInitOperands(), [&](OpOperand *opOperand) {
900  return operands[opOperand->getOperandNumber()].getType();
901  }));
902 }
903 
904 SmallVector<Value> insertSlicesBack(OpBuilder &builder, Location loc,
905  LinalgOp op, ValueRange operands,
906  ValueRange results) {
907  if (op.hasBufferSemantics())
908  return {};
909  SmallVector<Value> tensorResults;
910  tensorResults.reserve(results.size());
911  // Insert an insert_slice for each output tensor.
912  unsigned resultIdx = 0;
913  for (OpOperand *opOperand : op.getDpsInitOperands()) {
914  // TODO: use an interface/adaptor to avoid leaking position in
915  // `tiledOperands`.
916  Value outputTensor = operands[opOperand->getOperandNumber()];
917  if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
918  Value inserted = builder.create<tensor::InsertSliceOp>(
919  loc, sliceOp.getSource().getType(), results[resultIdx],
920  sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
921  sliceOp.getStrides(), sliceOp.getStaticOffsets(),
922  sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
923  tensorResults.push_back(inserted);
924  } else {
925  tensorResults.push_back(results[resultIdx]);
926  }
927  ++resultIdx;
928  }
929  return tensorResults;
930 }
931 
932 SmallVector<Optional<SliceParameters>>
933 computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp,
934  ValueRange valuesToTile, ArrayRef<OpFoldResult> ivs,
935  ArrayRef<OpFoldResult> tileSizes,
936  ArrayRef<OpFoldResult> sizeBounds,
937  bool omitPartialTileCheck) {
938  assert(ivs.size() == static_cast<size_t>(llvm::count_if(
939  llvm::make_range(tileSizes.begin(), tileSizes.end()),
940  [](OpFoldResult v) { return !isZero(v); })) &&
941  "expected as many ivs as non-zero sizes");
942 
943  // Construct (potentially temporary) mins and maxes on which to apply maps
944  // that define tile subshapes.
945  SmallVector<OpFoldResult> lbs =
946  computeTileOffsets(builder, loc, ivs, tileSizes);
947  SmallVector<OpFoldResult> subShapeSizes =
948  computeTileSizes(builder, loc, tileSizes, sizeBounds);
949 
950  assert(static_cast<int64_t>(valuesToTile.size()) <=
951  linalgOp->getNumOperands() &&
952  "more value to tile than operands.");
953  SmallVector<Optional<SliceParameters>> allSliceParams;
954  allSliceParams.reserve(valuesToTile.size());
955  for (auto [opOperand, val] :
956  llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
957  Value shapedOp = val;
958  LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
959  AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
960  // Use `opOperand` as is if it is not tiled and not an output tensor. Having
961  // an extract/insert slice pair for all output tensors simplifies follow up
962  // transformations such as padding and bufferization since the
963  // extract/insert slice pairs make the accessed iteration argument
964  // subdomains explicit.
965 
966  Type operandType = opOperand.get().getType();
967  if (!isTiled(map, tileSizes) && !(operandType.isa<RankedTensorType>() &&
968  linalgOp.isDpsInit(&opOperand))) {
969  allSliceParams.push_back(llvm::None);
970  LLVM_DEBUG(llvm::dbgs()
971  << ": not tiled: use shape: " << operandType << "\n");
972  continue;
973  }
974  LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
975 
976  allSliceParams.push_back(computeSliceParameters(
977  builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
978  omitPartialTileCheck));
979  }
980 
981  return allSliceParams;
982 }
983 
984 SmallVector<Value> makeTiledShapes(OpBuilder &builder, Location loc,
985  LinalgOp linalgOp, ValueRange valuesToTile,
986  ArrayRef<OpFoldResult> ivs,
987  ArrayRef<OpFoldResult> tileSizes,
988  ArrayRef<OpFoldResult> sizeBounds,
989  bool omitPartialTileCheck) {
990  SmallVector<Optional<SliceParameters>> allSliceParameter =
991  computeAllSliceParameters(builder, loc, linalgOp, valuesToTile, ivs,
992  tileSizes, sizeBounds, omitPartialTileCheck);
993  SmallVector<Value> tiledShapes;
994  for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
995  Value valueToTile = std::get<0>(item);
996  Optional<SliceParameters> sliceParams = std::get<1>(item);
997  tiledShapes.push_back(
998  sliceParams.has_value()
999  ? materializeTiledShape(builder, loc, valueToTile, *sliceParams)
1000  : valueToTile);
1001  }
1002  return tiledShapes;
1003 }
1004 
1005 void offsetIndices(OpBuilder &b, LinalgOp linalgOp,
1006  ArrayRef<OpFoldResult> offsets) {
1007  IRRewriter rewriter(b);
1008  offsetIndices(rewriter, linalgOp, offsets);
1009 }
1010 
1011 void offsetIndices(RewriterBase &b, LinalgOp linalgOp,
1012  ArrayRef<OpFoldResult> offsets) {
1013  if (!linalgOp.hasIndexSemantics())
1014  return;
1015 
1016  for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
1017  if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
1018  continue;
1019  OpBuilder::InsertionGuard guard(b);
1020  b.setInsertionPointAfter(indexOp);
1021  AffineExpr index, offset;
1022  bindDims(b.getContext(), index, offset);
1023  OpFoldResult applied = makeComposedFoldedAffineApply(
1024  b, indexOp.getLoc(), index + offset,
1025  {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
1026  Value materialized =
1027  getValueOrCreateConstantIndexOp(b, indexOp.getLoc(), applied);
1028  b.replaceOpWithIf(indexOp, materialized, [&](OpOperand &use) {
1029  return use.getOwner() != materialized.getDefiningOp();
1030  });
1031  }
1032 }
1033 
1034 /// Get the reassociation maps to fold the result of an extract_slice (or
1035 /// source of an insert_slice) operation with given offsets and sizes to its
1036 /// rank-reduced version. This is only done for the cases where the size is 1
1037 /// and offset is 0. Strictly speaking the offset 0 is not required in general,
1038 /// but non-zero offsets are not handled by SPIR-V backend at this point (and
1039 /// potentially cannot be handled).
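/// For illustration: mixedSizes = [1, 4, 1, 8] produces the reassociation
/// [[0, 1], [2, 3]], folding each leading unit dimension into the following
/// non-unit one; trailing unit dimensions are folded into the last group.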
1040 Optional<SmallVector<ReassociationIndices>>
1041 getReassociationMapForFoldingUnitDims(ArrayRef<OpFoldResult> mixedSizes) {
1042  SmallVector<ReassociationIndices> reassociation;
1043  ReassociationIndices curr;
1044  for (const auto &it : llvm::enumerate(mixedSizes)) {
1045  auto dim = it.index();
1046  auto size = it.value();
1047  curr.push_back(dim);
1048  auto attr = size.dyn_cast<Attribute>();
1049  if (attr && attr.cast<IntegerAttr>().getInt() == 1)
1050  continue;
1051  reassociation.emplace_back(ReassociationIndices{});
1052  std::swap(reassociation.back(), curr);
1053  }
1054  // When the reassociations are not empty, then fold the remaining
1055  // unit-dimensions into the last dimension. If the reassociations so far are
1056  // empty, then leave them empty. This will fold everything to a rank-0 tensor.
1057  if (!curr.empty() && !reassociation.empty())
1058  reassociation.back().append(curr.begin(), curr.end());
1059  return reassociation;
1060 }
1061 
1062 /// Return the identity numeric value associated to the given op.
1063 Optional<Attribute> getNeutralElement(Operation *op) {
1064  // Builder only used as helper for attribute creation.
1065  OpBuilder b(op->getContext());
1066  Type resultType = op->getResult(0).getType();
1067  if (auto floatType = resultType.dyn_cast<FloatType>()) {
1068  const llvm::fltSemantics &semantic = floatType.getFloatSemantics();
1069  if (isa<arith::AddFOp>(op))
1070  return b.getFloatAttr(resultType, llvm::APFloat::getZero(semantic));
1071  if (isa<arith::MulFOp>(op))
1072  return b.getFloatAttr(resultType, llvm::APFloat(semantic, 1));
1073  if (isa<arith::MaxFOp>(op))
1074  return b.getFloatAttr(resultType,
1075  llvm::APFloat::getInf(semantic, /*Negative=*/true));
1076  if (isa<arith::MinFOp>(op))
1077  return b.getFloatAttr(
1078  resultType, llvm::APFloat::getInf(semantic, /*Negative=*/false));
1079  return Attribute();
1080  }
1081  if (isa<arith::AddIOp, arith::OrIOp, arith::XOrIOp>(op))
1082  return b.getIntegerAttr(resultType, 0);
1083  if (isa<arith::AndIOp>(op))
1084  return b.getIntegerAttr(resultType, -1);
1085  if (isa<arith::MaxSIOp>(op))
1086  return b.getIntegerAttr(resultType, std::numeric_limits<int64_t>::min());
1087  if (isa<arith::MinSIOp>(op))
1088  return b.getIntegerAttr(resultType, std::numeric_limits<int64_t>::max());
1089  if (isa<arith::MulIOp>(op))
1090  return b.getIntegerAttr(resultType, 1);
1091  return llvm::None;
1092 }
1093 
1094 } // namespace linalg
1095 } // namespace mlir