//===- Utils.cpp - Utilities to support the Linalg dialect ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities for the Linalg dialect.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Linalg/Utils/Utils.h"

#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/LoopUtils.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/Arithmetic/Utils/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/Utils/Utils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Matchers.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "linalg-utils"

using namespace mlir;
using namespace presburger;
using namespace mlir::linalg;
using namespace mlir::scf;

static bool isZero(Value v) {
  if (auto cst = v.getDefiningOp<arith::ConstantIndexOp>())
    return cst.value() == 0;
  return false;
}

namespace {

// Helper visitor to determine whether an AffineExpr is tiled.
// This is achieved by traversing every AffineDimExpr with position `pos` and
// checking whether the corresponding `tileSizes[pos]` is non-zero.
// This also enforces that only positive coefficients occur in multiplications.
//
// Example:
//   `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0]
//
struct TileCheck : public AffineExprVisitor<TileCheck> {
  TileCheck(ValueRange tileSizes) : tileSizes(tileSizes) {}

  void visitDimExpr(AffineDimExpr expr) {
    isTiled |= !isZero(tileSizes[expr.getPosition()]);
  }
  void visitAffineBinaryOpExpr(AffineBinaryOpExpr expr) {
    visit(expr.getLHS());
    visit(expr.getRHS());
    if (expr.getKind() == mlir::AffineExprKind::Mul)
      assert(expr.getRHS().cast<AffineConstantExpr>().getValue() > 0 &&
             "nonpositive multiplying coefficient");
  }
  bool isTiled = false;
  ValueRange tileSizes;
};

} // namespace

static bool isTiled(AffineExpr expr, ValueRange tileSizes) {
  if (!expr)
    return false;
  TileCheck t(tileSizes);
  t.visit(expr);
  return t.isTiled;
}

// Checks whether the `map` varies with respect to a non-zero `tileSize`.
static bool isTiled(AffineMap map, ValueRange tileSizes) {
  if (!map)
    return false;
  for (unsigned r = 0; r < map.getNumResults(); ++r)
    if (isTiled(map.getResult(r), tileSizes))
      return true;
  return false;
}

Optional<RegionMatcher::BinaryOpKind>
RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
  auto &region = op.region();
  if (!llvm::hasSingleElement(region))
    return llvm::None;

  Block &block = region.front();
  if (block.getNumArguments() != 2 ||
      !block.getArgument(0).getType().isSignlessIntOrFloat() ||
      !block.getArgument(1).getType().isSignlessIntOrFloat())
    return llvm::None;

  auto &ops = block.getOperations();
  if (!llvm::hasSingleElement(block.without_terminator()))
    return llvm::None;

  using mlir::matchers::m_Val;
  auto a = m_Val(block.getArgument(0));
  auto b = m_Val(block.getArgument(1));

  auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
  if (addPattern.match(&ops.back()))
    return BinaryOpKind::IAdd;

  return llvm::None;
}

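// For illustration, the region shape matched by matchAsScalarBinaryOp above is
// roughly the following (a sketch, types elided, signless integers assumed):
//   ^bb0(%a: i32, %b: i32):
//     %sum = arith.addi %a, %b : i32
//     linalg.yield %sum : i32
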
/// Explicit instantiation of loop nest generator for different loop types.
template struct mlir::linalg::GenerateLoopNest<scf::ForOp>;
template struct mlir::linalg::GenerateLoopNest<scf::ParallelOp>;
template struct mlir::linalg::GenerateLoopNest<AffineForOp>;

/// Given a list of subview ranges, extract individual values for lower, upper
/// bounds and steps and put them into the corresponding vectors.
static void unpackRanges(ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
                         SmallVectorImpl<Value> &ubs,
                         SmallVectorImpl<Value> &steps) {
  for (Range range : ranges) {
    lbs.emplace_back(range.offset);
    ubs.emplace_back(range.size);
    steps.emplace_back(range.stride);
  }
}

namespace mlir {
namespace linalg {

bool allIndexingsAreProjectedPermutation(LinalgOp op) {
  return llvm::all_of(op.getIndexingMaps(), [](AffineMap m) {
    return m.isProjectedPermutation(/*allowZeroInResults=*/true);
  });
}

bool hasOnlyScalarElementwiseOp(Region &r) {
  if (!llvm::hasSingleElement(r))
    return false;
  for (Operation &op : r.front()) {
    if (!(isa<arith::ConstantOp, func::ConstantOp, linalg::YieldOp,
              linalg::IndexOp>(op) ||
          OpTrait::hasElementwiseMappableTraits(&op)) ||
        llvm::any_of(op.getResultTypes(),
                     [](Type type) { return !type.isIntOrIndexOrFloat(); }))
      return false;
  }
  return true;
}

bool isElementwise(LinalgOp op) {
  if (op.getNumLoops() != op.getNumParallelLoops())
    return false;

  if (!allIndexingsAreProjectedPermutation(op))
    return false;

  // TODO: relax the restrictions on indexing map.
  for (OpOperand *opOperand : op.getOutputOperands()) {
    if (!op.getTiedIndexingMap(opOperand).isPermutation())
      return false;
  }
  return hasOnlyScalarElementwiseOp(op->getRegion(0));
}

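// For illustration, isElementwise above accepts a linalg.generic whose
// iterators are all "parallel", whose output indexing maps are permutations,
// and whose body contains only scalar elementwise operations, e.g. (sketch):
//   iterator_types = ["parallel", "parallel"]
//   indexing_maps  = [affine_map<(d0, d1) -> (d0, d1)>,
//                     affine_map<(d0, d1) -> (d0, d1)>]
//   body: %e = arith.negf %in : f32; linalg.yield %e : f32
// Reductions, non-permutation output maps, or non-scalar body ops are
// rejected.
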
bool isPermutation(ArrayRef<int64_t> permutation) {
  // Count the number of appearances for all indices.
  SmallVector<int64_t> indexCounts(permutation.size(), 0);
  for (auto index : permutation) {
    // Exit if the index is out-of-range.
    if (index < 0 || index >= static_cast<int64_t>(permutation.size()))
      return false;
    indexCounts[index]++;
  }
  // Return true if all indices appear once.
  return count(indexCounts, 1) == static_cast<int64_t>(permutation.size());
}

/// Helper function that creates a memref::DimOp or tensor::DimOp depending on
/// the type of `source`.
Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim) {
  if (source.getType().isa<UnrankedMemRefType, MemRefType>())
    return b.createOrFold<memref::DimOp>(loc, source, dim);
  if (source.getType().isa<UnrankedTensorType, RankedTensorType>())
    return b.createOrFold<tensor::DimOp>(loc, source, dim);
  llvm_unreachable("Expected MemRefType or TensorType");
}

/// Given an operation, retrieves the value of each dynamic dimension through
/// constructing the necessary DimOp operators.
SmallVector<Value, 4> getDynOperands(Location loc, Value val, OpBuilder &b) {
  SmallVector<Value, 4> dynOperands;
  auto shapedType = val.getType().cast<ShapedType>();
  for (const auto &dim : llvm::enumerate(shapedType.getShape())) {
    if (dim.value() == ShapedType::kDynamicSize)
      dynOperands.push_back(createOrFoldDimOp(b, loc, val, dim.index()));
  }
  return dynOperands;
}

void getUpperBoundForIndex(Value value, AffineMap &boundMap,
                           SmallVectorImpl<Value> &boundOperands,
                           bool constantRequired) {
  // Initialize `boundMap` and `boundOperands` to the identity returning
  // `value`. This combination is the default result of the method if no
  // simplification is possible.
  assert(value.getType().isIndex() && "expect value to have index type");
  boundMap = AffineMap::getMultiDimIdentityMap(1, value.getContext());
  boundOperands.assign({value});
  canonicalizeMapAndOperands(&boundMap, &boundOperands);

  // Continue only if there is an affine index computation to simplify.
  Operation *definingOp = value.getDefiningOp();
  if (!definingOp || !isa<AffineApplyOp, AffineMinOp>(definingOp))
    return;

  // Get the backward slice containing the affine index computation.
  SetVector<Operation *> backwardSlice;
  getBackwardSlice(definingOp, &backwardSlice, [](Operation *op) {
    return isa<AffineApplyOp, AffineMinOp>(op);
  });
  backwardSlice.insert(definingOp);

  // Setup a system of affine constraints that describe the index computation.
  FlatAffineValueConstraints constraints;

  // Helper to find or create an identifier for the given value.
  auto findOrCreateId = [&](Value value) {
    if (!constraints.containsVar(value)) {
      constraints.appendDimVar(value);
      return true;
    }
    unsigned pos;
    constraints.findVar(value, &pos);
    return pos < constraints.getNumDimVars();
  };
  // Helper to get the position for the given value.
  auto getPosition = [&](Value value) {
    unsigned pos;
    bool exists = constraints.findVar(value, &pos);
    (void)exists;
    assert(exists && "expect to find the identifier");
    return pos;
  };

  // Add the affine operations in `backwardSlice` to the constraints.
  for (Operation *op : llvm::reverse(backwardSlice)) {
    // Add an identifier for all op results and operands.
    if (!(llvm::all_of(op->getResults(), findOrCreateId) &&
          llvm::all_of(op->getOperands(), findOrCreateId)))
      return;

    // Add AffineApplyOps to the constraints.
    if (auto applyOp = dyn_cast<AffineApplyOp>(op)) {
      AffineMap map = constraints.computeAlignedMap(applyOp.getAffineMap(),
                                                    applyOp.getOperands());
      if (failed(constraints.addBound(IntegerPolyhedron::EQ,
                                      getPosition(applyOp.getResult()), map)))
        return;
      continue;
    }
    // Add AffineMinOps to the constraints.
    auto minOp = cast<AffineMinOp>(op);
    AffineMap map = constraints.computeAlignedMap(minOp.getAffineMap(),
                                                  minOp.getOperands());
    if (failed(constraints.addBound(IntegerPolyhedron::UB,
                                    getPosition(minOp.getResult()), map,
                                    /*isClosedBound=*/true)))
      return;
  }

  // Obtain an upper bound for the affine index computation by projecting out
  // all temporary results and expressing the upper bound for `value` in terms
  // of the terminals of the index computation.
  unsigned pos = getPosition(value);
  if (constantRequired) {
    auto ubConst = constraints.getConstantBound(
        FlatAffineValueConstraints::BoundType::UB, pos);
    if (!ubConst)
      return;

    boundMap = AffineMap::getConstantMap(*ubConst, value.getContext());
    return;
  }

  SmallVector<AffineMap> lowerBounds(1), upperBounds(1);
  constraints.getSliceBounds(pos, 1, value.getContext(), &lowerBounds,
                             &upperBounds,
                             /*getClosedUB=*/true);
  // Verify `upperBounds[0]` is valid and has at least one result.
  if (!upperBounds[0] || upperBounds[0].getNumResults() == 0)
    return;

  // Set `boundMap` and `boundOperands` to the computed upper bound.
  boundMap = upperBounds[0];
  constraints.getAllValues(&boundOperands);
  erase_value(boundOperands, value);
  canonicalizeMapAndOperands(&boundMap, &boundOperands);
}

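// Worked example for getUpperBoundForIndex above (a sketch): for
//   %0 = affine.min affine_map<(d0) -> (d0, 42)>(%i)
// the backward slice contributes the constraints %0 <= %i and %0 <= 42, so the
// routine returns the constant map `() -> (42)` when `constantRequired` is
// set, and otherwise an upper-bound map expressed in terms of the terminal %i.
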
FailureOr<int64_t> getConstantUpperBoundForIndex(Value value) {
  // Compute an upper bound for `value`.
  AffineMap boundMap;
  SmallVector<Value> boundOperands;
  getUpperBoundForIndex(value, boundMap, boundOperands,
                        /*constantRequired=*/true);

  // Search the results of `boundMap` for constant upper bounds.
  SmallVector<int64_t> constantBounds;
  for (AffineExpr result : boundMap.getResults())
    if (auto constExpr = result.dyn_cast<AffineConstantExpr>())
      constantBounds.push_back(constExpr.getValue());

  // Return the minimal upper bound or failure if none is found.
  if (constantBounds.empty())
    return failure();
  return *std::min_element(constantBounds.begin(), constantBounds.end());
}

tensor::ExtractSliceOp makeComposedExtractSliceOp(
    OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
    ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides) {
  assert(source && "expect source to be nonzero");

  // Do not fold if the producer is not an ExtractSliceOp.
  auto producerOp = source.getDefiningOp<tensor::ExtractSliceOp>();
  if (!producerOp)
    return b.create<tensor::ExtractSliceOp>(loc, source, offsets, sizes,
                                            strides);

  // Do not fold if the producer is rank reducing or if there are any non-unit
  // strides. Supporting non-unit strides complicates the offset computation
  // since the consumer offsets need to be multiplied by the producer strides.
  // TODO: support non-unit strides once there are use cases.
  SmallVector<OpFoldResult> allStrides = producerOp.getMixedStrides();
  allStrides.append(strides.begin(), strides.end());
  bool hasNonUnitStride = any_of(allStrides, [](OpFoldResult ofr) {
    return getConstantIntValue(ofr) != static_cast<int64_t>(1);
  });
  if (hasNonUnitStride ||
      producerOp.getSourceType().getRank() !=
          producerOp.getResult().getType().cast<ShapedType>().getRank())
    return b.create<tensor::ExtractSliceOp>(loc, source, offsets, sizes,
                                            strides);

  // Fold the producer by adding the offsets and extracting the slice directly
  // from the producer source tensor.
  SmallVector<OpFoldResult> foldedOffsets(offsets.begin(), offsets.end());
  AffineExpr dim1, dim2;
  bindDims(b.getContext(), dim1, dim2);
  for (const auto &en : enumerate(producerOp.getMixedOffsets())) {
    SmallVector<Value> offsetValues = {
        getValueOrCreateConstantIndexOp(b, loc, foldedOffsets[en.index()]),
        getValueOrCreateConstantIndexOp(b, loc, en.value())};
    foldedOffsets[en.index()] =
        makeComposedAffineApply(b, loc, dim1 + dim2, offsetValues).getResult();
  }
  return b.create<tensor::ExtractSliceOp>(loc, producerOp.getSource(),
                                          foldedOffsets, sizes, strides);
}

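// Folding example for makeComposedExtractSliceOp above (sketch, types elided):
//   %0 = tensor.extract_slice %t[%a, %b] [16, 16] [1, 1]
//   %1 = tensor.extract_slice %0[%c, %d] [4, 4] [1, 1]
// becomes a single extract_slice of %t at offsets (%a + %c, %b + %d), provided
// all strides are 1 and the producer slice is not rank-reducing.
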
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
                            Value source, Value pad, bool nofold) {
  // Exit if `source` is not defined by an ExtractSliceOp.
  auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
  if (!sliceOp)
    return tensor::createPadHighOp(type, source, pad, nofold, loc, b);

  // Search the `source` use-def chain for padded LinalgOps.
  Value current = sliceOp.getSource();
  while (current) {
    auto linalgOp = current.getDefiningOp<LinalgOp>();
    if (!linalgOp)
      break;
    OpResult opResult = current.cast<OpResult>();
    current = linalgOp.getOutputOperand(opResult.getResultNumber())->get();
  }
  auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;

  // Exit if the search fails to match a tensor::PadOp at the end of the
  // matched LinalgOp sequence.
  if (!padOp)
    return tensor::createPadHighOp(type, source, pad, nofold, loc, b);

  // Exit if the padded result type does not match.
  if (sliceOp.getSource().getType() != type)
    return tensor::createPadHighOp(type, source, pad, nofold, loc, b);

  // Exit if the LinalgOps are not high padded.
  if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
        return getConstantIntValue(ofr) != static_cast<int64_t>(0);
      }))
    return tensor::createPadHighOp(type, source, pad, nofold, loc, b);

  // Exit if `padOpSliceOp`, which defines the slice used by `padOp`, is
  // rank-reducing.
  auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
  if (!padOpSliceOp ||
      sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
    return tensor::createPadHighOp(type, source, pad, nofold, loc, b);

  // Exit if the sizes of `sliceOp` do not match the sizes of the slice padded
  // by `padOp`.
  if (llvm::any_of(
          llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
          [](std::tuple<OpFoldResult, OpFoldResult> it) {
            return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
          }))
    return tensor::createPadHighOp(type, source, pad, nofold, loc, b);

  // Exit if the padding values do not match.
  Attribute padOpPadAttr, padAttr;
  Value padOpPad = padOp.getConstantPaddingValue();
  if (!padOpPad || !matchPattern(padOpPad, m_Constant(&padOpPadAttr)) ||
      !matchPattern(pad, m_Constant(&padAttr)) || padOpPadAttr != padAttr)
    return tensor::createPadHighOp(type, source, pad, nofold, loc, b);

  // Return the padded result if the padding values and sizes match.
  return sliceOp.getSource();
}

GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
                          Value outputTensor,
                          ArrayRef<int64_t> transposeVector) {
  auto resultTensorType = outputTensor.getType().cast<RankedTensorType>();
  Type elementType = resultTensorType.getElementType();

  assert(isPermutation(transposeVector) &&
         "expect transpose vector to be a permutation");
  assert(transposeVector.size() ==
             static_cast<size_t>(resultTensorType.getRank()) &&
         "expect transpose vector size to match result tensor rank");

  // Compute the transpose and the identity indexing maps.
  SmallVector<AffineMap> indexingMaps = {
      inversePermutation(AffineMap::getPermutationMap(
          SmallVector<unsigned>(transposeVector.begin(), transposeVector.end()),
          b.getContext())),
      AffineMap::getMultiDimIdentityMap(transposeVector.size(),
                                        b.getContext())};
  SmallVector<llvm::StringRef> iteratorTypes(transposeVector.size(),
                                             getParallelIteratorTypeName());

  // Create a GenericOp to transpose `inputTensor` into `outputTensor`.
  auto transposeOp = b.create<GenericOp>(
      loc, resultTensorType, inputTensor, outputTensor,
      b.getAffineMapArrayAttr(indexingMaps), b.getStrArrayAttr(iteratorTypes),
      /*doc=*/nullptr,
      /*library_call=*/nullptr);
  Region &body = transposeOp.getRegion();
  body.push_back(new Block());
  body.front().addArguments({elementType, elementType}, {loc, loc});

  // Create the body of the transpose operation.
  OpBuilder::InsertionGuard g(b);
  b.setInsertionPointToEnd(&body.front());
  b.create<YieldOp>(loc, transposeOp.getRegion().front().getArgument(0));
  return transposeOp;
}

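// For illustration, with transposeVector = [1, 0] the linalg.generic built by
// makeTransposeOp above reads the input through
// affine_map<(d0, d1) -> (d1, d0)> and writes the output through the identity
// map, i.e. out[i, j] = in[j, i].
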
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to) {
  auto memrefTypeTo = to.getType().cast<MemRefType>();
#ifndef NDEBUG
  auto memrefTypeFrom = from.getType().cast<MemRefType>();
  assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
         "`from` and `to` memref must have the same rank");
#endif // NDEBUG

  AffineMap id =
      AffineMap::getMultiDimIdentityMap(memrefTypeTo.getRank(), b.getContext());
  SmallVector<StringRef> iteratorTypes(memrefTypeTo.getRank(),
                                       getParallelIteratorTypeName());
  return b.create<linalg::GenericOp>(
      loc,
      /*inputs=*/from,
      /*outputs=*/to,
      /*indexingMaps=*/llvm::makeArrayRef({id, id}),
      /*iteratorTypes=*/iteratorTypes,
      [](OpBuilder &b, Location loc, ValueRange args) {
        b.create<linalg::YieldOp>(loc, args.front());
      });
}

/// Specialization to build an scf "for" nest.
template <>
void GenerateLoopNest<scf::ForOp>::doit(
    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
    ArrayRef<Attribute> iteratorTypes,
    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
                                  ValueRange)>
        bodyBuilderFn,
    Optional<LinalgLoopDistributionOptions> distributionOptions,
    ArrayRef<StringRef> distributionTypes) {
  SmallVector<Value> iterArgInitValues = linalgOp.getOutputTensorOperands();
  // Create procInfo so it dominates loops, if appropriate.
  SmallVector<ProcInfo, 4> procInfo;
  SmallVector<DistributionMethod, 0> distributionMethod;
  if (distributionOptions) {
    // Collect loop ranges for parallel dimensions.
    SmallVector<Range, 2> parallelLoopRanges;
    for (const auto &iteratorType : enumerate(iteratorTypes))
      if (isParallelIterator(iteratorType.value()))
        parallelLoopRanges.push_back(loopRanges[iteratorType.index()]);

    // Get their distribution schemes.
    distributionMethod = distributionOptions->distributionMethod;
    if (distributionMethod.size() < parallelLoopRanges.size())
      parallelLoopRanges.resize(distributionMethod.size());
    procInfo = distributionOptions->procInfo(b, loc, parallelLoopRanges);
  }

  SmallVector<Value, 4> lbs, ubs, steps;
  unpackRanges(loopRanges, lbs, ubs, steps);
  LoopNest loopNest = mlir::scf::buildLoopNest(
      b, loc, lbs, ubs, steps, iterArgInitValues,
      [&](OpBuilder &b, Location loc, ValueRange ivs, ValueRange iterArgs) {
        assert(iterArgs.size() == linalgOp.getOutputTensorOperands().size() &&
               "expect the number of output tensors and iter args to match");
        SmallVector<Value> operandValuesToUse =
            linalgOp.getInputAndOutputOperands();
        if (!iterArgs.empty()) {
          operandValuesToUse = linalgOp.getInputOperands();
          operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
        }
        return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
      });

  if (!distributionOptions || loopNest.loops.empty())
    return;

  // Filter out scf.for loops that were created out of parallel dimensions.
  SmallVector<scf::ForOp, 4> loops;
  for (const auto &iteratorType : enumerate(iteratorTypes))
    if (isParallelIterator(iteratorType.value()))
      loops.push_back(loopNest.loops[iteratorType.index()]);

  // Distribute - only supports cyclic distribution for now.
  for (auto it : llvm::zip(loops, procInfo, distributionMethod))
    if (std::get<2>(it) == DistributionMethod::Cyclic)
      mapLoopToProcessorIds(std::get<0>(it), std::get<1>(it).procId,
                            std::get<1>(it).nprocs);
}

/// Specialization to build affine "for" nest.
template <>
void GenerateLoopNest<AffineForOp>::doit(
    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
    ArrayRef<Attribute> iteratorTypes,
    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
                                  ValueRange)>
        bodyBuilderFn,
    Optional<LinalgLoopDistributionOptions>, ArrayRef<StringRef>) {
  SmallVector<Value> iterArgInitValues = linalgOp.getOutputTensorOperands();
  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
  SmallVector<Value, 4> lbs, ubs, steps;
  unpackRanges(loopRanges, lbs, ubs, steps);

  // Affine loops require constant steps.
  SmallVector<int64_t, 4> constantSteps;
  constantSteps.reserve(steps.size());
  for (Value v : steps) {
    auto op = v.getDefiningOp<arith::ConstantIndexOp>();
    assert(op && "Affine loops require constant steps");
    constantSteps.push_back(op.value());
  }

  mlir::buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
                            [&](OpBuilder &b, Location loc, ValueRange ivs) {
                              SmallVector<Value> operandValuesToUse =
                                  linalgOp.getInputAndOutputOperands();
                              bodyBuilderFn(b, loc, ivs, operandValuesToUse);
                            });
}

/// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`.
void updateBoundsForCyclicDistribution(OpBuilder &b, Location loc, Value procId,
                                       Value nprocs, Value &lb, Value &ub,
                                       Value &step) {
  AffineExpr d0, d1;
  bindDims(b.getContext(), d0, d1);
  AffineExpr s0 = getAffineSymbolExpr(0, b.getContext());
  lb = makeComposedAffineApply(b, loc, d0 + d1 * s0, {lb, procId, step});
  step = makeComposedAffineApply(b, loc, d0 * s0, {nprocs, step});
}

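// In other words, updateBoundsForCyclicDistribution above rewrites the bounds
// of a cyclically distributed loop as
//   lb'   = lb + procId * step
//   step' = nprocs * step
// so that processor `procId` executes the procId-th, (procId + nprocs)-th, ...
// iterations of the original loop; `ub` is left unchanged.
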
/// Generates a loop nest consisting of scf.parallel and scf.for, depending
/// on the `iteratorTypes`. Consecutive parallel loops create a single
/// scf.parallel operation; each sequential loop creates a new scf.for
/// operation. The body of the innermost loop is populated by
/// `bodyBuilderFn` that accepts a range of induction variables for all
/// loops. `ivStorage` is used to store the partial list of induction
/// variables.
// TODO: this function can be made iterative instead. However, it
// will have at most as many recursive calls as nested loops, which rarely
// exceeds 10.
static void generateParallelLoopNest(
    OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs,
    ValueRange steps, ArrayRef<Attribute> iteratorTypes,
    function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilderFn,
    SmallVectorImpl<Value> &ivStorage,
    ArrayRef<DistributionMethod> distributionMethod = {}) {
  assert(lbs.size() == ubs.size());
  assert(lbs.size() == steps.size());
  assert(lbs.size() == iteratorTypes.size());

  // If there are no (more) loops to be generated, generate the body and be
  // done with it.
  if (iteratorTypes.empty()) {
    bodyBuilderFn(b, loc, ivStorage);
    return;
  }

  // Find the outermost parallel loops and drop their types from the list.
  unsigned nLoops = iteratorTypes.size();
  unsigned nOuterPar =
      nLoops - iteratorTypes.drop_while(isParallelIterator).size();

  // If there are no outer parallel loops, generate one sequential loop and
  // recurse. Note that we wouldn't have dropped anything from `iteratorTypes`
  // in this case.
  if (nOuterPar == 0) {
    LoopNest singleLoop = buildLoopNest(
        b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
        [&](OpBuilder &b, Location loc, ValueRange ivs) {
          ivStorage.append(ivs.begin(), ivs.end());
          generateParallelLoopNest(b, loc, lbs.drop_front(), ubs.drop_front(),
                                   steps.drop_front(),
                                   iteratorTypes.drop_front(), bodyBuilderFn,
                                   ivStorage, distributionMethod);
        });
    return;
  }
  if (distributionMethod.empty()) {
    // Generate a single parallel loop-nest operation for all outermost
    // parallel loops and recurse.
    b.create<scf::ParallelOp>(
        loc, lbs.take_front(nOuterPar), ubs.take_front(nOuterPar),
        steps.take_front(nOuterPar),
        [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
          ivStorage.append(localIvs.begin(), localIvs.end());
          generateParallelLoopNest(
              nestedBuilder, nestedLoc, lbs.drop_front(nOuterPar),
              ubs.drop_front(nOuterPar), steps.drop_front(nOuterPar),
              iteratorTypes.drop_front(nOuterPar), bodyBuilderFn, ivStorage,
              (distributionMethod.size() < nOuterPar)
                  ? ArrayRef<DistributionMethod>()
                  : distributionMethod.drop_front(nOuterPar));
        });
    return;
  }

  // Process all consecutive similarly distributed loops simultaneously.
  DistributionMethod methodToUse = distributionMethod[0];
  unsigned numProcessed = 1;
  for (unsigned i = 1; i < nOuterPar && i < distributionMethod.size(); ++i) {
    if (distributionMethod[i] != methodToUse)
      break;
    numProcessed++;
  }

  switch (methodToUse) {
  case DistributionMethod::Cyclic: {
    // Generate a single parallel loop-nest operation for all outermost
    // parallel loops and recurse.
    b.create<scf::ParallelOp>(
        loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),
        [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
          ivStorage.append(localIvs.begin(), localIvs.end());
          generateParallelLoopNest(
              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
              (distributionMethod.size() < numProcessed)
                  ? ArrayRef<DistributionMethod>()
                  : distributionMethod.drop_front(numProcessed));
        });
    return;
  }
  case DistributionMethod::CyclicNumProcsGeNumIters: {
    // Check (for the processed loops) that the iteration is in-bounds.
    ArithBuilder ab(b, loc);
    Value cond = ab.slt(lbs[0], ubs[0]);
    for (unsigned i = 1; i < numProcessed; ++i)
      cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
    b.create<scf::IfOp>(loc, cond, [&](OpBuilder &b, Location loc) {
      generateParallelLoopNest(
          b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
          steps.drop_front(numProcessed),
          iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
          distributionMethod.drop_front(numProcessed));
      b.create<scf::YieldOp>(loc, ValueRange{});
    });
    return;
  }
  case DistributionMethod::CyclicNumProcsEqNumIters:
    // No check/loops needed here. Set the `%iv` to be the `%lb` and proceed
    // with inner loop generation.
    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
    generateParallelLoopNest(
        b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
        steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
        bodyBuilderFn, ivStorage, distributionMethod.drop_front(numProcessed));
    return;
  }
}

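// Structure example for generateParallelLoopNest above: with iteratorTypes =
// ["parallel", "parallel", "reduction"] and no distribution, the outer two
// loops collapse into one 2-D scf.parallel and the reduction becomes an
// scf.for nested inside it (sketch):
//   scf.parallel (%i, %j) = (...) to (...) step (...) {
//     scf.for %k = ... {
//       <body>
//     }
//   }
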
/// Specialization for generating a mix of parallel and sequential scf loops.
template <>
void GenerateLoopNest<scf::ParallelOp>::doit(
    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
    ArrayRef<Attribute> iteratorTypes,
    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
                                  ValueRange)>
        bodyBuilderFn,
    Optional<LinalgLoopDistributionOptions> distributionOptions,
    ArrayRef<StringRef> distributionTypes) {
  SmallVector<Value> iterArgInitValues = linalgOp.getOutputTensorOperands();
  assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
  // This function may be passed more iterator types than ranges.
  assert(iteratorTypes.size() >= loopRanges.size() &&
         "expected iterator type for all ranges");
  iteratorTypes = iteratorTypes.take_front(loopRanges.size());
  SmallVector<Value, 8> lbsStorage, ubsStorage, stepsStorage, ivs;
  unsigned numLoops = iteratorTypes.size();
  ivs.reserve(numLoops);
  lbsStorage.reserve(numLoops);
  ubsStorage.reserve(numLoops);
  stepsStorage.reserve(numLoops);

  // Get the loop lb, ub, and step.
  unpackRanges(loopRanges, lbsStorage, ubsStorage, stepsStorage);

  // Modify the lb, ub, and step based on the distribution options.
  SmallVector<DistributionMethod, 0> distributionMethod;
  if (distributionOptions) {
    auto &options = *distributionOptions;
    distributionMethod.assign(distributionOptions->distributionMethod.begin(),
                              distributionOptions->distributionMethod.end());
    SmallVector<Range, 2> parallelLoopRanges;
    for (const auto &iteratorType : enumerate(iteratorTypes)) {
      if (isParallelIterator(iteratorType.value()))
        parallelLoopRanges.push_back(loopRanges[iteratorType.index()]);
    }
    if (distributionMethod.size() < parallelLoopRanges.size())
      parallelLoopRanges.resize(distributionMethod.size());
    SmallVector<ProcInfo, 2> procInfo =
        options.procInfo(b, loc, parallelLoopRanges);
    unsigned index = 0;
    for (const auto &iteratorType : enumerate(iteratorTypes)) {
      if (index >= procInfo.size())
        break;
      if (isParallelIterator(iteratorType.value())) {
        unsigned i = iteratorType.index();
        updateBoundsForCyclicDistribution(b, loc, procInfo[index].procId,
                                          procInfo[index].nprocs, lbsStorage[i],
                                          ubsStorage[i], stepsStorage[i]);
        index++;
      }
    }
  }
  ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
  generateParallelLoopNest(
      b, loc, lbs, ubs, steps, iteratorTypes,
      [&](OpBuilder &b, Location loc, ValueRange ivs) {
        SmallVector<Value> operandValuesToUse =
            linalgOp.getInputAndOutputOperands();
        bodyBuilderFn(b, loc, ivs, operandValuesToUse);
      },
      ivs, distributionMethod);

  assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
}

static Value fullyComposeAndAffineApply(OpBuilder &b, Location loc,
                                        AffineExpr expr, ValueRange operands) {
  AffineMap map = AffineMap::inferFromExprList({expr}).front();
  SmallVector<Value> normalizedOperands(operands.begin(), operands.end());
  mlir::fullyComposeAffineMapAndOperands(&map, &normalizedOperands);
  canonicalizeMapAndOperands(&map, &normalizedOperands);
  return b.createOrFold<AffineApplyOp>(loc, map, normalizedOperands);
}

Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
                     ValueRange tileSizes, AffineMap map, ValueRange lbs,
                     ValueRange ubs, ValueRange subShapeSizes,
                     bool omitPartialTileCheck) {
  auto shapedType = valueToTile.getType().dyn_cast<ShapedType>();
  assert(shapedType && "only shaped types can be tiled");
  ArrayRef<int64_t> shape = shapedType.getShape();
  int64_t rank = shapedType.getRank();

  // Construct a new subview / extract_slice for the tile.
  SmallVector<OpFoldResult, 4> offsets, sizes, strides;
  offsets.reserve(rank);
  sizes.reserve(rank);
  strides.reserve(rank);
  for (unsigned r = 0; r < rank; ++r) {
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: for dim#" << r);
    if (!isTiled(map.getSubMap({r}), tileSizes)) {
      offsets.push_back(builder.getIndexAttr(0));
      Value dim = createOrFoldDimOp(builder, loc, valueToTile, r);
      sizes.push_back(getAsOpFoldResult(dim));
      strides.push_back(builder.getIndexAttr(1));
      LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
      continue;
    }
    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");

    // Tiling creates a new slice at the proper index, the slice step is 1
    // (i.e. the op does not subsample, stepping occurs in the loop).
    auto m = map.getSubMap({r});
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: submap: " << m << "\n");
    auto offset = applyMapToValues(builder, loc, m, lbs).front();
    offsets.push_back(getAsOpFoldResult(offset));
    auto closedIntSize =
        applyMapToValues(builder, loc, m, subShapeSizes).front();
    // Resulting size needs to be made half open interval again.
    AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext());
    Value size =
        fullyComposeAndAffineApply(builder, loc, s0 + 1, closedIntSize);
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: raw size: " << size << "\n");
    LLVM_DEBUG(llvm::dbgs()
               << "makeTiledShape: new offset: " << offset << "\n");
    strides.push_back(builder.getIndexAttr(1));

    if (omitPartialTileCheck) {
      // We statically know that the partial/boundary tile condition is
      // unnecessary.
      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
      sizes.push_back(getAsOpFoldResult(size));
      continue;
    }

    // The size of the subview / extract_slice should be trimmed to avoid
    // out-of-bounds accesses, unless:
    // a. We statically know the subshape size divides the shape size evenly.
    // b. The subshape size is 1. According to the way the loops are set up,
    //    tensors with "0" dimensions would never be constructed.
    int64_t shapeSize = shape[r];
    auto sizeCst = size.getDefiningOp<arith::ConstantIndexOp>();
    auto hasTileSizeOne = sizeCst && sizeCst.value() == 1;
    auto dividesEvenly = sizeCst && !ShapedType::isDynamic(shapeSize) &&
                         ((shapeSize % sizeCst.value()) == 0);
    if (!hasTileSizeOne && !dividesEvenly) {
      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
                              << ", size: " << size
                              << ": make sure in bound with affine.min\n");

      AffineExpr dim0, dim1, dim2;
      bindDims(builder.getContext(), dim0, dim1, dim2);

      // Get the dimension size for this dimension. We need to first calculate
      // the max index and then plus one. This is important because for
      // convolution ops, we have its input window dimension's affine map of
      // the form `(d0 * s0 + d1)`, where `d0`/`d1` is an output/filter window
      // dimension and `s0` is stride. Directly using the dimension size of the
      // output/filter window dimensions would lead to incorrect calculations.
      AffineMap minusOneMap =
          AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 - 1}})
              .front();
      AffineMap plusOneMap =
          AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 + 1}})
              .front();
      auto maxIndices = llvm::to_vector<8>(llvm::map_range(ubs, [&](Value ub) {
        return makeComposedAffineApply(builder, loc, minusOneMap, {ub})
            .getResult();
      }));
      Value maxIndex = applyMapToValues(builder, loc, m, maxIndices).front();
      Value d = makeComposedAffineApply(builder, loc, plusOneMap, {maxIndex});

      // Compute min(dim - offset, size) to avoid out-of-bounds accesses.
      AffineMap minMap = AffineMap::inferFromExprList(
                             {ArrayRef<AffineExpr>{dim1 - dim2, dim0}})
                             .front();
      SmallVector<Value, 4> operands{size, d, offset};
      fullyComposeAffineMapAndOperands(&minMap, &operands);
      canonicalizeMapAndOperands(&minMap, &operands);
      size = builder.create<AffineMinOp>(loc, builder.getIndexType(), minMap,
                                         operands);
    }
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
    sizes.push_back(getAsOpFoldResult(size));
  }

  auto *sliceOp = TypeSwitch<ShapedType, Operation *>(shapedType)
                      .Case([&](MemRefType) {
                        return builder.create<memref::SubViewOp>(
                            loc, valueToTile, offsets, sizes, strides);
                      })
                      .Case([&](RankedTensorType) {
                        return makeComposedExtractSliceOp(
                            builder, loc, valueToTile, offsets, sizes, strides);
                      })
                      .Default([](ShapedType) -> Operation * {
                        llvm_unreachable("Unexpected shaped type");
                      });
  return sliceOp->getResult(0);
}

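// Boundary handling in makeTiledShape above, sketched for one tiled dimension:
// when the tile size does not statically divide the dimension, the emitted
// slice size is clamped as
//   %sz = affine.min affine_map<(d0, d1, d2) -> (d1 - d2, d0)>(%size, %dim, %offset)
// i.e. min(%dim - %offset, %size), so the trailing partial tile is shrunk
// instead of reading or writing out of bounds.
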
SmallVector<Value> computeTileOffsets(OpBuilder &b, Location loc, ValueRange ivs,
                                      ValueRange tileSizes) {
  SmallVector<Value> offsets;
  for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
    bool isTiled = !isZero(tileSizes[idx]);
    offsets.push_back(
        isTiled ? ivs[idxIvs++]
                : b.create<arith::ConstantIndexOp>(loc, 0).getResult());
    LLVM_DEBUG(llvm::dbgs()
               << "computeTileOffsets: " << offsets.back() << "\n");
  }
  return offsets;
}

SmallVector<Value> computeTileSizes(OpBuilder &b, Location loc,
                                    ValueRange tileSizes,
                                    ArrayRef<Value> sizeBounds) {
  SmallVector<Value> sizes;
  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
    bool isTiled = !isZero(tileSizes[idx]);
    // Before composing, we need to make range a closed interval.
    Value size = isTiled ? tileSizes[idx] : sizeBounds[idx];
    AffineExpr d0 = getAffineDimExpr(0, b.getContext());
    sizes.push_back(fullyComposeAndAffineApply(b, loc, d0 - 1, size));
    LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
  }
  return sizes;
}

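// Example for computeTileOffsets/computeTileSizes above: with ivs = (%i, %j),
// tileSizes = [%ts0, 0, %ts1] and sizeBounds = [%d0, %d1, %d2], the offsets
// are (%i, 0, %j) and the (closed-interval) sizes are
// (%ts0 - 1, %d1 - 1, %ts1 - 1); makeTiledShape adds 1 back to recover
// half-open extents.
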
SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
                                      LinalgOp linalgOp,
                                      ArrayRef<Value> valuesToTile,
                                      ValueRange ivs, ValueRange tileSizes,
                                      ArrayRef<Value> sizeBounds,
                                      bool omitPartialTileCheck) {
  assert(ivs.size() == static_cast<size_t>(llvm::count_if(
                           llvm::make_range(tileSizes.begin(), tileSizes.end()),
                           [](Value v) { return !isZero(v); })) &&
         "expected as many ivs as non-zero sizes");

  // Construct (potentially temporary) mins and maxes on which to apply maps
  // that define tile subshapes.
  SmallVector<Value> lbs = computeTileOffsets(b, loc, ivs, tileSizes);
  SmallVector<Value> subShapeSizes =
      computeTileSizes(b, loc, tileSizes, sizeBounds);

  assert(static_cast<int64_t>(valuesToTile.size()) ==
             linalgOp.getNumInputsAndOutputs() &&
         "expected one value to tile for every operand");
  SmallVector<Value, 4> tiledShapes;
  tiledShapes.reserve(valuesToTile.size());
  for (OpOperand *opOperand : linalgOp.getInputAndOutputOperands()) {
    Value shapedOp = valuesToTile[opOperand->getOperandNumber()];
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
    AffineMap map = linalgOp.getTiedIndexingMap(opOperand);
    // Use `opOperand` as is if it is not tiled and not an output tensor. Having
    // an extract/insert slice pair for all output tensors simplifies follow up
    // transformations such as padding and bufferization since the
    // extract/insert slice pairs make the accessed iteration argument
    // subdomains explicit.
    if (!isTiled(map, tileSizes) && !linalgOp.isOutputTensor(opOperand)) {
      tiledShapes.push_back(shapedOp);
      LLVM_DEBUG(llvm::dbgs() << ": not tiled: use shape: "
                              << opOperand->get().getType() << "\n");
      continue;
    }
    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");

    tiledShapes.push_back(makeTiledShape(b, loc, shapedOp, tileSizes, map, lbs,
                                         sizeBounds, subShapeSizes,
                                         omitPartialTileCheck));
  }

  return tiledShapes;
}

void addTileLoopIvsToIndexOpResults(OpBuilder &b, LinalgOp tiledOp,
                                    ArrayRef<Value> ivs) {
  if (tiledOp.hasIndexSemantics()) {
    for (IndexOp indexOp : tiledOp.getBlock()->getOps<IndexOp>()) {
      if (ivs[indexOp.dim()] == nullptr)
        continue;
      OpBuilder::InsertionGuard guard(b);
      b.setInsertionPointAfter(indexOp);
      AffineExpr index, offset;
      bindDims(b.getContext(), index, offset);
      AffineApplyOp applyOp = makeComposedAffineApply(
          b, indexOp.getLoc(), index + offset,
          ValueRange{indexOp.getResult(), ivs[indexOp.dim()]});
      indexOp.getResult().replaceAllUsesExcept(applyOp, applyOp);
    }
  }
}

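// Example for addTileLoopIvsToIndexOpResults above: inside a tile whose loop
// induction variable is %iv, a `%i = linalg.index 0` producing the intra-tile
// index is post-composed (for all other users) with
//   %i_global = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%i, %iv)
// so that index computations refer to the original, untiled iteration space.
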
} // namespace linalg
} // namespace mlir