1 //===- Utils.cpp - Utilities to support the Linalg dialect ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements utilities for the Linalg dialect.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "mlir/Dialect/Linalg/Utils/Utils.h"
14 
23 #include "mlir/Dialect/SCF/SCF.h"
29 #include "mlir/IR/AffineExpr.h"
31 #include "mlir/IR/AffineMap.h"
32 #include "mlir/IR/Matchers.h"
34 #include "mlir/Pass/Pass.h"
35 #include "llvm/ADT/TypeSwitch.h"
36 #include "llvm/Support/Debug.h"
37 
38 #define DEBUG_TYPE "linalg-utils"
39 
40 using namespace mlir;
41 using namespace mlir::linalg;
42 using namespace mlir::scf;
43 
44 static bool isZero(Value v) {
45  if (auto cst = v.getDefiningOp<arith::ConstantIndexOp>())
46  return cst.value() == 0;
47  return false;
48 }
49 
50 namespace {
51 
52 // Helper visitor to determine whether an AffineExpr is tiled.
53 // This is achieved by traversing every AffineDimExpr with position `pos` and
54 // checking whether the corresponding `tileSizes[pos]` is non-zero.
55 // This also enforces that only positive coefficients occur in multiplications.
56 //
57 // Example:
58 // `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0]
59 //
60 struct TileCheck : public AffineExprVisitor<TileCheck> {
61  TileCheck(ValueRange tileSizes) : isTiled(false), tileSizes(tileSizes) {}
62 
63  void visitDimExpr(AffineDimExpr expr) {
64  isTiled |= !isZero(tileSizes[expr.getPosition()]);
65  }
66  void visitAffineBinaryOpExpr(AffineBinaryOpExpr expr) {
67  visit(expr.getLHS());
68  visit(expr.getRHS());
69  if (expr.getKind() == mlir::AffineExprKind::Mul)
70  assert(expr.getRHS().cast<AffineConstantExpr>().getValue() > 0 &&
71  "nonpositive multiplying coefficient");
72  }
73  bool isTiled;
74  ValueRange tileSizes;
75 };
76 
77 } // namespace
78 
79 static bool isTiled(AffineExpr expr, ValueRange tileSizes) {
80  if (!expr)
81  return false;
82  TileCheck t(tileSizes);
83  t.visit(expr);
84  return t.isTiled;
85 }
86 
87 // Checks whether `map` varies with respect to a non-zero `tileSize`.
88 static bool isTiled(AffineMap map, ValueRange tileSizes) {
89  if (!map)
90  return false;
91  for (unsigned r = 0; r < map.getNumResults(); ++r)
92  if (isTiled(map.getResult(r), tileSizes))
93  return true;
94  return false;
95 }
96 
97 llvm::Optional<RegionMatcher::BinaryOpKind>
98 RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
99  auto &region = op.region();
100  if (!llvm::hasSingleElement(region))
101  return llvm::None;
102 
103  Block &block = region.front();
104  if (block.getNumArguments() != 2 ||
105  !block.getArgument(0).getType().isSignlessIntOrFloat() ||
106  !block.getArgument(1).getType().isSignlessIntOrFloat())
107  return llvm::None;
108 
109  auto &ops = block.getOperations();
110  if (!llvm::hasSingleElement(block.without_terminator()))
111  return llvm::None;
112 
113  using mlir::matchers::m_Val;
114  auto a = m_Val(block.getArgument(0));
115  auto b = m_Val(block.getArgument(1));
116 
117  auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
118  if (addPattern.match(&ops.back()))
119  return BinaryOpKind::IAdd;
120 
121  return llvm::None;
122 }
123 
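// Illustrative example (not part of the original file): a linalg.generic whose
// body performs a single scalar integer addition is the kind of region the
// matcher above recognizes as BinaryOpKind::IAdd. Assuming signless i32
// block arguments:
//
//   ^bb0(%a: i32, %b: i32):
//     %0 = arith.addi %a, %b : i32
//     linalg.yield %0 : i32
//
// Any extra operation in the body, or non int/float block arguments, make the
// matcher return llvm::None.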
124 /// Explicit instantiation of loop nest generator for different loop types.
125 template struct mlir::linalg::GenerateLoopNest<scf::ForOp>;
126 template struct mlir::linalg::GenerateLoopNest<scf::ParallelOp>;
127 template struct mlir::linalg::GenerateLoopNest<AffineForOp>;
128 template struct mlir::linalg::GenerateLoopNest<TiledLoopOp>;
129 
130 /// Given a list of subview ranges, extract individual values for lower, upper
131 /// bounds and steps and put them into the corresponding vectors.
132 static void unpackRanges(ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
133  SmallVectorImpl<Value> &ubs,
134  SmallVectorImpl<Value> &steps) {
135  for (Range range : ranges) {
136  lbs.emplace_back(range.offset);
137  ubs.emplace_back(range.size);
138  steps.emplace_back(range.stride);
139  }
140 }
141 
142 namespace mlir {
143 namespace linalg {
144 
145 bool isPermutation(ArrayRef<int64_t> permutation) {
146  // Count the number of appearances for all indices.
147  SmallVector<int64_t> indexCounts(permutation.size(), 0);
148  for (auto index : permutation) {
149  // Exit if the index is out-of-range.
150  if (index < 0 || index >= static_cast<int64_t>(permutation.size()))
151  return false;
152  indexCounts[index]++;
153  }
154  // Return true if all indices appear once.
155  return count(indexCounts, 1) == static_cast<int64_t>(permutation.size());
156 }
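// Illustrative examples (not part of the original file):
//   isPermutation({1, 2, 0})  -> true  (each index in [0, 3) appears once)
//   isPermutation({0, 0, 1})  -> false (0 appears twice, 2 is missing)
//   isPermutation({0, 3})     -> false (3 is out of range for size 2)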
157 
158 /// Helper function that creates a memref::DimOp or tensor::DimOp depending on
159 /// the type of `source`.
160 Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim) {
161  if (source.getType().isa<UnrankedMemRefType, MemRefType>())
162  return b.createOrFold<memref::DimOp>(loc, source, dim);
163  if (source.getType().isa<UnrankedTensorType, RankedTensorType>())
164  return b.createOrFold<tensor::DimOp>(loc, source, dim);
165  llvm_unreachable("Expected MemRefType or TensorType");
166 }
167 
168 /// Given an operation, retrieves the value of each dynamic dimension through
169 /// constructing the necessary DimOp operators.
170 SmallVector<Value, 4> getDynOperands(Location loc, Value val, OpBuilder &b) {
171  SmallVector<Value, 4> dynOperands;
172  auto shapedType = val.getType().cast<ShapedType>();
173  for (const auto &dim : llvm::enumerate(shapedType.getShape())) {
174  if (dim.value() == ShapedType::kDynamicSize)
175  dynOperands.push_back(createOrFoldDimOp(b, loc, val, dim.index()));
176  }
177  return dynOperands;
178 }
179 
180 void getUpperBoundForIndex(Value value, AffineMap &boundMap,
181  SmallVectorImpl<Value> &boundOperands) {
182  // Initialize `boundMap` and `boundOperands` to the identity returning
183  // `value`. This combination is the default result of the method if no
184  // simplification is possible.
185  assert(value.getType().isIndex() && "expect value to have index type");
186  boundMap = AffineMap::getMultiDimIdentityMap(1, value.getContext());
187  boundOperands.assign({value});
188  canonicalizeMapAndOperands(&boundMap, &boundOperands);
189 
190  // Continue only if there is an affine index computation to simplify.
191  Operation *definingOp = value.getDefiningOp();
192  if (!definingOp || !isa<AffineApplyOp, AffineMinOp>(definingOp))
193  return;
194 
195  // Get the backward slice containing the affine index computation.
196  SetVector<Operation *> backwardSlice;
197  getBackwardSlice(definingOp, &backwardSlice, [](Operation *op) {
198  return isa<AffineApplyOp, AffineMinOp>(op);
199  });
200  backwardSlice.insert(definingOp);
201 
202  // Setup a system of affine constraints that describe the index computation.
203  FlatAffineValueConstraints constraints;
204 
205  // Helper to find or create an identifier for the given value.
206  auto findOrCreateId = [&](Value value) {
207  if (!constraints.containsId(value)) {
208  constraints.appendDimId(value);
209  return true;
210  }
211  unsigned pos;
212  constraints.findId(value, &pos);
213  return pos < constraints.getNumDimIds();
214  };
215  // Helper to get the position for the given value.
216  auto getPosition = [&](Value value) {
217  unsigned pos;
218  bool exists = constraints.findId(value, &pos);
219  (void)exists;
220  assert(exists && "expect to find the identifier");
221  return pos;
222  };
223 
224  // Add the affine operations in `backwardSlice` to the constraints.
225  for (Operation *op : llvm::reverse(backwardSlice)) {
226  // Add an identifier for all op results and operands.
227  if (!(llvm::all_of(op->getResults(), findOrCreateId) &&
228  llvm::all_of(op->getOperands(), findOrCreateId)))
229  return;
230  // Add AffineApplyOps to the constraints.
231  if (auto applyOp = dyn_cast<AffineApplyOp>(op)) {
232  AffineValueMap valueMap(applyOp.getAffineMap(), applyOp.getOperands(),
233  applyOp.getResult());
234  if (failed(constraints.composeMap(&valueMap)))
235  return;
236  continue;
237  }
238  // Add AffineMinOps to the constraints.
239  auto minOp = cast<AffineMinOp>(op);
240  AffineMap map = constraints.computeAlignedMap(minOp.getAffineMap(),
241  minOp.getOperands());
242  if (failed(constraints.addBound(FlatAffineConstraints::UB,
243  getPosition(minOp.getResult()), map)))
244  return;
245  }
246 
247  // Obtain an upper bound for the affine index computation by projecting out
248  // all temporary results and expressing the upper bound for `value` in terms
249  // of the terminals of the index computation.
250  SmallVector<AffineMap> lowerBounds(1), upperBounds(1);
251  constraints.getSliceBounds(getPosition(value), 1, value.getContext(),
252  &lowerBounds, &upperBounds);
253 
254  // Verify `upperBounds[0]` is valid and has at least one result.
255  if (!upperBounds[0] || upperBounds[0].getNumResults() == 0)
256  return;
257 
258  // Set `boundMap` and `boundOperands` to the computed upper bound.
259  boundMap = upperBounds[0];
260  constraints.getAllValues(&boundOperands);
261  erase_value(boundOperands, value);
262  canonicalizeMapAndOperands(&boundMap, &boundOperands);
263 }
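// Illustrative sketch (not part of the original file): if `value` is defined
// by something like
//   %v = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 1024)>(%iv)[%ts]
// the constraint system above records %v <= %ts and %v <= 1024 - %iv, so the
// returned `boundMap`/`boundOperands` express the upper bound of %v in terms
// of the terminal operands %iv and %ts rather than the affine.min result.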
264 
265 FailureOr<int64_t> getConstantUpperBoundForIndex(Value value) {
266  // Compute an upper bound for `value`.
267  AffineMap boundMap;
268  SmallVector<Value> boundOperands;
269  getUpperBoundForIndex(value, boundMap, boundOperands);
270 
271  // Search the results of `boundMap` for constant upper bounds.
272  SmallVector<int64_t> constantBounds;
273  for (AffineExpr result : boundMap.getResults())
274  if (auto constExpr = result.dyn_cast<AffineConstantExpr>())
275  constantBounds.push_back(constExpr.getValue());
276 
277  // Return the minimal upper bound or failure if none is found.
278  if (constantBounds.empty())
279  return failure();
280  return *std::min_element(constantBounds.begin(), constantBounds.end());
281 }
282 
283 tensor::ExtractSliceOp makeComposedExtractSliceOp(
284  OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
285  ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides) {
286  assert(source && "expect source to be nonzero");
287 
288  // Do not fold if the producer is not an ExtractSliceOp.
289  auto producerOp = source.getDefiningOp<tensor::ExtractSliceOp>();
290  if (!producerOp)
291  return b.create<tensor::ExtractSliceOp>(loc, source, offsets, sizes,
292  strides);
293 
294  // Do not fold if the producer is rank reducing or if there are any non-unit
295  // strides. Supporting non-unit strides complicates the offset computation
296  // since the consumer offsets need to be multiplied by the producer strides.
297  // TODO: support non-unit strides once there are use cases.
298  SmallVector<OpFoldResult> allStrides = producerOp.getMixedStrides();
299  allStrides.append(strides.begin(), strides.end());
300  bool hasNonUnitStride = any_of(allStrides, [](OpFoldResult ofr) {
301  return getConstantIntValue(ofr) != static_cast<int64_t>(1);
302  });
303  if (hasNonUnitStride ||
304  producerOp.getSourceType().getRank() !=
305  producerOp.getResult().getType().cast<ShapedType>().getRank())
306  return b.create<tensor::ExtractSliceOp>(loc, source, offsets, sizes,
307  strides);
308 
309  // Fold the producer by adding the offsets and extracting the slice directly
310  // from the producer source tensor.
311  SmallVector<OpFoldResult> foldedOffsets(offsets.begin(), offsets.end());
312  AffineExpr dim1, dim2;
313  bindDims(b.getContext(), dim1, dim2);
314  for (const auto &en : enumerate(producerOp.getMixedOffsets())) {
315  SmallVector<Value> offsetValues = {
316  getValueOrCreateConstantIndexOp(b, loc, foldedOffsets[en.index()]),
317  getValueOrCreateConstantIndexOp(b, loc, en.value())};
318  foldedOffsets[en.index()] =
319  makeComposedAffineApply(b, loc, dim1 + dim2, offsetValues).getResult();
320  }
321  return b.create<tensor::ExtractSliceOp>(loc, producerOp.source(),
322  foldedOffsets, sizes, strides);
323 }
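// Illustrative folding example (not part of the original file), assuming unit
// strides and no rank reduction:
//
//   %0 = tensor.extract_slice %t[%a, %b] [16, 16] [1, 1]
//   %1 = tensor.extract_slice %0[%c, %d] [4, 4] [1, 1]
//
// is rewritten by the helper above into a single slice taken directly from
// %t, with the consumer offsets added to the producer offsets via
// affine.apply:
//
//   %1 = tensor.extract_slice %t[%a + %c, %b + %d] [4, 4] [1, 1]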
324 
325 Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
326  Value source, Value pad, bool nofold) {
327  assert(type.hasStaticShape() && "expect tensor type to have static shape");
328 
329  // Exit if `source` is not defined by an ExtractSliceOp.
330  auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
331  if (!sliceOp)
332  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
333 
334  // Search the `source` use-def chain for padded LinalgOps.
335  Value current = sliceOp.source();
336  while (current) {
337  auto linalgOp = current.getDefiningOp<LinalgOp>();
338  if (!linalgOp)
339  break;
340  OpResult opResult = current.cast<OpResult>();
341  current = linalgOp.getOutputOperand(opResult.getResultNumber())->get();
342  }
343  auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
344 
345  // Exit if the search fails to match a tensor::PadOp at the end of the matched
346  // LinalgOp sequence.
347  if (!padOp)
348  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
349 
350  // Exit if the padded result type does not match.
351  if (sliceOp.source().getType() != type)
352  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
353 
354  // Exit if the LinalgOps are not high padded.
355  if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
356  return getConstantIntValue(ofr) != static_cast<int64_t>(0);
357  }))
358  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
359 
360  // Exit if `padOpSliceOp`, which defines the slice used by
361  // `padOp`, is rank-reducing.
362  auto padOpSliceOp = padOp.source().getDefiningOp<tensor::ExtractSliceOp>();
363  if (!padOpSliceOp ||
364  sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
365  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
366 
367  // Exit if the sizes of `sliceOp` do not match the sizes of the slice padded
368  // by `padOp`.
369  if (llvm::any_of(
370  llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
371  [](std::tuple<OpFoldResult, OpFoldResult> it) {
372  return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
373  }))
374  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
375 
376  // Exit if the padding values do not match.
377  Attribute padOpPadAttr, padAttr;
378  Value padOpPad = padOp.getConstantPaddingValue();
379  if (!padOpPad || !matchPattern(padOpPad, m_Constant(&padOpPadAttr)) ||
380  !matchPattern(pad, m_Constant(&padAttr)) || padOpPadAttr != padAttr)
381  return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
382 
383  // Return the padded result if the padding values and sizes match.
384  return sliceOp.source();
385 }
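// Illustrative use-def chain matched above (not part of the original file;
// the ops and names below are hypothetical and only sketch the intended
// padding-reuse pattern):
//
//   %src    = tensor.extract_slice %t[...] [%s0, %s1] [1, 1]
//   %padded = tensor.pad %src low[0, 0] high[%h0, %h1] { ... yield %cst }
//   %work   = linalg.fill(%cst, %padded)
//   %slice  = tensor.extract_slice %work[...] [%s0, %s1] [1, 1]
//
// When `source` is %slice, the walk follows %work's output operand back to
// the tensor.pad. If the padding value, the static result type, and the
// slice sizes all line up, the helper returns %work (the already padded
// value) instead of creating a second tensor.pad around %slice.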
386 
387 GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
388  Value outputTensor,
389  ArrayRef<int64_t> transposeVector) {
390  auto resultTensorType = outputTensor.getType().cast<RankedTensorType>();
391  Type elementType = resultTensorType.getElementType();
392 
393  assert(isPermutation(transposeVector) &&
394  "expect transpose vector to be a permutation");
395  assert(transposeVector.size() ==
396  static_cast<size_t>(resultTensorType.getRank()) &&
397  "expect transpose vector size to match result tensor rank");
398 
399  // Compute the transpose and the identity indexing maps.
400  SmallVector<AffineMap> indexingMaps = {
401  inversePermutation(AffineMap::getPermutationMap(
402  SmallVector<unsigned>(transposeVector.begin(), transposeVector.end()),
403  b.getContext())),
404  AffineMap::getMultiDimIdentityMap(transposeVector.size(),
405  b.getContext())};
406  SmallVector<llvm::StringRef> iteratorTypes(transposeVector.size(),
407  getParallelIteratorTypeName());
408 
409  // Create a GenericOp to transpose `inputTensor` into `outputTensor`.
410  auto transposeOp = b.create<GenericOp>(
411  loc, resultTensorType, inputTensor, outputTensor,
412  b.getAffineMapArrayAttr(indexingMaps), b.getStrArrayAttr(iteratorTypes),
413  /*doc=*/nullptr,
414  /*library_call=*/nullptr);
415  Region &body = transposeOp.getRegion();
416  body.push_back(new Block());
417  body.front().addArguments({elementType, elementType}, {loc, loc});
418 
419  // Create the body of the transpose operation.
420  OpBuilder::InsertionGuard g(b);
421  b.setInsertionPointToEnd(&body.front());
422  b.create<YieldOp>(loc, transposeOp.getRegion().front().getArgument(0));
423  return transposeOp;
424 }
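// Illustrative result (not part of the original file): for transposeVector
// [1, 0] on a 2-D tensor, the generated op is roughly
//
//   linalg.generic {
//       indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>,
//                        affine_map<(d0, d1) -> (d0, d1)>],
//       iterator_types = ["parallel", "parallel"]}
//     ins(%in : tensor<?x?xf32>) outs(%out : tensor<?x?xf32>) {
//   ^bb0(%a: f32, %b: f32):
//     linalg.yield %a : f32
//   }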
425 
426 /// Specialization to build an scf "for" nest.
427 template <>
428 void GenerateLoopNest<scf::ForOp>::doit(
429  OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
430  ArrayRef<Attribute> iteratorTypes,
431  function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
432  ValueRange)>
433  bodyBuilderFn,
434  Optional<LinalgLoopDistributionOptions> distributionOptions,
435  ArrayRef<StringRef> distributionTypes) {
436  SmallVector<Value> iterArgInitValues = linalgOp.getOutputTensorOperands();
437  // Create procInfo so it dominates loops, if appropriate.
438  SmallVector<ProcInfo, 4> procInfo;
439  SmallVector<DistributionMethod, 0> distributionMethod;
440  if (distributionOptions.hasValue()) {
441  // Collect loop ranges for parallel dimensions.
442  SmallVector<Range, 2> parallelLoopRanges;
443  for (const auto &iteratorType : enumerate(iteratorTypes))
444  if (isParallelIterator(iteratorType.value()))
445  parallelLoopRanges.push_back(loopRanges[iteratorType.index()]);
446 
447  // Get their distribution schemes.
448  distributionMethod = distributionOptions->distributionMethod;
449  if (distributionMethod.size() < parallelLoopRanges.size())
450  parallelLoopRanges.resize(distributionMethod.size());
451  procInfo = distributionOptions->procInfo(b, loc, parallelLoopRanges);
452  }
453 
454  SmallVector<Value, 4> lbs, ubs, steps;
455  unpackRanges(loopRanges, lbs, ubs, steps);
456  LoopNest loopNest = mlir::scf::buildLoopNest(
457  b, loc, lbs, ubs, steps, iterArgInitValues,
458  [&](OpBuilder &b, Location loc, ValueRange ivs, ValueRange iterArgs) {
459  assert(iterArgs.size() == linalgOp.getOutputTensorOperands().size() &&
460  "expect the number of output tensors and iter args to match");
461  SmallVector<Value> operandValuesToUse =
462  linalgOp.getInputAndOutputOperands();
463  if (!iterArgs.empty()) {
464  operandValuesToUse = linalgOp.getInputOperands();
465  operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
466  }
467  return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
468  });
469 
470  if (!distributionOptions || loopNest.loops.empty())
471  return;
472 
473  // Filter out scf.for loops that were created out of parallel dimensions.
474  SmallVector<scf::ForOp, 4> loops;
475  for (const auto &iteratorType : enumerate(iteratorTypes))
476  if (isParallelIterator(iteratorType.value()))
477  loops.push_back(loopNest.loops[iteratorType.index()]);
478 
479  // Distribute - only supports cyclic distribution for now.
480  for (auto it : llvm::zip(loops, procInfo, distributionMethod))
481  if (std::get<2>(it) == DistributionMethod::Cyclic)
482  mapLoopToProcessorIds(std::get<0>(it), std::get<1>(it).procId,
483  std::get<1>(it).nprocs);
484 }
485 
486 /// Specialization to build an affine "for" nest.
487 template <>
488 void GenerateLoopNest<AffineForOp>::doit(
489  OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
490  ArrayRef<Attribute> iteratorTypes,
491  function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
492  ValueRange)>
493  bodyBuilderFn,
494  Optional<LinalgLoopDistributionOptions>, ArrayRef<StringRef>) {
495  SmallVector<Value> iterArgInitValues = linalgOp.getOutputTensorOperands();
496  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
497  SmallVector<Value, 4> lbs, ubs, steps;
498  unpackRanges(loopRanges, lbs, ubs, steps);
499 
500  // Affine loops require constant steps.
501  SmallVector<int64_t, 4> constantSteps;
502  constantSteps.reserve(steps.size());
503  for (Value v : steps) {
504  auto op = v.getDefiningOp<arith::ConstantIndexOp>();
505  assert(op && "Affine loops require constant steps");
506  constantSteps.push_back(op.value());
507  }
508 
509  mlir::buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
510  [&](OpBuilder &b, Location loc, ValueRange ivs) {
511  SmallVector<Value> operandValuesToUse =
512  linalgOp.getInputAndOutputOperands();
513  bodyBuilderFn(b, loc, ivs, operandValuesToUse);
514  });
515 }
516 
517 /// Specialization to build a linalg.tiled_loop.
518 template <>
519 void GenerateLoopNest<TiledLoopOp>::doit(
520  OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
521  ArrayRef<Attribute> iteratorTypes,
522  function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
523  ValueRange)>
524  bodyBuilderFn,
525  Optional<LinalgLoopDistributionOptions> distributionOptions,
526  ArrayRef<StringRef> distributionTypes) {
527  SmallVector<ProcInfo, 2> procInfo;
528  SmallVector<Value, 4> lbs, ubs, steps;
529  unpackRanges(loopRanges, lbs, ubs, steps);
530 
531  auto wrappedBuilderFn = [&](OpBuilder &nestedBuilder, Location nestedLoc,
532  ValueRange ivs, ValueRange inputs,
533  ValueRange outputs) {
534  SmallVector<Value> operandValuesToUse = inputs;
535  operandValuesToUse.append(outputs.begin(), outputs.end());
536  scf::ValueVector results =
537  bodyBuilderFn(nestedBuilder, nestedLoc, ivs, operandValuesToUse);
538  nestedBuilder.create<linalg::YieldOp>(nestedLoc, results);
539  };
540 
541  SmallVector<Value> inputOperands = linalgOp.getInputOperands();
542  SmallVector<Value> outputOperands = linalgOp.getOutputOperands();
543  auto tiledLoop =
544  b.create<TiledLoopOp>(loc, lbs, ubs, steps, inputOperands, outputOperands,
545  b.getArrayAttr(iteratorTypes), wrappedBuilderFn);
546  if (!distributionTypes.empty())
547  tiledLoop.setDistributionTypes(b, distributionTypes);
548 }
549 
550 /// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`.
551 void updateBoundsForCyclicDistribution(OpBuilder &b, Location loc, Value procId,
552  Value nprocs, Value &lb, Value &ub,
553  Value &step) {
554  AffineExpr d0, d1;
555  bindDims(b.getContext(), d0, d1);
556  AffineExpr s0 = getAffineSymbolExpr(0, b.getContext());
557  lb = makeComposedAffineApply(b, loc, d0 + d1 * s0, {lb, procId, step});
558  step = makeComposedAffineApply(b, loc, d0 * s0, {nprocs, step});
559 }
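// Illustrative effect (not part of the original file): with lb = 0, step = 4,
// procId = 2, and nprocs = 8, the rewritten bounds are
//   lb   = 0 + 2 * 4 = 8    (each processor starts at its own offset)
//   step = 8 * 4     = 32   (processors stride cyclically over the iteration
//                            space), while ub is left unchanged.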
560 
561 /// Generates a loop nest consisting of scf.parallel and scf.for, depending
562 /// on the `iteratorTypes.` Consecutive parallel loops create a single
563 /// scf.parallel operation; each sequential loop creates a new scf.for
564 /// operation. The body of the innermost loop is populated by
565 /// `bodyBuilderFn` that accepts a range of induction variables for all
566 /// loops. `ivStorage` is used to store the partial list of induction
567 /// variables.
568 // TODO: this function can be made iterative instead. However, it
569 // will have at most as many recursive calls as nested loops, which rarely
570 // exceeds 10.
571 static void generateParallelLoopNest(
572  OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs,
573  ValueRange steps, ArrayRef<Attribute> iteratorTypes,
574  function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilderFn,
575  SmallVectorImpl<Value> &ivStorage,
576  ArrayRef<DistributionMethod> distributionMethod = {}) {
577  assert(lbs.size() == ubs.size());
578  assert(lbs.size() == steps.size());
579  assert(lbs.size() == iteratorTypes.size());
580 
581  // If there are no (more) loops to be generated, generate the body and be
582  // done with it.
583  if (iteratorTypes.empty()) {
584  bodyBuilderFn(b, loc, ivStorage);
585  return;
586  }
587 
588  // Find the outermost parallel loops and drop their types from the list.
589  unsigned nLoops = iteratorTypes.size();
590  unsigned nOuterPar =
591  nLoops - iteratorTypes.drop_while(isParallelIterator).size();
592 
593  // If there are no outer parallel loops, generate one sequential loop and
594  // recurse. Note that we wouldn't have dropped anything from `iteratorTypes`
595  // in this case.
596  if (nOuterPar == 0) {
597  LoopNest singleLoop = buildLoopNest(
598  b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
599  [&](OpBuilder &b, Location loc, ValueRange ivs) {
600  ivStorage.append(ivs.begin(), ivs.end());
601  generateParallelLoopNest(b, loc, lbs.drop_front(), ubs.drop_front(),
602  steps.drop_front(),
603  iteratorTypes.drop_front(), bodyBuilderFn,
604  ivStorage, distributionMethod);
605  });
606  return;
607  }
608  if (distributionMethod.empty()) {
609  // Generate a single parallel loop-nest operation for all outermost
610  // parallel loops and recurse.
611  b.create<scf::ParallelOp>(
612  loc, lbs.take_front(nOuterPar), ubs.take_front(nOuterPar),
613  steps.take_front(nOuterPar),
614  [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
615  ivStorage.append(localIvs.begin(), localIvs.end());
616  generateParallelLoopNest(
617  nestedBuilder, nestedLoc, lbs.drop_front(nOuterPar),
618  ubs.drop_front(nOuterPar), steps.drop_front(nOuterPar),
619  iteratorTypes.drop_front(nOuterPar), bodyBuilderFn, ivStorage,
620  (distributionMethod.size() < nOuterPar)
621  ? ArrayRef<DistributionMethod>()
622  : distributionMethod.drop_front(nOuterPar));
623  });
624  return;
625  }
626 
627  // Process all consecutive similarly distributed loops simultaneously.
628  DistributionMethod methodToUse = distributionMethod[0];
629  unsigned numProcessed = 1;
630  for (unsigned i = 1; i < nOuterPar && i < distributionMethod.size(); ++i) {
631  if (distributionMethod[i] != methodToUse)
632  break;
633  numProcessed++;
634  }
635 
636  switch (methodToUse) {
637  case DistributionMethod::Cyclic: {
638  // Generate a single parallel loop-nest operation for all outermost
639  // parallel loops and recurse.
640  b.create<scf::ParallelOp>(
641  loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
642  steps.take_front(numProcessed),
643  [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
644  ivStorage.append(localIvs.begin(), localIvs.end());
645  generateParallelLoopNest(
646  nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
647  ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
648  iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
649  (distributionMethod.size() < numProcessed)
650  ? ArrayRef<DistributionMethod>()
651  : distributionMethod.drop_front(numProcessed));
652  });
653  return;
654  }
655  case DistributionMethod::CyclicNumProcsGeNumIters: {
656  // Check (for the processed loops) that the iteration is in-bounds.
657  ArithBuilder ab(b, loc);
658  Value cond = ab.slt(lbs[0], ubs[0]);
659  for (unsigned i = 1; i < numProcessed; ++i)
660  cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
661  ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
662  b.create<scf::IfOp>(loc, cond, [&](OpBuilder &b, Location loc) {
663  generateParallelLoopNest(
664  b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
665  steps.drop_front(numProcessed),
666  iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
667  distributionMethod.drop_front(numProcessed));
668  b.create<scf::YieldOp>(loc, ValueRange{});
669  });
670  return;
671  }
672  case DistributionMethod::CyclicNumProcsEqNumIters:
673  // No check/loops needed here. Set the `%iv` to be the `%lb` and proceed
674  // with inner loop generation.
675  ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
676  generateParallelLoopNest(
677  b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
678  steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
679  bodyBuilderFn, ivStorage, distributionMethod.drop_front(numProcessed));
680  return;
681  }
682 }
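// Illustrative nesting (not part of the original file): for iterator types
// ["parallel", "parallel", "reduction", "parallel"] and no distribution, the
// recursion above produces roughly
//
//   scf.parallel (%i, %j) = ... {   // two outermost parallel dims fused
//     scf.for %k = ... {            // sequential (reduction) dim
//       scf.parallel (%l) = ... {   // trailing parallel dim
//         <body>
//       }
//     }
//   }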
683 
684 /// Specialization for generating a mix of parallel and sequential scf loops.
685 template <>
686 void GenerateLoopNest<scf::ParallelOp>::doit(
687  OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
688  ArrayRef<Attribute> iteratorTypes,
689  function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
690  ValueRange)>
691  bodyBuilderFn,
692  Optional<LinalgLoopDistributionOptions> distributionOptions,
693  ArrayRef<StringRef> distributionTypes) {
694  SmallVector<Value> iterArgInitValues = linalgOp.getOutputTensorOperands();
695  assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
696  // This function may be passed more iterator types than ranges.
697  assert(iteratorTypes.size() >= loopRanges.size() &&
698  "expected iterator type for all ranges");
699  iteratorTypes = iteratorTypes.take_front(loopRanges.size());
700  SmallVector<Value, 8> lbsStorage, ubsStorage, stepsStorage, ivs;
701  unsigned numLoops = iteratorTypes.size();
702  ivs.reserve(numLoops);
703  lbsStorage.reserve(numLoops);
704  ubsStorage.reserve(numLoops);
705  stepsStorage.reserve(numLoops);
706 
707  // Get the loop lb, ub, and step.
708  unpackRanges(loopRanges, lbsStorage, ubsStorage, stepsStorage);
709 
710  // Modify the lb, ub, and step based on the distribution options.
711  SmallVector<DistributionMethod, 0> distributionMethod;
712  if (distributionOptions) {
713  auto &options = distributionOptions.getValue();
714  distributionMethod.assign(distributionOptions->distributionMethod.begin(),
715  distributionOptions->distributionMethod.end());
716  SmallVector<Range, 2> parallelLoopRanges;
717  for (const auto &iteratorType : enumerate(iteratorTypes)) {
718  if (isParallelIterator(iteratorType.value()))
719  parallelLoopRanges.push_back(loopRanges[iteratorType.index()]);
720  }
721  if (distributionMethod.size() < parallelLoopRanges.size())
722  parallelLoopRanges.resize(distributionMethod.size());
723  SmallVector<ProcInfo, 2> procInfo =
724  options.procInfo(b, loc, parallelLoopRanges);
725  unsigned index = 0;
726  for (const auto &iteratorType : enumerate(iteratorTypes)) {
727  if (index >= procInfo.size())
728  break;
729  if (isParallelIterator(iteratorType.value())) {
730  unsigned i = iteratorType.index();
731  updateBoundsForCyclicDistribution(b, loc, procInfo[index].procId,
732  procInfo[index].nprocs, lbsStorage[i],
733  ubsStorage[i], stepsStorage[i]);
734  index++;
735  }
736  }
737  }
738  ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
739  generateParallelLoopNest(
740  b, loc, lbs, ubs, steps, iteratorTypes,
741  [&](OpBuilder &b, Location loc, ValueRange ivs) {
742  SmallVector<Value> operandValuesToUse =
743  linalgOp.getInputAndOutputOperands();
744  bodyBuilderFn(b, loc, ivs, operandValuesToUse);
745  },
746  ivs, distributionMethod);
747 
748  assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
749 }
750 
751 static Value fullyComposeAndAffineApply(OpBuilder &b, Location loc,
752  AffineExpr expr, ValueRange operands) {
753  AffineMap map = AffineMap::inferFromExprList({expr}).front();
754  SmallVector<Value> normalizedOperands(operands.begin(), operands.end());
755  mlir::fullyComposeAffineMapAndOperands(&map, &normalizedOperands);
756  canonicalizeMapAndOperands(&map, &normalizedOperands);
757  return b.createOrFold<AffineApplyOp>(loc, map, normalizedOperands);
758 }
759 
760 Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
761  ValueRange tileSizes, AffineMap map, ValueRange lbs,
762  ValueRange ubs, ValueRange subShapeSizes) {
763  auto shapedType = valueToTile.getType().dyn_cast<ShapedType>();
764  assert(shapedType && "only shaped types can be tiled");
765  ArrayRef<int64_t> shape = shapedType.getShape();
766  int64_t rank = shapedType.getRank();
767 
768  // Construct a new subview / extract_slice for the tile.
769  SmallVector<OpFoldResult, 4> offsets, sizes, strides;
770  offsets.reserve(rank);
771  sizes.reserve(rank);
772  strides.reserve(rank);
773  for (unsigned r = 0; r < rank; ++r) {
774  LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: for dim#" << r);
775  if (!isTiled(map.getSubMap({r}), tileSizes)) {
776  offsets.push_back(builder.getIndexAttr(0));
777  Value dim = createOrFoldDimOp(builder, loc, valueToTile, r);
778  sizes.push_back(getAsOpFoldResult(dim));
779  strides.push_back(builder.getIndexAttr(1));
780  LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
781  continue;
782  }
783  LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");
784 
785  // Tiling creates a new slice at the proper index, the slice step is 1
786  // (i.e. the op does not subsample, stepping occurs in the loop).
787  auto m = map.getSubMap({r});
788  LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: submap: " << m << "\n");
789  auto offset = applyMapToValues(builder, loc, m, lbs).front();
790  offsets.push_back(offset);
791  auto closedIntSize =
792  applyMapToValues(builder, loc, m, subShapeSizes).front();
793  // The resulting size needs to be made a half-open interval again.
794  AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext());
795  Value size =
796  fullyComposeAndAffineApply(builder, loc, s0 + 1, closedIntSize);
797  LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: raw size: " << size << "\n");
798 
799  // The size of the subview / extract_slice should be trimmed to avoid
800  // out-of-bounds accesses, unless:
801  // a. We statically know the subshape size divides the shape size evenly.
802  // b. The subshape size is 1. According to the way the loops are set up,
803  // tensors with "0" dimensions would never be constructed.
804  int64_t shapeSize = shape[r];
805  auto sizeCst = size.getDefiningOp<arith::ConstantIndexOp>();
806  auto hasTileSizeOne = sizeCst && sizeCst.value() == 1;
807  auto dividesEvenly = sizeCst && !ShapedType::isDynamic(shapeSize) &&
808  ((shapeSize % sizeCst.value()) == 0);
809  if (!hasTileSizeOne && !dividesEvenly) {
810  LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
811  << ", size: " << size
812  << ": make sure in bound with affine.min\n");
813 
814  AffineExpr dim0, dim1, dim2;
815  bindDims(builder.getContext(), dim0, dim1, dim2);
816 
817  // Get the dimension size for this dimension. We need to first calculate
818  // the max index and then add one. This is important because for
819  // convolution ops, the input window dimension's affine map is of the
820  // form `(d0 * s0 + d1)`, where `d0`/`d1` is an output/filter window
821  // dimension and `s0` is the stride. Directly using the dimension size of
822  // the output/filter window dimensions would cause incorrect calculation.
823  AffineMap minusOneMap =
824  AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 - 1}})
825  .front();
826  AffineMap plusOneMap =
827  AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 + 1}})
828  .front();
829  auto maxIndices = llvm::to_vector<8>(llvm::map_range(ubs, [&](Value ub) {
830  return makeComposedAffineApply(builder, loc, minusOneMap, {ub})
831  .getResult();
832  }));
833  Value maxIndex = applyMapToValues(builder, loc, m, maxIndices).front();
834  Value d = makeComposedAffineApply(builder, loc, plusOneMap, {maxIndex});
835 
836  // Compute min(size, dim - offset) to avoid out-of-bounds accesses.
837  AffineMap minMap = AffineMap::inferFromExprList(
838  {ArrayRef<AffineExpr>{dim0, dim1 - dim2}})
839  .front();
840  SmallVector<Value, 4> operands{size, d, offset};
841  fullyComposeAffineMapAndOperands(&minMap, &operands);
842  canonicalizeMapAndOperands(&minMap, &operands);
843  size = builder.create<AffineMinOp>(loc, builder.getIndexType(), minMap,
844  operands);
845  }
846 
847  sizes.push_back(size);
848  LLVM_DEBUG(llvm::dbgs()
849  << "makeTiledShape: new offset: " << offset << "\n");
850  LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
851  strides.push_back(builder.getIndexAttr(1));
852  }
853 
854  auto *sliceOp = TypeSwitch<ShapedType, Operation *>(shapedType)
855  .Case([&](MemRefType) {
856  return builder.create<memref::SubViewOp>(
857  loc, valueToTile, offsets, sizes, strides);
858  })
859  .Case([&](RankedTensorType) {
860  return makeComposedExtractSliceOp(
861  builder, loc, valueToTile, offsets, sizes, strides);
862  })
863  .Default([](ShapedType) -> Operation * {
864  llvm_unreachable("Unexpected shaped type");
865  });
866  return sliceOp->getResult(0);
867 }
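// Illustrative slice computation (not part of the original file): for a tiled
// dimension with lower bound %iv, tile size 4, and dimension size 10, the
// code above produces an offset of %iv, a stride of 1, and a size of roughly
//   affine.min(4, 10 - %iv)
// so the last tile is clipped instead of reading out of bounds. Untiled
// dimensions keep offset 0, the full dimension size, and stride 1.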
868 
869 SmallVector<Value> computeTileOffsets(OpBuilder &b, Location loc,
870  ValueRange ivs, ValueRange tileSizes) {
871  SmallVector<Value> offsets;
872  for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
873  LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
874  bool isTiled = !isZero(tileSizes[idx]);
875  offsets.push_back(
876  isTiled ? ivs[idxIvs++]
877  : b.create<arith::ConstantIndexOp>(loc, 0).getResult());
878  LLVM_DEBUG(llvm::dbgs()
879  << "computeTileOffsets: " << offsets.back() << "\n");
880  }
881  return offsets;
882 }
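// Illustrative example (not part of the original file): with tileSizes
// [%c2, %c0, %c4] and ivs [%i, %j], the returned offsets are [%i, 0, %j]:
// tiled dimensions consume the next induction variable, untiled dimensions
// get a constant 0.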
883 
884 SmallVector<Value> computeTileSizes(OpBuilder &b, Location loc, ValueRange ivs,
885  ValueRange tileSizes,
886  ArrayRef<Value> sizeBounds) {
887  SmallVector<Value> sizes;
888  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
889  bool isTiled = !isZero(tileSizes[idx]);
890  // Before composing, we need to make the range a closed interval.
891  Value size = isTiled ? tileSizes[idx] : sizeBounds[idx];
892  AffineExpr d0 = getAffineDimExpr(0, b.getContext());
893  sizes.push_back(fullyComposeAndAffineApply(b, loc, d0 - 1, size));
894  LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
895  }
896  return sizes;
897 }
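// Illustrative example (not part of the original file): with tileSizes
// [%c2, %c0] and sizeBounds [%d0, %d1], the returned sizes are conceptually
// [2 - 1, %d1 - 1]: the tile size for tiled dimensions and the dimension
// bound for untiled ones, each turned into a closed-interval size by
// subtracting one.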
898 
899 SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
900  LinalgOp linalgOp,
901  ArrayRef<Value> valuesToTile,
902  ValueRange ivs, ValueRange tileSizes,
903  ArrayRef<Value> sizeBounds) {
904  assert(ivs.size() == static_cast<size_t>(llvm::count_if(
905  llvm::make_range(tileSizes.begin(), tileSizes.end()),
906  [](Value v) { return !isZero(v); })) &&
907  "expected as many ivs as non-zero sizes");
908 
909  // Construct (potentially temporary) mins and maxes on which to apply maps
910  // that define tile subshapes.
911  SmallVector<Value> lbs = computeTileOffsets(b, loc, ivs, tileSizes);
912  SmallVector<Value> subShapeSizes =
913  computeTileSizes(b, loc, ivs, tileSizes, sizeBounds);
914 
915  assert(static_cast<int64_t>(valuesToTile.size()) ==
916  linalgOp.getNumInputsAndOutputs() &&
917  "expected one value to tile for every operand");
918  SmallVector<Value, 4> tiledShapes;
919  tiledShapes.reserve(valuesToTile.size());
920  for (OpOperand *opOperand : linalgOp.getInputAndOutputOperands()) {
921  Value shapedOp = valuesToTile[opOperand->getOperandNumber()];
922  LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
923  AffineMap map = linalgOp.getTiedIndexingMap(opOperand);
924  // Use `opOperand` as is if it is not tiled and not an output tensor. Having
925  // an extract/insert slice pair for all output tensors simplifies follow up
926  // transformations such as padding and bufferization since the
927  // extract/insert slice pairs make the accessed iteration argument
928  // subdomains explicit.
929  if (!isTiled(map, tileSizes) && !linalgOp.isOutputTensor(opOperand)) {
930  tiledShapes.push_back(shapedOp);
931  LLVM_DEBUG(llvm::dbgs() << ": not tiled: use shape: "
932  << opOperand->get().getType() << "\n");
933  continue;
934  }
935  LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
936 
937  tiledShapes.push_back(makeTiledShape(b, loc, shapedOp, tileSizes, map, lbs,
938  sizeBounds, subShapeSizes));
939  }
940 
941  return tiledShapes;
942 }
943 
944 void addTileLoopIvsToIndexOpResults(OpBuilder &b, LinalgOp tiledOp,
945  ArrayRef<Value> ivs) {
946  if (tiledOp.hasIndexSemantics()) {
947  for (IndexOp indexOp : tiledOp.getBlock()->getOps<IndexOp>()) {
948  if (ivs[indexOp.dim()] == nullptr)
949  continue;
950  OpBuilder::InsertionGuard guard(b);
951  b.setInsertionPointAfter(indexOp);
952  AffineExpr index, offset;
953  bindDims(b.getContext(), index, offset);
954  AffineApplyOp applyOp = makeComposedAffineApply(
955  b, indexOp.getLoc(), index + offset,
956  ValueRange{indexOp.getResult(), ivs[indexOp.dim()]});
957  indexOp.getResult().replaceAllUsesExcept(applyOp, applyOp);
958  }
959  }
960 }
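// Illustrative rewrite (not part of the original file): if the tiled op
// contains
//   %idx = linalg.index 0 : index
// and the tile loop induction variable for dimension 0 is %iv, all uses of
// %idx (except the newly created affine.apply itself) are replaced by
//   %new = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%idx, %iv)
// so index values are shifted from tile-local to global coordinates.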
961 
962 } // namespace linalg
963 } // namespace mlir