MLIR  19.0.0git
VectorUtils.cpp
Go to the documentation of this file.
1 //===- VectorUtils.cpp - MLIR Utilities for VectorOps ------------------===//
2 //
3 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements utility methods for working with the Vector dialect.
10 //
11 //===----------------------------------------------------------------------===//
12 
#include "mlir/Dialect/Vector/Utils/VectorUtils.h"

#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Support/LLVM.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/TypeSwitch.h"
33 #define DEBUG_TYPE "vector-utils"
34 
35 #define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
36 #define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
37 
38 using namespace mlir;
39 
40 /// Helper function that creates a memref::DimOp or tensor::DimOp depending on
41 /// the type of `source`.
43  int64_t dim) {
44  if (isa<UnrankedMemRefType, MemRefType>(source.getType()))
45  return b.createOrFold<memref::DimOp>(loc, source, dim);
46  if (isa<UnrankedTensorType, RankedTensorType>(source.getType()))
47  return b.createOrFold<tensor::DimOp>(loc, source, dim);
48  llvm_unreachable("Expected MemRefType or TensorType");
49 }
50 
51 /// Given the n-D transpose pattern 'transp', return true if 'dim0' and 'dim1'
52 /// should be transposed with each other within the context of their 2D
53 /// transposition slice.
54 ///
55 /// Example 1: dim0 = 0, dim1 = 2, transp = [2, 1, 0]
56 /// Return true: dim0 and dim1 are transposed within the context of their 2D
57 /// transposition slice ([1, 0]).
58 ///
59 /// Example 2: dim0 = 0, dim1 = 1, transp = [2, 1, 0]
60 /// Return true: dim0 and dim1 are transposed within the context of their 2D
61 /// transposition slice ([1, 0]). Paradoxically, note how dim1 (1) is *not*
62 /// transposed within the full context of the transposition.
63 ///
64 /// Example 3: dim0 = 0, dim1 = 1, transp = [2, 0, 1]
65 /// Return false: dim0 and dim1 are *not* transposed within the context of
66 /// their 2D transposition slice ([0, 1]). Paradoxically, note how dim0 (0)
67 /// and dim1 (1) are transposed within the full context of the of the
68 /// transposition.
69 static bool areDimsTransposedIn2DSlice(int64_t dim0, int64_t dim1,
70  ArrayRef<int64_t> transp) {
71  // Perform a linear scan along the dimensions of the transposed pattern. If
72  // dim0 is found first, dim0 and dim1 are not transposed within the context of
73  // their 2D slice. Otherwise, 'dim1' is found first and they are transposed.
74  for (int64_t permDim : transp) {
75  if (permDim == dim0)
76  return false;
77  if (permDim == dim1)
78  return true;
79  }
80 
81  llvm_unreachable("Ill-formed transpose pattern");
82 }
83 
85 mlir::vector::isTranspose2DSlice(vector::TransposeOp op) {
86  VectorType srcType = op.getSourceVectorType();
87  SmallVector<int64_t> srcGtOneDims;
88  for (auto [index, size] : llvm::enumerate(srcType.getShape()))
89  if (size > 1)
90  srcGtOneDims.push_back(index);
91 
92  if (srcGtOneDims.size() != 2)
93  return failure();
94 
95  // Check whether the two source vector dimensions that are greater than one
96  // must be transposed with each other so that we can apply one of the 2-D
97  // transpose pattens. Otherwise, these patterns are not applicable.
98  if (!areDimsTransposedIn2DSlice(srcGtOneDims[0], srcGtOneDims[1],
99  op.getPermutation()))
100  return failure();
101 
102  return std::pair<int, int>(srcGtOneDims[0], srcGtOneDims[1]);
103 }
104 
105 /// Constructs a permutation map from memref indices to vector dimension.
106 ///
107 /// The implementation uses the knowledge of the mapping of enclosing loop to
108 /// vector dimension. `enclosingLoopToVectorDim` carries this information as a
109 /// map with:
110 /// - keys representing "vectorized enclosing loops";
111 /// - values representing the corresponding vector dimension.
112 /// The algorithm traverses "vectorized enclosing loops" and extracts the
113 /// at-most-one MemRef index that is invariant along said loop. This index is
114 /// guaranteed to be at most one by construction: otherwise the MemRef is not
115 /// vectorizable.
116 /// If this invariant index is found, it is added to the permutation_map at the
117 /// proper vector dimension.
118 /// If no index is found to be invariant, 0 is added to the permutation_map and
119 /// corresponds to a vector broadcast along that dimension.
120 ///
121 /// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty,
122 /// signalling that no permutation map can be constructed given
123 /// `enclosingLoopToVectorDim`.
124 ///
125 /// Examples can be found in the documentation of `makePermutationMap`, in the
126 /// header file.
128  ArrayRef<Value> indices,
129  const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) {
130  if (enclosingLoopToVectorDim.empty())
131  return AffineMap();
132  MLIRContext *context =
133  enclosingLoopToVectorDim.begin()->getFirst()->getContext();
134  SmallVector<AffineExpr> perm(enclosingLoopToVectorDim.size(),
135  getAffineConstantExpr(0, context));
136 
137  for (auto kvp : enclosingLoopToVectorDim) {
138  assert(kvp.second < perm.size());
139  auto invariants = affine::getInvariantAccesses(
140  cast<affine::AffineForOp>(kvp.first).getInductionVar(), indices);
141  unsigned numIndices = indices.size();
142  unsigned countInvariantIndices = 0;
143  for (unsigned dim = 0; dim < numIndices; ++dim) {
144  if (!invariants.count(indices[dim])) {
145  assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
146  "permutationMap already has an entry along dim");
147  perm[kvp.second] = getAffineDimExpr(dim, context);
148  } else {
149  ++countInvariantIndices;
150  }
151  }
152  assert((countInvariantIndices == numIndices ||
153  countInvariantIndices == numIndices - 1) &&
154  "Vectorization prerequisite violated: at most 1 index may be "
155  "invariant wrt a vectorized loop");
156  (void)countInvariantIndices;
157  }
158  return AffineMap::get(indices.size(), 0, perm, context);
159 }
160 
161 /// Implementation detail that walks up the parents and records the ones with
162 /// the specified type.
163 /// TODO: could also be implemented as a collect parents followed by a
164 /// filter and made available outside this file.
165 template <typename T>
168  auto *current = block->getParentOp();
169  while (current) {
170  if ([[maybe_unused]] auto typedParent = dyn_cast<T>(current)) {
171  assert(res.count(current) == 0 && "Already inserted");
172  res.insert(current);
173  }
174  current = current->getParentOp();
175  }
176  return res;
177 }
178 
179 /// Returns the enclosing AffineForOp, from closest to farthest.
181  return getParentsOfType<affine::AffineForOp>(block);
182 }
183 
185  Block *insertPoint, ArrayRef<Value> indices,
186  const DenseMap<Operation *, unsigned> &loopToVectorDim) {
187  DenseMap<Operation *, unsigned> enclosingLoopToVectorDim;
188  auto enclosingLoops = getEnclosingforOps(insertPoint);
189  for (auto *forInst : enclosingLoops) {
190  auto it = loopToVectorDim.find(forInst);
191  if (it != loopToVectorDim.end()) {
192  enclosingLoopToVectorDim.insert(*it);
193  }
194  }
195  return ::makePermutationMap(indices, enclosingLoopToVectorDim);
196 }
197 
199  Operation *op, ArrayRef<Value> indices,
200  const DenseMap<Operation *, unsigned> &loopToVectorDim) {
201  return makePermutationMap(op->getBlock(), indices, loopToVectorDim);
202 }
203 
204 bool matcher::operatesOnSuperVectorsOf(Operation &op,
205  VectorType subVectorType) {
206  // First, extract the vector type and distinguish between:
207  // a. ops that *must* lower a super-vector (i.e. vector.transfer_read,
208  // vector.transfer_write); and
209  // b. ops that *may* lower a super-vector (all other ops).
210  // The ops that *may* lower a super-vector only do so if the super-vector to
211  // sub-vector ratio exists. The ops that *must* lower a super-vector are
212  // explicitly checked for this property.
213  /// TODO: there should be a single function for all ops to do this so we
214  /// do not have to special case. Maybe a trait, or just a method, unclear atm.
215  bool mustDivide = false;
216  (void)mustDivide;
217  VectorType superVectorType;
218  if (auto transfer = dyn_cast<VectorTransferOpInterface>(op)) {
219  superVectorType = transfer.getVectorType();
220  mustDivide = true;
221  } else if (op.getNumResults() == 0) {
222  if (!isa<func::ReturnOp>(op)) {
223  op.emitError("NYI: assuming only return operations can have 0 "
224  " results at this point");
225  }
226  return false;
227  } else if (op.getNumResults() == 1) {
228  if (auto v = dyn_cast<VectorType>(op.getResult(0).getType())) {
229  superVectorType = v;
230  } else {
231  // Not a vector type.
232  return false;
233  }
234  } else {
235  // Not a vector.transfer and has more than 1 result, fail hard for now to
236  // wake us up when something changes.
237  op.emitError("NYI: operation has more than 1 result");
238  return false;
239  }
240 
241  // Get the ratio.
242  auto ratio =
243  computeShapeRatio(superVectorType.getShape(), subVectorType.getShape());
244 
245  // Sanity check.
246  assert((ratio || !mustDivide) &&
247  "vector.transfer operation in which super-vector size is not an"
248  " integer multiple of sub-vector size");
249 
250  // This catches cases that are not strictly necessary to have multiplicity but
251  // still aren't divisible by the sub-vector shape.
252  // This could be useful information if we wanted to reshape at the level of
253  // the vector type (but we would have to look at the compute and distinguish
254  // between parallel, reduction and possibly other cases.
255  return ratio.has_value();
256 }
257 
258 bool vector::isContiguousSlice(MemRefType memrefType, VectorType vectorType) {
259  if (vectorType.isScalable())
260  return false;
261 
262  ArrayRef<int64_t> vectorShape = vectorType.getShape();
263  auto vecRank = vectorType.getRank();
264 
265  if (!trailingNDimsContiguous(memrefType, vecRank))
266  return false;
267 
268  // Extract the trailing dims and strides of the input memref
269  auto memrefShape = memrefType.getShape().take_back(vecRank);
270 
271  // Compare the dims of `vectorType` against `memrefType` (in reverse).
272  // In the most basic case, all dims will match.
273  auto firstNonMatchingDim =
274  std::mismatch(vectorShape.rbegin(), vectorShape.rend(),
275  memrefShape.rbegin(), memrefShape.rend());
276  if (firstNonMatchingDim.first == vectorShape.rend())
277  return true;
278 
279  // One non-matching dim is still fine, however the remaining leading dims of
280  // `vectorType` need to be 1.
281  SmallVector<int64_t> leadingDims(++firstNonMatchingDim.first,
282  vectorShape.rend());
283 
284  return llvm::all_of(leadingDims, [](auto x) { return x == 1; });
285 }
286 
287 std::optional<StaticTileOffsetRange>
288 vector::createUnrollIterator(VectorType vType, int64_t targetRank) {
289  if (vType.getRank() <= targetRank)
290  return {};
291  // Attempt to unroll until targetRank or the first scalable dimension (which
292  // cannot be unrolled).
293  auto shapeToUnroll = vType.getShape().drop_back(targetRank);
294  auto scalableDimsToUnroll = vType.getScalableDims().drop_back(targetRank);
295  auto it =
296  std::find(scalableDimsToUnroll.begin(), scalableDimsToUnroll.end(), true);
297  auto firstScalableDim = it - scalableDimsToUnroll.begin();
298  if (firstScalableDim == 0)
299  return {};
300  // All scalable dimensions should be removed now.
301  scalableDimsToUnroll = scalableDimsToUnroll.slice(0, firstScalableDim);
302  assert(!llvm::is_contained(scalableDimsToUnroll, true) &&
303  "unexpected leading scalable dimension");
304  // Create an unroll iterator for leading dimensions.
305  shapeToUnroll = shapeToUnroll.slice(0, firstScalableDim);
306  return StaticTileOffsetRange(shapeToUnroll, /*unrollStep=*/1);
307 }
308 
310  Operation *xfer,
311  RewriterBase &rewriter) {
312  auto loc = xfer->getLoc();
313 
315  .Case<vector::TransferReadOp>(
316  [&](auto readOp) { return readOp.getSource(); })
317  .Case<vector::TransferWriteOp>(
318  [&](auto writeOp) { return writeOp.getOperand(1); });
319 
320  SmallVector<OpFoldResult> mixedSourceDims =
321  hasTensorSemantics ? tensor::getMixedSizes(rewriter, loc, base)
322  : memref::getMixedSizes(rewriter, loc, base);
323  return mixedSourceDims;
324 }
325 
326 bool vector::isLinearizableVector(VectorType type) {
327  auto numScalableDims = llvm::count(type.getScalableDims(), true);
328  return (type.getRank() > 1) && (numScalableDims <= 1);
329 }
330 
332  Value source, ArrayRef<int64_t> readShape,
333  Value padValue,
334  bool useInBoundsInsteadOfMasking) {
335  assert(llvm::none_of(readShape,
336  [](int64_t s) { return s == ShapedType::kDynamic; }) &&
337  "expected static shape");
338  auto sourceShapedType = cast<ShapedType>(source.getType());
339  auto sourceShape = sourceShapedType.getShape();
340  assert(sourceShape.size() == readShape.size() && "expected same ranks.");
341  auto maskType = VectorType::get(readShape, builder.getI1Type());
342  auto vectorType = VectorType::get(readShape, padValue.getType());
343  assert(padValue.getType() == sourceShapedType.getElementType() &&
344  "expected same pad element type to match source element type");
345  int64_t readRank = readShape.size();
346  auto zero = builder.create<arith::ConstantIndexOp>(loc, 0);
347  SmallVector<bool> inBoundsVal(readRank, true);
348  if (useInBoundsInsteadOfMasking) {
349  // Update the inBounds attribute.
350  for (unsigned i = 0; i < readRank; i++)
351  inBoundsVal[i] = (sourceShape[i] == readShape[i]) &&
352  !ShapedType::isDynamic(sourceShape[i]);
353  }
354  auto transferReadOp = builder.create<vector::TransferReadOp>(
355  loc,
356  /*vectorType=*/vectorType,
357  /*source=*/source,
358  /*indices=*/SmallVector<Value>(readRank, zero),
359  /*padding=*/padValue,
360  /*inBounds=*/inBoundsVal);
361 
362  if (llvm::equal(readShape, sourceShape) || useInBoundsInsteadOfMasking)
363  return transferReadOp;
364  SmallVector<OpFoldResult> mixedSourceDims =
365  tensor::getMixedSizes(builder, loc, source);
366  Value mask =
367  builder.create<vector::CreateMaskOp>(loc, maskType, mixedSourceDims);
368  return mlir::vector::maskOperation(builder, transferReadOp, mask)
369  ->getResult(0);
370 }
371 
374  ArrayRef<int64_t> inputVectorSizes) {
375  LDBG("Iteration space static sizes:");
376  LLVM_DEBUG(llvm::interleaveComma(shape, llvm::dbgs()));
377  LLVM_DEBUG(llvm::dbgs() << "\n");
378 
379  if (inputVectorSizes.size() != shape.size()) {
380  LDBG("Input vector sizes don't match the number of loops");
381  return failure();
382  }
383  if (ShapedType::isDynamicShape(inputVectorSizes)) {
384  LDBG("Input vector sizes can't have dynamic dimensions");
385  return failure();
386  }
387  if (!llvm::all_of(llvm::zip(shape, inputVectorSizes),
388  [](std::tuple<int64_t, int64_t> sizePair) {
389  int64_t staticSize = std::get<0>(sizePair);
390  int64_t inputSize = std::get<1>(sizePair);
391  return ShapedType::isDynamic(staticSize) ||
392  staticSize <= inputSize;
393  })) {
394  LDBG("Input vector sizes must be greater than or equal to iteration space "
395  "static sizes");
396  return failure();
397  }
398  return success();
399 }
static VectorShape vectorShape(Type type)
static bool areDimsTransposedIn2DSlice(int64_t dim0, int64_t dim1, ArrayRef< int64_t > transp)
Given the n-D transpose pattern 'transp', return true if 'dim0' and 'dim1' should be transposed with ...
Definition: VectorUtils.cpp:69
static SetVector< Operation * > getEnclosingforOps(Block *block)
Returns the enclosing AffineForOp, from closest to farthest.
static AffineMap makePermutationMap(ArrayRef< Value > indices, const DenseMap< Operation *, unsigned > &enclosingLoopToVectorDim)
Constructs a permutation map from memref indices to vector dimension.
static SetVector< Operation * > getParentsOfType(Block *block)
Implementation detail that walks up the parents and records the ones with the specified type.
#define LDBG(X)
Definition: VectorUtils.cpp:36
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:47
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
Block represents an ordered list of Operations.
Definition: Block.h:31
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:30
IntegerType getI1Type()
Definition: Builders.cpp:73
This class provides support for representing a failure result, or a valid value of type T.
Definition: LogicalResult.h:78
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:63
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This class helps build Operations.
Definition: Builders.h:209
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Definition: Builders.h:522
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:464
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:402
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:399
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:400
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
Specialization of arith.constant op that returns an integer of index type.
Definition: Arith.h:92
DenseSet< Value, DenseMapInfo< Value > > getInvariantAccesses(Value iv, ArrayRef< Value > indices)
Given an induction variable iv of type AffineForOp and indices of type IndexType, returns the set of ...
bool hasTensorSemantics(Operation *op)
Return "true" if the given op has tensor semantics and should be bufferized.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:285
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given memref value.
Definition: MemRefOps.cpp:77
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given tensor value.
Definition: TensorOps.cpp:61
bool isContiguousSlice(MemRefType memrefType, VectorType vectorType)
Return true if vectorType is a contiguous slice of memrefType.
LogicalResult isValidMaskedInputVector(ArrayRef< int64_t > shape, ArrayRef< int64_t > inputVectorSizes)
Returns success if inputVectorSizes is a valid masking configuration for given shape,...
Operation * maskOperation(OpBuilder &builder, Operation *maskableOp, Value mask, Value passthru=Value())
Creates a vector.mask operation around a maskable operation.
FailureOr< std::pair< int, int > > isTranspose2DSlice(vector::TransposeOp op)
Returns two dims that are greater than one if the transposition is applied on a 2D slice.
Definition: VectorUtils.cpp:85
std::optional< StaticTileOffsetRange > createUnrollIterator(VectorType vType, int64_t targetRank=1)
Returns an iterator for all positions in the leading dimensions of vType up to the targetRank.
Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim)
Helper function that creates a memref::DimOp or tensor::DimOp depending on the type of source.
Definition: VectorUtils.cpp:42
bool isLinearizableVector(VectorType type)
Returns true if the input Vector type can be linearized.
Value createReadOrMaskedRead(OpBuilder &builder, Location loc, Value source, ArrayRef< int64_t > readShape, Value padValue, bool useInBoundsInsteadOfMasking)
Create a TransferReadOp from source with static shape readShape.
SmallVector< OpFoldResult > getMixedSizesXfer(bool hasTensorSemantics, Operation *xfer, RewriterBase &rewriter)
A wrapper for getMixedSizes for vector.transfer_read and vector.transfer_write Ops (for source and de...
Include the generated interface declarations.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context)
Definition: AffineExpr.cpp:623
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
Definition: AffineExpr.cpp:599
bool trailingNDimsContiguous(MemRefType type, int64_t n)
Return "true" if the last N dimensions of the given type are contiguous.
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26