MLIR  22.0.0git
VectorUtils.cpp
Go to the documentation of this file.
1 //===- VectorUtils.cpp - MLIR Utilities for VectorOps ------------------===//
2 //
3 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements utility methods for working with the Vector dialect.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 
23 #include "mlir/IR/Builders.h"
24 #include "mlir/IR/IntegerSet.h"
25 #include "mlir/IR/Operation.h"
26 #include "mlir/IR/TypeUtilities.h"
27 #include "mlir/Support/LLVM.h"
28 
29 #include "llvm/ADT/DenseSet.h"
30 #include "llvm/Support/DebugLog.h"
31 #include "llvm/Support/InterleavedRange.h"
32 
33 #define DEBUG_TYPE "vector-utils"
34 
35 using namespace mlir;
36 
37 /// Helper function that creates a memref::DimOp or tensor::DimOp depending on
38 /// the type of `source`.
40  int64_t dim) {
41  if (isa<UnrankedMemRefType, MemRefType>(source.getType()))
42  return b.createOrFold<memref::DimOp>(loc, source, dim);
43  if (isa<UnrankedTensorType, RankedTensorType>(source.getType()))
44  return b.createOrFold<tensor::DimOp>(loc, source, dim);
45  llvm_unreachable("Expected MemRefType or TensorType");
46 }
47 
48 /// Given the n-D transpose pattern 'transp', return true if 'dim0' and 'dim1'
49 /// should be transposed with each other within the context of their 2D
50 /// transposition slice.
51 ///
52 /// Example 1: dim0 = 0, dim1 = 2, transp = [2, 1, 0]
53 /// Return true: dim0 and dim1 are transposed within the context of their 2D
54 /// transposition slice ([1, 0]).
55 ///
56 /// Example 2: dim0 = 0, dim1 = 1, transp = [2, 1, 0]
57 /// Return true: dim0 and dim1 are transposed within the context of their 2D
58 /// transposition slice ([1, 0]). Paradoxically, note how dim1 (1) is *not*
59 /// transposed within the full context of the transposition.
60 ///
61 /// Example 3: dim0 = 0, dim1 = 1, transp = [2, 0, 1]
62 /// Return false: dim0 and dim1 are *not* transposed within the context of
63 /// their 2D transposition slice ([0, 1]). Paradoxically, note how dim0 (0)
64 /// and dim1 (1) are transposed within the full context of the of the
65 /// transposition.
66 static bool areDimsTransposedIn2DSlice(int64_t dim0, int64_t dim1,
67  ArrayRef<int64_t> transp) {
68  // Perform a linear scan along the dimensions of the transposed pattern. If
69  // dim0 is found first, dim0 and dim1 are not transposed within the context of
70  // their 2D slice. Otherwise, 'dim1' is found first and they are transposed.
71  for (int64_t permDim : transp) {
72  if (permDim == dim0)
73  return false;
74  if (permDim == dim1)
75  return true;
76  }
77 
78  llvm_unreachable("Ill-formed transpose pattern");
79 }
80 
81 FailureOr<std::pair<int, int>>
82 mlir::vector::isTranspose2DSlice(vector::TransposeOp op) {
83  VectorType srcType = op.getSourceVectorType();
84  SmallVector<int64_t> srcGtOneDims;
85  for (auto [index, size] : llvm::enumerate(srcType.getShape()))
86  if (size > 1)
87  srcGtOneDims.push_back(index);
88 
89  if (srcGtOneDims.size() != 2)
90  return failure();
91 
92  // Check whether the two source vector dimensions that are greater than one
93  // must be transposed with each other so that we can apply one of the 2-D
94  // transpose pattens. Otherwise, these patterns are not applicable.
95  if (!areDimsTransposedIn2DSlice(srcGtOneDims[0], srcGtOneDims[1],
96  op.getPermutation()))
97  return failure();
98 
99  return std::pair<int, int>(srcGtOneDims[0], srcGtOneDims[1]);
100 }
101 
102 /// Constructs a permutation map from memref indices to vector dimension.
103 ///
104 /// The implementation uses the knowledge of the mapping of enclosing loop to
105 /// vector dimension. `enclosingLoopToVectorDim` carries this information as a
106 /// map with:
107 /// - keys representing "vectorized enclosing loops";
108 /// - values representing the corresponding vector dimension.
109 /// The algorithm traverses "vectorized enclosing loops" and extracts the
110 /// at-most-one MemRef index that is invariant along said loop. This index is
111 /// guaranteed to be at most one by construction: otherwise the MemRef is not
112 /// vectorizable.
113 /// If this invariant index is found, it is added to the permutation_map at the
114 /// proper vector dimension.
115 /// If no index is found to be invariant, 0 is added to the permutation_map and
116 /// corresponds to a vector broadcast along that dimension.
117 ///
118 /// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty,
119 /// signalling that no permutation map can be constructed given
120 /// `enclosingLoopToVectorDim`.
121 ///
122 /// Examples can be found in the documentation of `makePermutationMap`, in the
123 /// header file.
125  ArrayRef<Value> indices,
126  const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) {
127  if (enclosingLoopToVectorDim.empty())
128  return AffineMap();
129  MLIRContext *context =
130  enclosingLoopToVectorDim.begin()->getFirst()->getContext();
131  SmallVector<AffineExpr> perm(enclosingLoopToVectorDim.size(),
132  getAffineConstantExpr(0, context));
133 
134  for (auto kvp : enclosingLoopToVectorDim) {
135  assert(kvp.second < perm.size());
136  auto invariants = affine::getInvariantAccesses(
137  cast<affine::AffineForOp>(kvp.first).getInductionVar(), indices);
138  unsigned numIndices = indices.size();
139  unsigned countInvariantIndices = 0;
140  for (unsigned dim = 0; dim < numIndices; ++dim) {
141  if (!invariants.count(indices[dim])) {
142  assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
143  "permutationMap already has an entry along dim");
144  perm[kvp.second] = getAffineDimExpr(dim, context);
145  } else {
146  ++countInvariantIndices;
147  }
148  }
149  assert((countInvariantIndices == numIndices ||
150  countInvariantIndices == numIndices - 1) &&
151  "Vectorization prerequisite violated: at most 1 index may be "
152  "invariant wrt a vectorized loop");
153  (void)countInvariantIndices;
154  }
155  return AffineMap::get(indices.size(), 0, perm, context);
156 }
157 
158 /// Implementation detail that walks up the parents and records the ones with
159 /// the specified type.
160 /// TODO: could also be implemented as a collect parents followed by a
161 /// filter and made available outside this file.
162 template <typename T>
165  auto *current = block->getParentOp();
166  while (current) {
167  if ([[maybe_unused]] auto typedParent = dyn_cast<T>(current)) {
168  assert(res.count(current) == 0 && "Already inserted");
169  res.insert(current);
170  }
171  current = current->getParentOp();
172  }
173  return res;
174 }
175 
176 /// Returns the enclosing AffineForOp, from closest to farthest.
178  return getParentsOfType<affine::AffineForOp>(block);
179 }
180 
182  Block *insertPoint, ArrayRef<Value> indices,
183  const DenseMap<Operation *, unsigned> &loopToVectorDim) {
184  DenseMap<Operation *, unsigned> enclosingLoopToVectorDim;
185  auto enclosingLoops = getEnclosingforOps(insertPoint);
186  for (auto *forInst : enclosingLoops) {
187  auto it = loopToVectorDim.find(forInst);
188  if (it != loopToVectorDim.end()) {
189  enclosingLoopToVectorDim.insert(*it);
190  }
191  }
192  return ::makePermutationMap(indices, enclosingLoopToVectorDim);
193 }
194 
196  Operation *op, ArrayRef<Value> indices,
197  const DenseMap<Operation *, unsigned> &loopToVectorDim) {
198  return makePermutationMap(op->getBlock(), indices, loopToVectorDim);
199 }
200 
201 bool matcher::operatesOnSuperVectorsOf(Operation &op,
202  VectorType subVectorType) {
203  // First, extract the vector type and distinguish between:
204  // a. ops that *must* lower a super-vector (i.e. vector.transfer_read,
205  // vector.transfer_write); and
206  // b. ops that *may* lower a super-vector (all other ops).
207  // The ops that *may* lower a super-vector only do so if the super-vector to
208  // sub-vector ratio exists. The ops that *must* lower a super-vector are
209  // explicitly checked for this property.
210  /// TODO: there should be a single function for all ops to do this so we
211  /// do not have to special case. Maybe a trait, or just a method, unclear atm.
212  bool mustDivide = false;
213  (void)mustDivide;
214  VectorType superVectorType;
215  if (auto transfer = dyn_cast<VectorTransferOpInterface>(op)) {
216  superVectorType = transfer.getVectorType();
217  mustDivide = true;
218  } else if (op.getNumResults() == 0) {
219  if (!isa<func::ReturnOp>(op)) {
220  op.emitError("NYI: assuming only return operations can have 0 "
221  " results at this point");
222  }
223  return false;
224  } else if (op.getNumResults() == 1) {
225  if (auto v = dyn_cast<VectorType>(op.getResult(0).getType())) {
226  superVectorType = v;
227  } else {
228  // Not a vector type.
229  return false;
230  }
231  } else {
232  // Not a vector.transfer and has more than 1 result, fail hard for now to
233  // wake us up when something changes.
234  op.emitError("NYI: operation has more than 1 result");
235  return false;
236  }
237 
238  // Get the ratio.
239  auto ratio =
240  computeShapeRatio(superVectorType.getShape(), subVectorType.getShape());
241 
242  // Sanity check.
243  assert((ratio || !mustDivide) &&
244  "vector.transfer operation in which super-vector size is not an"
245  " integer multiple of sub-vector size");
246 
247  // This catches cases that are not strictly necessary to have multiplicity but
248  // still aren't divisible by the sub-vector shape.
249  // This could be useful information if we wanted to reshape at the level of
250  // the vector type (but we would have to look at the compute and distinguish
251  // between parallel, reduction and possibly other cases.
252  return ratio.has_value();
253 }
254 
255 bool vector::isContiguousSlice(MemRefType memrefType, VectorType vectorType) {
256  if (vectorType.isScalable())
257  return false;
258 
259  // Ignore a leading sequence of adjacent unit dimensions in the vector.
261  vectorType.getShape().drop_while([](auto v) { return v == 1; });
262  auto vecRank = vectorShape.size();
263 
264  if (!memrefType.areTrailingDimsContiguous(vecRank))
265  return false;
266 
267  // Extract the trailing dims of the input memref
268  auto memrefShape = memrefType.getShape().take_back(vecRank);
269 
270  // Compare the dims of `vectorType` against `memrefType`.
271  // All of the dimensions, except the first must match.
272  return llvm::equal(vectorShape.drop_front(), memrefShape.drop_front());
273 }
274 
275 std::optional<StaticTileOffsetRange>
276 vector::createUnrollIterator(VectorType vType, int64_t targetRank) {
277  if (vType.getRank() <= targetRank)
278  return {};
279  // Attempt to unroll until targetRank or the first scalable dimension (which
280  // cannot be unrolled).
281  auto shapeToUnroll = vType.getShape().drop_back(targetRank);
282  auto inputScalableVecDimsToUnroll =
283  vType.getScalableDims().drop_back(targetRank);
284  auto it = llvm::find(inputScalableVecDimsToUnroll, true);
285  auto firstScalableDim = it - inputScalableVecDimsToUnroll.begin();
286  if (firstScalableDim == 0)
287  return {};
288  // All scalable dimensions should be removed now.
289  inputScalableVecDimsToUnroll =
290  inputScalableVecDimsToUnroll.slice(0, firstScalableDim);
291  assert(!llvm::is_contained(inputScalableVecDimsToUnroll, true) &&
292  "unexpected leading scalable dimension");
293  // Create an unroll iterator for leading dimensions.
294  shapeToUnroll = shapeToUnroll.slice(0, firstScalableDim);
295  return StaticTileOffsetRange(shapeToUnroll, /*unrollStep=*/1);
296 }
297 
299  Operation *xfer,
300  RewriterBase &rewriter) {
301  auto loc = xfer->getLoc();
302 
304  .Case<vector::TransferReadOp>(
305  [&](auto readOp) { return readOp.getBase(); })
306  .Case<vector::TransferWriteOp>(
307  [&](auto writeOp) { return writeOp.getOperand(1); });
308 
309  SmallVector<OpFoldResult> mixedSourceDims =
310  hasTensorSemantics ? tensor::getMixedSizes(rewriter, loc, base)
311  : memref::getMixedSizes(rewriter, loc, base);
312  return mixedSourceDims;
313 }
314 
315 bool vector::isLinearizableVector(VectorType type) {
316  return (type.getRank() > 1) && (type.getNumScalableDims() <= 1);
317 }
318 
320  Value source,
321  ArrayRef<int64_t> inputVectorSizes,
322  Value padValue,
323  bool useInBoundsInsteadOfMasking,
324  ArrayRef<bool> inputScalableVecDims) {
325  assert(!llvm::is_contained(inputVectorSizes, ShapedType::kDynamic) &&
326  "invalid input vector sizes");
327  auto sourceShapedType = cast<ShapedType>(source.getType());
328  auto sourceShape = sourceShapedType.getShape();
329  assert(sourceShape.size() == inputVectorSizes.size() &&
330  "expected same ranks.");
331  auto vectorType = VectorType::get(inputVectorSizes, padValue.getType(),
332  inputScalableVecDims);
333  assert(padValue.getType() == sourceShapedType.getElementType() &&
334  "expected same pad element type to match source element type");
335  int64_t readRank = inputVectorSizes.size();
336  auto zero = arith::ConstantIndexOp::create(builder, loc, 0);
337  SmallVector<bool> inBoundsVal(readRank, true);
338 
339  if (useInBoundsInsteadOfMasking) {
340  // Update the inBounds attribute.
341  // FIXME: This computation is too weak - it ignores the read indices.
342  for (unsigned i = 0; i < readRank; i++)
343  inBoundsVal[i] = (sourceShape[i] == inputVectorSizes[i]) &&
344  ShapedType::isStatic(sourceShape[i]);
345  }
346  auto transferReadOp = vector::TransferReadOp::create(
347  builder, loc,
348  /*vectorType=*/vectorType,
349  /*source=*/source,
350  /*indices=*/SmallVector<Value>(readRank, zero),
351  /*padding=*/padValue,
352  /*inBounds=*/inBoundsVal);
353 
354  if (llvm::equal(inputVectorSizes, sourceShape) || useInBoundsInsteadOfMasking)
355  return transferReadOp;
356  SmallVector<OpFoldResult> mixedSourceDims =
357  isa<MemRefType>(source.getType())
358  ? memref::getMixedSizes(builder, loc, source)
359  : tensor::getMixedSizes(builder, loc, source);
360 
361  auto maskType = VectorType::get(inputVectorSizes, builder.getI1Type(),
362  inputScalableVecDims);
363  Value mask =
364  vector::CreateMaskOp::create(builder, loc, maskType, mixedSourceDims);
365  return mlir::vector::maskOperation(builder, transferReadOp, mask)
366  ->getResult(0);
367 }
368 
369 LogicalResult
371  ArrayRef<int64_t> inputVectorSizes) {
372  LDBG() << "Iteration space static sizes:" << llvm::interleaved(shape);
373 
374  if (inputVectorSizes.size() != shape.size()) {
375  LDBG() << "Input vector sizes don't match the number of loops";
376  return failure();
377  }
378  if (ShapedType::isDynamicShape(inputVectorSizes)) {
379  LDBG() << "Input vector sizes can't have dynamic dimensions";
380  return failure();
381  }
382  if (!llvm::all_of(llvm::zip(shape, inputVectorSizes),
383  [](std::tuple<int64_t, int64_t> sizePair) {
384  int64_t staticSize = std::get<0>(sizePair);
385  int64_t inputSize = std::get<1>(sizePair);
386  return ShapedType::isDynamic(staticSize) ||
387  staticSize <= inputSize;
388  })) {
389  LDBG() << "Input vector sizes must be greater than or equal to iteration "
390  "space static sizes";
391  return failure();
392  }
393  return success();
394 }
395 
396 LogicalResult vector::unrollVectorOp(Operation *op, PatternRewriter &rewriter,
397  vector::UnrollVectorOpFn unrollFn) {
398  assert(op->getNumResults() == 1 && "expected single result");
399  assert(isa<VectorType>(op->getResult(0).getType()) && "expected vector type");
400  VectorType resultTy = cast<VectorType>(op->getResult(0).getType());
401  if (resultTy.getRank() < 2)
402  return rewriter.notifyMatchFailure(op, "already 1-D");
403 
404  // Unrolling doesn't take vscale into account. Pattern is disabled for
405  // vectors with leading scalable dim(s).
406  if (resultTy.getScalableDims().front())
407  return rewriter.notifyMatchFailure(op, "cannot unroll scalable dim");
408 
409  Location loc = op->getLoc();
410  Value result = ub::PoisonOp::create(rewriter, loc, resultTy);
411  VectorType subTy = VectorType::Builder(resultTy).dropDim(0);
412 
413  for (int64_t i = 0, e = resultTy.getShape().front(); i < e; ++i) {
414  Value subVector = unrollFn(rewriter, loc, subTy, i);
415  result = vector::InsertOp::create(rewriter, loc, subVector, result, i);
416  }
417 
418  rewriter.replaceOp(op, result);
419  return success();
420 }
static std::optional< VectorShape > vectorShape(Type type)
static bool areDimsTransposedIn2DSlice(int64_t dim0, int64_t dim1, ArrayRef< int64_t > transp)
Given the n-D transpose pattern 'transp', return true if 'dim0' and 'dim1' should be transposed with ...
Definition: VectorUtils.cpp:66
static SetVector< Operation * > getEnclosingforOps(Block *block)
Returns the enclosing AffineForOp, from closest to farthest.
static AffineMap makePermutationMap(ArrayRef< Value > indices, const DenseMap< Operation *, unsigned > &enclosingLoopToVectorDim)
Constructs a permutation map from memref indices to vector dimension.
static SetVector< Operation * > getParentsOfType(Block *block)
Implementation detail that walks up the parents and records the ones with the specified type.
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:46
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
Block represents an ordered list of Operations.
Definition: Block.h:33
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:31
IntegerType getI1Type()
Definition: Builders.cpp:52
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:63
This class helps build Operations.
Definition: Builders.h:205
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Definition: Builders.h:517
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:267
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:404
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
Definition: PatternMatch.h:783
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:358
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition: PatternMatch.h:716
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
This is a builder type that keeps local references to arguments.
Definition: BuiltinTypes.h:286
Builder & dropDim(unsigned pos)
Erase a dim from shape @pos.
Definition: BuiltinTypes.h:311
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Definition: ArithOps.cpp:359
DenseSet< Value, DenseMapInfo< Value > > getInvariantAccesses(Value iv, ArrayRef< Value > indices)
Given an induction variable iv of type AffineForOp and indices of type IndexType, returns the set of ...
bool hasTensorSemantics(Operation *op)
Return "true" if the given op has tensor semantics and should be bufferized.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given memref value.
Definition: MemRefOps.cpp:77
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given tensor value.
Definition: TensorOps.cpp:70
bool isContiguousSlice(MemRefType memrefType, VectorType vectorType)
Return true if vectorType is a contiguous slice of memrefType, in the sense that it can be read/writt...
LogicalResult isValidMaskedInputVector(ArrayRef< int64_t > shape, ArrayRef< int64_t > inputVectorSizes)
Returns success if inputVectorSizes is a valid masking configuraion for given shape,...
Operation * maskOperation(OpBuilder &builder, Operation *maskableOp, Value mask, Value passthru=Value())
Creates a vector.mask operation around a maskable operation.
FailureOr< std::pair< int, int > > isTranspose2DSlice(vector::TransposeOp op)
Returns two dims that are greater than one if the transposition is applied on a 2D slice.
Definition: VectorUtils.cpp:82
std::optional< StaticTileOffsetRange > createUnrollIterator(VectorType vType, int64_t targetRank=1)
Returns an iterator for all positions in the leading dimensions of vType up to the targetRank.
Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim)
Helper function that creates a memref::DimOp or tensor::DimOp depending on the type of source.
Definition: VectorUtils.cpp:39
bool isLinearizableVector(VectorType type)
Returns true if the input Vector type can be linearized.
SmallVector< OpFoldResult > getMixedSizesXfer(bool hasTensorSemantics, Operation *xfer, RewriterBase &rewriter)
A wrapper for getMixedSizes for vector.transfer_read and vector.transfer_write Ops (for source and de...
Value createReadOrMaskedRead(OpBuilder &builder, Location loc, Value source, ArrayRef< int64_t > inputVectorSizes, Value padValue, bool useInBoundsInsteadOfMasking=false, ArrayRef< bool > inputScalableVecDims={})
Creates a TransferReadOp from source.
LogicalResult unrollVectorOp(Operation *op, PatternRewriter &rewriter, UnrollVectorOpFn unrollFn)
Include the generated interface declarations.
AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context)
Definition: AffineExpr.cpp:643
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
Definition: AffineExpr.cpp:619