MLIR 23.0.0git
VectorUtils.cpp
Go to the documentation of this file.
1//===- VectorUtils.cpp - MLIR Utilities for VectorOps ------------------===//
2//
3// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements utility methods for working with the Vector dialect.
10//
11//===----------------------------------------------------------------------===//
12
14
24#include "mlir/IR/Builders.h"
25#include "mlir/IR/IntegerSet.h"
26#include "mlir/IR/Matchers.h"
27#include "mlir/IR/Operation.h"
29#include "mlir/Support/LLVM.h"
30
31#include "llvm/ADT/DenseSet.h"
32#include "llvm/Support/DebugLog.h"
33#include "llvm/Support/InterleavedRange.h"
34
35#define DEBUG_TYPE "vector-utils"
36
37using namespace mlir;
38
39/// Helper function that creates a memref::DimOp or tensor::DimOp depending on
40/// the type of `source`.
42 int64_t dim) {
43 if (isa<UnrankedMemRefType, MemRefType>(source.getType()))
44 return b.createOrFold<memref::DimOp>(loc, source, dim);
45 if (isa<UnrankedTensorType, RankedTensorType>(source.getType()))
46 return b.createOrFold<tensor::DimOp>(loc, source, dim);
47 llvm_unreachable("Expected MemRefType or TensorType");
48}
49
50/// Given the n-D transpose pattern 'transp', return true if 'dim0' and 'dim1'
51/// should be transposed with each other within the context of their 2D
52/// transposition slice.
53///
54/// Example 1: dim0 = 0, dim1 = 2, transp = [2, 1, 0]
55/// Return true: dim0 and dim1 are transposed within the context of their 2D
56/// transposition slice ([1, 0]).
57///
58/// Example 2: dim0 = 0, dim1 = 1, transp = [2, 1, 0]
59/// Return true: dim0 and dim1 are transposed within the context of their 2D
60/// transposition slice ([1, 0]). Paradoxically, note how dim1 (1) is *not*
61/// transposed within the full context of the transposition.
62///
63/// Example 3: dim0 = 0, dim1 = 1, transp = [2, 0, 1]
64/// Return false: dim0 and dim1 are *not* transposed within the context of
65/// their 2D transposition slice ([0, 1]). Paradoxically, note how dim0 (0)
/// and dim1 (1) are transposed within the full context of the
/// transposition.
69 ArrayRef<int64_t> transp) {
70 // Perform a linear scan along the dimensions of the transposed pattern. If
71 // dim0 is found first, dim0 and dim1 are not transposed within the context of
72 // their 2D slice. Otherwise, 'dim1' is found first and they are transposed.
73 for (int64_t permDim : transp) {
74 if (permDim == dim0)
75 return false;
76 if (permDim == dim1)
77 return true;
78 }
79
80 llvm_unreachable("Ill-formed transpose pattern");
81}
82
83FailureOr<std::pair<int, int>>
84mlir::vector::isTranspose2DSlice(vector::TransposeOp op) {
85 VectorType srcType = op.getSourceVectorType();
86 SmallVector<int64_t> srcGtOneDims;
87 for (auto [index, size] : llvm::enumerate(srcType.getShape()))
88 if (size > 1)
89 srcGtOneDims.push_back(index);
90
91 if (srcGtOneDims.size() != 2)
92 return failure();
93
94 // Check whether the two source vector dimensions that are greater than one
95 // must be transposed with each other so that we can apply one of the 2-D
96 // transpose patterns. Otherwise, these patterns are not applicable.
97 if (!areDimsTransposedIn2DSlice(srcGtOneDims[0], srcGtOneDims[1],
98 op.getPermutation()))
99 return failure();
100
101 return std::pair<int, int>(srcGtOneDims[0], srcGtOneDims[1]);
102}
103
104/// Constructs a permutation map from memref indices to vector dimension.
105///
106/// The implementation uses the knowledge of the mapping of enclosing loop to
107/// vector dimension. `enclosingLoopToVectorDim` carries this information as a
108/// map with:
109/// - keys representing "vectorized enclosing loops";
110/// - values representing the corresponding vector dimension.
/// The algorithm traverses "vectorized enclosing loops" and extracts the
/// at-most-one MemRef index that varies (i.e. is not invariant) along said
/// loop. There is guaranteed to be at most one such index by construction:
/// otherwise the MemRef is not vectorizable.
/// If this varying index is found, it is added to the permutation_map at the
/// proper vector dimension.
/// If every index is invariant along the loop, 0 is added to the
/// permutation_map and corresponds to a vector broadcast along that dimension.
119///
120/// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty,
121/// signalling that no permutation map can be constructed given
122/// `enclosingLoopToVectorDim`.
123///
124/// Examples can be found in the documentation of `makePermutationMap`, in the
125/// header file.
128 const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) {
129 if (enclosingLoopToVectorDim.empty())
130 return AffineMap();
131 MLIRContext *context =
132 enclosingLoopToVectorDim.begin()->getFirst()->getContext();
133 SmallVector<AffineExpr> perm(enclosingLoopToVectorDim.size(),
134 getAffineConstantExpr(0, context));
135
136 for (auto kvp : enclosingLoopToVectorDim) {
137 assert(kvp.second < perm.size());
138 auto invariants = affine::getInvariantAccesses(
139 cast<affine::AffineForOp>(kvp.first).getInductionVar(), indices);
140 unsigned numIndices = indices.size();
141 unsigned countInvariantIndices = 0;
142 for (unsigned dim = 0; dim < numIndices; ++dim) {
143 if (!invariants.count(indices[dim])) {
144 assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
145 "permutationMap already has an entry along dim");
146 perm[kvp.second] = getAffineDimExpr(dim, context);
147 } else {
148 ++countInvariantIndices;
149 }
150 }
151 assert((countInvariantIndices == numIndices ||
152 countInvariantIndices == numIndices - 1) &&
153 "Vectorization prerequisite violated: at most 1 index may be "
154 "invariant wrt a vectorized loop");
155 (void)countInvariantIndices;
156 }
157 return AffineMap::get(indices.size(), 0, perm, context);
158}
159
160/// Implementation detail that walks up the parents and records the ones with
161/// the specified type.
162/// TODO: could also be implemented as a collect parents followed by a
163/// filter and made available outside this file.
164template <typename T>
167 auto *current = block->getParentOp();
168 while (current) {
169 if ([[maybe_unused]] auto typedParent = dyn_cast<T>(current)) {
170 assert(res.count(current) == 0 && "Already inserted");
171 res.insert(current);
172 }
173 current = current->getParentOp();
174 }
175 return res;
176}
177
178/// Returns the enclosing AffineForOp, from closest to farthest.
182
183AffineMap mlir::makePermutationMap(
184 Block *insertPoint, ArrayRef<Value> indices,
185 const DenseMap<Operation *, unsigned> &loopToVectorDim) {
186 DenseMap<Operation *, unsigned> enclosingLoopToVectorDim;
187 auto enclosingLoops = getEnclosingforOps(insertPoint);
188 for (auto *forInst : enclosingLoops) {
189 auto it = loopToVectorDim.find(forInst);
190 if (it != loopToVectorDim.end()) {
191 enclosingLoopToVectorDim.insert(*it);
192 }
193 }
194 return ::makePermutationMap(indices, enclosingLoopToVectorDim);
195}
196
197AffineMap mlir::makePermutationMap(
199 const DenseMap<Operation *, unsigned> &loopToVectorDim) {
200 return makePermutationMap(op->getBlock(), indices, loopToVectorDim);
201}
202
203bool matcher::operatesOnSuperVectorsOf(Operation &op,
204 VectorType subVectorType) {
205 // First, extract the vector type and distinguish between:
206 // a. ops that *must* lower a super-vector (i.e. vector.transfer_read,
207 // vector.transfer_write); and
208 // b. ops that *may* lower a super-vector (all other ops).
209 // The ops that *may* lower a super-vector only do so if the super-vector to
210 // sub-vector ratio exists. The ops that *must* lower a super-vector are
211 // explicitly checked for this property.
212 /// TODO: there should be a single function for all ops to do this so we
213 /// do not have to special case. Maybe a trait, or just a method, unclear atm.
214 VectorType superVectorType;
215 if (auto transfer = dyn_cast<VectorTransferOpInterface>(op)) {
216 superVectorType = transfer.getVectorType();
217 } else if (op.getNumResults() == 0) {
218 if (!isa<func::ReturnOp>(op)) {
219 op.emitError("NYI: assuming only return operations can have 0 "
220 " results at this point");
221 }
222 return false;
223 } else if (op.getNumResults() == 1) {
224 if (auto v = dyn_cast<VectorType>(op.getResult(0).getType())) {
225 superVectorType = v;
226 } else {
227 // Not a vector type.
228 return false;
229 }
230 } else {
231 // Not a vector.transfer and has more than 1 result, fail hard for now to
232 // wake us up when something changes.
233 op.emitError("NYI: operation has more than 1 result");
234 return false;
235 }
236
237 // Get the ratio. If the shapes are incompatible (e.g., different ranks or
238 // non-integer divisibility), the operation does not operate on a super-vector
239 // of the given sub-vector type.
240 auto ratio =
241 computeShapeRatio(superVectorType.getShape(), subVectorType.getShape());
242 return ratio.has_value();
243}
244
245bool vector::isContiguousSlice(MemRefType memrefType, VectorType vectorType) {
246 if (vectorType.isScalable())
247 return false;
248
249 // Ignore a leading sequence of adjacent unit dimensions in the vector.
251 vectorType.getShape().drop_while([](auto v) { return v == 1; });
252 auto vecRank = vectorShape.size();
253
254 // A single element is always contiguous.
255 if (vecRank == 0)
256 return true;
257
258 if (!memrefType.areTrailingDimsContiguous(vecRank))
259 return false;
260
261 // Extract the trailing dims of the input memref
262 auto memrefShape = memrefType.getShape().take_back(vecRank);
263
264 // Compare the dims of `vectorType` against `memrefType`.
265 // All of the dimensions, except the first must match.
266 return llvm::equal(vectorShape.drop_front(), memrefShape.drop_front());
267}
268
269std::optional<StaticTileOffsetRange>
270vector::createUnrollIterator(VectorType vType, int64_t targetRank) {
271 if (vType.getRank() <= targetRank)
272 return {};
273 // Attempt to unroll until targetRank or the first scalable dimension (which
274 // cannot be unrolled).
275 auto shapeToUnroll = vType.getShape().drop_back(targetRank);
276 auto inputScalableVecDimsToUnroll =
277 vType.getScalableDims().drop_back(targetRank);
278 const auto *it = llvm::find(inputScalableVecDimsToUnroll, true);
279 auto firstScalableDim = it - inputScalableVecDimsToUnroll.begin();
280 if (firstScalableDim == 0)
281 return {};
282 // All scalable dimensions should be removed now.
283 inputScalableVecDimsToUnroll =
284 inputScalableVecDimsToUnroll.slice(0, firstScalableDim);
285 assert(!llvm::is_contained(inputScalableVecDimsToUnroll, true) &&
286 "unexpected leading scalable dimension");
287 // Create an unroll iterator for leading dimensions.
288 shapeToUnroll = shapeToUnroll.slice(0, firstScalableDim);
289 return StaticTileOffsetRange(shapeToUnroll, /*unrollStep=*/1);
290}
291
293 Operation *xfer,
294 RewriterBase &rewriter) {
295 auto loc = xfer->getLoc();
296
297 Value base =
299 .Case([&](vector::TransferReadOp readOp) { return readOp.getBase(); })
300 .Case([&](vector::TransferWriteOp writeOp) {
301 return writeOp.getOperand(1);
302 });
303
304 SmallVector<OpFoldResult> mixedSourceDims =
305 hasTensorSemantics ? tensor::getMixedSizes(rewriter, loc, base)
306 : memref::getMixedSizes(rewriter, loc, base);
307 return mixedSourceDims;
308}
309
310bool vector::isLinearizableVector(VectorType type) {
311 return (type.getRank() > 1) && (type.getNumScalableDims() <= 1);
312}
313
314/// Determines whether a mask for xfer_read/write is trivially "all true"
315///
316/// Given all the inputs required to generate a mask (mask sizes and shapes),
317/// and an xfer_read/write operation (indices and the source/destination tensor
318/// shape), determines whether the corresponding mask would be trivially
319/// foldable (i.e., trivially "all true").
320///
321/// Use this method to avoid generating spurious masks and relying on
322/// vectorization post-processing to remove them.
323///
324/// Pre-conditions for a mask to be trivially foldable:
325/// * All involved shapes (mask + destination tensor) are static.
326/// * All indices are constant.
327/// * All mask sizes are constant (including `arith.constant`).
328///
329/// If the pre-conditions are met, the method checks for each destination
330/// dimension `d`:
331/// (1) destDimSize[rankDiff + d] <= maskShape[d]
332/// (2) destDimSize[rankDiff + d] <= index[d] + maskSize[d]
333///
334/// rankDiff = rank(dest) - rank(mask).
335///
336/// This method takes a conservative view: it may return false even if the mask
337/// is technically foldable.
338///
339/// EXAMPLE 1 (trivially foldable, all shapes match, mask sizes match the shape
340/// of the dest tensor):
341/// %c0 = arith.constant 0 : index
342/// %mask = vector.create_mask 5, 1
343/// vector.mask %mask {
/// vector.transfer_write %vecToStore_1, %dest[%c0, %c0]
345/// {in_bounds = [true, true]}
346/// : vector<5x1xi32>, tensor<5x1xi32>
347/// }
348///
349/// EXAMPLE 2 (not trivially foldable - vector shape exceeds the tensor shape,
350/// mask is required to avoid out-of-bounds write):
351/// %c0 = arith.constant 0 : index
352/// %mask = vector.create_mask 5, 1
353/// vector.mask %mask {
354/// vector.transfer_write %vecToStore_2, %dest[%c0, %c0]
355/// {in_bounds = [true, true]}
356/// : vector<8x1xi32>, tensor<5x1xi32>
357/// }
360 ArrayRef<int64_t> baseShape,
361 ArrayRef<int64_t> maskShape) {
362 // Masking is unavoidable in the case of dynamic tensors.
363 if (ShapedType::isDynamicShape(baseShape))
364 return false;
365
366 // Collect all constant mask sizes.
367 SmallVector<int64_t, 4> cstMaskSizes;
368 for (auto [i, dimSize] : llvm::enumerate(maskSizes)) {
369 if (auto intSize = getConstantIntValue(dimSize)) {
370 cstMaskSizes.push_back(*intSize);
371 }
372 }
373
374 // If any of the mask sizes is non-constant, bail out.
375 if (cstMaskSizes.size() != maskShape.size())
376 return false;
377
378 // Collect all constant indices.
379 SmallVector<int64_t, 4> cstIndices;
380 for (auto [i, idx] : llvm::enumerate(indices)) {
381 APSInt intVal;
382 if (matchPattern(idx, m_ConstantInt(&intVal))) {
383 cstIndices.push_back(intVal.getSExtValue());
384 }
385 }
386
387 // If any of the indices is non-constant, bail out.
388 if (cstIndices.size() != baseShape.size())
389 return false;
390
391 // Go over all destination dims and check (1) and (2). Take into account that:
392 // * The number of mask sizes will match the rank of the vector to
393 // load/store. This could be lower than the rank of the destination tensor.
394 // * Mask sizes could be larger than the corresponding mask shape (hence
395 // `clamp`).
396 // TODO: The 2nd item should be rejected by the verifier.
397 int64_t rankDiff = baseShape.size() - cstMaskSizes.size();
398 for (auto [i, idx] : llvm::enumerate(cstMaskSizes)) {
399 if (/*(1)*/ maskShape[i] > baseShape[rankDiff + i] ||
400 /*(2)*/ baseShape[rankDiff + i] <
401 (std::clamp(cstMaskSizes[i], int64_t(0), maskShape[i]) +
402 cstIndices[i]))
403 return false;
404 }
405
406 return true;
407}
408
410 Value source,
411 ArrayRef<int64_t> inputVectorSizes,
412 std::optional<Value> padValue,
413 bool useInBoundsInsteadOfMasking,
414 ArrayRef<bool> inputScalableVecDims) {
415 VectorType vecToReadTy = VectorType::get(
416 inputVectorSizes, cast<ShapedType>(source.getType()).getElementType(),
417 inputScalableVecDims);
418
419 return createReadOrMaskedRead(builder, loc, source, vecToReadTy, padValue,
420 useInBoundsInsteadOfMasking);
421}
422
423/// Compute the in_bounds attribute for a transfer op given its permutation map
424/// and the source being accessed. Dimension i is in-bounds when the map result
425/// is an AffineDimExpr pointing to a static source dimension divisible by the
426/// vector size, or an AffineConstantExpr (broadcast).
428 AffineMap permutationMap, VectorType vectorType, ShapedType sourceType) {
429 SmallVector<bool> inBounds(vectorType.getRank(), false);
430 for (unsigned i = 0; i < (unsigned)vectorType.getRank(); ++i) {
431 AffineExpr expr = permutationMap.getResult(i);
432 if (auto dimExpr = dyn_cast<AffineDimExpr>(expr)) {
433 unsigned memDim = dimExpr.getPosition();
434 if (!sourceType.isDynamicDim(memDim) &&
435 sourceType.getDimSize(memDim) % vectorType.getDimSize(i) == 0)
436 inBounds[i] = true;
437 } else if (isa<AffineConstantExpr>(expr)) {
438 inBounds[i] = true;
439 }
440 }
441 return inBounds;
442}
443
445 Value source,
446 const VectorType &vecToReadTy,
447 std::optional<Value> padValue,
448 bool useInBoundsInsteadOfMasking,
449 ArrayRef<Value> customIndices,
450 AffineMap permutationMap) {
451 assert(!llvm::is_contained(vecToReadTy.getScalableDims(),
452 ShapedType::kDynamic) &&
453 "invalid input vector sizes");
454 auto sourceShapedType = cast<ShapedType>(source.getType());
455 auto sourceShape = sourceShapedType.getShape();
456
457 int64_t vecToReadRank = vecToReadTy.getRank();
458 auto vecToReadShape = vecToReadTy.getShape();
459
460 // The permutation map maps the source's index space to the vector's, so its
461 // dims must match the source rank and its results the vector rank. Without a
462 // map, a minor identity is implied, requiring the two ranks to match.
463 assert(sourceShape.size() == (permutationMap
464 ? permutationMap.getNumDims()
465 : static_cast<size_t>(vecToReadRank)) &&
466 "expected source rank to match permutation map dims or vector rank.");
467 assert((!permutationMap || permutationMap.getNumResults() ==
468 static_cast<size_t>(vecToReadRank)) &&
469 "expected permutation map results to match vector rank.");
470 assert((!padValue.has_value() ||
471 padValue.value().getType() == sourceShapedType.getElementType()) &&
472 "expected same pad element type to match source element type");
473
474 SmallVector<bool> inBoundsVal(vecToReadRank, true);
475
476 if (useInBoundsInsteadOfMasking) {
477 if (permutationMap) {
478 // Update the inBounds attribute.
479 // FIXME: This computation is too weak - it ignores the read indices.
481 permutationMap, vecToReadTy, cast<ShapedType>(source.getType()));
482 } else {
483 // Update the inBounds attribute.
484 // FIXME: This computation is too weak - it ignores the read indices.
485 for (unsigned i = 0; i < vecToReadRank; i++)
486 inBoundsVal[i] = (sourceShape[i] == vecToReadShape[i]) &&
487 ShapedType::isStatic(sourceShape[i]);
488 }
489 }
490 // The transfer op expects one index per source dimension.
491 assert(
492 (customIndices.empty() || customIndices.size() == sourceShape.size()) &&
493 "expected as many custom indices as source dims.");
495 customIndices.empty()
496 ? indices.assign(sourceShape.size(),
497 arith::ConstantIndexOp::create(builder, loc, 0))
498 : indices.assign(customIndices.begin(), customIndices.end());
499
500 // A null permutation map means the builder defaults to a minor identity map.
501 auto transferReadOp =
502 vector::TransferReadOp::create(builder, loc, /*vectorType=*/vecToReadTy,
503 /*source=*/source,
504 /*indices=*/indices,
505 /*padding=*/padValue,
506 /*permutationMap=*/permutationMap,
507 /*inBounds=*/inBoundsVal);
508
509 if (useInBoundsInsteadOfMasking)
510 return transferReadOp;
511
512 SmallVector<OpFoldResult> mixedSourceDims =
513 isa<MemRefType>(source.getType())
514 ? memref::getMixedSizes(builder, loc, source)
515 : tensor::getMixedSizes(builder, loc, source);
516
517 if (isMaskTriviallyFoldable(mixedSourceDims, indices, sourceShape,
518 vecToReadShape))
519 return transferReadOp;
520
521 auto maskType = vecToReadTy.cloneWith(/*shape=*/{}, builder.getI1Type());
522 Value mask =
523 vector::CreateMaskOp::create(builder, loc, maskType, mixedSourceDims);
524 return mlir::vector::maskOperation(builder, transferReadOp, mask)
525 ->getResult(0);
526}
527
529 Value vecToStore, Value dest,
530 SmallVector<Value> writeIndices,
531 bool useInBoundsInsteadOfMasking,
532 AffineMap permutationMap) {
533
534 ShapedType destType = cast<ShapedType>(dest.getType());
535 int64_t destRank = destType.getRank();
536 auto destShape = destType.getShape();
537
538 VectorType vecToStoreType = cast<VectorType>(vecToStore.getType());
539 int64_t vecToStoreRank = vecToStoreType.getRank();
540 auto vecToStoreShape = vecToStoreType.getShape();
541
542 // Compute the in_bounds attribute
543 SmallVector<bool> inBoundsVal(vecToStoreRank, true);
544 if (useInBoundsInsteadOfMasking) {
545 if (permutationMap) {
546 // Update the inBounds attribute.
547 // FIXME: This computation is too weak - it ignores the write indices.
549 permutationMap, vecToStoreType, cast<ShapedType>(dest.getType()));
550 } else {
551 // Update the inBounds attribute.
552 // FIXME: This computation is too weak - it ignores the write indices.
553 for (unsigned i = 0; i < vecToStoreRank; i++)
554 inBoundsVal[i] =
555 (destShape[destRank - vecToStoreRank + i] >= vecToStoreShape[i]) &&
556 ShapedType::isStatic(destShape[destRank - vecToStoreRank + i]);
557 }
558 }
559
560 // If missing, initialize the write indices to 0.
561 bool useDefaultWriteIdxs = writeIndices.empty();
562 assert((useDefaultWriteIdxs ||
563 writeIndices.size() == static_cast<size_t>(destRank)) &&
564 "Invalid number of write indices!");
565 if (useDefaultWriteIdxs) {
566 auto zero = arith::ConstantIndexOp::create(builder, loc, 0);
567 writeIndices.assign(destRank, zero);
568 }
569
570 // Generate the xfer_write Op. A null permutation map means the builder
571 // defaults to a minor identity map.
572 Operation *write =
573 vector::TransferWriteOp::create(builder, loc,
574 /*vector=*/vecToStore,
575 /*dest=*/dest,
576 /*indices=*/writeIndices,
577 /*permutationMap=*/permutationMap,
578 /*inBounds=*/inBoundsVal);
579
580 // If masking is disabled, exit.
581 if (useInBoundsInsteadOfMasking)
582 return write;
583
584 // Check if masking is needed. If not, exit.
585 if (llvm::equal(vecToStoreShape, destShape.take_back(vecToStoreRank)))
586 return write;
587
588 // Compute the mask and mask the write Op.
589 auto writeMaskType = VectorType::get(vecToStoreShape, builder.getI1Type(),
590 vecToStoreType.getScalableDims());
591
592 SmallVector<OpFoldResult> destSizes =
593 isa<MemRefType>(dest.getType())
594 ? memref::getMixedSizes(builder, loc, dest)
595 : tensor::getMixedSizes(builder, loc, dest);
596
597 // Compute sizes for write-mask
599 if (useDefaultWriteIdxs) {
600 maskSizes = SmallVector<OpFoldResult>(destSizes.end() - vecToStoreRank,
601 destSizes.end());
602 } else {
603 size_t diff = destShape.size() - vecToStoreRank;
604 for (int64_t idx = 0; idx < vecToStoreRank; idx++) {
605 auto value =
606 getValueOrCreateConstantIndexOp(builder, loc, destSizes[diff + idx]);
607 auto neg =
608 builder.createOrFold<arith::SubIOp>(loc, value, writeIndices[idx]);
609 maskSizes.push_back(OpFoldResult(neg));
610 }
611 }
612
613 if (isMaskTriviallyFoldable(maskSizes, writeIndices, destShape,
614 vecToStoreShape))
615 return write;
616
617 Value maskForWrite =
618 builder.createOrFold<vector::CreateMaskOp>(loc, writeMaskType, maskSizes);
619 return mlir::vector::maskOperation(builder, write, maskForWrite);
620}
621
622LogicalResult
624 ArrayRef<int64_t> inputVectorSizes) {
625 LDBG() << "Iteration space static sizes:" << llvm::interleaved(shape);
626
627 if (inputVectorSizes.size() != shape.size()) {
628 LDBG() << "Input vector sizes don't match the number of loops";
629 return failure();
630 }
631 if (ShapedType::isDynamicShape(inputVectorSizes)) {
632 LDBG() << "Input vector sizes can't have dynamic dimensions";
633 return failure();
634 }
635 if (!llvm::all_of(llvm::zip(shape, inputVectorSizes),
636 [](std::tuple<int64_t, int64_t> sizePair) {
637 int64_t staticSize = std::get<0>(sizePair);
638 int64_t inputSize = std::get<1>(sizePair);
639 return ShapedType::isDynamic(staticSize) ||
640 staticSize <= inputSize;
641 })) {
642 LDBG() << "Input vector sizes must be greater than or equal to iteration "
643 "space static sizes";
644 return failure();
645 }
646 return success();
647}
648
/// Takes a 2+ dimensional vector as input and returns the n vector values
/// produced by n vector.extract operations.
651/// I.e. calling unrollVectorValue([[%v]], rewriter) such that
652///
653/// %v : vector<nxaxb...>
654///
655/// will produce the following IR changes
656///
657/// %v0 = vector.extract %v[0] : vector<axbx...> from vector<nxaxb...>
658/// %v1 = vector.extract %v[1] : vector<axbx...> from vector<nxaxb...>
659/// ...
660/// %vnminusone = vector.extract %v[n-1] : vector<axbx...> from ...
661///
662/// and returns SmallVector<Value> r = {[[%v0]], [[%v1]], ..., [[%vnminusone]]}
663FailureOr<SmallVector<Value>>
665 RewriterBase &rewriter) {
666 SmallVector<Value> subvectors;
667 VectorType ty = cast<VectorType>(vector.getType());
668 Location loc = vector.getLoc();
669 if (ty.getRank() < 2)
670 return rewriter.notifyMatchFailure(loc, "already 1-D");
671
672 // Unrolling doesn't take vscale into account. Pattern is disabled for
673 // vectors with leading scalable dim(s).
674 if (ty.getScalableDims().front())
675 return rewriter.notifyMatchFailure(loc, "cannot unroll scalable dim");
676
677 for (int64_t i = 0, e = ty.getShape().front(); i < e; ++i) {
678 subvectors.push_back(vector::ExtractOp::create(rewriter, loc, vector, i));
679 }
680
681 return subvectors;
682}
683
685 vector::UnrollVectorOpFn unrollFn) {
686 assert(op->getNumResults() == 1 && "expected single result");
687 assert(isa<VectorType>(op->getResult(0).getType()) && "expected vector type");
688 VectorType resultTy = cast<VectorType>(op->getResult(0).getType());
689 if (resultTy.getRank() < 2)
690 return rewriter.notifyMatchFailure(op, "already 1-D");
691
692 // Unrolling doesn't take vscale into account. Pattern is disabled for
693 // vectors with leading scalable dim(s).
694 if (resultTy.getScalableDims().front())
695 return rewriter.notifyMatchFailure(op, "cannot unroll scalable dim");
696
697 Location loc = op->getLoc();
698 Value result = ub::PoisonOp::create(rewriter, loc, resultTy);
699 VectorType subTy = VectorType::Builder(resultTy).dropDim(0);
700
701 for (int64_t i = 0, e = resultTy.getShape().front(); i < e; ++i) {
702 Value subVector = unrollFn(rewriter, loc, subTy, i);
703 result = vector::InsertOp::create(rewriter, loc, subVector, result, i);
704 }
705
706 rewriter.replaceOp(op, result);
707 return success();
708}
return success()
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
static std::optional< VectorShape > vectorShape(Type type)
static SetVector< Operation * > getParentsOfType(Block *block)
Implementation detail that walks up the parents and records the ones with the specified type.
static bool areDimsTransposedIn2DSlice(int64_t dim0, int64_t dim1, ArrayRef< int64_t > transp)
Given the n-D transpose pattern 'transp', return true if 'dim0' and 'dim1' should be transposed with ...
static SetVector< Operation * > getEnclosingforOps(Block *block)
Returns the enclosing AffineForOp, from closest to farthest.
static AffineMap makePermutationMap(ArrayRef< Value > indices, const DenseMap< Operation *, unsigned > &enclosingLoopToVectorDim)
Constructs a permutation map from memref indices to vector dimension.
static bool isMaskTriviallyFoldable(SmallVector< OpFoldResult > &maskSizes, SmallVector< Value > &indices, ArrayRef< int64_t > baseShape, ArrayRef< int64_t > maskShape)
Determines whether a mask for xfer_read/write is trivially "all true".
static SmallVector< bool > computeInBoundsFromPermutationMap(AffineMap permutationMap, VectorType vectorType, ShapedType sourceType)
Compute the in_bounds attribute for a transfer op given its permutation map and the source being acce...
Base type for affine expression.
Definition AffineExpr.h:68
A multi-dimensional affine map. Affine maps are immutable, like Types, and they are uniqued.
Definition AffineMap.h:46
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
unsigned getNumDims() const
unsigned getNumResults() const
AffineExpr getResult(unsigned idx) const
Block represents an ordered list of Operations.
Definition Block.h:33
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition Block.cpp:31
IntegerType getI1Type()
Definition Builders.cpp:57
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
This class helps build Operations.
Definition Builders.h:209
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Definition Builders.h:528
This class represents a single result from folding an operation.
Operation is the basic unit of execution within MLIR.
Definition Operation.h:87
Block * getBlock()
Returns the operation block that contains this operation.
Definition Operation.h:230
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition Operation.h:432
Location getLoc()
The source location the operation was defined or derived from.
Definition Operation.h:240
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
unsigned getNumResults()
Return the number of results held by this operation.
Definition Operation.h:429
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Type getType() const
Return the type of this value.
Definition Value.h:105
This is a builder type that keeps local references to arguments.
Builder & dropDim(unsigned pos)
Erase a dim from shape @pos.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Definition ArithOps.cpp:384
DenseSet< Value, DenseMapInfo< Value > > getInvariantAccesses(Value iv, ArrayRef< Value > indices)
Given an induction variable iv of type AffineForOp and indices of type IndexType, returns the set of ...
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given memref value.
Definition MemRefOps.cpp:79
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given tensor value.
Definition TensorOps.cpp:69
bool isContiguousSlice(MemRefType memrefType, VectorType vectorType)
Return true if vectorType is a contiguous slice of memrefType, in the sense that it can be read/writt...
Operation * maskOperation(OpBuilder &builder, Operation *maskableOp, Value mask, Value passthru=Value())
Creates a vector.mask operation around a maskable operation.
LogicalResult isValidMaskedInputVector(ArrayRef< int64_t > shape, ArrayRef< int64_t > inputVectorSizes)
Returns success if inputVectorSizes is a valid masking configuration for the given shape,...
Operation * createWriteOrMaskedWrite(OpBuilder &builder, Location loc, Value vecToStore, Value dest, SmallVector< Value > writeIndices={}, bool useInBoundsInsteadOfMasking=false, AffineMap permutationMap=AffineMap())
Create a TransferWriteOp of vecToStore into dest.
Value createReadOrMaskedRead(OpBuilder &builder, Location loc, Value source, const VectorType &vecToReadTy, std::optional< Value > padValue=std::nullopt, bool useInBoundsInsteadOfMasking=false, ArrayRef< Value > indices={}, AffineMap permutationMap=AffineMap())
Creates a TransferReadOp from source.
FailureOr< std::pair< int, int > > isTranspose2DSlice(vector::TransposeOp op)
Returns two dims that are greater than one if the transposition is applied on a 2D slice.
FailureOr< SmallVector< Value > > unrollVectorValue(TypedValue< VectorType >, RewriterBase &)
Generic utility for unrolling values of type vector<NxAxBx...> to N values of type vector<AxBx....
std::optional< StaticTileOffsetRange > createUnrollIterator(VectorType vType, int64_t targetRank=1)
Returns an iterator for all positions in the leading dimensions of vType up to the targetRank.
Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim)
Helper function that creates a memref::DimOp or tensor::DimOp depending on the type of source.
bool isLinearizableVector(VectorType type)
Returns true if the input Vector type can be linearized.
function_ref< Value(PatternRewriter &, Location, VectorType, int64_t)> UnrollVectorOpFn
Generic utility for unrolling n-D vector operations to (n-1)-D operations.
SmallVector< OpFoldResult > getMixedSizesXfer(bool hasTensorSemantics, Operation *xfer, RewriterBase &rewriter)
A wrapper for getMixedSizes for vector.transfer_read and vector.transfer_write Ops (for source and de...
LogicalResult unrollVectorOp(Operation *op, PatternRewriter &rewriter, UnrollVectorOpFn unrollFn)
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition Matchers.h:490
detail::constant_int_value_binder m_ConstantInt(IntegerAttr::ValueType *bind_value)
Matches a constant holding a scalar/vector/tensor integer (splat) and writes the integer value to bin...
Definition Matchers.h:527
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
llvm::SetVector< T, Vector, Set, N > SetVector
Definition LLVM.h:125
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
Definition Value.h:494
llvm::TypeSwitch< T, ResultT > TypeSwitch
Definition LLVM.h:139
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition Utils.cpp:114
AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context)
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
Definition LLVM.h:120
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.