31#include "llvm/ADT/DenseSet.h"
32#include "llvm/Support/DebugLog.h"
33#include "llvm/Support/InterleavedRange.h"
35#define DEBUG_TYPE "vector-utils"
43 if (isa<UnrankedMemRefType, MemRefType>(source.
getType()))
44 return b.createOrFold<memref::DimOp>(loc, source, dim);
45 if (isa<UnrankedTensorType, RankedTensorType>(source.
getType()))
46 return b.createOrFold<tensor::DimOp>(loc, source, dim);
47 llvm_unreachable(
"Expected MemRefType or TensorType");
73 for (
int64_t permDim : transp) {
80 llvm_unreachable(
"Ill-formed transpose pattern");
83FailureOr<std::pair<int, int>>
85 VectorType srcType = op.getSourceVectorType();
87 for (
auto [
index, size] : llvm::enumerate(srcType.getShape()))
89 srcGtOneDims.push_back(
index);
91 if (srcGtOneDims.size() != 2)
101 return std::pair<int, int>(srcGtOneDims[0], srcGtOneDims[1]);
129 if (enclosingLoopToVectorDim.empty())
132 enclosingLoopToVectorDim.begin()->getFirst()->getContext();
136 for (
auto kvp : enclosingLoopToVectorDim) {
137 assert(kvp.second < perm.size());
139 cast<affine::AffineForOp>(kvp.first).getInductionVar(),
indices);
140 unsigned numIndices =
indices.size();
141 unsigned countInvariantIndices = 0;
142 for (
unsigned dim = 0; dim < numIndices; ++dim) {
143 if (!invariants.count(
indices[dim])) {
145 "permutationMap already has an entry along dim");
148 ++countInvariantIndices;
151 assert((countInvariantIndices == numIndices ||
152 countInvariantIndices == numIndices - 1) &&
153 "Vectorization prerequisite violated: at most 1 index may be "
154 "invariant wrt a vectorized loop");
155 (
void)countInvariantIndices;
169 if ([[maybe_unused]]
auto typedParent = dyn_cast<T>(current)) {
170 assert(res.count(current) == 0 &&
"Already inserted");
173 current = current->getParentOp();
188 for (
auto *forInst : enclosingLoops) {
189 auto it = loopToVectorDim.find(forInst);
190 if (it != loopToVectorDim.end()) {
191 enclosingLoopToVectorDim.insert(*it);
194 return ::makePermutationMap(
indices, enclosingLoopToVectorDim);
203bool matcher::operatesOnSuperVectorsOf(
Operation &op,
204 VectorType subVectorType) {
214 VectorType superVectorType;
215 if (
auto transfer = dyn_cast<VectorTransferOpInterface>(op)) {
216 superVectorType = transfer.getVectorType();
218 if (!isa<func::ReturnOp>(op)) {
219 op.
emitError(
"NYI: assuming only return operations can have 0 "
220 " results at this point");
233 op.
emitError(
"NYI: operation has more than 1 result");
242 return ratio.has_value();
246 if (vectorType.isScalable())
251 vectorType.getShape().drop_while([](
auto v) {
return v == 1; });
258 if (!memrefType.areTrailingDimsContiguous(vecRank))
262 auto memrefShape = memrefType.getShape().take_back(vecRank);
266 return llvm::equal(
vectorShape.drop_front(), memrefShape.drop_front());
269std::optional<StaticTileOffsetRange>
271 if (vType.getRank() <= targetRank)
275 auto shapeToUnroll = vType.getShape().drop_back(targetRank);
276 auto inputScalableVecDimsToUnroll =
277 vType.getScalableDims().drop_back(targetRank);
278 const auto *it = llvm::find(inputScalableVecDimsToUnroll,
true);
279 auto firstScalableDim = it - inputScalableVecDimsToUnroll.begin();
280 if (firstScalableDim == 0)
283 inputScalableVecDimsToUnroll =
284 inputScalableVecDimsToUnroll.slice(0, firstScalableDim);
285 assert(!llvm::is_contained(inputScalableVecDimsToUnroll,
true) &&
286 "unexpected leading scalable dimension");
288 shapeToUnroll = shapeToUnroll.slice(0, firstScalableDim);
295 auto loc = xfer->
getLoc();
299 .Case([&](vector::TransferReadOp readOp) {
return readOp.getBase(); })
300 .Case([&](vector::TransferWriteOp writeOp) {
301 return writeOp.getOperand(1);
307 return mixedSourceDims;
311 return (type.getRank() > 1) && (type.getNumScalableDims() <= 1);
363 if (ShapedType::isDynamicShape(baseShape))
368 for (
auto [i, dimSize] : llvm::enumerate(maskSizes)) {
370 cstMaskSizes.push_back(*intSize);
375 if (cstMaskSizes.size() != maskShape.size())
380 for (
auto [i, idx] : llvm::enumerate(
indices)) {
383 cstIndices.push_back(intVal.getSExtValue());
388 if (cstIndices.size() != baseShape.size())
397 int64_t rankDiff = baseShape.size() - cstMaskSizes.size();
398 for (
auto [i, idx] : llvm::enumerate(cstMaskSizes)) {
399 if ( maskShape[i] > baseShape[rankDiff + i] ||
400 baseShape[rankDiff + i] <
401 (std::clamp(cstMaskSizes[i],
int64_t(0), maskShape[i]) +
412 std::optional<Value> padValue,
413 bool useInBoundsInsteadOfMasking,
415 VectorType vecToReadTy = VectorType::get(
416 inputVectorSizes, cast<ShapedType>(source.
getType()).getElementType(),
417 inputScalableVecDims);
420 useInBoundsInsteadOfMasking);
428 AffineMap permutationMap, VectorType vectorType, ShapedType sourceType) {
430 for (
unsigned i = 0; i < (
unsigned)vectorType.getRank(); ++i) {
432 if (
auto dimExpr = dyn_cast<AffineDimExpr>(expr)) {
433 unsigned memDim = dimExpr.getPosition();
434 if (!sourceType.isDynamicDim(memDim) &&
435 sourceType.getDimSize(memDim) % vectorType.getDimSize(i) == 0)
437 }
else if (isa<AffineConstantExpr>(expr)) {
446 const VectorType &vecToReadTy,
447 std::optional<Value> padValue,
448 bool useInBoundsInsteadOfMasking,
451 assert(!llvm::is_contained(vecToReadTy.getScalableDims(),
452 ShapedType::kDynamic) &&
453 "invalid input vector sizes");
454 auto sourceShapedType = cast<ShapedType>(source.
getType());
455 auto sourceShape = sourceShapedType.getShape();
457 int64_t vecToReadRank = vecToReadTy.getRank();
458 auto vecToReadShape = vecToReadTy.getShape();
463 assert(sourceShape.size() == (permutationMap
465 :
static_cast<size_t>(vecToReadRank)) &&
466 "expected source rank to match permutation map dims or vector rank.");
468 static_cast<size_t>(vecToReadRank)) &&
469 "expected permutation map results to match vector rank.");
470 assert((!padValue.has_value() ||
471 padValue.value().getType() == sourceShapedType.getElementType()) &&
472 "expected same pad element type to match source element type");
476 if (useInBoundsInsteadOfMasking) {
477 if (permutationMap) {
481 permutationMap, vecToReadTy, cast<ShapedType>(source.
getType()));
485 for (
unsigned i = 0; i < vecToReadRank; i++)
486 inBoundsVal[i] = (sourceShape[i] == vecToReadShape[i]) &&
487 ShapedType::isStatic(sourceShape[i]);
492 (customIndices.empty() || customIndices.size() == sourceShape.size()) &&
493 "expected as many custom indices as source dims.");
495 customIndices.empty()
496 ?
indices.assign(sourceShape.size(),
498 :
indices.assign(customIndices.begin(), customIndices.end());
501 auto transferReadOp =
502 vector::TransferReadOp::create(builder, loc, vecToReadTy,
509 if (useInBoundsInsteadOfMasking)
510 return transferReadOp;
513 isa<MemRefType>(source.
getType())
519 return transferReadOp;
521 auto maskType = vecToReadTy.cloneWith({}, builder.
getI1Type());
523 vector::CreateMaskOp::create(builder, loc, maskType, mixedSourceDims);
531 bool useInBoundsInsteadOfMasking,
534 ShapedType destType = cast<ShapedType>(dest.
getType());
535 int64_t destRank = destType.getRank();
536 auto destShape = destType.getShape();
538 VectorType vecToStoreType = cast<VectorType>(vecToStore.
getType());
539 int64_t vecToStoreRank = vecToStoreType.getRank();
540 auto vecToStoreShape = vecToStoreType.getShape();
544 if (useInBoundsInsteadOfMasking) {
545 if (permutationMap) {
549 permutationMap, vecToStoreType, cast<ShapedType>(dest.
getType()));
553 for (
unsigned i = 0; i < vecToStoreRank; i++)
555 (destShape[destRank - vecToStoreRank + i] >= vecToStoreShape[i]) &&
556 ShapedType::isStatic(destShape[destRank - vecToStoreRank + i]);
561 bool useDefaultWriteIdxs = writeIndices.empty();
562 assert((useDefaultWriteIdxs ||
563 writeIndices.size() ==
static_cast<size_t>(destRank)) &&
564 "Invalid number of write indices!");
565 if (useDefaultWriteIdxs) {
567 writeIndices.assign(destRank, zero);
573 vector::TransferWriteOp::create(builder, loc,
581 if (useInBoundsInsteadOfMasking)
585 if (llvm::equal(vecToStoreShape, destShape.take_back(vecToStoreRank)))
589 auto writeMaskType = VectorType::get(vecToStoreShape, builder.
getI1Type(),
590 vecToStoreType.getScalableDims());
593 isa<MemRefType>(dest.
getType())
599 if (useDefaultWriteIdxs) {
603 size_t diff = destShape.size() - vecToStoreRank;
604 for (
int64_t idx = 0; idx < vecToStoreRank; idx++) {
608 builder.
createOrFold<arith::SubIOp>(loc, value, writeIndices[idx]);
618 builder.
createOrFold<vector::CreateMaskOp>(loc, writeMaskType, maskSizes);
625 LDBG() <<
"Iteration space static sizes:" << llvm::interleaved(
shape);
627 if (inputVectorSizes.size() !=
shape.size()) {
628 LDBG() <<
"Input vector sizes don't match the number of loops";
631 if (ShapedType::isDynamicShape(inputVectorSizes)) {
632 LDBG() <<
"Input vector sizes can't have dynamic dimensions";
635 if (!llvm::all_of(llvm::zip(
shape, inputVectorSizes),
636 [](std::tuple<int64_t, int64_t> sizePair) {
637 int64_t staticSize = std::get<0>(sizePair);
638 int64_t inputSize = std::get<1>(sizePair);
639 return ShapedType::isDynamic(staticSize) ||
640 staticSize <= inputSize;
642 LDBG() <<
"Input vector sizes must be greater than or equal to iteration "
643 "space static sizes";
663FailureOr<SmallVector<Value>>
667 VectorType ty = cast<VectorType>(
vector.getType());
669 if (ty.getRank() < 2)
674 if (ty.getScalableDims().front())
677 for (
int64_t i = 0, e = ty.getShape().front(); i < e; ++i) {
678 subvectors.push_back(vector::ExtractOp::create(rewriter, loc,
vector, i));
686 assert(op->
getNumResults() == 1 &&
"expected single result");
687 assert(isa<VectorType>(op->
getResult(0).
getType()) &&
"expected vector type");
689 if (resultTy.getRank() < 2)
694 if (resultTy.getScalableDims().front())
698 Value result = ub::PoisonOp::create(rewriter, loc, resultTy);
701 for (
int64_t i = 0, e = resultTy.getShape().front(); i < e; ++i) {
702 Value subVector = unrollFn(rewriter, loc, subTy, i);
703 result = vector::InsertOp::create(rewriter, loc, subVector,
result, i);
static std::optional< VectorShape > vectorShape(Type type)
static SetVector< Operation * > getParentsOfType(Block *block)
Implementation detail that walks up the parents and records the ones with the specified type.
static bool areDimsTransposedIn2DSlice(int64_t dim0, int64_t dim1, ArrayRef< int64_t > transp)
Given the n-D transpose pattern 'transp', return true if 'dim0' and 'dim1' should be transposed with ...
static SetVector< Operation * > getEnclosingforOps(Block *block)
Returns the enclosing AffineForOps, from closest to farthest.
static AffineMap makePermutationMap(ArrayRef< Value > indices, const DenseMap< Operation *, unsigned > &enclosingLoopToVectorDim)
Constructs a permutation map from memref indices to vector dimension.
static bool isMaskTriviallyFoldable(SmallVector< OpFoldResult > &maskSizes, SmallVector< Value > &indices, ArrayRef< int64_t > baseShape, ArrayRef< int64_t > maskShape)
Determines whether a mask for xfer_read/write is trivially "all true".
static SmallVector< bool > computeInBoundsFromPermutationMap(AffineMap permutationMap, VectorType vectorType, ShapedType sourceType)
Compute the in_bounds attribute for a transfer op given its permutation map and the source being acce...
Base type for affine expression.
A multi-dimensional affine map. Affine maps are immutable, like Types, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
unsigned getNumDims() const
unsigned getNumResults() const
AffineExpr getResult(unsigned idx) const
Block represents an ordered list of Operations.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
This class represents a single result from folding an operation.
Operation is the basic unit of execution within MLIR.
Block * getBlock()
Returns the operation block that contains this operation.
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Location getLoc()
The source location the operation was defined or derived from.
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
unsigned getNumResults()
Return the number of results held by this operation.
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
This is a builder type that keeps local references to arguments.
Builder & dropDim(unsigned pos)
Erase a dim from shape @pos.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
DenseSet< Value, DenseMapInfo< Value > > getInvariantAccesses(Value iv, ArrayRef< Value > indices)
Given an induction variable iv of type AffineForOp and indices of type IndexType, returns the set of ...
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given memref value.
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given tensor value.
bool isContiguousSlice(MemRefType memrefType, VectorType vectorType)
Return true if vectorType is a contiguous slice of memrefType, in the sense that it can be read/writt...
Operation * maskOperation(OpBuilder &builder, Operation *maskableOp, Value mask, Value passthru=Value())
Creates a vector.mask operation around a maskable operation.
LogicalResult isValidMaskedInputVector(ArrayRef< int64_t > shape, ArrayRef< int64_t > inputVectorSizes)
Returns success if inputVectorSizes is a valid masking configuration for given shape,...
Operation * createWriteOrMaskedWrite(OpBuilder &builder, Location loc, Value vecToStore, Value dest, SmallVector< Value > writeIndices={}, bool useInBoundsInsteadOfMasking=false, AffineMap permutationMap=AffineMap())
Create a TransferWriteOp of vecToStore into dest.
Value createReadOrMaskedRead(OpBuilder &builder, Location loc, Value source, const VectorType &vecToReadTy, std::optional< Value > padValue=std::nullopt, bool useInBoundsInsteadOfMasking=false, ArrayRef< Value > indices={}, AffineMap permutationMap=AffineMap())
Creates a TransferReadOp from source.
FailureOr< std::pair< int, int > > isTranspose2DSlice(vector::TransposeOp op)
Returns two dims that are greater than one if the transposition is applied on a 2D slice.
FailureOr< SmallVector< Value > > unrollVectorValue(TypedValue< VectorType >, RewriterBase &)
Generic utility for unrolling values of type vector<NxAxBx...> to N values of type vector<AxBx....
std::optional< StaticTileOffsetRange > createUnrollIterator(VectorType vType, int64_t targetRank=1)
Returns an iterator for all positions in the leading dimensions of vType up to the targetRank.
Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim)
Helper function that creates a memref::DimOp or tensor::DimOp depending on the type of source.
bool isLinearizableVector(VectorType type)
Returns true if the input Vector type can be linearized.
function_ref< Value(PatternRewriter &, Location, VectorType, int64_t)> UnrollVectorOpFn
Generic utility for unrolling n-D vector operations to (n-1)-D operations.
SmallVector< OpFoldResult > getMixedSizesXfer(bool hasTensorSemantics, Operation *xfer, RewriterBase &rewriter)
A wrapper for getMixedSizes for vector.transfer_read and vector.transfer_write Ops (for source and de...
LogicalResult unrollVectorOp(Operation *op, PatternRewriter &rewriter, UnrollVectorOpFn unrollFn)
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
detail::constant_int_value_binder m_ConstantInt(IntegerAttr::ValueType *bind_value)
Matches a constant holding a scalar/vector/tensor integer (splat) and writes the integer value to bin...
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
llvm::SetVector< T, Vector, Set, N > SetVector
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
llvm::TypeSwitch< T, ResultT > TypeSwitch
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context)
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.