29#include "llvm/ADT/DenseSet.h"
30#include "llvm/Support/DebugLog.h"
31#include "llvm/Support/InterleavedRange.h"
33#define DEBUG_TYPE "vector-utils"
41 if (isa<UnrankedMemRefType, MemRefType>(source.
getType()))
42 return b.createOrFold<memref::DimOp>(loc, source, dim);
43 if (isa<UnrankedTensorType, RankedTensorType>(source.
getType()))
44 return b.createOrFold<tensor::DimOp>(loc, source, dim);
45 llvm_unreachable(
"Expected MemRefType or TensorType");
71 for (
int64_t permDim : transp) {
78 llvm_unreachable(
"Ill-formed transpose pattern");
81FailureOr<std::pair<int, int>>
83 VectorType srcType = op.getSourceVectorType();
85 for (
auto [
index, size] : llvm::enumerate(srcType.getShape()))
87 srcGtOneDims.push_back(
index);
89 if (srcGtOneDims.size() != 2)
99 return std::pair<int, int>(srcGtOneDims[0], srcGtOneDims[1]);
127 if (enclosingLoopToVectorDim.empty())
130 enclosingLoopToVectorDim.begin()->getFirst()->getContext();
134 for (
auto kvp : enclosingLoopToVectorDim) {
135 assert(kvp.second < perm.size());
137 cast<affine::AffineForOp>(kvp.first).getInductionVar(),
indices);
138 unsigned numIndices =
indices.size();
139 unsigned countInvariantIndices = 0;
140 for (
unsigned dim = 0; dim < numIndices; ++dim) {
141 if (!invariants.count(
indices[dim])) {
143 "permutationMap already has an entry along dim");
146 ++countInvariantIndices;
149 assert((countInvariantIndices == numIndices ||
150 countInvariantIndices == numIndices - 1) &&
151 "Vectorization prerequisite violated: at most 1 index may be "
152 "invariant wrt a vectorized loop");
153 (
void)countInvariantIndices;
167 if ([[maybe_unused]]
auto typedParent = dyn_cast<T>(current)) {
168 assert(res.count(current) == 0 &&
"Already inserted");
171 current = current->getParentOp();
186 for (
auto *forInst : enclosingLoops) {
187 auto it = loopToVectorDim.find(forInst);
188 if (it != loopToVectorDim.end()) {
189 enclosingLoopToVectorDim.insert(*it);
192 return ::makePermutationMap(
indices, enclosingLoopToVectorDim);
201bool matcher::operatesOnSuperVectorsOf(
Operation &op,
202 VectorType subVectorType) {
212 bool mustDivide =
false;
214 VectorType superVectorType;
215 if (
auto transfer = dyn_cast<VectorTransferOpInterface>(op)) {
216 superVectorType = transfer.getVectorType();
219 if (!isa<func::ReturnOp>(op)) {
220 op.
emitError(
"NYI: assuming only return operations can have 0 "
221 " results at this point");
234 op.
emitError(
"NYI: operation has more than 1 result");
243 assert((ratio || !mustDivide) &&
244 "vector.transfer operation in which super-vector size is not an"
245 " integer multiple of sub-vector size");
252 return ratio.has_value();
256 if (vectorType.isScalable())
261 vectorType.getShape().drop_while([](
auto v) {
return v == 1; });
264 if (!memrefType.areTrailingDimsContiguous(vecRank))
268 auto memrefShape = memrefType.getShape().take_back(vecRank);
272 return llvm::equal(
vectorShape.drop_front(), memrefShape.drop_front());
275std::optional<StaticTileOffsetRange>
277 if (vType.getRank() <= targetRank)
281 auto shapeToUnroll = vType.getShape().drop_back(targetRank);
282 auto inputScalableVecDimsToUnroll =
283 vType.getScalableDims().drop_back(targetRank);
284 const auto *it = llvm::find(inputScalableVecDimsToUnroll,
true);
285 auto firstScalableDim = it - inputScalableVecDimsToUnroll.begin();
286 if (firstScalableDim == 0)
289 inputScalableVecDimsToUnroll =
290 inputScalableVecDimsToUnroll.slice(0, firstScalableDim);
291 assert(!llvm::is_contained(inputScalableVecDimsToUnroll,
true) &&
292 "unexpected leading scalable dimension");
294 shapeToUnroll = shapeToUnroll.slice(0, firstScalableDim);
301 auto loc = xfer->
getLoc();
304 .Case<vector::TransferReadOp>(
305 [&](
auto readOp) {
return readOp.getBase(); })
306 .Case<vector::TransferWriteOp>(
307 [&](
auto writeOp) {
return writeOp.getOperand(1); });
312 return mixedSourceDims;
316 return (type.getRank() > 1) && (type.getNumScalableDims() <= 1);
322 std::optional<Value> padValue,
323 bool useInBoundsInsteadOfMasking,
325 VectorType vecToReadTy = VectorType::get(
326 inputVectorSizes, cast<ShapedType>(source.
getType()).getElementType(),
327 inputScalableVecDims);
330 useInBoundsInsteadOfMasking);
335 const VectorType &vecToReadTy,
336 std::optional<Value> padValue,
337 bool useInBoundsInsteadOfMasking) {
338 assert(!llvm::is_contained(vecToReadTy.getScalableDims(),
339 ShapedType::kDynamic) &&
340 "invalid input vector sizes");
341 auto sourceShapedType = cast<ShapedType>(source.
getType());
342 auto sourceShape = sourceShapedType.getShape();
344 int64_t vecToReadRank = vecToReadTy.getRank();
345 auto vecToReadShape = vecToReadTy.getShape();
347 assert(sourceShape.size() ==
static_cast<size_t>(vecToReadRank) &&
348 "expected same ranks.");
349 assert((!padValue.has_value() ||
350 padValue.value().getType() == sourceShapedType.getElementType()) &&
351 "expected same pad element type to match source element type");
356 if (useInBoundsInsteadOfMasking) {
359 for (
unsigned i = 0; i < vecToReadRank; i++)
360 inBoundsVal[i] = (sourceShape[i] == vecToReadShape[i]) &&
361 ShapedType::isStatic(sourceShape[i]);
363 auto transferReadOp = vector::TransferReadOp::create(
371 if (llvm::equal(vecToReadTy.getShape(), sourceShape) ||
372 useInBoundsInsteadOfMasking)
373 return transferReadOp;
375 isa<MemRefType>(source.
getType())
379 auto maskType = vecToReadTy.cloneWith({}, builder.
getI1Type());
381 vector::CreateMaskOp::create(builder, loc, maskType, mixedSourceDims);
389 LDBG() <<
"Iteration space static sizes:" << llvm::interleaved(
shape);
391 if (inputVectorSizes.size() !=
shape.size()) {
392 LDBG() <<
"Input vector sizes don't match the number of loops";
395 if (ShapedType::isDynamicShape(inputVectorSizes)) {
396 LDBG() <<
"Input vector sizes can't have dynamic dimensions";
399 if (!llvm::all_of(llvm::zip(
shape, inputVectorSizes),
400 [](std::tuple<int64_t, int64_t> sizePair) {
401 int64_t staticSize = std::get<0>(sizePair);
402 int64_t inputSize = std::get<1>(sizePair);
403 return ShapedType::isDynamic(staticSize) ||
404 staticSize <= inputSize;
406 LDBG() <<
"Input vector sizes must be greater than or equal to iteration "
407 "space static sizes";
427FailureOr<SmallVector<Value>>
431 VectorType ty = cast<VectorType>(
vector.getType());
433 if (ty.getRank() < 2)
438 if (ty.getScalableDims().front())
441 for (
int64_t i = 0, e = ty.getShape().front(); i < e; ++i) {
442 subvectors.push_back(vector::ExtractOp::create(rewriter, loc,
vector, i));
450 assert(op->
getNumResults() == 1 &&
"expected single result");
451 assert(isa<VectorType>(op->
getResult(0).
getType()) &&
"expected vector type");
453 if (resultTy.getRank() < 2)
458 if (resultTy.getScalableDims().front())
462 Value result = ub::PoisonOp::create(rewriter, loc, resultTy);
465 for (
int64_t i = 0, e = resultTy.getShape().front(); i < e; ++i) {
466 Value subVector = unrollFn(rewriter, loc, subTy, i);
467 result = vector::InsertOp::create(rewriter, loc, subVector,
result, i);
static std::optional< VectorShape > vectorShape(Type type)
static SetVector< Operation * > getParentsOfType(Block *block)
Implementation detail that walks up the parents and records the ones with the specified type.
static bool areDimsTransposedIn2DSlice(int64_t dim0, int64_t dim1, ArrayRef< int64_t > transp)
Given the n-D transpose pattern 'transp', return true if 'dim0' and 'dim1' should be transposed with ...
static SetVector< Operation * > getEnclosingforOps(Block *block)
Returns the enclosing AffineForOp, from closest to farthest.
static AffineMap makePermutationMap(ArrayRef< Value > indices, const DenseMap< Operation *, unsigned > &enclosingLoopToVectorDim)
Constructs a permutation map from memref indices to vector dimension.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
Block represents an ordered list of Operations.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
Operation is the basic unit of execution within MLIR.
Block * getBlock()
Returns the operation block that contains this operation.
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Location getLoc()
The source location the operation was defined or derived from.
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
unsigned getNumResults()
Return the number of results held by this operation.
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
This is a builder type that keeps local references to arguments.
Builder & dropDim(unsigned pos)
Erase a dim from shape @pos.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
DenseSet< Value, DenseMapInfo< Value > > getInvariantAccesses(Value iv, ArrayRef< Value > indices)
Given an induction variable iv of type AffineForOp and indices of type IndexType, returns the set of ...
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given memref value.
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given tensor value.
bool isContiguousSlice(MemRefType memrefType, VectorType vectorType)
Return true if vectorType is a contiguous slice of memrefType, in the sense that it can be read/writt...
Operation * maskOperation(OpBuilder &builder, Operation *maskableOp, Value mask, Value passthru=Value())
Creates a vector.mask operation around a maskable operation.
LogicalResult isValidMaskedInputVector(ArrayRef< int64_t > shape, ArrayRef< int64_t > inputVectorSizes)
Returns success if inputVectorSizes is a valid masking configuration for given shape,...
FailureOr< std::pair< int, int > > isTranspose2DSlice(vector::TransposeOp op)
Returns two dims that are greater than one if the transposition is applied on a 2D slice.
FailureOr< SmallVector< Value > > unrollVectorValue(TypedValue< VectorType >, RewriterBase &)
Generic utility for unrolling values of type vector<NxAxBx...> to N values of type vector<AxBx....
std::optional< StaticTileOffsetRange > createUnrollIterator(VectorType vType, int64_t targetRank=1)
Returns an iterator for all positions in the leading dimensions of vType up to the targetRank.
Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim)
Helper function that creates a memref::DimOp or tensor::DimOp depending on the type of source.
bool isLinearizableVector(VectorType type)
Returns true if the input Vector type can be linearized.
Value createReadOrMaskedRead(OpBuilder &builder, Location loc, Value source, const VectorType &vecToReadTy, std::optional< Value > padValue=std::nullopt, bool useInBoundsInsteadOfMasking=false)
Creates a TransferReadOp from source.
function_ref< Value(PatternRewriter &, Location, VectorType, int64_t)> UnrollVectorOpFn
Generic utility for unrolling n-D vector operations to (n-1)-D operations.
SmallVector< OpFoldResult > getMixedSizesXfer(bool hasTensorSemantics, Operation *xfer, RewriterBase &rewriter)
A wrapper for getMixedSizes for vector.transfer_read and vector.transfer_write Ops (for source and de...
LogicalResult unrollVectorOp(Operation *op, PatternRewriter &rewriter, UnrollVectorOpFn unrollFn)
Include the generated interface declarations.
llvm::SetVector< T, Vector, Set, N > SetVector
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
llvm::TypeSwitch< T, ResultT > TypeSwitch
AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context)
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.