#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/Utils/Utils.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.h"
#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypeInterfaces.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
#include <type_traits>

Classes
struct	VectorizationState
	Contains the vectorization state and related methods used across the vectorization process of a given operation. More...

struct	VectorizationResult

struct	VectorizePadOpUserPattern< OpTy >
	Base pattern for rewriting tensor::PadOps whose result is consumed by a given operation type OpTy. More...

struct	PadOpVectorizationWithTransferReadPattern
	Rewrite use of tensor::PadOp result in TransferReadOp. More...

struct	PadOpVectorizationWithTransferWritePattern
	Rewrite use of tensor::PadOp result in TransferWriteOp. More...

struct	InsertSliceVectorizePattern
	Rewrite tensor.insert_slice as a vector.transfer_read + vector.transfer_write pair. More...

struct	PadOpVectorizationWithInsertSlicePattern
	Rewrite use of tensor::PadOp result in InsertSliceOp. More...

struct	VectorizeConvolution

Macros
#define	DEBUG_TYPE "linalg-vectorization"

#define	DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")

#define	LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")

Typedefs
using	CustomVectorizationPrecondition = std::function< LogicalResult(Operation *, bool)>

using	CustomVectorizationHook = std::function< VectorizationResult(Operation *, const IRMapping &)>

Enumerations
enum class	Conv1DOpOrder { W , Ncw , Nwc }
	Helper enum to represent conv1d input traversal order. More...

enum	VectorizationStatus { Failure = 0 , NoReplace , NewOp }
	Helper data structure to represent the result of vectorization. More...

enum	VectorMemoryAccessKind { ScalarBroadcast , Contiguous , Gather }

Functions
static FailureOr< Operation * >	vectorizeConvolution (RewriterBase &rewriter, LinalgOp convOp, ArrayRef< int64_t > inputVecSizes={}, ArrayRef< bool > inputVecScalableFlags={}, bool flatten1DDepthwiseConv=false)
	Try to vectorize `convOp` as a convolution. More...

template<typename OpType >
static OpType	getSingleOpOfType (Block &block)
	Return the unique instance of OpType in `block` if it is indeed unique. More...

static SmallVector< Value >	extractConvInputSlices (RewriterBase &rewriter, Location loc, Value input, int64_t nSize, int64_t wSize, int64_t cSize, int64_t kwSize, int strideW, int dilationW, int64_t wSizeStep, bool isSingleChanneled)
	Helper function to extract the input slices after filter is unrolled along kw. More...

static SmallVector< Value >	extractConvFilterSlices (RewriterBase &rewriter, Location loc, Value filter, int64_t kwSize)
	Helper function to extract the filter slices after filter is unrolled along kw. More...

static SmallVector< Value >	extractConvResultSlices (RewriterBase &rewriter, Location loc, Value res, int64_t nSize, int64_t wSize, int64_t fSize, int64_t wSizeStep, bool isSingleChanneled)
	Helper function to extract the result slices after filter is unrolled along kw. More...

static Value	insertConvResultSlices (RewriterBase &rewriter, Location loc, Value res, int64_t wSize, int64_t wSizeStep, SmallVectorImpl< Value > &resVals, bool isSingleChanneled)
	Helper function to insert the computed result slices. More...

static AffineMap	reindexIndexingMap (AffineMap map)
	Given an indexing `map` coming from a LinalgOp indexing, restricted to a projectedPermutation, compress the unused dimensions to serve as a permutation_map for a vector transfer operation. More...

static Operation *	matchLinalgReduction (OpOperand *outputOperand)
	Check whether `outputOperand` is a reduction with a single combiner operation. More...

static Value	broadcastIfNeeded (OpBuilder &b, Value value, Type dstType)
	Broadcast `value` to the type `dstType` if possible. More...

static Operation *	buildMultiDimReduce (OpBuilder &b, Operation *reduceOp, Value valueToReduce, Value acc, ArrayRef< bool > dimsToMask)
	Create MultiDimReductionOp to compute the reduction for `reduceOp`. More...

static SmallVector< bool >	getDimsToReduce (LinalgOp linalgOp)

static bool	hasReductionIterator (LinalgOp &op)
	Check if `op` is a linalg.reduce or a linalg.generic that has at least one reduction iterator. More...

static Value	buildVectorWrite (RewriterBase &rewriter, Value value, OpOperand *outputOperand, VectorizationState &state)
	Build a vector.transfer_write of `value` into `outputOperand` at indices set to all `0`; where `outputOperand` is an output operand of the LinalgOp currently being vectorized. More...

static VectorizationResult	vectorizeLinalgYield (RewriterBase &rewriter, Operation *op, const IRMapping &bvm, VectorizationState &state, LinalgOp linalgOp, SmallVectorImpl< Value > &newResults)
	Helper function to vectorize the terminator of a `linalgOp`. More...

static VectorizationResult	vectorizeLinalgIndex (RewriterBase &rewriter, VectorizationState &state, Operation *op, LinalgOp linalgOp)
	Helper function to vectorize the index operations of a `linalgOp`. More...

static LogicalResult	tensorExtractVectorizationPrecondition (Operation *op, bool vectorizeNDExtract)
	Helper function to check if the tensor.extract can be vectorized by the custom hook vectorizeTensorExtract. More...

static Value	calculateGatherOffset (RewriterBase &rewriter, VectorizationState &state, tensor::ExtractOp extractOp, const IRMapping &bvm)
	Calculates the offsets (`$index_vec`) for `vector.gather` operations generated from `tensor.extract`. More...

static uint64_t	getTrailingNonUnitLoopDimIdx (LinalgOp linalgOp)
	Find the index of the trailing non-unit dim in linalgOp. More...

static bool	isLoopInvariantIdx (LinalgOp &linalgOp, Value &val, VectorType resType)
	Checks whether `val` can be used for calculating a loop invariant index. More...

static bool	isContiguousLoadIdx (LinalgOp &linalgOp, Value &val, bool &foundIndexOp, VectorType resType)
	Check whether `val` could be used for calculating the trailing index for a contiguous load operation. More...

static VectorMemoryAccessKind	getTensorExtractMemoryAccessPattern (tensor::ExtractOp extractOp, LinalgOp &linalgOp, VectorType resType)
	Infer the memory access pattern for the input ExtractOp. More...

static VectorizationResult	vectorizeTensorExtract (RewriterBase &rewriter, VectorizationState &state, Operation *op, LinalgOp linalgOp, const IRMapping &bvm)
	Helper function to vectorize the tensor.extract operations. More...

static Operation *	reduceIfNeeded (OpBuilder &b, LinalgOp linalgOp, Operation *op, Value reduceValue, Value initialValue, const IRMapping &bvm)
	Emit reduction operations if the shape of the value to reduce is different from the result shape. More...

static VectorizationResult	vectorizeOneOp (RewriterBase &rewriter, VectorizationState &state, LinalgOp linalgOp, Operation *op, const IRMapping &bvm, ArrayRef< CustomVectorizationHook > customVectorizationHooks)
	Generic vectorization for a single operation `op`, given already vectorized operands carried by `bvm`. More...

static LogicalResult	vectorizeAsLinalgGeneric (RewriterBase &rewriter, VectorizationState &state, LinalgOp linalgOp, SmallVectorImpl< Value > &newResults)
	Generic vectorization function that rewrites the body of a `linalgOp` into vector form. More...

static SmallVector< int64_t >	getTiledPackShape (tensor::PackOp packOp, ArrayRef< int64_t > destShape)
	Given a tensor::PackOp, return the `dest` shape before any packing permutations. More...

static Operation *	createWriteOrMaskedWrite (OpBuilder &builder, Location loc, Value input, SmallVector< OpFoldResult > destSizes, ArrayRef< int64_t > inputVectorSizes, bool useInBoundsInsteadOfMasking)
	Given an input, the mixed destSizes, and the vector sizes for vectorization, create an empty destination tensor and create a TransferWriteOp from the input to the empty tensor. More...

static LogicalResult	vectorizeAsTensorPackOp (RewriterBase &rewriter, tensor::PackOp packOp, ArrayRef< int64_t > inputVectorSizes, SmallVectorImpl< Value > &newResults)
	Vectorize tensor::PackOp with (1) static innerTiles (2) constant padding value and (3) input vector sizes into: masked_transfer_read->shape_cast->transpose->transfer_write_in_bounds As in the following example: pack = tensor.pack src inner_dims_pos = [2, 1] inner_tiles = [16, 2] into dst : tensor<32x8x16xf32> -> tensor<32x4x1x16x2xf32> More...

static LogicalResult	vectorizeAsTensorUnpackOp (RewriterBase &rewriter, tensor::UnPackOp unpackOp, ArrayRef< int64_t > inputVectorSizes, SmallVectorImpl< Value > &newResults)
	Vectorize a `tensor::UnPackOp` to these 4 Ops: Vector::TransferReadOp - Reads a vector from the source tensor vector::TransposeOp - Transpose the Source tensor ShapeCastOp - Reshape the data based on the target. More...

static LogicalResult	vectorizeAsTensorPadOp (RewriterBase &rewriter, tensor::PadOp padOp, ArrayRef< int64_t > inputVectorSizes, SmallVectorImpl< Value > &newResults)
	Vectorize a `padOp` with (1) static result type, (2) constant padding value and (3) all-zero lowPad to `transfer_write_in_bounds(transfer_read_masked(pad_source, pad_value))`. More...

static LogicalResult	reductionPreconditions (LinalgOp op)

static LogicalResult	vectorizeDynamicConvOpPrecondition (linalg::LinalgOp conv, bool flatten1DDepthwiseConv)

static LogicalResult	vectorizeDynamicLinalgOpPrecondition (linalg::LinalgOp op, bool flatten1DDepthwiseConv)

static LogicalResult	vectorizeUnPackOpPrecondition (tensor::UnPackOp unpackOp, ArrayRef< int64_t > inputVectorSizes)
	Need to check if the inner-tiles are static/constant. More...

static LogicalResult	vectorizeLinalgOpPrecondition (LinalgOp linalgOp, ArrayRef< int64_t > inputVectorSizes, bool vectorizeNDExtract, bool flatten1DDepthwiseConv)

static LogicalResult	vectorizePackOpPrecondition (tensor::PackOp packOp, ArrayRef< int64_t > inputVectorSizes)

static LogicalResult	vectorizePadOpPrecondition (tensor::PadOp padOp, ArrayRef< int64_t > inputVectorSizes)

static LogicalResult	vectorizeScalableVectorPrecondition (Operation *op, ArrayRef< int64_t > inputVectorSizes, ArrayRef< bool > inputScalableVecDims)
	Preconditions for scalable vectors. More...

static void	convertAffineApply (RewriterBase &rewriter, LinalgOp linalgOp)
	Converts affine.apply Ops to arithmetic operations. More...

static Value	getStaticPadVal (Operation *op)
	Returns the effective Pad value for the input op, provided it's a scalar. More...

static bool	mayExistInterleavedUses (Operation *firstOp, Operation *secondOp, ValueRange values)
	Check whether there is any interleaved use of any `values` between `firstOp` and `secondOp`. More...

static memref::SubViewOp	getSubViewUseIfUnique (Value v)
	Return the unique subview use of `v` if it is indeed unique, null otherwise. More...

template<int N>
static void	bindShapeDims (ShapedType shapedType)

template<int N, typename IntTy , typename... IntTy2>
static void	bindShapeDims (ShapedType shapedType, IntTy &val, IntTy2 &...vals)

template<typename... IntTy>
static void	bindShapeDims (ShapedType shapedType, IntTy &...vals)
	Bind a pack of int& to the leading dimensions of shapedType.getShape(). More...

Macro Definition Documentation

◆ DBGS

#define DBGS ( ) (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")

Definition at line 52 of file Vectorization.cpp.

◆ DEBUG_TYPE

#define DEBUG_TYPE "linalg-vectorization"

Definition at line 50 of file Vectorization.cpp.

◆ LDBG

#define LDBG ( X ) LLVM_DEBUG(DBGS() << X << "\n")

Definition at line 53 of file Vectorization.cpp.

Typedef Documentation

◆ CustomVectorizationHook

using CustomVectorizationHook = std::function<VectorizationResult(Operation *, const IRMapping &)>

Definition at line 698 of file Vectorization.cpp.

◆ CustomVectorizationPrecondition

using CustomVectorizationPrecondition = std::function<LogicalResult(Operation *, bool)>

Definition at line 692 of file Vectorization.cpp.

Enumeration Type Documentation

◆ Conv1DOpOrder

enum Conv1DOpOrder

strong

Helper enum to represent conv1d input traversal order.

Enumerator
W
Ncw
Nwc

Definition at line 516 of file Vectorization.cpp.

◆ VectorizationStatus

enum VectorizationStatus

Helper data structure to represent the result of vectorization.

In certain specific cases, like terminators, we do not want to propagate or replace the results of the original op (see NoReplace below).

Enumerator
Failure	Op failed to vectorize.
NoReplace	Op vectorized and custom function took care of replacement logic.
NewOp	Op vectorized into a new Op whose results will replace original Op's results.

Definition at line 524 of file Vectorization.cpp.

◆ VectorMemoryAccessKind

enum VectorMemoryAccessKind

Enumerator
ScalarBroadcast
Contiguous
Gather

Definition at line 842 of file Vectorization.cpp.

Function Documentation

◆ bindShapeDims() [1/3]

template<int N>

static void bindShapeDims ( ShapedType shapedType )

static

Definition at line 2980 of file Vectorization.cpp.

Referenced by bindShapeDims().

◆ bindShapeDims() [2/3]

template<typename... IntTy>

static void bindShapeDims	(	ShapedType	shapedType,
		IntTy &...	vals
	)

static

Bind a pack of int& to the leading dimensions of shapedType.getShape().

Definition at line 2990 of file Vectorization.cpp.

◆ bindShapeDims() [3/3]

template<int N, typename IntTy , typename... IntTy2>

static void bindShapeDims	(	ShapedType	shapedType,
		IntTy &	val,
		IntTy2 &...	vals
	)

static

Definition at line 2983 of file Vectorization.cpp.

References bindShapeDims().

◆ broadcastIfNeeded()

static Value broadcastIfNeeded	(	OpBuilder &	b,
		Value	value,
		Type	dstType
	)

static

Broadcast value to the type dstType if possible.

Return value otherwise.

Definition at line 591 of file Vectorization.cpp.

References mlir::OpBuilder::createOrFold(), mlir::OpBuilder::getInsertionPoint(), mlir::Value::getType(), mlir::vector::isBroadcastableTo(), and mlir::vector::Success.

◆ buildMultiDimReduce()

static Operation* buildMultiDimReduce	(	OpBuilder &	b,
		Operation *	reduceOp,
		Value	valueToReduce,
		Value	acc,
		ArrayRef< bool >	dimsToMask
	)

static

Create MultiDimReductionOp to compute the reduction for reduceOp.

This assumes that reduceOp has two operands and one of them is the reduction initial value.

Definition at line 608 of file Vectorization.cpp.

References mlir::OpBuilder::create(), mlir::linalg::getCombinerOpKind(), and mlir::Operation::getLoc().

Referenced by reduceIfNeeded().

◆ buildVectorWrite()

static Value buildVectorWrite	(	RewriterBase &	rewriter,
		Value	value,
		OpOperand *	outputOperand,
		VectorizationState &	state
	)

static

Build a vector.transfer_write of value into outputOperand at indices set to all 0; where outputOperand is an output operand of the LinalgOp currently being vectorized.

If dest has null rank, build a memref.store. Return the produced value or null if no value is produced.

Definition at line 636 of file Vectorization.cpp.

◆ calculateGatherOffset()

static Value calculateGatherOffset	(	RewriterBase &	rewriter,
		VectorizationState &	state,
		tensor::ExtractOp	extractOp,
		const IRMapping &	bvm
	)

static

Calculates the offsets ($index_vec) for vector.gather operations generated from tensor.extract.

The offset is calculated as follows (example using scalar values):

offset = extractOp.indices[0]
for (i = 1; i < numIndices; i++)
  offset = extractOp.dimSize[i] * offset + extractOp.indices[i];

For tensor<45 x 80 x 15 x f32> and index [1, 2, 3], this leads to:

offset = ( ( 1 ) * 80 + 2 ) * 15 + 3

Definition at line 811 of file Vectorization.cpp.

◆ convertAffineApply()

static void convertAffineApply	(	RewriterBase &	rewriter,
		LinalgOp	linalgOp
	)

static

Converts affine.apply Ops to arithmetic operations.

Definition at line 2151 of file Vectorization.cpp.

References mlir::affine::expandAffineExpr(), mlir::RewriterBase::replaceOp(), and mlir::OpBuilder::setInsertionPoint().

◆ createWriteOrMaskedWrite()

static Operation* createWriteOrMaskedWrite	(	OpBuilder &	builder,
		Location	loc,
		Value	input,
		SmallVector< OpFoldResult >	destSizes,
		ArrayRef< int64_t >	inputVectorSizes,
		bool	useInBoundsInsteadOfMasking
	)

static

Given an input, the mixed destSizes, and the vector sizes for vectorization, create an empty destination tensor and create a TransferWriteOp from the input to the empty tensor.

If the destination shape is not the same as the inputVectorSizes for the first rank(inputVectorSizes) dims, then create a mask for the write. If useInBoundsInsteadOfMasking is set, then update the inBounds attribute of the transfer write op instead of masking.

Definition at line 1484 of file Vectorization.cpp.

References mlir::OpBuilder::create(), mlir::get(), mlir::Builder::getI1Type(), mlir::Value::getType(), and mlir::vector::maskOperation().

Referenced by vectorizeAsTensorPackOp(), vectorizeAsTensorPadOp(), and vectorizeAsTensorUnpackOp().

◆ extractConvFilterSlices()

static SmallVector<Value> extractConvFilterSlices	(	RewriterBase &	rewriter,
		Location	loc,
		Value	filter,
		int64_t	kwSize
	)

static

Helper function to extract the filter slices after filter is unrolled along kw.

Definition at line 116 of file Vectorization.cpp.

References mlir::OpBuilder::create().

◆ extractConvInputSlices()

static SmallVector<Value> extractConvInputSlices	(	RewriterBase &	rewriter,
		Location	loc,
		Value	input,
		int64_t	nSize,
		int64_t	wSize,
		int64_t	cSize,
		int64_t	kwSize,
		int	strideW,
		int	dilationW,
		int64_t	wSizeStep,
		bool	isSingleChanneled
	)

static

Helper function to extract the input slices after filter is unrolled along kw.

Definition at line 81 of file Vectorization.cpp.

References mlir::OpBuilder::create().

◆ extractConvResultSlices()

static SmallVector<Value> extractConvResultSlices	(	RewriterBase &	rewriter,
		Location	loc,
		Value	res,
		int64_t	nSize,
		int64_t	wSize,
		int64_t	fSize,
		int64_t	wSizeStep,
		bool	isSingleChanneled
	)

static

Helper function to extract the result slices after filter is unrolled along kw.

Definition at line 132 of file Vectorization.cpp.

References mlir::OpBuilder::create().

◆ getDimsToReduce()

static SmallVector<bool> getDimsToReduce ( LinalgOp linalgOp )

static

Definition at line 617 of file Vectorization.cpp.

References mlir::linalg::isReductionIterator().

Referenced by reduceIfNeeded().

◆ getSingleOpOfType()

template<typename OpType >

static OpType getSingleOpOfType ( Block & block )

static

Return the unique instance of OpType in block if it is indeed unique.

Return null if none or more than 1 instances exist.

Definition at line 65 of file Vectorization.cpp.

References mlir::WalkResult::advance(), mlir::WalkResult::interrupt(), and mlir::Block::walk().

◆ getStaticPadVal()

static Value getStaticPadVal ( Operation * op )

static

Returns the effective Pad value for the input op, provided it's a scalar.

Many Ops exhibit pad-like behaviour, but this isn't always explicit. If this Op performs padding, retrieve the padding value provided that it's a scalar and static/fixed for all the padded values. Returns an empty value otherwise.

Definition at line 2543 of file Vectorization.cpp.

Referenced by InsertSliceVectorizePattern::matchAndRewrite().

◆ getSubViewUseIfUnique()

static memref::SubViewOp getSubViewUseIfUnique ( Value v )

static

Return the unique subview use of v if it is indeed unique, null otherwise.

Definition at line 2828 of file Vectorization.cpp.

References mlir::Value::getUses().

Referenced by mlir::linalg::LinalgCopyVTRForwardingPattern::matchAndRewrite(), and mlir::linalg::LinalgCopyVTWForwardingPattern::matchAndRewrite().

◆ getTensorExtractMemoryAccessPattern()

static VectorMemoryAccessKind getTensorExtractMemoryAccessPattern	(	tensor::ExtractOp	extractOp,
		LinalgOp &	linalgOp,
		VectorType	resType
	)

static

Infer the memory access pattern for the input ExtractOp.

Based on the ExtractOp result shape and the access indices, decides whether this Op corresponds to a contiguous load (including a broadcast of a scalar) or a gather load. When analysing the ExtractOp indices (to identify contiguous loads), this method looks for "loop" invariant indices (e.g. block arguments) and indices that change linearly (e.g. via linalg.index Op).

Note that it is always safe to use gather load operations for contiguous loads (albeit slow), but not vice-versa. When in doubt, bail out and assume that extractOp is a gather load.

Definition at line 993 of file Vectorization.cpp.

References Contiguous, mlir::detail::enumerate(), Gather, isContiguousLoadIdx(), isLoopInvariantIdx(), LDBG, and ScalarBroadcast.

◆ getTiledPackShape()

static SmallVector<int64_t> getTiledPackShape	(	tensor::PackOp	packOp,
		ArrayRef< int64_t >	destShape
	)

static

Given a tensor::PackOp, return the dest shape before any packing permutations.

Definition at line 1473 of file Vectorization.cpp.

References mlir::applyPermutation(), and mlir::tensor::getPackInverseDestPerm().

Referenced by vectorizeAsTensorPackOp().

◆ getTrailingNonUnitLoopDimIdx()

static uint64_t getTrailingNonUnitLoopDimIdx ( LinalgOp linalgOp )

static

Find the index of the trailing non-unit dim in linalgOp.

This hook is used when checking whether tensor.extract Op (within a linalg.generic Op) represents a contiguous load operation.

Note that when calling this hook, it is assumed that the output vector is effectively 1D. Other cases (i.e. reading n-D vectors) should've been labelled as a gather load before entering this method.

Following on from the above, it is assumed that:

- for statically shaped loops, when no masks are used, only one dim is != 1 (that's what the shape of the output vector is based on);
- for dynamically shaped loops, there might be more non-unit dims as the output vector type is user-specified.

TODO: Statically shaped loops + vector masking

Definition at line 859 of file Vectorization.cpp.

Referenced by isContiguousLoadIdx().

◆ hasReductionIterator()

static bool hasReductionIterator ( LinalgOp & op )

static

Check if op is a linalg.reduce or a linalg.generic that has at least one reduction iterator.

Definition at line 624 of file Vectorization.cpp.

References mlir::linalg::isReductionIterator().

Referenced by vectorizeDynamicLinalgOpPrecondition(), and vectorizeScalableVectorPrecondition().

◆ insertConvResultSlices()

static Value insertConvResultSlices	(	RewriterBase &	rewriter,
		Location	loc,
		Value	res,
		int64_t	wSize,
		int64_t	wSizeStep,
		SmallVectorImpl< Value > &	resVals,
		bool	isSingleChanneled
	)

static

Helper function to insert the computed result slices.

Definition at line 158 of file Vectorization.cpp.

References mlir::OpBuilder::create().

◆ isContiguousLoadIdx()

static bool isContiguousLoadIdx	(	LinalgOp &	linalgOp,
		Value &	val,
		bool &	foundIndexOp,
		VectorType	resType
	)

static

Check whether val could be used for calculating the trailing index for a contiguous load operation.

There are currently 3 types of values that are allowed here:

1. loop-invariant values,
2. values that increment by 1 with every loop iteration,
3. results of basic arithmetic operations (linear and continuous) involving 1., 2. and 3.

This method returns True if indeed only such values are used in calculating val.

Additionally, the trailing index for a contiguous load operation should increment by 1 with every loop iteration, i.e. be based on:

linalg.index <dim>, where <dim> is the trailing non-unit dim of the iteration space (this way, linalg.index <dim> increments by 1 with every loop iteration). foundIndexOp is updated to true when such an Op is found.

Definition at line 937 of file Vectorization.cpp.

References mlir::Value::getDefiningOp(), and getTrailingNonUnitLoopDimIdx().

Referenced by getTensorExtractMemoryAccessPattern().

◆ isLoopInvariantIdx()

static bool isLoopInvariantIdx	(	LinalgOp &	linalgOp,
		Value &	val,
		VectorType	resType
	)

static

Checks whether val can be used for calculating a loop invariant index.

Definition at line 877 of file Vectorization.cpp.

References mlir::Value::getDefiningOp().

Referenced by getTensorExtractMemoryAccessPattern().

◆ matchLinalgReduction()

static Operation* matchLinalgReduction ( OpOperand * outputOperand )

static

Check whether outputOperand is a reduction with a single combiner operation.

Return the combiner operation of the reduction. Return nullptr otherwise. Multiple reduction operations would impose an ordering between reduction dimensions and is currently unsupported in Linalg. This limitation is motivated by the fact that e.g. min(max(X)) != max(min(X))

Definition at line 575 of file Vectorization.cpp.

References mlir::OpOperand::getOperandNumber(), mlir::detail::IROperandBase::getOwner(), and mlir::matchReduction().

Referenced by reductionPreconditions().

◆ mayExistInterleavedUses()

static bool mayExistInterleavedUses	(	Operation *	firstOp,
		Operation *	secondOp,
		ValueRange	values
	)

static

Check whether there is any interleaved use of any values between firstOp and secondOp.

Conservatively return true if any op or value is in a different block.

Definition at line 2801 of file Vectorization.cpp.

References mlir::Operation::getBlock(), mlir::Operation::isBeforeInBlock(), and LDBG.

Referenced by mlir::linalg::LinalgCopyVTRForwardingPattern::matchAndRewrite(), and mlir::linalg::LinalgCopyVTWForwardingPattern::matchAndRewrite().

◆ reduceIfNeeded()

static Operation* reduceIfNeeded	(	OpBuilder &	b,
		LinalgOp	linalgOp,
		Operation *	op,
		Value	reduceValue,
		Value	initialValue,
		const IRMapping &	bvm
	)

static

Emit reduction operations if the shape of the value to reduce is different from the result shape.

Definition at line 1207 of file Vectorization.cpp.

References buildMultiDimReduce(), getDimsToReduce(), mlir::Value::getType(), and mlir::IRMapping::lookup().

◆ reductionPreconditions()

static LogicalResult reductionPreconditions ( LinalgOp op )

static

Definition at line 1788 of file Vectorization.cpp.

References mlir::linalg::getCombinerOpKind(), mlir::AffineMap::isPermutation(), mlir::linalg::isReductionIterator(), LDBG, and matchLinalgReduction().

Referenced by vectorizeDynamicLinalgOpPrecondition(), and vectorizeLinalgOpPrecondition().

◆ reindexIndexingMap()

static AffineMap reindexIndexingMap ( AffineMap map )

static

Given an indexing map coming from a LinalgOp indexing, restricted to a projectedPermutation, compress the unused dimensions to serve as a permutation_map for a vector transfer operation.

For example, given a linalg op such as:

%0 = linalg.generic {
     indexing_maps = affine_map<(d0, d1, d2, d3, d4) -> (d4, d0, d2)>,
     indexing_maps = affine_map<(d0, d1, d2, d3, d4) -> (d1, d3)>
   }
  ins(%0 : tensor<2x3x4xf32>)
 outs(%1 : tensor<5x6xf32>)

the iteration domain size of the linalg op is 3x5x4x6x2. The first affine map is reindexed to affine_map<(d0, d1, d2) -> (d2, d0, d1)>, the second affine map is reindexed to affine_map<(d0, d1) -> (d0, d1)>.

Definition at line 505 of file Vectorization.cpp.

References mlir::compressUnusedDims(), and mlir::AffineMap::isProjectedPermutation().

◆ tensorExtractVectorizationPrecondition()

static LogicalResult tensorExtractVectorizationPrecondition	(	Operation *	op,
		bool	vectorizeNDExtract
	)

static

Helper function to check if the tensor.extract can be vectorized by the custom hook vectorizeTensorExtract.

Definition at line 777 of file Vectorization.cpp.

Referenced by vectorizeLinalgOpPrecondition().

◆ vectorizeAsLinalgGeneric()

static LogicalResult vectorizeAsLinalgGeneric	(	RewriterBase &	rewriter,
		VectorizationState &	state,
		LinalgOp	linalgOp,
		SmallVectorImpl< Value > &	newResults
	)

static

Generic vectorization function that rewrites the body of a linalgOp into vector form.

Generic vectorization proceeds as follows:

1. Verify the linalgOp has one non-empty region.
2. Values defined above the region are mapped to themselves and will be broadcasted on a per-need basis by their consumers.
3. Each region argument is vectorized into a vector.transfer_read (or 0-d load). TODO: Reuse opportunities for RAR dependencies.
4a. Register CustomVectorizationHook for YieldOp to capture the results.
4b. Register CustomVectorizationHook for IndexOp to access the iteration indices.
5. Iteratively call vectorizeOneOp on the region operations.

When broadcastToMaximalCommonShape is set to true, eager broadcasting is performed to the maximal common vector size implied by the linalgOp iteration space. This eager broadcasting is introduced in the permutation_map of the vector.transfer_read operations. The eager broadcasting makes it trivial to determine where broadcasts, transposes and reductions should occur, without any bookkeeping. The tradeoff is that, in the absence of good canonicalizations, the amount of work increases. This is not deemed a problem as we expect canonicalizations and foldings to aggressively clean up the useless work.

Definition at line 1357 of file Vectorization.cpp.

◆ vectorizeAsTensorPackOp()

static LogicalResult vectorizeAsTensorPackOp	(	RewriterBase &	rewriter,
		tensor::PackOp	packOp,
		ArrayRef< int64_t >	inputVectorSizes,
		SmallVectorImpl< Value > &	newResults
	)

static

Vectorize tensor::PackOp with (1) static innerTiles, (2) constant padding value and (3) input vector sizes into: masked_transfer_read->shape_cast->transpose->transfer_write_in_bounds. As in the following example:

  %pack = tensor.pack %src inner_dims_pos = [2, 1] inner_tiles = [16, 2]
      into %dst : tensor<32x8x16xf32> -> tensor<32x4x1x16x2xf32>

This pack would be vectorized to:

  %load = vector.mask %mask {
    vector.transfer_read %arg0[%c0, %c0, %c0], %cst
        {in_bounds = [true, true, true]} :
        tensor<32x7x16xf32>, vector<32x8x16xf32>
  } : vector<32x8x16xi1> -> vector<32x8x16xf32>
  %shape_cast = vector.shape_cast %load : vector<32x8x16xf32>
                                        to vector<32x4x2x1x16xf32>
  %transpose = vector.transpose %shape_cast, [0, 1, 3, 4, 2]
      : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
  %write = vector.transfer_write %transpose,
      %empty[%c0_0, %c0_0, %c0_0, %c0_0, %c0_0]
      {in_bounds = [true, true, true, true, true]}
      : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>

If the (3) input vector sizes are not provided, the vector sizes are determined by the result tensor shape. Also, we update the inBounds attribute instead of masking.

Definition at line 1557 of file Vectorization.cpp.

References mlir::applyPermutationToVector(), mlir::OpBuilder::create(), mlir::vector::createReadOrMaskedRead(), createWriteOrMaskedWrite(), mlir::detail::enumerate(), mlir::get(), mlir::tensor::getPackInverseDestPerm(), getTiledPackShape(), mlir::Builder::getZeroAttr(), mlir::invertPermutationVector(), and mlir::OpBuilder::setInsertionPoint().

◆ vectorizeAsTensorPadOp()

static LogicalResult vectorizeAsTensorPadOp	(	RewriterBase &	rewriter,
		tensor::PadOp	padOp,
		ArrayRef< int64_t >	inputVectorSizes,
		SmallVectorImpl< Value > &	newResults
	)

static

Vectorize a padOp with (1) static result type, (2) constant padding value and (3) all-zero lowPad to transfer_write_in_bounds(transfer_read_masked(pad_source, pad_value)).

Definition at line 1760 of file Vectorization.cpp.

References mlir::vector::createReadOrMaskedRead(), createWriteOrMaskedWrite(), mlir::Operation::getResult(), and mlir::OpBuilder::setInsertionPoint().

◆ vectorizeAsTensorUnpackOp()

static LogicalResult vectorizeAsTensorUnpackOp	(	RewriterBase &	rewriter,
		tensor::UnPackOp	unpackOp,
		ArrayRef< int64_t >	inputVectorSizes,
		SmallVectorImpl< Value > &	newResults
	)

static

Vectorize a tensor::UnPackOp to these 4 Ops:

- vector::TransferReadOp - Reads a vector from the source tensor.
- vector::TransposeOp - Transposes the source tensor.
- vector::ShapeCastOp - Reshapes the data based on the target.
- vector::TransferWriteOp - Writes the result vector back to the destination tensor.

If the vector sizes are not provided:

- the vector sizes are determined by the input operand and attributes,
- the inBounds attribute is updated instead of masking.

Definition at line 1632 of file Vectorization.cpp.

References mlir::applyPermutationToVector(), mlir::OpBuilder::create(), mlir::vector::createReadOrMaskedRead(), createWriteOrMaskedWrite(), mlir::detail::divideCeil(), mlir::detail::enumerate(), mlir::get(), mlir::Operation::getResult(), mlir::tensor::getUnPackInverseSrcPerm(), mlir::Builder::getZeroAttr(), LDBG, and mlir::OpBuilder::setInsertionPoint().

◆ vectorizeConvolution()

static FailureOr< Operation * > vectorizeConvolution	(	RewriterBase &	rewriter,
		LinalgOp	convOp,
		ArrayRef< int64_t >	inputVecSizes = `{}`,
		ArrayRef< bool >	inputVecScalableFlags = `{}`,
		bool	flatten1DDepthwiseConv = `false`
	)

static

Try to vectorize convOp as a convolution.

Helper function to vectorize a LinalgOp with convolution semantics.

Definition at line 3822 of file Vectorization.cpp.

Referenced by VectorizeConvolution::matchAndRewrite().

◆ vectorizeDynamicConvOpPrecondition()

static LogicalResult vectorizeDynamicConvOpPrecondition	(	linalg::LinalgOp	conv,
		bool	flatten1DDepthwiseConv
	)

static

Definition at line 1808 of file Vectorization.cpp.

References mlir::Value::getType(), and LDBG.

Referenced by vectorizeDynamicLinalgOpPrecondition().

◆ vectorizeDynamicLinalgOpPrecondition()

static LogicalResult vectorizeDynamicLinalgOpPrecondition	(	linalg::LinalgOp	op,
		bool	flatten1DDepthwiseConv
	)

static

Definition at line 1836 of file Vectorization.cpp.

References hasReductionIterator(), mlir::linalg::isElementwise(), LDBG, reductionPreconditions(), and vectorizeDynamicConvOpPrecondition().

Referenced by vectorizeLinalgOpPrecondition().

◆ vectorizeLinalgIndex()

static VectorizationResult vectorizeLinalgIndex	(	RewriterBase &	rewriter,
		VectorizationState &	state,
		Operation *	op,
		LinalgOp	linalgOp
	)

static

Helper function to vectorize the index operations of a linalgOp.

Returns VectorizationStatus::NewOp to signal to the vectorization algorithm that it should map the produced operations. This function is meant to be used as a CustomVectorizationHook.

Definition at line 733 of file Vectorization.cpp.

◆ vectorizeLinalgOpPrecondition()

static LogicalResult vectorizeLinalgOpPrecondition	(	LinalgOp	linalgOp,
		ArrayRef< int64_t >	inputVectorSizes,
		bool	vectorizeNDExtract,
		bool	flatten1DDepthwiseConv
	)

static

Definition at line 1877 of file Vectorization.cpp.

References mlir::linalg::allIndexingsAreProjectedPermutation(), mlir::Region::front(), mlir::Operation::getRegion(), mlir::linalg::isElementwise(), mlir::vector::isValidMaskedInputVector(), LDBG, reductionPreconditions(), tensorExtractVectorizationPrecondition(), and vectorizeDynamicLinalgOpPrecondition().

Referenced by mlir::linalg::vectorizeOpPrecondition().

◆ vectorizeLinalgYield()

static VectorizationResult vectorizeLinalgYield	(	RewriterBase &	rewriter,
		Operation *	op,
		const IRMapping &	bvm,
		VectorizationState &	state,
		LinalgOp	linalgOp,
		SmallVectorImpl< Value > &	newResults
	)

static

Helper function to vectorize the terminator of a linalgOp.

New result vector values are appended to newResults. Returns VectorizationStatus::NoReplace to signal to the vectorization algorithm that it should not try to map the produced operations; instead, the results are returned through the newResults vector, making them available to the vectorization algorithm for RAUW (replace-all-uses-with). This function is meant to be used as a CustomVectorizationHook.

Definition at line 709 of file Vectorization.cpp.

◆ vectorizeOneOp()

static VectorizationResult vectorizeOneOp	(	RewriterBase &	rewriter,
		VectorizationState &	state,
		LinalgOp	linalgOp,
		Operation *	op,
		const IRMapping &	bvm,
		ArrayRef< CustomVectorizationHook >	customVectorizationHooks
	)

static

Generic vectorization for a single operation op, given already vectorized operands carried by bvm.

Vectorization occurs as follows:

1. Try to apply any of the customVectorizationHooks and return its result on success.
2. Clone any constant in the current scope without vectorization: each consumer of the constant will later determine the shape to which the constant needs to be broadcast.
3. Fail on any remaining non-ElementwiseMappable op. It is the purpose of the customVectorizationHooks to cover such cases.
4. Clone op in vector form to a vector of the shape prescribed by the first operand of maximal rank. Other operands have smaller rank and are broadcast accordingly. It is assumed that this broadcast is always legal; otherwise, it means one of the customVectorizationHooks is incorrect.

This function assumes all operands of op have been vectorized and are in the bvm mapping. As a consequence, this function is meant to be called on a topologically-sorted list of ops. This function does not update bvm but returns a VectorizationStatus that instructs the caller what bvm update needs to occur.

Definition at line 1243 of file Vectorization.cpp.

◆ vectorizePackOpPrecondition()

static LogicalResult vectorizePackOpPrecondition	(	tensor::PackOp	packOp,
		ArrayRef< int64_t >	inputVectorSizes
	)

static

Definition at line 1945 of file Vectorization.cpp.

References mlir::vector::isValidMaskedInputVector(), LDBG, mlir::m_Constant(), and mlir::matchPattern().

Referenced by mlir::linalg::vectorizeOpPrecondition().

◆ vectorizePadOpPrecondition()

static LogicalResult vectorizePadOpPrecondition	(	tensor::PadOp	padOp,
		ArrayRef< int64_t >	inputVectorSizes
	)

static

Definition at line 1978 of file Vectorization.cpp.

References mlir::vector::isValidMaskedInputVector(), and LDBG.

Referenced by mlir::linalg::vectorizeOpPrecondition().

◆ vectorizeScalableVectorPrecondition()

static LogicalResult vectorizeScalableVectorPrecondition	(	Operation *	op,
		ArrayRef< int64_t >	inputVectorSizes,
		ArrayRef< bool >	inputScalableVecDims
	)

static

Preconditions for scalable vectors.

This is quite restrictive - it models the fact that in practice we would only make selected dimensions scalable.

Definition at line 2005 of file Vectorization.cpp.

References hasReductionIterator(), mlir::linalg::isElementwise(), and LDBG.

Referenced by mlir::linalg::vectorizeOpPrecondition().

◆ vectorizeTensorExtract()

static VectorizationResult vectorizeTensorExtract	(	RewriterBase &	rewriter,
		VectorizationState &	state,
		Operation *	op,
		LinalgOp	linalgOp,
		const IRMapping &	bvm
	)

static

Helper function to vectorize the tensor.extract operations.

Returns VectorizationStatus::NewOp to signal to the vectorization algorithm that it should map the produced operations. This function is meant to be used as a CustomVectorizationHook.

Definition at line 1074 of file Vectorization.cpp.

◆ vectorizeUnPackOpPrecondition()

static LogicalResult vectorizeUnPackOpPrecondition	(	tensor::UnPackOp	unpackOp,
		ArrayRef< int64_t >	inputVectorSizes
	)

static

Need to check if the inner-tiles are static/constant.

Definition at line 1857 of file Vectorization.cpp.

References mlir::vector::isValidMaskedInputVector(), and LDBG.

Referenced by mlir::linalg::vectorizeOpPrecondition().

Classes

Macros

Typedefs

Enumerations

Functions

Macro Definition Documentation

◆ DBGS

◆ DEBUG_TYPE

◆ LDBG

Typedef Documentation

◆ CustomVectorizationHook

◆ CustomVectorizationPrecondition

Enumeration Type Documentation

◆ Conv1DOpOrder

◆ VectorizationStatus

◆ VectorMemoryAccessKind

Function Documentation

◆ bindShapeDims() [1/3]

◆ bindShapeDims() [2/3]

◆ bindShapeDims() [3/3]

◆ broadcastIfNeeded()

◆ buildMultiDimReduce()

◆ buildVectorWrite()

◆ calculateGatherOffset()

◆ convertAffineApply()

◆ createWriteOrMaskedWrite()

◆ extractConvFilterSlices()

◆ extractConvInputSlices()

◆ extractConvResultSlices()

◆ getDimsToReduce()

◆ getSingleOpOfType()

◆ getStaticPadVal()

◆ getSubViewUseIfUnique()

◆ getTensorExtractMemoryAccessPattern()

◆ getTiledPackShape()

◆ getTrailingNonUnitLoopDimIdx()

◆ hasReductionIterator()

◆ insertConvResultSlices()

◆ isContiguousLoadIdx()

◆ isLoopInvariantIdx()

◆ matchLinalgReduction()

◆ mayExistInterleavedUses()

◆ reduceIfNeeded()

◆ reductionPreconditions()

◆ reindexIndexingMap()

◆ tensorExtractVectorizationPrecondition()

◆ vectorizeAsLinalgGeneric()

◆ vectorizeAsTensorPackOp()

◆ vectorizeAsTensorPadOp()

◆ vectorizeAsTensorUnpackOp()

◆ vectorizeConvolution()

◆ vectorizeDynamicConvOpPrecondition()

◆ vectorizeDynamicLinalgOpPrecondition()

◆ vectorizeLinalgIndex()

◆ vectorizeLinalgOpPrecondition()

◆ vectorizeLinalgYield()

◆ vectorizeOneOp()

◆ vectorizePackOpPrecondition()

◆ vectorizePadOpPrecondition()

◆ vectorizeScalableVectorPrecondition()

◆ vectorizeTensorExtract()

◆ vectorizeUnPackOpPrecondition()