MLIR 22.0.0git
Vectorization.cpp File Reference
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.h"
#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypeInterfaces.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/Value.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/DebugLog.h"
#include "llvm/Support/InterleavedRange.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

Go to the source code of this file.

Classes

struct  VectorizationState
 Contains the vectorization state and related methods used across the vectorization process of a given operation. More...
struct  VectorizationHookResult
 VectorizationHookResult contains the vectorized op returned from a CustomVectorizationHook. More...

Macros

#define DEBUG_TYPE   "linalg-vectorization"

Typedefs

using CustomVectorizationPrecondition
using CustomVectorizationHook

Enumerations

enum class  Conv1DOpOrder { W , Ncw , Nwc }
 Helper enum to represent conv1d input traversal order. More...
enum  VectorizationHookStatus { Failure = 0 , NoReplace , NewOp }
 Helper data structure to represent the result of vectorization for a single operation. More...
enum  VectorMemoryAccessKind { ScalarBroadcast , Contiguous , Gather }

Functions

static FailureOr< Operation * > vectorizeConvolution (RewriterBase &rewriter, LinalgOp convOp, ArrayRef< int64_t > inputVecSizes={}, ArrayRef< bool > inputVecScalableFlags={}, bool flatten1DDepthwiseConv=false)
 Try to vectorize convOp as a convolution.
static LogicalResult vectorizeAsInsertSliceOp (RewriterBase &rewriter, tensor::InsertSliceOp sliceOp, ArrayRef< int64_t > inputVectorSizes, SmallVectorImpl< Value > &newResults)
 Vectorize tensor::InsertSliceOp with:
static Value getStaticPadVal (Operation *op)
 Returns the effective Pad value for the input op, provided it's a scalar.
template<typename OpType>
static OpType getSingleOpOfType (Block &block)
 Return the unique instance of OpType in block if it is indeed unique.
static SmallVector< Value > extractConvInputSlices (RewriterBase &rewriter, Location loc, Value input, int64_t nSize, int64_t wSize, int64_t cSize, int64_t kwSize, int strideW, int dilationW, int64_t wSizeStep, bool isSingleChanneled)
 Helper function to extract the input slices after filter is unrolled along kw.
static SmallVector< Value > extractConvFilterSlices (RewriterBase &rewriter, Location loc, Value filter, int64_t kwSize)
 Helper function to extract the filter slices after filter is unrolled along kw.
static SmallVector< Value > extractConvResultSlices (RewriterBase &rewriter, Location loc, Value res, int64_t nSize, int64_t wSize, int64_t fSize, int64_t wSizeStep, bool isSingleChanneled)
 Helper function to extract the result slices after filter is unrolled along kw.
static Value insertConvResultSlices (RewriterBase &rewriter, Location loc, Value res, int64_t wSize, int64_t wSizeStep, SmallVectorImpl< Value > &resVals, bool isSingleChanneled)
 Helper function to insert the computed result slices.
static AffineMap reindexIndexingMap (AffineMap map)
 Given an indexing map coming from a LinalgOp indexing, restricted to a projectedPermutation, compress the unused dimensions to serve as a permutation_map for a vector transfer operation.
static Operation * matchLinalgReduction (OpOperand *outputOperand)
 Check whether outputOperand is a reduction with a single combiner operation.
static Value broadcastIfNeeded (OpBuilder &b, Value value, Type dstType)
 Broadcast value to a vector of shape if possible.
static Operation * buildMultiDimReduce (OpBuilder &b, Operation *reduceOp, Value valueToReduce, Value acc, ArrayRef< bool > dimsToMask)
 Create MultiDimReductionOp to compute the reduction for reduceOp.
static SmallVector< bool > getDimsToReduce (LinalgOp linalgOp)
static bool hasReductionIterator (LinalgOp &op)
 Check if op is a linalg.reduce or a linalg.generic that has at least one reduction iterator.
static Value buildVectorWrite (RewriterBase &rewriter, Value value, OpOperand *outputOperand, VectorizationState &state)
 Build a vector.transfer_write of value into outputOperand at indices set to all 0; where outputOperand is an output operand of the LinalgOp currently being vectorized.
static VectorizationHookResult vectorizeLinalgYield (RewriterBase &rewriter, Operation *op, const IRMapping &bvm, VectorizationState &state, LinalgOp linalgOp, SmallVectorImpl< Value > &newResults)
 Helper function to vectorize the terminator of a linalgOp.
static VectorizationHookResult vectorizeLinalgIndex (RewriterBase &rewriter, VectorizationState &state, Operation *op, LinalgOp linalgOp)
 Helper function to vectorize the index operations of a linalgOp.
static LogicalResult tensorExtractVectorizationPrecondition (Operation *op, bool vectorizeNDExtract)
 Helper function to check if the tensor.extract can be vectorized by the custom hook vectorizeTensorExtract.
static Value calculateGatherOffset (RewriterBase &rewriter, VectorizationState &state, tensor::ExtractOp extractOp, const IRMapping &bvm)
 Calculates the offsets ($index_vec) for vector.gather operations generated from tensor.extract.
static uint64_t getTrailingNonUnitLoopDimIdx (LinalgOp linalgOp)
 Find the index of the trailing non-unit dim in linalgOp.
static bool isLoopInvariantIdx (LinalgOp &linalgOp, Value &val, VectorType resType)
 Checks whether val can be used for calculating a loop invariant index.
static bool isContiguousLoadIdx (LinalgOp &linalgOp, Value &val, bool &foundIndexOp, VectorType resType)
 Check whether val could be used for calculating the trailing index for a contiguous load operation.
static VectorMemoryAccessKind getTensorExtractMemoryAccessPattern (tensor::ExtractOp extractOp, LinalgOp &linalgOp, VectorType resType)
 Infer the memory access pattern for the input ExtractOp.
static VectorizationHookResult vectorizeTensorExtract (RewriterBase &rewriter, VectorizationState &state, Operation *op, LinalgOp linalgOp, const IRMapping &bvm)
 Helper function to vectorize the tensor.extract operations.
static Operation * reduceIfNeeded (OpBuilder &b, LinalgOp linalgOp, Operation *op, Value reduceValue, Value initialValue, const IRMapping &bvm)
 Emit reduction operations if the shape of the value to reduce is different from the result shape.
static VectorizationHookResult vectorizeOneOp (RewriterBase &rewriter, VectorizationState &state, LinalgOp linalgOp, Operation *op, const IRMapping &bvm, ArrayRef< CustomVectorizationHook > customVectorizationHooks)
 Generic vectorization for a single operation op, given already vectorized operands carried by bvm.
static LogicalResult vectorizeAsLinalgGeneric (RewriterBase &rewriter, VectorizationState &state, LinalgOp linalgOp, SmallVectorImpl< Value > &newResults)
 Generic vectorization function that rewrites the body of a linalgOp into vector form.
static bool isMaskTriviallyFoldable (SmallVector< OpFoldResult > &maskSizes, SmallVector< Value > &writeIdxs, ArrayRef< int64_t > destShape, ArrayRef< int64_t > maskShape)
 Determines whether a mask for xfer_write is trivially "all true".
static Operation * createWriteOrMaskedWrite (OpBuilder &builder, Location loc, Value vecToStore, Value dest, SmallVector< Value > writeIndices={}, bool useInBoundsInsteadOfMasking=false)
 Creates an optionally masked TransferWriteOp.
static VectorType getCollapsedVecType (VectorType type, ArrayRef< AffineMap > reassociation)
 Given the re-associations, "collapses" the input Vector type.

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "linalg-vectorization"

Definition at line 50 of file Vectorization.cpp.

Typedef Documentation

◆ CustomVectorizationHook

Initial value:
std::function<VectorizationHookResult(Operation *, const IRMapping &)>
This is a utility class for mapping one set of IR entities to another.
Definition IRMapping.h:26
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
VectorizationHookResult contains the vectorized op returned from a CustomVectorizationHook.

Definition at line 793 of file Vectorization.cpp.

◆ CustomVectorizationPrecondition

Initial value:
std::function<LogicalResult(Operation *, bool)>

Definition at line 787 of file Vectorization.cpp.

Enumeration Type Documentation

◆ Conv1DOpOrder

enum class Conv1DOpOrder
strong

Helper enum to represent conv1d input traversal order.

Enumerator
W 
Ncw 
Nwc 

Definition at line 606 of file Vectorization.cpp.

◆ VectorizationHookStatus

Helper data structure to represent the result of vectorization for a single operation.

In certain specific cases, like terminators, we do not want to propagate.

Enumerator
Failure 

Op failed to vectorize.

NoReplace 

Op vectorized and custom function took care of replacement logic.

NewOp 

Op vectorized into a new Op whose results will replace original Op's results.

Definition at line 615 of file Vectorization.cpp.

◆ VectorMemoryAccessKind

Enumerator
ScalarBroadcast 
Contiguous 
Gather 

Definition at line 936 of file Vectorization.cpp.

Function Documentation

◆ broadcastIfNeeded()

Value broadcastIfNeeded ( OpBuilder & b,
Value value,
Type dstType )
static

Broadcast value to a vector of shape if possible.

Return value otherwise.

Definition at line 685 of file Vectorization.cpp.

References b, mlir::Value::getType(), mlir::vector::isBroadcastableTo(), and mlir::vector::Success.

Referenced by buildVectorWrite(), calculateGatherOffset(), and vectorizeOneOp().

◆ buildMultiDimReduce()

Operation * buildMultiDimReduce ( OpBuilder & b,
Operation * reduceOp,
Value valueToReduce,
Value acc,
ArrayRef< bool > dimsToMask )
static

Create MultiDimReductionOp to compute the reduction for reduceOp.

This assumes that reduceOp has two operands and one of them is the reduction initial value.

Definition at line 702 of file Vectorization.cpp.

References b, mlir::linalg::getCombinerOpKind(), and mlir::Operation::getLoc().

Referenced by reduceIfNeeded().

◆ buildVectorWrite()

Value buildVectorWrite ( RewriterBase & rewriter,
Value value,
OpOperand * outputOperand,
VectorizationState & state )
static

Build a vector.transfer_write of value into outputOperand at indices set to all 0; where outputOperand is an output operand of the LinalgOp currently being vectorized.

If dest has null rank, build a memref.store. Return the produced value or null if no value is produced.

Definition at line 730 of file Vectorization.cpp.

References broadcastIfNeeded(), mlir::arith::ConstantIndexOp::create(), mlir::IROperand< DerivedT, IRValueT >::get(), mlir::Builder::getBoolArrayAttr(), mlir::AffineMap::getContext(), mlir::getElementTypeOrSelf(), mlir::AffineMap::getFilteredIdentityMap(), mlir::Value::getLoc(), mlir::AffineMap::getNumInputs(), mlir::detail::IROperandBase::getOwner(), mlir::Operation::getResult(), mlir::Operation::getResults(), mlir::Value::getType(), indices, mlir::inversePermutation(), and reindexIndexingMap().

Referenced by vectorizeLinalgYield().

◆ calculateGatherOffset()

Value calculateGatherOffset ( RewriterBase & rewriter,
VectorizationState & state,
tensor::ExtractOp extractOp,
const IRMapping & bvm )
static

Calculates the offsets ($index_vec) for vector.gather operations generated from tensor.extract.

The offset is calculated as follows (example using scalar values):

  offset = extractOp.indices[0]
  for (i = 1; i < numIndices; i++)
    offset = extractOp.dimSize[i] * offset + extractOp.indices[i];

For tensor<45 x 80 x 15 x f32> and index [1, 2, 3], this leads to: offset = ( ( 1 ) * 80 + 2 ) * 15 + 3

Definition at line 905 of file Vectorization.cpp.

References broadcastIfNeeded(), mlir::arith::ConstantIndexOp::create(), mlir::Builder::getIndexType(), and mlir::IRMapping::lookup().

Referenced by vectorizeTensorExtract().

◆ createWriteOrMaskedWrite()

Operation * createWriteOrMaskedWrite ( OpBuilder & builder,
Location loc,
Value vecToStore,
Value dest,
SmallVector< Value > writeIndices = {},
bool useInBoundsInsteadOfMasking = false )
static

Creates an optionally masked TransferWriteOp.

Generates the following operation: res = vector.transfer_write vecToStore into dest

If shape(vecToStore) != shape(dest), masking is used to ensure correctness:

  mask = vector.create_mask(destShape) : vecToStoreShape
  res = vector.mask mask { vector.transfer_write vecToStore into dest }

The mask shape is identical to vecToStore (with the element type == i1), and the mask values are based on the shape of the dest tensor.

If useInBoundsInsteadOfMasking is set to true, the in_bounds attribute is used instead of masking:

  write = vector.transfer_write vecToStore into dest
  in_bounds_flags = (...)
  res = vector.transfer_write input into dest {in_bounds = in_bounds_flags}

Finally, writeIndices specifies the offsets to use. If empty, all indices are set to 0.

Definition at line 1690 of file Vectorization.cpp.

◆ extractConvFilterSlices()

SmallVector< Value > extractConvFilterSlices ( RewriterBase & rewriter,
Location loc,
Value filter,
int64_t kwSize )
static

Helper function to extract the filter slices after filter is unrolled along kw.

Definition at line 145 of file Vectorization.cpp.

References result.

◆ extractConvInputSlices()

SmallVector< Value > extractConvInputSlices ( RewriterBase & rewriter,
Location loc,
Value input,
int64_t nSize,
int64_t wSize,
int64_t cSize,
int64_t kwSize,
int strideW,
int dilationW,
int64_t wSizeStep,
bool isSingleChanneled )
static

Helper function to extract the input slices after filter is unrolled along kw.

Definition at line 109 of file Vectorization.cpp.

References result.

◆ extractConvResultSlices()

SmallVector< Value > extractConvResultSlices ( RewriterBase & rewriter,
Location loc,
Value res,
int64_t nSize,
int64_t wSize,
int64_t fSize,
int64_t wSizeStep,
bool isSingleChanneled )
static

Helper function to extract the result slices after filter is unrolled along kw.

Definition at line 161 of file Vectorization.cpp.

References result.

◆ getCollapsedVecType()

VectorType getCollapsedVecType ( VectorType type,
ArrayRef< AffineMap > reassociation )
static

Given the re-associations, "collapses" the input Vector type.

This is similar to CollapseShapeOp::inferCollapsedType with two notable differences:

  • We can safely assume that there are no dynamic sizes.
  • Scalable flags are updated alongside regular dims.

When collapsing scalable flags, conservatively avoids cases with two scalable dims. We could re-visit this in the future.

EXAMPLE: type = vector<4x16x[8]x16xf32> reassociation = [(d0, d1, d2, d3) -> (d0, d1), (d0, d1, d2, d3) -> (d2, d3)] Result: vector<64x[128]xf32>

Definition at line 1773 of file Vectorization.cpp.

References mlir::isReassociationValid().

◆ getDimsToReduce()

SmallVector< bool > getDimsToReduce ( LinalgOp linalgOp)
static

Definition at line 711 of file Vectorization.cpp.

References mlir::linalg::isReductionIterator().

Referenced by reduceIfNeeded().

◆ getSingleOpOfType()

template<typename OpType>
OpType getSingleOpOfType ( Block & block)
static

Return the unique instance of OpType in block if it is indeed unique.

Return null if none or more than 1 instances exist.

Definition at line 93 of file Vectorization.cpp.

References mlir::WalkResult::advance(), mlir::WalkResult::interrupt(), and mlir::Block::walk().

◆ getStaticPadVal()

Value getStaticPadVal ( Operation * op)
static

Returns the effective Pad value for the input op, provided it's a scalar.

Many Ops exhibit pad-like behaviour, but this isn't always explicit. If this Op performs padding, retrieve the padding value provided that it's a scalar and static/fixed for all the padded values. Returns an empty value otherwise.

◆ getTensorExtractMemoryAccessPattern()

VectorMemoryAccessKind getTensorExtractMemoryAccessPattern ( tensor::ExtractOp extractOp,
LinalgOp & linalgOp,
VectorType resType )
static

Infer the memory access pattern for the input ExtractOp.

Based on the ExtractOp result shape and the access indices, decides whether this Op corresponds to a contiguous load (including a broadcast of a scalar) or a gather load. When analysing the ExtractOp indices (to identify contiguous loads), this method looks for "loop" invariant indices (e.g. block arguments) and indices that change linearly (e.g. via linalg.index Op).

Note that it is always safe to use gather load operations for contiguous loads (albeit slow), but not vice-versa. When in doubt, bail out and assume that extractOp is a gather load.

Definition at line 1085 of file Vectorization.cpp.

References Contiguous, Gather, indices, isContiguousLoadIdx(), isLoopInvariantIdx(), and ScalarBroadcast.

Referenced by vectorizeTensorExtract().

◆ getTrailingNonUnitLoopDimIdx()

uint64_t getTrailingNonUnitLoopDimIdx ( LinalgOp linalgOp)
static

Find the index of the trailing non-unit dim in linalgOp.

This hook is used when checking whether tensor.extract Op (within a linalg.generic Op) represents a contiguous load operation.

Note that when calling this hook, it is assumed that the output vector is effectively 1D. Other cases (i.e. reading n-D vectors) should've been labelled as a gather load before entering this method.

Following on from the above, it is assumed that:

  • for statically shaped loops, when no masks are used, only one dim is != 1 (that's what the shape of the output vector is based on).
  • for dynamically shaped loops, there might be more non-unit dims as the output vector type is user-specified.

TODO: Statically shaped loops + vector masking

Definition at line 953 of file Vectorization.cpp.

Referenced by isContiguousLoadIdx().

◆ hasReductionIterator()

bool hasReductionIterator ( LinalgOp & op)
static

Check if op is a linalg.reduce or a linalg.generic that has at least one reduction iterator.

Definition at line 718 of file Vectorization.cpp.

References mlir::linalg::isReductionIterator().

◆ insertConvResultSlices()

Value insertConvResultSlices ( RewriterBase & rewriter,
Location loc,
Value res,
int64_t wSize,
int64_t wSizeStep,
SmallVectorImpl< Value > & resVals,
bool isSingleChanneled )
static

Helper function to insert the computed result slices.

Definition at line 189 of file Vectorization.cpp.

◆ isContiguousLoadIdx()

bool isContiguousLoadIdx ( LinalgOp & linalgOp,
Value & val,
bool & foundIndexOp,
VectorType resType )
static

Check whether val could be used for calculating the trailing index for a contiguous load operation.

There are currently 3 types of values that are allowed here:

  1. loop-invariant values,
  2. values that increment by 1 with every loop iteration,
  3. results of basic arithmetic operations (linear and continuous) involving 1., 2. and 3.

This method returns true if indeed only such values are used in calculating val.

Additionally, the trailing index for a contiguous load operation should increment by 1 with every loop iteration, i.e. be based on:

  • linalg.index <dim> , where <dim> is the trailing non-unit dim of the iteration space (this way, linalg.index <dim> increments by 1 with every loop iteration). foundIndexOp is updated to true when such Op is found.

Definition at line 1030 of file Vectorization.cpp.

References mlir::Value::getDefiningOp(), getTrailingNonUnitLoopDimIdx(), isContiguousLoadIdx(), and result.

Referenced by getTensorExtractMemoryAccessPattern(), and isContiguousLoadIdx().

◆ isLoopInvariantIdx()

bool isLoopInvariantIdx ( LinalgOp & linalgOp,
Value & val,
VectorType resType )
static

Checks whether val can be used for calculating a loop invariant index.

Definition at line 971 of file Vectorization.cpp.

References mlir::Value::getDefiningOp(), isLoopInvariantIdx(), and result.

Referenced by getTensorExtractMemoryAccessPattern(), and isLoopInvariantIdx().

◆ isMaskTriviallyFoldable()

bool isMaskTriviallyFoldable ( SmallVector< OpFoldResult > & maskSizes,
SmallVector< Value > & writeIdxs,
ArrayRef< int64_t > destShape,
ArrayRef< int64_t > maskShape )
static

Determines whether a mask for xfer_write is trivially "all true".

Given all the inputs required to generate a mask (mask sizes and shapes), and an xfer_write operation (write indices and the destination tensor shape), determines whether the corresponding mask would be trivially foldable (i.e., trivially "all true").

Use this method to avoid generating spurious masks and relying on vectorization post-processing to remove them.

Pre-conditions for a mask to be trivially foldable:

  • All involved shapes (mask + destination tensor) are static.
  • All write indices are constant.
  • All mask sizes are constant (including arith.constant).

If the pre-conditions are met, the method checks for each destination dimension d:

  (1) destDimSize[rankDiff + d] <= maskShape[d]
  (2) destDimSize[rankDiff + d] <= writeIndex[d] + maskSize[d]

rankDiff = rank(dest) - rank(mask).

This method takes a conservative view: it may return false even if the mask is technically foldable.

EXAMPLE 1 (trivially foldable, all shapes match, mask sizes match the shape of the dest tensor):

  c0 = arith.constant 0 : index
  mask = vector.create_mask 5, 1
  vector.mask mask {
    vector.transfer_write vecToStore_1, dest[c0, c0] {in_bounds = [true, true]} : vector<5x1xi32>, tensor<5x1xi32>
  }

EXAMPLE 2 (not trivially foldable - vector shape exceeds the tensor shape, mask is required to avoid out-of-bounds write):

  c0 = arith.constant 0 : index
  mask = vector.create_mask 5, 1
  vector.mask mask {
    vector.transfer_write vecToStore_2, dest[c0, c0] {in_bounds = [true, true]} : vector<8x1xi32>, tensor<5x1xi32>
  }

TODO: Re-use in createReadOrMaskedRead

Definition at line 1613 of file Vectorization.cpp.

References mlir::getConstantIntValue(), mlir::m_ConstantInt(), and mlir::matchPattern().

◆ matchLinalgReduction()

Operation * matchLinalgReduction ( OpOperand * outputOperand)
static

Check whether outputOperand is a reduction with a single combiner operation.

Return the combiner operation of the reduction. Return nullptr otherwise. Multiple reduction operations would impose an ordering between reduction dimensions and is currently unsupported in Linalg. This limitation is motivated by the fact that e.g. min(max(X)) != max(min(X))

Definition at line 669 of file Vectorization.cpp.

References mlir::OpOperand::getOperandNumber(), mlir::detail::IROperandBase::getOwner(), and mlir::matchReduction().

◆ reduceIfNeeded()

Operation * reduceIfNeeded ( OpBuilder & b,
LinalgOp linalgOp,
Operation * op,
Value reduceValue,
Value initialValue,
const IRMapping & bvm )
static

Emit reduction operations if the shape of the value to reduce is different from the result shape.

Definition at line 1301 of file Vectorization.cpp.

References b, buildMultiDimReduce(), getDimsToReduce(), mlir::Value::getType(), and mlir::IRMapping::lookup().

Referenced by vectorizeOneOp().

◆ reindexIndexingMap()

AffineMap reindexIndexingMap ( AffineMap map)
static

Given an indexing map coming from a LinalgOp indexing, restricted to a projectedPermutation, compress the unused dimensions to serve as a permutation_map for a vector transfer operation.

For example, given a linalg op such as:

%0 = linalg.generic {
indexing_maps = affine_map<(d0, d1, d2, d3, d4) -> (d4, d0, d2)>,
indexing_maps = affine_map<(d0, d1, d2, d3, d4) -> (d1, d3)>
}
ins(%0 : tensor<2x3x4xf32>)
outs(%1 : tensor<5x6xf32>)

the iteration domain size of the linalg op is 3x5x4x6x2. The first affine map is reindexed to affine_map<(d0, d1, d2) -> (d2, d0, d1)>, the second affine map is reindexed to affine_map<(d0, d1) -> (d0, d1)>.

Definition at line 595 of file Vectorization.cpp.

References mlir::compressUnusedDims(), and mlir::AffineMap::isProjectedPermutation().

Referenced by buildVectorWrite(), and vectorizeAsLinalgGeneric().

◆ tensorExtractVectorizationPrecondition()

LogicalResult tensorExtractVectorizationPrecondition ( Operation * op,
bool vectorizeNDExtract )
static

Helper function to check if the tensor.extract can be vectorized by the custom hook vectorizeTensorExtract.

Definition at line 872 of file Vectorization.cpp.

References success().

◆ vectorizeAsInsertSliceOp()

LogicalResult vectorizeAsInsertSliceOp ( RewriterBase & rewriter,
tensor::InsertSliceOp sliceOp,
ArrayRef< int64_t > inputVectorSizes,
SmallVectorImpl< Value > & newResults )
static

Vectorize tensor::InsertSliceOp with:

  • vector::TransferReadOp + vector::TransferWriteOp

The vector sizes are either:

  • user-provided in inputVectorSizes, or
  • inferred from the static dims in the input and output tensors.

Bails out if:

  • vector sizes are not user-provided, and
  • at least one dim is dynamic (in both the input and output tensors).

Before:

  !t_in_type = tensor<1x2x3xf32>
  !t_out_type = tensor<9x8x7x1x2x3xf32>
  !v_type = vector<1x2x3xf32>
  inserted_slice = tensor.insert_slice src into dest ... : !t_in_type into !t_out_type

After:

  read = vector.transfer_read src[...], pad ... : !t_in_type, !v_type
  write = vector.transfer_write read, dest ... : !v_type, !t_out_type

◆ vectorizeAsLinalgGeneric()

LogicalResult vectorizeAsLinalgGeneric ( RewriterBase & rewriter,
VectorizationState & state,
LinalgOp linalgOp,
SmallVectorImpl< Value > & newResults )
static

Generic vectorization function that rewrites the body of a linalgOp into vector form.

Generic vectorization proceeds as follows:

  1. Verify the linalgOp has one non-empty region.
  2. Values defined above the region are mapped to themselves and will be broadcasted on a per-need basis by their consumers.
  3. Each region argument is vectorized into a vector.transfer_read (or 0-d load). TODO: Reuse opportunities for RAR dependencies.
  4a. Register CustomVectorizationHook for YieldOp to capture the results.
  4b. Register CustomVectorizationHook for IndexOp to access the iteration indices.
  5. Iteratively call vectorizeOneOp on the region operations.

When broadcastToMaximalCommonShape is set to true, eager broadcasting is performed to the maximal common vector size implied by the linalgOp iteration space. This eager broadcasting is introduced in the permutation_map of the vector.transfer_read operations. The eager broadcasting makes it trivial to determine where broadcast, transposes and reductions should occur, without any bookkeeping. The tradeoff is that, in the absence of good canonicalizations, the amount of work increases. This is not deemed a problem as we expect canonicalizations and foldings to aggressively clean up the useless work.

Definition at line 1452 of file Vectorization.cpp.

References mlir::AffineMap::compose(), mlir::arith::ConstantIndexOp::create(), Failure, mlir::BlockArgument::getArgNumber(), mlir::Builder::getBoolArrayAttr(), mlir::getElementTypeOrSelf(), mlir::Block::getOperations(), mlir::Operation::getResult(), mlir::Operation::getResults(), mlir::getUsedValuesDefinedAbove(), indices, mlir::inverseAndBroadcastProjectedPermutation(), mlir::inversePermutation(), mlir::IRMapping::map(), NewOp, reindexIndexingMap(), result, success(), vectorizeLinalgIndex(), vectorizeLinalgYield(), vectorizeOneOp(), and vectorizeTensorExtract().

◆ vectorizeConvolution()

FailureOr< Operation * > vectorizeConvolution ( RewriterBase & rewriter,
LinalgOp convOp,
ArrayRef< int64_t > inputVecSizes = {},
ArrayRef< bool > inputVecScalableFlags = {},
bool flatten1DDepthwiseConv = false )
static

Try to vectorize convOp as a convolution.

◆ vectorizeLinalgIndex()

VectorizationHookResult vectorizeLinalgIndex ( RewriterBase & rewriter,
VectorizationState & state,
Operation * op,
LinalgOp linalgOp )
static

Helper function to vectorize the index operations of a linalgOp.

Return VectorizationHookStatus::NewOp to signal the vectorization algorithm that it should map the produced operations. This function is meant to be used as a CustomVectorizationHook.

Definition at line 828 of file Vectorization.cpp.

References Failure, mlir::Builder::getIndexType(), mlir::AffineMap::getPermutationMap(), and NewOp.

Referenced by vectorizeAsLinalgGeneric().

◆ vectorizeLinalgYield()

VectorizationHookResult vectorizeLinalgYield ( RewriterBase & rewriter,
Operation * op,
const IRMapping & bvm,
VectorizationState & state,
LinalgOp linalgOp,
SmallVectorImpl< Value > & newResults )
static

Helper function to vectorize the terminator of a linalgOp.

New result vector values are appended to newResults. Return VectorizationHookStatus::NoReplace to signal the vectorization algorithm that it should not try to map produced operations and instead return the results using the newResults vector making them available to the vectorization algorithm for RAUW. This function is meant to be used as a CustomVectorizationHook.

Definition at line 804 of file Vectorization.cpp.

References buildVectorWrite(), Failure, mlir::IRMapping::lookup(), and NoReplace.

Referenced by vectorizeAsLinalgGeneric().

◆ vectorizeOneOp()

VectorizationHookResult vectorizeOneOp ( RewriterBase & rewriter,
VectorizationState & state,
LinalgOp linalgOp,
Operation * op,
const IRMapping & bvm,
ArrayRef< CustomVectorizationHook > customVectorizationHooks )
static

Generic vectorization for a single operation op, given already vectorized operands carried by bvm.

Vectorization occurs as follows:

  1. Try to apply any of the customVectorizationHooks and return its result on success.
  2. Clone any constant in the current scope without vectorization: each consumer of the constant will later determine the shape to which the constant needs to be broadcast to.
  3. Fail on any remaining non ElementwiseMappable op. It is the purpose of the customVectorizationHooks to cover such cases.
  4. Clone op in vector form to a vector of shape prescribed by the first operand of maximal rank. Other operands have smaller rank and are broadcast accordingly. It is assumed this broadcast is always legal, otherwise, it means one of the customVectorizationHooks is incorrect.

This function assumes all operands of op have been vectorized and are in the bvm mapping. As a consequence, this function is meant to be called on a topologically-sorted list of ops. This function does not update bvm but returns a VectorizationHookStatus that instructs the caller what bvm update needs to occur.

Definition at line 1337 of file Vectorization.cpp.

References broadcastIfNeeded(), mlir::OpBuilder::clone(), mlir::OpBuilder::create(), Failure, mlir::Operation::getAttrs(), mlir::getElementTypeOrSelf(), mlir::OperationName::getIdentifier(), mlir::Operation::getLoc(), mlir::Operation::getName(), mlir::Operation::getOperands(), mlir::Operation::getResultTypes(), mlir::Value::getType(), mlir::OpTrait::hasElementwiseMappableTraits(), mlir::IRMapping::lookup(), mlir::matchReduction(), NewOp, reduceIfNeeded(), and result.

Referenced by vectorizeAsLinalgGeneric().

◆ vectorizeTensorExtract()

VectorizationHookResult vectorizeTensorExtract ( RewriterBase & rewriter,
VectorizationState & state,
Operation * op,
LinalgOp linalgOp,
const IRMapping & bvm )
static