MLIR
22.0.0git
|
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.h"
#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypeInterfaces.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/Value.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/DebugLog.h"
#include "llvm/Support/InterleavedRange.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
Go to the source code of this file.
Classes | |
struct | VectorizationState |
Contains the vectorization state and related methods used across the vectorization process of a given operation. More... | |
struct | VectorizationHookResult |
VectorizationHookResult contains the vectorized op returned from a CustomVectorizationHook. More... | |
struct | VectorizePadOpUserPattern< OpTy > |
Base pattern for rewriting tensor::PadOps whose result is consumed by a given operation type OpTy. More... | |
struct | PadOpVectorizationWithTransferReadPattern |
Rewrite use of tensor::PadOp result in TransferReadOp. More... | |
struct | PadOpVectorizationWithTransferWritePattern |
Rewrite use of tensor::PadOp result in TransferWriteOp. More... | |
struct | PadOpVectorizationWithInsertSlicePattern |
Rewrite use of tensor::PadOp result in InsertSliceOp. More... | |
struct | VectorizeConvolution |
Macros | |
#define | DEBUG_TYPE "linalg-vectorization" |
Typedefs | |
using | CustomVectorizationPrecondition = std::function< LogicalResult(Operation *, bool)> |
using | CustomVectorizationHook = std::function< VectorizationHookResult(Operation *, const IRMapping &)> |
Enumerations | |
enum class | Conv1DOpOrder { W , Ncw , Nwc } |
Helper enum to represent conv1d input traversal order. More... | |
enum | VectorizationHookStatus { Failure = 0 , NoReplace , NewOp } |
Helper data structure to represent the result of vectorization for a single operation. More... | |
enum | VectorMemoryAccessKind { ScalarBroadcast , Contiguous , Gather } |
Functions | |
static FailureOr< Operation * > | vectorizeConvolution (RewriterBase &rewriter, LinalgOp convOp, ArrayRef< int64_t > inputVecSizes={}, ArrayRef< bool > inputVecScalableFlags={}, bool flatten1DDepthwiseConv=false) |
Try to vectorize convOp as a convolution. More... | |
static LogicalResult | vectorizeAsInsertSliceOp (RewriterBase &rewriter, tensor::InsertSliceOp sliceOp, ArrayRef< int64_t > inputVectorSizes, SmallVectorImpl< Value > &newResults) |
Vectorize tensor::InsertSliceOp with: More... | |
static Value | getStaticPadVal (Operation *op) |
Returns the effective Pad value for the input op, provided it's a scalar. More... | |
template<typename OpType > | |
static OpType | getSingleOpOfType (Block &block) |
Return the unique instance of OpType in block if it is indeed unique. More... | |
static SmallVector< Value > | extractConvInputSlices (RewriterBase &rewriter, Location loc, Value input, int64_t nSize, int64_t wSize, int64_t cSize, int64_t kwSize, int strideW, int dilationW, int64_t wSizeStep, bool isSingleChanneled) |
Helper function to extract the input slices after filter is unrolled along kw. More... | |
static SmallVector< Value > | extractConvFilterSlices (RewriterBase &rewriter, Location loc, Value filter, int64_t kwSize) |
Helper function to extract the filter slices after filter is unrolled along kw. More... | |
static SmallVector< Value > | extractConvResultSlices (RewriterBase &rewriter, Location loc, Value res, int64_t nSize, int64_t wSize, int64_t fSize, int64_t wSizeStep, bool isSingleChanneled) |
Helper function to extract the result slices after filter is unrolled along kw. More... | |
static Value | insertConvResultSlices (RewriterBase &rewriter, Location loc, Value res, int64_t wSize, int64_t wSizeStep, SmallVectorImpl< Value > &resVals, bool isSingleChanneled) |
Helper function to insert the computed result slices. More... | |
static AffineMap | reindexIndexingMap (AffineMap map) |
Given an indexing map coming from a LinalgOp indexing, restricted to a projectedPermutation, compress the unused dimensions to serve as a permutation_map for a vector transfer operation. More... | |
static Operation * | matchLinalgReduction (OpOperand *outputOperand) |
Check whether outputOperand is a reduction with a single combiner operation. More... | |
static Value | broadcastIfNeeded (OpBuilder &b, Value value, Type dstType) |
Broadcast value to a vector of shape if possible. More... | |
static Operation * | buildMultiDimReduce (OpBuilder &b, Operation *reduceOp, Value valueToReduce, Value acc, ArrayRef< bool > dimsToMask) |
Create MultiDimReductionOp to compute the reduction for reductionOp . More... | |
static SmallVector< bool > | getDimsToReduce (LinalgOp linalgOp) |
static bool | hasReductionIterator (LinalgOp &op) |
Check if op is a linalg.reduce or a linalg.generic that has at least one reduction iterator. More... | |
static Value | buildVectorWrite (RewriterBase &rewriter, Value value, OpOperand *outputOperand, VectorizationState &state) |
Build a vector.transfer_write of value into outputOperand at indices set to all 0 ; where outputOperand is an output operand of the LinalgOp currently being vectorized. More... | |
static VectorizationHookResult | vectorizeLinalgYield (RewriterBase &rewriter, Operation *op, const IRMapping &bvm, VectorizationState &state, LinalgOp linalgOp, SmallVectorImpl< Value > &newResults) |
Helper function to vectorize the terminator of a linalgOp . More... | |
static VectorizationHookResult | vectorizeLinalgIndex (RewriterBase &rewriter, VectorizationState &state, Operation *op, LinalgOp linalgOp) |
Helper function to vectorize the index operations of a linalgOp . More... | |
static LogicalResult | tensorExtractVectorizationPrecondition (Operation *op, bool vectorizeNDExtract) |
Helper function to check if the tensor.extract can be vectorized by the custom hook vectorizeTensorExtract. More... | |
static Value | calculateGatherOffset (RewriterBase &rewriter, VectorizationState &state, tensor::ExtractOp extractOp, const IRMapping &bvm) |
Calculates the offsets ($index_vec ) for vector.gather operations generated from tensor.extract . More... | |
static uint64_t | getTrailingNonUnitLoopDimIdx (LinalgOp linalgOp) |
Find the index of the trailing non-unit dim in linalgOp. More... | |
static bool | isLoopInvariantIdx (LinalgOp &linalgOp, Value &val, VectorType resType) |
Checks whether val can be used for calculating a loop invariant index. More... | |
static bool | isContiguousLoadIdx (LinalgOp &linalgOp, Value &val, bool &foundIndexOp, VectorType resType) |
Check whether val could be used for calculating the trailing index for a contiguous load operation. More... | |
static VectorMemoryAccessKind | getTensorExtractMemoryAccessPattern (tensor::ExtractOp extractOp, LinalgOp &linalgOp, VectorType resType) |
Infer the memory access pattern for the input ExtractOp. More... | |
static VectorizationHookResult | vectorizeTensorExtract (RewriterBase &rewriter, VectorizationState &state, Operation *op, LinalgOp linalgOp, const IRMapping &bvm) |
Helper function to vectorize the tensor.extract operations. More... | |
static Operation * | reduceIfNeeded (OpBuilder &b, LinalgOp linalgOp, Operation *op, Value reduceValue, Value initialValue, const IRMapping &bvm) |
Emit reduction operations if the shape of the value to reduce is different from the result shape. More... | |
static VectorizationHookResult | vectorizeOneOp (RewriterBase &rewriter, VectorizationState &state, LinalgOp linalgOp, Operation *op, const IRMapping &bvm, ArrayRef< CustomVectorizationHook > customVectorizationHooks) |
Generic vectorization for a single operation op , given already vectorized operands carried by bvm . More... | |
static LogicalResult | vectorizeAsLinalgGeneric (RewriterBase &rewriter, VectorizationState &state, LinalgOp linalgOp, SmallVectorImpl< Value > &newResults) |
Generic vectorization function that rewrites the body of a linalgOp into vector form. More... | |
static SmallVector< int64_t > | getTiledPackShape (linalg::PackOp packOp, ArrayRef< int64_t > destShape) |
Given a linalg::PackOp, return the dest shape before any packing permutations. More... | |
static bool | isMaskTriviallyFoldable (SmallVector< OpFoldResult > &maskSizes, SmallVector< Value > &writeIdxs, ArrayRef< int64_t > destShape, ArrayRef< int64_t > maskShape) |
Determines whether a mask for xfer_write is trivially "all true". More... | |
static Operation * | createWriteOrMaskedWrite (OpBuilder &builder, Location loc, Value vecToStore, Value dest, SmallVector< Value > writeIndices={}, bool useInBoundsInsteadOfMasking=false) |
Creates an optionally masked TransferWriteOp. More... | |
static LogicalResult | vectorizeAsTensorPackOp (RewriterBase &rewriter, linalg::PackOp packOp, ArrayRef< int64_t > inputVectorSizes, SmallVectorImpl< Value > &newResults) |
Vectorize linalg::PackOp with (1) static inner_tiles (2) constant padding value and (3) input vector sizes into: More... | |
static VectorType | getCollapsedVecType (VectorType type, ArrayRef< AffineMap > reassociation) |
Given the re-associations, "collapses" the input Vector type. More... | |
static LogicalResult | vectorizeAsTensorUnpackOp (RewriterBase &rewriter, linalg::UnPackOp unpackOp, ArrayRef< int64_t > inputVectorSizes, ArrayRef< bool > inputScalableVecDims, SmallVectorImpl< Value > &newResults) |
Vectorize linalg.unpack as: More... | |
static LogicalResult | vectorizeAsTensorPadOp (RewriterBase &rewriter, tensor::PadOp padOp, ArrayRef< int64_t > inputVectorSizes, SmallVectorImpl< Value > &newResults) |
Vectorize a padOp with (1) static result type, (2) constant padding value and (3) all-zero lowPad to transfer_write_in_bounds(transfer_read_masked(pad_source, pad_value)) . More... | |
static LogicalResult | reductionPreconditions (LinalgOp op) |
static LogicalResult | vectorizeDynamicConvOpPrecondition (linalg::LinalgOp conv, bool flatten1DDepthwiseConv) |
static LogicalResult | vectorizeDynamicLinalgOpPrecondition (linalg::LinalgOp op, bool flatten1DDepthwiseConv) |
static LogicalResult | vectorizeUnPackOpPrecondition (linalg::UnPackOp unpackOp, ArrayRef< int64_t > inputVectorSizes) |
This hook considers two cases: (1) If the input-vector-sizes are empty, then the vector sizes will be inferred. More... | |
static LogicalResult | vectorizeInsertSliceOpPrecondition (tensor::InsertSliceOp sliceOp, ArrayRef< int64_t > inputVectorSizes) |
static LogicalResult | vectorizeAsLinalgContraction (RewriterBase &rewriter, VectorizationState &state, LinalgOp linalgOp, SmallVectorImpl< Value > &newResults) |
Vectorize a named linalg contraction op into: vector::TransferReadOp - Reads vectors from the operands vector::ContractionOp - Performs contraction vector::TransferWriteOp - Write the result vector back to the destination The operands shapes are preserved and loaded directly into vectors. More... | |
static bool | isCastOfBlockArgument (Operation *op) |
static std::optional< ConvOperationKind > | getConvOperationKind (Operation *reduceOp) |
static bool | isSupportedPoolKind (vector::CombiningKind kind) |
static LogicalResult | vectorizeConvOpPrecondition (linalg::LinalgOp convOp) |
static LogicalResult | vectorizeLinalgOpPrecondition (LinalgOp linalgOp, ArrayRef< int64_t > inputVectorSizes, bool vectorizeNDExtract, bool flatten1DDepthwiseConv) |
static LogicalResult | vectorizePackOpPrecondition (linalg::PackOp packOp, ArrayRef< int64_t > inputVectorSizes) |
static LogicalResult | vectorizePadOpPrecondition (tensor::PadOp padOp, ArrayRef< int64_t > inputVectorSizes) |
static LogicalResult | vectorizeScalableVectorPrecondition (Operation *op, ArrayRef< int64_t > inputVectorSizes, ArrayRef< bool > inputScalableVecDims) |
Preconditions for scalable vectors. More... | |
static void | convertAffineApply (RewriterBase &rewriter, LinalgOp linalgOp) |
Converts affine.apply Ops to arithmetic operations. More... | |
static bool | mayExistInterleavedUses (Operation *firstOp, Operation *secondOp, ValueRange values) |
Check whether there is any interleaved use of any values between firstOp and secondOp . More... | |
static memref::SubViewOp | getSubViewUseIfUnique (Value v) |
Return the unique subview use of v if it is indeed unique, null otherwise. More... | |
template<int N> | |
static void | bindShapeDims (ShapedType shapedType) |
template<int N, typename IntTy , typename... IntTy2> | |
static void | bindShapeDims (ShapedType shapedType, IntTy &val, IntTy2 &...vals) |
template<typename... IntTy> | |
static void | bindShapeDims (ShapedType shapedType, IntTy &...vals) |
Bind a pack of int& to the leading dimensions of shapedType.getShape(). More... | |
#define DEBUG_TYPE "linalg-vectorization" |
Definition at line 50 of file Vectorization.cpp.
using CustomVectorizationHook = std::function<VectorizationHookResult(Operation *, const IRMapping &)> |
Definition at line 759 of file Vectorization.cpp.
using CustomVectorizationPrecondition = std::function<LogicalResult(Operation *, bool)> |
Definition at line 753 of file Vectorization.cpp.
|
strong |
Helper enum to represent conv1d input traversal order.
Enumerator | |
---|---|
W | |
Ncw | |
Nwc |
Definition at line 572 of file Vectorization.cpp.
Helper data structure to represent the result of vectorization for a single operation.
In certain specific cases, like terminators, we do not want to propagate.
Definition at line 581 of file Vectorization.cpp.
Enumerator | |
---|---|
ScalarBroadcast | |
Contiguous | |
Gather |
Definition at line 902 of file Vectorization.cpp.
|
static |
Definition at line 3437 of file Vectorization.cpp.
Referenced by bindShapeDims().
|
static |
Bind a pack of int& to the leading dimensions of shapedType.getShape().
Definition at line 3447 of file Vectorization.cpp.
|
static |
Definition at line 3440 of file Vectorization.cpp.
References bindShapeDims().
Broadcast value
to a vector of shape
if possible.
Return value otherwise.
Definition at line 651 of file Vectorization.cpp.
References mlir::OpBuilder::createOrFold(), mlir::OpBuilder::getInsertionPoint(), mlir::Value::getType(), mlir::vector::isBroadcastableTo(), and mlir::vector::Success.
|
static |
Create MultiDimReductionOp to compute the reduction for reductionOp
.
This assumes that reductionOp
has two operands and one of them is the reduction initial value.
Definition at line 668 of file Vectorization.cpp.
References mlir::linalg::getCombinerOpKind(), and mlir::Operation::getLoc().
Referenced by reduceIfNeeded().
|
static |
Build a vector.transfer_write of value
into outputOperand
at indices set to all 0
; where outputOperand
is an output operand of the LinalgOp currently being vectorized.
If dest
has null rank, build a memref.store. Return the produced value or null if no value is produced.
Definition at line 696 of file Vectorization.cpp.
|
static |
Calculates the offsets ($index_vec
) for vector.gather
operations generated from tensor.extract
.
The offset is calculated as follows (example using scalar values):
offset = extractOp.indices[0]
for (i = 1; i < numIndices; i++)
  offset = extractOp.dimSize[i] * offset + extractOp.indices[i];
For tensor<45 x 80 x 15 x f32> and index [1, 2, 3], this leads to: offset = ( ( 1 ) * 80 + 2 ) * 15 + 3
Definition at line 871 of file Vectorization.cpp.
|
static |
Converts affine.apply Ops to arithmetic operations.
Definition at line 2650 of file Vectorization.cpp.
References mlir::affine::expandAffineExpr(), mlir::RewriterBase::replaceOp(), and mlir::OpBuilder::setInsertionPoint().
|
static |
Creates an optionally masked TransferWriteOp.
Generates the following operation: res = vector.transfer_write vecToStore into dest
If shape(vecToStore) != shape(dest), masking is used to ensure correctness:
mask = vector.create_mask(destShape) : vecToStoreShape
res = vector.mask mask { vector.transfer_write vecToStore into dest }
The mask shape is identical to vecToStore
(with the element type == i1), and the mask values are based on the shape of the dest
tensor.
If useInBoundsInsteadOfMasking
is set to true
, the in_bounds
attribute is used instead of masking:
write = vector.transfer_write vecToStore into dest in_bounds_flags = (...) res = vector.transfer_write input into dest {in_bounds = in_bounds_flags}
Finally, writeIndices
specifies the offsets to use. If empty, all indices are set to 0.
Definition at line 1663 of file Vectorization.cpp.
Referenced by vectorizeAsInsertSliceOp(), vectorizeAsTensorPackOp(), vectorizeAsTensorPadOp(), and vectorizeAsTensorUnpackOp().
|
static |
Helper function to extract the filter slices after filter is unrolled along kw.
Definition at line 145 of file Vectorization.cpp.
|
static |
Helper function to extract the input slices after filter is unrolled along kw.
Definition at line 109 of file Vectorization.cpp.
|
static |
Helper function to extract the result slices after filter is unrolled along kw.
Definition at line 161 of file Vectorization.cpp.
|
static |
Given the re-associations, "collapses" the input Vector type.
This is similar to CollapseShapeOp::inferCollapsedType with two notable differences:
When collapsing scalable flags, conservatively avoids cases with two scalable dims. We could re-visit this in the future.
EXAMPLE: type = vector<4x16x[8]x16xf32> reassociation = [(d0, d1, d2, d3) -> (d0, d1), (d0, d1, d2, d3) -> (d2, d3)] Result: vector<64x[128]xf32>
Definition at line 1851 of file Vectorization.cpp.
References mlir::get(), and mlir::isReassociationValid().
Referenced by vectorizeAsTensorUnpackOp().
|
static |
Definition at line 2250 of file Vectorization.cpp.
References mlir::Operation::getOperands(), mlir::Operation::getResultTypes(), isCastOfBlockArgument(), and mlir::Operation::operand_end().
Referenced by vectorizeConvOpPrecondition().
|
static |
Definition at line 677 of file Vectorization.cpp.
References mlir::linalg::isReductionIterator().
Referenced by reduceIfNeeded().
|
static |
Return the unique instance of OpType in block
if it is indeed unique.
Return null if none or more than 1 instances exist.
Definition at line 93 of file Vectorization.cpp.
References mlir::WalkResult::advance(), mlir::WalkResult::interrupt(), and mlir::Block::walk().
Returns the effective Pad value for the input op, provided it's a scalar.
Many Ops exhibit pad-like behaviour, but this isn't always explicit. If this Op performs padding, retrieve the padding value provided that it's a scalar and static/fixed for all the padded values. Returns an empty value otherwise.
TODO: This is used twice (when checking vectorization pre-conditions and when vectorizing). Cache results instead of re-running.
Definition at line 3045 of file Vectorization.cpp.
Referenced by vectorizeAsInsertSliceOp(), and vectorizeInsertSliceOpPrecondition().
|
static |
Return the unique subview use of v
if it is indeed unique, null otherwise.
Definition at line 3285 of file Vectorization.cpp.
References mlir::Value::getUses().
Referenced by mlir::linalg::LinalgCopyVTRForwardingPattern::matchAndRewrite(), and mlir::linalg::LinalgCopyVTWForwardingPattern::matchAndRewrite().
|
static |
Infer the memory access pattern for the input ExtractOp.
Based on the ExtractOp result shape and the access indices, decides whether this Op corresponds to a contiguous load (including a broadcast of a scalar) or a gather load. When analysing the ExtractOp indices (to identify contiguous loads), this method looks for "loop" invariant indices (e.g. block arguments) and indices that change linearly (e.g. via linalg.index
Op).
Note that it is always safe to use gather load operations for contiguous loads (albeit slow), but not vice-versa. When in doubt, bail out and assume that extractOp
is a gather load.
Definition at line 1051 of file Vectorization.cpp.
References Contiguous, mlir::detail::enumerate(), Gather, isContiguousLoadIdx(), isLoopInvariantIdx(), and ScalarBroadcast.
|
static |
Given a linalg::PackOp, return the dest
shape before any packing permutations.
Definition at line 1535 of file Vectorization.cpp.
References mlir::applyPermutation(), and mlir::linalg::getPackInverseDestPerm().
Referenced by vectorizeAsTensorPackOp().
|
static |
Find the index of the trailing non-unit dim in linalgOp.
This hook is used when checking whether tensor.extract
Op (within a linalg.generic
Op) represents a contiguous load operation.
Note that when calling this hook, it is assumed that the output vector is effectively 1D. Other cases (i.e. reading n-D vectors) should've been labelled as a gather load before entering this method.
Following on from the above, it is assumed that:
TODO: Statically shaped loops + vector masking
Definition at line 919 of file Vectorization.cpp.
Referenced by isContiguousLoadIdx().
|
static |
Check if op
is a linalg.reduce or a linalg.generic that has at least one reduction iterator.
Definition at line 684 of file Vectorization.cpp.
References mlir::linalg::isReductionIterator().
Referenced by vectorizeDynamicLinalgOpPrecondition(), and vectorizeScalableVectorPrecondition().
|
static |
Helper function to insert the computed result slices.
Definition at line 189 of file Vectorization.cpp.
|
static |
Definition at line 2234 of file Vectorization.cpp.
References mlir::Operation::getNumOperands(), and mlir::Operation::getOperand().
Referenced by getConvOperationKind().
|
static |
Check whether val
could be used for calculating the trailing index for a contiguous load operation.
There are currently 3 types of values that are allowed here:
val.
Additionally, the trailing index for a contiguous load operation should increment by 1 with every loop iteration, i.e. be based on:
linalg.index <dim>
, where <dim> is the trailing non-unit dim of the iteration space (this way, linalg.index <dim>
increments by 1 with every loop iteration). foundIndexOp
is updated to true
when such Op is found.
Definition at line 996 of file Vectorization.cpp.
References mlir::Value::getDefiningOp(), and getTrailingNonUnitLoopDimIdx().
Referenced by getTensorExtractMemoryAccessPattern().
|
static |
Checks whether val
can be used for calculating a loop invariant index.
Definition at line 937 of file Vectorization.cpp.
References mlir::Value::getDefiningOp().
Referenced by getTensorExtractMemoryAccessPattern().
|
static |
Determines whether a mask for xfer_write is trivially "all true".
Given all the inputs required to generate a mask (mask sizes and shapes), and an xfer_write operation (write indices and the destination tensor shape), determines whether the corresponding mask would be trivially foldable (i.e., trivially "all true").
Use this method to avoid generating spurious masks and relying on vectorization post-processing to remove them.
Pre-conditions for a mask to be trivially foldable:
arith.constant
). If the pre-conditions are met, the method checks for each destination dimension d
: (1) destDimSize[rankDiff + d] <= maskShape[d] (2) destDimSize[rankDiff + d] <= writeIndex[d] + maskSize[d]
rankDiff = rank(dest) - rank(mask).
This method takes a conservative view: it may return false even if the mask is technically foldable.
EXAMPLE 1 (trivially foldable, all shapes match, mask sizes match the shape of the dest tensor): c0 = arith.constant 0 : index mask = vector.create_mask 5, 1 vector.mask mask { vector.transfer_write vecToStore_1, dest[c0, c0] {in_bounds = [true, true]} : vector<5x1xi32>, tensor<5x1xi32> }
EXAMPLE 2 (not trivially foldable - vector shape exceeds the tensor shape, mask is required to avoid out-of-bounds write): c0 = arith.constant 0 : index mask = vector.create_mask 5, 1 vector.mask mask { vector.transfer_write vecToStore_2, dest[c0, c0] {in_bounds = [true, true]} : vector<8x1xi32>, tensor<5x1xi32> }
TODO: Re-use in createReadOrMaskedRead
Definition at line 1586 of file Vectorization.cpp.
References clamp(), mlir::detail::enumerate(), mlir::getConstantIntValue(), mlir::m_ConstantInt(), and mlir::matchPattern().
|
static |
Definition at line 2292 of file Vectorization.cpp.
Referenced by vectorizeConvOpPrecondition().
Check whether outputOperand
is a reduction with a single combiner operation.
Return the combiner operation of the reduction. Return nullptr otherwise. Multiple reduction operations would impose an ordering between reduction dimensions and are currently unsupported in Linalg. This limitation is motivated by the fact that e.g. min(max(X)) != max(min(X)).
Definition at line 635 of file Vectorization.cpp.
References mlir::OpOperand::getOperandNumber(), mlir::detail::IROperandBase::getOwner(), and mlir::matchReduction().
Referenced by reductionPreconditions(), and vectorizeConvOpPrecondition().
|
static |
Check whether there is any interleaved use of any values
between firstOp
and secondOp
.
Conservatively return true
if any op or value is in a different block.
Definition at line 3258 of file Vectorization.cpp.
References mlir::Operation::getBlock(), and mlir::Operation::isBeforeInBlock().
Referenced by mlir::linalg::LinalgCopyVTRForwardingPattern::matchAndRewrite(), and mlir::linalg::LinalgCopyVTWForwardingPattern::matchAndRewrite().
|
static |
Emit reduction operations if the shape of the value to reduce is different from the result shape.
Definition at line 1267 of file Vectorization.cpp.
References buildMultiDimReduce(), getDimsToReduce(), mlir::Value::getType(), and mlir::IRMapping::lookup().
|
static |
Definition at line 2013 of file Vectorization.cpp.
References mlir::linalg::getCombinerOpKind(), mlir::AffineMap::isPermutation(), mlir::linalg::isReductionIterator(), and matchLinalgReduction().
Referenced by vectorizeDynamicLinalgOpPrecondition(), and vectorizeLinalgOpPrecondition().
Given an indexing map
coming from a LinalgOp indexing, restricted to a projectedPermutation, compress the unused dimensions to serve as a permutation_map for a vector transfer operation.
For example, given a linalg op such as:
the iteration domain size of the linalg op is 3x5x4x6x2. The first affine map is reindexed to affine_map<(d0, d1, d2) -> (d2, d0, d1)>
, the second affine map is reindexed to affine_map<(d0, d1) -> (d0, d1)>
.
Definition at line 561 of file Vectorization.cpp.
References mlir::compressUnusedDims(), and mlir::AffineMap::isProjectedPermutation().
|
static |
Helper function to check if the tensor.extract can be vectorized by the custom hook vectorizeTensorExtract.
Definition at line 838 of file Vectorization.cpp.
Referenced by vectorizeLinalgOpPrecondition().
|
static |
Vectorize tensor::InsertSliceOp with:
inputVectorSizes
, or
Before: !t_in_type = tensor<1x2x3xf32> !t_out_type = tensor<9x8x7x1x2x3xf32> !v_type = vector<1x2x3xf32> inserted_slice = tensor.insert_slice src into dest ... : !t_in_type into !t_out_type
After: read = vector.transfer_read src[...], pad ... : !t_in_type, !v_type write = vector.transfer_write read, dest ... : !v_type, !t_out_type
Definition at line 3089 of file Vectorization.cpp.
References mlir::arith::ConstantIndexOp::create(), mlir::vector::createReadOrMaskedRead(), createWriteOrMaskedWrite(), mlir::get(), mlir::Operation::getResult(), getStaticPadVal(), mlir::getValueOrCreateConstantIndexOp(), mlir::Builder::getZeroAttr(), and mlir::OpBuilder::setInsertionPoint().
|
static |
Vectorize a named linalg contraction op into: vector::TransferReadOp - Reads vectors from the operands; vector::ContractionOp - Performs contraction; vector::TransferWriteOp - Writes the result vector back to the destination. The operand shapes are preserved and loaded directly into vectors.
Any further permutations or numerical casting remain within contraction op.
Definition at line 2152 of file Vectorization.cpp.
|
static |
Generic vectorization function that rewrites the body of a linalgOp
into vector form.
Generic vectorization proceeds as follows:
linalgOp
has one non-empty region. When broadcastToMaximalCommonShape
is set to true, eager broadcasting is performed to the maximal common vector size implied by the linalgOp
iteration space. This eager broadcasting is introduced in the permutation_map of the vector.transfer_read operations. The eager broadcasting makes it trivial to determine where broadcast, transposes and reductions should occur, without any bookkeeping. The tradeoff is that, in the absence of good canonicalizations, the amount of work increases. This is not deemed a problem as we expect canonicalizations and foldings to aggressively clean up the useless work.
Definition at line 1418 of file Vectorization.cpp.
|
static |
Vectorize linalg::PackOp with (1) static inner_tiles (2) constant padding value and (3) input vector sizes into:
masked_transfer_read->shape_cast->transpose->transfer_write_in_bounds
As in the following example: pack = tensor.pack src inner_dims_pos = [2, 1] inner_tiles = [16, 2] into dst : tensor<32x8x16xf32> -> tensor<32x4x1x16x2xf32>
This pack would be vectorized to:
load = vector.mask mask { vector.transfer_read arg0[c0, c0, c0], cst {in_bounds = [true, true, true]} : tensor<32x7x16xf32>, vector<32x8x16xf32> } : vector<32x8x16xi1> -> vector<32x8x16xf32> shape_cast = vector.shape_cast load : vector<32x8x16xf32> to vector<32x4x2x1x16xf32> transpose = vector.transpose shape_cast, [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32> write = vector.transfer_write transpose, empty[c0_0, c0_0, c0_0, c0_0, c0_0] {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
If the (3) input vector sizes are not provided, the vector sizes are determined by the result tensor shape and the in_bounds
attribute is used instead of masking to mark out-of-bounds accesses.
NOTE: The input vector sizes specify the dimensions corresponding to the outer dimensions of the output tensor. The remaining dimensions are computed based on, e.g., the static inner tiles. Supporting dynamic inner tiles will require the user to specify the missing vector sizes. This is left as a TODO.
Definition at line 1765 of file Vectorization.cpp.
References mlir::applyPermutationToVector(), mlir::vector::createReadOrMaskedRead(), createWriteOrMaskedWrite(), mlir::detail::enumerate(), mlir::get(), mlir::linalg::getPackInverseDestPerm(), mlir::Operation::getResult(), getTiledPackShape(), mlir::Builder::getZeroAttr(), innerDimsPos, innerTiles, mlir::invertPermutationVector(), outerDimsPerm, and mlir::OpBuilder::setInsertionPoint().
|
static |
Vectorize a padOp
with (1) static result type, (2) constant padding value and (3) all-zero lowPad to transfer_write_in_bounds(transfer_read_masked(pad_source, pad_value))
.
Definition at line 1983 of file Vectorization.cpp.
References mlir::vector::createReadOrMaskedRead(), createWriteOrMaskedWrite(), mlir::Operation::getResult(), and mlir::OpBuilder::setInsertionPoint().
|
static |
Vectorize linalg.unpack as: xfer_read -> vector.transpose -> vector.shape_cast -> xfer_write.
The input-vector-sizes specify the read vector sizes (i.e. the vector sizes for the xfer_read operation). This is sufficient to infer the other vector sizes required here.
If the vector sizes are not provided:
EXAMPLE (no vector sizes):
is vectorized as:
Definition at line 1912 of file Vectorization.cpp.
References mlir::convertReassociationIndicesToExprs(), mlir::vector::createReadOrMaskedRead(), createWriteOrMaskedWrite(), getCollapsedVecType(), mlir::Builder::getContext(), mlir::Operation::getResult(), mlir::getSymbolLessAffineMaps(), mlir::linalg::getUnPackInverseSrcPerm(), mlir::Builder::getZeroAttr(), and mlir::OpBuilder::setInsertionPoint().
|
static |
Try to vectorize convOp as a convolution.
Helper function to vectorize a LinalgOp with convolution semantics.
Definition at line 4208 of file Vectorization.cpp.
Referenced by VectorizeConvolution::matchAndRewrite().
|
static |
Definition at line 2309 of file Vectorization.cpp.
References mlir::linalg::getCombinerOpKind(), getConvOperationKind(), isSupportedPoolKind(), and matchLinalgReduction().
Referenced by vectorizeLinalgOpPrecondition().
|
static |
Definition at line 2033 of file Vectorization.cpp.
References mlir::Value::getType().
Referenced by vectorizeDynamicLinalgOpPrecondition().
|
static |
Definition at line 2061 of file Vectorization.cpp.
References hasReductionIterator(), mlir::linalg::isElementwise(), reductionPreconditions(), and vectorizeDynamicConvOpPrecondition().
Referenced by vectorizeLinalgOpPrecondition().
|
static |
Definition at line 2113 of file Vectorization.cpp.
References getStaticPadVal().
Referenced by mlir::linalg::vectorizeOpPrecondition().
|
static |
Helper function to vectorize the index operations of a linalgOp.
Return VectorizationHookStatus::NewOp to signal the vectorization algorithm that it should map the produced operations. This function is meant to be used as a CustomVectorizationHook.
Definition at line 794 of file Vectorization.cpp.
|
static |
Definition at line 2354 of file Vectorization.cpp.
References mlir::linalg::allIndexingsAreProjectedPermutation(), mlir::remark::failed(), mlir::Region::front(), mlir::Operation::getRegion(), mlir::linalg::isElementwise(), mlir::vector::isValidMaskedInputVector(), reductionPreconditions(), tensorExtractVectorizationPrecondition(), vectorizeConvOpPrecondition(), and vectorizeDynamicLinalgOpPrecondition().
Referenced by mlir::linalg::vectorizeOpPrecondition().
|
static |
Helper function to vectorize the terminator of a linalgOp.
New result vector values are appended to newResults. Return VectorizationHookStatus::NoReplace to signal the vectorization algorithm that it should not try to map produced operations and instead return the results using the newResults vector, making them available to the vectorization algorithm for RAUW. This function is meant to be used as a CustomVectorizationHook.
Definition at line 770 of file Vectorization.cpp.
|
static |
Generic vectorization for a single operation op, given already vectorized operands carried by bvm.
Vectorization occurs as follows:
1. Try to apply any of the customVectorizationHooks and return its result on success.
2. Clone any constant in the current scope without vectorization: each consumer of the constant will later determine the shape to which the constant needs to be broadcast.
3. Fail on any remaining non-ElementwiseMappable op. It is the purpose of the customVectorizationHooks to cover such cases.
4. Clone op in vector form to a vector of shape prescribed by the first operand of maximal rank. Other operands have smaller rank and are broadcast accordingly. It is assumed this broadcast is always legal; otherwise, it means one of the customVectorizationHooks is incorrect.
This function assumes all operands of op have been vectorized and are in the bvm mapping. As a consequence, this function is meant to be called on a topologically-sorted list of ops. This function does not update bvm but returns a VectorizationHookStatus that instructs the caller what bvm update needs to occur.
Definition at line 1303 of file Vectorization.cpp.
|
static |
Definition at line 2423 of file Vectorization.cpp.
References mlir::remark::failed(), mlir::vector::isValidMaskedInputVector(), mlir::m_Constant(), and mlir::matchPattern().
Referenced by mlir::linalg::vectorizeOpPrecondition().
|
static |
Definition at line 2457 of file Vectorization.cpp.
References mlir::detail::enumerate(), mlir::remark::failed(), and mlir::vector::isValidMaskedInputVector().
Referenced by mlir::linalg::vectorizeOpPrecondition().
|
static |
Preconditions for scalable vectors.
For Ops implementing the LinalgOp interface, this is quite restrictive - it models the fact that in practice we would only make selected dimensions scalable. For other Ops (e.g. linalg.unpack), this will succeed unconditionally - we are yet to identify meaningful conditions.
Definition at line 2502 of file Vectorization.cpp.
References hasReductionIterator(), and mlir::linalg::isElementwise().
Referenced by mlir::linalg::vectorizeOpPrecondition().
|
static |
Helper function to vectorize the tensor.extract operations.
Returns VectorizationHookStatus::NewOp to signal the vectorization algorithm that it should map the produced operations. This function is meant to be used as a CustomVectorizationHook.
Definition at line 1132 of file Vectorization.cpp.
|
static |
This hook considers two cases: (1) If the input-vector-sizes are empty, then the vector sizes will be inferred. This is only possible when all shapes are static. (2) If the input-vector-sizes are non-empty (i.e. user provided), then carry out basic sanity-checking.
Definition at line 2086 of file Vectorization.cpp.
References mlir::remark::failed(), and mlir::vector::isValidMaskedInputVector().
Referenced by mlir::linalg::vectorizeOpPrecondition().