|
MLIR
22.0.0git
|
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/UB/IR/UBOps.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/TypeUtilities.h"
Go to the source code of this file.
Macros | |
| #define | DEBUG_TYPE "lower-vector-transpose" |
Functions | |
| static void | pruneNonTransposedDims (ArrayRef< int64_t > transpose, SmallVectorImpl< int64_t > &result) |
| Given a 'transpose' pattern, prune the rightmost dimensions that are not transposed. More... | |
| static bool | isShuffleLike (VectorTransposeLowering lowering) |
| Returns true if the lowering option is a vector shuffle based approach. More... | |
| static SmallVector< int64_t > | getUnpackShufflePermFor128Lane (ArrayRef< int64_t > vals, int numBits) |
Returns a shuffle mask that builds on vals. More... | |
| static Value | createUnpackLoPd (ImplicitLocOpBuilder &b, Value v1, Value v2, int numBits) |
| Lower to vector.shuffle on v1 and v2 with UnpackLoPd shuffle mask. More... | |
| static Value | createUnpackHiPd (ImplicitLocOpBuilder &b, Value v1, Value v2, int numBits) |
| Lower to vector.shuffle on v1 and v2 with UnpackHiPd shuffle mask. More... | |
| static Value | createUnpackLoPs (ImplicitLocOpBuilder &b, Value v1, Value v2, int numBits) |
| Lower to vector.shuffle on v1 and v2 with UnpackLoPs shuffle mask. More... | |
| static Value | createUnpackHiPs (ImplicitLocOpBuilder &b, Value v1, Value v2, int numBits) |
| Lower to vector.shuffle on v1 and v2 with UnpackHiPs shuffle mask. More... | |
| static Value | create4x128BitSuffle (ImplicitLocOpBuilder &b, Value v1, Value v2, uint8_t mask) |
Returns a vector.shuffle that shuffles 128-bit lanes (composed of 4 32-bit elements) selected by mask from v1 and v2. More... | |
| static Value | transposeToShuffle1D (OpBuilder &b, Value source, int m, int n) |
| Lowers the value to a vector.shuffle op. More... | |
| static Value | transposeToShuffle16x16 (OpBuilder &builder, Value source, int m, int n) |
| Lowers the value to a sequence of vector.shuffle ops. More... | |
| #define DEBUG_TYPE "lower-vector-transpose" |
Definition at line 26 of file LowerVectorTranspose.cpp.
|
static |
Returns a vector.shuffle that shuffles 128-bit lanes (composed of 4 32-bit elements) selected by mask from v1 and v2.
I.e.,
DEFINE SELECT4(src, control) {
  CASE(control[1:0]) OF
  0: tmp[127:0] := src[127:0]
  1: tmp[127:0] := src[255:128]
  2: tmp[127:0] := src[383:256]
  3: tmp[127:0] := src[511:384]
  ESAC
  RETURN tmp[127:0]
}
dst[127:0]   := SELECT4(v1[511:0], mask[1:0])
dst[255:128] := SELECT4(v1[511:0], mask[3:2])
dst[383:256] := SELECT4(v2[511:0], mask[5:4])
dst[511:384] := SELECT4(v2[511:0], mask[7:6])
Definition at line 147 of file LowerVectorTranspose.cpp.
References mlir::Value::getType().
Referenced by transposeToShuffle16x16().
|
static |
Lower to vector.shuffle on v1 and v2 with UnpackHiPd shuffle mask.
For example, if it is targeting a 512-bit vector, returns vector.shuffle on v1, v2, [2, 3, 18, 19, 2+4, 3+4, 18+4, 19+4, 2+8, 3+8, 18+8, 19+8, 2+12, 3+12, 18+12, 19+12].
Definition at line 92 of file LowerVectorTranspose.cpp.
References getUnpackShufflePermFor128Lane().
Referenced by transposeToShuffle16x16().
|
static |
Lower to vector.shuffle on v1 and v2 with UnpackHiPs shuffle mask.
For example, if it is targeting a 512-bit vector, returns vector.shuffle on v1, v2, [2, 18, 3, 19, 2+4, 18+4, 3+4, 19+4, 2+8, 18+8, 3+8, 19+8, 2+12, 18+12, 3+12, 19+12].
Definition at line 122 of file LowerVectorTranspose.cpp.
References getUnpackShufflePermFor128Lane().
Referenced by transposeToShuffle16x16().
|
static |
Lower to vector.shuffle on v1 and v2 with UnpackLoPd shuffle mask.
For example, if it is targeting a 512-bit vector, returns vector.shuffle on v1, v2, [0, 1, 16, 17, 0+4, 1+4, 16+4, 17+4, 0+8, 1+8, 16+8, 17+8, 0+12, 1+12, 16+12, 17+12].
Definition at line 78 of file LowerVectorTranspose.cpp.
References getUnpackShufflePermFor128Lane().
Referenced by transposeToShuffle16x16().
|
static |
Lower to vector.shuffle on v1 and v2 with UnpackLoPs shuffle mask.
For example, if it is targeting a 512-bit vector, returns vector.shuffle on v1, v2, [0, 16, 1, 17, 0+4, 16+4, 1+4, 17+4, 0+8, 16+8, 1+8, 17+8, 0+12, 16+12, 1+12, 17+12].
Definition at line 107 of file LowerVectorTranspose.cpp.
References getUnpackShufflePermFor128Lane().
Referenced by transposeToShuffle16x16().
|
static |
Returns a shuffle mask that builds on vals.
vals is the offset base of the shuffle ops, i.e., the unpack pattern. The method iterates over vals to create the mask for a numBits-bit vector. numBits has to be a multiple of 128. For example, if vals is {0, 1, 16, 17} and numBits is 512, there should be 16 elements in the final result. It constructs the mask below to get the unpacked elements: [0, 1, 16, 17, 0+4, 1+4, 16+4, 17+4, 0+8, 1+8, 16+8, 17+8, 0+12, 1+12, 16+12, 17+12]
Definition at line 62 of file LowerVectorTranspose.cpp.
Referenced by createUnpackHiPd(), createUnpackHiPs(), createUnpackLoPd(), and createUnpackLoPs().
|
static |
Returns true if the lowering option is a vector shuffle based approach.
Definition at line 46 of file LowerVectorTranspose.cpp.
|
static |
Given a 'transpose' pattern, prune the rightmost dimensions that are not transposed.
Definition at line 33 of file LowerVectorTranspose.cpp.
Lowers the value to a sequence of vector.shuffle ops.
The source is expected to be a 16x16 vector.
Definition at line 198 of file LowerVectorTranspose.cpp.
References create4x128BitSuffle(), mlir::ImplicitLocOpBuilder::createOrFold(), createUnpackHiPd(), createUnpackHiPs(), createUnpackLoPd(), createUnpackLoPs(), mlir::get(), mlir::Value::getLoc(), and mlir::Value::getType().
Lowers the value to a vector.shuffle op.
The source is expected to be a 1-D vector that has m x n elements.
Definition at line 187 of file LowerVectorTranspose.cpp.
References mlir::Value::getLoc().