#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/UB/IR/UBOps.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/TypeUtilities.h"

Macros
#define	DEBUG_TYPE "lower-vector-transpose"

Functions
static void	pruneNonTransposedDims (ArrayRef< int64_t > transpose, SmallVectorImpl< int64_t > &result)
	Given a 'transpose' pattern, prune the rightmost dimensions that are not transposed. More...

static bool	isShuffleLike (VectorTransposeLowering lowering)
	Returns true if the lowering option is a vector shuffle based approach. More...

static SmallVector< int64_t >	getUnpackShufflePermFor128Lane (ArrayRef< int64_t > vals, int numBits)
	Returns a shuffle mask that builds on `vals`. More...

static Value	createUnpackLoPd (ImplicitLocOpBuilder &b, Value v1, Value v2, int numBits)
	Lower to vector.shuffle on v1 and v2 with UnpackLoPd shuffle mask. More...

static Value	createUnpackHiPd (ImplicitLocOpBuilder &b, Value v1, Value v2, int numBits)
	Lower to vector.shuffle on v1 and v2 with UnpackHiPd shuffle mask. More...

static Value	createUnpackLoPs (ImplicitLocOpBuilder &b, Value v1, Value v2, int numBits)
	Lower to vector.shuffle on v1 and v2 with UnpackLoPs shuffle mask. More...

static Value	createUnpackHiPs (ImplicitLocOpBuilder &b, Value v1, Value v2, int numBits)
	Lower to vector.shuffle on v1 and v2 with UnpackHiPs shuffle mask. More...

static Value	create4x128BitSuffle (ImplicitLocOpBuilder &b, Value v1, Value v2, uint8_t mask)
	Returns a vector.shuffle that shuffles 128-bit lanes (composed of 4 32-bit elements) selected by `mask` from `v1` and `v2`. More...

static Value	transposeToShuffle1D (OpBuilder &b, Value source, int m, int n)
	Lowers the value to a vector.shuffle op. More...

static Value	transposeToShuffle16x16 (OpBuilder &builder, Value source, int m, int n)
	Lowers the value to a sequence of vector.shuffle ops. More...

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE "lower-vector-transpose"

Definition at line 28 of file LowerVectorTranspose.cpp.

Function Documentation

◆ create4x128BitSuffle()

static Value create4x128BitSuffle	(	ImplicitLocOpBuilder &	b,
		Value	v1,
		Value	v2,
		uint8_t	mask
	)

static

Returns a vector.shuffle that shuffles 128-bit lanes (composed of 4 32-bit elements) selected by mask from v1 and v2.

I.e.,

DEFINE SELECT4(src, control) {
  CASE(control[1:0]) OF
    0: tmp[127:0] := src[127:0]
    1: tmp[127:0] := src[255:128]
    2: tmp[127:0] := src[383:256]
    3: tmp[127:0] := src[511:384]
  ESAC
  RETURN tmp[127:0]
}
dst[127:0]   := SELECT4(v1[511:0], mask[1:0])
dst[255:128] := SELECT4(v1[511:0], mask[3:2])
dst[383:256] := SELECT4(v2[511:0], mask[5:4])
dst[511:384] := SELECT4(v2[511:0], mask[7:6])

Definition at line 149 of file LowerVectorTranspose.cpp.

References mlir::ImplicitLocOpBuilder::create(), and mlir::Value::getType().

◆ createUnpackHiPd()

static Value createUnpackHiPd	(	ImplicitLocOpBuilder &	b,
		Value	v1,
		Value	v2,
		int	numBits
	)

static

Lower to vector.shuffle on v1 and v2 with UnpackHiPd shuffle mask.

For example, when targeting a 512-bit vector, it returns a vector.shuffle on v1, v2 with the mask [2, 3, 18, 19, 2+4, 3+4, 18+4, 19+4, 2+8, 3+8, 18+8, 19+8, 2+12, 3+12, 18+12, 19+12].

Definition at line 94 of file LowerVectorTranspose.cpp.

References mlir::ImplicitLocOpBuilder::create(), and getUnpackShufflePermFor128Lane().

◆ createUnpackHiPs()

static Value createUnpackHiPs	(	ImplicitLocOpBuilder &	b,
		Value	v1,
		Value	v2,
		int	numBits
	)

static

Lower to vector.shuffle on v1 and v2 with UnpackHiPs shuffle mask.

For example, when targeting a 512-bit vector, it returns a vector.shuffle on v1, v2 with the mask [2, 18, 3, 19, 2+4, 18+4, 3+4, 19+4, 2+8, 18+8, 3+8, 19+8, 2+12, 18+12, 3+12, 19+12].

Definition at line 124 of file LowerVectorTranspose.cpp.

References mlir::ImplicitLocOpBuilder::create(), and getUnpackShufflePermFor128Lane().

◆ createUnpackLoPd()

static Value createUnpackLoPd	(	ImplicitLocOpBuilder &	b,
		Value	v1,
		Value	v2,
		int	numBits
	)

static

Lower to vector.shuffle on v1 and v2 with UnpackLoPd shuffle mask.

For example, when targeting a 512-bit vector, it returns a vector.shuffle on v1, v2 with the mask [0, 1, 16, 17, 0+4, 1+4, 16+4, 17+4, 0+8, 1+8, 16+8, 17+8, 0+12, 1+12, 16+12, 17+12].

Definition at line 80 of file LowerVectorTranspose.cpp.

References mlir::ImplicitLocOpBuilder::create(), and getUnpackShufflePermFor128Lane().

◆ createUnpackLoPs()

static Value createUnpackLoPs	(	ImplicitLocOpBuilder &	b,
		Value	v1,
		Value	v2,
		int	numBits
	)

static

Lower to vector.shuffle on v1 and v2 with UnpackLoPs shuffle mask.

For example, when targeting a 512-bit vector, it returns a vector.shuffle on v1, v2 with the mask [0, 16, 1, 17, 0+4, 16+4, 1+4, 17+4, 0+8, 16+8, 1+8, 17+8, 0+12, 16+12, 1+12, 17+12].

Definition at line 109 of file LowerVectorTranspose.cpp.

References mlir::ImplicitLocOpBuilder::create(), and getUnpackShufflePermFor128Lane().

◆ getUnpackShufflePermFor128Lane()

static SmallVector<int64_t> getUnpackShufflePermFor128Lane	(	ArrayRef< int64_t >	vals,
		int	numBits
	)

static

Returns a shuffle mask that builds on vals.

vals is the offset base of the shuffle ops, i.e., the unpack pattern. The method iterates over vals to create the mask for a vector of numBits bits. numBits has to be a multiple of 128. For example, if vals is {0, 1, 16, 17} and numBits is 512, the final result has 16 elements. It constructs the mask below to select the unpacked elements: [0, 1, 16, 17, 0+4, 1+4, 16+4, 17+4, 0+8, 1+8, 16+8, 17+8, 0+12, 1+12, 16+12, 17+12]

Definition at line 64 of file LowerVectorTranspose.cpp.

Referenced by createUnpackHiPd(), createUnpackHiPs(), createUnpackLoPd(), and createUnpackLoPs().

◆ isShuffleLike()

static bool isShuffleLike ( VectorTransposeLowering lowering )

static

Returns true if the lowering option is a vector shuffle based approach.

Definition at line 48 of file LowerVectorTranspose.cpp.

◆ pruneNonTransposedDims()

static void pruneNonTransposedDims	(	ArrayRef< int64_t >	transpose,
		SmallVectorImpl< int64_t > &	result
	)

static

Given a 'transpose' pattern, prune the rightmost dimensions that are not transposed.

Definition at line 35 of file LowerVectorTranspose.cpp.

References mlir::xegpu::transpose().

◆ transposeToShuffle16x16()

static Value transposeToShuffle16x16	(	OpBuilder &	builder,
		Value	source,
		int	m,
		int	n
	)

static

Lowers the value to a sequence of vector.shuffle ops.

The source is expected to be a 16x16 vector.

Definition at line 200 of file LowerVectorTranspose.cpp.

◆ transposeToShuffle1D()

static Value transposeToShuffle1D	(	OpBuilder &	b,
		Value	source,
		int	m,
		int	n
	)

static

Lowers the value to a vector.shuffle op.

The source is expected to be a 1-D vector with m x n elements.

Definition at line 189 of file LowerVectorTranspose.cpp.

Macros

Functions

Macro Definition Documentation

◆ DEBUG_TYPE

Function Documentation

◆ create4x128BitSuffle()

◆ createUnpackHiPd()

◆ createUnpackHiPs()

◆ createUnpackLoPd()

◆ createUnpackLoPs()

◆ getUnpackShufflePermFor128Lane()

◆ isShuffleLike()

◆ pruneNonTransposedDims()

◆ transposeToShuffle16x16()

◆ transposeToShuffle1D()