MLIR 22.0.0git
VectorEmulateNarrowType.cpp File Reference
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Transforms/NarrowTypeEmulationConverter.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/IR/Value.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DebugLog.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <optional>
#include "mlir/Dialect/MemRef/Transforms/Transforms.h"

Go to the source code of this file.

Macros

#define DEBUG_TYPE   "vector-narrow-type-emulation"

Typedefs

using VectorValue = TypedValue<VectorType>
using MemRefValue = TypedValue<MemRefType>
using ExtractNBitsFn

Functions

static FailureOr< Operation * > getCompressedMaskOp (OpBuilder &rewriter, Location loc, Value mask, int numSrcElems, int numSrcElemsPerDest, int numFrontPadElems=0)
 Returns a compressed mask for the emulated vector.
static Value staticallyExtractSubvector (OpBuilder &rewriter, Location loc, Value src, int64_t offset, int64_t numElemsToExtract)
 Extracts 1-D subvector from a 1-D vector.
static Value staticallyInsertSubvector (OpBuilder &rewriter, Location loc, Value src, Value dest, int64_t offset)
 Inserts 1-D subvector into a 1-D vector.
static Value dynamicallyExtractSubVector (OpBuilder &rewriter, Location loc, Value src, Value dest, OpFoldResult offset, int64_t numElemsToExtract)
 Extracts 1-D subvector from a 1-D vector.
static Value dynamicallyInsertSubVector (RewriterBase &rewriter, Location loc, Value src, Value dest, OpFoldResult offset, int64_t numElemsToInsert)
 Inserts 1-D subvector into a 1-D vector.
static VectorValue emulatedVectorLoad (OpBuilder &rewriter, Location loc, Value base, OpFoldResult linearizedIndices, int64_t numContainerElemsToLoad, Type emulatedElemTy, Type containerElemTy)
 Emulate a vector load for emulatedElemTy using containerElemTy
static Value downcastSelectAndUpcast (OpBuilder &builder, Location loc, VectorType downcastType, VectorType upcastType, Value mask, Value trueValue, Value falseValue)
 Downcast two values to downcastType, then select values based on mask, and casts the result to upcastType.
static void atomicRMW (OpBuilder &builder, Location loc, MemRefValue linearizedMemref, Value storeIdx, VectorValue valueToStore, Value mask)
 Emits memref.generic_atomic_rmw op to store a subbyte-sized value to a byte in linearizedMemref, with a mask.
static void nonAtomicRMW (OpBuilder &builder, Location loc, MemRefValue linearizedMemref, Value linearizedIndex, VectorValue valueToStore, Value mask)
 Generate a non-atomic read-modify-write sequence for storing to the emulated type.
static Value extractSliceIntoByte (ConversionPatternRewriter &rewriter, Location loc, VectorValue vector, int64_t extractOffset, int64_t sliceNumElements, int64_t insertOffset)
 Extract sliceNumElements from source vector at extractOffset, and insert it into an empty vector at insertOffset.
static raw_ostream & operator<< (raw_ostream &os, const SmallVector< SourceElementRangeList > &vec)
static LogicalResult commonConversionPrecondition (PatternRewriter &rewriter, VectorType preconditionType, Operation *op)
 Verify that the precondition type meets the common preconditions for any conversion.
static LogicalResult alignedConversionPrecondition (PatternRewriter &rewriter, VectorType subByteVecTy, Type containerTy, Operation *op)
 Verify that subByteVecTy (vector) and containerTy (scalar) are aligned.
static Value bitcastSubByteVectorToI8 (PatternRewriter &rewriter, Location loc, Value subByteVec)
 Bitcasts the aligned subByteVec vector to a vector of i8.
static Value extractNBitsPerByteAndSignExtendToI8 (PatternRewriter &rewriter, Location loc, Value src, int bitIdx, int numBits)
 Extracts a signed N-bit sequence from each element of a vector of bytes, starting at the specified bit index.
static Value extractNBitsPerByteAndExtendToI8 (PatternRewriter &rewriter, Location loc, Value src, int bitIdx, int numBits)
 Extracts an unsigned N-bit sequence from each element of a vector of bytes, starting at the specified bit index.
static Value rewriteI4ToI8Ext (PatternRewriter &rewriter, Location loc, Value srcValue, const ExtractNBitsFn &extFn)
 Rewrite the i4 -> i8 extension into a sequence of shuffles and bitwise ops to avoid leaving LLVM to scramble with peephole optimizations.
static Value rewriteI2ToI8Ext (PatternRewriter &rewriter, Location loc, Value srcValue, const ExtractNBitsFn &extFn)
 Rewrite the i2 -> i8 extension into a sequence of shuffles and bitwise ops to avoid leaving LLVM to scramble with peephole optimizations.
static Value rewriteI8ToI4Trunc (PatternRewriter &rewriter, Location loc, Value srcValue)
 Rewrite the i8 -> i4 truncation into a deinterleave and series of bitwise ops to avoid leaving LLVM to scramble with peephole optimizations.

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "vector-narrow-type-emulation"

Definition at line 45 of file VectorEmulateNarrowType.cpp.

Typedef Documentation

◆ ExtractNBitsFn

Initial value:
std::function<Value(PatternRewriter &, Location, Value, int, int)>
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96

Definition at line 1847 of file VectorEmulateNarrowType.cpp.

◆ MemRefValue

using MemRefValue = TypedValue<MemRefType>

Definition at line 48 of file VectorEmulateNarrowType.cpp.

◆ VectorValue

using VectorValue = TypedValue<VectorType>

Definition at line 47 of file VectorEmulateNarrowType.cpp.

Function Documentation

◆ alignedConversionPrecondition()

LogicalResult alignedConversionPrecondition ( PatternRewriter & rewriter,
VectorType subByteVecTy,
Type containerTy,
Operation * op )
static

Verify that subByteVecTy (vector) and containerTy (scalar) are aligned.

Alignment means that subByteVecTy can be packed into a vector of containerTy elements. More specifically:

  1. The bit-width of containerTy is a multiple of the bit-width of subByteVecTy elements. For example, for i4 and i16 this multiple is 4.
  2. The multiple from 1. above divides evenly the number of the (trailing) elements in subByteVecTy.

EXAMPLE 1: subByteVecTy = vector<2xi4>, and containerTy = i16

2 divides evenly 4 ( = 16 / 4), hence both conditions are met.

EXAMPLE 2: subByteVecTy = vector<3xi4>, and containerTy = i16

3 does not divide evenly 4 (= 16/4), hence the conditions are not met.

EXAMPLE 3: subByteVecTy = vector<3xi3>, and containerTy = i16

16 is not a multiple of 3, hence the conditions are not met.

NOTE: This method assumes that common conversion preconditions are met. In particular, containerTy is assumed to be a multi-byte scalar type (e.g., i8, i16, i32).

Definition at line 1634 of file VectorEmulateNarrowType.cpp.

References mlir::Type::getIntOrFloatBitWidth(), mlir::Type::isIntOrFloat(), mlir::RewriterBase::notifyMatchFailure(), and success().

◆ atomicRMW()

void atomicRMW ( OpBuilder & builder,
Location loc,
MemRefValue linearizedMemref,
Value storeIdx,
VectorValue valueToStore,
Value mask )
static

Emits memref.generic_atomic_rmw op to store a subbyte-sized value to a byte in linearizedMemref, with a mask.

The valueToStore is a vector of subbyte-sized elements with a total size of 8 bits, and the mask is used to select which elements to store.

Inputs:
  linearizedMemref = |2|2|2|2| : <4xi2> (<1xi8>)
  storeIdx = 2
  valueToStore = |3|3|3|3| : vector<4xi2>
  mask = |0|0|1|1| : vector<4xi1>

Result: linearizedMemref = |2|2|3|3| : <4xi2> (<1xi8>)

Definition at line 419 of file VectorEmulateNarrowType.cpp.

References downcastSelectAndUpcast(), and mlir::OpBuilder::setInsertionPointToStart().

◆ bitcastSubByteVectorToI8()

Value bitcastSubByteVectorToI8 ( PatternRewriter & rewriter,
Location loc,
Value subByteVec )
static

Bitcasts the aligned subByteVec vector to a vector of i8.

Here, "aligned" means that it satisfies alignedConversionPrecondition().

Examples:
  vector<16x16xi2> -> vector<16x4xi8>
  vector<16x16xi4> -> vector<16x8xi8>

Definition at line 1753 of file VectorEmulateNarrowType.cpp.

References mlir::Builder::getI8Type(), and mlir::Value::getType().

Referenced by rewriteI2ToI8Ext(), and rewriteI4ToI8Ext().

◆ commonConversionPrecondition()

LogicalResult commonConversionPrecondition ( PatternRewriter & rewriter,
VectorType preconditionType,
Operation * op )
static

Verify that the precondition type meets the common preconditions for any conversion.

Definition at line 1576 of file VectorEmulateNarrowType.cpp.

References mlir::RewriterBase::notifyMatchFailure(), and success().

◆ downcastSelectAndUpcast()

Value downcastSelectAndUpcast ( OpBuilder & builder,
Location loc,
VectorType downcastType,
VectorType upcastType,
Value mask,
Value trueValue,
Value falseValue )
static

Downcast two values to downcastType, then select values based on mask, and casts the result to upcastType.

Definition at line 384 of file VectorEmulateNarrowType.cpp.

References mlir::Value::getType().

Referenced by atomicRMW(), and nonAtomicRMW().

◆ dynamicallyExtractSubVector()

Value dynamicallyExtractSubVector ( OpBuilder & rewriter,
Location loc,
Value src,
Value dest,
OpFoldResult offset,
int64_t numElemsToExtract )
static

Extracts 1-D subvector from a 1-D vector.

Given the input rank-1 source vector, extracts numElemsToExtract elements from src, starting at offset. The result is also a rank-1 vector:

vector<numElemsToExtract x !elType>

(!elType is the element type of the source vector). As offset is assumed to be a dynamic SSA value, this helper method generates a sequence of vector.extract + vector.insert pairs.

EXAMPLE:
  v1 = vector.extract src[offset] : i2 from vector<8xi2>
  r1 = vector.insert v1, dest[0] : i2 into vector<3xi2>
  c1 = arith.constant 1 : index
  idx2 = arith.addi offset, c1 : index
  v2 = vector.extract src[idx2] : i2 from vector<8xi2>
  r2 = vector.insert v2, r1 [1] : i2 into vector<3xi2>
  (...)

Definition at line 283 of file VectorEmulateNarrowType.cpp.

References mlir::arith::ConstantIndexOp::create(), mlir::Builder::getIndexType(), and mlir::Value::getType().

◆ dynamicallyInsertSubVector()

Value dynamicallyInsertSubVector ( RewriterBase & rewriter,
Location loc,
Value src,
Value dest,
OpFoldResult offset,
int64_t numElemsToInsert )
static

Inserts 1-D subvector into a 1-D vector.

Inserts the input rank-1 source vector into the destination vector starting at offset. As offset is assumed to be a dynamic SSA value, this hook uses a sequence of vector.extract + vector.insert pairs.

EXAMPLE:
  v1 = vector.extract src[0] : i2 from vector<8xi2>
  r1 = vector.insert v1, dest[offset] : i2 into vector<3xi2>
  c1 = arith.constant 1 : index
  idx2 = arith.addi offset, c1 : index
  v2 = vector.extract src[1] : i2 from vector<8xi2>
  r2 = vector.insert v2, r1 [idx2] : i2 into vector<3xi2>
  (...)

Definition at line 327 of file VectorEmulateNarrowType.cpp.

References mlir::arith::ConstantIndexOp::create(), mlir::Builder::getIndexType(), mlir::Value::getType(), and mlir::getValueOrCreateConstantIndexOp().

◆ emulatedVectorLoad()

VectorValue emulatedVectorLoad ( OpBuilder & rewriter,
Location loc,
Value base,
OpFoldResult linearizedIndices,
int64_t numContainerElemsToLoad,
Type emulatedElemTy,
Type containerElemTy )
static

Emulate a vector load for emulatedElemTy using containerElemTy

Specifically, use containerElemTy for loading a vector of emulatedElemTy. The load location is given by base and linearizedIndices, and the load size is given by numContainerElemsToLoad.

Definition at line 364 of file VectorEmulateNarrowType.cpp.

References mlir::Type::getIntOrFloatBitWidth(), and mlir::getValueOrCreateConstantIndexOp().

◆ extractNBitsPerByteAndExtendToI8()

Value extractNBitsPerByteAndExtendToI8 ( PatternRewriter & rewriter,
Location loc,
Value src,
int bitIdx,
int numBits )
static

Extracts an unsigned N-bit sequence from each element of a vector of bytes, starting at the specified bit index.

The bitIdx starts at 0 from the LSB and moves to the left.

Example for a single element: Extract numBits=2 starting at bitIdx=2
  src     = [0 | 1 | 0 | 1 | 1 | 0 | 1 | 0]
  indices = [7 | 6 | 5 | 4 | 3 | 2 | 1 | 0]
  target  = [.   .   .   .   ^   ^   .   .]

The target sequence is [10](decimal=2) as unsigned 2-bit integer. So the result should be [00 00 00 10](decimal=2) as unsigned 8-bit integer.

  src    = [01 01 10 10]
  mask   = [00 00 00 11]
  shr    = arith.shrui(src, 2)   = [00 01 01 10]
  result = arith.andi(shr, mask) = [00 00 00 10]

NOTE: Similarly to extractNBitsPerByteAndSignExtendToI8, this could be achieved by using arith::ShLIOp + arith::ShRUIOp instead of the masking. However, by using arith::ShRUIOp + arith::AndIOp, we eliminate the left shift when the index is 0.

Definition at line 1825 of file VectorEmulateNarrowType.cpp.

References mlir::DenseElementsAttr::get(), and mlir::Value::getType().

◆ extractNBitsPerByteAndSignExtendToI8()

Value extractNBitsPerByteAndSignExtendToI8 ( PatternRewriter & rewriter,
Location loc,
Value src,
int bitIdx,
int numBits )
static

Extracts a signed N-bit sequence from each element of a vector of bytes, starting at the specified bit index.

The bitIdx starts at 0 from the LSB and moves to the left.

Example for a single element: Extract numBits=2 starting at bitIdx=2
  src     = [0 | 1 | 0 | 1 | 1 | 1 | 1 | 0]
  indices = [7 | 6 | 5 | 4 | 3 | 2 | 1 | 0]
  target  = [.   .   .   .   ^   ^   .   .]

The target sequence is [11](decimal=-1) as signed 2-bit integer. So the result should be [11 11 11 11](decimal=-1) as signed 8-bit integer.

  src    = [01 01 11 10]
  shl    = arith.shli(src, 4)  -> [11 10 00 00]
  result = arith.shrsi(shl, 6) -> [11 11 11 11]

Definition at line 1783 of file VectorEmulateNarrowType.cpp.

References mlir::DenseElementsAttr::get(), and mlir::Value::getType().

◆ extractSliceIntoByte()

Value extractSliceIntoByte ( ConversionPatternRewriter & rewriter,
Location loc,
VectorValue vector,
int64_t extractOffset,
int64_t sliceNumElements,
int64_t insertOffset )
static

Extract sliceNumElements from source vector at extractOffset, and insert it into an empty vector at insertOffset.

Inputs:
  vec_in = |0|1|2|3| : vector<4xi2>
  extractOffset = 1
  sliceNumElements = 2
  insertOffset = 2
Output:
  vec_out = |0|0|1|2| : vector<4xi2>

Definition at line 479 of file VectorEmulateNarrowType.cpp.

References staticallyExtractSubvector(), and staticallyInsertSubvector().

◆ getCompressedMaskOp()

FailureOr< Operation * > getCompressedMaskOp ( OpBuilder & rewriter,
Location loc,
Value mask,
int numSrcElems,
int numSrcElemsPerDest,
int numFrontPadElems = 0 )
static

Returns a compressed mask for the emulated vector.

For example, when emulating an eight-element i8 vector with i32 (i.e. when the source elements span two dest elements), this method compresses vector<8xi1> into vector<2xi1>.

The compressed/output mask value is set iff any mask in the corresponding numSrcElemsPerDest range of uncompressed/input masks is set. E.g., if numSrcElemsPerDest equals 2 and numFrontPadElems equals 1, the following mask:

mask = [1, 1, 0, 0, 0, 0]

will first be padded in the front with numFrontPadElems zeros, and zeros will be added in the back to make the number of elements a multiple of numSrcElemsPerDest (for easier computation). The resulting mask will be:

mask = [0, 1, 1, 0, 0, 0, 0, 0]

then it will return the following new compressed mask:

mask = [1, 1, 0, 0]

NOTE: numFrontPadElems is assumed to be strictly smaller than numSrcElemsPerDest.

Definition at line 78 of file VectorEmulateNarrowType.cpp.

References mlir::bindSymbols(), mlir::DenseElementsAttr::get(), mlir::getAsOpFoldResult(), mlir::Builder::getContext(), mlir::Value::getDefiningOp(), mlir::Builder::getI1Type(), mlir::Operation::getResultTypes(), mlir::getValueOrCreateConstantIndexOp(), and mlir::affine::makeComposedFoldedAffineApply().

◆ nonAtomicRMW()

void nonAtomicRMW ( OpBuilder & builder,
Location loc,
MemRefValue linearizedMemref,
Value linearizedIndex,
VectorValue valueToStore,
Value mask )
static

Generate a non-atomic read-modify-write sequence for storing to the emulated type.

It has similar logic to atomicRMW, but without atomicity.

Definition at line 450 of file VectorEmulateNarrowType.cpp.

References downcastSelectAndUpcast(), and getElementType().

◆ operator<<()

raw_ostream & operator<< ( raw_ostream & os,
const SmallVector< SourceElementRangeList > & vec )
static

Definition at line 1519 of file VectorEmulateNarrowType.cpp.

◆ rewriteI2ToI8Ext()

Value rewriteI2ToI8Ext ( PatternRewriter & rewriter,
Location loc,
Value srcValue,
const ExtractNBitsFn & extFn )
static

Rewrite the i2 -> i8 extension into a sequence of shuffles and bitwise ops to avoid leaving LLVM to scramble with peephole optimizations.

Definition at line 1872 of file VectorEmulateNarrowType.cpp.

References bitcastSubByteVectorToI8(), and mlir::Value::getType().

◆ rewriteI4ToI8Ext()

Value rewriteI4ToI8Ext ( PatternRewriter & rewriter,
Location loc,
Value srcValue,
const ExtractNBitsFn & extFn )
static

Rewrite the i4 -> i8 extension into a sequence of shuffles and bitwise ops to avoid leaving LLVM to scramble with peephole optimizations.

Definition at line 1852 of file VectorEmulateNarrowType.cpp.

References bitcastSubByteVectorToI8(), and mlir::Value::getType().

◆ rewriteI8ToI4Trunc()

Value rewriteI8ToI4Trunc ( PatternRewriter & rewriter,
Location loc,
Value srcValue )
static

Rewrite the i8 -> i4 truncation into a deinterleave and series of bitwise ops to avoid leaving LLVM to scramble with peephole optimizations.

Definition at line 1908 of file VectorEmulateNarrowType.cpp.

References mlir::DenseElementsAttr::get(), mlir::Builder::getI4Type(), and mlir::Value::getType().

◆ staticallyExtractSubvector()

Value staticallyExtractSubvector ( OpBuilder & rewriter,
Location loc,
Value src,
int64_t offset,
int64_t numElemsToExtract )
static

Extracts 1-D subvector from a 1-D vector.

Given the input rank-1 source vector, extracts numElemsToExtract elements from src, starting at offset. The result is also a rank-1 vector:

vector<numElemsToExtract x !elType>

(!elType is the element type of the source vector). As offset is a known static value, this helper hook emits vector.extract_strided_slice.

EXAMPLE: res = vector.extract_strided_slice src { offsets = [offset], sizes = [numElemsToExtract], strides = [1] }

Definition at line 214 of file VectorEmulateNarrowType.cpp.

References mlir::Builder::getI64ArrayAttr(), and mlir::Value::getType().

Referenced by extractSliceIntoByte().

◆ staticallyInsertSubvector()

Value staticallyInsertSubvector ( OpBuilder & rewriter,
Location loc,
Value src,
Value dest,
int64_t offset )
static

Inserts 1-D subvector into a 1-D vector.

Inserts the input rank-1 source vector into the destination vector starting at offset. As offset is a known static value, this helper hook emits vector.insert_strided_slice.

EXAMPLE: res = vector.insert_strided_slice src, dest { offsets = [offset], strides = [1] }

Definition at line 247 of file VectorEmulateNarrowType.cpp.

References mlir::Builder::getI64ArrayAttr(), and mlir::Value::getType().

Referenced by extractSliceIntoByte().