MLIR  20.0.0git
Macros | Functions
VectorEmulateNarrowType.cpp File Reference
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Transforms/NarrowTypeEmulationConverter.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/IR/Value.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <optional>

Go to the source code of this file.

Macros

#define DEBUG_TYPE   "vector-narrow-type-emulation"
 
#define DBGS()   (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
 
#define DBGSNL()   (llvm::dbgs() << "\n")
 
#define LDBG(X)   LLVM_DEBUG(DBGS() << X << "\n")
 

Functions

static FailureOr< Operation * > getCompressedMaskOp (OpBuilder &rewriter, Location loc, Value mask, int numSrcElems, int numSrcElemsPerDest, int numFrontPadElems=0)
 Returns a compressed mask for the emulated vector. More...
 
static Value staticallyExtractSubvector (OpBuilder &rewriter, Location loc, VectorType extractType, Value source, int64_t frontOffset, int64_t subvecSize)
 Extracts 1-D subvector from a 1-D vector. More...
 
static Value staticallyInsertSubvector (OpBuilder &rewriter, Location loc, Value src, Value dest, int64_t offset)
 Inserts 1-D subvector into a 1-D vector by overwriting the elements starting at offset. More...
 
static Value dynamicallyExtractSubVector (OpBuilder &rewriter, Location loc, TypedValue< VectorType > source, Value dest, OpFoldResult offset, int64_t numElementsToExtract)
 Extracts a 1-D subvector from a 1-D source vector, with index at offset and size numElementsToExtract, and inserts into the dest vector. More...
 
static Value dynamicallyInsertSubVector (RewriterBase &rewriter, Location loc, TypedValue< VectorType > source, Value dest, OpFoldResult destOffsetVar, size_t length)
 Inserts a 1-D subvector into a 1-D dest vector at index destOffsetVar. More...
 
static TypedValue< VectorType > emulatedVectorLoad (OpBuilder &rewriter, Location loc, Value base, OpFoldResult linearizedIndices, int64_t numEmultedElementsToLoad, Type origElemType, Type emulatedElemType)
 Returns the op sequence for an emulated sub-byte data type vector load. More...
 
static raw_ostream & operator<< (raw_ostream &os, const SmallVector< SourceElementRangeList > &vec)
 
static LogicalResult commonConversionPrecondition (PatternRewriter &rewriter, VectorType preconditionType, Operation *op)
 Verify that the precondition type meets the common preconditions for any conversion. More...
 
static LogicalResult alignedConversionPrecondition (PatternRewriter &rewriter, VectorType srcType, VectorType dstType, Operation *op)
 Verify that source and destination element types meet the precondition for the supported aligned conversion cases. More...
 
static Value rewriteI4ToI8SignedExt (PatternRewriter &rewriter, Location loc, Value srcValue)
 Rewrite the i4 -> i8 signed extension into a sequence of shuffles and bitwise ops that take advantage of high-level information to avoid leaving LLVM to scramble with peephole optimizations. More...
 
static Value rewriteI4ToI8UnsignedExt (PatternRewriter &rewriter, Location loc, Value srcValue)
 Rewrite the i4 -> i8 unsigned extension into a sequence of shuffles and bitwise ops that take advantage of high-level information to avoid leaving LLVM to scramble with peephole optimizations. More...
 
static Value rewriteI8ToI4Trunc (PatternRewriter &rewriter, Location loc, Value srcValue)
 Rewrite the i8 -> i4 truncation into a deinterleave and series of bitwise ops that take advantage of high-level information to avoid leaving LLVM to scramble with peephole optimizations. More...
 

Macro Definition Documentation

◆ DBGS

#define DBGS ( )    (llvm::dbgs() << "[" DEBUG_TYPE "]: ")

Definition at line 44 of file VectorEmulateNarrowType.cpp.

◆ DBGSNL

#define DBGSNL ( )    (llvm::dbgs() << "\n")

Definition at line 45 of file VectorEmulateNarrowType.cpp.

◆ DEBUG_TYPE

#define DEBUG_TYPE   "vector-narrow-type-emulation"

Definition at line 43 of file VectorEmulateNarrowType.cpp.

◆ LDBG

#define LDBG (   X)    LLVM_DEBUG(DBGS() << X << "\n")

Definition at line 46 of file VectorEmulateNarrowType.cpp.

Function Documentation

◆ alignedConversionPrecondition()

static LogicalResult alignedConversionPrecondition ( PatternRewriter rewriter,
VectorType  srcType,
VectorType  dstType,
Operation op 
)
static

Verify that source and destination element types meet the precondition for the supported aligned conversion cases.

Alignment means that either the source element type is a multiple of the destination element type or the other way around.

NOTE: This method assumes that common conversion preconditions are met.

Definition at line 1078 of file VectorEmulateNarrowType.cpp.

References mlir::RewriterBase::notifyMatchFailure().

◆ commonConversionPrecondition()

static LogicalResult commonConversionPrecondition ( PatternRewriter rewriter,
VectorType  preconditionType,
Operation op 
)
static

Verify that the precondition type meets the common preconditions for any conversion.

Definition at line 1045 of file VectorEmulateNarrowType.cpp.

References mlir::RewriterBase::notifyMatchFailure().

◆ dynamicallyExtractSubVector()

static Value dynamicallyExtractSubVector ( OpBuilder rewriter,
Location  loc,
TypedValue< VectorType >  source,
Value  dest,
OpFoldResult  offset,
int64_t  numElementsToExtract 
)
static

Extracts a 1-D subvector from a 1-D source vector, with index at offset and size numElementsToExtract, and inserts into the dest vector.

This function emits multiple vector.extract and vector.insert ops, so only use it when offset cannot be folded into a constant value.

Definition at line 239 of file VectorEmulateNarrowType.cpp.

References mlir::OpBuilder::create(), and mlir::Builder::getIndexType().

◆ dynamicallyInsertSubVector()

static Value dynamicallyInsertSubVector ( RewriterBase rewriter,
Location  loc,
TypedValue< VectorType >  source,
Value  dest,
OpFoldResult  destOffsetVar,
size_t  length 
)
static

Inserts a 1-D subvector into a 1-D dest vector at index destOffsetVar.

Definition at line 257 of file VectorEmulateNarrowType.cpp.

References mlir::OpBuilder::create(), mlir::Builder::getIndexType(), and mlir::getValueOrCreateConstantIndexOp().

◆ emulatedVectorLoad()

static TypedValue<VectorType> emulatedVectorLoad ( OpBuilder rewriter,
Location  loc,
Value  base,
OpFoldResult  linearizedIndices,
int64_t  numEmultedElementsToLoad,
Type  origElemType,
Type  emulatedElemType 
)
static

Returns the op sequence for an emulated sub-byte data type vector load.

Specifically, use emulatedElemType for loading a vector of origElemType. The load location is given by base and linearizedIndices, and the load size is given by numEmulatedElementsToLoad.

Definition at line 281 of file VectorEmulateNarrowType.cpp.

References mlir::OpBuilder::create(), mlir::get(), mlir::Type::getIntOrFloatBitWidth(), and mlir::getValueOrCreateConstantIndexOp().

◆ getCompressedMaskOp()

static FailureOr<Operation *> getCompressedMaskOp ( OpBuilder rewriter,
Location  loc,
Value  mask,
int  numSrcElems,
int  numSrcElemsPerDest,
int  numFrontPadElems = 0 
)
static

Returns a compressed mask for the emulated vector.

For example, when emulating an eight-element i8 vector with i32 (i.e. when the source elements span two dest elements), this method compresses vector<8xi1> into vector<2xi1>.

The compressed/output mask value is set iff any mask in the corresponding numSrcElemsPerDest range of uncompressed/input masks is set. E.g., if numSrcElemsPerDest equals 2, and numFrontPadElems equals 1, the following mask:

mask = [1, 1, 0, 0, 0, 0]

will first be padded in the front with numFrontPadElems zeros, and zeros will be added in the back to make the number of elements a multiple of numSrcElemsPerDest (for easier computation). The resulting mask will be:

mask = [0, 1, 1, 0, 0, 0, 0, 0]

then it will return the following new compressed mask:

mask = [1, 1, 0, 0]

NOTE: numFrontPadElems is assumed to be strictly smaller than numSrcElemsPerDest.

Definition at line 72 of file VectorEmulateNarrowType.cpp.

References mlir::bindSymbols(), mlir::OpBuilder::create(), mlir::detail::divideCeil(), mlir::get(), mlir::DenseElementsAttr::get(), mlir::getAsOpFoldResult(), mlir::Builder::getContext(), mlir::Value::getDefiningOp(), mlir::Builder::getI1Type(), mlir::Operation::getResultTypes(), mlir::getValueOrCreateConstantIndexOp(), and mlir::affine::makeComposedFoldedAffineApply().

◆ operator<<()

static raw_ostream& operator<< ( raw_ostream &  os,
const SmallVector< SourceElementRangeList > &  vec 
)
static

Definition at line 988 of file VectorEmulateNarrowType.cpp.

References mlir::detail::enumerate().

◆ rewriteI4ToI8SignedExt()

static Value rewriteI4ToI8SignedExt ( PatternRewriter rewriter,
Location  loc,
Value  srcValue 
)
static

Rewrite the i4 -> i8 signed extension into a sequence of shuffles and bitwise ops that take advantage of high-level information to avoid leaving LLVM to scramble with peephole optimizations.

Definition at line 1178 of file VectorEmulateNarrowType.cpp.

References mlir::OpBuilder::create(), mlir::get(), mlir::Builder::getI8Type(), and mlir::Value::getType().

◆ rewriteI4ToI8UnsignedExt()

static Value rewriteI4ToI8UnsignedExt ( PatternRewriter rewriter,
Location  loc,
Value  srcValue 
)
static

Rewrite the i4 -> i8 unsigned extension into a sequence of shuffles and bitwise ops that take advantage of high-level information to avoid leaving LLVM to scramble with peephole optimizations.

Definition at line 1207 of file VectorEmulateNarrowType.cpp.

References mlir::OpBuilder::create(), mlir::get(), mlir::Builder::getI8Type(), and mlir::Value::getType().

◆ rewriteI8ToI4Trunc()

static Value rewriteI8ToI4Trunc ( PatternRewriter rewriter,
Location  loc,
Value  srcValue 
)
static

Rewrite the i8 -> i4 truncation into a deinterleave and series of bitwise ops that take advantage of high-level information to avoid leaving LLVM to scramble with peephole optimizations.

Definition at line 1239 of file VectorEmulateNarrowType.cpp.

References mlir::OpBuilder::create(), mlir::get(), mlir::Builder::getI4Type(), and mlir::Value::getType().

◆ staticallyExtractSubvector()

static Value staticallyExtractSubvector ( OpBuilder rewriter,
Location  loc,
VectorType  extractType,
Value  source,
int64_t  frontOffset,
int64_t  subvecSize 
)
static

Extracts 1-D subvector from a 1-D vector.

It is a wrapper function for emitting vector.extract_strided_slice.

Definition at line 196 of file VectorEmulateNarrowType.cpp.

References mlir::OpBuilder::create(), mlir::Builder::getI64ArrayAttr(), and mlir::Value::getType().

◆ staticallyInsertSubvector()

static Value staticallyInsertSubvector ( OpBuilder rewriter,
Location  loc,
Value  src,
Value  dest,
int64_t  offset 
)
static

Inserts 1-D subvector into a 1-D vector by overwriting the elements starting at offset.

It is a wrapper function for emitting vector.insert_strided_slice.

Definition at line 223 of file VectorEmulateNarrowType.cpp.

References mlir::OpBuilder::create(), mlir::Builder::getI64ArrayAttr(), and mlir::Value::getType().