MLIR
20.0.0git
|
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Transforms/NarrowTypeEmulationConverter.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/IR/Value.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <optional>
Go to the source code of this file.
Macros | |
#define | DEBUG_TYPE "vector-narrow-type-emulation" |
#define | DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ") |
#define | DBGSNL() (llvm::dbgs() << "\n") |
#define | LDBG(X) LLVM_DEBUG(DBGS() << X << "\n") |
Functions | |
static FailureOr< Operation * > | getCompressedMaskOp (OpBuilder &rewriter, Location loc, Value mask, int numSrcElems, int numSrcElemsPerDest, int numFrontPadElems=0) |
Returns a compressed mask for the emulated vector. More... | |
static Value | staticallyExtractSubvector (OpBuilder &rewriter, Location loc, VectorType extractType, Value source, int64_t frontOffset, int64_t subvecSize) |
Extracts 1-D subvector from a 1-D vector. More... | |
static Value | staticallyInsertSubvector (OpBuilder &rewriter, Location loc, Value src, Value dest, int64_t offset) |
Inserts 1-D subvector into a 1-D vector by overwriting the elements starting at offset . More... | |
static Value | dynamicallyExtractSubVector (OpBuilder &rewriter, Location loc, TypedValue< VectorType > source, Value dest, OpFoldResult offset, int64_t numElementsToExtract) |
Extracts a 1-D subvector from a 1-D source vector, with index at offset and size numElementsToExtract , and inserts into the dest vector. More... | |
static Value | dynamicallyInsertSubVector (RewriterBase &rewriter, Location loc, TypedValue< VectorType > source, Value dest, OpFoldResult destOffsetVar, size_t length) |
Inserts a 1-D subvector into a 1-D dest vector at index destOffsetVar . More... | |
static TypedValue< VectorType > | emulatedVectorLoad (OpBuilder &rewriter, Location loc, Value base, OpFoldResult linearizedIndices, int64_t numEmultedElementsToLoad, Type origElemType, Type emulatedElemType) |
Returns the op sequence for an emulated sub-byte data type vector load. More... | |
static raw_ostream & | operator<< (raw_ostream &os, const SmallVector< SourceElementRangeList > &vec) |
static LogicalResult | commonConversionPrecondition (PatternRewriter &rewriter, VectorType preconditionType, Operation *op) |
Verify that the precondition type meets the common preconditions for any conversion. More... | |
static LogicalResult | alignedConversionPrecondition (PatternRewriter &rewriter, VectorType srcType, VectorType dstType, Operation *op) |
Verify that source and destination element types meet the precondition for the supported aligned conversion cases. More... | |
static Value | rewriteI4ToI8SignedExt (PatternRewriter &rewriter, Location loc, Value srcValue) |
Rewrite the i4 -> i8 signed extension into a sequence of shuffles and bitwise ops that take advantage of high-level information to avoid leaving LLVM to scramble with peephole optimizations. More... | |
static Value | rewriteI4ToI8UnsignedExt (PatternRewriter &rewriter, Location loc, Value srcValue) |
Rewrite the i4 -> i8 unsigned extension into a sequence of shuffles and bitwise ops that take advantage of high-level information to avoid leaving LLVM to scramble with peephole optimizations. More... | |
static Value | rewriteI8ToI4Trunc (PatternRewriter &rewriter, Location loc, Value srcValue) |
Rewrite the i8 -> i4 truncation into a deinterleave and series of bitwise ops that take advantage of high-level information to avoid leaving LLVM to scramble with peephole optimizations. More... | |
#define DBGS | ( | ) | (llvm::dbgs() << "[" DEBUG_TYPE "]: ") |
Definition at line 44 of file VectorEmulateNarrowType.cpp.
#define DBGSNL | ( | ) | (llvm::dbgs() << "\n") |
Definition at line 45 of file VectorEmulateNarrowType.cpp.
#define DEBUG_TYPE "vector-narrow-type-emulation" |
Definition at line 43 of file VectorEmulateNarrowType.cpp.
#define LDBG | ( | X | ) | LLVM_DEBUG(DBGS() << X << "\n") |
Definition at line 46 of file VectorEmulateNarrowType.cpp.
|
static |
Verify that source and destination element types meet the precondition for the supported aligned conversion cases.
Alignment means that either the source element type is a multiple of the destination element type or the other way around.
NOTE: This method assumes that common conversion preconditions are met.
Definition at line 1093 of file VectorEmulateNarrowType.cpp.
References mlir::RewriterBase::notifyMatchFailure().
|
static |
Verify that the precondition type meets the common preconditions for any conversion.
Definition at line 1060 of file VectorEmulateNarrowType.cpp.
References mlir::RewriterBase::notifyMatchFailure().
|
static |
Extracts a 1-D subvector from a 1-D `source` vector, with index at `offset` and size `numElementsToExtract`, and inserts it into the `dest` vector.
This function emits multiple `vector.extract` and `vector.insert` ops, so only use it when `offset` cannot be folded into a constant value.
Definition at line 253 of file VectorEmulateNarrowType.cpp.
References mlir::OpBuilder::create(), and mlir::Builder::getIndexType().
|
static |
Inserts a 1-D subvector into a 1-D `dest` vector at index `destOffsetVar`.
Definition at line 271 of file VectorEmulateNarrowType.cpp.
References mlir::OpBuilder::create(), mlir::Builder::getIndexType(), and mlir::getValueOrCreateConstantIndexOp().
|
static |
Returns the op sequence for an emulated sub-byte data type vector load.
Specifically, it uses `emulatedElemType` for loading a vector of `origElemType`. The load location is given by `base` and `linearizedIndices`, and the load size is given by `numEmulatedElementsToLoad`.
Definition at line 295 of file VectorEmulateNarrowType.cpp.
References mlir::OpBuilder::create(), mlir::get(), mlir::Type::getIntOrFloatBitWidth(), and mlir::getValueOrCreateConstantIndexOp().
|
static |
Returns a compressed mask for the emulated vector.
For example, when emulating an eight-element `i8` vector with `i32` (i.e. when the source elements span two dest elements), this method compresses `vector<8xi1>` into `vector<2xi1>`.
The compressed/output mask value is set iff any mask in the corresponding `numSrcElemsPerDest` range of uncompressed/input masks is set. E.g., if `numSrcElemsPerDest` equals 2 and `numFrontPadElems` equals 1, the following mask:
mask = [1, 1, 0, 0, 0, 0]
will first be padded in the front with `numFrontPadElems` zeros, and zeros will be added in the back to make the number of elements a multiple of `numSrcElemsPerDest` (for easier computation). The resulting mask will be:
mask = [0, 1, 1, 0, 0, 0, 0, 0]
then it will return the following new compressed mask:
mask = [1, 1, 0, 0]
NOTE: `numFrontPadElems` is assumed to be strictly smaller than `numSrcElemsPerDest`.
Definition at line 72 of file VectorEmulateNarrowType.cpp.
References mlir::bindSymbols(), mlir::OpBuilder::create(), mlir::detail::divideCeil(), mlir::AffineExpr::floorDiv(), mlir::get(), mlir::DenseElementsAttr::get(), mlir::getAsOpFoldResult(), mlir::Builder::getContext(), mlir::Value::getDefiningOp(), mlir::Builder::getI1Type(), mlir::Operation::getResultTypes(), mlir::getValueOrCreateConstantIndexOp(), and mlir::affine::makeComposedFoldedAffineApply().
|
static |
Definition at line 1003 of file VectorEmulateNarrowType.cpp.
References mlir::detail::enumerate().
|
static |
Rewrite the i4 -> i8 signed extension into a sequence of shuffles and bitwise ops that take advantage of high-level information to avoid leaving LLVM to scramble with peephole optimizations.
Definition at line 1193 of file VectorEmulateNarrowType.cpp.
References mlir::OpBuilder::create(), mlir::get(), mlir::Builder::getI8Type(), and mlir::Value::getType().
|
static |
Rewrite the i4 -> i8 unsigned extension into a sequence of shuffles and bitwise ops that take advantage of high-level information to avoid leaving LLVM to scramble with peephole optimizations.
Definition at line 1222 of file VectorEmulateNarrowType.cpp.
References mlir::OpBuilder::create(), mlir::get(), mlir::Builder::getI8Type(), and mlir::Value::getType().
|
static |
Rewrite the i8 -> i4 truncation into a deinterleave and series of bitwise ops that take advantage of high-level information to avoid leaving LLVM to scramble with peephole optimizations.
Definition at line 1254 of file VectorEmulateNarrowType.cpp.
References mlir::OpBuilder::create(), mlir::get(), mlir::Builder::getI4Type(), and mlir::Value::getType().
|
static |
Extracts 1-D subvector from a 1-D vector.
It is a wrapper function for emitting `vector.extract_strided_slice`.
Definition at line 210 of file VectorEmulateNarrowType.cpp.
References mlir::OpBuilder::create(), mlir::Builder::getI64ArrayAttr(), and mlir::Value::getType().
|
static |
Inserts a 1-D subvector into a 1-D vector by overwriting the elements starting at `offset`.
It is a wrapper function for emitting `vector.insert_strided_slice`.
Definition at line 237 of file VectorEmulateNarrowType.cpp.
References mlir::OpBuilder::create(), mlir::Builder::getI64ArrayAttr(), and mlir::Value::getType().