doxygen/AMXDialect_8cpp_source.html

 //===- AMXDialect.cpp - MLIR AMX ops implementation -----------------------===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//

 //

 // This file implements the AMX dialect and its operations.

 //

 //===----------------------------------------------------------------------===//


 #include "mlir/Dialect/AMX/AMXDialect.h"

 #include "mlir/Conversion/LLVMCommon/Pattern.h"

 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"

 #include "mlir/Dialect/LLVMIR/LLVMTypes.h"

 #include "mlir/IR/Builders.h"

 #include "mlir/IR/DialectImplementation.h"

 #include "mlir/IR/OpImplementation.h"

 #include "mlir/IR/TypeUtilities.h"


 #include "llvm/ADT/TypeSwitch.h"


 using namespace mlir;


 #include "mlir/Dialect/AMX/AMXInterfaces.cpp.inc"


 #include "mlir/Dialect/AMX/AMXDialect.cpp.inc"


 /// Adds the AMX types and operations to the dialect. Both lists are taken
 /// from the included `.cpp.inc` files, selected by the macro defined right
 /// before each include.
 void amx::AMXDialect::initialize() {

   // Type list: the include below expands into the template arguments.
   addTypes<

 #define GET_TYPEDEF_LIST

 #include "mlir/Dialect/AMX/AMXTypes.cpp.inc"

       >();


   // Operation list: the include below expands into the template arguments.
   addOperations<

 #define GET_OP_LIST

 #include "mlir/Dialect/AMX/AMX.cpp.inc"

       >();

 }


 /// Verify that AMX supports the implied tile shape.
 ///
 /// A tile may have at most 16 rows. Each row may span at most 64 bytes
 /// (512 bits) and its width in bits must be a multiple of 32. On violation an
 /// op error is emitted on `op` and failure is returned; the reported column
 /// width is expressed in bytes.
 static LogicalResult verifyTileSize(Operation *op, amx::TileType tp) {
   const int64_t kMaxRows = 16;
   const int64_t kBitsPerRow = 64 * 8;
   // Keep the row width in 64 bits. Narrowing the product to `unsigned` wraps
   // modulo 2^32, which lets an oversized tile (e.g. 2^29 columns of i8) look
   // like a zero-width row and slip through both checks below.
   const int64_t elemBits = tp.getElementType().getIntOrFloatBitWidth();
   const int64_t col = tp.getDimSize(1) * elemBits;
   if (tp.getDimSize(0) > kMaxRows)
     return op->emitOpError("bad row height: ") << tp.getDimSize(0);
   if (col > kBitsPerRow || (col & 0x1f) != 0)
     return op->emitOpError("bad column width: ") << (col >> 3);
   return success();
 }


 /// Check that the operand and result tiles describe a multiplication that
 /// AMX can perform.
 ///
 /// The column counts of `atp` and `btp` are shifted right by `scale` first.
 /// The result tile `ctp` must then have the row count of `atp` and the scaled
 /// column count of `btp`, and the scaled column count of `atp` must equal the
 /// row count of `btp`. Otherwise an op error is emitted on `op`.
 static LogicalResult verifyMultShape(Operation *op, amx::TileType atp,
                                      amx::TileType btp, amx::TileType ctp,
                                      unsigned scale) {
   unsigned lhsRows = atp.getDimSize(0);
   unsigned lhsInner = atp.getDimSize(1) >> scale;
   unsigned rhsInner = btp.getDimSize(0);
   unsigned rhsCols = btp.getDimSize(1) >> scale;
   unsigned accRows = ctp.getDimSize(0);
   unsigned accCols = ctp.getDimSize(1);

   const bool shapesAgree =
       accRows == lhsRows && accCols == rhsCols && lhsInner == rhsInner;
   if (shapesAgree)
     return success();

   return op->emitOpError("bad mult shape: ")
          << accRows << " x " << accCols << " x " << lhsInner;
 }


 /// Materializes the two 16-bit tile size operands for a tile type.
 ///
 /// The first returned value is the row count (dimension 0). The second is the
 /// row width in bytes: the column count (dimension 1) multiplied by the
 /// element size in bytes. Both are created as i16 `LLVM::ConstantOp`s at
 /// `loc`, rows first.
 static SmallVector<Value> getTileSizes(Location loc, amx::TileType tType,
                                        RewriterBase &rewriter) {
   Type i16Ty = rewriter.getIntegerType(16);

   // Element width must be a whole, power-of-two number of bytes.
   unsigned elemBits = tType.getElementType().getIntOrFloatBitWidth();
   assert(llvm::isPowerOf2_64(elemBits) && elemBits >= 8);
   unsigned elemBytes = elemBits >> 3;

   IntegerAttr rowsAttr = rewriter.getI16IntegerAttr(tType.getDimSize(0));
   IntegerAttr rowBytesAttr =
       rewriter.getI16IntegerAttr(tType.getDimSize(1) * elemBytes);

   Value rows = rewriter.create<LLVM::ConstantOp>(loc, i16Ty, rowsAttr);
   Value rowBytes = rewriter.create<LLVM::ConstantOp>(loc, i16Ty, rowBytesAttr);
   return SmallVector<Value>{rows, rowBytes};
 }


 /// Produces the 64-bit byte stride of the second-to-last memref dimension.
 ///
 /// A static stride is emitted directly as an i64 constant (stride times the
 /// element size in bytes). A dynamic stride is read from the memref
 /// descriptor `base` and multiplied by the element size at runtime.
 static Value getStride(Location loc, MemRefType mType, Value base,
                        RewriterBase &rewriter) {
   assert(mType.getRank() >= 2 && "Invalid shape for AMX strides");
   int64_t rowDim = mType.getRank() - 2;
   Type i64Ty = rewriter.getIntegerType(64);

   // Element width must be a whole, power-of-two number of bytes.
   unsigned elemBits = mType.getElementType().getIntOrFloatBitWidth();
   assert(llvm::isPowerOf2_64(elemBits) && elemBits >= 8);
   unsigned elemBytes = elemBits >> 3;

   auto [memStrides, memOffset] = mType.getStridesAndOffset();
   int64_t rowStride = memStrides[rowDim];

   // Static stride: fold the scaling into a single constant.
   if (rowStride != ShapedType::kDynamic) {
     IntegerAttr byteStride = rewriter.getI64IntegerAttr(rowStride * elemBytes);
     return rewriter.create<LLVM::ConstantOp>(loc, i64Ty, byteStride)
         .getResult();
   }

   // Dynamic stride: scale the descriptor's stride by the element size.
   MemRefDescriptor descriptor(base);
   IntegerAttr elemBytesAttr = rewriter.getI64IntegerAttr(elemBytes);
   Value scale = rewriter.create<LLVM::ConstantOp>(loc, i64Ty, elemBytesAttr);
   Value dynStride = descriptor.stride(rewriter, loc, rowDim);
   return rewriter.create<LLVM::MulOp>(loc, i64Ty, scale, dynStride)
       .getResult();
 }


 /// Verifies the op by checking that its result tile shape is supported.
 LogicalResult amx::TileZeroOp::verify() {
   amx::TileType tileTy = getTileType();
   return verifyTileSize(*this, tileTy);
 }


 /// Returns the intrinsic operands for this op: the two tile size constants
 /// of the result tile. `operands` and `typeConverter` are not used.
 SmallVector<Value>
 amx::TileZeroOp::getIntrinsicOperands(ArrayRef<Value> operands,
                                       const LLVMTypeConverter &typeConverter,
                                       RewriterBase &rewriter) {
   Location loc = getLoc();
   amx::TileType tileTy = getTileType();
   return getTileSizes(loc, tileTy, rewriter);
 }


 /// Verifies the load: the memref must have rank of at least two, one index
 /// per memref dimension, a unit innermost stride, and a supported tile shape.
 LogicalResult amx::TileLoadOp::verify() {
   MemRefType sourceTy = getMemRefType();
   unsigned numDims = sourceTy.getRank();
   if (numDims < 2)
     return emitOpError("requires at least 2D memref");
   if (getIndices().size() != numDims)
     return emitOpError("requires ") << numDims << " indices";

   SmallVector<int64_t> memStrides;
   int64_t memOffset;
   const bool stridesUnknown =
       failed(sourceTy.getStridesAndOffset(memStrides, memOffset));
   if (stridesUnknown || memStrides.back() != 1)
     return emitOpError("requires memref with unit innermost stride");

   return verifyTileSize(*this, getTileType());
 }


 /// Builds the intrinsic operand list for a tile load, in order: the two tile
 /// size constants, the pointer to the indexed memref element, and the byte
 /// stride of the memref.
 SmallVector<Value>
 amx::TileLoadOp::getIntrinsicOperands(ArrayRef<Value> operands,
                                       const LLVMTypeConverter &typeConverter,
                                       RewriterBase &rewriter) {
   Location loc = getLoc();
   Adaptor adaptor(operands, *this);
   MemRefType sourceTy = getMemRefType();

   SmallVector<Value> result = getTileSizes(loc, getTileType(), rewriter);

   Value elementPtr =
       LLVM::getStridedElementPtr(rewriter, loc, typeConverter, sourceTy,
                                  adaptor.getBase(), adaptor.getIndices());
   result.push_back(elementPtr);

   Value byteStride = getStride(loc, sourceTy, adaptor.getBase(), rewriter);
   result.push_back(byteStride);

   return result;
 }


 /// Verifies the store: the memref must have rank of at least two, one index
 /// per memref dimension, a unit innermost stride, and a supported tile shape.
 LogicalResult amx::TileStoreOp::verify() {
   MemRefType destTy = getMemRefType();
   unsigned numDims = destTy.getRank();
   if (numDims < 2)
     return emitOpError("requires at least 2D memref");
   if (getIndices().size() != numDims)
     return emitOpError("requires ") << numDims << " indices";

   SmallVector<int64_t> memStrides;
   int64_t memOffset;
   const bool stridesUnknown =
       failed(destTy.getStridesAndOffset(memStrides, memOffset));
   if (stridesUnknown || memStrides.back() != 1)
     return emitOpError("requires memref with unit innermost stride");

   return verifyTileSize(*this, getTileType());
 }


 /// Builds the intrinsic operand list for a tile store, in order: the two
 /// tile size constants, the pointer to the indexed memref element, the byte
 /// stride of the memref, and the tile value being stored.
 SmallVector<Value>
 amx::TileStoreOp::getIntrinsicOperands(ArrayRef<Value> operands,
                                        const LLVMTypeConverter &typeConverter,
                                        RewriterBase &rewriter) {
   Location loc = getLoc();
   Adaptor adaptor(operands, *this);
   MemRefType destTy = getMemRefType();

   SmallVector<Value> result = getTileSizes(loc, getTileType(), rewriter);

   Value elementPtr =
       LLVM::getStridedElementPtr(rewriter, loc, typeConverter, destTy,
                                  adaptor.getBase(), adaptor.getIndices());
   result.push_back(elementPtr);

   Value byteStride = getStride(loc, destTy, adaptor.getBase(), rewriter);
   result.push_back(byteStride);

   result.push_back(adaptor.getVal());
   return result;
 }


 /// Verifies a floating-point tile multiplication: all three tile shapes must
 /// be supported, the shapes must agree with the lhs/rhs column counts shifted
 /// right by one, both inputs must share a bf16 or f16 element type, and the
 /// result element type must be f32.
 LogicalResult amx::TileMulFOp::verify() {
   amx::TileType lhsTy = getLhsTileType();
   amx::TileType rhsTy = getRhsTileType();
   amx::TileType accTy = getTileType();

   // Shape checks, in the same order: lhs, rhs, result, then the product.
   if (failed(verifyTileSize(*this, lhsTy)))
     return failure();
   if (failed(verifyTileSize(*this, rhsTy)))
     return failure();
   if (failed(verifyTileSize(*this, accTy)))
     return failure();
   if (failed(verifyMultShape(*this, lhsTy, rhsTy, accTy, 1)))
     return failure();

   Type lhsElem = lhsTy.getElementType();
   Type rhsElem = rhsTy.getElementType();
   Type accElem = accTy.getElementType();
   const bool supported = (lhsElem.isBF16() || lhsElem.isF16()) &&
                          lhsElem == rhsElem && accElem.isF32();
   if (!supported)
     return emitOpError("unsupported type combination");
   return success();
 }


 /// Builds the intrinsic operand list for a floating-point tile
 /// multiplication, in order: the first tile size of the lhs, the second tile
 /// size of the rhs, the second tile size of the lhs, then the accumulator,
 /// lhs and rhs values. `typeConverter` is not used.
 SmallVector<Value>
 amx::TileMulFOp::getIntrinsicOperands(ArrayRef<Value> operands,
                                       const LLVMTypeConverter &typeConverter,
                                       RewriterBase &rewriter) {
   Location loc = getLoc();
   Adaptor adaptor(operands, *this);

   SmallVector<Value> lhsSizes = getTileSizes(loc, getLhsTileType(), rewriter);
   SmallVector<Value> rhsSizes = getTileSizes(loc, getRhsTileType(), rewriter);

   return SmallVector<Value>{lhsSizes[0],      rhsSizes[1],
                             lhsSizes[1],      adaptor.getAcc(),
                             adaptor.getLhs(), adaptor.getRhs()};
 }


 /// Verifies an integer tile multiplication: all three tile shapes must be
 /// supported, the shapes must agree with the lhs/rhs column counts shifted
 /// right by two, both inputs must have 8-bit integer elements, and the result
 /// must have 32-bit integer elements.
 LogicalResult amx::TileMulIOp::verify() {
   amx::TileType lhsTy = getLhsTileType();
   amx::TileType rhsTy = getRhsTileType();
   amx::TileType accTy = getTileType();

   // Shape checks, in the same order: lhs, rhs, result, then the product.
   if (failed(verifyTileSize(*this, lhsTy)))
     return failure();
   if (failed(verifyTileSize(*this, rhsTy)))
     return failure();
   if (failed(verifyTileSize(*this, accTy)))
     return failure();
   if (failed(verifyMultShape(*this, lhsTy, rhsTy, accTy, 2)))
     return failure();

   Type lhsElem = lhsTy.getElementType();
   Type rhsElem = rhsTy.getElementType();
   Type accElem = accTy.getElementType();
   const bool supported =
       lhsElem.isInteger(8) && rhsElem.isInteger(8) && accElem.isInteger(32);
   if (!supported)
     return emitOpError("unsupported type combination");
   return success();
 }


 /// Builds the intrinsic operand list for an integer tile multiplication, in
 /// order: the first tile size of the lhs, the second tile size of the rhs,
 /// the second tile size of the lhs, then the accumulator, lhs and rhs values.
 /// `typeConverter` is not used.
 SmallVector<Value>
 amx::TileMulIOp::getIntrinsicOperands(ArrayRef<Value> operands,
                                       const LLVMTypeConverter &typeConverter,
                                       RewriterBase &rewriter) {
   Location loc = getLoc();
   Adaptor adaptor(operands, *this);

   SmallVector<Value> lhsSizes = getTileSizes(loc, getLhsTileType(), rewriter);
   SmallVector<Value> rhsSizes = getTileSizes(loc, getRhsTileType(), rewriter);

   return SmallVector<Value>{lhsSizes[0],      rhsSizes[1],
                             lhsSizes[1],      adaptor.getAcc(),
                             adaptor.getLhs(), adaptor.getRhs()};
 }


 /// Parses a tile type body of the form `<` dimension-list element-type `>`.
 /// Dynamic dimensions are not accepted and the dimension list carries a
 /// trailing `x`. Returns a null type if any component fails to parse.
 Type amx::TileType::parse(AsmParser &parser) {
   SmallVector<int64_t, 2> dims;
   Type elemTy;

   // Each step runs only if the previous one succeeded.
   if (parser.parseLess() ||
       parser.parseDimensionList(dims, /*allowDynamic=*/false,
                                 /*withTrailingX=*/true) ||
       parser.parseType(elemTy) || parser.parseGreater())
     return Type();

   return TileType::get(dims, elemTy);
 }


 /// Prints the tile type body as `<`, the dimension list, `x`, the element
 /// type, and `>`.
 void amx::TileType::print(AsmPrinter &os) const {
   ArrayRef<int64_t> dims = getShape();
   Type elemTy = getElementType();

   os << "<";
   os.printDimensionList(dims);
   os << 'x';
   os.printType(elemTy);
   os << '>';
 }


 #define GET_OP_CLASSES

 #include "mlir/Dialect/AMX/AMX.cpp.inc"


 #define GET_TYPEDEF_CLASSES

 #include "mlir/Dialect/AMX/AMXTypes.cpp.inc"

getTileSizes
static SmallVector< Value > getTileSizes(Location loc, amx::TileType tType, RewriterBase &rewriter)
Maps the 2-dim vector shape to the two 16-bit tile sizes.
Definition: AMXDialect.cpp:70

verifyMultShape
static LogicalResult verifyMultShape(Operation *op, amx::TileType atp, amx::TileType btp, amx::TileType ctp, unsigned scale)
Verify that AMX supports the multiplication.
Definition: AMXDialect.cpp:55

verifyTileSize
static LogicalResult verifyTileSize(Operation *op, amx::TileType tp)
Verify that AMX supports the implied tile shape.
Definition: AMXDialect.cpp:43

getStride
static Value getStride(Location loc, MemRefType mType, Value base, RewriterBase &rewriter)
Maps the 2-dim memref shape to the 64-bit stride.
Definition: AMXDialect.cpp:85

AMXDialect.h

Builders.h

DialectImplementation.h

LLVMDialect.h

LLVMTypes.h

OpImplementation.h

print
static void print(spirv::VerCapExtAttr triple, DialectAsmPrinter &printer)
Definition: SPIRVAttributes.cpp:624

getElementType
static Type getElementType(Type type, ArrayRef< int32_t > indices, function_ref< InFlightDiagnostic(StringRef)> emitErrorFn)
Walks the given type hierarchy with the given indices, potentially down to component granularity,...
Definition: SPIRVOps.cpp:188

getShape
static ArrayRef< int64_t > getShape(Type type)
Returns the shape of the given type.
Definition: Traits.cpp:118

TypeUtilities.h

llvm::ArrayRef
Definition: LLVM.h:48

llvm::SmallVector
Definition: LLVM.h:72

mlir::AsmParser
This base class exposes generic asm parser hooks, usable across the various derived parsers.
Definition: OpImplementation.h:560

mlir::AsmParser::parseLess
virtual ParseResult parseLess()=0
Parse a '<' token.

mlir::AsmParser::parseDimensionList
virtual ParseResult parseDimensionList(SmallVectorImpl< int64_t > &dimensions, bool allowDynamic=true, bool withTrailingX=true)=0
Parse a dimension list of a tensor or memref type.

mlir::AsmParser::parseGreater
virtual ParseResult parseGreater()=0
Parse a '>' token.

mlir::AsmParser::parseType
virtual ParseResult parseType(Type &result)=0
Parse a type.

mlir::AsmPrinter
This base class exposes generic asm printer hooks, usable across the various derived printers.
Definition: OpImplementation.h:120

mlir::AsmPrinter::printType
virtual void printType(Type type)
Definition: AsmPrinter.cpp:2951

mlir::AsmPrinter::printDimensionList
void printDimensionList(ArrayRef< int64_t > shape)
Definition: AsmPrinter.cpp:2999

mlir::Builder::getI16IntegerAttr
IntegerAttr getI16IntegerAttr(int16_t value)
Definition: Builders.cpp:213

mlir::Builder::getI64IntegerAttr
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:108

mlir::Builder::getIntegerType
IntegerType getIntegerType(unsigned width)
Definition: Builders.cpp:67

mlir::LLVMTypeConverter
Conversion from types to the LLVM IR dialect.
Definition: TypeConverter.h:35

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76

mlir::MemRefDescriptor
Helper class to produce LLVM dialect operations extracting or inserting elements of a MemRef descript...
Definition: MemRefBuilder.h:33

mlir::MemRefDescriptor::stride
Value stride(OpBuilder &builder, Location loc, unsigned pos)
Builds IR extracting the pos-th stride from the descriptor.
Definition: MemRefBuilder.cpp:166

mlir::OpBuilder::create
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:453

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88

mlir::Operation::emitOpError
InFlightDiagnostic emitOpError(const Twine &message={})
Emit an error with the op name prefixed, like "'dim' op " which is convenient for verifiers.
Definition: Operation.cpp:673

mlir::RewriterBase
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:358

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74

mlir::Type::isF32
bool isF32() const
Definition: Types.cpp:40

mlir::Type::isInteger
bool isInteger() const
Return true if this is an integer type (with the specified width).
Definition: Types.cpp:56

mlir::Type::isF16
bool isF16() const
Definition: Types.cpp:38

mlir::Type::isBF16
bool isBF16() const
Definition: Types.cpp:37

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96

Pattern.h

mlir::LLVM::getStridedElementPtr
Value getStridedElementPtr(OpBuilder &builder, Location loc, const LLVMTypeConverter &converter, MemRefType type, Value memRefDesc, ValueRange indices, LLVM::GEPNoWrapFlags noWrapFlags=LLVM::GEPNoWrapFlags::none)
Performs the index computation to get to the element at indices of the memory pointed to by memRefDes...
Definition: Pattern.cpp:487

mlir::bufferization::getMemRefType
BaseMemRefType getMemRefType(Value value, const BufferizationOptions &options, MemRefLayoutAttrInterface layout={}, Attribute memorySpace=nullptr)
Return a MemRefType to which the type of the given value can be bufferized.
Definition: BufferizableOpInterface.cpp:800

mlir::nvgpu::getIndices
Operation::operand_range getIndices(Operation *op)
Get the indices that the given load/store operation is operating on.
Definition: Utils.cpp:18

mlir::query::parse
QueryRef parse(llvm::StringRef line, const QuerySession &qs)
Definition: Query.cpp:21

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::get
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
Definition: BytecodeImplementation.h:509

mlir::verify
LogicalResult verify(Operation *op, bool verifyRecursively=true)
Perform (potentially expensive) checks of invariants, used to detect compiler bugs,...
Definition: Verifier.cpp:423