MLIR 23.0.0git
WmmaOpsToNvvm.cpp
Go to the documentation of this file.
1//===------ WmmaOpsToNVVM.cpp - WMMA LD/ST/Compute to NVVM lowering -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains definitions of patterns to lower GPU Subgroup MMA ops to
10// NVVM Dialect.
11//
12//===----------------------------------------------------------------------===//
13
20#include "mlir/IR/Types.h"
21
22using namespace mlir;
23
24namespace {
25
26/// Checks if all the operands of the op being lowered are of LLVM Types. The
27/// types are expected to be converted by the `LLVMTypeConverter` before the op
28/// is actually lowered. If the type of an operands is not already converted it
29/// hints a missing typeConversion and failure is returned in that case.
30static LogicalResult areAllLLVMTypes(Operation *op, ValueRange operands,
31 ConversionPatternRewriter &rewriter) {
32 if (!llvm::all_of(operands, [](Value value) {
33 return LLVM::isCompatibleType(value.getType());
34 })) {
35 return rewriter.notifyMatchFailure(
36 op, "cannot convert if operands aren't of LLVM type.");
37 }
38
39 return success();
40}
41
/// Error string to emit when an unimplemented WMMA variant is encountered.
/// Used as the match-failure message whenever no NVVM intrinsic exists for a
/// requested (m, n, k, layout, type) combination.
static constexpr StringRef kInvalidCaseStr = "Unsupported WMMA variant.";
44
45static NVVM::MMAFrag convertOperand(StringRef operandName) {
46 if (operandName == "AOp")
47 return NVVM::MMAFrag::a;
48 if (operandName == "BOp")
49 return NVVM::MMAFrag::b;
50 if (operandName == "COp")
51 return NVVM::MMAFrag::c;
52 llvm_unreachable("Unknown operand name");
53}
54
55static NVVM::MMATypes getElementType(gpu::MMAMatrixType type) {
56 if (type.getElementType().isF16())
57 return NVVM::MMATypes::f16;
58 if (type.getElementType().isF32())
59 return type.getOperand() == "COp" ? NVVM::MMATypes::f32
60 : NVVM::MMATypes::tf32;
61 if (type.getElementType().isF64())
62 return NVVM::MMATypes::f64;
63 if (type.getElementType().isSignedInteger(8))
64 return NVVM::MMATypes::s8;
66 return NVVM::MMATypes::u8;
67 // Accumulator type is signless and implies signed.
68 if (type.getElementType().isInteger(32))
69 return NVVM::MMATypes::s32;
70 llvm_unreachable("Unsupported type");
71}
72
73/// This class implements the conversion of GPU MMA loadOp to wmma.load op
74/// in the NVVM dialect. The conversion not only emits the NVVM op but also
75/// emits code that is necessary to store the data in the destination memref
76/// after it has been loaded.
77struct WmmaLoadOpToNVVMLowering
78 : public ConvertOpToLLVMPattern<gpu::SubgroupMmaLoadMatrixOp> {
79 using ConvertOpToLLVMPattern<
80 gpu::SubgroupMmaLoadMatrixOp>::ConvertOpToLLVMPattern;
81
82 LogicalResult
83 matchAndRewrite(gpu::SubgroupMmaLoadMatrixOp subgroupMmaLoadMatrixOp,
84 OpAdaptor adaptor,
85 ConversionPatternRewriter &rewriter) const override {
86 Operation *op = subgroupMmaLoadMatrixOp.getOperation();
87 if (failed(areAllLLVMTypes(op, adaptor.getOperands(), rewriter)))
88 return failure();
89
90 // Get the shape of the MMAMatrix type being returned. The shape will
91 // choose which intrinsic this op will be lowered to.
92 NVVM::MMALayout layout = subgroupMmaLoadMatrixOp.getTranspose()
93 ? NVVM::MMALayout::col
94 : NVVM::MMALayout::row;
95 gpu::MMAMatrixType retType =
96 cast<gpu::MMAMatrixType>(subgroupMmaLoadMatrixOp.getRes().getType());
97 ArrayRef<int64_t> retTypeShape = retType.getShape();
98 int64_t m = 0;
99 int64_t n = 0;
100 int64_t k = 0;
101 NVVM::MMATypes eltype = getElementType(retType);
102 // NVVM intrinsics require to give mxnxk dimensions, infer the missing
103 // dimension based on the valid intrinsics available.
104 if (retType.getOperand() == "AOp") {
105 m = retTypeShape[0];
106 k = retTypeShape[1];
107 n = NVVM::WMMALoadOp::inferNDimension(m, k, eltype);
108 } else if (retType.getOperand() == "BOp") {
109 k = retTypeShape[0];
110 n = retTypeShape[1];
111 m = NVVM::WMMALoadOp::inferMDimension(k, n, eltype);
112 } else if (retType.getOperand() == "COp") {
113 m = retTypeShape[0];
114 n = retTypeShape[1];
115 k = NVVM::WMMALoadOp::inferKDimension(m, n, eltype);
116 }
117 NVVM::MMAFrag frag = convertOperand(retType.getOperand());
118 // Check that there is an exisiting instruction for the combination we need.
119 if (NVVM::WMMALoadOp::getIntrinsicID(m, n, k, layout, eltype, frag) == 0)
120 return rewriter.notifyMatchFailure(op, kInvalidCaseStr);
121
122 Type resType = convertMMAToLLVMType(retType);
123 Location loc = op->getLoc();
124
125 // Create nvvm.mma_load op according to the operand types.
126 Value dataPtr = getStridedElementPtr(
127 rewriter, loc,
128 cast<MemRefType>(subgroupMmaLoadMatrixOp.getSrcMemref().getType()),
129 adaptor.getSrcMemref(), adaptor.getIndices());
130
131 Value leadingDim = LLVM::ConstantOp::create(
132 rewriter, loc, rewriter.getI32Type(),
133 subgroupMmaLoadMatrixOp.getLeadDimensionAttr());
134 rewriter.replaceOpWithNewOp<NVVM::WMMALoadOp>(
135 op, resType, dataPtr, leadingDim, m, n, k, layout, eltype, frag);
136 return success();
137 }
138};
139
140/// This class implements the conversion of GPU MMA storeOp to wmma.store op
141/// in the NVVM dialect. The conversion not only emits the NVVM op but also
142/// emits code that is necessary to unpack the data in the source and
143/// convert the data in the format that is needed by the NVVM op.
144struct WmmaStoreOpToNVVMLowering
145 : public ConvertOpToLLVMPattern<gpu::SubgroupMmaStoreMatrixOp> {
146 using ConvertOpToLLVMPattern<
147 gpu::SubgroupMmaStoreMatrixOp>::ConvertOpToLLVMPattern;
148
149 LogicalResult
150 matchAndRewrite(gpu::SubgroupMmaStoreMatrixOp subgroupMmaStoreMatrixOp,
151 OpAdaptor adaptor,
152 ConversionPatternRewriter &rewriter) const override {
153 Operation *op = subgroupMmaStoreMatrixOp.getOperation();
154 if (failed(areAllLLVMTypes(op, adaptor.getOperands(), rewriter)))
155 return failure();
156
157 Location loc = op->getLoc();
158
159 SmallVector<Value, 4> storeOpOperands;
160 // Get the shape of the MMAMatrix type being stored. The shape will
161 // choose which intrinsic this op will be lowered to.
162 gpu::MMAMatrixType srcType =
163 cast<gpu::MMAMatrixType>(subgroupMmaStoreMatrixOp.getSrc().getType());
164 ArrayRef<int64_t> srcTypeShape = srcType.getShape();
165 NVVM::MMALayout layout = subgroupMmaStoreMatrixOp.getTranspose()
166 ? NVVM::MMALayout::col
167 : NVVM::MMALayout::row;
168 NVVM::MMATypes eltype = getElementType(srcType);
169 int64_t m = srcTypeShape[0];
170 int64_t n = srcTypeShape[1];
171 int64_t k = NVVM::WMMAStoreOp::inferKDimension(m, n, eltype);
172 if (NVVM::WMMAStoreOp::getIntrinsicID(m, n, k, layout, eltype) == 0)
173 return rewriter.notifyMatchFailure(op, kInvalidCaseStr);
174
175 auto matrixType = cast<LLVM::LLVMStructType>(adaptor.getSrc().getType());
176 for (unsigned i = 0, e = matrixType.getBody().size(); i < e; ++i) {
177 Value toUse =
178 LLVM::ExtractValueOp::create(rewriter, loc, adaptor.getSrc(), i);
179 storeOpOperands.push_back(toUse);
180 }
181
182 Value dataPtr = getStridedElementPtr(
183 rewriter, loc,
184 cast<MemRefType>(subgroupMmaStoreMatrixOp.getDstMemref().getType()),
185 adaptor.getDstMemref(), adaptor.getIndices());
186 Value leadingDim = LLVM::ConstantOp::create(
187 rewriter, loc, rewriter.getI32Type(),
188 subgroupMmaStoreMatrixOp.getLeadDimensionAttr());
189 rewriter.replaceOpWithNewOp<NVVM::WMMAStoreOp>(
190 op, dataPtr, m, n, k, layout, eltype, storeOpOperands, leadingDim);
191 return success();
192 }
193};
194
195/// This class implements the conversion of GPU MMA computeOp to wmma.mma op
196/// in the NVVM dialect.
197struct WmmaMmaOpToNVVMLowering
198 : public ConvertOpToLLVMPattern<gpu::SubgroupMmaComputeOp> {
199 using ConvertOpToLLVMPattern<
200 gpu::SubgroupMmaComputeOp>::ConvertOpToLLVMPattern;
201
202 LogicalResult
203 matchAndRewrite(gpu::SubgroupMmaComputeOp subgroupMmaComputeOp,
204 OpAdaptor adaptor,
205 ConversionPatternRewriter &rewriter) const override {
206 Operation *op = subgroupMmaComputeOp.getOperation();
207 if (failed(areAllLLVMTypes(op, adaptor.getOperands(), rewriter)))
208 return failure();
209
210 Location loc = op->getLoc();
211
212 // The wmma.mma intrinsic in llvm requires the operands as individual
213 // values. So individual elements from the memrefs need to be extracted and
214 // then passed on to the intrinsic call. Emit llvm ops to extract individual
215 // values form lowered memrefs.
216 SmallVector<Value> unpackedOps;
217 auto unpackOp = [&](Value operand) {
218 // f64 a and b fragments are not structs but scalars.
219 if (!isa<LLVM::LLVMStructType>(operand.getType())) {
220 unpackedOps.push_back(operand);
221 return;
222 }
223 // every other type is lowered to an LLVM struct, extract the values.
224 auto structType = cast<LLVM::LLVMStructType>(operand.getType());
225 for (size_t i = 0, e = structType.getBody().size(); i < e; ++i) {
226 Value toUse = LLVM::ExtractValueOp::create(rewriter, loc, operand, i);
227 unpackedOps.push_back(toUse);
228 }
229 };
230
231 // Get the shapes of the MMAMatrix type being used. The shapes will
232 // choose which intrinsic this op will be lowered to.
233 gpu::MMAMatrixType aType =
234 cast<gpu::MMAMatrixType>(subgroupMmaComputeOp.getOpA().getType());
235 ArrayRef<int64_t> aTypeShape = aType.getShape();
236 gpu::MMAMatrixType cType =
237 cast<gpu::MMAMatrixType>(subgroupMmaComputeOp.getOpC().getType());
238 ArrayRef<int64_t> cTypeShape = cType.getShape();
239 int64_t m = cTypeShape[0];
240 int64_t n = cTypeShape[1];
241 int64_t k = aTypeShape[1];
242 NVVM::MMALayout aLayout = subgroupMmaComputeOp.getATranspose()
243 ? NVVM::MMALayout::col
244 : NVVM::MMALayout::row;
245 NVVM::MMALayout bLayout = subgroupMmaComputeOp.getBTranspose()
246 ? NVVM::MMALayout::col
247 : NVVM::MMALayout::row;
248 NVVM::MMATypes sourceType = getElementType(aType);
249 NVVM::MMATypes destType = getElementType(cType);
250 if (NVVM::WMMAMmaOp::getIntrinsicID(m, n, k, aLayout, bLayout, sourceType,
251 destType) == 0)
252 return rewriter.notifyMatchFailure(op, kInvalidCaseStr);
253
254 NVVM::MMATypes bElementType = getElementType(
255 cast<gpu::MMAMatrixType>(subgroupMmaComputeOp.getOpB().getType()));
256 if (bElementType != sourceType)
257 return rewriter.notifyMatchFailure(
258 op, "WMMA compute op input matrix element types must match.");
259
260 unpackOp(adaptor.getOpA());
261 unpackOp(adaptor.getOpB());
262 unpackOp(adaptor.getOpC());
263
264 rewriter.replaceOpWithNewOp<NVVM::WMMAMmaOp>(
265 op, adaptor.getOpC().getType(), m, n, k, aLayout, bLayout, sourceType,
266 destType, unpackedOps);
267 return success();
268 }
269};
270
271/// Convert GPU MMA ConstantMatrixOp to a chain of InsertValueOp.
272struct WmmaConstantOpToNVVMLowering
273 : public ConvertOpToLLVMPattern<gpu::SubgroupMmaConstantMatrixOp> {
274 using ConvertOpToLLVMPattern<
275 gpu::SubgroupMmaConstantMatrixOp>::ConvertOpToLLVMPattern;
276
277 LogicalResult
278 matchAndRewrite(gpu::SubgroupMmaConstantMatrixOp subgroupMmaConstantOp,
279 OpAdaptor adaptor,
280 ConversionPatternRewriter &rewriter) const override {
281 if (failed(areAllLLVMTypes(subgroupMmaConstantOp.getOperation(),
282 adaptor.getOperands(), rewriter)))
283 return failure();
284 Location loc = subgroupMmaConstantOp.getLoc();
285 Value cst = adaptor.getOperands()[0];
286 Type type = convertMMAToLLVMType(
287 cast<gpu::MMAMatrixType>(subgroupMmaConstantOp.getType()));
288 // If the element is not a struct, it means it's a scalar f64.
289 auto structType = dyn_cast<LLVM::LLVMStructType>(type);
290 if (!structType) {
291 rewriter.replaceOp(subgroupMmaConstantOp, cst);
292 return success();
293 }
294 // If the element type is a vector create a vector from the operand.
295 if (auto vecType = dyn_cast<VectorType>(structType.getBody()[0])) {
296 Value vecCst = LLVM::PoisonOp::create(rewriter, loc, vecType);
297 for (int64_t vecEl = 0; vecEl < vecType.getNumElements(); vecEl++) {
298 Value idx = LLVM::ConstantOp::create(rewriter, loc,
299 rewriter.getI32Type(), vecEl);
300 vecCst = LLVM::InsertElementOp::create(rewriter, loc, vecType, vecCst,
301 cst, idx);
302 }
303 cst = vecCst;
304 }
305 Value matrixStruct = LLVM::PoisonOp::create(rewriter, loc, structType);
306 for (size_t i : llvm::seq(size_t(0), structType.getBody().size())) {
307 matrixStruct =
308 LLVM::InsertValueOp::create(rewriter, loc, matrixStruct, cst, i);
309 }
310 rewriter.replaceOp(subgroupMmaConstantOp, matrixStruct);
311 return success();
312 }
313};
314
315static Value createMinMaxF(OpBuilder &builder, Location loc, Value lhs,
316 Value rhs, bool isMin) {
317 auto floatType = cast<FloatType>(getElementTypeOrSelf(lhs.getType()));
318 Type i1Type = builder.getI1Type();
319 if (auto vecType = dyn_cast<VectorType>(lhs.getType()))
320 i1Type = VectorType::get(vecType.getShape(), i1Type);
321 Value cmp = LLVM::FCmpOp::create(
322 builder, loc, i1Type,
323 isMin ? LLVM::FCmpPredicate::olt : LLVM::FCmpPredicate::ogt, lhs, rhs);
324 Value sel = LLVM::SelectOp::create(builder, loc, cmp, lhs, rhs);
325 Value isNan = LLVM::FCmpOp::create(builder, loc, i1Type,
326 LLVM::FCmpPredicate::uno, lhs, rhs);
327 Value nan = LLVM::ConstantOp::create(
328 builder, loc, lhs.getType(),
329 builder.getFloatAttr(floatType,
330 APFloat::getQNaN(floatType.getFloatSemantics())));
331 return LLVM::SelectOp::create(builder, loc, isNan, nan, sel);
332}
333
334static Value createScalarOp(OpBuilder &builder, Location loc,
335 gpu::MMAElementwiseOp op,
336 ArrayRef<Value> operands) {
337 switch (op) {
338 case gpu::MMAElementwiseOp::ADDF:
339 return LLVM::FAddOp::create(builder, loc, operands[0].getType(), operands);
340 case gpu::MMAElementwiseOp::MULF:
341 return LLVM::FMulOp::create(builder, loc, operands[0].getType(), operands);
342 case gpu::MMAElementwiseOp::DIVF:
343 return LLVM::FDivOp::create(builder, loc, operands[0].getType(), operands);
344 case gpu::MMAElementwiseOp::MAXF:
345 return createMinMaxF(builder, loc, operands[0], operands[1],
346 /*isMin=*/false);
347 case gpu::MMAElementwiseOp::MINF:
348 return createMinMaxF(builder, loc, operands[0], operands[1],
349 /*isMin=*/true);
350 default:
351 llvm_unreachable("unknown op");
352 }
353}
354
355/// Convert GPU MMA elementwise ops to extract + op + insert.
356struct WmmaElementwiseOpToNVVMLowering
357 : public ConvertOpToLLVMPattern<gpu::SubgroupMmaElementwiseOp> {
358 using ConvertOpToLLVMPattern<
359 gpu::SubgroupMmaElementwiseOp>::ConvertOpToLLVMPattern;
360
361 LogicalResult
362 matchAndRewrite(gpu::SubgroupMmaElementwiseOp subgroupMmaElementwiseOp,
363 OpAdaptor adaptor,
364 ConversionPatternRewriter &rewriter) const override {
365 if (failed(areAllLLVMTypes(subgroupMmaElementwiseOp.getOperation(),
366 adaptor.getOperands(), rewriter)))
367 return failure();
368 Location loc = subgroupMmaElementwiseOp.getLoc();
369 size_t numOperands = adaptor.getOperands().size();
370 Type destType = convertMMAToLLVMType(
371 cast<gpu::MMAMatrixType>(subgroupMmaElementwiseOp.getType()));
372
373 // If the element is not a struct, it means it's a scalar f64.
374 LLVM::LLVMStructType structDestTy =
375 dyn_cast<LLVM::LLVMStructType>(destType);
376 if (!structDestTy) {
377 SmallVector<Value> operands;
378 for (auto operand : adaptor.getOperands()) {
379 operands.push_back(operand);
380 }
381 Value element = createScalarOp(
382 rewriter, loc, subgroupMmaElementwiseOp.getOpType(), operands);
383 rewriter.replaceOp(subgroupMmaElementwiseOp, element);
384 return success();
385 }
386 Value matrixStruct = LLVM::PoisonOp::create(rewriter, loc, structDestTy);
387 for (size_t i = 0, e = structDestTy.getBody().size(); i < e; ++i) {
388 SmallVector<Value> extractedOperands;
389 for (size_t opIdx = 0; opIdx < numOperands; opIdx++) {
390 extractedOperands.push_back(LLVM::ExtractValueOp::create(
391 rewriter, loc, adaptor.getOperands()[opIdx], i));
392 }
393 Value element =
394 createScalarOp(rewriter, loc, subgroupMmaElementwiseOp.getOpType(),
395 extractedOperands);
396 matrixStruct =
397 LLVM::InsertValueOp::create(rewriter, loc, matrixStruct, element, i);
398 }
399 rewriter.replaceOp(subgroupMmaElementwiseOp, matrixStruct);
400 return success();
401 }
402};
403
404} // namespace
405
406/// Return the LLVMStructureType corresponding to the MMAMatrixType `type`.
408 NVVM::MMAFrag frag = convertOperand(type.getOperand());
409 NVVM::MMATypes eltType = getElementType(type);
410 auto nRow = type.getShape()[0];
411 auto nCol = type.getShape()[1];
412 std::pair<Type, unsigned> typeInfo =
413 NVVM::inferMMAType(eltType, frag, nRow, nCol, type.getContext());
414 // Special handling for f64 a and b fragments
415 Type f64Ty = Float64Type::get(type.getContext());
416 if (typeInfo.first == f64Ty && typeInfo.second == 1) {
417 return f64Ty;
418 }
419 return LLVM::LLVMStructType::getLiteral(
420 type.getContext(), SmallVector<Type, 8>(typeInfo.second, typeInfo.first));
421}
422
424 const LLVMTypeConverter &converter, RewritePatternSet &patterns,
425 PatternBenefit benefit) {
426 patterns.add<WmmaLoadOpToNVVMLowering, WmmaMmaOpToNVVMLowering,
427 WmmaStoreOpToNVVMLowering, WmmaConstantOpToNVVMLowering,
428 WmmaElementwiseOpToNVVMLowering>(converter, benefit);
429}
return success()
static LogicalResult areAllLLVMTypes(Operation *op, ValueRange operands, ConversionPatternRewriter &rewriter)
lhs
static Type getElementType(Type type)
Determine the element type of type.
FloatAttr getFloatAttr(Type type, double value)
Definition Builders.cpp:258
IntegerType getI1Type()
Definition Builders.cpp:57
Utility class for operation conversions targeting the LLVM dialect that match exactly one source oper...
Definition Pattern.h:227
Conversion from types to the LLVM IR dialect.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
This class helps build Operations.
Definition Builders.h:209
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
Location getLoc()
The source location the operation was defined or derived from.
Definition Operation.h:244
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very li...
RewritePatternSet & add(ConstructorArg &&arg, ConstructorArgs &&...args)
Add an instance of each of the pattern types 'Ts' to the pattern list with the given arguments.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
bool isF64() const
Definition Types.cpp:41
bool isSignedInteger() const
Return true if this is a signed integer type (with the specified width).
Definition Types.cpp:78
bool isF32() const
Definition Types.cpp:40
bool isUnsignedInteger() const
Return true if this is an unsigned integer type (with the specified width).
Definition Types.cpp:90
bool isInteger() const
Return true if this is an integer type (with the specified width).
Definition Types.cpp:58
bool isF16() const
Definition Types.cpp:38
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Type getType() const
Return the type of this value.
Definition Value.h:105
MMAMatrix represents a matrix held by a subgroup for matrix-matrix multiply accumulate operations.
Definition GPUDialect.h:131
ArrayRef< int64_t > getShape() const
Get shape of the matrix.
Type getElementType() const
Get elementType of a single element.
StringRef getOperand() const
The general form of operation this type supports is given by the equation C += A*B.
Value getStridedElementPtr(OpBuilder &builder, Location loc, const LLVMTypeConverter &converter, MemRefType type, Value memRefDesc, ValueRange indices, LLVM::GEPNoWrapFlags noWrapFlags=LLVM::GEPNoWrapFlags::none)
Performs the index computation to get to the element at indices of the memory pointed to by memRefDes...
Definition Pattern.cpp:603
bool isCompatibleType(Type type)
Returns true if the given type is compatible with the LLVM dialect.
std::pair< mlir::Type, unsigned > inferMMAType(mlir::NVVM::MMATypes type, mlir::NVVM::MMAFrag frag, int nRow, int nCol, mlir::MLIRContext *context)
Return the element type and number of elements associated with a wmma matrix of given chracteristics.
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:717
Include the generated interface declarations.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition Utils.cpp:307
Type convertMMAToLLVMType(gpu::MMAMatrixType type)
Return the LLVMStructureType corresponding to the MMAMatrixType type.
Type getElementTypeOrSelf(Type type)
Return the element type or return the type itself.
void populateGpuWMMAToNVVMConversionPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns, PatternBenefit benefit=1)
Collect a set of patterns to convert WMMA ops from GPU dialect to NVVM.