MLIR 23.0.0git
NVVMToLLVMIRTranslation.cpp
Go to the documentation of this file.
1//===- NVVMToLLVMIRTranslation.cpp - Translate NVVM to LLVM IR ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a translation between the MLIR NVVM dialect and
10// LLVM IR.
11//
12//===----------------------------------------------------------------------===//
13
16#include "mlir/IR/Operation.h"
18
19#include "llvm/ADT/StringExtras.h"
20#include "llvm/ADT/iterator_range.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/IntrinsicsNVPTX.h"
23#include "llvm/Support/FormatVariadic.h"
24#include "llvm/Support/NVVMAttributes.h"
25
26using namespace mlir;
27using namespace mlir::LLVM;
29
// Helper macros that build the f32 redux.sync intrinsic name for min/max via
// token pasting, selecting the `.abs` and NaN-propagating variants.
#define REDUX_F32_ID_IMPL(op, abs, hasNaN)                                     \
  hasNaN ? llvm::Intrinsic::nvvm_redux_sync_f##op##abs##_NaN                   \
         : llvm::Intrinsic::nvvm_redux_sync_f##op##abs

#define GET_REDUX_F32_ID(op, hasAbs, hasNaN)                                   \
  hasAbs ? REDUX_F32_ID_IMPL(op, _abs, hasNaN) : REDUX_F32_ID_IMPL(op, , hasNaN)

/// Returns the redux.sync intrinsic corresponding to the reduction `kind`.
/// For the floating-point kinds (FMIN/FMAX), `hasAbs` and `hasNaN` select the
/// `.abs` and `.NaN` intrinsic variants. `resultType` is unused here; it is
/// part of the signature expected by the generated conversion code.
static llvm::Intrinsic::ID getReduxIntrinsicId(llvm::Type *resultType,
                                               NVVM::ReductionKind kind,
                                               bool hasAbs, bool hasNaN) {
  switch (kind) {
  case NVVM::ReductionKind::ADD:
    return llvm::Intrinsic::nvvm_redux_sync_add;
  case NVVM::ReductionKind::UMAX:
    return llvm::Intrinsic::nvvm_redux_sync_umax;
  case NVVM::ReductionKind::UMIN:
    return llvm::Intrinsic::nvvm_redux_sync_umin;
  case NVVM::ReductionKind::AND:
    return llvm::Intrinsic::nvvm_redux_sync_and;
  case NVVM::ReductionKind::OR:
    return llvm::Intrinsic::nvvm_redux_sync_or;
  case NVVM::ReductionKind::XOR:
    return llvm::Intrinsic::nvvm_redux_sync_xor;
  case NVVM::ReductionKind::MAX:
    return llvm::Intrinsic::nvvm_redux_sync_max;
  case NVVM::ReductionKind::MIN:
    return llvm::Intrinsic::nvvm_redux_sync_min;
  case NVVM::ReductionKind::FMIN:
    return GET_REDUX_F32_ID(min, hasAbs, hasNaN);
  case NVVM::ReductionKind::FMAX:
    return GET_REDUX_F32_ID(max, hasAbs, hasNaN);
  }
  llvm_unreachable("unknown reduction kind");
}
64
65static llvm::Intrinsic::ID getShflIntrinsicId(llvm::Type *resultType,
66 NVVM::ShflKind kind,
67 bool withPredicate) {
68
69 if (withPredicate) {
70 resultType = cast<llvm::StructType>(resultType)->getElementType(0);
71 switch (kind) {
72 case NVVM::ShflKind::bfly:
73 return resultType->isFloatTy()
74 ? llvm::Intrinsic::nvvm_shfl_sync_bfly_f32p
75 : llvm::Intrinsic::nvvm_shfl_sync_bfly_i32p;
76 case NVVM::ShflKind::up:
77 return resultType->isFloatTy() ? llvm::Intrinsic::nvvm_shfl_sync_up_f32p
78 : llvm::Intrinsic::nvvm_shfl_sync_up_i32p;
79 case NVVM::ShflKind::down:
80 return resultType->isFloatTy()
81 ? llvm::Intrinsic::nvvm_shfl_sync_down_f32p
82 : llvm::Intrinsic::nvvm_shfl_sync_down_i32p;
83 case NVVM::ShflKind::idx:
84 return resultType->isFloatTy() ? llvm::Intrinsic::nvvm_shfl_sync_idx_f32p
85 : llvm::Intrinsic::nvvm_shfl_sync_idx_i32p;
86 }
87 } else {
88 switch (kind) {
89 case NVVM::ShflKind::bfly:
90 return resultType->isFloatTy() ? llvm::Intrinsic::nvvm_shfl_sync_bfly_f32
91 : llvm::Intrinsic::nvvm_shfl_sync_bfly_i32;
92 case NVVM::ShflKind::up:
93 return resultType->isFloatTy() ? llvm::Intrinsic::nvvm_shfl_sync_up_f32
94 : llvm::Intrinsic::nvvm_shfl_sync_up_i32;
95 case NVVM::ShflKind::down:
96 return resultType->isFloatTy() ? llvm::Intrinsic::nvvm_shfl_sync_down_f32
97 : llvm::Intrinsic::nvvm_shfl_sync_down_i32;
98 case NVVM::ShflKind::idx:
99 return resultType->isFloatTy() ? llvm::Intrinsic::nvvm_shfl_sync_idx_f32
100 : llvm::Intrinsic::nvvm_shfl_sync_idx_i32;
101 }
102 }
103 llvm_unreachable("unknown shuffle kind");
104}
105
106static llvm::Intrinsic::ID getMatchSyncIntrinsicId(Type valType,
107 NVVM::MatchSyncKind kind) {
108 switch (kind) {
109 case NVVM::MatchSyncKind::any:
110 return valType.isInteger(32) ? llvm::Intrinsic::nvvm_match_any_sync_i32
111 : llvm::Intrinsic::nvvm_match_any_sync_i64;
112 case NVVM::MatchSyncKind::all:
113 // match.all instruction has two variants -- one returns a single value,
114 // another returns a pair {value, predicate}. We currently only implement
115 // the latter as that's the variant exposed by CUDA API.
116 return valType.isInteger(32) ? llvm::Intrinsic::nvvm_match_all_sync_i32p
117 : llvm::Intrinsic::nvvm_match_all_sync_i64p;
118 }
119 llvm_unreachable("unsupported match sync kind");
120}
121
122static llvm::Intrinsic::ID getVoteSyncIntrinsicId(NVVM::VoteSyncKind kind) {
123 switch (kind) {
124 case NVVM::VoteSyncKind::any:
125 return llvm::Intrinsic::nvvm_vote_any_sync;
126 case NVVM::VoteSyncKind::all:
127 return llvm::Intrinsic::nvvm_vote_all_sync;
128 case NVVM::VoteSyncKind::ballot:
129 return llvm::Intrinsic::nvvm_vote_ballot_sync;
130 case NVVM::VoteSyncKind::uni:
131 return llvm::Intrinsic::nvvm_vote_uni_sync;
132 }
133 llvm_unreachable("unsupported vote kind");
134}
135
/// Returns the ldmatrix intrinsic for the given `layout`, number of matrices
/// `num`, tile `shape`, and element type. Combinations not covered below are
/// rejected earlier (falling through here is a compiler bug).
static llvm::Intrinsic::ID
getLdMatrixIntrinsicId(NVVM::MMALayout layout, int32_t num,
                       NVVM::LdStMatrixShapeAttr shape,
                       NVVM::LdStMatrixEltType eltType) {
  // 8x8 tiles use b16 elements; column-major selects the `trans` variants.
  if (shape.getM() == 8 && shape.getN() == 8) {
    switch (num) {
    case 1:
      return (layout == NVVM::MMALayout::row)
                 ? llvm::Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16
                 : llvm::Intrinsic::
                       nvvm_ldmatrix_sync_aligned_m8n8_x1_trans_b16;
    case 2:
      return (layout == NVVM::MMALayout::row)
                 ? llvm::Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_b16
                 : llvm::Intrinsic::
                       nvvm_ldmatrix_sync_aligned_m8n8_x2_trans_b16;
    case 4:
      return (layout == NVVM::MMALayout::row)
                 ? llvm::Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_b16
                 : llvm::Intrinsic::
                       nvvm_ldmatrix_sync_aligned_m8n8_x4_trans_b16;
    }
  } else if (shape.getM() == 8 && shape.getN() == 16) {
    // 8x16 tiles are only dispatched on the packed sub-byte element types;
    // `layout` plays no role for these variants.
    if (eltType == NVVM::LdStMatrixEltType::B8X16_B6X16_P32) {
      switch (num) {
      case 1:
        return llvm::Intrinsic::
            nvvm_ldmatrix_sync_aligned_m8n16_x1_b8x16_b6x16_p32;
      case 2:
        return llvm::Intrinsic::
            nvvm_ldmatrix_sync_aligned_m8n16_x2_b8x16_b6x16_p32;
      case 4:
        return llvm::Intrinsic::
            nvvm_ldmatrix_sync_aligned_m8n16_x4_b8x16_b6x16_p32;
      }
    } else if (eltType == NVVM::LdStMatrixEltType::B8X16_B4X16_P64) {
      switch (num) {
      case 1:
        return llvm::Intrinsic::
            nvvm_ldmatrix_sync_aligned_m8n16_x1_b8x16_b4x16_p64;
      case 2:
        return llvm::Intrinsic::
            nvvm_ldmatrix_sync_aligned_m8n16_x2_b8x16_b4x16_p64;
      case 4:
        return llvm::Intrinsic::
            nvvm_ldmatrix_sync_aligned_m8n16_x4_b8x16_b4x16_p64;
      }
    }
  } else if (shape.getM() == 16 && shape.getN() == 16) {
    // 16x16 tiles only exist in transposed form; note x4 is not available
    // for these element types.
    if (eltType == NVVM::LdStMatrixEltType::B8) {
      switch (num) {
      case 1:
        return llvm::Intrinsic::nvvm_ldmatrix_sync_aligned_m16n16_x1_trans_b8;
      case 2:
        return llvm::Intrinsic::nvvm_ldmatrix_sync_aligned_m16n16_x2_trans_b8;
      }
    } else if (eltType == NVVM::LdStMatrixEltType::B8X16_B6X16_P32) {
      switch (num) {
      case 1:
        return llvm::Intrinsic::
            nvvm_ldmatrix_sync_aligned_m16n16_x1_trans_b8x16_b6x16_p32;
      case 2:
        return llvm::Intrinsic::
            nvvm_ldmatrix_sync_aligned_m16n16_x2_trans_b8x16_b6x16_p32;
      }
    } else if (eltType == NVVM::LdStMatrixEltType::B8X16_B4X16_P64) {
      switch (num) {
      case 1:
        return llvm::Intrinsic::
            nvvm_ldmatrix_sync_aligned_m16n16_x1_trans_b8x16_b4x16_p64;
      case 2:
        return llvm::Intrinsic::
            nvvm_ldmatrix_sync_aligned_m16n16_x2_trans_b8x16_b4x16_p64;
      }
    }
  }
  llvm_unreachable("unknown ldmatrix kind");
}
214
/// Return the intrinsic ID associated with stmatrix for the given parameters.
/// 8x8 tiles use b16 elements with `trans` variants for column-major; 16x8
/// tiles exist only in transposed b8 form (`layout`/`eltType` are not
/// consulted on that path).
static llvm::Intrinsic::ID
getStMatrixIntrinsicId(NVVM::MMALayout layout, int32_t num,
                       NVVM::LdStMatrixShapeAttr shape,
                       NVVM::LdStMatrixEltType eltType) {
  if (shape.getM() == 8 && shape.getN() == 8) {
    switch (num) {
    case 1:
      return (layout == NVVM::MMALayout::row)
                 ? llvm::Intrinsic::nvvm_stmatrix_sync_aligned_m8n8_x1_b16
                 : llvm::Intrinsic::
                       nvvm_stmatrix_sync_aligned_m8n8_x1_trans_b16;
    case 2:
      return (layout == NVVM::MMALayout::row)
                 ? llvm::Intrinsic::nvvm_stmatrix_sync_aligned_m8n8_x2_b16
                 : llvm::Intrinsic::
                       nvvm_stmatrix_sync_aligned_m8n8_x2_trans_b16;
    case 4:
      return (layout == NVVM::MMALayout::row)
                 ? llvm::Intrinsic::nvvm_stmatrix_sync_aligned_m8n8_x4_b16
                 : llvm::Intrinsic::
                       nvvm_stmatrix_sync_aligned_m8n8_x4_trans_b16;
    }
  } else if (shape.getM() == 16 && shape.getN() == 8) {
    switch (num) {
    case 1:
      return llvm::Intrinsic::nvvm_stmatrix_sync_aligned_m16n8_x1_trans_b8;
    case 2:
      return llvm::Intrinsic::nvvm_stmatrix_sync_aligned_m16n8_x2_trans_b8;
    case 4:
      return llvm::Intrinsic::nvvm_stmatrix_sync_aligned_m16n8_x4_trans_b8;
    }
  }
  llvm_unreachable("unknown stmatrix kind");
}
250
251/// Return the intrinsic ID associated with st.bulk for the given address type.
252static llvm::Intrinsic::ID
253getStBulkIntrinsicId(LLVM::LLVMPointerType addrType) {
254 bool isSharedMemory = addrType.getAddressSpace() ==
255 static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared);
256 return isSharedMemory ? llvm::Intrinsic::nvvm_st_bulk_shared_cta
257 : llvm::Intrinsic::nvvm_st_bulk;
258}
259
/// Returns the uni-directional fence.proxy intrinsic for the given proxy
/// pair, memory `scope`, and direction (`isRelease` selects the release
/// variant, otherwise acquire). Only the generic->tensormap proxy pair is
/// supported here.
static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy,
                                              NVVM::ProxyKind toProxy,
                                              NVVM::MemScopeKind scope,
                                              bool isRelease) {
  if (fromProxy == NVVM::ProxyKind::GENERIC &&
      toProxy == NVVM::ProxyKind::TENSORMAP) {
    switch (scope) {
    case NVVM::MemScopeKind::CTA: {
      if (isRelease)
        return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_release_cta;
      return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_cta;
    }
    case NVVM::MemScopeKind::CLUSTER: {
      if (isRelease)
        return llvm::Intrinsic::
            nvvm_fence_proxy_tensormap_generic_release_cluster;
      return llvm::Intrinsic::
          nvvm_fence_proxy_tensormap_generic_acquire_cluster;
    }
    case NVVM::MemScopeKind::GPU: {
      if (isRelease)
        return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_release_gpu;
      return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_gpu;
    }
    case NVVM::MemScopeKind::SYS: {
      if (isRelease)
        return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_release_sys;
      return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_sys;
    }
    }
    llvm_unreachable("Unknown scope for uni-directional fence.proxy operation");
  }
  llvm_unreachable("Unsupported proxy kinds");
}
294
/// Returns the memory-barrier intrinsic for the given `scope`.
static unsigned getMembarIntrinsicID(NVVM::MemScopeKind scope) {
  switch (scope) {
  case NVVM::MemScopeKind::CTA:
    return llvm::Intrinsic::nvvm_membar_cta;
  case NVVM::MemScopeKind::CLUSTER:
    // NOTE(review): cluster scope maps to fence.sc.cluster rather than a
    // membar intrinsic -- presumably because no membar.cluster exists;
    // confirm against the NVVM IR spec.
    return llvm::Intrinsic::nvvm_fence_sc_cluster;
  case NVVM::MemScopeKind::GPU:
    return llvm::Intrinsic::nvvm_membar_gl;
  case NVVM::MemScopeKind::SYS:
    return llvm::Intrinsic::nvvm_membar_sys;
  }
  llvm_unreachable("Unknown scope for memory barrier");
}
308
309#define TCGEN05LD(SHAPE, NUM) llvm::Intrinsic::nvvm_tcgen05_ld_##SHAPE##_##NUM
310
311static llvm::Intrinsic::ID
312getTcgen05LdIntrinsicID(mlir::NVVM::Tcgen05LdStShape shape, uint32_t num) {
313 llvm::Intrinsic::ID Shape16x64b[] = {
314 TCGEN05LD(16x64b, x1), TCGEN05LD(16x64b, x2), TCGEN05LD(16x64b, x4),
315 TCGEN05LD(16x64b, x8), TCGEN05LD(16x64b, x16), TCGEN05LD(16x64b, x32),
316 TCGEN05LD(16x64b, x64), TCGEN05LD(16x64b, x128),
317 };
318
319 llvm::Intrinsic::ID Shape16x128b[] = {
320 TCGEN05LD(16x128b, x1), TCGEN05LD(16x128b, x2), TCGEN05LD(16x128b, x4),
321 TCGEN05LD(16x128b, x8), TCGEN05LD(16x128b, x16), TCGEN05LD(16x128b, x32),
322 TCGEN05LD(16x128b, x64),
323 };
324
325 llvm::Intrinsic::ID Shape16x256b[] = {
326 TCGEN05LD(16x256b, x1), TCGEN05LD(16x256b, x2), TCGEN05LD(16x256b, x4),
327 TCGEN05LD(16x256b, x8), TCGEN05LD(16x256b, x16), TCGEN05LD(16x256b, x32),
328 };
329
330 llvm::Intrinsic::ID Shape16x32bx2[] = {
331 TCGEN05LD(16x32bx2, x1), TCGEN05LD(16x32bx2, x2),
332 TCGEN05LD(16x32bx2, x4), TCGEN05LD(16x32bx2, x8),
333 TCGEN05LD(16x32bx2, x16), TCGEN05LD(16x32bx2, x32),
334 TCGEN05LD(16x32bx2, x64), TCGEN05LD(16x32bx2, x128),
335 };
336
337 llvm::Intrinsic::ID Shape32x32b[] = {
338 TCGEN05LD(32x32b, x1), TCGEN05LD(32x32b, x2), TCGEN05LD(32x32b, x4),
339 TCGEN05LD(32x32b, x8), TCGEN05LD(32x32b, x16), TCGEN05LD(32x32b, x32),
340 TCGEN05LD(32x32b, x64), TCGEN05LD(32x32b, x128),
341 };
342
343 // `num` contains the length of vector and log2 of `num` returns the index
344 // into the shape array
345 unsigned Idx = std::log2(num);
346
347 switch (shape) {
348 case NVVM::Tcgen05LdStShape::SHAPE_16X64B:
349 return Shape16x64b[Idx];
350 case NVVM::Tcgen05LdStShape::SHAPE_16X128B:
351 return Shape16x128b[Idx - 1];
352 case NVVM::Tcgen05LdStShape::SHAPE_16X256B:
353 return Shape16x256b[Idx - 2];
354 case NVVM::Tcgen05LdStShape::SHAPE_32X32B:
355 return Shape32x32b[Idx];
356 case NVVM::Tcgen05LdStShape::SHAPE_16X32BX2:
357 return Shape16x32bx2[Idx];
358 }
359 llvm_unreachable("unhandled tcgen05.ld lowering");
360}
361
362#define TCGEN05ST(SHAPE, NUM) llvm::Intrinsic::nvvm_tcgen05_st_##SHAPE##_##NUM
363
364static llvm::Intrinsic::ID
365getTcgen05StIntrinsicID(mlir::NVVM::Tcgen05LdStShape shape, uint32_t num) {
366 llvm::Intrinsic::ID Shape16x64b[] = {
367 TCGEN05ST(16x64b, x1), TCGEN05ST(16x64b, x2), TCGEN05ST(16x64b, x4),
368 TCGEN05ST(16x64b, x8), TCGEN05ST(16x64b, x16), TCGEN05ST(16x64b, x32),
369 TCGEN05ST(16x64b, x64), TCGEN05ST(16x64b, x128),
370 };
371
372 llvm::Intrinsic::ID Shape16x128b[] = {
373 TCGEN05ST(16x128b, x1), TCGEN05ST(16x128b, x2), TCGEN05ST(16x128b, x4),
374 TCGEN05ST(16x128b, x8), TCGEN05ST(16x128b, x16), TCGEN05ST(16x128b, x32),
375 TCGEN05ST(16x128b, x64),
376 };
377
378 llvm::Intrinsic::ID Shape16x256b[] = {
379 TCGEN05ST(16x256b, x1), TCGEN05ST(16x256b, x2), TCGEN05ST(16x256b, x4),
380 TCGEN05ST(16x256b, x8), TCGEN05ST(16x256b, x16), TCGEN05ST(16x256b, x32),
381 };
382
383 llvm::Intrinsic::ID Shape16x32bx2[] = {
384 TCGEN05ST(16x32bx2, x1), TCGEN05ST(16x32bx2, x2),
385 TCGEN05ST(16x32bx2, x4), TCGEN05ST(16x32bx2, x8),
386 TCGEN05ST(16x32bx2, x16), TCGEN05ST(16x32bx2, x32),
387 TCGEN05ST(16x32bx2, x64), TCGEN05ST(16x32bx2, x128),
388 };
389
390 llvm::Intrinsic::ID Shape32x32b[] = {
391 TCGEN05ST(32x32b, x1), TCGEN05ST(32x32b, x2), TCGEN05ST(32x32b, x4),
392 TCGEN05ST(32x32b, x8), TCGEN05ST(32x32b, x16), TCGEN05ST(32x32b, x32),
393 TCGEN05ST(32x32b, x64), TCGEN05ST(32x32b, x128),
394 };
395
396 // `num` contains the length of vector and log2 of `num` returns the index
397 // into the shape array
398 unsigned Idx = std::log2(num);
399
400 switch (shape) {
401 case NVVM::Tcgen05LdStShape::SHAPE_16X64B:
402 return Shape16x64b[Idx];
403 case NVVM::Tcgen05LdStShape::SHAPE_16X128B:
404 return Shape16x128b[Idx - 1];
405 case NVVM::Tcgen05LdStShape::SHAPE_16X256B:
406 return Shape16x256b[Idx - 2];
407 case NVVM::Tcgen05LdStShape::SHAPE_32X32B:
408 return Shape32x32b[Idx];
409 case NVVM::Tcgen05LdStShape::SHAPE_16X32BX2:
410 return Shape16x32bx2[Idx];
411 }
412 llvm_unreachable("unhandled tcgen05.st lowering");
413}
414
415static llvm::Intrinsic::ID getFenceSyncRestrictID(NVVM::MemOrderKind order) {
416 return order == NVVM::MemOrderKind::ACQUIRE
417 ? llvm::Intrinsic::
418 nvvm_fence_acquire_sync_restrict_space_cluster_scope_cluster
419 : llvm::Intrinsic::
420 nvvm_fence_release_sync_restrict_space_cta_scope_cluster;
421}
422
423static llvm::Intrinsic::ID
424getFenceProxyID(NVVM::ProxyKind kind, std::optional<NVVM::SharedSpace> space) {
425 switch (kind) {
426 case NVVM::ProxyKind::alias:
427 return llvm::Intrinsic::nvvm_fence_proxy_alias;
428 case NVVM::ProxyKind::async:
429 return llvm::Intrinsic::nvvm_fence_proxy_async;
430 case NVVM::ProxyKind::async_global:
431 return llvm::Intrinsic::nvvm_fence_proxy_async_global;
432 case NVVM::ProxyKind::async_shared:
433 return *space == NVVM::SharedSpace::shared_cta
434 ? llvm::Intrinsic::nvvm_fence_proxy_async_shared_cta
435 : llvm::Intrinsic::nvvm_fence_proxy_async_shared_cluster;
436 default:
437 llvm_unreachable("unsupported proxy kind");
438 }
439}
440
441static llvm::Intrinsic::ID
442getFenceProxySyncRestrictID(NVVM::MemOrderKind order) {
443 return order == NVVM::MemOrderKind::ACQUIRE
444 ? llvm::Intrinsic::
445 nvvm_fence_proxy_async_generic_acquire_sync_restrict_space_cluster_scope_cluster
446 : llvm::Intrinsic::
447 nvvm_fence_proxy_async_generic_release_sync_restrict_space_cta_scope_cluster;
448}
449
450// Calls an LLVM intrinsic on the given operands. For f32/f64 vector types,
451// the intrinsic is called per-element and the results are packed back into a
452// vector. If retType is non-null, it is forwarded as the return-type
453// overload to `createIntrinsicCall`.
454static llvm::Value *
455createScalarizedIntrinsicCall(llvm::IRBuilderBase &builder,
456 llvm::Intrinsic::ID IID, llvm::Type *opTypeLLVM,
458 llvm::Type *retType) {
459 if (opTypeLLVM->isVectorTy() && (opTypeLLVM->getScalarType()->isFloatTy() ||
460 opTypeLLVM->getScalarType()->isDoubleTy())) {
461 llvm::Value *result = llvm::PoisonValue::get(
462 llvm::FixedVectorType::get(opTypeLLVM->getScalarType(), 2));
463 for (int64_t i = 0; i < 2; ++i) {
465 for (llvm::Value *op : operands)
466 scalarArgs.push_back(
467 builder.CreateExtractElement(op, builder.getInt32(i)));
468 llvm::Value *res = createIntrinsicCall(builder, IID, retType, scalarArgs);
469 result = builder.CreateInsertElement(result, res, builder.getInt32(i));
470 }
471 return result;
472 }
473
474 return createIntrinsicCall(builder, IID, retType, operands);
475}
476
477void NVVM::AddFOp::lowerAddFToLLVMIR(llvm::Value *argLHS, llvm::Value *argRHS,
478 Value res, NVVM::FPRoundingMode rndMode,
479 NVVM::SaturationMode satMode, bool isFTZ,
481 llvm::IRBuilderBase &builder) {
482 llvm::Type *opTypeLLVM = argLHS->getType();
483 bool isVectorOp = opTypeLLVM->isVectorTy();
484 bool isSat = satMode != NVVM::SaturationMode::NONE;
485
486 // FIXME: Add intrinsics for add.rn.ftz.f16x2 and add.rn.ftz.f16 here when
487 // they are available.
488 static constexpr llvm::Intrinsic::ID f16IDs[] = {
489 llvm::Intrinsic::nvvm_add_rn_sat_f16,
490 llvm::Intrinsic::nvvm_add_rn_ftz_sat_f16,
491 llvm::Intrinsic::nvvm_add_rn_sat_v2f16,
492 llvm::Intrinsic::nvvm_add_rn_ftz_sat_v2f16,
493 };
494
495 static constexpr llvm::Intrinsic::ID f32IDs[] = {
496 llvm::Intrinsic::nvvm_add_rn_f, // default rounding mode RN
497 llvm::Intrinsic::nvvm_add_rn_f,
498 llvm::Intrinsic::nvvm_add_rm_f,
499 llvm::Intrinsic::nvvm_add_rp_f,
500 llvm::Intrinsic::nvvm_add_rz_f,
501 llvm::Intrinsic::nvvm_add_rn_sat_f, // default rounding mode RN
502 llvm::Intrinsic::nvvm_add_rn_sat_f,
503 llvm::Intrinsic::nvvm_add_rm_sat_f,
504 llvm::Intrinsic::nvvm_add_rp_sat_f,
505 llvm::Intrinsic::nvvm_add_rz_sat_f,
506 llvm::Intrinsic::nvvm_add_rn_ftz_f, // default rounding mode RN
507 llvm::Intrinsic::nvvm_add_rn_ftz_f,
508 llvm::Intrinsic::nvvm_add_rm_ftz_f,
509 llvm::Intrinsic::nvvm_add_rp_ftz_f,
510 llvm::Intrinsic::nvvm_add_rz_ftz_f,
511 llvm::Intrinsic::nvvm_add_rn_ftz_sat_f, // default rounding mode RN
512 llvm::Intrinsic::nvvm_add_rn_ftz_sat_f,
513 llvm::Intrinsic::nvvm_add_rm_ftz_sat_f,
514 llvm::Intrinsic::nvvm_add_rp_ftz_sat_f,
515 llvm::Intrinsic::nvvm_add_rz_ftz_sat_f,
516 };
517
518 static constexpr llvm::Intrinsic::ID f64IDs[] = {
519 llvm::Intrinsic::nvvm_add_rn_d, // default rounding mode RN
520 llvm::Intrinsic::nvvm_add_rn_d, llvm::Intrinsic::nvvm_add_rm_d,
521 llvm::Intrinsic::nvvm_add_rp_d, llvm::Intrinsic::nvvm_add_rz_d};
522
523 auto addIntrinsic = [&](llvm::Intrinsic::ID IID) -> llvm::Value * {
524 return createScalarizedIntrinsicCall(builder, IID, opTypeLLVM,
525 {argLHS, argRHS}, opTypeLLVM);
526 };
527
528 // f16 + f16 -> f16 / vector<2xf16> + vector<2xf16> -> vector<2xf16>
529 // FIXME: Allow lowering to add.rn.ftz.f16x2 and add.rn.ftz.f16 here when the
530 // intrinsics are available.
531 if (opTypeLLVM->getScalarType()->isHalfTy()) {
532 llvm::Value *result;
533 if (isSat) {
534 unsigned index = (isVectorOp << 1) | isFTZ;
535 result = addIntrinsic(f16IDs[index]);
536 } else {
537 result = builder.CreateFAdd(argLHS, argRHS);
538 }
539 mt.mapValue(res, result);
540 return;
541 }
542
543 // bf16 + bf16 -> bf16 / vector<2xbf16> + vector<2xbf16> -> vector<2xbf16>
544 if (opTypeLLVM->getScalarType()->isBFloatTy()) {
545 mt.mapValue(res, builder.CreateFAdd(argLHS, argRHS));
546 return;
547 }
548
549 // f64 + f64 -> f64 / vector<2xf64> + vector<2xf64> -> vector<2xf64>
550 if (opTypeLLVM->getScalarType()->isDoubleTy()) {
551 unsigned index = static_cast<unsigned>(rndMode);
552 mt.mapValue(res, addIntrinsic(f64IDs[index]));
553 return;
554 }
555
556 // f32 + f32 -> f32 / vector<2xf32> + vector<2xf32> -> vector<2xf32>
557 const unsigned numRndModes = 5; // NONE, RM, RN, RP, RZ
558 if (opTypeLLVM->getScalarType()->isFloatTy()) {
559 unsigned index =
560 ((isFTZ << 1) | isSat) * numRndModes + static_cast<unsigned>(rndMode);
561 mt.mapValue(res, addIntrinsic(f32IDs[index]));
562 return;
563 }
564}
565
566void NVVM::FmaOp::lowerFmaToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt,
567 llvm::IRBuilderBase &builder) {
568 auto thisOp = cast<NVVM::FmaOp>(op);
569 mlir::NVVM::FPRoundingMode rndMode = thisOp.getRnd();
570 unsigned rndIndex = static_cast<unsigned>(rndMode) - 1; // 1-4 mapped to 0-3
571 mlir::NVVM::SaturationMode satMode = thisOp.getSat();
572 bool isFTZ = thisOp.getFtz();
573 bool isRelu = thisOp.getRelu();
574 bool isSat = satMode == NVVM::SaturationMode::SAT;
575 bool isOOB = thisOp.getOob();
576
577 mlir::Type opType = thisOp.getRes().getType();
578 llvm::Type *opTypeLLVM = mt.convertType(opType);
579 bool isVectorFma = opTypeLLVM->isVectorTy();
580
581 llvm::Value *argA = mt.lookupValue(thisOp.getA());
582 llvm::Value *argB = mt.lookupValue(thisOp.getB());
583 llvm::Value *argC = mt.lookupValue(thisOp.getC());
584
585 static constexpr llvm::Intrinsic::ID f16IDs[] = {
586 llvm::Intrinsic::nvvm_fma_rn_f16,
587 llvm::Intrinsic::nvvm_fma_rn_f16x2,
588 llvm::Intrinsic::nvvm_fma_rn_ftz_f16,
589 llvm::Intrinsic::nvvm_fma_rn_ftz_f16x2,
590 llvm::Intrinsic::nvvm_fma_rn_sat_f16,
591 llvm::Intrinsic::nvvm_fma_rn_sat_f16x2,
592 llvm::Intrinsic::nvvm_fma_rn_ftz_sat_f16,
593 llvm::Intrinsic::nvvm_fma_rn_ftz_sat_f16x2,
594 llvm::Intrinsic::nvvm_fma_rn_relu_f16,
595 llvm::Intrinsic::nvvm_fma_rn_relu_f16x2,
596 llvm::Intrinsic::nvvm_fma_rn_ftz_relu_f16,
597 llvm::Intrinsic::nvvm_fma_rn_ftz_relu_f16x2};
598
599 static constexpr llvm::Intrinsic::ID bf16IDs[] = {
600 llvm::Intrinsic::nvvm_fma_rn_bf16, llvm::Intrinsic::nvvm_fma_rn_bf16x2,
601 llvm::Intrinsic::nvvm_fma_rn_relu_bf16,
602 llvm::Intrinsic::nvvm_fma_rn_relu_bf16x2};
603
604 static constexpr llvm::Intrinsic::ID f32IDs[] = {
605 llvm::Intrinsic::nvvm_fma_rn_f,
606 llvm::Intrinsic::nvvm_fma_rm_f,
607 llvm::Intrinsic::nvvm_fma_rp_f,
608 llvm::Intrinsic::nvvm_fma_rz_f,
609 llvm::Intrinsic::nvvm_fma_rn_sat_f,
610 llvm::Intrinsic::nvvm_fma_rm_sat_f,
611 llvm::Intrinsic::nvvm_fma_rp_sat_f,
612 llvm::Intrinsic::nvvm_fma_rz_sat_f,
613 llvm::Intrinsic::nvvm_fma_rn_ftz_f,
614 llvm::Intrinsic::nvvm_fma_rm_ftz_f,
615 llvm::Intrinsic::nvvm_fma_rp_ftz_f,
616 llvm::Intrinsic::nvvm_fma_rz_ftz_f,
617 llvm::Intrinsic::nvvm_fma_rn_ftz_sat_f,
618 llvm::Intrinsic::nvvm_fma_rm_ftz_sat_f,
619 llvm::Intrinsic::nvvm_fma_rp_ftz_sat_f,
620 llvm::Intrinsic::nvvm_fma_rz_ftz_sat_f,
621 };
622
623 static constexpr llvm::Intrinsic::ID f64IDs[] = {
624 llvm::Intrinsic::nvvm_fma_rn_d, llvm::Intrinsic::nvvm_fma_rm_d,
625 llvm::Intrinsic::nvvm_fma_rp_d, llvm::Intrinsic::nvvm_fma_rz_d};
626
627 auto fmaIntrinsic = [&](llvm::Intrinsic::ID IID,
628 llvm::Type *retType) -> llvm::Value * {
630 builder, IID, opTypeLLVM, {argA, argB, argC}, /*retType=*/retType);
631 };
632
633 // f16 + f16 -> f16 / vector<2xf16> + vector<2xf16> -> vector<2xf16>
634 if (opTypeLLVM->getScalarType()->isHalfTy()) {
635 llvm::Value *result;
636 if (isOOB) {
637 result = fmaIntrinsic(isRelu ? llvm::Intrinsic::nvvm_fma_rn_oob_relu
638 : llvm::Intrinsic::nvvm_fma_rn_oob,
639 opTypeLLVM);
640 } else {
641 unsigned index =
642 (isRelu << 3) | (isSat << 2) | (isFTZ << 1) |
643 isVectorFma; // Op verifier ensures that this index is valid
644 result = fmaIntrinsic(f16IDs[index], opTypeLLVM);
645 }
646 mt.mapValue(thisOp.getRes(), result);
647 return;
648 }
649
650 // bf16 + bf16 -> bf16 / vector<2xbf16> + vector<2xbf16> -> vector<2xbf16>
651 if (opTypeLLVM->getScalarType()->isBFloatTy()) {
652 llvm::Value *result;
653 if (isOOB) {
654 result = fmaIntrinsic(isRelu ? llvm::Intrinsic::nvvm_fma_rn_oob_relu
655 : llvm::Intrinsic::nvvm_fma_rn_oob,
656 opTypeLLVM);
657 } else {
658 unsigned index = (isRelu << 1) | isVectorFma;
659 result = fmaIntrinsic(bf16IDs[index], opTypeLLVM);
660 }
661 mt.mapValue(thisOp.getRes(), result);
662 return;
663 }
664
665 // f64 + f64 -> f64 / vector<2xf64> + vector<2xf64> -> vector<2xf64>
666 if (opTypeLLVM->getScalarType()->isDoubleTy()) {
667 mt.mapValue(thisOp.getRes(),
668 fmaIntrinsic(f64IDs[rndIndex], opTypeLLVM->getScalarType()));
669 return;
670 }
671
672 // f32 + f32 -> f32 / vector<2xf32> + vector<2xf32> -> vector<2xf32>
673 const unsigned numRndModes = 4; // RN, RM, RP, RZ
674 if (opTypeLLVM->getScalarType()->isFloatTy()) {
675 unsigned index = ((isFTZ << 1) | isSat) * numRndModes + rndIndex;
676 mt.mapValue(thisOp.getRes(),
677 fmaIntrinsic(f32IDs[index], opTypeLLVM->getScalarType()));
678 return;
679 }
680}
681
namespace {
/// Implementation of the dialect interface that converts operations belonging
/// to the NVVM dialect to LLVM IR.
class NVVMDialectLLVMIRTranslationInterface
    : public LLVMTranslationDialectInterface {
public:
  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;

  /// Translates the given operation to LLVM IR using the provided IR builder
  /// and saving the state in `moduleTranslation`.
  LogicalResult
  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) const final {
    // All NVVM ops are instruction-level and require an active insertion point.
    // A null insert block means the op is misplaced (e.g., at module scope),
    // which would otherwise cause a null dereference in createIntrinsicCall.
    if (!builder.GetInsertBlock())
      return op->emitOpError(
          "cannot be translated to LLVM IR without an active insertion "
          "point; make sure the op is inside a function");
    // The generated conversions included below match on `opInst` and return
    // success() for any op they handle.
    Operation &opInst = *op;
#include "mlir/Dialect/LLVMIR/NVVMConversions.inc"

    return failure();
  }

  /// Attaches module-level metadata for functions marked as kernels.
  LogicalResult
  amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
                 NamedAttribute attribute,
                 LLVM::ModuleTranslation &moduleTranslation) const final {
    // Only LLVM functions carry the NVVM kernel attributes handled here.
    auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
    if (!func)
      return failure();
    llvm::Function *llvmFunc = moduleTranslation.lookupFunction(func.getName());

    // The dims attributes (maxntid/reqntid/cluster_dim) are stored as
    // DenseI32ArrayAttr and emitted as comma-joined string fn attributes.
    if (attribute.getName() == NVVM::NVVMDialect::getMaxntidAttrName()) {
      if (!isa<DenseI32ArrayAttr>(attribute.getValue()))
        return failure();
      auto values = cast<DenseI32ArrayAttr>(attribute.getValue());
      const std::string attr = llvm::formatv(
          "{0:$[,]}", llvm::make_range(values.asArrayRef().begin(),
                                       values.asArrayRef().end()));
      llvmFunc->addFnAttr(llvm::NVVMAttr::MaxNTID, attr);
    } else if (attribute.getName() == NVVM::NVVMDialect::getReqntidAttrName()) {
      if (!isa<DenseI32ArrayAttr>(attribute.getValue()))
        return failure();
      auto values = cast<DenseI32ArrayAttr>(attribute.getValue());
      const std::string attr = llvm::formatv(
          "{0:$[,]}", llvm::make_range(values.asArrayRef().begin(),
                                       values.asArrayRef().end()));
      llvmFunc->addFnAttr(llvm::NVVMAttr::ReqNTID, attr);
    } else if (attribute.getName() ==
               NVVM::NVVMDialect::getClusterDimAttrName()) {
      if (!isa<DenseI32ArrayAttr>(attribute.getValue()))
        return failure();
      auto values = cast<DenseI32ArrayAttr>(attribute.getValue());
      const std::string attr = llvm::formatv(
          "{0:$[,]}", llvm::make_range(values.asArrayRef().begin(),
                                       values.asArrayRef().end()));
      llvmFunc->addFnAttr(llvm::NVVMAttr::ClusterDim, attr);
    } else if (attribute.getName() ==
               NVVM::NVVMDialect::getClusterMaxBlocksAttrName()) {
      // NOTE(review): the dyn_cast result is used without a null check on
      // the integer-valued branches below -- presumably the dialect verifier
      // guarantees these attributes are IntegerAttr; confirm.
      auto value = dyn_cast<IntegerAttr>(attribute.getValue());
      llvmFunc->addFnAttr(llvm::NVVMAttr::MaxClusterRank,
                          llvm::utostr(value.getInt()));
    } else if (attribute.getName() ==
               NVVM::NVVMDialect::getMinctasmAttrName()) {
      auto value = dyn_cast<IntegerAttr>(attribute.getValue());
      llvmFunc->addFnAttr(llvm::NVVMAttr::MinCTASm,
                          llvm::utostr(value.getInt()));
    } else if (attribute.getName() == NVVM::NVVMDialect::getMaxnregAttrName()) {
      auto value = dyn_cast<IntegerAttr>(attribute.getValue());
      llvmFunc->addFnAttr(llvm::NVVMAttr::MaxNReg,
                          llvm::utostr(value.getInt()));
    } else if (attribute.getName() ==
               NVVM::NVVMDialect::getKernelFuncAttrName()) {
      // Kernel entry points are marked via the PTX kernel calling convention.
      llvmFunc->setCallingConv(llvm::CallingConv::PTX_Kernel);
    } else if (attribute.getName() ==
               NVVM::NVVMDialect::getBlocksAreClustersAttrName()) {
      llvmFunc->addFnAttr(llvm::NVVMAttr::BlocksAreClusters);
    }

    return success();
  }

  /// Translates parameter attributes; currently only the grid_constant
  /// marker is forwarded to the LLVM function's parameter attributes.
  LogicalResult
  convertParameterAttr(LLVMFuncOp funcOp, int argIdx, NamedAttribute attribute,
                       LLVM::ModuleTranslation &moduleTranslation) const final {

    llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
    llvm::Function *llvmFunc =
        moduleTranslation.lookupFunction(funcOp.getName());

    if (attribute.getName() == NVVM::NVVMDialect::getGridConstantAttrName()) {
      llvmFunc->addParamAttr(
          argIdx,
          llvm::Attribute::get(llvmContext, llvm::NVVMAttr::GridConstant));
    }
    return success();
  }
};
} // namespace
785
787 registry.insert<NVVM::NVVMDialect>();
788 registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
789 dialect->addInterfaces<NVVMDialectLLVMIRTranslationInterface>();
790 });
791}
792
794 DialectRegistry registry;
796 context.appendDialectRegistry(registry);
797}
return success()
static LogicalResult convertParameterAttr(llvm::AttrBuilder &attrBuilder, llvm::Attribute::AttrKind llvmKind, NamedAttribute namedAttr, ModuleTranslation &moduleTranslation, Location loc)
static llvm::Intrinsic::ID getLdMatrixIntrinsicId(NVVM::MMALayout layout, int32_t num, NVVM::LdStMatrixShapeAttr shape, NVVM::LdStMatrixEltType eltType)
static llvm::Intrinsic::ID getFenceProxyID(NVVM::ProxyKind kind, std::optional< NVVM::SharedSpace > space)
#define GET_REDUX_F32_ID(op, hasAbs, hasNaN)
static llvm::Intrinsic::ID getStMatrixIntrinsicId(NVVM::MMALayout layout, int32_t num, NVVM::LdStMatrixShapeAttr shape, NVVM::LdStMatrixEltType eltType)
Return the intrinsic ID associated with stmatrix for the given parameters.
static llvm::Intrinsic::ID getTcgen05StIntrinsicID(mlir::NVVM::Tcgen05LdStShape shape, uint32_t num)
static llvm::Intrinsic::ID getTcgen05LdIntrinsicID(mlir::NVVM::Tcgen05LdStShape shape, uint32_t num)
static unsigned getMembarIntrinsicID(NVVM::MemScopeKind scope)
static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy, NVVM::ProxyKind toProxy, NVVM::MemScopeKind scope, bool isRelease)
llvm::CallInst * createIntrinsicCall(llvm::IRBuilderBase &builder, llvm::Intrinsic::ID intrinsic, ArrayRef< llvm::Value * > args={}, ArrayRef< llvm::Type * > tys={})
Creates a call to an LLVM IR intrinsic function with the given arguments.
static llvm::Intrinsic::ID getFenceProxySyncRestrictID(NVVM::MemOrderKind order)
#define TCGEN05ST(SHAPE, NUM)
static llvm::Intrinsic::ID getReduxIntrinsicId(llvm::Type *resultType, NVVM::ReductionKind kind, bool hasAbs, bool hasNaN)
static llvm::Value * createScalarizedIntrinsicCall(llvm::IRBuilderBase &builder, llvm::Intrinsic::ID IID, llvm::Type *opTypeLLVM, ArrayRef< llvm::Value * > operands, llvm::Type *retType)
#define TCGEN05LD(SHAPE, NUM)
static llvm::Intrinsic::ID getFenceSyncRestrictID(NVVM::MemOrderKind order)
static llvm::Intrinsic::ID getShflIntrinsicId(llvm::Type *resultType, NVVM::ShflKind kind, bool withPredicate)
static llvm::Intrinsic::ID getVoteSyncIntrinsicId(NVVM::VoteSyncKind kind)
static llvm::Intrinsic::ID getMatchSyncIntrinsicId(Type valType, NVVM::MatchSyncKind kind)
static llvm::Intrinsic::ID getStBulkIntrinsicId(LLVM::LLVMPointerType addrType)
Return the intrinsic ID associated with st.bulk for the given address type.
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
static bool isSharedMemory(MemRefType type)
Return true if this is a shared memory memref type.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
Implementation class for module translation.
llvm::Value * lookupValue(Value value) const
Finds an LLVM IR value corresponding to the given MLIR value.
llvm::Type * convertType(Type type)
Converts the type from MLIR LLVM dialect to LLVM.
void mapValue(Value mlir, llvm::Value *llvm)
Stores the mapping between an MLIR value and its LLVM IR counterpart.
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
InFlightDiagnostic emitOpError(const Twine &message={})
Emit an error with the op name prefixed, like "'dim' op " which is convenient for verifiers.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
bool isInteger() const
Return true if this is an integer type (with the specified width).
Definition Types.cpp:58
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
llvm::CallInst * createIntrinsicCall(llvm::IRBuilderBase &builder, llvm::Intrinsic::ID intrinsic, ArrayRef< llvm::Value * > args={}, ArrayRef< llvm::Type * > tys={})
Creates a call to an LLVM IR intrinsic function with the given arguments.
Include the generated interface declarations.
void registerNVVMDialectTranslation(DialectRegistry &registry)
Register the NVVM dialect and the translation from it to the LLVM IR in the given registry;.