MLIR  21.0.0git
NVVMToLLVMIRTranslation.cpp
Go to the documentation of this file.
1 //===- NVVMToLLVMIRTranslation.cpp - Translate NVVM to LLVM IR ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR NVVM dialect and
10 // LLVM IR.
11 //
12 //===----------------------------------------------------------------------===//
13 
16 #include "mlir/IR/Operation.h"
18 
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/iterator_range.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/IntrinsicsNVPTX.h"
23 #include "llvm/Support/FormatVariadic.h"
24 
25 using namespace mlir;
26 using namespace mlir::LLVM;
28 
29 #define REDUX_F32_ID_IMPL(op, abs, hasNaN) \
30  hasNaN ? llvm::Intrinsic::nvvm_redux_sync_f##op##abs##_NaN \
31  : llvm::Intrinsic::nvvm_redux_sync_f##op##abs
32 
33 #define GET_REDUX_F32_ID(op, hasAbs, hasNaN) \
34  hasAbs ? REDUX_F32_ID_IMPL(op, _abs, hasNaN) : REDUX_F32_ID_IMPL(op, , hasNaN)
35 
36 static llvm::Intrinsic::ID getReduxIntrinsicId(llvm::Type *resultType,
37  NVVM::ReduxKind kind,
38  bool hasAbs, bool hasNaN) {
39  if (!(resultType->isIntegerTy(32) || resultType->isFloatTy()))
40  llvm_unreachable("unsupported data type for redux");
41 
42  switch (kind) {
43  case NVVM::ReduxKind::ADD:
44  return llvm::Intrinsic::nvvm_redux_sync_add;
45  case NVVM::ReduxKind::UMAX:
46  return llvm::Intrinsic::nvvm_redux_sync_umax;
47  case NVVM::ReduxKind::UMIN:
48  return llvm::Intrinsic::nvvm_redux_sync_umin;
49  case NVVM::ReduxKind::AND:
50  return llvm::Intrinsic::nvvm_redux_sync_and;
51  case NVVM::ReduxKind::OR:
52  return llvm::Intrinsic::nvvm_redux_sync_or;
53  case NVVM::ReduxKind::XOR:
54  return llvm::Intrinsic::nvvm_redux_sync_xor;
55  case NVVM::ReduxKind::MAX:
56  return llvm::Intrinsic::nvvm_redux_sync_max;
57  case NVVM::ReduxKind::MIN:
58  return llvm::Intrinsic::nvvm_redux_sync_min;
59  case NVVM::ReduxKind::FMIN:
60  return GET_REDUX_F32_ID(min, hasAbs, hasNaN);
61  case NVVM::ReduxKind::FMAX:
62  return GET_REDUX_F32_ID(max, hasAbs, hasNaN);
63  }
64  llvm_unreachable("unknown redux kind");
65 }
66 
67 static llvm::Intrinsic::ID getShflIntrinsicId(llvm::Type *resultType,
68  NVVM::ShflKind kind,
69  bool withPredicate) {
70 
71  if (withPredicate) {
72  resultType = cast<llvm::StructType>(resultType)->getElementType(0);
73  switch (kind) {
74  case NVVM::ShflKind::bfly:
75  return resultType->isFloatTy()
76  ? llvm::Intrinsic::nvvm_shfl_sync_bfly_f32p
77  : llvm::Intrinsic::nvvm_shfl_sync_bfly_i32p;
78  case NVVM::ShflKind::up:
79  return resultType->isFloatTy() ? llvm::Intrinsic::nvvm_shfl_sync_up_f32p
80  : llvm::Intrinsic::nvvm_shfl_sync_up_i32p;
81  case NVVM::ShflKind::down:
82  return resultType->isFloatTy()
83  ? llvm::Intrinsic::nvvm_shfl_sync_down_f32p
84  : llvm::Intrinsic::nvvm_shfl_sync_down_i32p;
85  case NVVM::ShflKind::idx:
86  return resultType->isFloatTy() ? llvm::Intrinsic::nvvm_shfl_sync_idx_f32p
87  : llvm::Intrinsic::nvvm_shfl_sync_idx_i32p;
88  }
89  } else {
90  switch (kind) {
91  case NVVM::ShflKind::bfly:
92  return resultType->isFloatTy() ? llvm::Intrinsic::nvvm_shfl_sync_bfly_f32
93  : llvm::Intrinsic::nvvm_shfl_sync_bfly_i32;
94  case NVVM::ShflKind::up:
95  return resultType->isFloatTy() ? llvm::Intrinsic::nvvm_shfl_sync_up_f32
96  : llvm::Intrinsic::nvvm_shfl_sync_up_i32;
97  case NVVM::ShflKind::down:
98  return resultType->isFloatTy() ? llvm::Intrinsic::nvvm_shfl_sync_down_f32
99  : llvm::Intrinsic::nvvm_shfl_sync_down_i32;
100  case NVVM::ShflKind::idx:
101  return resultType->isFloatTy() ? llvm::Intrinsic::nvvm_shfl_sync_idx_f32
102  : llvm::Intrinsic::nvvm_shfl_sync_idx_i32;
103  }
104  }
105  llvm_unreachable("unknown shuffle kind");
106 }
107 
109  NVVM::MatchSyncKind kind) {
110  switch (kind) {
111  case NVVM::MatchSyncKind::any:
112  return valType.isInteger(32) ? llvm::Intrinsic::nvvm_match_any_sync_i32
113  : llvm::Intrinsic::nvvm_match_any_sync_i64;
114  case NVVM::MatchSyncKind::all:
115  // match.all instruction has two variants -- one returns a single value,
116  // another returns a pair {value, predicate}. We currently only implement
117  // the latter as that's the variant exposed by CUDA API.
118  return valType.isInteger(32) ? llvm::Intrinsic::nvvm_match_all_sync_i32p
119  : llvm::Intrinsic::nvvm_match_all_sync_i64p;
120  }
121 }
122 
123 static llvm::Intrinsic::ID getVoteSyncIntrinsicId(NVVM::VoteSyncKind kind) {
124  switch (kind) {
125  case NVVM::VoteSyncKind::any:
126  return llvm::Intrinsic::nvvm_vote_any_sync;
127  case NVVM::VoteSyncKind::all:
128  return llvm::Intrinsic::nvvm_vote_all_sync;
129  case NVVM::VoteSyncKind::ballot:
130  return llvm::Intrinsic::nvvm_vote_ballot_sync;
131  case NVVM::VoteSyncKind::uni:
132  return llvm::Intrinsic::nvvm_vote_uni_sync;
133  }
134  llvm_unreachable("unsupported vote kind");
135 }
136 
137 /// Return the intrinsic ID associated with ldmatrix for the given paramters.
138 static llvm::Intrinsic::ID getLdMatrixIntrinsicId(NVVM::MMALayout layout,
139  int32_t num) {
140  if (layout == NVVM::MMALayout::row) {
141  switch (num) {
142  case 1:
143  return llvm::Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16;
144  case 2:
145  return llvm::Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_b16;
146  case 4:
147  return llvm::Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_b16;
148  default:
149  llvm_unreachable("unsupported number of matrix");
150  }
151 
152  } else {
153  switch (num) {
154  case 1:
155  return llvm::Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_trans_b16;
156  case 2:
157  return llvm::Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_trans_b16;
158  case 4:
159  return llvm::Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_trans_b16;
160  default:
161  llvm_unreachable("unsupported number of matrix");
162  }
163  }
164 }
165 
166 /// Return the intrinsic ID associated with st.bulk for the given address type.
167 static llvm::Intrinsic::ID
168 getStBulkIntrinsicId(LLVM::LLVMPointerType addrType) {
169  bool isSharedMemory =
170  addrType.getAddressSpace() == NVVM::NVVMMemorySpace::kSharedMemorySpace;
171  return isSharedMemory ? llvm::Intrinsic::nvvm_st_bulk_shared_cta
172  : llvm::Intrinsic::nvvm_st_bulk;
173 }
174 
175 static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy,
176  NVVM::ProxyKind toProxy,
177  NVVM::MemScopeKind scope,
178  bool isRelease) {
179  if (fromProxy == NVVM::ProxyKind::GENERIC &&
180  toProxy == NVVM::ProxyKind::TENSORMAP) {
181  switch (scope) {
182  case NVVM::MemScopeKind::CTA: {
183  if (isRelease)
184  return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_release_cta;
185  return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_cta;
186  }
187  case NVVM::MemScopeKind::CLUSTER: {
188  if (isRelease)
189  return llvm::Intrinsic::
190  nvvm_fence_proxy_tensormap_generic_release_cluster;
191  return llvm::Intrinsic::
192  nvvm_fence_proxy_tensormap_generic_acquire_cluster;
193  }
194  case NVVM::MemScopeKind::GPU: {
195  if (isRelease)
196  return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_release_gpu;
197  return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_gpu;
198  }
199  case NVVM::MemScopeKind::SYS: {
200  if (isRelease)
201  return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_release_sys;
202  return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_sys;
203  }
204  }
205  llvm_unreachable("Unknown scope for uni-directional fence.proxy operation");
206  }
207  llvm_unreachable("Unsupported proxy kinds");
208 }
209 
210 #define TCGEN05LD(SHAPE, NUM) llvm::Intrinsic::nvvm_tcgen05_ld_##SHAPE##_##NUM
211 
212 static llvm::Intrinsic::ID
213 getTcgen05LdIntrinsicID(mlir::NVVM::Tcgen05LdStShape shape, uint32_t num) {
214  llvm::Intrinsic::ID Shape16x64b[] = {
215  TCGEN05LD(16x64b, x1), TCGEN05LD(16x64b, x2), TCGEN05LD(16x64b, x4),
216  TCGEN05LD(16x64b, x8), TCGEN05LD(16x64b, x16), TCGEN05LD(16x64b, x32),
217  TCGEN05LD(16x64b, x64), TCGEN05LD(16x64b, x128),
218  };
219 
220  llvm::Intrinsic::ID Shape16x128b[] = {
221  TCGEN05LD(16x128b, x1), TCGEN05LD(16x128b, x2), TCGEN05LD(16x128b, x4),
222  TCGEN05LD(16x128b, x8), TCGEN05LD(16x128b, x16), TCGEN05LD(16x128b, x32),
223  TCGEN05LD(16x128b, x64),
224  };
225 
226  llvm::Intrinsic::ID Shape16x256b[] = {
227  TCGEN05LD(16x256b, x1), TCGEN05LD(16x256b, x2), TCGEN05LD(16x256b, x4),
228  TCGEN05LD(16x256b, x8), TCGEN05LD(16x256b, x16), TCGEN05LD(16x256b, x32),
229  };
230 
231  llvm::Intrinsic::ID Shape16x32bx2[] = {
232  TCGEN05LD(16x32bx2, x1), TCGEN05LD(16x32bx2, x2),
233  TCGEN05LD(16x32bx2, x4), TCGEN05LD(16x32bx2, x8),
234  TCGEN05LD(16x32bx2, x16), TCGEN05LD(16x32bx2, x32),
235  TCGEN05LD(16x32bx2, x64), TCGEN05LD(16x32bx2, x128),
236  };
237 
238  llvm::Intrinsic::ID Shape32x32b[] = {
239  TCGEN05LD(32x32b, x1), TCGEN05LD(32x32b, x2), TCGEN05LD(32x32b, x4),
240  TCGEN05LD(32x32b, x8), TCGEN05LD(32x32b, x16), TCGEN05LD(32x32b, x32),
241  TCGEN05LD(32x32b, x64), TCGEN05LD(32x32b, x128),
242  };
243 
244  // `num` contains the length of vector and log2 of `num` returns the index
245  // into the shape array
246  unsigned Idx = std::log2(num);
247 
248  switch (shape) {
249  case NVVM::Tcgen05LdStShape::SHAPE_16X64B:
250  return Shape16x64b[Idx];
251  case NVVM::Tcgen05LdStShape::SHAPE_16X128B:
252  return Shape16x128b[Idx - 1];
253  case NVVM::Tcgen05LdStShape::SHAPE_16X256B:
254  return Shape16x256b[Idx - 2];
255  case NVVM::Tcgen05LdStShape::SHAPE_32X32B:
256  return Shape32x32b[Idx];
257  case NVVM::Tcgen05LdStShape::SHAPE_16X32BX2:
258  return Shape16x32bx2[Idx];
259  }
260  llvm_unreachable("unhandled tcgen05.ld lowering");
261 }
262 
263 #define TCGEN05ST(SHAPE, NUM) llvm::Intrinsic::nvvm_tcgen05_st_##SHAPE##_##NUM
264 
265 static llvm::Intrinsic::ID
266 getTcgen05StIntrinsicID(mlir::NVVM::Tcgen05LdStShape shape, uint32_t num) {
267  llvm::Intrinsic::ID Shape16x64b[] = {
268  TCGEN05ST(16x64b, x1), TCGEN05ST(16x64b, x2), TCGEN05ST(16x64b, x4),
269  TCGEN05ST(16x64b, x8), TCGEN05ST(16x64b, x16), TCGEN05ST(16x64b, x32),
270  TCGEN05ST(16x64b, x64), TCGEN05ST(16x64b, x128),
271  };
272 
273  llvm::Intrinsic::ID Shape16x128b[] = {
274  TCGEN05ST(16x128b, x1), TCGEN05ST(16x128b, x2), TCGEN05ST(16x128b, x4),
275  TCGEN05ST(16x128b, x8), TCGEN05ST(16x128b, x16), TCGEN05ST(16x128b, x32),
276  TCGEN05ST(16x128b, x64),
277  };
278 
279  llvm::Intrinsic::ID Shape16x256b[] = {
280  TCGEN05ST(16x256b, x1), TCGEN05ST(16x256b, x2), TCGEN05ST(16x256b, x4),
281  TCGEN05ST(16x256b, x8), TCGEN05ST(16x256b, x16), TCGEN05ST(16x256b, x32),
282  };
283 
284  llvm::Intrinsic::ID Shape16x32bx2[] = {
285  TCGEN05ST(16x32bx2, x1), TCGEN05ST(16x32bx2, x2),
286  TCGEN05ST(16x32bx2, x4), TCGEN05ST(16x32bx2, x8),
287  TCGEN05ST(16x32bx2, x16), TCGEN05ST(16x32bx2, x32),
288  TCGEN05ST(16x32bx2, x64), TCGEN05ST(16x32bx2, x128),
289  };
290 
291  llvm::Intrinsic::ID Shape32x32b[] = {
292  TCGEN05ST(32x32b, x1), TCGEN05ST(32x32b, x2), TCGEN05ST(32x32b, x4),
293  TCGEN05ST(32x32b, x8), TCGEN05ST(32x32b, x16), TCGEN05ST(32x32b, x32),
294  TCGEN05ST(32x32b, x64), TCGEN05ST(32x32b, x128),
295  };
296 
297  // `num` contains the length of vector and log2 of `num` returns the index
298  // into the shape array
299  unsigned Idx = std::log2(num);
300 
301  switch (shape) {
302  case NVVM::Tcgen05LdStShape::SHAPE_16X64B:
303  return Shape16x64b[Idx];
304  case NVVM::Tcgen05LdStShape::SHAPE_16X128B:
305  return Shape16x128b[Idx - 1];
306  case NVVM::Tcgen05LdStShape::SHAPE_16X256B:
307  return Shape16x256b[Idx - 2];
308  case NVVM::Tcgen05LdStShape::SHAPE_32X32B:
309  return Shape32x32b[Idx];
310  case NVVM::Tcgen05LdStShape::SHAPE_16X32BX2:
311  return Shape16x32bx2[Idx];
312  }
313  llvm_unreachable("unhandled tcgen05.st lowering");
314 }
315 
316 namespace {
317 /// Implementation of the dialect interface that converts operations belonging
318 /// to the NVVM dialect to LLVM IR.
319 class NVVMDialectLLVMIRTranslationInterface
321 public:
323 
324  /// Translates the given operation to LLVM IR using the provided IR builder
325  /// and saving the state in `moduleTranslation`.
326  LogicalResult
327  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
328  LLVM::ModuleTranslation &moduleTranslation) const final {
329  Operation &opInst = *op;
330 #include "mlir/Dialect/LLVMIR/NVVMConversions.inc"
331 
332  return failure();
333  }
334 
335  /// Attaches module-level metadata for functions marked as kernels.
336  LogicalResult
337  amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
338  NamedAttribute attribute,
339  LLVM::ModuleTranslation &moduleTranslation) const final {
340  auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
341  if (!func)
342  return failure();
343  llvm::Function *llvmFunc = moduleTranslation.lookupFunction(func.getName());
344 
345  if (attribute.getName() == NVVM::NVVMDialect::getMaxntidAttrName()) {
346  if (!isa<DenseI32ArrayAttr>(attribute.getValue()))
347  return failure();
348  auto values = cast<DenseI32ArrayAttr>(attribute.getValue());
349  const std::string attr = llvm::formatv(
350  "{0:$[,]}", llvm::make_range(values.asArrayRef().begin(),
351  values.asArrayRef().end()));
352  llvmFunc->addFnAttr("nvvm.maxntid", attr);
353  } else if (attribute.getName() == NVVM::NVVMDialect::getReqntidAttrName()) {
354  if (!isa<DenseI32ArrayAttr>(attribute.getValue()))
355  return failure();
356  auto values = cast<DenseI32ArrayAttr>(attribute.getValue());
357  const std::string attr = llvm::formatv(
358  "{0:$[,]}", llvm::make_range(values.asArrayRef().begin(),
359  values.asArrayRef().end()));
360  llvmFunc->addFnAttr("nvvm.reqntid", attr);
361  } else if (attribute.getName() ==
362  NVVM::NVVMDialect::getClusterDimAttrName()) {
363  if (!isa<DenseI32ArrayAttr>(attribute.getValue()))
364  return failure();
365  auto values = cast<DenseI32ArrayAttr>(attribute.getValue());
366  const std::string attr = llvm::formatv(
367  "{0:$[,]}", llvm::make_range(values.asArrayRef().begin(),
368  values.asArrayRef().end()));
369  llvmFunc->addFnAttr("nvvm.cluster_dim", attr);
370  } else if (attribute.getName() ==
371  NVVM::NVVMDialect::getClusterMaxBlocksAttrName()) {
372  auto value = dyn_cast<IntegerAttr>(attribute.getValue());
373  llvmFunc->addFnAttr("nvvm.maxclusterrank", llvm::utostr(value.getInt()));
374  } else if (attribute.getName() ==
375  NVVM::NVVMDialect::getMinctasmAttrName()) {
376  auto value = dyn_cast<IntegerAttr>(attribute.getValue());
377  llvmFunc->addFnAttr("nvvm.minctasm", llvm::utostr(value.getInt()));
378  } else if (attribute.getName() == NVVM::NVVMDialect::getMaxnregAttrName()) {
379  auto value = dyn_cast<IntegerAttr>(attribute.getValue());
380  llvmFunc->addFnAttr("nvvm.maxnreg", llvm::utostr(value.getInt()));
381  } else if (attribute.getName() ==
382  NVVM::NVVMDialect::getKernelFuncAttrName()) {
383  llvmFunc->setCallingConv(llvm::CallingConv::PTX_Kernel);
384  }
385  return success();
386  }
387 
388  LogicalResult
389  convertParameterAttr(LLVMFuncOp funcOp, int argIdx, NamedAttribute attribute,
390  LLVM::ModuleTranslation &moduleTranslation) const final {
391 
392  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
393  llvm::Function *llvmFunc =
394  moduleTranslation.lookupFunction(funcOp.getName());
395  llvm::NamedMDNode *nvvmAnnotations =
396  moduleTranslation.getOrInsertNamedModuleMetadata("nvvm.annotations");
397 
398  if (attribute.getName() == NVVM::NVVMDialect::getGridConstantAttrName()) {
399  llvm::MDNode *gridConstantMetaData = nullptr;
400 
401  // Check if a 'grid_constant' metadata node exists for the given function
402  for (llvm::MDNode *opnd : llvm::reverse(nvvmAnnotations->operands())) {
403  if (opnd->getNumOperands() == 3 &&
404  opnd->getOperand(0) == llvm::ValueAsMetadata::get(llvmFunc) &&
405  opnd->getOperand(1) ==
406  llvm::MDString::get(llvmContext, "grid_constant")) {
407  gridConstantMetaData = opnd;
408  break;
409  }
410  }
411 
412  // 'grid_constant' is a function-level meta data node with a list of
413  // integers, where each integer n denotes that the nth parameter has the
414  // grid_constant annotation (numbering from 1). This requires aggregating
415  // the indices of the individual parameters that have this attribute.
416  llvm::Type *i32 = llvm::IntegerType::get(llvmContext, 32);
417  if (gridConstantMetaData == nullptr) {
418  // Create a new 'grid_constant' metadata node
419  SmallVector<llvm::Metadata *> gridConstMetadata = {
420  llvm::ValueAsMetadata::getConstant(
421  llvm::ConstantInt::get(i32, argIdx + 1))};
422  llvm::Metadata *llvmMetadata[] = {
423  llvm::ValueAsMetadata::get(llvmFunc),
424  llvm::MDString::get(llvmContext, "grid_constant"),
425  llvm::MDNode::get(llvmContext, gridConstMetadata)};
426  llvm::MDNode *llvmMetadataNode =
427  llvm::MDNode::get(llvmContext, llvmMetadata);
428  nvvmAnnotations->addOperand(llvmMetadataNode);
429  } else {
430  // Append argIdx + 1 to the 'grid_constant' argument list
431  if (auto argList =
432  dyn_cast<llvm::MDTuple>(gridConstantMetaData->getOperand(2))) {
433  llvm::TempMDTuple clonedArgList = argList->clone();
434  clonedArgList->push_back((llvm::ValueAsMetadata::getConstant(
435  llvm::ConstantInt::get(i32, argIdx + 1))));
436  gridConstantMetaData->replaceOperandWith(
437  2, llvm::MDNode::replaceWithUniqued(std::move(clonedArgList)));
438  }
439  }
440  }
441  return success();
442  }
443 };
444 } // namespace
445 
447  registry.insert<NVVM::NVVMDialect>();
448  registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
449  dialect->addInterfaces<NVVMDialectLLVMIRTranslationInterface>();
450  });
451 }
452 
454  DialectRegistry registry;
456  context.appendDialectRegistry(registry);
457 }
static constexpr int64_t kSharedMemorySpace
union mlir::linalg::@1216::ArityGroupAndKind::Kind kind
static LogicalResult convertParameterAttr(llvm::AttrBuilder &attrBuilder, llvm::Attribute::AttrKind llvmKind, NamedAttribute namedAttr, ModuleTranslation &moduleTranslation, Location loc)
#define GET_REDUX_F32_ID(op, hasAbs, hasNaN)
static llvm::Intrinsic::ID getTcgen05StIntrinsicID(mlir::NVVM::Tcgen05LdStShape shape, uint32_t num)
static llvm::Intrinsic::ID getTcgen05LdIntrinsicID(mlir::NVVM::Tcgen05LdStShape shape, uint32_t num)
static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy, NVVM::ProxyKind toProxy, NVVM::MemScopeKind scope, bool isRelease)
#define TCGEN05ST(SHAPE, NUM)
static llvm::Intrinsic::ID getReduxIntrinsicId(llvm::Type *resultType, NVVM::ReduxKind kind, bool hasAbs, bool hasNaN)
#define TCGEN05LD(SHAPE, NUM)
static llvm::Intrinsic::ID getShflIntrinsicId(llvm::Type *resultType, NVVM::ShflKind kind, bool withPredicate)
static llvm::Intrinsic::ID getLdMatrixIntrinsicId(NVVM::MMALayout layout, int32_t num)
Return the intrinsic ID associated with ldmatrix for the given paramters.
static llvm::Intrinsic::ID getVoteSyncIntrinsicId(NVVM::VoteSyncKind kind)
static llvm::Intrinsic::ID getMatchSyncIntrinsicId(Type valType, NVVM::MatchSyncKind kind)
static llvm::Intrinsic::ID getStBulkIntrinsicId(LLVM::LLVMPointerType addrType)
Return the intrinsic ID associated with st.bulk for the given address type.
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
static bool isSharedMemory(MemRefType type)
Return true if this is a shared memory memref type.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
Base class for dialect interfaces providing translation to LLVM IR.
Implementation class for module translation.
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:164
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
bool isInteger() const
Return true if this is an integer type (with the specified width).
Definition: Types.cpp:56
llvm::CallInst * createIntrinsicCall(llvm::IRBuilderBase &builder, llvm::Intrinsic::ID intrinsic, ArrayRef< llvm::Value * > args={}, ArrayRef< llvm::Type * > tys={})
Creates a call to an LLVM IR intrinsic function with the given arguments.
Include the generated interface declarations.
void registerNVVMDialectTranslation(DialectRegistry &registry)
Register the NVVM dialect and the translation from it to the LLVM IR in the given registry;.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...