//===- LowerGpuOpsToNVVMOps.cpp - MLIR GPU to NVVM lowering passes --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass to generate NVVM IR operations for higher-level
// GPU operations.
//
//===----------------------------------------------------------------------===//

#include "../GPUCommon/GPUOpsLowering.h"
#include "../GPUCommon/IndexIntrinsicsOpLowering.h"
#include "../GPUCommon/OpToFuncCallLowering.h"
#include <optional>

namespace mlir {
#define GEN_PASS_DEF_CONVERTGPUOPSTONVVMOPS
#include "mlir/Conversion/Passes.h.inc"
} // namespace mlir

using namespace mlir;

namespace {

/// Convert gpu dialect shfl mode enum to the equivalent nvvm one.
static NVVM::ShflKind convertShflKind(gpu::ShuffleMode mode) {
  switch (mode) {
  case gpu::ShuffleMode::XOR:
    return NVVM::ShflKind::bfly;
  case gpu::ShuffleMode::UP:
    return NVVM::ShflKind::up;
  case gpu::ShuffleMode::DOWN:
    return NVVM::ShflKind::down;
  case gpu::ShuffleMode::IDX:
    return NVVM::ShflKind::idx;
  }
  llvm_unreachable("unknown shuffle mode");
}

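/// Convert a gpu dialect all-reduce operation to the equivalent nvvm redux
/// kind, if any. Modes that redux cannot express (e.g. MUL and the unsigned
/// min/max variants) map to std::nullopt so the caller can bail out.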
static std::optional<NVVM::ReduxKind>
convertReduxKind(gpu::AllReduceOperation mode) {
  switch (mode) {
  case gpu::AllReduceOperation::ADD:
    return NVVM::ReduxKind::ADD;
  case gpu::AllReduceOperation::MUL:
    return std::nullopt;
  case gpu::AllReduceOperation::MINSI:
    return NVVM::ReduxKind::MIN;
  case gpu::AllReduceOperation::MINUI:
    return std::nullopt;
  case gpu::AllReduceOperation::MINNUMF:
    return NVVM::ReduxKind::MIN;
  case gpu::AllReduceOperation::MAXSI:
    return NVVM::ReduxKind::MAX;
  case gpu::AllReduceOperation::MAXUI:
    return std::nullopt;
  case gpu::AllReduceOperation::MAXNUMF:
    return NVVM::ReduxKind::MAX;
  case gpu::AllReduceOperation::AND:
    return NVVM::ReduxKind::AND;
  case gpu::AllReduceOperation::OR:
    return NVVM::ReduxKind::OR;
  case gpu::AllReduceOperation::XOR:
    return NVVM::ReduxKind::XOR;
  case gpu::AllReduceOperation::MINIMUMF:
  case gpu::AllReduceOperation::MAXIMUMF:
    return std::nullopt;
  }
  return std::nullopt;
}

/// This pattern lowers the gpu.subgroup_reduce op to the nvvm.redux op. The op
/// must be executed by the entire subgroup, otherwise the behaviour is
/// undefined.
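/// For illustration only (a sketch, not the verbatim printed form), a uniform
/// 32-bit integer reduction such as
///   %sum = gpu.subgroup_reduce add %x uniform : (i32) -> i32
/// is rewritten to an nvvm.redux.sync performing the same reduction with a
/// membermask covering all lanes (-1).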
struct GPUSubgroupReduceOpLowering
    : public ConvertOpToLLVMPattern<gpu::SubgroupReduceOp> {
  using ConvertOpToLLVMPattern<gpu::SubgroupReduceOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::SubgroupReduceOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    if (op.getClusterSize())
      return rewriter.notifyMatchFailure(
          op, "lowering for clustered reduce not implemented");

    if (!op.getUniform())
      return rewriter.notifyMatchFailure(
          op, "cannot be lowered to redux as the op must be run "
              "uniformly (entire subgroup).");
    if (!op.getValue().getType().isInteger(32))
      return rewriter.notifyMatchFailure(op, "unsupported data type");

    std::optional<NVVM::ReduxKind> mode = convertReduxKind(op.getOp());
    if (!mode.has_value())
      return rewriter.notifyMatchFailure(
          op, "unsupported reduction mode for redux");

    Location loc = op->getLoc();
    auto int32Type = IntegerType::get(rewriter.getContext(), 32);
    Value offset = rewriter.create<LLVM::ConstantOp>(loc, int32Type, -1);

    auto reduxOp = rewriter.create<NVVM::ReduxOp>(loc, int32Type, op.getValue(),
                                                  mode.value(), offset);

    rewriter.replaceOp(op, reduxOp->getResult(0));
    return success();
  }
};

struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
  using ConvertOpToLLVMPattern<gpu::ShuffleOp>::ConvertOpToLLVMPattern;

  /// Lowers a shuffle to the corresponding NVVM op.
  ///
  /// Convert the `width` argument into an activeMask (a bitmask which
  /// specifies which threads participate in the shuffle) and a maskAndClamp
  /// (specifying the highest lane which participates in the shuffle).
  ///
  ///     %one = llvm.constant(1 : i32) : i32
  ///     %minus_one = llvm.constant(-1 : i32) : i32
  ///     %thirty_two = llvm.constant(32 : i32) : i32
  ///     %num_lanes = llvm.sub %thirty_two, %width : i32
  ///     %active_mask = llvm.lshr %minus_one, %num_lanes : i32
  ///     %mask_and_clamp = llvm.sub %width, %one : i32
  ///     %shfl = nvvm.shfl.sync.bfly %active_mask, %value, %offset,
  ///         %mask_and_clamp : !llvm.struct<(f32, i1)>
  ///     %shfl_value = llvm.extractvalue %shfl[0] : !llvm.struct<(f32, i1)>
  ///     %shfl_pred = llvm.extractvalue %shfl[1] : !llvm.struct<(f32, i1)>
  LogicalResult
  matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    Location loc = op->getLoc();

    auto valueTy = adaptor.getValue().getType();
    auto int32Type = IntegerType::get(rewriter.getContext(), 32);
    auto predTy = IntegerType::get(rewriter.getContext(), 1);

    Value one = rewriter.create<LLVM::ConstantOp>(loc, int32Type, 1);
    Value minusOne = rewriter.create<LLVM::ConstantOp>(loc, int32Type, -1);
    Value thirtyTwo = rewriter.create<LLVM::ConstantOp>(loc, int32Type, 32);
    Value numLeadInactiveLane = rewriter.create<LLVM::SubOp>(
        loc, int32Type, thirtyTwo, adaptor.getWidth());
    // Bit mask of active lanes: `(-1) >> (32 - activeWidth)`.
    Value activeMask = rewriter.create<LLVM::LShrOp>(loc, int32Type, minusOne,
                                                     numLeadInactiveLane);
    Value maskAndClamp;
    if (op.getMode() == gpu::ShuffleMode::UP) {
      // Clamp lane: `32 - activeWidth`
      maskAndClamp = numLeadInactiveLane;
    } else {
      // Clamp lane: `activeWidth - 1`
      maskAndClamp =
          rewriter.create<LLVM::SubOp>(loc, int32Type, adaptor.getWidth(), one);
    }

    bool predIsUsed = !op->getResult(1).use_empty();
    UnitAttr returnValueAndIsValidAttr = nullptr;
    Type resultTy = valueTy;
    if (predIsUsed) {
      returnValueAndIsValidAttr = rewriter.getUnitAttr();
      resultTy = LLVM::LLVMStructType::getLiteral(rewriter.getContext(),
                                                  {valueTy, predTy});
    }
    Value shfl = rewriter.create<NVVM::ShflOp>(
        loc, resultTy, activeMask, adaptor.getValue(), adaptor.getOffset(),
        maskAndClamp, convertShflKind(op.getMode()), returnValueAndIsValidAttr);
    if (predIsUsed) {
      Value shflValue = rewriter.create<LLVM::ExtractValueOp>(loc, shfl, 0);
      Value isActiveSrcLane =
          rewriter.create<LLVM::ExtractValueOp>(loc, shfl, 1);
      rewriter.replaceOp(op, {shflValue, isActiveSrcLane});
    } else {
      rewriter.replaceOp(op, {shfl, nullptr});
    }
    return success();
  }
};

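/// Lowers gpu.lane_id to the NVVM lane-id intrinsic, attaching a result range
/// of [0, kWarpSize) unless a tighter upper bound is present on the op, and
/// then adjusts the result to the configured index bitwidth. A sketch (the
/// exact printed form may differ):
///   %0 = nvvm.read.ptx.sreg.laneid range <i32, 0, 32> : i32
///   %1 = llvm.sext %0 : i32 to i64   // only when the index bitwidth is 64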
struct GPULaneIdOpToNVVM : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
  using ConvertOpToLLVMPattern<gpu::LaneIdOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::LaneIdOp op, gpu::LaneIdOp::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto loc = op->getLoc();
    MLIRContext *context = rewriter.getContext();
    LLVM::ConstantRangeAttr bounds = nullptr;
    if (std::optional<APInt> upperBound = op.getUpperBound())
      bounds = rewriter.getAttr<LLVM::ConstantRangeAttr>(
          /*bitWidth=*/32, /*lower=*/0, upperBound->getZExtValue());
    else
      bounds = rewriter.getAttr<LLVM::ConstantRangeAttr>(
          /*bitWidth=*/32, /*lower=*/0, /*upper=*/kWarpSize);
    Value newOp =
        rewriter.create<NVVM::LaneIdOp>(loc, rewriter.getI32Type(), bounds);
    // Truncate or extend the result depending on the index bitwidth specified
    // by the LLVMTypeConverter options.
    const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
    if (indexBitwidth > 32) {
      newOp = rewriter.create<LLVM::SExtOp>(
          loc, IntegerType::get(context, indexBitwidth), newOp);
    } else if (indexBitwidth < 32) {
      newOp = rewriter.create<LLVM::TruncOp>(
          loc, IntegerType::get(context, indexBitwidth), newOp);
    }
    rewriter.replaceOp(op, {newOp});
    return success();
  }
};

/// Import the GPU Ops to NVVM Patterns.
#include "GPUToNVVM.cpp.inc"

/// A pass that replaces all occurrences of GPU device operations with their
/// corresponding NVVM equivalent.
///
/// This pass only handles device code and is not meant to be run on GPU host
/// code.
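///
/// A typical invocation (illustrative; see Passes.td for the exact pass name
/// and options) runs the pass on each GPU module, e.g.:
///   mlir-opt --pass-pipeline='builtin.module(gpu.module(convert-gpu-to-nvvm))'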
struct LowerGpuOpsToNVVMOpsPass
    : public impl::ConvertGpuOpsToNVVMOpsBase<LowerGpuOpsToNVVMOpsPass> {
  using Base::Base;

  void runOnOperation() override {
    gpu::GPUModuleOp m = getOperation();

    // Request C wrapper emission.
    for (auto func : m.getOps<func::FuncOp>()) {
      func->setAttr(LLVM::LLVMDialect::getEmitCWrapperAttrName(),
                    UnitAttr::get(&getContext()));
    }

    // Customize the bitwidth used for the device-side index computations.
    LowerToLLVMOptions options(
        m.getContext(),
        DataLayout(cast<DataLayoutOpInterface>(m.getOperation())));
    if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
      options.overrideIndexBitwidth(indexBitwidth);
    options.useBarePtrCallConv = useBarePtrCallConv;

    // Apply in-dialect lowering first. In-dialect lowering replaces ops with
    // other ops that themselves need further lowering, which a single
    // conversion pass does not support.
    {
      RewritePatternSet patterns(m.getContext());
      populateGpuRewritePatterns(patterns);
      if (failed(applyPatternsAndFoldGreedily(m, std::move(patterns))))
        return signalPassFailure();
    }

    LLVMTypeConverter converter(m.getContext(), options);
    configureGpuToNVVMTypeConverter(converter);
    RewritePatternSet llvmPatterns(m.getContext());

    arith::populateArithToLLVMConversionPatterns(converter, llvmPatterns);
    cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns);
    populateFuncToLLVMConversionPatterns(converter, llvmPatterns);
    populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns);
    populateGpuToNVVMConversionPatterns(converter, llvmPatterns);
    populateGpuWMMAToNVVMConversionPatterns(converter, llvmPatterns);
    populateVectorToLLVMConversionPatterns(converter, llvmPatterns);
    if (this->hasRedux)
      populateGpuSubgroupReduceOpLoweringPattern(converter, llvmPatterns);
    LLVMConversionTarget target(m.getContext());
    configureGpuToNVVMConversionLegality(target);
    if (failed(applyPartialConversion(m, target, std::move(llvmPatterns))))
      signalPassFailure();
  }
};

} // namespace

void mlir::configureGpuToNVVMConversionLegality(ConversionTarget &target) {
  target.addIllegalOp<func::FuncOp>();
  target.addLegalDialect<::mlir::LLVM::LLVMDialect>();
  target.addLegalDialect<::mlir::NVVM::NVVMDialect>();
  target.addIllegalDialect<gpu::GPUDialect>();
  target.addIllegalOp<LLVM::CopySignOp, LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op,
                      LLVM::FAbsOp, LLVM::FCeilOp, LLVM::FFloorOp, LLVM::FMAOp,
                      LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op, LLVM::Log2Op,
                      LLVM::PowOp, LLVM::RoundEvenOp, LLVM::RoundOp,
                      LLVM::SinOp, LLVM::SqrtOp>();

  // TODO: Remove once we support replacing non-root ops.
  target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>();
}

void mlir::configureGpuToNVVMTypeConverter(LLVMTypeConverter &converter) {
  // NVVM uses alloca in the default address space to represent private
  // memory allocations, so drop private annotations. NVVM uses address
  // space 3 for shared memory. NVVM uses the default address space to
  // represent global memory.
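  // For example, a memref annotated with #gpu.address_space<workgroup> ends
  // up as a pointer in LLVM addrspace(3) after conversion (illustrative; the
  // full lowered type also depends on the memref descriptor layout).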
  populateGpuMemorySpaceAttributeConversions(
      converter, [](gpu::AddressSpace space) -> unsigned {
        switch (space) {
        case gpu::AddressSpace::Global:
          return static_cast<unsigned>(
              NVVM::NVVMMemorySpace::kGlobalMemorySpace);
        case gpu::AddressSpace::Workgroup:
          return static_cast<unsigned>(
              NVVM::NVVMMemorySpace::kSharedMemorySpace);
        case gpu::AddressSpace::Private:
          return 0;
        }
        llvm_unreachable("unknown address space enum value");
        return 0;
      });
  // Lowering for MMAMatrixType.
  converter.addConversion([&](gpu::MMAMatrixType type) -> Type {
    return convertMMAToLLVMType(type);
  });
}

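/// Registers two patterns for OpTy: one that scalarizes vector operands and
/// one that rewrites the scalar op into a call to the given libdevice
/// function. For example, with the registrations in
/// populateGpuToNVVMConversionPatterns below, `math.exp %x : f32` ends up as
/// a call to `__nv_expf` (or the approximate `__nv_fast_expf` variant when
/// one is provided and the lowering chooses it); this is a sketch of the
/// intent rather than the exact emitted IR.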
template <typename OpTy>
static void populateOpPatterns(const LLVMTypeConverter &converter,
                               RewritePatternSet &patterns, StringRef f32Func,
                               StringRef f64Func, StringRef f32ApproxFunc = "",
                               StringRef f16Func = "") {
  patterns.add<ScalarizeVectorOpLowering<OpTy>>(converter);
  patterns.add<OpToFuncCallLowering<OpTy>>(converter, f32Func, f64Func,
                                           f32ApproxFunc, f16Func);
}

void mlir::populateGpuSubgroupReduceOpLoweringPattern(
    const LLVMTypeConverter &converter, RewritePatternSet &patterns) {
  patterns.add<GPUSubgroupReduceOpLowering>(converter);
}

void mlir::populateGpuToNVVMConversionPatterns(
    const LLVMTypeConverter &converter, RewritePatternSet &patterns) {
  using gpu::index_lowering::IndexKind;
  using gpu::index_lowering::IntrType;
  populateWithGenerated(patterns);
  patterns.add<GPUPrintfOpToVPrintfLowering>(converter);
  patterns.add<
      gpu::index_lowering::OpLowering<gpu::ThreadIdOp, NVVM::ThreadIdXOp,
                                      NVVM::ThreadIdYOp, NVVM::ThreadIdZOp>>(
      converter, IndexKind::Block, IntrType::Id);
  patterns.add<
      gpu::index_lowering::OpLowering<gpu::BlockDimOp, NVVM::BlockDimXOp,
                                      NVVM::BlockDimYOp, NVVM::BlockDimZOp>>(
      converter, IndexKind::Block, IntrType::Dim);
  patterns.add<
      gpu::index_lowering::OpLowering<gpu::ClusterIdOp, NVVM::ClusterIdXOp,
                                      NVVM::ClusterIdYOp, NVVM::ClusterIdZOp>>(
      converter, IndexKind::Other, IntrType::Id);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::ClusterDimOp, NVVM::ClusterDimXOp, NVVM::ClusterDimYOp,
      NVVM::ClusterDimZOp>>(converter, IndexKind::Other, IntrType::Dim);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::ClusterBlockIdOp, NVVM::BlockInClusterIdXOp,
      NVVM::BlockInClusterIdYOp, NVVM::BlockInClusterIdZOp>>(
      converter, IndexKind::Other, IntrType::Id);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::ClusterDimBlocksOp, NVVM::ClusterDimBlocksXOp,
      NVVM::ClusterDimBlocksYOp, NVVM::ClusterDimBlocksZOp>>(
      converter, IndexKind::Other, IntrType::Dim);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::BlockIdOp, NVVM::BlockIdXOp, NVVM::BlockIdYOp, NVVM::BlockIdZOp>>(
      converter, IndexKind::Grid, IntrType::Id);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::GridDimOp, NVVM::GridDimXOp, NVVM::GridDimYOp, NVVM::GridDimZOp>>(
      converter, IndexKind::Grid, IntrType::Dim);
  patterns.add<GPULaneIdOpToNVVM, GPUShuffleOpLowering, GPUReturnOpLowering>(
      converter);

  patterns.add<GPUDynamicSharedMemoryOpLowering>(
      converter, NVVM::kSharedMemoryAlignmentBit);

  // Explicitly drop memory space when lowering private memory
  // attributions since NVVM models it as `alloca`s in the default
  // memory space and does not support `alloca`s with addrspace(5).
  patterns.add<GPUFuncOpLowering>(
      converter,
      GPUFuncOpLoweringOptions{
          /*allocaAddrSpace=*/0,
          /*workgroupAddrSpace=*/
          static_cast<unsigned>(NVVM::NVVMMemorySpace::kSharedMemorySpace),
          StringAttr::get(&converter.getContext(),
                          NVVM::NVVMDialect::getKernelFuncAttrName()),
          StringAttr::get(&converter.getContext(),
                          NVVM::NVVMDialect::getMaxntidAttrName())});
410 
411  populateOpPatterns<arith::RemFOp>(converter, patterns, "__nv_fmodf",
412  "__nv_fmod");
413  populateOpPatterns<math::AbsFOp>(converter, patterns, "__nv_fabsf",
414  "__nv_fabs");
415  populateOpPatterns<math::AcosOp>(converter, patterns, "__nv_acosf",
416  "__nv_acos");
417  populateOpPatterns<math::AcoshOp>(converter, patterns, "__nv_acoshf",
418  "__nv_acosh");
419  populateOpPatterns<math::AsinOp>(converter, patterns, "__nv_asinf",
420  "__nv_asin");
421  populateOpPatterns<math::AsinhOp>(converter, patterns, "__nv_asinhf",
422  "__nv_asinh");
423  populateOpPatterns<math::AtanOp>(converter, patterns, "__nv_atanf",
424  "__nv_atan");
425  populateOpPatterns<math::Atan2Op>(converter, patterns, "__nv_atan2f",
426  "__nv_atan2");
427  populateOpPatterns<math::AtanhOp>(converter, patterns, "__nv_atanhf",
428  "__nv_atanh");
429  populateOpPatterns<math::CbrtOp>(converter, patterns, "__nv_cbrtf",
430  "__nv_cbrt");
431  populateOpPatterns<math::CeilOp>(converter, patterns, "__nv_ceilf",
432  "__nv_ceil");
433  populateOpPatterns<math::CopySignOp>(converter, patterns, "__nv_copysignf",
434  "__nv_copysign");
435  populateOpPatterns<math::CosOp>(converter, patterns, "__nv_cosf", "__nv_cos",
436  "__nv_fast_cosf");
437  populateOpPatterns<math::CoshOp>(converter, patterns, "__nv_coshf",
438  "__nv_cosh");
439  populateOpPatterns<math::ErfOp>(converter, patterns, "__nv_erff", "__nv_erf");
440  populateOpPatterns<math::ExpOp>(converter, patterns, "__nv_expf", "__nv_exp",
441  "__nv_fast_expf");
442  populateOpPatterns<math::Exp2Op>(converter, patterns, "__nv_exp2f",
443  "__nv_exp2");
444  populateOpPatterns<math::ExpM1Op>(converter, patterns, "__nv_expm1f",
445  "__nv_expm1");
446  populateOpPatterns<math::FloorOp>(converter, patterns, "__nv_floorf",
447  "__nv_floor");
448  populateOpPatterns<math::FmaOp>(converter, patterns, "__nv_fmaf", "__nv_fma");
449  populateOpPatterns<math::LogOp>(converter, patterns, "__nv_logf", "__nv_log",
450  "__nv_fast_logf");
451  populateOpPatterns<math::Log10Op>(converter, patterns, "__nv_log10f",
452  "__nv_log10", "__nv_fast_log10f");
453  populateOpPatterns<math::Log1pOp>(converter, patterns, "__nv_log1pf",
454  "__nv_log1p");
455  populateOpPatterns<math::Log2Op>(converter, patterns, "__nv_log2f",
456  "__nv_log2", "__nv_fast_log2f");
457  populateOpPatterns<math::PowFOp>(converter, patterns, "__nv_powf", "__nv_pow",
458  "__nv_fast_powf");
459  populateOpPatterns<math::RoundOp>(converter, patterns, "__nv_roundf",
460  "__nv_round");
461  populateOpPatterns<math::RoundEvenOp>(converter, patterns, "__nv_rintf",
462  "__nv_rint");
463  populateOpPatterns<math::RsqrtOp>(converter, patterns, "__nv_rsqrtf",
464  "__nv_rsqrt");
465  populateOpPatterns<math::SinOp>(converter, patterns, "__nv_sinf", "__nv_sin",
466  "__nv_fast_sinf");
467  populateOpPatterns<math::SinhOp>(converter, patterns, "__nv_sinhf",
468  "__nv_sinh");
469  populateOpPatterns<math::SqrtOp>(converter, patterns, "__nv_sqrtf",
470  "__nv_sqrt");
471  populateOpPatterns<math::TanOp>(converter, patterns, "__nv_tanf", "__nv_tan",
472  "__nv_fast_tanf");
473  populateOpPatterns<math::TanhOp>(converter, patterns, "__nv_tanhf",
474  "__nv_tanh");
475 }

//===----------------------------------------------------------------------===//
// NVVMTargetAttr convert to LLVM attr interface
//===----------------------------------------------------------------------===//

namespace {
struct NVVMTargetConvertToLLVMAttrInterface
    : public ConvertToLLVMAttrInterface::ExternalModel<
          NVVMTargetConvertToLLVMAttrInterface, NVVM::NVVMTargetAttr> {
  /// Configure GPU to NVVM.
  void populateConvertToLLVMConversionPatterns(
      Attribute attr, ConversionTarget &target,
      LLVMTypeConverter &typeConverter, RewritePatternSet &patterns) const;
};
} // namespace

void NVVMTargetConvertToLLVMAttrInterface::
    populateConvertToLLVMConversionPatterns(Attribute attr,
                                            ConversionTarget &target,
                                            LLVMTypeConverter &typeConverter,
                                            RewritePatternSet &patterns) const {
  configureGpuToNVVMConversionLegality(target);
  configureGpuToNVVMTypeConverter(typeConverter);
  populateGpuToNVVMConversionPatterns(typeConverter, patterns);
}

void mlir::NVVM::registerConvertGpuToNVVMInterface(DialectRegistry &registry) {
  registry.addExtension(+[](MLIRContext *ctx, NVVMDialect *dialect) {
    NVVMTargetAttr::attachInterface<NVVMTargetConvertToLLVMAttrInterface>(*ctx);
  });
}