//===- LowerGpuOpsToNVVMOps.cpp - MLIR GPU to NVVM lowering passes -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass to generate NVVMIR operations for higher-level
// GPU operations.
//
//===----------------------------------------------------------------------===//

#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"

#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

#include "../GPUCommon/GPUOpsLowering.h"
#include "../GPUCommon/IndexIntrinsicsOpLowering.h"
#include "../GPUCommon/OpToFuncCallLowering.h"
#include <optional>

namespace mlir {
#define GEN_PASS_DEF_CONVERTGPUOPSTONVVMOPS
#include "mlir/Conversion/Passes.h.inc"
} // namespace mlir

using namespace mlir;

namespace {

/// Convert gpu dialect shfl mode enum to the equivalent nvvm one.
static NVVM::ShflKind convertShflKind(gpu::ShuffleMode mode) {
  switch (mode) {
  case gpu::ShuffleMode::XOR:
    return NVVM::ShflKind::bfly;
  case gpu::ShuffleMode::UP:
    return NVVM::ShflKind::up;
  case gpu::ShuffleMode::DOWN:
    return NVVM::ShflKind::down;
  case gpu::ShuffleMode::IDX:
    return NVVM::ShflKind::idx;
  }
  llvm_unreachable("unknown shuffle mode");
}

static std::optional<NVVM::ReduxKind>
convertReduxKind(gpu::AllReduceOperation mode) {
  switch (mode) {
  case gpu::AllReduceOperation::ADD:
    return NVVM::ReduxKind::ADD;
  case gpu::AllReduceOperation::MUL:
    return std::nullopt;
  case gpu::AllReduceOperation::MINSI:
    return NVVM::ReduxKind::MIN;
  case gpu::AllReduceOperation::MINUI:
    return std::nullopt;
  case gpu::AllReduceOperation::MINNUMF:
    return NVVM::ReduxKind::MIN;
  case gpu::AllReduceOperation::MAXSI:
    return NVVM::ReduxKind::MAX;
  case gpu::AllReduceOperation::MAXUI:
    return std::nullopt;
  case gpu::AllReduceOperation::MAXNUMF:
    return NVVM::ReduxKind::MAX;
  case gpu::AllReduceOperation::AND:
    return NVVM::ReduxKind::AND;
  case gpu::AllReduceOperation::OR:
    return NVVM::ReduxKind::OR;
  case gpu::AllReduceOperation::XOR:
    return NVVM::ReduxKind::XOR;
  case gpu::AllReduceOperation::MINIMUMF:
  case gpu::AllReduceOperation::MAXIMUMF:
    return std::nullopt;
  }
  return std::nullopt;
}

/// Lowers the gpu.subgroup_reduce op to the nvvm.redux op. The op must be
/// run by the entire subgroup, otherwise the behavior is undefined.
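///
/// Illustrative sketch (exact assembly may differ): a uniform
/// `%r = gpu.subgroup_reduce add %x : (i32) -> i32` maps to an
/// `nvvm.redux.sync` with kind `add` and an all-ones member mask, i.e. the
/// constant -1 created below.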
struct GPUSubgroupReduceOpLowering
    : public ConvertOpToLLVMPattern<gpu::SubgroupReduceOp> {
  using ConvertOpToLLVMPattern<gpu::SubgroupReduceOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::SubgroupReduceOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    if (op.getClusterSize())
      return rewriter.notifyMatchFailure(
          op, "lowering for clustered reduce not implemented");

    if (!op.getUniform())
      return rewriter.notifyMatchFailure(
          op, "cannot be lowered to redux as the op must be run "
              "uniformly (entire subgroup).");
    if (!op.getValue().getType().isInteger(32))
      return rewriter.notifyMatchFailure(op, "unsupported data type");

    std::optional<NVVM::ReduxKind> mode = convertReduxKind(op.getOp());
    if (!mode.has_value())
      return rewriter.notifyMatchFailure(
          op, "unsupported reduction mode for redux");

    Location loc = op->getLoc();
    auto int32Type = IntegerType::get(rewriter.getContext(), 32);
    Value offset = rewriter.create<LLVM::ConstantOp>(loc, int32Type, -1);

    auto reduxOp = rewriter.create<NVVM::ReduxOp>(loc, int32Type, op.getValue(),
                                                  mode.value(), offset);

    rewriter.replaceOp(op, reduxOp->getResult(0));
    return success();
  }
};

struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
  using ConvertOpToLLVMPattern<gpu::ShuffleOp>::ConvertOpToLLVMPattern;

  /// Lowers a shuffle to the corresponding NVVM op.
  ///
  /// Converts the `width` argument into an activeMask (a bitmask which
  /// specifies which threads participate in the shuffle) and a maskAndClamp
  /// (specifying the highest lane which participates in the shuffle).
  ///
  ///     %one = llvm.constant(1 : i32) : i32
  ///     %minus_one = llvm.constant(-1 : i32) : i32
  ///     %thirty_two = llvm.constant(32 : i32) : i32
  ///     %num_lanes = llvm.sub %thirty_two, %width : i32
  ///     %active_mask = llvm.lshr %minus_one, %num_lanes : i32
  ///     %mask_and_clamp = llvm.sub %width, %one : i32
  ///     %shfl = nvvm.shfl.sync.bfly %active_mask, %value, %offset,
  ///         %mask_and_clamp : !llvm<"{ float, i1 }">
  ///     %shfl_value = llvm.extractvalue %shfl[0] : !llvm<"{ float, i1 }">
  ///     %shfl_pred = llvm.extractvalue %shfl[1] : !llvm<"{ float, i1 }">
  LogicalResult
  matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    Location loc = op->getLoc();

    auto valueTy = adaptor.getValue().getType();
    auto int32Type = IntegerType::get(rewriter.getContext(), 32);
    auto predTy = IntegerType::get(rewriter.getContext(), 1);

    Value one = rewriter.create<LLVM::ConstantOp>(loc, int32Type, 1);
    Value minusOne = rewriter.create<LLVM::ConstantOp>(loc, int32Type, -1);
    Value thirtyTwo = rewriter.create<LLVM::ConstantOp>(loc, int32Type, 32);
    Value numLeadInactiveLane = rewriter.create<LLVM::SubOp>(
        loc, int32Type, thirtyTwo, adaptor.getWidth());
    // Bit mask of active lanes: `(-1) >> (32 - activeWidth)`.
    Value activeMask = rewriter.create<LLVM::LShrOp>(loc, int32Type, minusOne,
                                                     numLeadInactiveLane);
    Value maskAndClamp;
    if (op.getMode() == gpu::ShuffleMode::UP) {
      // Clamp lane: `32 - activeWidth`
      maskAndClamp = numLeadInactiveLane;
    } else {
      // Clamp lane: `activeWidth - 1`
      maskAndClamp =
          rewriter.create<LLVM::SubOp>(loc, int32Type, adaptor.getWidth(), one);
    }

    bool predIsUsed = !op->getResult(1).use_empty();
    UnitAttr returnValueAndIsValidAttr = nullptr;
    Type resultTy = valueTy;
    if (predIsUsed) {
      returnValueAndIsValidAttr = rewriter.getUnitAttr();
      resultTy = LLVM::LLVMStructType::getLiteral(rewriter.getContext(),
                                                  {valueTy, predTy});
    }
    Value shfl = rewriter.create<NVVM::ShflOp>(
        loc, resultTy, activeMask, adaptor.getValue(), adaptor.getOffset(),
        maskAndClamp, convertShflKind(op.getMode()), returnValueAndIsValidAttr);
    if (predIsUsed) {
      Value shflValue = rewriter.create<LLVM::ExtractValueOp>(loc, shfl, 0);
      Value isActiveSrcLane =
          rewriter.create<LLVM::ExtractValueOp>(loc, shfl, 1);
      rewriter.replaceOp(op, {shflValue, isActiveSrcLane});
    } else {
      rewriter.replaceOp(op, {shfl, nullptr});
    }
    return success();
  }
};

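/// Lowers gpu.lane_id to NVVM's laneid intrinsic and sign-extends or
/// truncates the 32-bit result to the index bitwidth configured on the
/// LLVMTypeConverter.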
struct GPULaneIdOpToNVVM : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
  using ConvertOpToLLVMPattern<gpu::LaneIdOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::LaneIdOp op, gpu::LaneIdOp::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto loc = op->getLoc();
    MLIRContext *context = rewriter.getContext();
    Value newOp = rewriter.create<NVVM::LaneIdOp>(loc, rewriter.getI32Type());
    // Truncate or extend the result depending on the index bitwidth specified
    // by the LLVMTypeConverter options.
    const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
    if (indexBitwidth > 32) {
      newOp = rewriter.create<LLVM::SExtOp>(
          loc, IntegerType::get(context, indexBitwidth), newOp);
    } else if (indexBitwidth < 32) {
      newOp = rewriter.create<LLVM::TruncOp>(
          loc, IntegerType::get(context, indexBitwidth), newOp);
    }
    rewriter.replaceOp(op, {newOp});
    return success();
  }
};

/// Import the GPU Ops to NVVM Patterns.
#include "GPUToNVVM.cpp.inc"

/// A pass that replaces all occurrences of GPU device operations with their
/// corresponding NVVM equivalent.
///
/// This pass only handles device code and is not meant to be run on GPU host
/// code.
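///
/// Usage sketch (assuming this pass is registered under the argument
/// `convert-gpu-to-nvvm`, as in Passes.td):
///
///   mlir-opt in.mlir \
///     --pass-pipeline='builtin.module(gpu.module(convert-gpu-to-nvvm{index-bitwidth=32}))'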
struct LowerGpuOpsToNVVMOpsPass
    : public impl::ConvertGpuOpsToNVVMOpsBase<LowerGpuOpsToNVVMOpsPass> {
  using Base::Base;

  void runOnOperation() override {
    gpu::GPUModuleOp m = getOperation();

    // Request C wrapper emission.
    for (auto func : m.getOps<func::FuncOp>()) {
      func->setAttr(LLVM::LLVMDialect::getEmitCWrapperAttrName(),
                    UnitAttr::get(&getContext()));
    }

    // Customize the bitwidth used for the device side index computations.
    LowerToLLVMOptions options(
        m.getContext(),
        DataLayout(cast<DataLayoutOpInterface>(m.getOperation())));
    if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
      options.overrideIndexBitwidth(indexBitwidth);
    options.useBarePtrCallConv = useBarePtrCallConv;

    // Apply in-dialect lowering. In-dialect lowering will replace
    // ops which need to be lowered further, which is not supported by a
    // single conversion pass.
    {
      RewritePatternSet patterns(m.getContext());
      populateGpuRewritePatterns(patterns);
      if (failed(applyPatternsAndFoldGreedily(m, std::move(patterns))))
        return signalPassFailure();
    }

    LLVMTypeConverter converter(m.getContext(), options);
    // NVVM uses alloca in the default address space to represent private
    // memory allocations, so drop private annotations. NVVM uses address
    // space 3 for shared memory. NVVM uses the default address space to
    // represent global memory.
    populateGpuMemorySpaceAttributeConversions(
        converter, [](gpu::AddressSpace space) -> unsigned {
          switch (space) {
          case gpu::AddressSpace::Global:
            return static_cast<unsigned>(
                NVVM::NVVMMemorySpace::kGlobalMemorySpace);
          case gpu::AddressSpace::Workgroup:
            return static_cast<unsigned>(
                NVVM::NVVMMemorySpace::kSharedMemorySpace);
          case gpu::AddressSpace::Private:
            return 0;
          }
          llvm_unreachable("unknown address space enum value");
          return 0;
        });
    // Lowering for MMAMatrixType.
    converter.addConversion([&](gpu::MMAMatrixType type) -> Type {
      return convertMMAToLLVMType(type);
    });
    RewritePatternSet llvmPatterns(m.getContext());

    arith::populateArithToLLVMConversionPatterns(converter, llvmPatterns);
    cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns);
    populateFuncToLLVMConversionPatterns(converter, llvmPatterns);
    populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns);
    populateGpuToNVVMConversionPatterns(converter, llvmPatterns);
    populateGpuWMMAToNVVMConversionPatterns(converter, llvmPatterns);
    populateVectorToLLVMConversionPatterns(converter, llvmPatterns);
    if (this->hasRedux)
      populateGpuSubgroupReduceOpLoweringPattern(converter, llvmPatterns);
    LLVMConversionTarget target(getContext());
    configureGpuToNVVMConversionLegality(target);
    if (failed(applyPartialConversion(m, target, std::move(llvmPatterns))))
      signalPassFailure();
  }
};

} // namespace

void mlir::configureGpuToNVVMConversionLegality(ConversionTarget &target) {
  target.addIllegalOp<func::FuncOp>();
  target.addLegalDialect<::mlir::LLVM::LLVMDialect>();
  target.addLegalDialect<::mlir::NVVM::NVVMDialect>();
  target.addIllegalDialect<gpu::GPUDialect>();
  target.addIllegalOp<LLVM::CopySignOp, LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op,
                      LLVM::FAbsOp, LLVM::FCeilOp, LLVM::FFloorOp, LLVM::FMAOp,
                      LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op, LLVM::Log2Op,
                      LLVM::PowOp, LLVM::RoundEvenOp, LLVM::RoundOp,
                      LLVM::SinOp, LLVM::SqrtOp>();

  // TODO: Remove once we support replacing non-root ops.
  target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>();
}

template <typename OpTy>
static void populateOpPatterns(LLVMTypeConverter &converter,
                               RewritePatternSet &patterns, StringRef f32Func,
                               StringRef f64Func,
                               StringRef f32ApproxFunc = "") {
  patterns.add<ScalarizeVectorOpLowering<OpTy>>(converter);
  patterns.add<OpToFuncCallLowering<OpTy>>(converter, f32Func, f64Func,
                                           f32ApproxFunc);
}
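
// For example, with the registrations below, `math.exp %x : f32` lowers to a
// call to `__nv_expf` and the f64 form to `__nv_exp`; when an f32ApproxFunc
// such as `__nv_fast_expf` is registered, OpToFuncCallLowering may select it
// for f32 depending on the op's fastmath flags. Vector operands are
// scalarized first by ScalarizeVectorOpLowering.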

void mlir::populateGpuSubgroupReduceOpLoweringPattern(
    LLVMTypeConverter &converter, RewritePatternSet &patterns) {
  patterns.add<GPUSubgroupReduceOpLowering>(converter);
}

void mlir::populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,
                                               RewritePatternSet &patterns) {
  populateWithGenerated(patterns);
  patterns.add<GPUPrintfOpToVPrintfLowering>(converter);
  patterns.add<
      gpu::index_lowering::OpLowering<gpu::ThreadIdOp, NVVM::ThreadIdXOp,
                                      NVVM::ThreadIdYOp, NVVM::ThreadIdZOp>,
      gpu::index_lowering::OpLowering<gpu::BlockDimOp, NVVM::BlockDimXOp,
                                      NVVM::BlockDimYOp, NVVM::BlockDimZOp>,
      gpu::index_lowering::OpLowering<gpu::ClusterIdOp, NVVM::ClusterIdXOp,
                                      NVVM::ClusterIdYOp, NVVM::ClusterIdZOp>,
      gpu::index_lowering::OpLowering<gpu::ClusterDimOp, NVVM::ClusterDimXOp,
                                      NVVM::ClusterDimYOp, NVVM::ClusterDimZOp>,
      gpu::index_lowering::OpLowering<
          gpu::ClusterBlockIdOp, NVVM::BlockInClusterIdXOp,
          NVVM::BlockInClusterIdYOp, NVVM::BlockInClusterIdZOp>,
      gpu::index_lowering::OpLowering<gpu::ClusterDimOp, NVVM::ClusterDimXOp,
                                      NVVM::ClusterDimYOp, NVVM::ClusterDimZOp>,
      gpu::index_lowering::OpLowering<gpu::BlockIdOp, NVVM::BlockIdXOp,
                                      NVVM::BlockIdYOp, NVVM::BlockIdZOp>,
      gpu::index_lowering::OpLowering<gpu::GridDimOp, NVVM::GridDimXOp,
                                      NVVM::GridDimYOp, NVVM::GridDimZOp>,
      GPULaneIdOpToNVVM, GPUShuffleOpLowering, GPUReturnOpLowering>(converter);

  patterns.add<GPUDynamicSharedMemoryOpLowering>(
      converter, NVVM::kSharedMemoryAlignmentBit);

  // Explicitly drop memory space when lowering private memory
  // attributions since NVVM models it as `alloca`s in the default
  // memory space and does not support `alloca`s with addrspace(5).
  patterns.add<GPUFuncOpLowering>(
      converter,
      GPUFuncOpLoweringOptions{
          /*allocaAddrSpace=*/0,
          /*workgroupAddrSpace=*/
          static_cast<unsigned>(NVVM::NVVMMemorySpace::kSharedMemorySpace),
          StringAttr::get(&converter.getContext(),
                          NVVM::NVVMDialect::getKernelFuncAttrName()),
          StringAttr::get(&converter.getContext(),
                          NVVM::NVVMDialect::getMaxntidAttrName())});

  populateOpPatterns<arith::RemFOp>(converter, patterns, "__nv_fmodf",
                                    "__nv_fmod");
  populateOpPatterns<math::AbsFOp>(converter, patterns, "__nv_fabsf",
                                   "__nv_fabs");
  populateOpPatterns<math::AcosOp>(converter, patterns, "__nv_acosf",
                                   "__nv_acos");
  populateOpPatterns<math::AcoshOp>(converter, patterns, "__nv_acoshf",
                                    "__nv_acosh");
  populateOpPatterns<math::AsinOp>(converter, patterns, "__nv_asinf",
                                   "__nv_asin");
  populateOpPatterns<math::AsinhOp>(converter, patterns, "__nv_asinhf",
                                    "__nv_asinh");
  populateOpPatterns<math::AtanOp>(converter, patterns, "__nv_atanf",
                                   "__nv_atan");
  populateOpPatterns<math::Atan2Op>(converter, patterns, "__nv_atan2f",
                                    "__nv_atan2");
  populateOpPatterns<math::AtanhOp>(converter, patterns, "__nv_atanhf",
                                    "__nv_atanh");
  populateOpPatterns<math::CbrtOp>(converter, patterns, "__nv_cbrtf",
                                   "__nv_cbrt");
  populateOpPatterns<math::CeilOp>(converter, patterns, "__nv_ceilf",
                                   "__nv_ceil");
  populateOpPatterns<math::CopySignOp>(converter, patterns, "__nv_copysignf",
                                       "__nv_copysign");
  populateOpPatterns<math::CosOp>(converter, patterns, "__nv_cosf", "__nv_cos",
                                  "__nv_fast_cosf");
  populateOpPatterns<math::CoshOp>(converter, patterns, "__nv_coshf",
                                   "__nv_cosh");
  populateOpPatterns<math::ErfOp>(converter, patterns, "__nv_erff", "__nv_erf");
  populateOpPatterns<math::ExpOp>(converter, patterns, "__nv_expf", "__nv_exp",
                                  "__nv_fast_expf");
  populateOpPatterns<math::Exp2Op>(converter, patterns, "__nv_exp2f",
                                   "__nv_exp2");
  populateOpPatterns<math::ExpM1Op>(converter, patterns, "__nv_expm1f",
                                    "__nv_expm1");
  populateOpPatterns<math::FloorOp>(converter, patterns, "__nv_floorf",
                                    "__nv_floor");
  populateOpPatterns<math::FmaOp>(converter, patterns, "__nv_fmaf", "__nv_fma");
  populateOpPatterns<math::LogOp>(converter, patterns, "__nv_logf", "__nv_log",
                                  "__nv_fast_logf");
  populateOpPatterns<math::Log10Op>(converter, patterns, "__nv_log10f",
                                    "__nv_log10", "__nv_fast_log10f");
  populateOpPatterns<math::Log1pOp>(converter, patterns, "__nv_log1pf",
                                    "__nv_log1p");
  populateOpPatterns<math::Log2Op>(converter, patterns, "__nv_log2f",
                                   "__nv_log2", "__nv_fast_log2f");
  populateOpPatterns<math::PowFOp>(converter, patterns, "__nv_powf", "__nv_pow",
                                   "__nv_fast_powf");
  populateOpPatterns<math::RoundOp>(converter, patterns, "__nv_roundf",
                                    "__nv_round");
  populateOpPatterns<math::RoundEvenOp>(converter, patterns, "__nv_rintf",
                                        "__nv_rint");
  populateOpPatterns<math::RsqrtOp>(converter, patterns, "__nv_rsqrtf",
                                    "__nv_rsqrt");
  populateOpPatterns<math::SinOp>(converter, patterns, "__nv_sinf", "__nv_sin",
                                  "__nv_fast_sinf");
  populateOpPatterns<math::SinhOp>(converter, patterns, "__nv_sinhf",
                                   "__nv_sinh");
  populateOpPatterns<math::SqrtOp>(converter, patterns, "__nv_sqrtf",
                                   "__nv_sqrt");
  populateOpPatterns<math::TanOp>(converter, patterns, "__nv_tanf", "__nv_tan",
                                  "__nv_fast_tanf");
  populateOpPatterns<math::TanhOp>(converter, patterns, "__nv_tanhf",
                                   "__nv_tanh");
}