//===- LowerGpuOpsToROCDLOps.cpp - MLIR GPU to ROCDL lowering passes ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass to generate ROCDLIR operations for higher-level
// GPU operations.
//
//===----------------------------------------------------------------------===//

#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/Passes.h"

// NOTE: the listing elides most of the include block; the headers below
// restore the ones the code in this file requires.
#include "mlir/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.h"
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/MathToROCDL/MathToROCDL.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
#include "mlir/Dialect/AMDGPU/Utils/Chipset.h"
#include "mlir/Dialect/Arith/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/Support/FormatVariadic.h"

#include "../GPUCommon/GPUOpsLowering.h"
#include "../GPUCommon/IndexIntrinsicsOpLowering.h"

namespace mlir {
#define GEN_PASS_DEF_CONVERTGPUOPSTOROCDLOPS
#include "mlir/Conversion/Passes.h.inc"
} // namespace mlir

using namespace mlir;

/// Returns true if the given `gpu.func` can be safely called using the bare
/// pointer calling convention.
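/// For example, an argument of type `memref<8x8xf32>` (static shape, identity
/// layout) can be passed as a bare pointer, while `memref<?xf32>` cannot,
/// since its dynamic size has to travel in a full memref descriptor.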
static bool canBeCalledWithBarePointers(gpu::GPUFuncOp func) {
  bool canBeBare = true;
  for (Type type : func.getArgumentTypes())
    if (auto memrefTy = dyn_cast<BaseMemRefType>(type))
      canBeBare &= LLVMTypeConverter::canConvertToBarePtr(memrefTy);
  return canBeBare;
}

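/// Computes the lane ID of the current workitem. AMDGPU has no single lane-ID
/// intrinsic; the canonical idiom is to count, with an all-ones mask, how many
/// lanes exist below the current one: mbcnt.lo covers the low 32 lanes and
/// mbcnt.hi adds the high 32, so the combined result is the lane ID.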
Value getLaneId(ConversionPatternRewriter &rewriter, Location loc,
                const unsigned indexBitwidth) {
  auto int32Type = IntegerType::get(rewriter.getContext(), 32);
  Value zero = rewriter.create<arith::ConstantIntOp>(loc, 0, 32);
  Value minus1 = rewriter.create<arith::ConstantIntOp>(loc, -1, 32);
  Value mbcntLo = rewriter.create<ROCDL::MbcntLoOp>(loc, int32Type,
                                                    ValueRange{minus1, zero});
  Value laneId = rewriter.create<ROCDL::MbcntHiOp>(loc, int32Type,
                                                   ValueRange{minus1, mbcntLo});
  return laneId;
}
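/// The AMDGCN data layout string, mirroring LLVM's AMDGPU target. The p<N>
/// entries fix pointer sizes per address space (e.g. 64-bit global pointers,
/// 32-bit workgroup/LDS and private pointers); A5 makes allocas default to
/// the private address space.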
static constexpr StringLiteral amdgcnDataLayout =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
    "32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:"
    "64-S32-A5-G1-ni:7:8:9";

namespace {
struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
  using ConvertOpToLLVMPattern<gpu::LaneIdOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::LaneIdOp op, gpu::LaneIdOp::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto loc = op->getLoc();
    MLIRContext *context = rewriter.getContext();
    // Convert to: %mlo = call @llvm.amdgcn.mbcnt.lo(-1, 0)
    // followed by: %lid = call @llvm.amdgcn.mbcnt.hi(-1, %mlo)

    Type intTy = IntegerType::get(context, 32);
    Value zero = rewriter.create<arith::ConstantIntOp>(loc, 0, 32);
    Value minus1 = rewriter.create<arith::ConstantIntOp>(loc, -1, 32);
    Value mbcntLo =
        rewriter.create<ROCDL::MbcntLoOp>(loc, intTy, ValueRange{minus1, zero});
    Value laneId = rewriter.create<ROCDL::MbcntHiOp>(
        loc, intTy, ValueRange{minus1, mbcntLo});
    // Truncate or extend the result depending on the index bitwidth specified
    // by the LLVMTypeConverter options.
    const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
    if (indexBitwidth > 32) {
      laneId = rewriter.create<LLVM::SExtOp>(
          loc, IntegerType::get(context, indexBitwidth), laneId);
    } else if (indexBitwidth < 32) {
      laneId = rewriter.create<LLVM::TruncOp>(
          loc, IntegerType::get(context, indexBitwidth), laneId);
    }
    rewriter.replaceOp(op, {laneId});
    return success();
  }
};

struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
  using ConvertOpToLLVMPattern<gpu::ShuffleOp>::ConvertOpToLLVMPattern;

  /// Lowers a shuffle to the corresponding ROCDL ops.
  ///
  /// Uses the `width` argument to check whether the source lane participates;
  /// if it does not, the destination lane is the current lane itself.
  ///
  /// Shuffle with DS Bpermute:
  ///   let shflMode = [xor, up, down, idx]
  ///   let width = 32 (usually the warp size), step = [1, 2, 4, 8, 16, ..., width]
  ///   1. curLaneId = mbcnt.hi(-1, mbcnt.lo(-1, 0))
  ///   2. widthOrZeroIfOutside = (curLaneId + width) & -width
  ///   3. dstLane = shflMode(curLaneId, step)
  ///   4. isActiveSrcLane = dstLane < widthOrZeroIfOutside
  ///   5. dstLane = isActiveSrcLane ? dstLane : curLaneId
  ///   6. dwordAlignedDstLane = dstLane * 4, i.e. dstLane << 2
  ///   7. bpermute(dwordAlignedDstLane, shfl_value)
  ///
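  /// For example, with mode = xor, width = 64 and offset = 1, lane 5 reads
  /// from lane 4 (5 ^ 1), and ds_bpermute is given the byte address 4 << 2.
  ///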
  LogicalResult
  matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    Location loc = op->getLoc();
    // TODO: Add support for non 32-bit shuffle values.
    if (adaptor.getValue().getType().getIntOrFloatBitWidth() != 32)
      return failure();
    const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
    Value srcLaneId = getLaneId(rewriter, loc, indexBitwidth);

    auto int32Type = IntegerType::get(rewriter.getContext(), 32);
    Value width = adaptor.getWidth();
    Value zero = rewriter.create<LLVM::ConstantOp>(loc, int32Type, 0);
    Value negwidth = rewriter.create<LLVM::SubOp>(loc, int32Type, zero, width);
    Value add = rewriter.create<LLVM::AddOp>(loc, int32Type, srcLaneId, width);
    Value widthOrZeroIfOutside =
        rewriter.create<LLVM::AndOp>(loc, int32Type, add, negwidth);
    Value dstLane;
    // TODO: Add support for gpu::ShuffleMode::UP and gpu::ShuffleMode::DOWN.
    // TODO: Use ds_swizzle for XOR when step/offsets are constants for better
    // perf.
    switch (op.getMode()) {
    case gpu::ShuffleMode::DOWN:
      dstLane = rewriter.create<LLVM::AddOp>(loc, int32Type, srcLaneId,
                                             adaptor.getOffset());
      break;
    case gpu::ShuffleMode::XOR:
      dstLane = rewriter.create<LLVM::XOrOp>(loc, int32Type, srcLaneId,
                                             adaptor.getOffset());
      break;
    case gpu::ShuffleMode::IDX:
      dstLane = adaptor.getOffset();
      break;
    default:
      return failure();
    }
    Value isActiveSrcLane = rewriter.create<LLVM::ICmpOp>(
        loc, LLVM::ICmpPredicate::slt, dstLane, widthOrZeroIfOutside);
    Value selectDstLane = rewriter.create<LLVM::SelectOp>(loc, isActiveSrcLane,
                                                          dstLane, srcLaneId);
    Value two = rewriter.create<LLVM::ConstantOp>(loc, int32Type, 2);
    Value dwordAlignedDstLane =
        rewriter.create<LLVM::ShlOp>(loc, int32Type, selectDstLane, two);
    Value initShflValue = adaptor.getValue();
    if (adaptor.getValue().getType().isF32()) {
      initShflValue =
          rewriter.create<LLVM::BitcastOp>(loc, int32Type, initShflValue);
    }
    Value shflValue = rewriter.create<ROCDL::DsBpermuteOp>(
        loc, int32Type, dwordAlignedDstLane, initShflValue);
    if (adaptor.getValue().getType().isF32()) {
      shflValue = rewriter.create<LLVM::BitcastOp>(
          loc, adaptor.getValue().getType(), shflValue);
    }
    rewriter.replaceOp(op, {shflValue, isActiveSrcLane});
    return success();
  }
};

/// Import the GPU Ops to ROCDL Patterns.
#include "GPUToROCDL.cpp.inc"

// A pass that replaces all occurrences of GPU device operations with their
// corresponding ROCDL equivalent.
//
// This pass only handles device code and is not meant to be run on GPU host
// code.
struct LowerGpuOpsToROCDLOpsPass
    : public impl::ConvertGpuOpsToROCDLOpsBase<LowerGpuOpsToROCDLOpsPass> {
  LowerGpuOpsToROCDLOpsPass() = default;
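  // Constructor arguments only seed options that were not already set on the
  // command line (getNumOccurrences() == 0), so explicit pass flags win.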
  LowerGpuOpsToROCDLOpsPass(const std::string &chipset, unsigned indexBitwidth,
                            bool useBarePtrCallConv,
                            gpu::amd::Runtime runtime) {
    if (this->chipset.getNumOccurrences() == 0)
      this->chipset = chipset;
    if (this->indexBitwidth.getNumOccurrences() == 0)
      this->indexBitwidth = indexBitwidth;
    if (this->useBarePtrCallConv.getNumOccurrences() == 0)
      this->useBarePtrCallConv = useBarePtrCallConv;
    if (this->runtime.getNumOccurrences() == 0)
      this->runtime = runtime;
  }

  void runOnOperation() override {
    gpu::GPUModuleOp m = getOperation();
    MLIRContext *ctx = m.getContext();

    auto llvmDataLayout = m->getAttrOfType<StringAttr>(
        LLVM::LLVMDialect::getDataLayoutAttrName());
    if (!llvmDataLayout) {
      llvmDataLayout = StringAttr::get(ctx, amdgcnDataLayout);
      m->setAttr(LLVM::LLVMDialect::getDataLayoutAttrName(), llvmDataLayout);
    }
    // Request C wrapper emission.
    for (auto func : m.getOps<func::FuncOp>()) {
      func->setAttr(LLVM::LLVMDialect::getEmitCWrapperAttrName(),
                    UnitAttr::get(ctx));
    }

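    // Chipset names are AMDGPU target strings such as "gfx900" or "gfx90a".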
    FailureOr<amdgpu::Chipset> maybeChipset = amdgpu::Chipset::parse(chipset);
    if (failed(maybeChipset)) {
      emitError(UnknownLoc::get(ctx), "Invalid chipset name: " + chipset);
      return signalPassFailure();
    }

    // Customize the bitwidth used for the device side index computations.
    LowerToLLVMOptions options(
        ctx, DataLayout(cast<DataLayoutOpInterface>(m.getOperation())));
    options.dataLayout = llvm::DataLayout(llvmDataLayout.getValue());
    if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
      options.overrideIndexBitwidth(indexBitwidth);

    if (useBarePtrCallConv) {
      options.useBarePtrCallConv = true;
      WalkResult canUseBarePointers =
          m.walk([](gpu::GPUFuncOp func) -> WalkResult {
            if (canBeCalledWithBarePointers(func))
              return WalkResult::advance();
            return WalkResult::interrupt();
          });
      if (canUseBarePointers.wasInterrupted()) {
        emitError(UnknownLoc::get(ctx),
                  "bare pointer calling convention requires all memrefs to "
                  "have static shape and use the identity map");
        return signalPassFailure();
      }
    }

    // Apply in-dialect lowering. In-dialect lowering will replace
    // ops which need to be lowered further, which is not supported by a
    // single conversion pass.
    {
      RewritePatternSet patterns(ctx);
      populateGpuRewritePatterns(patterns);
      arith::populateExpandBFloat16Patterns(patterns);
      (void)applyPatternsGreedily(m, std::move(patterns));
    }

    LLVMTypeConverter converter(ctx, options);
    populateGpuMemorySpaceAttributeConversions(
        converter, [](gpu::AddressSpace space) {
          switch (space) {
          case gpu::AddressSpace::Global:
            return 1;
          case gpu::AddressSpace::Workgroup:
            return 3;
          case gpu::AddressSpace::Private:
            return 5;
          }
          llvm_unreachable("unknown address space enum value");
          return 0;
        });
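    // E.g. a memref in #gpu.address_space<workgroup> now lowers to an LLVM
    // pointer in address space 3 (LDS).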

    RewritePatternSet llvmPatterns(ctx);

    arith::populateArithToLLVMConversionPatterns(converter, llvmPatterns);
    populateAMDGPUToROCDLConversionPatterns(converter, llvmPatterns,
                                            *maybeChipset);
    populateVectorToLLVMConversionPatterns(converter, llvmPatterns);
    populateMathToLLVMConversionPatterns(converter, llvmPatterns);
    cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns);
    cf::populateAssertToLLVMConversionPattern(converter, llvmPatterns);
    populateFuncToLLVMConversionPatterns(converter, llvmPatterns);
    populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns);
    populateGpuToROCDLConversionPatterns(converter, llvmPatterns, runtime);
    LLVMConversionTarget target(getContext());
    configureGpuToROCDLConversionLegality(target);
    if (failed(applyPartialConversion(m, target, std::move(llvmPatterns))))
      signalPassFailure();
    auto *rocdlDialect = getContext().getLoadedDialect<ROCDL::ROCDLDialect>();
    auto reqdWorkGroupSizeAttrHelper =
        rocdlDialect->getReqdWorkGroupSizeAttrHelper();
    auto flatWorkGroupSizeAttrHelper =
        rocdlDialect->getFlatWorkGroupSizeAttrHelper();
    // Manually rewrite known block size attributes so the LLVMIR translation
    // infrastructure can pick them up.
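    // E.g. a known block size of [64, 2, 1] yields
    // rocdl.flat_work_group_size = "128,128" (min and max are the product).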
    m.walk([&](LLVM::LLVMFuncOp op) {
      if (reqdWorkGroupSizeAttrHelper.isAttrPresent(op)) {
        auto blockSizes = reqdWorkGroupSizeAttrHelper.getAttr(op);
        // Also set up the rocdl.flat_work_group_size attribute to prevent
        // conflicting metadata.
        uint32_t flatSize = 1;
        for (uint32_t size : blockSizes.asArrayRef()) {
          flatSize *= size;
        }
        StringAttr flatSizeAttr =
            StringAttr::get(ctx, Twine(flatSize) + "," + Twine(flatSize));
        flatWorkGroupSizeAttrHelper.setAttr(op, flatSizeAttr);
      }
    });
  }
};

} // namespace

void mlir::configureGpuToROCDLConversionLegality(ConversionTarget &target) {
  target.addIllegalOp<func::FuncOp>();
  target.addLegalDialect<::mlir::LLVM::LLVMDialect>();
  target.addLegalDialect<ROCDL::ROCDLDialect>();
  target.addIllegalDialect<gpu::GPUDialect>();
  target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op, LLVM::FCeilOp,
                      LLVM::FFloorOp, LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op,
                      LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp>();
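  // These intrinsics have no direct AMDGPU lowering; marking them illegal
  // steers the conversion toward the ROCm device-library call patterns (see
  // populateMathToROCDLConversionPatterns) instead.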
  // These ops are legal for f32 type.
  target.addDynamicallyLegalOp<LLVM::ExpOp, LLVM::LogOp>([](Operation *op) {
    return any_of(op->getOperandTypes(), llvm::IsaPred<Float32Type>);
  });
  // TODO: Remove once we support replacing non-root ops.
  target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>();
}

void mlir::populateGpuToROCDLConversionPatterns(
    const LLVMTypeConverter &converter, RewritePatternSet &patterns,
    mlir::gpu::amd::Runtime runtime) {
  using gpu::index_lowering::IndexKind;
  using gpu::index_lowering::IntrType;
  using mlir::gpu::amd::Runtime;
  auto *rocdlDialect =
      converter.getContext().getLoadedDialect<ROCDL::ROCDLDialect>();
  populateWithGenerated(patterns);
  patterns.add<
      gpu::index_lowering::OpLowering<gpu::ThreadIdOp, ROCDL::ThreadIdXOp,
                                      ROCDL::ThreadIdYOp, ROCDL::ThreadIdZOp>>(
      converter, IndexKind::Block, IntrType::Id);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::BlockIdOp, ROCDL::BlockIdXOp, ROCDL::BlockIdYOp, ROCDL::BlockIdZOp>>(
      converter, IndexKind::Grid, IntrType::Id);
  patterns.add<
      gpu::index_lowering::OpLowering<gpu::BlockDimOp, ROCDL::BlockDimXOp,
                                      ROCDL::BlockDimYOp, ROCDL::BlockDimZOp>>(
      converter, IndexKind::Block, IntrType::Dim);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::GridDimOp, ROCDL::GridDimXOp, ROCDL::GridDimYOp, ROCDL::GridDimZOp>>(
      converter, IndexKind::Grid, IntrType::Dim);
  patterns.add<GPUReturnOpLowering>(converter);
  patterns.add<GPUFuncOpLowering>(
      converter,
      GPUFuncOpLoweringOptions{
          /*allocaAddrSpace=*/ROCDL::ROCDLDialect::kPrivateMemoryAddressSpace,
          /*workgroupAddrSpace=*/ROCDL::ROCDLDialect::kSharedMemoryAddressSpace,
          rocdlDialect->getKernelAttrHelper().getName(),
          rocdlDialect->getReqdWorkGroupSizeAttrHelper().getName()});
  if (Runtime::HIP == runtime) {
    patterns.add<GPUPrintfOpToHIPLowering>(converter);
  } else if (Runtime::OpenCL == runtime) {
    // Use address space = 4 to match the OpenCL definition of printf().
    patterns.add<GPUPrintfOpToLLVMCallLowering>(converter, /*addressSpace=*/4);
  }
  // TODO: Add alignment for workgroup memory.
  patterns.add<GPUDynamicSharedMemoryOpLowering>(converter);

  patterns.add<GPUShuffleOpLowering, GPULaneIdOpToROCDL>(converter);

  populateMathToROCDLConversionPatterns(converter, patterns);
}

std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
mlir::createLowerGpuOpsToROCDLOpsPass(const std::string &chipset,
                                      unsigned indexBitwidth,
                                      bool useBarePtrCallConv,
                                      gpu::amd::Runtime runtime) {
  return std::make_unique<LowerGpuOpsToROCDLOpsPass>(
      chipset, indexBitwidth, useBarePtrCallConv, runtime);
}
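
// Typical invocation from the command line (a sketch; the exact option
// spelling is defined in mlir/Conversion/Passes.td):
//   mlir-opt --convert-gpu-to-rocdl='chipset=gfx908 index-bitwidth=32' in.mlir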