//===- LowerGpuOpsToROCDLOps.cpp - MLIR GPU to ROCDL lowering passes ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass to generate ROCDLIR operations for higher-level
// GPU operations.
//
//===----------------------------------------------------------------------===//

#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"

#include "mlir/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.h"
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/MathToROCDL/MathToROCDL.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
#include "mlir/Dialect/AMDGPU/Utils/Chipset.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/Support/FormatVariadic.h"

#include "../GPUCommon/GPUOpsLowering.h"
#include "../GPUCommon/IndexIntrinsicsOpLowering.h"
#include "../GPUCommon/OpToFuncCallLowering.h"

namespace mlir {
#define GEN_PASS_DEF_CONVERTGPUOPSTOROCDLOPS
#include "mlir/Conversion/Passes.h.inc"
} // namespace mlir

using namespace mlir;

/// Returns true if the given `gpu.func` can be safely called using the bare
/// pointer calling convention, i.e., if all of its memref arguments have a
/// static shape and the identity layout map.
static bool canBeCalledWithBarePointers(gpu::GPUFuncOp func) {
  bool canBeBare = true;
  for (Type type : func.getArgumentTypes())
    if (auto memrefTy = dyn_cast<BaseMemRefType>(type))
      canBeBare &= LLVMTypeConverter::canConvertToBarePtr(memrefTy);
  return canBeBare;
}

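/// Returns the lane ID of the current thread within its wavefront. The
/// llvm.amdgcn.mbcnt.lo intrinsic computes `add + popcount(mask & (bits of
/// lanes below the current lane))` over the low 32 lanes; chaining its result
/// through llvm.amdgcn.mbcnt.hi extends the count to the high lanes. With an
/// all-ones mask this yields the lane index (0..63 on wave64 targets).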
static Value getLaneId(ConversionPatternRewriter &rewriter, Location loc,
                       const unsigned indexBitwidth) {
  auto int32Type = IntegerType::get(rewriter.getContext(), 32);
  Value zero = rewriter.create<arith::ConstantIntOp>(loc, 0, 32);
  Value minus1 = rewriter.create<arith::ConstantIntOp>(loc, -1, 32);
  Value mbcntLo = rewriter.create<ROCDL::MbcntLoOp>(loc, int32Type,
                                                    ValueRange{minus1, zero});
  Value laneId = rewriter.create<ROCDL::MbcntHiOp>(loc, int32Type,
                                                   ValueRange{minus1, mbcntLo});
  return laneId;
}
static constexpr StringLiteral amdgcnDataLayout =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
    "32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:"
    "64-S32-A5-G1-ni:7:8:9";

namespace {
struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
  using ConvertOpToLLVMPattern<gpu::LaneIdOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::LaneIdOp op, gpu::LaneIdOp::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto loc = op->getLoc();
    MLIRContext *context = rewriter.getContext();
    // Convert to:  %mlo = call @llvm.amdgcn.mbcnt.lo(-1, 0)
    // followed by: %lid = call @llvm.amdgcn.mbcnt.hi(-1, %mlo)

    Type intTy = IntegerType::get(context, 32);
    Value zero = rewriter.create<arith::ConstantIntOp>(loc, 0, 32);
    Value minus1 = rewriter.create<arith::ConstantIntOp>(loc, -1, 32);
    Value mbcntLo =
        rewriter.create<ROCDL::MbcntLoOp>(loc, intTy, ValueRange{minus1, zero});
    Value laneId = rewriter.create<ROCDL::MbcntHiOp>(
        loc, intTy, ValueRange{minus1, mbcntLo});
    // Truncate or extend the result depending on the index bitwidth specified
    // by the LLVMTypeConverter options.
    const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
    if (indexBitwidth > 32) {
      laneId = rewriter.create<LLVM::SExtOp>(
          loc, IntegerType::get(context, indexBitwidth), laneId);
    } else if (indexBitwidth < 32) {
      laneId = rewriter.create<LLVM::TruncOp>(
          loc, IntegerType::get(context, indexBitwidth), laneId);
    }
    rewriter.replaceOp(op, {laneId});
    return success();
  }
};

struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
  using ConvertOpToLLVMPattern<gpu::ShuffleOp>::ConvertOpToLLVMPattern;

  /// Lowers a shuffle to the corresponding ROCDL ops.
  ///
  /// Uses the `width` argument to check whether the source lane participates;
  /// if it does not, the destination lane is the lane itself.
  ///
  /// Shuffle with DS Bpermute:
  /// let shflMode = [xor, up, down, idx]
  /// let width = 32 (usually the warp size), step = [1, 2, 4, 8, 16, ..., width].
  /// 1. curLaneId = using mbcnt.lo + mbcnt.hi
  /// 2. widthOrZeroIfOutside = (curLaneId + width) & -width
  /// 3. dstLane = shflMode(curLaneId, step)
  /// 4. isActiveSrcLane = dstLane < widthOrZeroIfOutside
  /// 5. dstLane = isActiveSrcLane ? dstLane : curLaneId
  /// 6. dwordAlignedDstLane = dstLane * 4 or dstLane << 2.
  /// 7. bpermute(dwordAlignedDstLane, shfl_value).
  ///
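  /// For example, in xor mode with offset (step) = 1 and width = 64, lane 5
  /// computes dstLane = 5 ^ 1 = 4 and widthOrZeroIfOutside = (5 + 64) & -64 =
  /// 64; since 4 < 64, the source lane is active, and ds_bpermute fetches
  /// lane 4's value from byte address 4 << 2 = 16.
  ///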
  LogicalResult
  matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    Location loc = op->getLoc();
    // TODO: Add support for non-32-bit shuffle values.
    if (adaptor.getValue().getType().getIntOrFloatBitWidth() != 32)
      return failure();
    const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
    Value srcLaneId = getLaneId(rewriter, loc, indexBitwidth);

    auto int32Type = IntegerType::get(rewriter.getContext(), 32);
    Value width = adaptor.getWidth();
    Value zero = rewriter.create<LLVM::ConstantOp>(loc, int32Type, 0);
    Value negwidth = rewriter.create<LLVM::SubOp>(loc, int32Type, zero, width);
    Value add = rewriter.create<LLVM::AddOp>(loc, int32Type, srcLaneId, width);
    Value widthOrZeroIfOutside =
        rewriter.create<LLVM::AndOp>(loc, int32Type, add, negwidth);
    Value dstLane;
    // TODO: Add support for gpu::ShuffleMode::UP.
    // TODO: Use ds_swizzle for XOR when step/offsets are constants for better
    // perf.
    switch (op.getMode()) {
    case gpu::ShuffleMode::DOWN:
      dstLane = rewriter.create<LLVM::AddOp>(loc, int32Type, srcLaneId,
                                             adaptor.getOffset());
      break;
    case gpu::ShuffleMode::XOR:
      dstLane = rewriter.create<LLVM::XOrOp>(loc, int32Type, srcLaneId,
                                             adaptor.getOffset());
      break;
    case gpu::ShuffleMode::IDX:
      dstLane = adaptor.getOffset();
      break;
    default:
      return failure();
    }
    Value isActiveSrcLane = rewriter.create<LLVM::ICmpOp>(
        loc, LLVM::ICmpPredicate::slt, dstLane, widthOrZeroIfOutside);
    Value selectDstLane = rewriter.create<LLVM::SelectOp>(loc, isActiveSrcLane,
                                                          dstLane, srcLaneId);
    Value two = rewriter.create<LLVM::ConstantOp>(loc, int32Type, 2);
    Value dwordAlignedDstLane =
        rewriter.create<LLVM::ShlOp>(loc, int32Type, selectDstLane, two);
    Value initShflValue = adaptor.getValue();
    if (adaptor.getValue().getType().isF32()) {
      initShflValue =
          rewriter.create<LLVM::BitcastOp>(loc, int32Type, initShflValue);
    }
    Value shflValue = rewriter.create<ROCDL::DsBpermuteOp>(
        loc, int32Type, dwordAlignedDstLane, initShflValue);
    if (adaptor.getValue().getType().isF32()) {
      shflValue = rewriter.create<LLVM::BitcastOp>(
          loc, adaptor.getValue().getType(), shflValue);
    }
    rewriter.replaceOp(op, {shflValue, isActiveSrcLane});
    return success();
  }
};
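
// For reference, a sketch of what `gpu.shuffle xor %v, %offset, %width : f32`
// lowers to (pseudo-IR; value names and types abbreviated):
//   %lid = mbcnt.hi(-1, mbcnt.lo(-1, 0))    // current lane ID
//   %w0  = (%lid + %width) & -%width        // widthOrZeroIfOutside
//   %dst = %lid ^ %offset
//   %act = icmp slt %dst, %w0               // is the source lane active?
//   %sel = select %act, %dst, %lid
//   %res = rocdl.ds_bpermute(%sel << 2, bitcast %v : f32 to i32)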

/// Import the GPU Ops to ROCDL Patterns.
#include "GPUToROCDL.cpp.inc"

// A pass that replaces all occurrences of GPU device operations with their
// corresponding ROCDL equivalents.
//
// This pass only handles device code and is not meant to be run on GPU host
// code.
struct LowerGpuOpsToROCDLOpsPass
    : public impl::ConvertGpuOpsToROCDLOpsBase<LowerGpuOpsToROCDLOpsPass> {
  LowerGpuOpsToROCDLOpsPass() = default;
  LowerGpuOpsToROCDLOpsPass(const std::string &chipset, unsigned indexBitwidth,
                            bool useBarePtrCallConv,
                            gpu::amd::Runtime runtime) {
    if (this->chipset.getNumOccurrences() == 0)
      this->chipset = chipset;
    if (this->indexBitwidth.getNumOccurrences() == 0)
      this->indexBitwidth = indexBitwidth;
    if (this->useBarePtrCallConv.getNumOccurrences() == 0)
      this->useBarePtrCallConv = useBarePtrCallConv;
    if (this->runtime.getNumOccurrences() == 0)
      this->runtime = runtime;
  }

  void runOnOperation() override {
    gpu::GPUModuleOp m = getOperation();
    MLIRContext *ctx = m.getContext();

    auto llvmDataLayout = m->getAttrOfType<StringAttr>(
        LLVM::LLVMDialect::getDataLayoutAttrName());
    if (!llvmDataLayout) {
      llvmDataLayout = StringAttr::get(ctx, amdgcnDataLayout);
      m->setAttr(LLVM::LLVMDialect::getDataLayoutAttrName(), llvmDataLayout);
    }
    // Request C wrapper emission.
    for (auto func : m.getOps<func::FuncOp>()) {
      func->setAttr(LLVM::LLVMDialect::getEmitCWrapperAttrName(),
                    UnitAttr::get(ctx));
    }

    FailureOr<amdgpu::Chipset> maybeChipset = amdgpu::Chipset::parse(chipset);
    if (failed(maybeChipset)) {
      emitError(UnknownLoc::get(ctx), "Invalid chipset name: " + chipset);
      return signalPassFailure();
    }

    // Customize the bitwidth used for the device-side index computations.
    LowerToLLVMOptions options(
        ctx, DataLayout(cast<DataLayoutOpInterface>(m.getOperation())));
    options.dataLayout = llvm::DataLayout(llvmDataLayout.getValue());
    if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
      options.overrideIndexBitwidth(indexBitwidth);

    if (useBarePtrCallConv) {
      options.useBarePtrCallConv = true;
      WalkResult canUseBarePointers =
          m.walk([](gpu::GPUFuncOp func) -> WalkResult {
            if (canBeCalledWithBarePointers(func))
              return WalkResult::advance();
            return WalkResult::interrupt();
          });
      if (canUseBarePointers.wasInterrupted()) {
        emitError(UnknownLoc::get(ctx),
                  "bare pointer calling convention requires all memrefs to "
                  "have static shape and use the identity map");
        return signalPassFailure();
      }
    }

    // Apply in-dialect lowering. In-dialect lowering will replace ops which
    // need to be lowered further, which is not supported by a single
    // conversion pass.
    {
      RewritePatternSet patterns(ctx);
      populateGpuRewritePatterns(patterns);
      arith::populateExpandBFloat16Patterns(patterns);
      (void)applyPatternsGreedily(m, std::move(patterns));
    }

    LLVMTypeConverter converter(ctx, options);
    populateGpuMemorySpaceAttributeConversions(
        converter, [](gpu::AddressSpace space) {
          switch (space) {
          case gpu::AddressSpace::Global:
            return 1;
          case gpu::AddressSpace::Workgroup:
            return 3;
          case gpu::AddressSpace::Private:
            return 5;
          }
          llvm_unreachable("unknown address space enum value");
          return 0;
        });

    RewritePatternSet llvmPatterns(ctx);

    arith::populateArithToLLVMConversionPatterns(converter, llvmPatterns);
    populateAMDGPUToROCDLConversionPatterns(converter, llvmPatterns,
                                            *maybeChipset);
    populateVectorToLLVMConversionPatterns(converter, llvmPatterns);
    populateMathToLLVMConversionPatterns(converter, llvmPatterns);
    cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns);
    populateFuncToLLVMConversionPatterns(converter, llvmPatterns);
    populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns);
    populateGpuToROCDLConversionPatterns(converter, llvmPatterns, runtime);

    LLVMConversionTarget target(getContext());
    configureGpuToROCDLConversionLegality(target);
    if (failed(applyPartialConversion(m, target, std::move(llvmPatterns))))
      signalPassFailure();
    auto *rocdlDialect = getContext().getLoadedDialect<ROCDL::ROCDLDialect>();
    auto reqdWorkGroupSizeAttrHelper =
        rocdlDialect->getReqdWorkGroupSizeAttrHelper();
    auto flatWorkGroupSizeAttrHelper =
        rocdlDialect->getFlatWorkGroupSizeAttrHelper();
    // Manually rewrite known block size attributes so the LLVMIR translation
    // infrastructure can pick them up.
    m.walk([&](LLVM::LLVMFuncOp op) {
      if (reqdWorkGroupSizeAttrHelper.isAttrPresent(op)) {
        auto blockSizes = reqdWorkGroupSizeAttrHelper.getAttr(op);
        // Also set up the rocdl.flat_work_group_size attribute to prevent
        // conflicting metadata.
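        // For example, rocdl.reqd_work_group_size = [64, 2, 1] produces
        // rocdl.flat_work_group_size = "128,128".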
        uint32_t flatSize = 1;
        for (uint32_t size : blockSizes.asArrayRef()) {
          flatSize *= size;
        }
        StringAttr flatSizeAttr =
            StringAttr::get(ctx, Twine(flatSize) + "," + Twine(flatSize));
        flatWorkGroupSizeAttrHelper.setAttr(op, flatSizeAttr);
      }
    });
  }
};

} // namespace

void mlir::configureGpuToROCDLConversionLegality(ConversionTarget &target) {
  target.addIllegalOp<func::FuncOp>();
  target.addLegalDialect<::mlir::LLVM::LLVMDialect>();
  target.addLegalDialect<ROCDL::ROCDLDialect>();
  target.addIllegalDialect<gpu::GPUDialect>();
  target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op, LLVM::FCeilOp,
                      LLVM::FFloorOp, LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op,
                      LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp>();
  // These ops are legal for the f32 type.
  target.addDynamicallyLegalOp<LLVM::ExpOp, LLVM::LogOp>([](Operation *op) {
    return any_of(op->getOperandTypes(), llvm::IsaPred<Float32Type>);
  });
  // TODO: Remove once we support replacing non-root ops.
  target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>();
}

template <typename OpTy>
static void populateOpPatterns(const LLVMTypeConverter &converter,
                               RewritePatternSet &patterns, StringRef f32Func,
                               StringRef f64Func, StringRef f32ApproxFunc,
                               StringRef f16Func) {
  patterns.add<ScalarizeVectorOpLowering<OpTy>>(converter);
  patterns.add<OpToFuncCallLowering<OpTy>>(converter, f32Func, f32ApproxFunc,
                                           f16Func);
}
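
// As a hypothetical illustration (not a call made in this file), a math op
// could be mapped to the ROCm OCML device library like so:
//   populateOpPatterns<math::SqrtOp>(converter, patterns, "__ocml_sqrt_f32",
//                                    "__ocml_sqrt_f64", /*f32ApproxFunc=*/"",
//                                    "__ocml_sqrt_f16");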

void mlir::populateGpuToROCDLConversionPatterns(
    const LLVMTypeConverter &converter, RewritePatternSet &patterns,
    mlir::gpu::amd::Runtime runtime) {
  using gpu::index_lowering::IndexKind;
  using gpu::index_lowering::IntrType;
  using mlir::gpu::amd::Runtime;
  auto *rocdlDialect =
      converter.getContext().getLoadedDialect<ROCDL::ROCDLDialect>();
  populateWithGenerated(patterns);
  patterns.add<
      gpu::index_lowering::OpLowering<gpu::ThreadIdOp, ROCDL::ThreadIdXOp,
                                      ROCDL::ThreadIdYOp, ROCDL::ThreadIdZOp>>(
      converter, IndexKind::Block, IntrType::Id);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::BlockIdOp, ROCDL::BlockIdXOp, ROCDL::BlockIdYOp, ROCDL::BlockIdZOp>>(
      converter, IndexKind::Grid, IntrType::Id);
  patterns.add<
      gpu::index_lowering::OpLowering<gpu::BlockDimOp, ROCDL::BlockDimXOp,
                                      ROCDL::BlockDimYOp, ROCDL::BlockDimZOp>>(
      converter, IndexKind::Block, IntrType::Dim);
  patterns.add<gpu::index_lowering::OpLowering<
      gpu::GridDimOp, ROCDL::GridDimXOp, ROCDL::GridDimYOp, ROCDL::GridDimZOp>>(
      converter, IndexKind::Grid, IntrType::Dim);
  patterns.add<GPUReturnOpLowering>(converter);
  patterns.add<GPUFuncOpLowering>(
      converter,
      GPUFuncOpLoweringOptions{
          /*allocaAddrSpace=*/ROCDL::ROCDLDialect::kPrivateMemoryAddressSpace,
          /*workgroupAddrSpace=*/ROCDL::ROCDLDialect::kSharedMemoryAddressSpace,
          rocdlDialect->getKernelAttrHelper().getName(),
          rocdlDialect->getReqdWorkGroupSizeAttrHelper().getName()});
  if (Runtime::HIP == runtime) {
    patterns.add<GPUPrintfOpToHIPLowering>(converter);
  } else if (Runtime::OpenCL == runtime) {
    // Use address space = 4 to match the OpenCL definition of printf().
    patterns.add<GPUPrintfOpToLLVMCallLowering>(converter, /*addressSpace=*/4);
  }
  // TODO: Add alignment for workgroup memory.
  patterns.add<GPUDynamicSharedMemoryOpLowering>(converter);

  patterns.add<GPUShuffleOpLowering, GPULaneIdOpToROCDL>(converter);

  populateMathToROCDLConversionPatterns(converter, patterns);
}

std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
mlir::createLowerGpuOpsToROCDLOpsPass(const std::string &chipset,
                                      unsigned indexBitwidth,
                                      bool useBarePtrCallConv,
                                      gpu::amd::Runtime runtime) {
  return std::make_unique<LowerGpuOpsToROCDLOpsPass>(
      chipset, indexBitwidth, useBarePtrCallConv, runtime);
}
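
// A sketch of typical usage, assuming the standard registration of this pass
// (the exact option spelling is defined in Passes.td):
//
//   mlir-opt kernel.mlir --convert-gpu-to-rocdl='chipset=gfx90a index-bitwidth=32'
//
// or, when building a pipeline in C++:
//
//   pm.addNestedPass<gpu::GPUModuleOp>(createLowerGpuOpsToROCDLOpsPass());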