MLIR  20.0.0git
GPUToLLVMSPV.cpp
Go to the documentation of this file.
1 //===- GPUToLLVMSPV.cpp - Convert GPU operations to LLVM dialect ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include "mlir/Conversion/GPUToLLVMSPV/GPUToLLVMSPVPass.h"

#include "../GPUCommon/GPUOpsLowering.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVAttributes.h"
#include "mlir/Dialect/SPIRV/IR/TargetAndABI.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"

#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/FormatVariadic.h"
36 
37 using namespace mlir;
38 
39 namespace mlir {
40 #define GEN_PASS_DEF_CONVERTGPUOPSTOLLVMSPVOPS
41 #include "mlir/Conversion/Passes.h.inc"
42 } // namespace mlir
43 
44 //===----------------------------------------------------------------------===//
45 // Helper Functions
46 //===----------------------------------------------------------------------===//
47 
48 static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable,
49  StringRef name,
50  ArrayRef<Type> paramTypes,
51  Type resultType, bool isMemNone,
52  bool isConvergent) {
53  auto func = dyn_cast_or_null<LLVM::LLVMFuncOp>(
54  SymbolTable::lookupSymbolIn(symbolTable, name));
55  if (!func) {
56  OpBuilder b(symbolTable->getRegion(0));
57  func = b.create<LLVM::LLVMFuncOp>(
58  symbolTable->getLoc(), name,
59  LLVM::LLVMFunctionType::get(resultType, paramTypes));
60  func.setCConv(LLVM::cconv::CConv::SPIR_FUNC);
61  func.setNoUnwind(true);
62  func.setWillReturn(true);
63 
64  if (isMemNone) {
65  // no externally observable effects
66  constexpr auto noModRef = mlir::LLVM::ModRefInfo::NoModRef;
67  auto memAttr = b.getAttr<LLVM::MemoryEffectsAttr>(
68  /*other=*/noModRef,
69  /*argMem=*/noModRef, /*inaccessibleMem=*/noModRef);
70  func.setMemoryEffectsAttr(memAttr);
71  }
72 
73  func.setConvergent(isConvergent);
74  }
75  return func;
76 }
77 
78 static LLVM::CallOp createSPIRVBuiltinCall(Location loc,
79  ConversionPatternRewriter &rewriter,
80  LLVM::LLVMFuncOp func,
81  ValueRange args) {
82  auto call = rewriter.create<LLVM::CallOp>(loc, func, args);
83  call.setCConv(func.getCConv());
84  call.setConvergentAttr(func.getConvergentAttr());
85  call.setNoUnwindAttr(func.getNoUnwindAttr());
86  call.setWillReturnAttr(func.getWillReturnAttr());
87  call.setMemoryEffectsAttr(func.getMemoryEffectsAttr());
88  return call;
89 }
90 
91 namespace {
92 //===----------------------------------------------------------------------===//
93 // Barriers
94 //===----------------------------------------------------------------------===//
95 
96 /// Replace `gpu.barrier` with an `llvm.call` to `barrier` with
97 /// `CLK_LOCAL_MEM_FENCE` argument, indicating work-group memory scope:
98 /// ```
99 /// // gpu.barrier
100 /// %c1 = llvm.mlir.constant(1: i32) : i32
101 /// llvm.call spir_funccc @_Z7barrierj(%c1) : (i32) -> ()
102 /// ```
103 struct GPUBarrierConversion final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
105 
106  LogicalResult
107  matchAndRewrite(gpu::BarrierOp op, OpAdaptor adaptor,
108  ConversionPatternRewriter &rewriter) const final {
109  constexpr StringLiteral funcName = "_Z7barrierj";
110 
112  assert(moduleOp && "Expecting module");
113  Type flagTy = rewriter.getI32Type();
114  Type voidTy = rewriter.getType<LLVM::LLVMVoidType>();
115  LLVM::LLVMFuncOp func =
116  lookupOrCreateSPIRVFn(moduleOp, funcName, flagTy, voidTy,
117  /*isMemNone=*/false, /*isConvergent=*/true);
118 
119  // Value used by SPIR-V backend to represent `CLK_LOCAL_MEM_FENCE`.
120  // See `llvm/lib/Target/SPIRV/SPIRVBuiltins.td`.
121  constexpr int64_t localMemFenceFlag = 1;
122  Location loc = op->getLoc();
123  Value flag =
124  rewriter.create<LLVM::ConstantOp>(loc, flagTy, localMemFenceFlag);
125  rewriter.replaceOp(op, createSPIRVBuiltinCall(loc, rewriter, func, flag));
126  return success();
127  }
128 };
129 
130 //===----------------------------------------------------------------------===//
131 // SPIR-V Builtins
132 //===----------------------------------------------------------------------===//
133 
134 /// Replace `gpu.*` with an `llvm.call` to the corresponding SPIR-V builtin with
135 /// a constant argument for the `dimension` attribute. Return type will depend
136 /// on index width option:
137 /// ```
138 /// // %thread_id_y = gpu.thread_id y
139 /// %c1 = llvm.mlir.constant(1: i32) : i32
140 /// %0 = llvm.call spir_funccc @_Z12get_local_idj(%c1) : (i32) -> i64
141 /// ```
142 struct LaunchConfigConversion : ConvertToLLVMPattern {
143  LaunchConfigConversion(StringRef funcName, StringRef rootOpName,
144  MLIRContext *context,
145  const LLVMTypeConverter &typeConverter,
146  PatternBenefit benefit)
147  : ConvertToLLVMPattern(rootOpName, context, typeConverter, benefit),
148  funcName(funcName) {}
149 
150  virtual gpu::Dimension getDimension(Operation *op) const = 0;
151 
152  LogicalResult
153  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
154  ConversionPatternRewriter &rewriter) const final {
156  assert(moduleOp && "Expecting module");
157  Type dimTy = rewriter.getI32Type();
158  Type indexTy = getTypeConverter()->getIndexType();
159  LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(moduleOp, funcName, dimTy,
160  indexTy, /*isMemNone=*/true,
161  /*isConvergent=*/false);
162 
163  Location loc = op->getLoc();
164  gpu::Dimension dim = getDimension(op);
165  Value dimVal = rewriter.create<LLVM::ConstantOp>(loc, dimTy,
166  static_cast<int64_t>(dim));
167  rewriter.replaceOp(op, createSPIRVBuiltinCall(loc, rewriter, func, dimVal));
168  return success();
169  }
170 
171  StringRef funcName;
172 };
173 
174 template <typename SourceOp>
175 struct LaunchConfigOpConversion final : LaunchConfigConversion {
176  static StringRef getFuncName();
177 
178  explicit LaunchConfigOpConversion(const LLVMTypeConverter &typeConverter,
179  PatternBenefit benefit = 1)
180  : LaunchConfigConversion(getFuncName(), SourceOp::getOperationName(),
181  &typeConverter.getContext(), typeConverter,
182  benefit) {}
183 
184  gpu::Dimension getDimension(Operation *op) const final {
185  return cast<SourceOp>(op).getDimension();
186  }
187 };
188 
// Itanium-mangled names of the OpenCL work-item builtins, e.g.
// `_Z12get_group_idj` is `get_group_id(unsigned int)`. The trailing `j`
// encodes the single `unsigned int` dimension argument. These strings are
// part of the lowering contract with the SPIR-V backend — do not edit.

template <>
StringRef LaunchConfigOpConversion<gpu::BlockIdOp>::getFuncName() {
  return "_Z12get_group_idj";
}

template <>
StringRef LaunchConfigOpConversion<gpu::GridDimOp>::getFuncName() {
  return "_Z14get_num_groupsj";
}

template <>
StringRef LaunchConfigOpConversion<gpu::BlockDimOp>::getFuncName() {
  return "_Z14get_local_sizej";
}

template <>
StringRef LaunchConfigOpConversion<gpu::ThreadIdOp>::getFuncName() {
  return "_Z12get_local_idj";
}

template <>
StringRef LaunchConfigOpConversion<gpu::GlobalIdOp>::getFuncName() {
  return "_Z13get_global_idj";
}
213 
214 //===----------------------------------------------------------------------===//
215 // Shuffles
216 //===----------------------------------------------------------------------===//
217 
218 /// Replace `gpu.shuffle` with an `llvm.call` to the corresponding SPIR-V
219 /// builtin for `shuffleResult`, keeping `value` and `offset` arguments, and a
220 /// `true` constant for the `valid` result type. Conversion will only take place
221 /// if `width` is constant and equal to the `subgroup` pass option:
222 /// ```
223 /// // %0 = gpu.shuffle idx %value, %offset, %width : f64
224 /// %0 = llvm.call spir_funccc @_Z17sub_group_shuffledj(%value, %offset)
225 /// : (f64, i32) -> f64
226 /// ```
227 struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
229 
230  static StringRef getBaseName(gpu::ShuffleMode mode) {
231  switch (mode) {
232  case gpu::ShuffleMode::IDX:
233  return "sub_group_shuffle";
234  case gpu::ShuffleMode::XOR:
235  return "sub_group_shuffle_xor";
236  case gpu::ShuffleMode::UP:
237  return "sub_group_shuffle_up";
238  case gpu::ShuffleMode::DOWN:
239  return "sub_group_shuffle_down";
240  }
241  llvm_unreachable("Unhandled shuffle mode");
242  }
243 
244  static std::optional<StringRef> getTypeMangling(Type type) {
246  .Case<Float16Type>([](auto) { return "Dhj"; })
247  .Case<Float32Type>([](auto) { return "fj"; })
248  .Case<Float64Type>([](auto) { return "dj"; })
249  .Case<IntegerType>([](auto intTy) -> std::optional<StringRef> {
250  switch (intTy.getWidth()) {
251  case 8:
252  return "cj";
253  case 16:
254  return "sj";
255  case 32:
256  return "ij";
257  case 64:
258  return "lj";
259  }
260  return std::nullopt;
261  })
262  .Default([](auto) { return std::nullopt; });
263  }
264 
265  static std::optional<std::string> getFuncName(gpu::ShuffleOp op) {
266  StringRef baseName = getBaseName(op.getMode());
267  std::optional<StringRef> typeMangling = getTypeMangling(op.getType(0));
268  if (!typeMangling)
269  return std::nullopt;
270  return llvm::formatv("_Z{0}{1}{2}", baseName.size(), baseName,
271  typeMangling.value());
272  }
273 
274  /// Get the subgroup size from the target or return a default.
275  static int getSubgroupSize(Operation *op) {
278  .getSubgroupSize();
279  }
280 
281  static bool hasValidWidth(gpu::ShuffleOp op) {
282  llvm::APInt val;
283  Value width = op.getWidth();
284  return matchPattern(width, m_ConstantInt(&val)) &&
285  val == getSubgroupSize(op);
286  }
287 
288  LogicalResult
289  matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
290  ConversionPatternRewriter &rewriter) const final {
291  if (!hasValidWidth(op))
292  return rewriter.notifyMatchFailure(
293  op, "shuffle width and subgroup size mismatch");
294 
295  std::optional<std::string> funcName = getFuncName(op);
296  if (!funcName)
297  return rewriter.notifyMatchFailure(op, "unsupported value type");
298 
300  assert(moduleOp && "Expecting module");
301  Type valueType = adaptor.getValue().getType();
302  Type offsetType = adaptor.getOffset().getType();
303  Type resultType = valueType;
304  LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
305  moduleOp, funcName.value(), {valueType, offsetType}, resultType,
306  /*isMemNone=*/false, /*isConvergent=*/true);
307 
308  Location loc = op->getLoc();
309  std::array<Value, 2> args{adaptor.getValue(), adaptor.getOffset()};
310  Value result =
311  createSPIRVBuiltinCall(loc, rewriter, func, args).getResult();
312  Value trueVal =
313  rewriter.create<LLVM::ConstantOp>(loc, rewriter.getI1Type(), true);
314  rewriter.replaceOp(op, {result, trueVal});
315  return success();
316  }
317 };
318 
319 //===----------------------------------------------------------------------===//
320 // GPU To LLVM-SPV Pass.
321 //===----------------------------------------------------------------------===//
322 
323 struct GPUToLLVMSPVConversionPass final
324  : impl::ConvertGpuOpsToLLVMSPVOpsBase<GPUToLLVMSPVConversionPass> {
325  using Base::Base;
326 
327  void runOnOperation() final {
328  MLIRContext *context = &getContext();
329  RewritePatternSet patterns(context);
330 
331  LowerToLLVMOptions options(context);
332  if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
333  options.overrideIndexBitwidth(indexBitwidth);
334 
335  LLVMTypeConverter converter(context, options);
336  LLVMConversionTarget target(*context);
337 
338  target.addIllegalOp<gpu::BarrierOp, gpu::BlockDimOp, gpu::BlockIdOp,
339  gpu::GPUFuncOp, gpu::GlobalIdOp, gpu::GridDimOp,
340  gpu::ReturnOp, gpu::ShuffleOp, gpu::ThreadIdOp>();
341 
342  populateGpuToLLVMSPVConversionPatterns(converter, patterns);
344 
345  if (failed(applyPartialConversion(getOperation(), target,
346  std::move(patterns))))
347  signalPassFailure();
348  }
349 };
350 } // namespace
351 
352 //===----------------------------------------------------------------------===//
353 // GPU To LLVM-SPV Patterns.
354 //===----------------------------------------------------------------------===//
355 
356 namespace mlir {
357 namespace {
358 static unsigned
359 gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace addressSpace) {
360  constexpr spirv::ClientAPI clientAPI = spirv::ClientAPI::OpenCL;
361  return storageClassToAddressSpace(clientAPI,
362  addressSpaceToStorageClass(addressSpace));
363 }
364 } // namespace
365 
367  RewritePatternSet &patterns) {
368  patterns.add<GPUBarrierConversion, GPUReturnOpLowering, GPUShuffleConversion,
369  LaunchConfigOpConversion<gpu::BlockIdOp>,
370  LaunchConfigOpConversion<gpu::GridDimOp>,
371  LaunchConfigOpConversion<gpu::BlockDimOp>,
372  LaunchConfigOpConversion<gpu::ThreadIdOp>,
373  LaunchConfigOpConversion<gpu::GlobalIdOp>>(typeConverter);
374  MLIRContext *context = &typeConverter.getContext();
375  unsigned privateAddressSpace =
376  gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace::Private);
377  unsigned localAddressSpace =
378  gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace::Workgroup);
379  OperationName llvmFuncOpName(LLVM::LLVMFuncOp::getOperationName(), context);
380  StringAttr kernelBlockSizeAttributeName =
381  LLVM::LLVMFuncOp::getReqdWorkGroupSizeAttrName(llvmFuncOpName);
382  patterns.add<GPUFuncOpLowering>(
383  typeConverter,
385  privateAddressSpace, localAddressSpace,
386  /*kernelAttributeName=*/{}, kernelBlockSizeAttributeName,
387  LLVM::CConv::SPIR_KERNEL, LLVM::CConv::SPIR_FUNC,
388  /*encodeWorkgroupAttributionsAsArguments=*/true});
389 }
390 
393  gpuAddressSpaceToOCLAddressSpace);
394 }
395 } // namespace mlir
static LLVM::CallOp createSPIRVBuiltinCall(Location loc, ConversionPatternRewriter &rewriter, LLVM::LLVMFuncOp func, ValueRange args)
static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable, StringRef name, ArrayRef< Type > paramTypes, Type resultType, bool isMemNone, bool isConvergent)
static MLIRContext * getContext(OpFoldResult val)
static llvm::ManagedStatic< PassManagerOptions > options
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
Definition: Builders.h:102
This class implements a pattern rewriter for use with ConversionPatterns.
Utility class for operation conversions targeting the LLVM dialect that match exactly one source oper...
Definition: Pattern.h:143
ConvertOpToLLVMPattern(const LLVMTypeConverter &typeConverter, PatternBenefit benefit=1)
Definition: Pattern.h:147
Base class for operation conversions targeting the LLVM IR dialect.
Definition: Pattern.h:41
Derived class that automatically populates legalization information for different LLVM ops.
Conversion from types to the LLVM IR dialect.
Definition: TypeConverter.h:35
MLIRContext & getContext() const
Returns the MLIR context.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:63
Options to control the LLVM lowering.
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This class helps build Operations.
Definition: Builders.h:211
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:472
A trait used to provide symbol table functionalities to a region operation.
Definition: SymbolTable.h:435
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:682
Operation * getParentWithTrait()
Returns the closest surrounding parent operation with trait Trait.
Definition: Operation.h:248
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very li...
Definition: PatternMatch.h:34
RewritePatternSet & add(ConstructorArg &&arg, ConstructorArgs &&...args)
Add an instance of each of the pattern types 'Ts' to the pattern list with the given arguments.
Definition: PatternMatch.h:847
static Operation * lookupSymbolIn(Operation *op, StringAttr symbol)
Returns the operation registered with the given symbol name with the regions of 'symbolTableOp'.
Type conversion class.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:381
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
ResourceLimitsAttr getResourceLimits() const
Returns the target resource limits.
TargetEnvAttr lookupTargetEnvOrDefault(Operation *op)
Queries the target environment recursively from enclosing symbol table ops containing the given op or...
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition: Matchers.h:485
detail::constant_int_value_binder m_ConstantInt(IntegerAttr::ValueType *bind_value)
Matches a constant holding a scalar/vector/tensor integer (splat) and writes the integer value to bin...
Definition: Matchers.h:522
unsigned storageClassToAddressSpace(spirv::ClientAPI clientAPI, spirv::StorageClass storageClass)
static constexpr unsigned kDeriveIndexBitwidthFromDataLayout
Value to pass as bitwidth for the index type when the converter is expected to derive the bitwidth fr...
spirv::StorageClass addressSpaceToStorageClass(gpu::AddressSpace addressSpace)
void populateGpuToLLVMSPVConversionPatterns(LLVMTypeConverter &converter, RewritePatternSet &patterns)
void populateGpuMemorySpaceAttributeConversions(TypeConverter &typeConverter, const MemorySpaceMapping &mapping)
Populates memory space attribute conversion rules for lowering gpu.address_space to integer values.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
LogicalResult applyPartialConversion(ArrayRef< Operation * > ops, const ConversionTarget &target, const FrozenRewritePatternSet &patterns, ConversionConfig config=ConversionConfig())
Below we define several entry points for operation conversion.