11 #include "../GPUCommon/GPUOpsLowering.h"
34 #include "llvm/ADT/TypeSwitch.h"
35 #include "llvm/Support/FormatVariadic.h"
37 #define DEBUG_TYPE "gpu-to-llvm-spv"
42 #define GEN_PASS_DEF_CONVERTGPUOPSTOLLVMSPVOPS
43 #include "mlir/Conversion/Passes.h.inc"
53 Type resultType,
bool isMemNone,
55 auto func = dyn_cast_or_null<LLVM::LLVMFuncOp>(
59 func = b.
create<LLVM::LLVMFuncOp>(
60 symbolTable->
getLoc(), name,
62 func.setCConv(LLVM::cconv::CConv::SPIR_FUNC);
63 func.setNoUnwind(
true);
64 func.setWillReturn(
true);
68 constexpr
auto noModRef = mlir::LLVM::ModRefInfo::NoModRef;
69 auto memAttr = b.
getAttr<LLVM::MemoryEffectsAttr>(
72 func.setMemoryEffectsAttr(memAttr);
75 func.setConvergent(isConvergent);
82 LLVM::LLVMFuncOp func,
84 auto call = rewriter.
create<LLVM::CallOp>(loc, func, args);
85 call.setCConv(func.getCConv());
86 call.setConvergentAttr(func.getConvergentAttr());
87 call.setNoUnwindAttr(func.getNoUnwindAttr());
88 call.setWillReturnAttr(func.getWillReturnAttr());
89 call.setMemoryEffectsAttr(func.getMemoryEffectsAttr());
109 matchAndRewrite(gpu::BarrierOp op, OpAdaptor adaptor,
111 constexpr StringLiteral funcName =
"_Z7barrierj";
114 assert(moduleOp &&
"Expecting module");
115 Type flagTy = rewriter.getI32Type();
116 Type voidTy = rewriter.getType<LLVM::LLVMVoidType>();
117 LLVM::LLVMFuncOp func =
123 constexpr int64_t localMemFenceFlag = 1;
126 rewriter.create<LLVM::ConstantOp>(loc, flagTy, localMemFenceFlag);
145 LaunchConfigConversion(StringRef funcName, StringRef rootOpName,
150 funcName(funcName) {}
152 virtual gpu::Dimension getDimension(
Operation *op)
const = 0;
158 assert(moduleOp &&
"Expecting module");
159 Type dimTy = rewriter.getI32Type();
160 Type indexTy = getTypeConverter()->getIndexType();
166 gpu::Dimension dim = getDimension(op);
167 Value dimVal = rewriter.create<LLVM::ConstantOp>(loc, dimTy,
168 static_cast<int64_t
>(dim));
176 template <
typename SourceOp>
177 struct LaunchConfigOpConversion final : LaunchConfigConversion {
178 static StringRef getFuncName();
182 : LaunchConfigConversion(getFuncName(), SourceOp::getOperationName(),
186 gpu::Dimension getDimension(
Operation *op)
const final {
187 return cast<SourceOp>(op).getDimension();
192 StringRef LaunchConfigOpConversion<gpu::BlockIdOp>::getFuncName() {
193 return "_Z12get_group_idj";
197 StringRef LaunchConfigOpConversion<gpu::GridDimOp>::getFuncName() {
198 return "_Z14get_num_groupsj";
202 StringRef LaunchConfigOpConversion<gpu::BlockDimOp>::getFuncName() {
203 return "_Z14get_local_sizej";
207 StringRef LaunchConfigOpConversion<gpu::ThreadIdOp>::getFuncName() {
208 return "_Z12get_local_idj";
212 StringRef LaunchConfigOpConversion<gpu::GlobalIdOp>::getFuncName() {
213 return "_Z13get_global_idj";
232 static StringRef getBaseName(gpu::ShuffleMode mode) {
234 case gpu::ShuffleMode::IDX:
235 return "sub_group_shuffle";
236 case gpu::ShuffleMode::XOR:
237 return "sub_group_shuffle_xor";
238 case gpu::ShuffleMode::UP:
239 return "sub_group_shuffle_up";
240 case gpu::ShuffleMode::DOWN:
241 return "sub_group_shuffle_down";
243 llvm_unreachable(
"Unhandled shuffle mode");
246 static std::optional<StringRef> getTypeMangling(
Type type) {
248 .Case<Float16Type>([](
auto) {
return "Dhj"; })
249 .Case<Float32Type>([](
auto) {
return "fj"; })
250 .Case<Float64Type>([](
auto) {
return "dj"; })
251 .Case<IntegerType>([](
auto intTy) -> std::optional<StringRef> {
252 switch (intTy.getWidth()) {
264 .Default([](
auto) {
return std::nullopt; });
267 static std::optional<std::string> getFuncName(gpu::ShuffleOp op) {
268 StringRef baseName = getBaseName(op.getMode());
269 std::optional<StringRef> typeMangling = getTypeMangling(op.getType(0));
272 return llvm::formatv(
"_Z{0}{1}{2}", baseName.size(), baseName,
273 typeMangling.value());
277 static int getSubgroupSize(
Operation *op) {
283 static bool hasValidWidth(gpu::ShuffleOp op) {
285 Value width = op.getWidth();
287 val == getSubgroupSize(op);
291 matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
293 if (!hasValidWidth(op))
294 return rewriter.notifyMatchFailure(
295 op,
"shuffle width and subgroup size mismatch");
297 std::optional<std::string> funcName = getFuncName(op);
299 return rewriter.notifyMatchFailure(op,
"unsupported value type");
302 assert(moduleOp &&
"Expecting module");
303 Type valueType = adaptor.getValue().getType();
304 Type offsetType = adaptor.getOffset().getType();
305 Type resultType = valueType;
307 moduleOp, funcName.value(), {valueType, offsetType}, resultType,
311 std::array<Value, 2> args{adaptor.getValue(), adaptor.getOffset()};
315 rewriter.create<LLVM::ConstantOp>(loc, rewriter.getI1Type(),
true);
316 rewriter.replaceOp(op, {result, trueVal});
321 class MemorySpaceToOpenCLMemorySpaceConverter final :
public TypeConverter {
323 MemorySpaceToOpenCLMemorySpaceConverter(
MLIRContext *ctx) {
324 addConversion([](
Type t) {
return t; });
325 addConversion([ctx](
BaseMemRefType memRefType) -> std::optional<Type> {
335 if (
auto rankedType = dyn_cast<MemRefType>(memRefType)) {
338 rankedType.getLayout(), addrSpaceAttr);
343 addConversion([
this](FunctionType type) {
344 auto inputs = llvm::map_to_vector(
345 type.getInputs(), [
this](
Type ty) { return convertType(ty); });
346 auto results = llvm::map_to_vector(
347 type.getResults(), [
this](
Type ty) { return convertType(ty); });
357 template <
typename SubgroupOp>
363 matchAndRewrite(SubgroupOp op,
typename SubgroupOp::Adaptor adaptor,
365 constexpr StringRef funcName = [] {
366 if constexpr (std::is_same_v<SubgroupOp, gpu::SubgroupIdOp>) {
367 return "_Z16get_sub_group_id";
368 }
else if constexpr (std::is_same_v<SubgroupOp, gpu::LaneIdOp>) {
369 return "_Z22get_sub_group_local_id";
370 }
else if constexpr (std::is_same_v<SubgroupOp, gpu::NumSubgroupsOp>) {
371 return "_Z18get_num_sub_groups";
372 }
else if constexpr (std::is_same_v<SubgroupOp, gpu::SubgroupSizeOp>) {
373 return "_Z18get_sub_group_size";
378 op->template getParentWithTrait<OpTrait::SymbolTable>();
379 Type resultTy = rewriter.getI32Type();
380 LLVM::LLVMFuncOp func =
387 Type indexTy = getTypeConverter()->getIndexType();
388 if (resultTy != indexTy) {
392 result = rewriter.create<LLVM::ZExtOp>(loc, indexTy, result);
395 rewriter.replaceOp(op, result);
404 struct GPUToLLVMSPVConversionPass final
405 : impl::ConvertGpuOpsToLLVMSPVOpsBase<GPUToLLVMSPVConversionPass> {
408 void runOnOperation() final {
414 options.overrideIndexBitwidth(indexBitwidth);
421 MemorySpaceToOpenCLMemorySpaceConverter converter(context);
424 -> std::optional<BaseMemRefType> {
434 target.addIllegalOp<gpu::BarrierOp, gpu::BlockDimOp, gpu::BlockIdOp,
435 gpu::GPUFuncOp, gpu::GlobalIdOp, gpu::GridDimOp,
436 gpu::LaneIdOp, gpu::NumSubgroupsOp, gpu::ReturnOp,
437 gpu::ShuffleOp, gpu::SubgroupIdOp, gpu::SubgroupSizeOp,
444 std::move(patterns))))
457 gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace addressSpace) {
467 GPUSubgroupOpConversion<gpu::LaneIdOp>,
468 GPUSubgroupOpConversion<gpu::NumSubgroupsOp>,
469 GPUSubgroupOpConversion<gpu::SubgroupIdOp>,
470 GPUSubgroupOpConversion<gpu::SubgroupSizeOp>,
471 LaunchConfigOpConversion<gpu::BlockDimOp>,
472 LaunchConfigOpConversion<gpu::BlockIdOp>,
473 LaunchConfigOpConversion<gpu::GlobalIdOp>,
474 LaunchConfigOpConversion<gpu::GridDimOp>,
475 LaunchConfigOpConversion<gpu::ThreadIdOp>>(typeConverter);
477 unsigned privateAddressSpace =
478 gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace::Private);
479 unsigned localAddressSpace =
480 gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace::Workgroup);
481 OperationName llvmFuncOpName(LLVM::LLVMFuncOp::getOperationName(), context);
482 StringAttr kernelBlockSizeAttributeName =
483 LLVM::LLVMFuncOp::getReqdWorkGroupSizeAttrName(llvmFuncOpName);
487 privateAddressSpace, localAddressSpace,
488 {}, kernelBlockSizeAttributeName,
489 LLVM::CConv::SPIR_KERNEL, LLVM::CConv::SPIR_FUNC,
495 gpuAddressSpaceToOCLAddressSpace);
static LLVM::CallOp createSPIRVBuiltinCall(Location loc, ConversionPatternRewriter &rewriter, LLVM::LLVMFuncOp func, ValueRange args)
static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable, StringRef name, ArrayRef< Type > paramTypes, Type resultType, bool isMemNone, bool isConvergent)
static MLIRContext * getContext(OpFoldResult val)
static llvm::ManagedStatic< PassManagerOptions > options
This is an attribute/type replacer that is naively cached.
Attributes are known-constant values of operations.
This class provides a shared interface for ranked and unranked memref types.
ArrayRef< int64_t > getShape() const
Returns the shape of this memref type.
Attribute getMemorySpace() const
Returns the memory space in which data referred to by this memref resides.
Type getElementType() const
Returns the element type of this memref type.
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
This class implements a pattern rewriter for use with ConversionPatterns.
Utility class for operation conversions targeting the LLVM dialect that match exactly one source operation.
ConvertOpToLLVMPattern(const LLVMTypeConverter &typeConverter, PatternBenefit benefit=1)
Base class for operation conversions targeting the LLVM IR dialect.
const LLVMTypeConverter * getTypeConverter() const
Derived class that automatically populates legalization information for different LLVM ops.
Conversion from types to the LLVM IR dialect.
MLIRContext & getContext() const
Returns the MLIR context.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
Options to control the LLVM lowering.
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
A trait used to provide symbol table functionalities to a region operation.
Operation is the basic unit of execution within MLIR.
Location getLoc()
The source location the operation was defined or derived from.
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Operation * getParentWithTrait()
Returns the closest surrounding parent operation with trait Trait.
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very little benefit) to 65535 (the most useful match possible).
RewritePatternSet & add(ConstructorArg &&arg, ConstructorArgs &&...args)
Add an instance of each of the pattern types 'Ts' to the pattern list with the given arguments.
static Operation * lookupSymbolIn(Operation *op, StringAttr symbol)
Returns the operation registered with the given symbol name with the regions of 'symbolTableOp'.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
unsigned getIntOrFloatBitWidth() const
Return the bit width of an integer or a float type, assert failure on other types.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.
void recursivelyReplaceElementsIn(Operation *op, bool replaceAttrs=true, bool replaceLocs=false, bool replaceTypes=false)
Replace the elements within the given operation, and all nested operations.
void addReplacement(ReplaceFn< Attribute > fn)
Register a replacement function for mapping a given attribute or type.
ResourceLimitsAttr getResourceLimits() const
Returns the target resource limits.
TargetEnvAttr lookupTargetEnvOrDefault(Operation *op)
Queries the target environment recursively from enclosing symbol table ops containing the given op, or returns the default target environment (as returned by getDefaultTargetEnv()) if not provided.
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
detail::constant_int_value_binder m_ConstantInt(IntegerAttr::ValueType *bind_value)
Matches a constant holding a scalar/vector/tensor integer (splat) and writes the integer value to bin...
unsigned storageClassToAddressSpace(spirv::ClientAPI clientAPI, spirv::StorageClass storageClass)
void populateGpuToLLVMSPVConversionPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns)
static constexpr unsigned kDeriveIndexBitwidthFromDataLayout
Value to pass as bitwidth for the index type when the converter is expected to derive the bitwidth from the data layout.
spirv::StorageClass addressSpaceToStorageClass(gpu::AddressSpace addressSpace)
void populateGpuMemorySpaceAttributeConversions(TypeConverter &typeConverter, const MemorySpaceMapping &mapping)
Populates memory space attribute conversion rules for lowering gpu.address_space to integer values.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
LogicalResult applyPartialConversion(ArrayRef< Operation * > ops, const ConversionTarget &target, const FrozenRewritePatternSet &patterns, ConversionConfig config=ConversionConfig())
Below we define several entry points for operation conversion.