11 #include "../GPUCommon/GPUOpsLowering.h"
32 #include "llvm/ADT/TypeSwitch.h"
33 #include "llvm/Support/FormatVariadic.h"
35 #define DEBUG_TYPE "gpu-to-llvm-spv"
40 #define GEN_PASS_DEF_CONVERTGPUOPSTOLLVMSPVOPS
41 #include "mlir/Conversion/Passes.h.inc"
51 Type resultType,
bool isMemNone,
53 auto func = dyn_cast_or_null<LLVM::LLVMFuncOp>(
57 func = b.
create<LLVM::LLVMFuncOp>(
58 symbolTable->
getLoc(), name,
60 func.setCConv(LLVM::cconv::CConv::SPIR_FUNC);
61 func.setNoUnwind(
true);
62 func.setWillReturn(
true);
66 constexpr
auto noModRef = mlir::LLVM::ModRefInfo::NoModRef;
67 auto memAttr = b.
getAttr<LLVM::MemoryEffectsAttr>(
70 func.setMemoryEffectsAttr(memAttr);
73 func.setConvergent(isConvergent);
80 LLVM::LLVMFuncOp func,
82 auto call = rewriter.
create<LLVM::CallOp>(loc, func, args);
83 call.setCConv(func.getCConv());
84 call.setConvergentAttr(func.getConvergentAttr());
85 call.setNoUnwindAttr(func.getNoUnwindAttr());
86 call.setWillReturnAttr(func.getWillReturnAttr());
87 call.setMemoryEffectsAttr(func.getMemoryEffectsAttr());
107 matchAndRewrite(gpu::BarrierOp op, OpAdaptor adaptor,
109 constexpr StringLiteral funcName =
"_Z7barrierj";
112 assert(moduleOp &&
"Expecting module");
113 Type flagTy = rewriter.getI32Type();
114 Type voidTy = rewriter.getType<LLVM::LLVMVoidType>();
115 LLVM::LLVMFuncOp func =
121 constexpr int64_t localMemFenceFlag = 1;
124 rewriter.create<LLVM::ConstantOp>(loc, flagTy, localMemFenceFlag);
143 LaunchConfigConversion(StringRef funcName, StringRef rootOpName,
148 funcName(funcName) {}
150 virtual gpu::Dimension getDimension(
Operation *op)
const = 0;
156 assert(moduleOp &&
"Expecting module");
157 Type dimTy = rewriter.getI32Type();
158 Type indexTy = getTypeConverter()->getIndexType();
164 gpu::Dimension dim = getDimension(op);
165 Value dimVal = rewriter.create<LLVM::ConstantOp>(loc, dimTy,
166 static_cast<int64_t
>(dim));
174 template <
typename SourceOp>
175 struct LaunchConfigOpConversion final : LaunchConfigConversion {
176 static StringRef getFuncName();
180 : LaunchConfigConversion(getFuncName(), SourceOp::getOperationName(),
184 gpu::Dimension getDimension(
Operation *op)
const final {
185 return cast<SourceOp>(op).getDimension();
190 StringRef LaunchConfigOpConversion<gpu::BlockIdOp>::getFuncName() {
191 return "_Z12get_group_idj";
195 StringRef LaunchConfigOpConversion<gpu::GridDimOp>::getFuncName() {
196 return "_Z14get_num_groupsj";
200 StringRef LaunchConfigOpConversion<gpu::BlockDimOp>::getFuncName() {
201 return "_Z14get_local_sizej";
205 StringRef LaunchConfigOpConversion<gpu::ThreadIdOp>::getFuncName() {
206 return "_Z12get_local_idj";
210 StringRef LaunchConfigOpConversion<gpu::GlobalIdOp>::getFuncName() {
211 return "_Z13get_global_idj";
230 static StringRef getBaseName(gpu::ShuffleMode mode) {
232 case gpu::ShuffleMode::IDX:
233 return "sub_group_shuffle";
234 case gpu::ShuffleMode::XOR:
235 return "sub_group_shuffle_xor";
236 case gpu::ShuffleMode::UP:
237 return "sub_group_shuffle_up";
238 case gpu::ShuffleMode::DOWN:
239 return "sub_group_shuffle_down";
241 llvm_unreachable(
"Unhandled shuffle mode");
244 static std::optional<StringRef> getTypeMangling(
Type type) {
246 .Case<Float16Type>([](
auto) {
return "Dhj"; })
247 .Case<Float32Type>([](
auto) {
return "fj"; })
248 .Case<Float64Type>([](
auto) {
return "dj"; })
249 .Case<IntegerType>([](
auto intTy) -> std::optional<StringRef> {
250 switch (intTy.getWidth()) {
262 .Default([](
auto) {
return std::nullopt; });
265 static std::optional<std::string> getFuncName(gpu::ShuffleOp op) {
266 StringRef baseName = getBaseName(op.getMode());
267 std::optional<StringRef> typeMangling = getTypeMangling(op.getType(0));
270 return llvm::formatv(
"_Z{0}{1}{2}", baseName.size(), baseName,
271 typeMangling.value());
275 static std::optional<int> getSubgroupSize(
Operation *op) {
279 return parentFunc.getIntelReqdSubGroupSize();
282 static bool hasValidWidth(gpu::ShuffleOp op) {
284 Value width = op.getWidth();
286 val == getSubgroupSize(op);
290 matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
292 if (!hasValidWidth(op))
293 return rewriter.notifyMatchFailure(
294 op,
"shuffle width and subgroup size mismatch");
296 std::optional<std::string> funcName = getFuncName(op);
298 return rewriter.notifyMatchFailure(op,
"unsupported value type");
301 assert(moduleOp &&
"Expecting module");
302 Type valueType = adaptor.getValue().getType();
303 Type offsetType = adaptor.getOffset().getType();
304 Type resultType = valueType;
306 moduleOp, funcName.value(), {valueType, offsetType}, resultType,
310 std::array<Value, 2> args{adaptor.getValue(), adaptor.getOffset()};
314 rewriter.create<LLVM::ConstantOp>(loc, rewriter.getI1Type(),
true);
315 rewriter.replaceOp(op, {result, trueVal});
320 class MemorySpaceToOpenCLMemorySpaceConverter final :
public TypeConverter {
322 MemorySpaceToOpenCLMemorySpaceConverter(
MLIRContext *ctx) {
323 addConversion([](
Type t) {
return t; });
324 addConversion([ctx](
BaseMemRefType memRefType) -> std::optional<Type> {
334 if (
auto rankedType = dyn_cast<MemRefType>(memRefType)) {
337 rankedType.getLayout(), addrSpaceAttr);
342 addConversion([
this](FunctionType type) {
343 auto inputs = llvm::map_to_vector(
344 type.getInputs(), [
this](
Type ty) { return convertType(ty); });
345 auto results = llvm::map_to_vector(
346 type.getResults(), [
this](
Type ty) { return convertType(ty); });
356 template <
typename SubgroupOp>
362 matchAndRewrite(SubgroupOp op,
typename SubgroupOp::Adaptor adaptor,
364 constexpr StringRef funcName = [] {
365 if constexpr (std::is_same_v<SubgroupOp, gpu::SubgroupIdOp>) {
366 return "_Z16get_sub_group_id";
367 }
else if constexpr (std::is_same_v<SubgroupOp, gpu::LaneIdOp>) {
368 return "_Z22get_sub_group_local_id";
369 }
else if constexpr (std::is_same_v<SubgroupOp, gpu::NumSubgroupsOp>) {
370 return "_Z18get_num_sub_groups";
371 }
else if constexpr (std::is_same_v<SubgroupOp, gpu::SubgroupSizeOp>) {
372 return "_Z18get_sub_group_size";
377 op->template getParentWithTrait<OpTrait::SymbolTable>();
378 Type resultTy = rewriter.getI32Type();
379 LLVM::LLVMFuncOp func =
386 Type indexTy = getTypeConverter()->getIndexType();
387 if (resultTy != indexTy) {
391 result = rewriter.create<LLVM::ZExtOp>(loc, indexTy, result);
394 rewriter.replaceOp(op, result);
403 struct GPUToLLVMSPVConversionPass final
404 : impl::ConvertGpuOpsToLLVMSPVOpsBase<GPUToLLVMSPVConversionPass> {
407 void runOnOperation() final {
412 options.overrideIndexBitwidth(this->use64bitIndex ? 64 : 32);
418 MemorySpaceToOpenCLMemorySpaceConverter
converter(context);
421 -> std::optional<BaseMemRefType> {
431 target.addIllegalOp<gpu::BarrierOp, gpu::BlockDimOp, gpu::BlockIdOp,
432 gpu::GPUFuncOp, gpu::GlobalIdOp, gpu::GridDimOp,
433 gpu::LaneIdOp, gpu::NumSubgroupsOp, gpu::ReturnOp,
434 gpu::ShuffleOp, gpu::SubgroupIdOp, gpu::SubgroupSizeOp,
454 gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace addressSpace) {
464 GPUSubgroupOpConversion<gpu::LaneIdOp>,
465 GPUSubgroupOpConversion<gpu::NumSubgroupsOp>,
466 GPUSubgroupOpConversion<gpu::SubgroupIdOp>,
467 GPUSubgroupOpConversion<gpu::SubgroupSizeOp>,
468 LaunchConfigOpConversion<gpu::BlockDimOp>,
469 LaunchConfigOpConversion<gpu::BlockIdOp>,
470 LaunchConfigOpConversion<gpu::GlobalIdOp>,
471 LaunchConfigOpConversion<gpu::GridDimOp>,
474 unsigned privateAddressSpace =
475 gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace::Private);
476 unsigned localAddressSpace =
477 gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace::Workgroup);
478 OperationName llvmFuncOpName(LLVM::LLVMFuncOp::getOperationName(), context);
479 StringAttr kernelBlockSizeAttributeName =
480 LLVM::LLVMFuncOp::getReqdWorkGroupSizeAttrName(llvmFuncOpName);
484 privateAddressSpace, localAddressSpace,
485 {}, kernelBlockSizeAttributeName,
486 LLVM::CConv::SPIR_KERNEL, LLVM::CConv::SPIR_FUNC,
492 gpuAddressSpaceToOCLAddressSpace);
static LLVM::CallOp createSPIRVBuiltinCall(Location loc, ConversionPatternRewriter &rewriter, LLVM::LLVMFuncOp func, ValueRange args)
static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable, StringRef name, ArrayRef< Type > paramTypes, Type resultType, bool isMemNone, bool isConvergent)
static MLIRContext * getContext(OpFoldResult val)
static llvm::ManagedStatic< PassManagerOptions > options
This is an attribute/type replacer that is naively cached.
Attributes are known-constant values of operations.
This class provides a shared interface for ranked and unranked memref types.
ArrayRef< int64_t > getShape() const
Returns the shape of this memref type.
Attribute getMemorySpace() const
Returns the memory space in which data referred to by this memref resides.
Type getElementType() const
Returns the element type of this memref type.
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
This class implements a pattern rewriter for use with ConversionPatterns.
Utility class for operation conversions targeting the LLVM dialect that match exactly one source operation.
ConvertOpToLLVMPattern(const LLVMTypeConverter &typeConverter, PatternBenefit benefit=1)
Base class for operation conversions targeting the LLVM IR dialect.
const LLVMTypeConverter * getTypeConverter() const
Derived class that automatically populates legalization information for different LLVM ops.
Conversion from types to the LLVM IR dialect.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
Options to control the LLVM lowering.
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
A trait used to provide symbol table functionalities to a region operation.
Operation is the basic unit of execution within MLIR.
Location getLoc()
The source location the operation was defined or derived from.
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Operation * getParentWithTrait()
Returns the closest surrounding parent operation with trait Trait.
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very little benefit) to 65535 (the most benefit possible).
static Operation * lookupSymbolIn(Operation *op, StringAttr symbol)
Returns the operation registered with the given symbol name with the regions of 'symbolTableOp'.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
unsigned getIntOrFloatBitWidth() const
Return the bit width of an integer or a float type, assert failure on other types.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.
void recursivelyReplaceElementsIn(Operation *op, bool replaceAttrs=true, bool replaceLocs=false, bool replaceTypes=false)
Replace the elements within the given operation, and all nested operations.
void addReplacement(ReplaceFn< Attribute > fn)
Register a replacement function for mapping a given attribute or type.
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
detail::constant_int_value_binder m_ConstantInt(IntegerAttr::ValueType *bind_value)
Matches a constant holding a scalar/vector/tensor integer (splat) and writes the integer value to bind_value.
unsigned storageClassToAddressSpace(spirv::ClientAPI clientAPI, spirv::StorageClass storageClass)
void populateGpuToLLVMSPVConversionPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns)
TypeConverter & typeConverter
spirv::StorageClass addressSpaceToStorageClass(gpu::AddressSpace addressSpace)
void populateGpuMemorySpaceAttributeConversions(TypeConverter &typeConverter, const MemorySpaceMapping &mapping)
Populates memory space attribute conversion rules for lowering gpu.address_space to integer values.
const FrozenRewritePatternSet & patterns
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
const TypeConverter & converter
LogicalResult applyPartialConversion(ArrayRef< Operation * > ops, const ConversionTarget &target, const FrozenRewritePatternSet &patterns, ConversionConfig config=ConversionConfig())
Below we define several entry points for operation conversion.