MLIR 23.0.0git
GPUToXeVMPipeline.cpp
Go to the documentation of this file.
1//===- GPUToXeVMPipeline.cpp - Lowering pipeline to XeVM/LLVM -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a pass for testing the lowering to XeVM as a generally
10// usable sink pass. If XeGPU ops are used, it expects the MLIR code to have
11// XeGPU ops already embedded in gpu code.
12//
13//===----------------------------------------------------------------------===//
14
36
37using namespace mlir;
38
39namespace {
40//===----------------------------------------------------------------------===//
41// Pre-GPU common pipeline for both Host and GPU.
42//===----------------------------------------------------------------------===//
// Adds the passes that run ahead of the gpu.module-specific lowering: it
// attaches an XeVM target using the pipeline options and then schedules the
// GPU async-region pass on functions.
// NOTE(review): the embedded source numbering in this listing jumps
// (43 -> 45 -> 48, 56 -> 58), so the parameter list and whatever sits on the
// skipped lines are not visible here; `pm` and `options` are assumed to be the
// parameters -- confirm against the full source.
43void buildPreGPUCommonPassPipeline(
45 // builtin.module scope passes.
48 {
    // Copy the target-related pipeline options onto the attach-target pass
    // options; the scope keeps the temporary options object local.
49 GpuXeVMAttachTargetOptions xevmTargetOptions;
50 xevmTargetOptions.moduleMatcher = options.xevmModuleMatcher;
51 xevmTargetOptions.triple = options.zebinTriple;
52 xevmTargetOptions.chip = options.zebinChip;
53 xevmTargetOptions.optLevel = options.optLevel;
54 xevmTargetOptions.cmdOptions = options.cmdOptions;
55 pm.addPass(createGpuXeVMAttachTarget(xevmTargetOptions));
56 }
    // Nested on func::FuncOp rather than added at the pass manager's own level.
58 pm.addNestedPass<func::FuncOp>(createGpuAsyncRegionPass());
59}
60
61//===----------------------------------------------------------------------===//
62// GPUModule-specific stuff.
63//===----------------------------------------------------------------------===//
// Adds the passes nested on gpu::GPUModuleOp. In order, the visible code
// schedules: XeGPU layout propagation / blocking selected by
// options.xegpuOpLevel, arith op expansion, math-to-XeVM, XeGPU-to-XeVM,
// GPU-to-LLVM-SPV, and finally the math/arith floating-point legalization
// passes followed by CSE.
// NOTE(review): the embedded source numbering in this listing has gaps
// (e.g. 65, 71, 75, 79, 90, 94, 97, 99, 131, 139, 148), so the rest of the
// parameter list, several option declarations and possibly additional passes
// are not visible here.
64void buildGPUPassPipeline(OpPassManager &pm,
    // Options for the "lane" layout propagation used further below; the index
    // bit width follows the pipeline's use64bitIndex flag (64 or 32).
66 xegpu::XeGPUPropagateLayoutOptions laneLayoutOptions;
67 laneLayoutOptions.indexBitWidth = options.use64bitIndex ? 64 : 32;
68 laneLayoutOptions.layoutKind = "lane";
    // NOTE(review): this CSE is nested on ModuleOp, whereas every other pass
    // visible in this function is nested on gpu::GPUModuleOp -- confirm that
    // the different anchor is intended.
69 pm.addNestedPass<ModuleOp>(createCSEPass());
    // Workgroup-level input: propagate "subgroup" layouts, clean up, lower
    // affine ops, then propagate "inst" layouts and run XeGPU blocking.
    // The declarations of sgLayoutOptions and instDataOptions fall on lines
    // omitted from this listing.
70 if (options.xegpuOpLevel == "workgroup") {
72 sgLayoutOptions.layoutKind = "subgroup";
73 pm.addNestedPass<gpu::GPUModuleOp>(
74 xegpu::createXeGPUPropagateLayout(sgLayoutOptions));
76 pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
77 pm.addNestedPass<gpu::GPUModuleOp>(createLowerAffinePass());
78 pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
80 instDataOptions.layoutKind = "inst";
81 pm.addNestedPass<gpu::GPUModuleOp>(
82 xegpu::createXeGPUPropagateLayout(instDataOptions));
83 pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUBlocking());
84 pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
85 }
    // Subgroup-level input, which also covers workgroup-level input after the
    // branch above: "lane" layout propagation is scheduled twice, with
    // canonicalization/CSE clean-ups in between and afterwards. Lines omitted
    // from this listing sit between several of these calls.
86 if (options.xegpuOpLevel == "subgroup" ||
87 options.xegpuOpLevel == "workgroup") {
88 pm.addNestedPass<gpu::GPUModuleOp>(
89 xegpu::createXeGPUPropagateLayout(laneLayoutOptions));
91 pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
92 pm.addNestedPass<gpu::GPUModuleOp>(
93 xegpu::createXeGPUPropagateLayout(laneLayoutOptions));
95 pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
96 pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
98 pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
100 pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
101 pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
102 }
103 // Break down high-level micro-scaling (MX) ops (arith.scaling_extf and
104 // arith.scaling_truncf) into standard arith ops (extf/truncf + mulf), and
105 // expand extf/truncf on f8E8M0FNU into integer bit manipulation. This runs
106 // before the XeVM/LLVM conversions. The f4E2M1FN expansion patterns are
107 // intentionally left disabled: f4E2M1FN extf/truncf are lowered by the XeVM
108 // conversions (xevm.extf), whereas f8E8M0FNU is not supported there and so
109 // must be expanded here.
110 {
111 arith::ArithExpandOpsPassOptions arithExpandOptions;
112 arithExpandOptions.includeF8E8M0 = true;
113 pm.addNestedPass<gpu::GPUModuleOp>(
114 arith::createArithExpandOpsPass(arithExpandOptions));
115 }
    // Conversions towards XeVM / LLVM-SPV. Both option structs below take
    // their index width setting from the same use64bitIndex pipeline flag.
116 pm.addNestedPass<gpu::GPUModuleOp>(createConvertMathToXeVM());
117 ConvertXeGPUToXeVMPassOptions xegpuToXeVMOptions;
118 xegpuToXeVMOptions.use64bitIndex = options.use64bitIndex;
119 pm.addNestedPass<gpu::GPUModuleOp>(
120 createConvertXeGPUToXeVMPass(xegpuToXeVMOptions));
121 {
122 ConvertGpuOpsToLLVMSPVOpsOptions gpuToLLVMSPVOptions;
123 gpuToLLVMSPVOptions.use64bitIndex = options.use64bitIndex;
124 pm.addNestedPass<gpu::GPUModuleOp>(
125 createConvertGpuOpsToLLVMSPVOps(gpuToLLVMSPVOptions));
126 }
127 // Legalize math/arith ops on floating-point types that the XeVM target
128 // cannot handle natively (e.g. bf16) by wrapping them with extf/truncf
129 // around a supported type (defaulting to f32).
130 {
    // The declaration of mathExtendOptions falls on a line omitted from this
    // listing. The extra types are copied from options.mathExtendExtraTypes.
132 mathExtendOptions.extraTypeStrs.assign(options.mathExtendExtraTypes.begin(),
133 options.mathExtendExtraTypes.end());
134 mathExtendOptions.targetTypeStr = options.supportedTargetTypes;
135 pm.addNestedPass<gpu::GPUModuleOp>(
136 math::createMathExtendToSupportedTypes(mathExtendOptions));
137 }
138 {
    // The declaration of arithEmulateOptions falls on a line omitted from this
    // listing. It uses the same target type string as the math pass above.
140 arithEmulateOptions.sourceTypeStrs.assign(
141 options.unsupportedSourceTypes.begin(),
142 options.unsupportedSourceTypes.end());
143 arithEmulateOptions.targetTypeStr = options.supportedTargetTypes;
144 pm.addNestedPass<gpu::GPUModuleOp>(
145 arith::createArithEmulateUnsupportedFloats(arithEmulateOptions));
146 }
147 pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
149}
150
151//===----------------------------------------------------------------------===//
152// Post-GPU pipeline for both Host and GPU.
153//===----------------------------------------------------------------------===//
// Adds the passes that run after the gpu.module-specific lowering: the
// GPU-to-LLVM conversion, a canonicalize/CSE clean-up of each gpu.module, the
// XeVM-to-LLVM conversion, and finally gpu-module-to-binary.
// NOTE(review): the embedded source numbering in this listing jumps
// (154 -> 156 -> 159, 164 -> 169), so the parameter list and any passes on the
// skipped lines are not visible here.
154void buildPostGPUCommonPassPipeline(
156 // builtin.module scope passes.
159 {
    // Forward both bare-pointer calling-convention flags (host and kernel)
    // from the pipeline options.
160 GpuToLLVMConversionPassOptions gpuToLLVMOptions;
161 gpuToLLVMOptions.hostBarePtrCallConv = options.hostBarePtrCallConv;
162 gpuToLLVMOptions.kernelBarePtrCallConv = options.kernelBarePtrCallConv;
163 pm.addPass(createGpuToLLVMConversionPass(gpuToLLVMOptions));
164 }
169 pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
170 pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
171 // XeVM-to-LLVM must be the last pass before gpu-module-to-binary.
172 pm.addNestedPass<gpu::GPUModuleOp>(createConvertXeVMToLLVMPass());
173 // gpu-module-to-binary
174 {
    // The compilation target comes from options.binaryFormat; the command-line
    // options are the same options.cmdOptions used elsewhere in the pipeline.
175 GpuModuleToBinaryPassOptions gpuToModuleBinOptions;
176 gpuToModuleBinOptions.compilationTarget = options.binaryFormat;
177 gpuToModuleBinOptions.cmdOptions = options.cmdOptions;
178 pm.addPass(createGpuModuleToBinaryPass(gpuToModuleBinOptions));
179 }
180}
181} // namespace
182
// Body of the top-level pipeline builder: it chains the three stage builders
// in order (pre-GPU, gpu.module-specific, post-GPU), passing the same pass
// manager and options to each.
// NOTE(review): the function header is on lines omitted from this listing;
// presumably this is buildLowerToXeVMPassPipeline(OpPassManager &pm, const
// GPUToXeVMPipelineOptions &options) -- confirm against the full source.
185 // Pre-GPU common pipelines.
186 buildPreGPUCommonPassPipeline(pm, options);
187
188 // GPUModule-specific stuff.
189 buildGPUPassPipeline(pm, options);
190
191 // Post-GPU pipeline for both Host and GPU.
192 buildPostGPUCommonPassPipeline(pm, options);
193}
194
// Pipeline name followed by its human-readable description, given as adjacent
// string literals that the compiler concatenates into one string.
// NOTE(review): the enclosing function header, the call these arguments belong
// to and the trailing argument are on lines omitted from this listing;
// presumably this is the PassPipelineRegistration inside
// registerGPUToXeVMPipeline() -- confirm against the full source.
197 "gpu-lower-to-xevm-pipeline",
198 "The default GPU to XeVM lowering pipeline. It starts by lowering GPU "
199 "code to the "
200 "specified compilation target (default is fatbin) then lowers the host "
201 "code.",
203}
static llvm::ManagedStatic< PassManagerOptions > options
This class represents a pass manager that runs passes on either a specific operation type,...
Definition PassManager.h:46
void addPass(std::unique_ptr< Pass > pass)
Add the given pass to this pass manager.
Definition Pass.cpp:392
void addNestedPass(std::unique_ptr< Pass > pass)
Add the given pass to a nested pass manager for the given operation kind OpT.
std::unique_ptr<::mlir::Pass > createArithEmulateUnsupportedFloats()
std::unique_ptr<::mlir::Pass > createArithExpandOpsPass()
void registerGPUToXeVMPipeline()
void buildLowerToXeVMPassPipeline(OpPassManager &pm, const GPUToXeVMPipelineOptions &options)
Adds the GPU to XeVM pipeline to the given pass manager.
std::unique_ptr<::mlir::Pass > createMathExtendToSupportedTypes()
std::unique_ptr<::mlir::Pass > createExpandStridedMetadataPass()
std::unique_ptr<::mlir::Pass > createXeGPUWgToSgDistribute()
std::unique_ptr<::mlir::Pass > createXeGPUVectorLinearize()
std::unique_ptr<::mlir::Pass > createXeGPUSgToLaneDistribute()
std::unique_ptr<::mlir::Pass > createXeGPUPeepHoleOptimizer()
std::unique_ptr<::mlir::Pass > createXeGPUBlocking()
std::unique_ptr<::mlir::Pass > createXeGPUPropagateLayout()
Include the generated interface declarations.
std::unique_ptr<::mlir::Pass > createConvertToLLVMPass()
std::unique_ptr<::mlir::Pass > createSCFToControlFlowPass()
std::unique_ptr<::mlir::Pass > createConvertGpuOpsToLLVMSPVOps()
std::unique_ptr<::mlir::Pass > createReconcileUnrealizedCastsPass()
std::unique_ptr<::mlir::Pass > createConvertVectorToLLVMPass()
std::unique_ptr<::mlir::Pass > createGpuAsyncRegionPass()
std::unique_ptr<::mlir::Pass > createCanonicalizerPass()
std::unique_ptr<::mlir::Pass > createConvertXeGPUToXeVMPass()
std::unique_ptr<::mlir::Pass > createLowerAffinePass()
std::unique_ptr<::mlir::Pass > createGpuXeVMAttachTarget()
std::unique_ptr<::mlir::Pass > createConvertMathToXeVM()
std::unique_ptr<::mlir::Pass > createLoopInvariantCodeMotionPass()
std::unique_ptr<::mlir::Pass > createGpuToLLVMConversionPass()
std::unique_ptr<::mlir::Pass > createGpuModuleToBinaryPass()
std::unique_ptr< Pass > createConvertVectorToSCFPass(const VectorTransferToSCFOptions &options=VectorTransferToSCFOptions())
Create a pass to convert a subset of vector ops to SCF.
std::unique_ptr<::mlir::Pass > createConvertXeVMToLLVMPass()
std::unique_ptr<::mlir::Pass > createCSEPass()
Definition CSE.cpp:177
PassPipelineRegistration provides a global initializer that registers a Pass pipeline builder routine...
::llvm::SmallVector< std::string > sourceTypeStrs
Definition Passes.h:20
::llvm::SmallVector< std::string > extraTypeStrs
Definition Passes.h:113