74#include "llvm/ADT/SetVector.h"
79#define GEN_PASS_DEF_ACCROUTINETOGPUFUNC
80#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc"
84#define DEBUG_TYPE "acc-routine-to-gpu-func"
/// Builds a gpu::GPUFuncOp counterpart of `sourceFunc`, reusing its symbol
/// name and function type and cloning its body into the new op.
///
/// \param builder    Builder used to create the GPU function.
/// \param sourceFunc Host func.func whose body is cloned.
///
/// NOTE(review): this listing skips several original lines (see the gaps in
/// the embedded line numbers). `loc`, `mapping`, `sourceEntryBlock` and
/// `replacer` are declared on lines that are not visible here, and the return
/// statement is not shown either — confirm details against the full file.
92static gpu::GPUFuncOp createGPUFuncFromFunc(
OpBuilder &builder,
93 func::FuncOp sourceFunc) {
// The device function keeps the host function's name and signature.
95 StringRef name = sourceFunc.getName();
96 FunctionType type = sourceFunc.getFunctionType();
99 gpu::GPUFuncOp gpuFunc =
100 gpu::GPUFuncOp::create(builder, loc, name, type,
104 Region &sourceBody = sourceFunc.getBody();
105 Region &deviceBody = gpuFunc.getBody();
// Map each source entry-block argument to its destination argument so that
// cloned operations refer to the destination values.
112 for (
auto [srcArg, destArg] : llvm::zip(sourceEntryBlock.
getArguments(),
114 mapping.
map(srcArg, destArg);
// Clone the whole source region into the GPU function using that mapping.
116 sourceBody.cloneInto(&deviceBody, mapping);
// Each func.return found in the GPU function gets a gpu.return that carries
// the same operands. Removal of the original func.return is presumably done
// on a line not shown here — TODO confirm.
119 gpuFunc.walk([](func::ReturnOp op) {
121 gpu::ReturnOp gpuReturn = gpu::ReturnOp::create(replacer, op.getLoc());
122 gpuReturn->setOperands(op.getOperands());
// Find the clone of the source entry block through the mapping and erase it.
// Presumably its operations have been moved elsewhere on the lines missing
// between these statements — TODO confirm.
129 Block *clonedSourceEntry = mapping.
lookup(&sourceEntryBlock);
133 clonedSourceEntry->
erase();
/// A host function paired with an acc.routine op. The RoutineOp half may be a
/// default-constructed (null) op: processCallsInRoutines inserts
/// `{callee, RoutineOp{}}`, and cleanupHostModule checks the routine for
/// null before querying it.
138using CloneCandidate = std::pair<func::FuncOp, RoutineOp>;
/// Walks every acc.routine op in `mod` and sorts it into the output sets.
///
/// \param mod                     Module whose RoutineOps are walked.
/// \param symTab                  Symbol table used to resolve the routine's
///                                function name to a func::FuncOp.
/// \param deviceType              Device type used for the device-specific
///                                gang/worker/vector/bind queries.
/// \param funcsToCloneCandidates  Receives the leaf name of each routine's
///                                function.
/// \param materializedAccRoutines Receives the routine ops whose function
///                                names were added above.
/// \param bindAccRoutines         Receives routine ops that carry a bind name.
///
/// NOTE(review): the caller also passes an `accSupport` argument; the
/// corresponding parameter is on an original line missing from this listing.
142static void collectRoutineCandidates(
143 ModuleOp mod,
SymbolTable &symTab, acc::DeviceType deviceType,
145 llvm::SmallSetVector<llvm::StringRef, 4> &funcsToCloneCandidates,
146 llvm::SmallSetVector<RoutineOp, 4> &materializedAccRoutines,
147 llvm::SmallSetVector<RoutineOp, 4> &bindAccRoutines) {
// True when the routine reports gang, worker, vector or a gang-dim value,
// either through the `deviceType` overload or the no-argument overload.
148 auto isParallelRoutine = [deviceType](RoutineOp routineOp) {
149 return routineOp.hasGang(deviceType) || routineOp.hasGang() ||
150 routineOp.hasWorker(deviceType) || routineOp.hasWorker() ||
151 routineOp.hasVector(deviceType) || routineOp.hasVector() ||
152 routineOp.getGangDimValue(deviceType) || routineOp.getGangDimValue();
155 mod.walk([&](RoutineOp op) {
// Routines with a bind name (either overload) are recorded separately.
// Whether the walk then skips the rest of the body is decided on a line not
// shown here — TODO confirm.
156 if (op.getBindNameValue() || op.getBindNameValue(deviceType)) {
157 bindAccRoutines.insert(op);
// Resolve the function named by the routine; the lookup may yield null, which
// is why the next statement falls back to the routine op itself.
160 func::FuncOp callee =
161 symTab.
lookup<func::FuncOp>(op.getFuncName().getLeafReference());
// Arguments of a diagnostic call whose head is on a missing line (presumably
// a remark emission — TODO confirm): the op to attach to, plus a callback
// that builds the message starting with "Generating".
163 callee ? callee.getOperation() : op.getOperation(),
164 [&op, &isParallelRoutine]() {
165 std::string msg =
"Generating";
166 if (op.getImplicitAttr())
168 msg +=
" acc routine";
169 if (!isParallelRoutine(op))
// Record the function name and its routine op together.
// NOTE(review): both containers are SetVectors, which drop duplicate
// insertions independently, and processCallsInRoutines later iterates them
// with llvm::zip. If two distinct routine ops ever named the same function
// the pairing would shift — confirm this cannot happen.
174 funcsToCloneCandidates.insert(op.getFuncName().getLeafReference());
175 materializedAccRoutines.insert(op);
/// Builds `funcsToClone` from the routine candidates and from the functions
/// called inside them.
///
/// \param funcsToCloneCandidates  Names of functions referenced by routines.
/// \param materializedAccRoutines Routine ops iterated in lockstep with the
///                                names above.
/// \param funcsToClone            Output set of (function, routine) pairs.
///
/// `callCheckResult` starts as success and is set to failure where the
/// "Unsupported call in acc routine" NYI diagnostic is emitted.
///
/// NOTE(review): the leading parameters (`symTab`, `gpuSymTab`, `accSupport`,
/// as used in the body and passed by the caller) and the final return are on
/// original lines missing from this listing.
181static LogicalResult processCallsInRoutines(
183 const llvm::SmallSetVector<llvm::StringRef, 4> &funcsToCloneCandidates,
184 const llvm::SmallSetVector<RoutineOp, 4> &materializedAccRoutines,
185 llvm::SmallSetVector<CloneCandidate, 4> &funcsToClone) {
186 LogicalResult callCheckResult =
success();
// Per-call handler, invoked for every CallOpInterface op found in a routine
// function below.
187 auto processCalls = [&](CallOpInterface callOp) {
// Calls without a callable, or whose callable is not a SymbolRefAttr, are
// tested for here; the statements taken in those cases are not shown
// (presumably early returns — TODO confirm).
188 if (!callOp.getCallableForCallee())
190 auto calleeSymbolRef =
191 dyn_cast<SymbolRefAttr>(callOp.getCallableForCallee());
192 if (!calleeSymbolRef)
// Resolve the callee to a func::FuncOp by its leaf symbol name.
196 symTab.
lookup<func::FuncOp>(calleeSymbolRef.getLeafReference());
// Checks whether a symbol with the callee's name already exists in the GPU
// symbol table; the action taken is on a missing line.
200 if (gpuSymTab.
lookup(callee.getName()))
// Reports an unsupported call and records the failure. The condition
// guarding this diagnostic is on a missing line.
206 accSupport.
emitNYI(callOp->getLoc(),
"Unsupported call in acc routine");
207 callCheckResult = failure();
// Called functions are queued with a null RoutineOp.
210 funcsToClone.insert({callee, RoutineOp{}});
// Iterate candidate names and routine ops pairwise.
213 for (
auto [funcName, accRoutine] :
214 llvm::zip(funcsToCloneCandidates, materializedAccRoutines)) {
215 func::FuncOp
func = symTab.
lookup<func::FuncOp>(funcName);
// Queue the routine's function only if the GPU symbol table has no symbol of
// that name yet.
218 if (!gpuSymTab.
lookup(funcName))
219 funcsToClone.insert({
func, accRoutine});
// Visit every call inside the function, then check whether any of them was
// flagged as unsupported.
220 func.walk([&](CallOpInterface callOp) { processCalls(callOp); });
221 if (failed(callCheckResult))
/// Creates a GPU function for each entry of `funcsToClone` and inserts it
/// into the GPU symbol table.
///
/// \param funcsToClone Set of (function, routine) pairs to process.
///
/// NOTE(review): the leading parameters (`mod`, `accSupport`, `gpuSymTab`, as
/// used in the body and passed by the caller), the declarations of `builder`
/// and `ctx`, and the return statements are on original lines missing from
/// this listing.
230static LogicalResult cloneFuncsToGPUModule(
232 const llvm::SmallSetVector<CloneCandidate, 4> &funcsToClone) {
236 for (CloneCandidate candidate : funcsToClone) {
237 func::FuncOp srcFunc = candidate.first;
// Declarations (functions without a body) take a separate path whose
// statements are not shown here.
239 if (srcFunc.isDeclaration()) {
245 gpu::GPUFuncOp deviceFuncOp = createGPUFuncFromFunc(builder, srcFunc);
// When the source function carries a SpecializedRoutineAttr, its recorded
// function name is read. The surrounding call whose failure triggers the
// diagnostic below is on a missing line (presumably a symbol-use replacement
// within `mod` — TODO confirm).
247 if (
auto specRoutineAttr = srcFunc->getAttrOfType<SpecializedRoutineAttr>(
249 StringAttr funcName = specRoutineAttr.getFuncName();
251 StringAttr::get(ctx, deviceFuncOp.getName()), funcName, mod))) {
252 accSupport.
emitNYI(deviceFuncOp.getLoc(),
253 "cannot replace symbol for acc routine");
// A second SpecializedRoutineAttr query; its body is not shown.
258 if (
auto specAttr = srcFunc->getAttrOfType<SpecializedRoutineAttr>(
// Register the new device function in the GPU symbol table.
262 gpuSymTab.
insert(deviceFuncOp);
/// Erases functions listed in `funcsToClone` when the erase condition holds.
///
/// \param funcsToClone Set of (function, routine) pairs to inspect.
///
/// NOTE(review): the return type and the second half of the erase condition
/// are on original lines missing from this listing.
269cleanupHostModule(
const llvm::SmallSetVector<CloneCandidate, 4> &funcsToClone) {
270 for (CloneCandidate candidate : funcsToClone) {
271 func::FuncOp funcCandidate = candidate.first;
272 RoutineOp routineCandidate = candidate.second;
// The routine is null-checked before querying nohost. An additional
// alternative condition follows on a line not shown.
273 if ((routineCandidate && routineCandidate.getNohost()) ||
275 funcCandidate.erase();
/// Pass class deriving from acc::impl::ACCRoutineToGPUFuncBase (presumably the
/// base generated through GEN_PASS_DEF_ACCROUTINETOGPUFUNC — TODO confirm).
/// runOnOperation drives the helpers above in order: collect routine
/// candidates, process calls, clone into the GPU module, clean up the host
/// module, then iterate the bind routines.
///
/// NOTE(review): several original lines are missing from this listing,
/// including the declarations of `accSupport`, `symTab` and `gpuSymTab`, the
/// body of the final loop and the end of the class.
279class ACCRoutineToGPUFunc
280 :
public acc::impl::ACCRoutineToGPUFuncBase<ACCRoutineToGPUFunc> {
// Inherit the base class constructors.
282 using acc::impl::ACCRoutineToGPUFuncBase<
283 ACCRoutineToGPUFunc>::ACCRoutineToGPUFuncBase;
285 void runOnOperation()
override {
286 ModuleOp mod = getOperation();
// With no acc.routine ops in the module, only a debug message is logged; the
// statement that follows it is not shown (presumably an early return).
287 if (mod.getOps<RoutineOp>().empty()) {
288 LLVM_DEBUG(llvm::dbgs()
289 <<
"Skipping ACCRoutineToGPUFunc - no acc.routine ops\n");
// The GPU module is held as an optional; the initializer and the check that
// guards the failure path are on missing lines.
294 std::optional<gpu::GPUModuleOp> gpuModOpt =
297 accSupport.
emitNYI(mod.getLoc(),
"Failed to create GPU module");
298 return signalPassFailure();
300 gpu::GPUModuleOp gpuMod = *gpuModOpt;
// Output containers filled by collectRoutineCandidates.
305 llvm::SmallSetVector<llvm::StringRef, 4> funcsToCloneCandidates;
306 llvm::SmallSetVector<RoutineOp, 4> materializedAccRoutines;
307 llvm::SmallSetVector<RoutineOp, 4> bindAccRoutines;
309 collectRoutineCandidates(mod, symTab, this->deviceType, accSupport,
310 funcsToCloneCandidates, materializedAccRoutines,
// Any failure in call processing or cloning fails the pass.
313 llvm::SmallSetVector<CloneCandidate, 4> funcsToClone;
314 if (failed(processCallsInRoutines(symTab, gpuSymTab, accSupport,
315 funcsToCloneCandidates,
316 materializedAccRoutines, funcsToClone)))
317 return signalPassFailure();
319 if (failed(cloneFuncsToGPUModule(mod, accSupport, gpuSymTab, funcsToClone)))
320 return signalPassFailure();
322 cleanupHostModule(funcsToClone);
// Loop over the routines that had a bind name; the loop body is not shown.
323 for (RoutineOp bindOp : bindAccRoutines)
Block represents an ordered list of Operations.
void erase()
Unlink this Block from its parent region and delete it.
OpListType & getOperations()
BlockArgListType getArguments()
This is a utility class for mapping one set of IR entities to another.
auto lookup(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
Operation is the basic unit of execution within MLIR.
Operation * clone(IRMapping &mapper, const CloneOptions &options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation using the map that is provided (leaving them alone if no entry is present).
This class contains a list of basic blocks and a link to the parent operation it is attached to.
This class allows for representing and managing the symbol table used by operations with the 'SymbolTable' trait.
static StringRef getSymbolAttrName()
Return the name of the attribute used for symbol names.
static LogicalResult replaceAllSymbolUses(StringAttr oldSymbol, StringAttr newSymbol, Operation *from)
Attempt to replace all uses of the given symbol 'oldSymbol' with the provided symbol 'newSymbol' that are nested within the given operation 'from'.
Operation * lookup(StringRef name) const
Look up a symbol with the specified name, returning null if no such name exists.
StringAttr insert(Operation *symbol, Block::iterator insertPt={})
Insert a new symbol into the table, and rename it as necessary to avoid collisions.
remark::detail::InFlightRemark emitRemark(Operation *op, std::function< std::string()> messageFn, llvm::StringRef category="openacc")
Emit an OpenACC remark with lazy message generation.
InFlightDiagnostic emitNYI(Location loc, const Twine &message)
Report a case that is not yet supported by the implementation.
bool isValidSymbolUse(Operation *user, SymbolRefAttr symbol, Operation **definingOpPtr=nullptr)
Check if a symbol use is valid for use in an OpenACC region.
std::optional< gpu::GPUModuleOp > getOrCreateGPUModule(ModuleOp mod, bool create=true, llvm::StringRef name="")
Get or optionally create a GPU module in the given module.
bool isAccRoutine(mlir::Operation *op)
Used to check whether the current operation is marked with acc routine.
static constexpr StringLiteral getSpecializedRoutineAttrName()
bool isSpecializedAccRoutine(mlir::Operation *op)
Used to check whether this is a specialized accelerator version of acc routine function.
Include the generated interface declarations.