MLIR 22.0.0git
OpenMPToLLVMIRTranslation.cpp
Go to the documentation of this file.
1//===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a translation between the MLIR OpenMP dialect and LLVM
10// IR.
11//
12//===----------------------------------------------------------------------===//
19#include "mlir/IR/Operation.h"
20#include "mlir/Support/LLVM.h"
23
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/ADT/TypeSwitch.h"
27#include "llvm/Frontend/OpenMP/OMPConstants.h"
28#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
29#include "llvm/IR/Constants.h"
30#include "llvm/IR/DebugInfoMetadata.h"
31#include "llvm/IR/DerivedTypes.h"
32#include "llvm/IR/IRBuilder.h"
33#include "llvm/IR/MDBuilder.h"
34#include "llvm/IR/ReplaceConstant.h"
35#include "llvm/Support/FileSystem.h"
36#include "llvm/Support/VirtualFileSystem.h"
37#include "llvm/TargetParser/Triple.h"
38#include "llvm/Transforms/Utils/ModuleUtils.h"
39
40#include <cstdint>
41#include <iterator>
42#include <numeric>
43#include <optional>
44#include <utility>
45
46using namespace mlir;
47
48namespace {
49static llvm::omp::ScheduleKind
50convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
51 if (!schedKind.has_value())
52 return llvm::omp::OMP_SCHEDULE_Default;
53 switch (schedKind.value()) {
54 case omp::ClauseScheduleKind::Static:
55 return llvm::omp::OMP_SCHEDULE_Static;
56 case omp::ClauseScheduleKind::Dynamic:
57 return llvm::omp::OMP_SCHEDULE_Dynamic;
58 case omp::ClauseScheduleKind::Guided:
59 return llvm::omp::OMP_SCHEDULE_Guided;
60 case omp::ClauseScheduleKind::Auto:
61 return llvm::omp::OMP_SCHEDULE_Auto;
62 case omp::ClauseScheduleKind::Runtime:
63 return llvm::omp::OMP_SCHEDULE_Runtime;
64 }
65 llvm_unreachable("unhandled schedule clause argument");
66}
67
68/// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
69/// insertion points for allocas.
70class OpenMPAllocaStackFrame
71 : public StateStackFrameBase<OpenMPAllocaStackFrame> {
72public:
74
75 explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
76 : allocaInsertPoint(allocaIP) {}
77 llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
78};
79
80/// Stack frame to hold a \see llvm::CanonicalLoopInfo representing the
81/// collapsed canonical loop information corresponding to an \c omp.loop_nest
82/// operation.
class OpenMPLoopInfoStackFrame
    : public StateStackFrameBase<OpenMPLoopInfoStackFrame> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPLoopInfoStackFrame)
  /// Canonical loop info for the collapsed loop nest. Null until set during
  /// translation of the corresponding \c omp.loop_nest; read back via
  /// findCurrentLoopInfo by the enclosing loop wrapper.
  llvm::CanonicalLoopInfo *loopInfo = nullptr;
};
89
90/// Custom error class to signal translation errors that don't need reporting,
91/// since encountering them will have already triggered relevant error messages.
92///
93/// Its purpose is to serve as the glue between MLIR failures represented as
94/// \see LogicalResult instances and \see llvm::Error instances used to
95/// propagate errors through the \see llvm::OpenMPIRBuilder. Generally, when an
96/// error of the first type is raised, a message is emitted directly (the \see
97/// LogicalResult itself does not hold any information). If we need to forward
98/// this error condition as an \see llvm::Error while avoiding triggering some
99/// redundant error reporting later on, we need a custom \see llvm::ErrorInfo
100/// class to just signal this situation has happened.
101///
102/// For example, this class should be used to trigger errors from within
103/// callbacks passed to the \see OpenMPIRBuilder when they were triggered by the
104/// translation of their own regions. This unclutters the error log from
105/// redundant messages.
class PreviouslyReportedError
    : public llvm::ErrorInfo<PreviouslyReportedError> {
public:
  /// Intentionally a no-op: the diagnostic was already emitted when the
  /// original MLIR failure occurred, so logging here would duplicate it.
  void log(raw_ostream &) const override {
    // Do not log anything.
  }

  /// This error is never meant to cross into std::error_code territory.
  std::error_code convertToErrorCode() const override {
    llvm_unreachable(
        "PreviouslyReportedError doesn't support ECError conversion");
  }

  // Used by ErrorInfo::classID.
  static char ID;
};

char PreviouslyReportedError::ID = 0;
123
124/*
125 * Custom class for processing linear clause for omp.wsloop
126 * and omp.simd. Linear clause translation requires setup,
127 * initialization, update, and finalization at varying
128 * basic blocks in the IR. This class helps maintain
129 * internal state to allow consistent translation in
130 * each of these stages.
131 */
132
class LinearClauseProcessor {

private:
  /// Allocas capturing each linear variable's value on entry to the construct.
  SmallVector<llvm::Value *> linearPreconditionVars;
  /// Per-iteration temporaries holding the updated linear values.
  SmallVector<llvm::Value *> linearLoopBodyTemps;
  /// Original allocas backing the linear variables (only alloca-backed
  /// variables are registered; see createLinearVar).
  SmallVector<llvm::AllocaInst *> linearOrigVars;
  /// LLVM values the MLIR linear operands map to.
  SmallVector<llvm::Value *> linearOrigVal;
  /// Step value for each linear variable, parallel to the vectors above.
  SmallVector<llvm::Value *> linearSteps;
  // Blocks created by outlineLinearFinalizationBB and consumed by
  // finalizeLinearVar.
  llvm::BasicBlock *linearFinalizationBB;
  llvm::BasicBlock *linearExitBB;
  llvm::BasicBlock *linearLastIterExitBB;

public:
  // Allocate space for linear variables
  //
  // Creates a precondition slot and a loop-body temporary mirroring the
  // original variable's allocated type. Variables whose mapped LLVM value is
  // not an AllocaInst are silently skipped.
  void createLinearVar(llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation,
                       mlir::Value &linearVar) {
    if (llvm::AllocaInst *linearVarAlloca = dyn_cast<llvm::AllocaInst>(
            moduleTranslation.lookupValue(linearVar))) {
      linearPreconditionVars.push_back(builder.CreateAlloca(
          linearVarAlloca->getAllocatedType(), nullptr, ".linear_var"));
      llvm::Value *linearLoopBodyTemp = builder.CreateAlloca(
          linearVarAlloca->getAllocatedType(), nullptr, ".linear_result");
      linearOrigVal.push_back(moduleTranslation.lookupValue(linearVar));
      linearLoopBodyTemps.push_back(linearLoopBodyTemp);
      linearOrigVars.push_back(linearVarAlloca);
    }
  }

  // Initialize linear step
  inline void initLinearStep(LLVM::ModuleTranslation &moduleTranslation,
                             mlir::Value &linearStep) {
    linearSteps.push_back(moduleTranslation.lookupValue(linearStep));
  }

  // Emit IR for initialization of linear variables
  //
  // Copies each original variable's current value into its precondition slot
  // in the loop pre-header, then emits a barrier so all threads observe the
  // initialized values. Returns the insertion point after the barrier.
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy
  initLinearVar(llvm::IRBuilderBase &builder,
                LLVM::ModuleTranslation &moduleTranslation,
                llvm::BasicBlock *loopPreHeader) {
    builder.SetInsertPoint(loopPreHeader->getTerminator());
    for (size_t index = 0; index < linearOrigVars.size(); index++) {
      llvm::LoadInst *linearVarLoad = builder.CreateLoad(
          linearOrigVars[index]->getAllocatedType(), linearOrigVars[index]);
      builder.CreateStore(linearVarLoad, linearPreconditionVars[index]);
    }
    llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
        moduleTranslation.getOpenMPBuilder()->createBarrier(
            builder.saveIP(), llvm::omp::OMPD_barrier);
    return afterBarrierIP;
  }

  // Emit IR for updating Linear variables
  //
  // In the loop body, computes start + iv * step for each linear variable and
  // stores it into the corresponding loop-body temporary (the original
  // variable is not written here; see finalizeLinearVar).
  void updateLinearVar(llvm::IRBuilderBase &builder, llvm::BasicBlock *loopBody,
                       llvm::Value *loopInductionVar) {
    builder.SetInsertPoint(loopBody->getTerminator());
    for (size_t index = 0; index < linearPreconditionVars.size(); index++) {
      // Emit increments for linear vars
      llvm::LoadInst *linearVarStart =
          builder.CreateLoad(linearOrigVars[index]->getAllocatedType(),
                             linearPreconditionVars[index]);
      auto mulInst = builder.CreateMul(loopInductionVar, linearSteps[index]);
      auto addInst = builder.CreateAdd(linearVarStart, mulInst);
      builder.CreateStore(addInst, linearLoopBodyTemps[index]);
    }
  }

  // Linear variable finalization is conditional on the last logical iteration.
  // Create BB splits to manage the same.
  void outlineLinearFinalizationBB(llvm::IRBuilderBase &builder,
                                   llvm::BasicBlock *loopExit) {
    linearFinalizationBB = loopExit->splitBasicBlock(
        loopExit->getTerminator(), "omp_loop.linear_finalization");
    linearExitBB = linearFinalizationBB->splitBasicBlock(
        linearFinalizationBB->getTerminator(), "omp_loop.linear_exit");
    linearLastIterExitBB = linearFinalizationBB->splitBasicBlock(
        linearFinalizationBB->getTerminator(), "omp_loop.linear_lastiter_exit");
  }

  // Finalize the linear vars
  //
  // Writes the loop-body temporaries back to the original variables, but only
  // on the thread that executed the last logical iteration (guarded by
  // `lastIter != 0`), then emits a barrier in the exit block.
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy
  finalizeLinearVar(llvm::IRBuilderBase &builder,
                    LLVM::ModuleTranslation &moduleTranslation,
                    llvm::Value *lastIter) {
    // Emit condition to check whether last logical iteration is being executed
    builder.SetInsertPoint(linearFinalizationBB->getTerminator());
    llvm::Value *loopLastIterLoad = builder.CreateLoad(
        llvm::Type::getInt32Ty(builder.getContext()), lastIter);
    llvm::Value *isLast =
        builder.CreateCmp(llvm::CmpInst::ICMP_NE, loopLastIterLoad,
                          llvm::ConstantInt::get(
                              llvm::Type::getInt32Ty(builder.getContext()), 0));
    // Store the linear variable values to original variables.
    // The temporaries share the original variable's allocated type (see
    // createLinearVar), so that type is used for the load.
    builder.SetInsertPoint(linearLastIterExitBB->getTerminator());
    for (size_t index = 0; index < linearOrigVars.size(); index++) {
      llvm::LoadInst *linearVarTemp =
          builder.CreateLoad(linearOrigVars[index]->getAllocatedType(),
                             linearLoopBodyTemps[index]);
      builder.CreateStore(linearVarTemp, linearOrigVars[index]);
    }

    // Create conditional branch such that the linear variable
    // values are stored to original variables only at the
    // last logical iteration
    builder.SetInsertPoint(linearFinalizationBB->getTerminator());
    builder.CreateCondBr(isLast, linearLastIterExitBB, linearExitBB);
    // Replace the unconditional terminator left by splitBasicBlock.
    linearFinalizationBB->getTerminator()->eraseFromParent();
    // Emit barrier
    builder.SetInsertPoint(linearExitBB->getTerminator());
    return moduleTranslation.getOpenMPBuilder()->createBarrier(
        builder.saveIP(), llvm::omp::OMPD_barrier);
  }

  // Rewrite all uses of the original variable in `BBName`
  // with the linear variable in-place
  //
  // NOTE(review): the `builder` parameter is unused here — confirm whether it
  // is kept for interface symmetry with the other stages.
  void rewriteInPlace(llvm::IRBuilderBase &builder, const std::string &BBName,
                      size_t varIndex) {
    // Snapshot the user list first: replaceUsesOfWith mutates the use list.
    llvm::SmallVector<llvm::User *> users;
    for (llvm::User *user : linearOrigVal[varIndex]->users())
      users.push_back(user);
    for (auto *user : users) {
      if (auto *userInst = dyn_cast<llvm::Instruction>(user)) {
        if (userInst->getParent()->getName().str() == BBName)
          user->replaceUsesOfWith(linearOrigVal[varIndex],
                                  linearLoopBodyTemps[varIndex]);
      }
    }
  }
};
263
264} // namespace
265
266/// Looks up from the operation from and returns the PrivateClauseOp with
267/// name symbolName
268static omp::PrivateClauseOp findPrivatizer(Operation *from,
269 SymbolRefAttr symbolName) {
270 omp::PrivateClauseOp privatizer =
272 symbolName);
273 assert(privatizer && "privatizer not found in the symbol table");
274 return privatizer;
275}
276
277/// Check whether translation to LLVM IR for the given operation is currently
278/// supported. If not, descriptive diagnostics will be emitted to let users know
279/// this is a not-yet-implemented feature.
280///
281/// \returns success if no unimplemented features are needed to translate the
282/// given operation.
283static LogicalResult checkImplementationStatus(Operation &op) {
284 auto todo = [&op](StringRef clauseName) {
285 return op.emitError() << "not yet implemented: Unhandled clause "
286 << clauseName << " in " << op.getName()
287 << " operation";
288 };
289
290 auto checkAllocate = [&todo](auto op, LogicalResult &result) {
291 if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
292 result = todo("allocate");
293 };
294 auto checkBare = [&todo](auto op, LogicalResult &result) {
295 if (op.getBare())
296 result = todo("ompx_bare");
297 };
298 auto checkCancelDirective = [&todo](auto op, LogicalResult &result) {
299 omp::ClauseCancellationConstructType cancelledDirective =
300 op.getCancelDirective();
301 // Cancelling a taskloop is not yet supported because we don't yet have LLVM
302 // IR conversion for taskloop
303 if (cancelledDirective == omp::ClauseCancellationConstructType::Taskgroup) {
304 Operation *parent = op->getParentOp();
305 while (parent) {
306 if (parent->getDialect() == op->getDialect())
307 break;
308 parent = parent->getParentOp();
309 }
310 if (isa_and_nonnull<omp::TaskloopOp>(parent))
311 result = todo("cancel directive inside of taskloop");
312 }
313 };
314 auto checkDepend = [&todo](auto op, LogicalResult &result) {
315 if (!op.getDependVars().empty() || op.getDependKinds())
316 result = todo("depend");
317 };
318 auto checkDevice = [&todo](auto op, LogicalResult &result) {
319 if (op.getDevice())
320 result = todo("device");
321 };
322 auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
323 if (op.getDistScheduleChunkSize())
324 result = todo("dist_schedule with chunk_size");
325 };
326 auto checkHint = [](auto op, LogicalResult &) {
327 if (op.getHint())
328 op.emitWarning("hint clause discarded");
329 };
330 auto checkInReduction = [&todo](auto op, LogicalResult &result) {
331 if (!op.getInReductionVars().empty() || op.getInReductionByref() ||
332 op.getInReductionSyms())
333 result = todo("in_reduction");
334 };
335 auto checkIsDevicePtr = [&todo](auto op, LogicalResult &result) {
336 if (!op.getIsDevicePtrVars().empty())
337 result = todo("is_device_ptr");
338 };
339 auto checkLinear = [&todo](auto op, LogicalResult &result) {
340 if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
341 result = todo("linear");
342 };
343 auto checkNowait = [&todo](auto op, LogicalResult &result) {
344 if (op.getNowait())
345 result = todo("nowait");
346 };
347 auto checkOrder = [&todo](auto op, LogicalResult &result) {
348 if (op.getOrder() || op.getOrderMod())
349 result = todo("order");
350 };
351 auto checkParLevelSimd = [&todo](auto op, LogicalResult &result) {
352 if (op.getParLevelSimd())
353 result = todo("parallelization-level");
354 };
355 auto checkPriority = [&todo](auto op, LogicalResult &result) {
356 if (op.getPriority())
357 result = todo("priority");
358 };
359 auto checkPrivate = [&todo](auto op, LogicalResult &result) {
360 if (!op.getPrivateVars().empty() || op.getPrivateSyms())
361 result = todo("privatization");
362 };
363 auto checkReduction = [&todo](auto op, LogicalResult &result) {
364 if (isa<omp::TeamsOp>(op))
365 if (!op.getReductionVars().empty() || op.getReductionByref() ||
366 op.getReductionSyms())
367 result = todo("reduction");
368 if (op.getReductionMod() &&
369 op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
370 result = todo("reduction with modifier");
371 };
372 auto checkTaskReduction = [&todo](auto op, LogicalResult &result) {
373 if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() ||
374 op.getTaskReductionSyms())
375 result = todo("task_reduction");
376 };
377 auto checkUntied = [&todo](auto op, LogicalResult &result) {
378 if (op.getUntied())
379 result = todo("untied");
380 };
381
382 LogicalResult result = success();
384 .Case([&](omp::CancelOp op) { checkCancelDirective(op, result); })
385 .Case([&](omp::CancellationPointOp op) {
386 checkCancelDirective(op, result);
387 })
388 .Case([&](omp::DistributeOp op) {
389 checkAllocate(op, result);
390 checkDistSchedule(op, result);
391 checkOrder(op, result);
392 })
393 .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
394 .Case([&](omp::SectionsOp op) {
395 checkAllocate(op, result);
396 checkPrivate(op, result);
397 checkReduction(op, result);
398 })
399 .Case([&](omp::SingleOp op) {
400 checkAllocate(op, result);
401 checkPrivate(op, result);
402 })
403 .Case([&](omp::TeamsOp op) {
404 checkAllocate(op, result);
405 checkPrivate(op, result);
406 })
407 .Case([&](omp::TaskOp op) {
408 checkAllocate(op, result);
409 checkInReduction(op, result);
410 })
411 .Case([&](omp::TaskgroupOp op) {
412 checkAllocate(op, result);
413 checkTaskReduction(op, result);
414 })
415 .Case([&](omp::TaskwaitOp op) {
416 checkDepend(op, result);
417 checkNowait(op, result);
418 })
419 .Case([&](omp::TaskloopOp op) {
420 // TODO: Add other clauses check
421 checkUntied(op, result);
422 checkPriority(op, result);
423 })
424 .Case([&](omp::WsloopOp op) {
425 checkAllocate(op, result);
426 checkLinear(op, result);
427 checkOrder(op, result);
428 checkReduction(op, result);
429 })
430 .Case([&](omp::ParallelOp op) {
431 checkAllocate(op, result);
432 checkReduction(op, result);
433 })
434 .Case([&](omp::SimdOp op) {
435 checkLinear(op, result);
436 checkReduction(op, result);
437 })
438 .Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp,
439 omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); })
440 .Case<omp::TargetEnterDataOp, omp::TargetExitDataOp, omp::TargetUpdateOp>(
441 [&](auto op) { checkDepend(op, result); })
442 .Case([&](omp::TargetOp op) {
443 checkAllocate(op, result);
444 checkBare(op, result);
445 checkDevice(op, result);
446 checkInReduction(op, result);
447 checkIsDevicePtr(op, result);
448 })
449 .Default([](Operation &) {
450 // Assume all clauses for an operation can be translated unless they are
451 // checked above.
452 });
453 return result;
454}
455
456static LogicalResult handleError(llvm::Error error, Operation &op) {
457 LogicalResult result = success();
458 if (error) {
459 llvm::handleAllErrors(
460 std::move(error),
461 [&](const PreviouslyReportedError &) { result = failure(); },
462 [&](const llvm::ErrorInfoBase &err) {
463 result = op.emitError(err.message());
464 });
465 }
466 return result;
467}
468
469template <typename T>
470static LogicalResult handleError(llvm::Expected<T> &result, Operation &op) {
471 if (!result)
472 return handleError(result.takeError(), op);
473
474 return success();
475}
476
477/// Find the insertion point for allocas given the current insertion point for
478/// normal operations in the builder.
static llvm::OpenMPIRBuilder::InsertPointTy
findAllocaInsertPoint(llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation) {
  // If there is an alloca insertion point on stack, i.e. we are in a nested
  // operation and a specific point was provided by some surrounding
  // operation, use it. The walk stops at the innermost frame found.
  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
      [&](OpenMPAllocaStackFrame &frame) {
        allocaInsertPoint = frame.allocaInsertPoint;
        return WalkResult::interrupt();
      });
  // In cases with multiple levels of outlining, the tree walk might find an
  // alloca insertion point that is inside the original function while the
  // builder insertion point is inside the outlined function. We need to make
  // sure that we do not use it in those cases.
  if (walkResult.wasInterrupted() &&
      allocaInsertPoint.getBlock()->getParent() ==
          builder.GetInsertBlock()->getParent())
    return allocaInsertPoint;

  // Otherwise, insert to the entry block of the surrounding function.
  // If the current IRBuilder InsertPoint is the function's entry, it cannot
  // also be used for alloca insertion which would result in insertion order
  // confusion. Create a new BasicBlock for the Builder and use the entry block
  // for the allocs.
  // TODO: Create a dedicated alloca BasicBlock at function creation such that
  // we do not need to move the current InsertPoint here.
  if (builder.GetInsertBlock() ==
      &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
    assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
           "Assuming end of basic block");
    // Split off a fresh block for regular code; the old entry keeps allocas.
    llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
        builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
        builder.GetInsertBlock()->getNextNode());
    builder.CreateBr(entryBB);
    builder.SetInsertPoint(entryBB);
  }

  llvm::BasicBlock &funcEntryBlock =
      builder.GetInsertBlock()->getParent()->getEntryBlock();
  return llvm::OpenMPIRBuilder::InsertPointTy(
      &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
}
523
524/// Find the loop information structure for the loop nest being translated. It
525/// will return a `null` value unless called from the translation function for
526/// a loop wrapper operation after successfully translating its body.
527static llvm::CanonicalLoopInfo *
529 llvm::CanonicalLoopInfo *loopInfo = nullptr;
530 moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
531 [&](OpenMPLoopInfoStackFrame &frame) {
532 loopInfo = frame.loopInfo;
533 return WalkResult::interrupt();
534 });
535 return loopInfo;
536}
537
538/// Converts the given region that appears within an OpenMP dialect operation to
539/// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
540/// region, and a branch from any block with an successor-less OpenMP terminator
541/// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
542/// of the continuation block if provided.
544 Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
545 LLVM::ModuleTranslation &moduleTranslation,
546 SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
547 bool isLoopWrapper = isa<omp::LoopWrapperInterface>(region.getParentOp());
548
549 llvm::BasicBlock *continuationBlock =
550 splitBB(builder, true, "omp.region.cont");
551 llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
552
553 llvm::LLVMContext &llvmContext = builder.getContext();
554 for (Block &bb : region) {
555 llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
556 llvmContext, blockName, builder.GetInsertBlock()->getParent(),
557 builder.GetInsertBlock()->getNextNode());
558 moduleTranslation.mapBlock(&bb, llvmBB);
559 }
560
561 llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
562
563 // Terminators (namely YieldOp) may be forwarding values to the region that
564 // need to be available in the continuation block. Collect the types of these
565 // operands in preparation of creating PHI nodes. This is skipped for loop
566 // wrapper operations, for which we know in advance they have no terminators.
567 SmallVector<llvm::Type *> continuationBlockPHITypes;
568 unsigned numYields = 0;
569
570 if (!isLoopWrapper) {
571 bool operandsProcessed = false;
572 for (Block &bb : region.getBlocks()) {
573 if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
574 if (!operandsProcessed) {
575 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
576 continuationBlockPHITypes.push_back(
577 moduleTranslation.convertType(yield->getOperand(i).getType()));
578 }
579 operandsProcessed = true;
580 } else {
581 assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
582 "mismatching number of values yielded from the region");
583 for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
584 llvm::Type *operandType =
585 moduleTranslation.convertType(yield->getOperand(i).getType());
586 (void)operandType;
587 assert(continuationBlockPHITypes[i] == operandType &&
588 "values of mismatching types yielded from the region");
589 }
590 }
591 numYields++;
592 }
593 }
594 }
595
596 // Insert PHI nodes in the continuation block for any values forwarded by the
597 // terminators in this region.
598 if (!continuationBlockPHITypes.empty())
599 assert(
600 continuationBlockPHIs &&
601 "expected continuation block PHIs if converted regions yield values");
602 if (continuationBlockPHIs) {
603 llvm::IRBuilderBase::InsertPointGuard guard(builder);
604 continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
605 builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
606 for (llvm::Type *ty : continuationBlockPHITypes)
607 continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
608 }
609
610 // Convert blocks one by one in topological order to ensure
611 // defs are converted before uses.
613 for (Block *bb : blocks) {
614 llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
615 // Retarget the branch of the entry block to the entry block of the
616 // converted region (regions are single-entry).
617 if (bb->isEntryBlock()) {
618 assert(sourceTerminator->getNumSuccessors() == 1 &&
619 "provided entry block has multiple successors");
620 assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
621 "ContinuationBlock is not the successor of the entry block");
622 sourceTerminator->setSuccessor(0, llvmBB);
623 }
624
625 llvm::IRBuilderBase::InsertPointGuard guard(builder);
626 if (failed(
627 moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder)))
628 return llvm::make_error<PreviouslyReportedError>();
629
630 // Create a direct branch here for loop wrappers to prevent their lack of a
631 // terminator from causing a crash below.
632 if (isLoopWrapper) {
633 builder.CreateBr(continuationBlock);
634 continue;
635 }
636
637 // Special handling for `omp.yield` and `omp.terminator` (we may have more
638 // than one): they return the control to the parent OpenMP dialect operation
639 // so replace them with the branch to the continuation block. We handle this
640 // here to avoid relying inter-function communication through the
641 // ModuleTranslation class to set up the correct insertion point. This is
642 // also consistent with MLIR's idiom of handling special region terminators
643 // in the same code that handles the region-owning operation.
644 Operation *terminator = bb->getTerminator();
645 if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
646 builder.CreateBr(continuationBlock);
647
648 for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
649 (*continuationBlockPHIs)[i]->addIncoming(
650 moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
651 }
652 }
653 // After all blocks have been traversed and values mapped, connect the PHI
654 // nodes to the results of preceding blocks.
655 LLVM::detail::connectPHINodes(region, moduleTranslation);
656
657 // Remove the blocks and values defined in this region from the mapping since
658 // they are not visible outside of this region. This allows the same region to
659 // be converted several times, that is cloned, without clashes, and slightly
660 // speeds up the lookups.
661 moduleTranslation.forgetMapping(region);
662
663 return continuationBlock;
664}
665
666/// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
667static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
668 switch (kind) {
669 case omp::ClauseProcBindKind::Close:
670 return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
671 case omp::ClauseProcBindKind::Master:
672 return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
673 case omp::ClauseProcBindKind::Primary:
674 return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
675 case omp::ClauseProcBindKind::Spread:
676 return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
677 }
678 llvm_unreachable("Unknown ClauseProcBindKind kind");
679}
680
681/// Maps block arguments from \p blockArgIface (which are MLIR values) to the
682/// corresponding LLVM values of \p the interface's operands. This is useful
683/// when an OpenMP region with entry block arguments is converted to LLVM. In
684/// this case the block arguments are (part of) of the OpenMP region's entry
685/// arguments and the operands are (part of) of the operands to the OpenMP op
686/// containing the region.
687static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation,
688 omp::BlockArgOpenMPOpInterface blockArgIface) {
690 blockArgIface.getBlockArgsPairs(blockArgsPairs);
691 for (auto [var, arg] : blockArgsPairs)
692 moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
693}
694
695/// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
696static LogicalResult
697convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
698 LLVM::ModuleTranslation &moduleTranslation) {
699 auto maskedOp = cast<omp::MaskedOp>(opInst);
700 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
701
702 if (failed(checkImplementationStatus(opInst)))
703 return failure();
704
705 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
706 // MaskedOp has only one region associated with it.
707 auto &region = maskedOp.getRegion();
708 builder.restoreIP(codeGenIP);
709 return convertOmpOpRegions(region, "omp.masked.region", builder,
710 moduleTranslation)
711 .takeError();
712 };
713
714 // TODO: Perform finalization actions for variables. This has to be
715 // called for variables which have destructors/finalizers.
716 auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
717
718 llvm::Value *filterVal = nullptr;
719 if (auto filterVar = maskedOp.getFilteredThreadId()) {
720 filterVal = moduleTranslation.lookupValue(filterVar);
721 } else {
722 llvm::LLVMContext &llvmContext = builder.getContext();
723 filterVal =
724 llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), /*V=*/0);
725 }
726 assert(filterVal != nullptr);
727 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
728 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
729 moduleTranslation.getOpenMPBuilder()->createMasked(ompLoc, bodyGenCB,
730 finiCB, filterVal);
731
732 if (failed(handleError(afterIP, opInst)))
733 return failure();
734
735 builder.restoreIP(*afterIP);
736 return success();
737}
738
739/// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
740static LogicalResult
741convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
742 LLVM::ModuleTranslation &moduleTranslation) {
743 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
744 auto masterOp = cast<omp::MasterOp>(opInst);
745
746 if (failed(checkImplementationStatus(opInst)))
747 return failure();
748
749 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
750 // MasterOp has only one region associated with it.
751 auto &region = masterOp.getRegion();
752 builder.restoreIP(codeGenIP);
753 return convertOmpOpRegions(region, "omp.master.region", builder,
754 moduleTranslation)
755 .takeError();
756 };
757
758 // TODO: Perform finalization actions for variables. This has to be
759 // called for variables which have destructors/finalizers.
760 auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
761
762 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
763 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
764 moduleTranslation.getOpenMPBuilder()->createMaster(ompLoc, bodyGenCB,
765 finiCB);
766
767 if (failed(handleError(afterIP, opInst)))
768 return failure();
769
770 builder.restoreIP(*afterIP);
771 return success();
772}
773
/// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  auto criticalOp = cast<omp::CriticalOp>(opInst);

  // Bail out early if the op carries clauses this translation can't handle.
  if (failed(checkImplementationStatus(opInst)))
    return failure();

  // Body generator: translate the op's single region at the insertion point
  // chosen by the OpenMPIRBuilder.
  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
    // CriticalOp has only one region associated with it.
    auto &region = cast<omp::CriticalOp>(opInst).getRegion();
    builder.restoreIP(codeGenIP);
    return convertOmpOpRegions(region, "omp.critical.region", builder,
                               moduleTranslation)
        .takeError();
  };

  // TODO: Perform finalization actions for variables. This has to be
  // called for variables which have destructors/finalizers.
  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
  llvm::Constant *hint = nullptr;

  // If it has a name, it probably has a hint too.
  if (criticalOp.getNameAttr()) {
    // The verifiers in OpenMP Dialect guarantee that all the pointers are
    // non-null
    auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
    auto criticalDeclareOp =
        symbolRef);
    // Lower the declared hint to an i32 constant, as expected by the runtime.
    hint =
        llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
                               static_cast<int>(criticalDeclareOp.getHint()));
  }
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createCritical(
          ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint);

  if (failed(handleError(afterIP, opInst)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
823
/// A util to collect info needed to convert delayed privatizers from MLIR to
/// LLVM.
  // Constructor: collects the op's private block arguments, the MLIR values
  // being privatized, and the matching privatizer declarations.
  template <typename OP>
      : blockArgs(
            cast<omp::BlockArgOpenMPOpInterface>(*op).getPrivateBlockArgs()) {
    // One entry per private block argument is expected; reserve up front.
    mlirVars.reserve(blockArgs.size());
    llvmVars.reserve(blockArgs.size());
    collectPrivatizationDecls<OP>(op);

    for (mlir::Value privateVar : op.getPrivateVars())
      mlirVars.push_back(privateVar);
  }


private:
  /// Populates `privatizations` with privatization declarations used for the
  /// given op.
  template <class OP>
  void collectPrivatizationDecls(OP op) {
    std::optional<ArrayAttr> attr = op.getPrivateSyms();
    if (!attr)
      return;

    privatizers.reserve(privatizers.size() + attr->size());
    for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
      privatizers.push_back(findPrivatizer(op, symbolRef));
    }
  }
};
859
/// Populates `reductions` with reduction declarations used in the given op.
template <typename T>
static void
  std::optional<ArrayAttr> attr = op.getReductionSyms();
  if (!attr)
    return;

  // One declaration per reduction variable on the op.
  reductions.reserve(reductions.size() + op.getNumReductionVars());
  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
    reductions.push_back(
        op, symbolRef));
  }
}
876
/// Translates the blocks contained in the given region and appends them at
/// the current insertion point of `builder`. The operations of the entry block
/// are appended to the current insertion block. If set, `continuationBlockArgs`
/// is populated with translated values that correspond to the values
/// omp.yield'ed from the region.
static LogicalResult inlineConvertOmpRegions(
    Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
  if (region.empty())
    return success();

  // Special case for single-block regions that don't create additional blocks:
  // insert operations without creating additional blocks.
  if (region.hasOneBlock()) {
    // Temporarily detach the insertion block's terminator (if any) so the
    // region's operations can be appended before it.
    llvm::Instruction *potentialTerminator =
        builder.GetInsertBlock()->empty() ? nullptr
                                          : &builder.GetInsertBlock()->back();

    if (potentialTerminator && potentialTerminator->isTerminator())
      potentialTerminator->removeFromParent();
    moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());

    if (failed(moduleTranslation.convertBlock(
            region.front(), /*ignoreArguments=*/true, builder)))
      return failure();

    // The continuation arguments are simply the translated terminator operands.
    if (continuationBlockArgs)
      llvm::append_range(
          *continuationBlockArgs,
          moduleTranslation.lookupValues(region.front().back().getOperands()));

    // Drop the mapping that is no longer necessary so that the same region can
    // be processed multiple times.
    moduleTranslation.forgetMapping(region);

    // Re-attach the terminator we detached above at the end of the block.
    if (potentialTerminator && potentialTerminator->isTerminator()) {
      llvm::BasicBlock *block = builder.GetInsertBlock();
      if (block->empty()) {
        // this can happen for really simple reduction init regions e.g.
        // %0 = llvm.mlir.constant(0 : i32) : i32
        // omp.yield(%0 : i32)
        // because the llvm.mlir.constant (MLIR op) isn't converted into any
        // llvm op
        potentialTerminator->insertInto(block, block->begin());
      } else {
        potentialTerminator->insertAfter(&block->back());
      }
    }

    return success();
  }

  llvm::Expected<llvm::BasicBlock *> continuationBlock =
      convertOmpOpRegions(region, blockName, builder, moduleTranslation, &phis);

  if (failed(handleError(continuationBlock, *region.getParentOp())))
    return failure();

  // Yielded values surface as PHIs of the continuation block.
  if (continuationBlockArgs)
    llvm::append_range(*continuationBlockArgs, phis);
  builder.SetInsertPoint(*continuationBlock,
                         (*continuationBlock)->getFirstInsertionPt());
  return success();
}
944
namespace {
/// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
/// store lambdas with capture.
/// Non-atomic form: (insertion point, lhs, rhs, out-param result) -> insertion
/// point or error.
using OwningReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
        llvm::Value *&)>;
/// Atomic form: (insertion point, element type, lhs, rhs) -> insertion point
/// or error.
using OwningAtomicReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
        llvm::Value *)>;
} // namespace
957
/// Create an OpenMPIRBuilder-compatible reduction generator for the given
/// reduction declaration. The generator uses `builder` but ignores its
/// insertion point.
static OwningReductionGen
makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  // The lambda is mutable because we need access to non-const methods of decl
  // (which aren't actually mutating it), and we must capture decl by-value to
  // avoid the dangling reference after the parent function returns.
  OwningReductionGen gen =
      [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
                llvm::Value *lhs, llvm::Value *rhs,
                llvm::Value *&result) mutable
      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
    // Bind the combiner region's block arguments to the runtime-provided
    // lhs/rhs values, then inline the region at `insertPoint`.
    moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs);
    moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs);
    builder.restoreIP(insertPoint);
    if (failed(inlineConvertOmpRegions(decl.getReductionRegion(),
                                       "omp.reduction.nonatomic.body", builder,
                                       moduleTranslation, &phis)))
      return llvm::createStringError(
          "failed to inline `combiner` region of `omp.declare_reduction`");
    // The combiner yields exactly one value: the combined result.
    result = llvm::getSingleElement(phis);
    return builder.saveIP();
  };
  return gen;
}
986
/// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
/// given reduction declaration. The generator uses `builder` but ignores its
/// insertion point. Returns null if there is no atomic region available in the
/// reduction declaration.
static OwningAtomicReductionGen
makeAtomicReductionGen(omp::DeclareReductionOp decl,
                       llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  // No atomic region provided: signal "no atomic generator" with a null
  // std::function.
  if (decl.getAtomicReductionRegion().empty())
    return OwningAtomicReductionGen();

  // The lambda is mutable because we need access to non-const methods of decl
  // (which aren't actually mutating it), and we must capture decl by-value to
  // avoid the dangling reference after the parent function returns.
  OwningAtomicReductionGen atomicGen =
      [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
                llvm::Value *lhs, llvm::Value *rhs) mutable
      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
    moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs);
    moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs);
    builder.restoreIP(insertPoint);
    if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(),
                                       "omp.reduction.atomic.body", builder,
                                       moduleTranslation, &phis)))
      return llvm::createStringError(
          "failed to inline `atomic` region of `omp.declare_reduction`");
    // The atomic region updates memory in place and yields nothing.
    assert(phis.empty());
    return builder.saveIP();
  };
  return atomicGen;
}
1019
1020/// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
1021static LogicalResult
1022convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
1023 LLVM::ModuleTranslation &moduleTranslation) {
1024 auto orderedOp = cast<omp::OrderedOp>(opInst);
1025
1026 if (failed(checkImplementationStatus(opInst)))
1027 return failure();
1028
1029 omp::ClauseDepend dependType = *orderedOp.getDoacrossDependType();
1030 bool isDependSource = dependType == omp::ClauseDepend::dependsource;
1031 unsigned numLoops = *orderedOp.getDoacrossNumLoops();
1032 SmallVector<llvm::Value *> vecValues =
1033 moduleTranslation.lookupValues(orderedOp.getDoacrossDependVars());
1034
1035 size_t indexVecValues = 0;
1036 while (indexVecValues < vecValues.size()) {
1037 SmallVector<llvm::Value *> storeValues;
1038 storeValues.reserve(numLoops);
1039 for (unsigned i = 0; i < numLoops; i++) {
1040 storeValues.push_back(vecValues[indexVecValues]);
1041 indexVecValues++;
1042 }
1043 llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1044 findAllocaInsertPoint(builder, moduleTranslation);
1045 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1046 builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
1047 ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
1048 }
1049 return success();
1050}
1051
1052/// Converts an OpenMP 'ordered_region' operation into LLVM IR using
1053/// OpenMPIRBuilder.
1054static LogicalResult
1055convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
1056 LLVM::ModuleTranslation &moduleTranslation) {
1057 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1058 auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
1059
1060 if (failed(checkImplementationStatus(opInst)))
1061 return failure();
1062
1063 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1064 // OrderedOp has only one region associated with it.
1065 auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
1066 builder.restoreIP(codeGenIP);
1067 return convertOmpOpRegions(region, "omp.ordered.region", builder,
1068 moduleTranslation)
1069 .takeError();
1070 };
1071
1072 // TODO: Perform finalization actions for variables. This has to be
1073 // called for variables which have destructors/finalizers.
1074 auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1075
1076 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1077 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1078 moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
1079 ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd());
1080
1081 if (failed(handleError(afterIP, opInst)))
1082 return failure();
1083
1084 builder.restoreIP(*afterIP);
1085 return success();
1086}
1087
namespace {
/// Contains the arguments for an LLVM store operation
/// (`value` is written to `address`); the store itself is emitted later, after
/// all allocas have been created.
struct DeferredStore {
  DeferredStore(llvm::Value *value, llvm::Value *address)
      : value(value), address(address) {}

  llvm::Value *value;   // value to be stored
  llvm::Value *address; // destination of the store
};
} // namespace
1098
/// Allocate space for privatized reduction variables.
/// `deferredStores` contains information to create store operations which needs
/// to be inserted after all allocas
template <typename T>
static LogicalResult
                   llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation,
                   const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
                   SmallVectorImpl<llvm::Value *> &privateReductionVariables,
                   DenseMap<Value, llvm::Value *> &reductionVariableMap,
                   SmallVectorImpl<DeferredStore> &deferredStores,
                   llvm::ArrayRef<bool> isByRefs) {
  // All allocas go before the alloca block's terminator; the guard restores
  // the caller's insertion point on exit.
  llvm::IRBuilderBase::InsertPointGuard guard(builder);
  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());

  // delay creating stores until after all allocas
  deferredStores.reserve(loop.getNumReductionVars());

  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
    Region &allocRegion = reductionDecls[i].getAllocRegion();
    if (isByRefs[i]) {
      // By-ref without an alloc region is handled later in initReductionVars.
      if (allocRegion.empty())
        continue;

      if (failed(inlineConvertOmpRegions(allocRegion, "omp.reduction.alloc",
                                         builder, moduleTranslation, &phis)))
        return loop.emitError(
            "failed to inline `alloc` region of `omp.declare_reduction`");

      assert(phis.size() == 1 && "expected one allocation to be yielded");
      builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());

      // Allocate reduction variable (which is a pointer to the real reduction
      // variable allocated in the inlined region)
      llvm::Value *var = builder.CreateAlloca(
          moduleTranslation.convertType(reductionDecls[i].getType()));

      // Normalize both pointers to the generic pointer type before recording.
      llvm::Type *ptrTy = builder.getPtrTy();
      llvm::Value *castVar =
          builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
      llvm::Value *castPhi =
          builder.CreatePointerBitCastOrAddrSpaceCast(phis[0], ptrTy);

      deferredStores.emplace_back(castPhi, castVar);

      privateReductionVariables[i] = castVar;
      moduleTranslation.mapValue(reductionArgs[i], castPhi);
      reductionVariableMap.try_emplace(loop.getReductionVars()[i], castPhi);
    } else {
      assert(allocRegion.empty() &&
             "allocaction is implicit for by-val reduction");
      llvm::Value *var = builder.CreateAlloca(
          moduleTranslation.convertType(reductionDecls[i].getType()));

      llvm::Type *ptrTy = builder.getPtrTy();
      llvm::Value *castVar =
          builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);

      moduleTranslation.mapValue(reductionArgs[i], castVar);
      privateReductionVariables[i] = castVar;
      reductionVariableMap.try_emplace(loop.getReductionVars()[i], castVar);
    }
  }

  return success();
}
1168
/// Map input arguments to reduction initialization region
template <typename T>
static void
                      DenseMap<Value, llvm::Value *> &reductionVariableMap,
                      unsigned i) {
  // map input argument to the initialization region
  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
  Region &initializerRegion = reduction.getInitializerRegion();
  Block &entry = initializerRegion.front();

  // Bind the "mold" argument to the translated original reduction variable.
  mlir::Value mlirSource = loop.getReductionVars()[i];
  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
  assert(llvmSource && "lookup reduction var");
  moduleTranslation.mapValue(reduction.getInitializerMoldArg(), llvmSource);

  // A second entry argument, when present, receives the private allocation
  // produced earlier.
  if (entry.getNumArguments() > 1) {
    llvm::Value *allocation =
        reductionVariableMap.lookup(loop.getReductionVars()[i]);
    moduleTranslation.mapValue(reduction.getInitializerAllocArg(), allocation);
  }
}
1192
1193static void
1194setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder,
1195 llvm::BasicBlock *block = nullptr) {
1196 if (block == nullptr)
1197 block = builder.GetInsertBlock();
1198
1199 if (block->empty() || block->getTerminator() == nullptr)
1200 builder.SetInsertPoint(block);
1201 else
1202 builder.SetInsertPoint(block->getTerminator());
1203}
1204
/// Inline reductions' `init` regions. This function assumes that the
/// `builder`'s insertion point is where the user wants the `init` regions to be
/// inlined; i.e. it does not try to find a proper insertion location for the
/// `init` regions. It also leaves the `builder`'s insertion point in a state
/// where the user can continue the code-gen directly afterwards.
template <typename OP>
static LogicalResult
initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs,
                  llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation,
                  llvm::BasicBlock *latestAllocaBlock,
                  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
                  DenseMap<Value, llvm::Value *> &reductionVariableMap,
                  llvm::ArrayRef<bool> isByRef,
                  SmallVectorImpl<DeferredStore> &deferredStores) {
  if (op.getNumReductionVars() == 0)
    return success();

  // Split off a dedicated block for initialization and allocate any remaining
  // by-ref pointer slots in the alloca block first.
  llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
  auto allocaIP = llvm::IRBuilderBase::InsertPoint(
      latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
  builder.restoreIP(allocaIP);
  SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars());

  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
    if (isByRef[i]) {
      // By-ref with an alloc region was already handled in allocReductionVars.
      if (!reductionDecls[i].getAllocRegion().empty())
        continue;

      // TODO: remove after all users of by-ref are updated to use the alloc
      // region: Allocate reduction variable (which is a pointer to the real
      // reduction variable allocated in the inlined region)
      byRefVars[i] = builder.CreateAlloca(
          moduleTranslation.convertType(reductionDecls[i].getType()));
    }
  }

  setInsertPointForPossiblyEmptyBlock(builder, initBlock);

  // store result of the alloc region to the allocated pointer to the real
  // reduction variable
  for (auto [data, addr] : deferredStores)
    builder.CreateStore(data, addr);

  // Before the loop, store the initial values of reductions into reduction
  // variables. Although this could be done after allocas, we don't want to mess
  // up with the alloca insertion point.
  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {

    // map block argument to initializer region
    mapInitializationArgs(op, moduleTranslation, reductionDecls,
                          reductionVariableMap, i);

    // TODO In some cases (specially on the GPU), the init regions may
    // contain stack allocations. If the region is inlined in a loop, this is
    // problematic. Instead of just inlining the region, handle allocations by
    // hoisting fixed length allocations to the function entry and using
    // stacksave and restore for variable length ones.
    if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
                                       "omp.reduction.neutral", builder,
                                       moduleTranslation, &phis)))
      return failure();

    assert(phis.size() == 1 && "expected one value to be yielded from the "
                               "reduction neutral element declaration region");

    if (isByRef[i]) {
      if (!reductionDecls[i].getAllocRegion().empty())
        // done in allocReductionVars
        continue;

      // TODO: this path can be removed once all users of by-ref are updated to
      // use an alloc region

      // Store the result of the inlined region to the allocated reduction var
      // ptr
      builder.CreateStore(phis[0], byRefVars[i]);

      privateReductionVariables[i] = byRefVars[i];
      moduleTranslation.mapValue(reductionArgs[i], phis[0]);
      reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
    } else {
      // for by-ref case the store is inside of the reduction region
      builder.CreateStore(phis[0], privateReductionVariables[i]);
      // the rest was handled in allocByValReductionVars
    }

    // forget the mapping for the initializer region because we might need a
    // different mapping if this reduction declaration is re-used for a
    // different variable
    moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
  }

  return success();
}
1304
/// Collect reduction info
/// Builds the owning (atomic) reduction generators for each declaration and
/// assembles the ReductionInfo records the OpenMPIRBuilder consumes.
template <typename T>
static void collectReductionInfo(
    T loop, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens,
    const ArrayRef<llvm::Value *> privateReductionVariables,
  unsigned numReductions = loop.getNumReductionVars();

  for (unsigned i = 0; i < numReductions; ++i) {
    owningReductionGens.push_back(
        makeReductionGen(reductionDecls[i], builder, moduleTranslation));
    // May be a null generator if the declaration has no atomic region.
    owningAtomicReductionGens.push_back(
        makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
  }

  // Collect the reduction information.
  reductionInfos.reserve(numReductions);
  for (unsigned i = 0; i < numReductions; ++i) {
    llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
    if (owningAtomicReductionGens[i])
      atomicGen = owningAtomicReductionGens[i];
    llvm::Value *variable =
        moduleTranslation.lookupValue(loop.getReductionVars()[i]);
    reductionInfos.push_back(
        {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
         privateReductionVariables[i],
         /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
         /*ReductionGenClang=*/nullptr, atomicGen});
  }
}
1340
/// handling of DeclareReductionOp's cleanup region
static LogicalResult
                       llvm::ArrayRef<llvm::Value *> privateVariables,
                       LLVM::ModuleTranslation &moduleTranslation,
                       llvm::IRBuilderBase &builder, StringRef regionName,
                       bool shouldLoadCleanupRegionArg = true) {
  for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
    // Declarations without a cleanup region need no work.
    if (cleanupRegion->empty())
      continue;

    // map the argument to the cleanup region
    Block &entry = cleanupRegion->front();

    // If the current block already ends in a terminator, insert the cleanup
    // code before it.
    llvm::Instruction *potentialTerminator =
        builder.GetInsertBlock()->empty() ? nullptr
                                          : &builder.GetInsertBlock()->back();
    if (potentialTerminator && potentialTerminator->isTerminator())
      builder.SetInsertPoint(potentialTerminator);
    // Depending on the caller, the region argument is either the loaded
    // private value or the private variable's address itself.
    llvm::Value *privateVarValue =
        shouldLoadCleanupRegionArg
            ? builder.CreateLoad(
                  moduleTranslation.convertType(entry.getArgument(0).getType()),
                  privateVariables[i])
            : privateVariables[i];

    moduleTranslation.mapValue(entry.getArgument(0), privateVarValue);

    if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
                                       moduleTranslation)))
      return failure();

    // clear block argument mapping in case it needs to be re-created with a
    // different source for another use of the same reduction decl
    moduleTranslation.forgetMapping(*cleanupRegion);
  }
  return success();
}
1379
// TODO: not used by ParallelOp
template <class OP>
static LogicalResult createReductionsAndCleanup(
    OP op, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
    ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef,
    bool isNowait = false, bool isTeamsReduction = false) {
  // Process the reductions if required.
  if (op.getNumReductionVars() == 0)
    return success();

  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;

  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

  // Create the reduction generators. We need to own them here because
  // ReductionInfo only accepts references to the generators.
  collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
                       owningReductionGens, owningAtomicReductionGens,
                       privateReductionVariables, reductionInfos);

  // The call to createReductions below expects the block to have a
  // terminator. Create an unreachable instruction to serve as terminator
  // and remove it later.
  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
  builder.SetInsertPoint(tempTerminator);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
      ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
                                   isByRef, isNowait, isTeamsReduction);

  if (failed(handleError(contInsertPoint, *op)))
    return failure();

  if (!contInsertPoint->getBlock())
    return op->emitOpError() << "failed to convert reductions";

  // Synchronize threads after the reduction before they continue.
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);

  if (failed(handleError(afterIP, *op)))
    return failure();

  tempTerminator->eraseFromParent();
  builder.restoreIP(*afterIP);

  // after the construct, deallocate private reduction variables
  SmallVector<Region *> reductionRegions;
  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
                  [](omp::DeclareReductionOp reductionDecl) {
                    return &reductionDecl.getCleanupRegion();
                  });
  return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
                                moduleTranslation, builder,
                                "omp.reduction.cleanup");
  // NOTE(review): the statement below is unreachable — the function already
  // returned on the previous statement; consider removing it.
  return success();
}
1440
1441static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
1442 if (!attr)
1443 return {};
1444 return *attr;
1445}
1446
// TODO: not used by omp.parallel
template <typename OP>
    OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
    SmallVectorImpl<llvm::Value *> &privateReductionVariables,
    DenseMap<Value, llvm::Value *> &reductionVariableMap,
    llvm::ArrayRef<bool> isByRef) {
  if (op.getNumReductionVars() == 0)
    return success();

  SmallVector<DeferredStore> deferredStores;

  // First allocate storage for the privatized reduction variables, recording
  // the stores that must be emitted once all allocas exist.
  if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
                                allocaIP, reductionDecls,
                                privateReductionVariables, reductionVariableMap,
                                deferredStores, isByRef)))
    return failure();

  // Then inline each declaration's init region to seed the private copies.
  return initReductionVars(op, reductionArgs, builder, moduleTranslation,
                           allocaIP.getBlock(), reductionDecls,
                           privateReductionVariables, reductionVariableMap,
                           isByRef, deferredStores);
}
1473
1474/// Return the llvm::Value * corresponding to the `privateVar` that
1475/// is being privatized. It isn't always as simple as looking up
1476/// moduleTranslation with privateVar. For instance, in case of
1477/// an allocatable, the descriptor for the allocatable is privatized.
1478/// This descriptor is mapped using an MapInfoOp. So, this function
1479/// will return a pointer to the llvm::Value corresponding to the
1480/// block argument for the mapped descriptor.
1481static llvm::Value *
1482findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
1483 LLVM::ModuleTranslation &moduleTranslation,
1484 llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1485 if (mappedPrivateVars == nullptr || !mappedPrivateVars->contains(privateVar))
1486 return moduleTranslation.lookupValue(privateVar);
1487
1488 Value blockArg = (*mappedPrivateVars)[privateVar];
1489 Type privVarType = privateVar.getType();
1490 Type blockArgType = blockArg.getType();
1491 assert(isa<LLVM::LLVMPointerType>(blockArgType) &&
1492 "A block argument corresponding to a mapped var should have "
1493 "!llvm.ptr type");
1494
1495 if (privVarType == blockArgType)
1496 return moduleTranslation.lookupValue(blockArg);
1497
1498 // This typically happens when the privatized type is lowered from
1499 // boxchar<KIND> and gets lowered to !llvm.struct<(ptr, i64)>. That is the
1500 // struct/pair is passed by value. But, mapped values are passed only as
1501 // pointers, so before we privatize, we must load the pointer.
1502 if (!isa<LLVM::LLVMPointerType>(privVarType))
1503 return builder.CreateLoad(moduleTranslation.convertType(privVarType),
1504 moduleTranslation.lookupValue(blockArg));
1505
1506 return moduleTranslation.lookupValue(privateVar);
1507}
1508
/// Initialize a single (first)private variable. You probably want to use
/// allocateAndInitPrivateVars instead of this.
/// This returns the private variable which has been initialized. This
/// variable should be mapped before constructing the body of the Op.
    llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
    omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg,
    llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock,
    llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
  // No init region: the freshly allocated variable is used as-is.
  Region &initRegion = privDecl.getInitRegion();
  if (initRegion.empty())
    return llvmPrivateVar;

  // map initialization region block arguments
  llvm::Value *nonPrivateVar = findAssociatedValue(
      mlirPrivVar, builder, moduleTranslation, mappedPrivateVars);
  assert(nonPrivateVar);
  moduleTranslation.mapValue(privDecl.getInitMoldArg(), nonPrivateVar);
  moduleTranslation.mapValue(privDecl.getInitPrivateArg(), llvmPrivateVar);

  // in-place convert the private initialization region
  if (failed(inlineConvertOmpRegions(initRegion, "omp.private.init", builder,
                                     moduleTranslation, &phis)))
    return llvm::createStringError(
        "failed to inline `init` region of `omp.private`");

  assert(phis.size() == 1 && "expected one allocation to be yielded");

  // clear init region block argument mapping in case it needs to be
  // re-created with a different source for another use of the same
  // reduction decl
  moduleTranslation.forgetMapping(initRegion);

  // Prefer the value yielded from the init region to the allocated private
  // variable in case the region is operating on arguments by-value (e.g.
  // Fortran character boxes).
  return phis[0];
}
1548
/// Initialize all delayed private variables collected in `privateVarsInfo`,
/// inlining each privatizer's init region in a dedicated block and mapping the
/// corresponding block arguments to the initialized values.
static llvm::Error
initPrivateVars(llvm::IRBuilderBase &builder,
                LLVM::ModuleTranslation &moduleTranslation,
                PrivateVarsInfo &privateVarsInfo,
                llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
  if (privateVarsInfo.blockArgs.empty())
    return llvm::Error::success();

  // All init regions are inlined into one dedicated block.
  llvm::BasicBlock *privInitBlock = splitBB(builder, true, "omp.private.init");
  setInsertPointForPossiblyEmptyBlock(builder, privInitBlock);

  for (auto [idx, zip] : llvm::enumerate(llvm::zip_equal(
           privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
           privateVarsInfo.blockArgs, privateVarsInfo.llvmVars))) {
    auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVar] = zip;
        builder, moduleTranslation, privDecl, mlirPrivVar, blockArg,
        llvmPrivateVar, privInitBlock, mappedPrivateVars);

    if (!privVarOrErr)
      return privVarOrErr.takeError();

    // The init region may yield a different value than the allocation; use
    // whatever it produced from here on.
    llvmPrivateVar = privVarOrErr.get();
    moduleTranslation.mapValue(blockArg, llvmPrivateVar);

  }

  return llvm::Error::success();
}
1579
/// Allocate and initialize delayed private variables. Returns the basic block
/// which comes after all of these allocations. llvm::Value * for each of these
/// private variables are populated in llvmPrivateVars.
allocatePrivateVars(llvm::IRBuilderBase &builder,
                    LLVM::ModuleTranslation &moduleTranslation,
                    PrivateVarsInfo &privateVarsInfo,
                    const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
                    llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
  // Allocate private vars
  llvm::Instruction *allocaTerminator = allocaIP.getBlock()->getTerminator();
  // Split the alloca block so everything after the allocas lands in a fresh
  // successor block.
  splitBB(llvm::OpenMPIRBuilder::InsertPointTy(allocaIP.getBlock(),
                                               allocaTerminator->getIterator()),
          true, allocaTerminator->getStableDebugLoc(),
          "omp.region.after_alloca");

  llvm::IRBuilderBase::InsertPointGuard guard(builder);
  // Update the allocaTerminator since the alloca block was split above.
  allocaTerminator = allocaIP.getBlock()->getTerminator();
  builder.SetInsertPoint(allocaTerminator);
  // The new terminator is an unconditional branch created by the splitBB above.
  assert(allocaTerminator->getNumSuccessors() == 1 &&
         "This is an unconditional branch created by splitBB");

  llvm::DataLayout dataLayout = builder.GetInsertBlock()->getDataLayout();
  llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);

  // If allocas live in a different address space than the program default,
  // each allocation must be cast back to the default address space.
  unsigned int allocaAS =
      moduleTranslation.getLLVMModule()->getDataLayout().getAllocaAddrSpace();
  unsigned int defaultAS = moduleTranslation.getLLVMModule()
                               ->getDataLayout()
                               .getProgramAddressSpace();

  for (auto [privDecl, mlirPrivVar, blockArg] :
       llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
                       privateVarsInfo.blockArgs)) {
    llvm::Type *llvmAllocType =
        moduleTranslation.convertType(privDecl.getType());
    builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
    llvm::Value *llvmPrivateVar = builder.CreateAlloca(
        llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
    if (allocaAS != defaultAS)
      llvmPrivateVar = builder.CreateAddrSpaceCast(llvmPrivateVar,
                                                   builder.getPtrTy(defaultAS));

    privateVarsInfo.llvmVars.push_back(llvmPrivateVar);
  }

  return afterAllocas;
}
1630
1631static LogicalResult copyFirstPrivateVars(
1632 mlir::Operation *op, llvm::IRBuilderBase &builder,
1633 LLVM::ModuleTranslation &moduleTranslation,
1634 SmallVectorImpl<mlir::Value> &mlirPrivateVars,
1635 ArrayRef<llvm::Value *> llvmPrivateVars,
1636 SmallVectorImpl<omp::PrivateClauseOp> &privateDecls, bool insertBarrier,
1637 llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1638 // Apply copy region for firstprivate.
1639 bool needsFirstprivate =
1640 llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1641 return privOp.getDataSharingType() ==
1642 omp::DataSharingClauseType::FirstPrivate;
1643 });
1644
1645 if (!needsFirstprivate)
1646 return success();
1647
1648 llvm::BasicBlock *copyBlock =
1649 splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1650 setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
1651
1652 for (auto [decl, mlirVar, llvmVar] :
1653 llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1654 if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1655 continue;
1656
1657 // copyRegion implements `lhs = rhs`
1658 Region &copyRegion = decl.getCopyRegion();
1659
1660 // map copyRegion rhs arg
1661 llvm::Value *nonPrivateVar = findAssociatedValue(
1662 mlirVar, builder, moduleTranslation, mappedPrivateVars);
1663 assert(nonPrivateVar);
1664 moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1665
1666 // map copyRegion lhs arg
1667 moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1668
1669 // in-place convert copy region
1670 if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
1671 moduleTranslation)))
1672 return decl.emitError("failed to inline `copy` region of `omp.private`");
1673
1675
1676 // ignore unused value yielded from copy region
1677
1678 // clear copy region block argument mapping in case it needs to be
1679 // re-created with different sources for reuse of the same reduction
1680 // decl
1681 moduleTranslation.forgetMapping(copyRegion);
1682 }
1683
1684 if (insertBarrier) {
1685 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1686 llvm::OpenMPIRBuilder::InsertPointOrErrorTy res =
1687 ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
1688 if (failed(handleError(res, *op)))
1689 return failure();
1690 }
1691
1692 return success();
1693}
1694
1695static LogicalResult
1696cleanupPrivateVars(llvm::IRBuilderBase &builder,
1697 LLVM::ModuleTranslation &moduleTranslation, Location loc,
1698 SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
1700 // private variable deallocation
1701 SmallVector<Region *> privateCleanupRegions;
1702 llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
1703 [](omp::PrivateClauseOp privatizer) {
1704 return &privatizer.getDeallocRegion();
1705 });
1706
1707 if (failed(inlineOmpRegionCleanup(
1708 privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1709 "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1710 return mlir::emitError(loc, "failed to inline `dealloc` region of an "
1711 "`omp.private` op in");
1712
1713 return success();
1714}
1715
1716/// Returns true if the construct contains omp.cancel or omp.cancellation_point
1718 // omp.cancel and omp.cancellation_point must be "closely nested" so they will
1719 // be visible and not inside of function calls. This is enforced by the
1720 // verifier.
1721 return op
1722 ->walk([](Operation *child) {
1723 if (mlir::isa<omp::CancelOp, omp::CancellationPointOp>(child))
1724 return WalkResult::interrupt();
1725 return WalkResult::advance();
1726 })
1727 .wasInterrupted();
1728}
1729
1730static LogicalResult
1731convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
1732 LLVM::ModuleTranslation &moduleTranslation) {
1733 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1734 using StorableBodyGenCallbackTy =
1735 llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
1736
1737 auto sectionsOp = cast<omp::SectionsOp>(opInst);
1738
1739 if (failed(checkImplementationStatus(opInst)))
1740 return failure();
1741
1742 llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionByref());
1743 assert(isByRef.size() == sectionsOp.getNumReductionVars());
1744
1746 collectReductionDecls(sectionsOp, reductionDecls);
1747 llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1748 findAllocaInsertPoint(builder, moduleTranslation);
1749
1750 SmallVector<llvm::Value *> privateReductionVariables(
1751 sectionsOp.getNumReductionVars());
1752 DenseMap<Value, llvm::Value *> reductionVariableMap;
1753
1754 MutableArrayRef<BlockArgument> reductionArgs =
1755 cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
1756
1758 sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
1759 reductionDecls, privateReductionVariables, reductionVariableMap,
1760 isByRef)))
1761 return failure();
1762
1764
1765 for (Operation &op : *sectionsOp.getRegion().begin()) {
1766 auto sectionOp = dyn_cast<omp::SectionOp>(op);
1767 if (!sectionOp) // omp.terminator
1768 continue;
1769
1770 Region &region = sectionOp.getRegion();
1771 auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation](
1772 InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1773 builder.restoreIP(codeGenIP);
1774
1775 // map the omp.section reduction block argument to the omp.sections block
1776 // arguments
1777 // TODO: this assumes that the only block arguments are reduction
1778 // variables
1779 assert(region.getNumArguments() ==
1780 sectionsOp.getRegion().getNumArguments());
1781 for (auto [sectionsArg, sectionArg] : llvm::zip_equal(
1782 sectionsOp.getRegion().getArguments(), region.getArguments())) {
1783 llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg);
1784 assert(llvmVal);
1785 moduleTranslation.mapValue(sectionArg, llvmVal);
1786 }
1787
1788 return convertOmpOpRegions(region, "omp.section.region", builder,
1789 moduleTranslation)
1790 .takeError();
1791 };
1792 sectionCBs.push_back(sectionCB);
1793 }
1794
1795 // No sections within omp.sections operation - skip generation. This situation
1796 // is only possible if there is only a terminator operation inside the
1797 // sections operation
1798 if (sectionCBs.empty())
1799 return success();
1800
1801 assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
1802
1803 // TODO: Perform appropriate actions according to the data-sharing
1804 // attribute (shared, private, firstprivate, ...) of variables.
1805 // Currently defaults to shared.
1806 auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
1807 llvm::Value &vPtr, llvm::Value *&replacementValue)
1808 -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
1809 replacementValue = &vPtr;
1810 return codeGenIP;
1811 };
1812
1813 // TODO: Perform finalization actions for variables. This has to be
1814 // called for variables which have destructors/finalizers.
1815 auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1816
1817 allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1818 bool isCancellable = constructIsCancellable(sectionsOp);
1819 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1820 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1821 moduleTranslation.getOpenMPBuilder()->createSections(
1822 ompLoc, allocaIP, sectionCBs, privCB, finiCB, isCancellable,
1823 sectionsOp.getNowait());
1824
1825 if (failed(handleError(afterIP, opInst)))
1826 return failure();
1827
1828 builder.restoreIP(*afterIP);
1829
1830 // Process the reductions if required.
1832 sectionsOp, builder, moduleTranslation, allocaIP, reductionDecls,
1833 privateReductionVariables, isByRef, sectionsOp.getNowait());
1834}
1835
1836/// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
1837static LogicalResult
1838convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
1839 LLVM::ModuleTranslation &moduleTranslation) {
1840 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1841 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1842
1843 if (failed(checkImplementationStatus(*singleOp)))
1844 return failure();
1845
1846 auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1847 builder.restoreIP(codegenIP);
1848 return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region",
1849 builder, moduleTranslation)
1850 .takeError();
1851 };
1852 auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1853
1854 // Handle copyprivate
1855 Operation::operand_range cpVars = singleOp.getCopyprivateVars();
1856 std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateSyms();
1859 for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
1860 llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
1862 singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
1863 llvmCPFuncs.push_back(
1864 moduleTranslation.lookupFunction(llvmFuncOp.getName()));
1865 }
1866
1867 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1868 moduleTranslation.getOpenMPBuilder()->createSingle(
1869 ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars,
1870 llvmCPFuncs);
1871
1872 if (failed(handleError(afterIP, *singleOp)))
1873 return failure();
1874
1875 builder.restoreIP(*afterIP);
1876 return success();
1877}
1878
1879static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp) {
1880 auto iface =
1881 llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(teamsOp.getOperation());
1882 // Check that all uses of the reduction block arg has the same distribute op
1883 // parent.
1885 Operation *distOp = nullptr;
1886 for (auto ra : iface.getReductionBlockArgs())
1887 for (auto &use : ra.getUses()) {
1888 auto *useOp = use.getOwner();
1889 // Ignore debug uses.
1890 if (mlir::isa<LLVM::DbgDeclareOp, LLVM::DbgValueOp>(useOp)) {
1891 debugUses.push_back(useOp);
1892 continue;
1893 }
1894
1895 auto currentDistOp = useOp->getParentOfType<omp::DistributeOp>();
1896 // Use is not inside a distribute op - return false
1897 if (!currentDistOp)
1898 return false;
1899 // Multiple distribute operations - return false
1900 Operation *currentOp = currentDistOp.getOperation();
1901 if (distOp && (distOp != currentOp))
1902 return false;
1903
1904 distOp = currentOp;
1905 }
1906
1907 // If we are going to use distribute reduction then remove any debug uses of
1908 // the reduction parameters in teamsOp. Otherwise they will be left without
1909 // any mapped value in moduleTranslation and will eventually error out.
1910 for (auto use : debugUses)
1911 use->erase();
1912 return true;
1913}
1914
1915// Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder
1916static LogicalResult
1917convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
1918 LLVM::ModuleTranslation &moduleTranslation) {
1919 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1920 if (failed(checkImplementationStatus(*op)))
1921 return failure();
1922
1923 DenseMap<Value, llvm::Value *> reductionVariableMap;
1924 unsigned numReductionVars = op.getNumReductionVars();
1926 SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
1927 llvm::ArrayRef<bool> isByRef;
1928 llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1929 findAllocaInsertPoint(builder, moduleTranslation);
1930
1931 // Only do teams reduction if there is no distribute op that captures the
1932 // reduction instead.
1933 bool doTeamsReduction = !teamsReductionContainedInDistribute(op);
1934 if (doTeamsReduction) {
1935 isByRef = getIsByRef(op.getReductionByref());
1936
1937 assert(isByRef.size() == op.getNumReductionVars());
1938
1939 MutableArrayRef<BlockArgument> reductionArgs =
1940 llvm::cast<omp::BlockArgOpenMPOpInterface>(*op).getReductionBlockArgs();
1941
1942 collectReductionDecls(op, reductionDecls);
1943
1945 op, reductionArgs, builder, moduleTranslation, allocaIP,
1946 reductionDecls, privateReductionVariables, reductionVariableMap,
1947 isByRef)))
1948 return failure();
1949 }
1950
1951 auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1953 moduleTranslation, allocaIP);
1954 builder.restoreIP(codegenIP);
1955 return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
1956 moduleTranslation)
1957 .takeError();
1958 };
1959
1960 llvm::Value *numTeamsLower = nullptr;
1961 if (Value numTeamsLowerVar = op.getNumTeamsLower())
1962 numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);
1963
1964 llvm::Value *numTeamsUpper = nullptr;
1965 if (Value numTeamsUpperVar = op.getNumTeamsUpper())
1966 numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);
1967
1968 llvm::Value *threadLimit = nullptr;
1969 if (Value threadLimitVar = op.getThreadLimit())
1970 threadLimit = moduleTranslation.lookupValue(threadLimitVar);
1971
1972 llvm::Value *ifExpr = nullptr;
1973 if (Value ifVar = op.getIfExpr())
1974 ifExpr = moduleTranslation.lookupValue(ifVar);
1975
1976 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1977 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1978 moduleTranslation.getOpenMPBuilder()->createTeams(
1979 ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr);
1980
1981 if (failed(handleError(afterIP, *op)))
1982 return failure();
1983
1984 builder.restoreIP(*afterIP);
1985 if (doTeamsReduction) {
1986 // Process the reductions if required.
1988 op, builder, moduleTranslation, allocaIP, reductionDecls,
1989 privateReductionVariables, isByRef,
1990 /*isNoWait*/ false, /*isTeamsReduction*/ true);
1991 }
1992 return success();
1993}
1994
1995static void
1996buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
1997 LLVM::ModuleTranslation &moduleTranslation,
1999 if (dependVars.empty())
2000 return;
2001 for (auto dep : llvm::zip(dependVars, dependKinds->getValue())) {
2002 llvm::omp::RTLDependenceKindTy type;
2003 switch (
2004 cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
2005 case mlir::omp::ClauseTaskDepend::taskdependin:
2006 type = llvm::omp::RTLDependenceKindTy::DepIn;
2007 break;
2008 // The OpenMP runtime requires that the codegen for 'depend' clause for
2009 // 'out' dependency kind must be the same as codegen for 'depend' clause
2010 // with 'inout' dependency.
2011 case mlir::omp::ClauseTaskDepend::taskdependout:
2012 case mlir::omp::ClauseTaskDepend::taskdependinout:
2013 type = llvm::omp::RTLDependenceKindTy::DepInOut;
2014 break;
2015 case mlir::omp::ClauseTaskDepend::taskdependmutexinoutset:
2016 type = llvm::omp::RTLDependenceKindTy::DepMutexInOutSet;
2017 break;
2018 case mlir::omp::ClauseTaskDepend::taskdependinoutset:
2019 type = llvm::omp::RTLDependenceKindTy::DepInOutSet;
2020 break;
2021 };
2022 llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
2023 llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
2024 dds.emplace_back(dd);
2025 }
2026}
2027
2028/// Shared implementation of a callback which adds a termiator for the new block
2029/// created for the branch taken when an openmp construct is cancelled. The
2030/// terminator is saved in \p cancelTerminators. This callback is invoked only
2031/// if there is cancellation inside of the taskgroup body.
2032/// The terminator will need to be fixed to branch to the correct block to
2033/// cleanup the construct.
2034static void
2036 llvm::IRBuilderBase &llvmBuilder,
2037 llvm::OpenMPIRBuilder &ompBuilder, mlir::Operation *op,
2038 llvm::omp::Directive cancelDirective) {
2039 auto finiCB = [&](llvm::OpenMPIRBuilder::InsertPointTy ip) -> llvm::Error {
2040 llvm::IRBuilderBase::InsertPointGuard guard(llvmBuilder);
2041
2042 // ip is currently in the block branched to if cancellation occured.
2043 // We need to create a branch to terminate that block.
2044 llvmBuilder.restoreIP(ip);
2045
2046 // We must still clean up the construct after cancelling it, so we need to
2047 // branch to the block that finalizes the taskgroup.
2048 // That block has not been created yet so use this block as a dummy for now
2049 // and fix this after creating the operation.
2050 cancelTerminators.push_back(llvmBuilder.CreateBr(ip.getBlock()));
2051 return llvm::Error::success();
2052 };
2053 // We have to add the cleanup to the OpenMPIRBuilder before the body gets
2054 // created in case the body contains omp.cancel (which will then expect to be
2055 // able to find this cleanup callback).
2056 ompBuilder.pushFinalizationCB(
2057 {finiCB, cancelDirective, constructIsCancellable(op)});
2058}
2059
2060/// If we cancelled the construct, we should branch to the finalization block of
2061/// that construct. OMPIRBuilder structures the CFG such that the cleanup block
2062/// is immediately before the continuation block. Now this finalization has
2063/// been created we can fix the branch.
2064static void
2066 llvm::OpenMPIRBuilder &ompBuilder,
2067 const llvm::OpenMPIRBuilder::InsertPointTy &afterIP) {
2068 ompBuilder.popFinalizationCB();
2069 llvm::BasicBlock *constructFini = afterIP.getBlock()->getSinglePredecessor();
2070 for (llvm::BranchInst *cancelBranch : cancelTerminators) {
2071 assert(cancelBranch->getNumSuccessors() == 1 &&
2072 "cancel branch should have one target");
2073 cancelBranch->setSuccessor(0, constructFini);
2074 }
2075}
2076
2077namespace {
2078/// TaskContextStructManager takes care of creating and freeing a structure
2079/// containing information needed by the task body to execute.
2080class TaskContextStructManager {
2081public:
2082 TaskContextStructManager(llvm::IRBuilderBase &builder,
2083 LLVM::ModuleTranslation &moduleTranslation,
2084 MutableArrayRef<omp::PrivateClauseOp> privateDecls)
2085 : builder{builder}, moduleTranslation{moduleTranslation},
2086 privateDecls{privateDecls} {}
2087
2088 /// Creates a heap allocated struct containing space for each private
2089 /// variable. Invariant: privateVarTypes, privateDecls, and the elements of
2090 /// the structure should all have the same order (although privateDecls which
2091 /// do not read from the mold argument are skipped).
2092 void generateTaskContextStruct();
2093
2094 /// Create GEPs to access each member of the structure representing a private
2095 /// variable, adding them to llvmPrivateVars. Null values are added where
2096 /// private decls were skipped so that the ordering continues to match the
2097 /// private decls.
2098 void createGEPsToPrivateVars();
2099
2100 /// De-allocate the task context structure.
2101 void freeStructPtr();
2102
2103 MutableArrayRef<llvm::Value *> getLLVMPrivateVarGEPs() {
2104 return llvmPrivateVarGEPs;
2105 }
2106
2107 llvm::Value *getStructPtr() { return structPtr; }
2108
2109private:
2110 llvm::IRBuilderBase &builder;
2111 LLVM::ModuleTranslation &moduleTranslation;
2112 MutableArrayRef<omp::PrivateClauseOp> privateDecls;
2113
2114 /// The type of each member of the structure, in order.
2115 SmallVector<llvm::Type *> privateVarTypes;
2116
2117 /// LLVM values for each private variable, or null if that private variable is
2118 /// not included in the task context structure
2119 SmallVector<llvm::Value *> llvmPrivateVarGEPs;
2120
2121 /// A pointer to the structure containing context for this task.
2122 llvm::Value *structPtr = nullptr;
2123 /// The type of the structure
2124 llvm::Type *structTy = nullptr;
2125};
2126} // namespace
2127
2128void TaskContextStructManager::generateTaskContextStruct() {
2129 if (privateDecls.empty())
2130 return;
2131 privateVarTypes.reserve(privateDecls.size());
2132
2133 for (omp::PrivateClauseOp &privOp : privateDecls) {
2134 // Skip private variables which can safely be allocated and initialised
2135 // inside of the task
2136 if (!privOp.readsFromMold())
2137 continue;
2138 Type mlirType = privOp.getType();
2139 privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
2140 }
2141
2142 structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(),
2143 privateVarTypes);
2144
2145 llvm::DataLayout dataLayout =
2146 builder.GetInsertBlock()->getModule()->getDataLayout();
2147 llvm::Type *intPtrTy = builder.getIntPtrTy(dataLayout);
2148 llvm::Constant *allocSize = llvm::ConstantExpr::getSizeOf(structTy);
2149
2150 // Heap allocate the structure
2151 structPtr = builder.CreateMalloc(intPtrTy, structTy, allocSize,
2152 /*ArraySize=*/nullptr, /*MallocF=*/nullptr,
2153 "omp.task.context_ptr");
2154}
2155
2156void TaskContextStructManager::createGEPsToPrivateVars() {
2157 if (!structPtr) {
2158 assert(privateVarTypes.empty());
2159 return;
2160 }
2161
2162 // Create GEPs for each struct member
2163 llvmPrivateVarGEPs.clear();
2164 llvmPrivateVarGEPs.reserve(privateDecls.size());
2165 llvm::Value *zero = builder.getInt32(0);
2166 unsigned i = 0;
2167 for (auto privDecl : privateDecls) {
2168 if (!privDecl.readsFromMold()) {
2169 // Handle this inside of the task so we don't pass unnessecary vars in
2170 llvmPrivateVarGEPs.push_back(nullptr);
2171 continue;
2172 }
2173 llvm::Value *iVal = builder.getInt32(i);
2174 llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
2175 llvmPrivateVarGEPs.push_back(gep);
2176 i += 1;
2177 }
2178}
2179
2180void TaskContextStructManager::freeStructPtr() {
2181 if (!structPtr)
2182 return;
2183
2184 llvm::IRBuilderBase::InsertPointGuard guard{builder};
2185 // Ensure we don't put the call to free() after the terminator
2186 builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
2187 builder.CreateFree(structPtr);
2188}
2189
2190/// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
2191static LogicalResult
2192convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
2193 LLVM::ModuleTranslation &moduleTranslation) {
2194 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2195 if (failed(checkImplementationStatus(*taskOp)))
2196 return failure();
2197
2198 PrivateVarsInfo privateVarsInfo(taskOp);
2199 TaskContextStructManager taskStructMgr{builder, moduleTranslation,
2200 privateVarsInfo.privatizers};
2201
2202 // Allocate and copy private variables before creating the task. This avoids
2203 // accessing invalid memory if (after this scope ends) the private variables
2204 // are initialized from host variables or if the variables are copied into
2205 // from host variables (firstprivate). The insertion point is just before
2206 // where the code for creating and scheduling the task will go. That puts this
2207 // code outside of the outlined task region, which is what we want because
2208 // this way the initialization and copy regions are executed immediately while
2209 // the host variable data are still live.
2210
2211 llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2212 findAllocaInsertPoint(builder, moduleTranslation);
2213
2214 // Not using splitBB() because that requires the current block to have a
2215 // terminator.
2216 assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
2217 llvm::BasicBlock *taskStartBlock = llvm::BasicBlock::Create(
2218 builder.getContext(), "omp.task.start",
2219 /*Parent=*/builder.GetInsertBlock()->getParent());
2220 llvm::Instruction *branchToTaskStartBlock = builder.CreateBr(taskStartBlock);
2221 builder.SetInsertPoint(branchToTaskStartBlock);
2222
2223 // Now do this again to make the initialization and copy blocks
2224 llvm::BasicBlock *copyBlock =
2225 splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
2226 llvm::BasicBlock *initBlock =
2227 splitBB(builder, /*CreateBranch=*/true, "omp.private.init");
2228
2229 // Now the control flow graph should look like
2230 // starter_block:
2231 // <---- where we started when convertOmpTaskOp was called
2232 // br %omp.private.init
2233 // omp.private.init:
2234 // br %omp.private.copy
2235 // omp.private.copy:
2236 // br %omp.task.start
2237 // omp.task.start:
2238 // <---- where we want the insertion point to be when we call createTask()
2239
2240 // Save the alloca insertion point on ModuleTranslation stack for use in
2241 // nested regions.
2243 moduleTranslation, allocaIP);
2244
2245 // Allocate and initialize private variables
2246 builder.SetInsertPoint(initBlock->getTerminator());
2247
2248 // Create task variable structure
2249 taskStructMgr.generateTaskContextStruct();
2250 // GEPs so that we can initialize the variables. Don't use these GEPs inside
2251 // of the body otherwise it will be the GEP not the struct which is fowarded
2252 // to the outlined function. GEPs forwarded in this way are passed in a
2253 // stack-allocated (by OpenMPIRBuilder) structure which is not safe for tasks
2254 // which may not be executed until after the current stack frame goes out of
2255 // scope.
2256 taskStructMgr.createGEPsToPrivateVars();
2257
2258 for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
2259 llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
2260 privateVarsInfo.blockArgs,
2261 taskStructMgr.getLLVMPrivateVarGEPs())) {
2262 // To be handled inside the task.
2263 if (!privDecl.readsFromMold())
2264 continue;
2265 assert(llvmPrivateVarAlloc &&
2266 "reads from mold so shouldn't have been skipped");
2267
2268 llvm::Expected<llvm::Value *> privateVarOrErr =
2269 initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2270 blockArg, llvmPrivateVarAlloc, initBlock);
2271 if (!privateVarOrErr)
2272 return handleError(privateVarOrErr, *taskOp.getOperation());
2273
2275
2276 // TODO: this is a bit of a hack for Fortran character boxes.
2277 // Character boxes are passed by value into the init region and then the
2278 // initialized character box is yielded by value. Here we need to store the
2279 // yielded value into the private allocation, and load the private
2280 // allocation to match the type expected by region block arguments.
2281 if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
2282 !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
2283 builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);
2284 // Load it so we have the value pointed to by the GEP
2285 llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(),
2286 llvmPrivateVarAlloc);
2287 }
2288 assert(llvmPrivateVarAlloc->getType() ==
2289 moduleTranslation.convertType(blockArg.getType()));
2290
2291 // Mapping blockArg -> llvmPrivateVarAlloc is done inside the body callback
2292 // so that OpenMPIRBuilder doesn't try to pass each GEP address through a
2293 // stack allocated structure.
2294 }
2295
2296 // firstprivate copy region
2297 setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
2298 if (failed(copyFirstPrivateVars(
2299 taskOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
2300 taskStructMgr.getLLVMPrivateVarGEPs(), privateVarsInfo.privatizers,
2301 taskOp.getPrivateNeedsBarrier())))
2302 return llvm::failure();
2303
2304 // Set up for call to createTask()
2305 builder.SetInsertPoint(taskStartBlock);
2306
2307 auto bodyCB = [&](InsertPointTy allocaIP,
2308 InsertPointTy codegenIP) -> llvm::Error {
2309 // Save the alloca insertion point on ModuleTranslation stack for use in
2310 // nested regions.
2312 moduleTranslation, allocaIP);
2313
2314 // translate the body of the task:
2315 builder.restoreIP(codegenIP);
2316
2317 llvm::BasicBlock *privInitBlock = nullptr;
2318 privateVarsInfo.llvmVars.resize(privateVarsInfo.blockArgs.size());
2319 for (auto [i, zip] : llvm::enumerate(llvm::zip_equal(
2320 privateVarsInfo.blockArgs, privateVarsInfo.privatizers,
2321 privateVarsInfo.mlirVars))) {
2322 auto [blockArg, privDecl, mlirPrivVar] = zip;
2323 // This is handled before the task executes
2324 if (privDecl.readsFromMold())
2325 continue;
2326
2327 llvm::IRBuilderBase::InsertPointGuard guard(builder);
2328 llvm::Type *llvmAllocType =
2329 moduleTranslation.convertType(privDecl.getType());
2330 builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
2331 llvm::Value *llvmPrivateVar = builder.CreateAlloca(
2332 llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
2333
2334 llvm::Expected<llvm::Value *> privateVarOrError =
2335 initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2336 blockArg, llvmPrivateVar, privInitBlock);
2337 if (!privateVarOrError)
2338 return privateVarOrError.takeError();
2339 moduleTranslation.mapValue(blockArg, privateVarOrError.get());
2340 privateVarsInfo.llvmVars[i] = privateVarOrError.get();
2341 }
2342
2343 taskStructMgr.createGEPsToPrivateVars();
2344 for (auto [i, llvmPrivVar] :
2345 llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) {
2346 if (!llvmPrivVar) {
2347 assert(privateVarsInfo.llvmVars[i] &&
2348 "This is added in the loop above");
2349 continue;
2350 }
2351 privateVarsInfo.llvmVars[i] = llvmPrivVar;
2352 }
2353
2354 // Find and map the addresses of each variable within the task context
2355 // structure
2356 for (auto [blockArg, llvmPrivateVar, privateDecl] :
2357 llvm::zip_equal(privateVarsInfo.blockArgs, privateVarsInfo.llvmVars,
2358 privateVarsInfo.privatizers)) {
2359 // This was handled above.
2360 if (!privateDecl.readsFromMold())
2361 continue;
2362 // Fix broken pass-by-value case for Fortran character boxes
2363 if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
2364 llvmPrivateVar = builder.CreateLoad(
2365 moduleTranslation.convertType(blockArg.getType()), llvmPrivateVar);
2366 }
2367 assert(llvmPrivateVar->getType() ==
2368 moduleTranslation.convertType(blockArg.getType()));
2369 moduleTranslation.mapValue(blockArg, llvmPrivateVar);
2370 }
2371
2372 auto continuationBlockOrError = convertOmpOpRegions(
2373 taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
2374 if (failed(handleError(continuationBlockOrError, *taskOp)))
2375 return llvm::make_error<PreviouslyReportedError>();
2376
2377 builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
2378
2379 if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(),
2380 privateVarsInfo.llvmVars,
2381 privateVarsInfo.privatizers)))
2382 return llvm::make_error<PreviouslyReportedError>();
2383
2384 // Free heap allocated task context structure at the end of the task.
2385 taskStructMgr.freeStructPtr();
2386
2387 return llvm::Error::success();
2388 };
2389
2390 llvm::OpenMPIRBuilder &ompBuilder = *moduleTranslation.getOpenMPBuilder();
2391 SmallVector<llvm::BranchInst *> cancelTerminators;
2392 // The directive to match here is OMPD_taskgroup because it is the taskgroup
2393 // which is canceled. This is handled here because it is the task's cleanup
2394 // block which should be branched to.
2395 pushCancelFinalizationCB(cancelTerminators, builder, ompBuilder, taskOp,
2396 llvm::omp::Directive::OMPD_taskgroup);
2397
2399 buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
2400 moduleTranslation, dds);
2401
2402 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2403 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2404 moduleTranslation.getOpenMPBuilder()->createTask(
2405 ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
2406 moduleTranslation.lookupValue(taskOp.getFinal()),
2407 moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
2408 taskOp.getMergeable(),
2409 moduleTranslation.lookupValue(taskOp.getEventHandle()),
2410 moduleTranslation.lookupValue(taskOp.getPriority()));
2411
2412 if (failed(handleError(afterIP, *taskOp)))
2413 return failure();
2414
2415 // Set the correct branch target for task cancellation
2416 popCancelFinalizationCB(cancelTerminators, ompBuilder, afterIP.get());
2417
2418 builder.restoreIP(*afterIP);
2419 return success();
2420}
2421
2422/// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
2423static LogicalResult
2424convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
2425 LLVM::ModuleTranslation &moduleTranslation) {
2426 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2427 if (failed(checkImplementationStatus(*tgOp)))
2428 return failure();
2429
2430 auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
2431 builder.restoreIP(codegenIP);
2432 return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region",
2433 builder, moduleTranslation)
2434 .takeError();
2435 };
2436
2437 InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
2438 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2439 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2440 moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP,
2441 bodyCB);
2442
2443 if (failed(handleError(afterIP, *tgOp)))
2444 return failure();
2445
2446 builder.restoreIP(*afterIP);
2447 return success();
2448}
2449
2450static LogicalResult
2451convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder,
2452 LLVM::ModuleTranslation &moduleTranslation) {
2453 if (failed(checkImplementationStatus(*twOp)))
2454 return failure();
2455
2456 moduleTranslation.getOpenMPBuilder()->createTaskwait(builder.saveIP());
2457 return success();
2458}
2459
2460/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
2461static LogicalResult
2462convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
2463 LLVM::ModuleTranslation &moduleTranslation) {
2464 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2465 auto wsloopOp = cast<omp::WsloopOp>(opInst);
2466 if (failed(checkImplementationStatus(opInst)))
2467 return failure();
2468
2469 auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
2470 llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref());
2471 assert(isByRef.size() == wsloopOp.getNumReductionVars());
2472
2473 // Static is the default.
2474 auto schedule =
2475 wsloopOp.getScheduleKind().value_or(omp::ClauseScheduleKind::Static);
2476
2477 // Find the loop configuration.
2478 llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]);
2479 llvm::Type *ivType = step->getType();
2480 llvm::Value *chunk = nullptr;
2481 if (wsloopOp.getScheduleChunk()) {
2482 llvm::Value *chunkVar =
2483 moduleTranslation.lookupValue(wsloopOp.getScheduleChunk());
2484 chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
2485 }
2486
2487 PrivateVarsInfo privateVarsInfo(wsloopOp);
2488
2490 collectReductionDecls(wsloopOp, reductionDecls);
2491 llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2492 findAllocaInsertPoint(builder, moduleTranslation);
2493
2494 SmallVector<llvm::Value *> privateReductionVariables(
2495 wsloopOp.getNumReductionVars());
2496
2498 builder, moduleTranslation, privateVarsInfo, allocaIP);
2499 if (handleError(afterAllocas, opInst).failed())
2500 return failure();
2501
2502 DenseMap<Value, llvm::Value *> reductionVariableMap;
2503
2504 MutableArrayRef<BlockArgument> reductionArgs =
2505 cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
2506
2507 SmallVector<DeferredStore> deferredStores;
2508
2509 if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
2510 moduleTranslation, allocaIP, reductionDecls,
2511 privateReductionVariables, reductionVariableMap,
2512 deferredStores, isByRef)))
2513 return failure();
2514
2515 if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
2516 opInst)
2517 .failed())
2518 return failure();
2519
2520 if (failed(copyFirstPrivateVars(
2521 wsloopOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
2522 privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
2523 wsloopOp.getPrivateNeedsBarrier())))
2524 return failure();
2525
2526 assert(afterAllocas.get()->getSinglePredecessor());
2527 if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
2528 moduleTranslation,
2529 afterAllocas.get()->getSinglePredecessor(),
2530 reductionDecls, privateReductionVariables,
2531 reductionVariableMap, isByRef, deferredStores)))
2532 return failure();
2533
2534 // TODO: Handle doacross loops when the ordered clause has a parameter.
2535 bool isOrdered = wsloopOp.getOrdered().has_value();
2536 std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
2537 bool isSimd = wsloopOp.getScheduleSimd();
2538 bool loopNeedsBarrier = !wsloopOp.getNowait();
2539
2540 // The only legal way for the direct parent to be omp.distribute is that this
2541 // represents 'distribute parallel do'. Otherwise, this is a regular
2542 // worksharing loop.
2543 llvm::omp::WorksharingLoopType workshareLoopType =
2544 llvm::isa_and_present<omp::DistributeOp>(opInst.getParentOp())
2545 ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop
2546 : llvm::omp::WorksharingLoopType::ForStaticLoop;
2547
2548 SmallVector<llvm::BranchInst *> cancelTerminators;
2549 pushCancelFinalizationCB(cancelTerminators, builder, *ompBuilder, wsloopOp,
2550 llvm::omp::Directive::OMPD_for);
2551
2552 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2553
2554 // Initialize linear variables and linear step
2555 LinearClauseProcessor linearClauseProcessor;
2556 if (!wsloopOp.getLinearVars().empty()) {
2557 for (mlir::Value linearVar : wsloopOp.getLinearVars())
2558 linearClauseProcessor.createLinearVar(builder, moduleTranslation,
2559 linearVar);
2560 for (mlir::Value linearStep : wsloopOp.getLinearStepVars())
2561 linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);
2562 }
2563
2565 wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation);
2566
2567 if (failed(handleError(regionBlock, opInst)))
2568 return failure();
2569
2570 llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
2571
2572 // Emit Initialization and Update IR for linear variables
2573 if (!wsloopOp.getLinearVars().empty()) {
2574 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
2575 linearClauseProcessor.initLinearVar(builder, moduleTranslation,
2576 loopInfo->getPreheader());
2577 if (failed(handleError(afterBarrierIP, *loopOp)))
2578 return failure();
2579 builder.restoreIP(*afterBarrierIP);
2580 linearClauseProcessor.updateLinearVar(builder, loopInfo->getBody(),
2581 loopInfo->getIndVar());
2582 linearClauseProcessor.outlineLinearFinalizationBB(builder,
2583 loopInfo->getExit());
2584 }
2585
2586 builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
2587
2588 // Check if we can generate no-loop kernel
2589 bool noLoopMode = false;
2590 omp::TargetOp targetOp = wsloopOp->getParentOfType<mlir::omp::TargetOp>();
2591 if (targetOp) {
2592 Operation *targetCapturedOp = targetOp.getInnermostCapturedOmpOp();
2593 // We need this check because, without it, noLoopMode would be set to true
2594 // for every omp.wsloop nested inside a no-loop SPMD target region, even if
2595 // that loop is not the top-level SPMD one.
2596 if (loopOp == targetCapturedOp) {
2597 omp::TargetRegionFlags kernelFlags =
2598 targetOp.getKernelExecFlags(targetCapturedOp);
2599 if (omp::bitEnumContainsAll(kernelFlags,
2600 omp::TargetRegionFlags::spmd |
2601 omp::TargetRegionFlags::no_loop) &&
2602 !omp::bitEnumContainsAny(kernelFlags,
2603 omp::TargetRegionFlags::generic))
2604 noLoopMode = true;
2605 }
2606 }
2607
2608 llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
2609 ompBuilder->applyWorkshareLoop(
2610 ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
2611 convertToScheduleKind(schedule), chunk, isSimd,
2612 scheduleMod == omp::ScheduleModifier::monotonic,
2613 scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
2614 workshareLoopType, noLoopMode);
2615
2616 if (failed(handleError(wsloopIP, opInst)))
2617 return failure();
2618
2619 // Emit finalization and in-place rewrites for linear vars.
2620 if (!wsloopOp.getLinearVars().empty()) {
2621 llvm::OpenMPIRBuilder::InsertPointTy oldIP = builder.saveIP();
2622 assert(loopInfo->getLastIter() &&
2623 "`lastiter` in CanonicalLoopInfo is nullptr");
2624 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
2625 linearClauseProcessor.finalizeLinearVar(builder, moduleTranslation,
2626 loopInfo->getLastIter());
2627 if (failed(handleError(afterBarrierIP, *loopOp)))
2628 return failure();
2629 for (size_t index = 0; index < wsloopOp.getLinearVars().size(); index++)
2630 linearClauseProcessor.rewriteInPlace(builder, "omp.loop_nest.region",
2631 index);
2632 builder.restoreIP(oldIP);
2633 }
2634
2635 // Set the correct branch target for task cancellation
2636 popCancelFinalizationCB(cancelTerminators, *ompBuilder, wsloopIP.get());
2637
2638 // Process the reductions if required.
2639 if (failed(createReductionsAndCleanup(
2640 wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
2641 privateReductionVariables, isByRef, wsloopOp.getNowait(),
2642 /*isTeamsReduction=*/false)))
2643 return failure();
2644
2645 return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
2646 privateVarsInfo.llvmVars,
2647 privateVarsInfo.privatizers);
2648}
2649
2650/// Converts the OpenMP parallel operation to LLVM IR.
2651static LogicalResult
2652convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
2653 LLVM::ModuleTranslation &moduleTranslation) {
2654 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2655 ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionByref());
2656 assert(isByRef.size() == opInst.getNumReductionVars());
2657 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2658
2659 if (failed(checkImplementationStatus(*opInst)))
2660 return failure();
2661
2662 PrivateVarsInfo privateVarsInfo(opInst);
2663
2664 // Collect reduction declarations
2666 collectReductionDecls(opInst, reductionDecls);
2667 SmallVector<llvm::Value *> privateReductionVariables(
2668 opInst.getNumReductionVars());
2669 SmallVector<DeferredStore> deferredStores;
2670
2671 auto bodyGenCB = [&](InsertPointTy allocaIP,
2672 InsertPointTy codeGenIP) -> llvm::Error {
2674 builder, moduleTranslation, privateVarsInfo, allocaIP);
2675 if (handleError(afterAllocas, *opInst).failed())
2676 return llvm::make_error<PreviouslyReportedError>();
2677
2678 // Allocate reduction vars
2679 DenseMap<Value, llvm::Value *> reductionVariableMap;
2680
2681 MutableArrayRef<BlockArgument> reductionArgs =
2682 cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs();
2683
2684 allocaIP =
2685 InsertPointTy(allocaIP.getBlock(),
2686 allocaIP.getBlock()->getTerminator()->getIterator());
2687
2688 if (failed(allocReductionVars(
2689 opInst, reductionArgs, builder, moduleTranslation, allocaIP,
2690 reductionDecls, privateReductionVariables, reductionVariableMap,
2691 deferredStores, isByRef)))
2692 return llvm::make_error<PreviouslyReportedError>();
2693
2694 assert(afterAllocas.get()->getSinglePredecessor());
2695 builder.restoreIP(codeGenIP);
2696
2697 if (handleError(
2698 initPrivateVars(builder, moduleTranslation, privateVarsInfo),
2699 *opInst)
2700 .failed())
2701 return llvm::make_error<PreviouslyReportedError>();
2702
2703 if (failed(copyFirstPrivateVars(
2704 opInst, builder, moduleTranslation, privateVarsInfo.mlirVars,
2705 privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
2706 opInst.getPrivateNeedsBarrier())))
2707 return llvm::make_error<PreviouslyReportedError>();
2708
2709 if (failed(
2710 initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
2711 afterAllocas.get()->getSinglePredecessor(),
2712 reductionDecls, privateReductionVariables,
2713 reductionVariableMap, isByRef, deferredStores)))
2714 return llvm::make_error<PreviouslyReportedError>();
2715
2716 // Save the alloca insertion point on ModuleTranslation stack for use in
2717 // nested regions.
2719 moduleTranslation, allocaIP);
2720
2721 // ParallelOp has only one region associated with it.
2723 opInst.getRegion(), "omp.par.region", builder, moduleTranslation);
2724 if (!regionBlock)
2725 return regionBlock.takeError();
2726
2727 // Process the reductions if required.
2728 if (opInst.getNumReductionVars() > 0) {
2729 // Collect reduction info
2731 SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
2733 collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
2734 owningReductionGens, owningAtomicReductionGens,
2735 privateReductionVariables, reductionInfos);
2736
2737 // Move to region cont block
2738 builder.SetInsertPoint((*regionBlock)->getTerminator());
2739
2740 // Generate reductions from info
2741 llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
2742 builder.SetInsertPoint(tempTerminator);
2743
2744 llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
2745 ompBuilder->createReductions(
2746 builder.saveIP(), allocaIP, reductionInfos, isByRef,
2747 /*IsNoWait=*/false, /*IsTeamsReduction=*/false);
2748 if (!contInsertPoint)
2749 return contInsertPoint.takeError();
2750
2751 if (!contInsertPoint->getBlock())
2752 return llvm::make_error<PreviouslyReportedError>();
2753
2754 tempTerminator->eraseFromParent();
2755 builder.restoreIP(*contInsertPoint);
2756 }
2757
2758 return llvm::Error::success();
2759 };
2760
2761 auto privCB = [](InsertPointTy allocaIP, InsertPointTy codeGenIP,
2762 llvm::Value &, llvm::Value &val, llvm::Value *&replVal) {
2763 // tell OpenMPIRBuilder not to do anything. We handled Privatisation in
2764 // bodyGenCB.
2765 replVal = &val;
2766 return codeGenIP;
2767 };
2768
2769 // TODO: Perform finalization actions for variables. This has to be
2770 // called for variables which have destructors/finalizers.
2771 auto finiCB = [&](InsertPointTy codeGenIP) -> llvm::Error {
2772 InsertPointTy oldIP = builder.saveIP();
2773 builder.restoreIP(codeGenIP);
2774
2775 // if the reduction has a cleanup region, inline it here to finalize the
2776 // reduction variables
2777 SmallVector<Region *> reductionCleanupRegions;
2778 llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions),
2779 [](omp::DeclareReductionOp reductionDecl) {
2780 return &reductionDecl.getCleanupRegion();
2781 });
2782 if (failed(inlineOmpRegionCleanup(
2783 reductionCleanupRegions, privateReductionVariables,
2784 moduleTranslation, builder, "omp.reduction.cleanup")))
2785 return llvm::createStringError(
2786 "failed to inline `cleanup` region of `omp.declare_reduction`");
2787
2788 if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(),
2789 privateVarsInfo.llvmVars,
2790 privateVarsInfo.privatizers)))
2791 return llvm::make_error<PreviouslyReportedError>();
2792
2793 builder.restoreIP(oldIP);
2794 return llvm::Error::success();
2795 };
2796
2797 llvm::Value *ifCond = nullptr;
2798 if (auto ifVar = opInst.getIfExpr())
2799 ifCond = moduleTranslation.lookupValue(ifVar);
2800 llvm::Value *numThreads = nullptr;
2801 if (auto numThreadsVar = opInst.getNumThreads())
2802 numThreads = moduleTranslation.lookupValue(numThreadsVar);
2803 auto pbKind = llvm::omp::OMP_PROC_BIND_default;
2804 if (auto bind = opInst.getProcBindKind())
2805 pbKind = getProcBindKind(*bind);
2806 bool isCancellable = constructIsCancellable(opInst);
2807
2808 llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2809 findAllocaInsertPoint(builder, moduleTranslation);
2810 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2811
2812 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2813 ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
2814 ifCond, numThreads, pbKind, isCancellable);
2815
2816 if (failed(handleError(afterIP, *opInst)))
2817 return failure();
2818
2819 builder.restoreIP(*afterIP);
2820 return success();
2821}
2822
2823/// Convert Order attribute to llvm::omp::OrderKind.
2824static llvm::omp::OrderKind
2825convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
2826 if (!o)
2827 return llvm::omp::OrderKind::OMP_ORDER_unknown;
2828 switch (*o) {
2829 case omp::ClauseOrderKind::Concurrent:
2830 return llvm::omp::OrderKind::OMP_ORDER_concurrent;
2831 }
2832 llvm_unreachable("Unknown ClauseOrderKind kind");
2833}
2834
2835/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
2836static LogicalResult
2837convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
2838 LLVM::ModuleTranslation &moduleTranslation) {
2839 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2840 auto simdOp = cast<omp::SimdOp>(opInst);
2841
2842 if (failed(checkImplementationStatus(opInst)))
2843 return failure();
2844
2845 PrivateVarsInfo privateVarsInfo(simdOp);
2846
2847 MutableArrayRef<BlockArgument> reductionArgs =
2848 cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
2849 DenseMap<Value, llvm::Value *> reductionVariableMap;
2850 SmallVector<llvm::Value *> privateReductionVariables(
2851 simdOp.getNumReductionVars());
2852 SmallVector<DeferredStore> deferredStores;
2854 collectReductionDecls(simdOp, reductionDecls);
2855 llvm::ArrayRef<bool> isByRef = getIsByRef(simdOp.getReductionByref());
2856 assert(isByRef.size() == simdOp.getNumReductionVars());
2857
2858 llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2859 findAllocaInsertPoint(builder, moduleTranslation);
2860
2862 builder, moduleTranslation, privateVarsInfo, allocaIP);
2863 if (handleError(afterAllocas, opInst).failed())
2864 return failure();
2865
2866 if (failed(allocReductionVars(simdOp, reductionArgs, builder,
2867 moduleTranslation, allocaIP, reductionDecls,
2868 privateReductionVariables, reductionVariableMap,
2869 deferredStores, isByRef)))
2870 return failure();
2871
2872 if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
2873 opInst)
2874 .failed())
2875 return failure();
2876
2877 // No call to copyFirstPrivateVars because FIRSTPRIVATE is not allowed for
2878 // SIMD.
2879
2880 assert(afterAllocas.get()->getSinglePredecessor());
2881 if (failed(initReductionVars(simdOp, reductionArgs, builder,
2882 moduleTranslation,
2883 afterAllocas.get()->getSinglePredecessor(),
2884 reductionDecls, privateReductionVariables,
2885 reductionVariableMap, isByRef, deferredStores)))
2886 return failure();
2887
2888 llvm::ConstantInt *simdlen = nullptr;
2889 if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
2890 simdlen = builder.getInt64(simdlenVar.value());
2891
2892 llvm::ConstantInt *safelen = nullptr;
2893 if (std::optional<uint64_t> safelenVar = simdOp.getSafelen())
2894 safelen = builder.getInt64(safelenVar.value());
2895
2896 llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
2897 llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder());
2898
2899 llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
2900 std::optional<ArrayAttr> alignmentValues = simdOp.getAlignments();
2901 mlir::OperandRange operands = simdOp.getAlignedVars();
2902 for (size_t i = 0; i < operands.size(); ++i) {
2903 llvm::Value *alignment = nullptr;
2904 llvm::Value *llvmVal = moduleTranslation.lookupValue(operands[i]);
2905 llvm::Type *ty = llvmVal->getType();
2906
2907 auto intAttr = cast<IntegerAttr>((*alignmentValues)[i]);
2908 alignment = builder.getInt64(intAttr.getInt());
2909 assert(ty->isPointerTy() && "Invalid type for aligned variable");
2910 assert(alignment && "Invalid alignment value");
2911
2912 // Check if the alignment value is not a power of 2. If so, skip emitting
2913 // alignment.
2914 if (!intAttr.getValue().isPowerOf2())
2915 continue;
2916
2917 auto curInsert = builder.saveIP();
2918 builder.SetInsertPoint(sourceBlock);
2919 llvmVal = builder.CreateLoad(ty, llvmVal);
2920 builder.restoreIP(curInsert);
2921 alignedVars[llvmVal] = alignment;
2922 }
2923
2925 simdOp.getRegion(), "omp.simd.region", builder, moduleTranslation);
2926
2927 if (failed(handleError(regionBlock, opInst)))
2928 return failure();
2929
2930 builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
2931 llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
2932 ompBuilder->applySimd(loopInfo, alignedVars,
2933 simdOp.getIfExpr()
2934 ? moduleTranslation.lookupValue(simdOp.getIfExpr())
2935 : nullptr,
2936 order, simdlen, safelen);
2937
2938 // We now need to reduce the per-simd-lane reduction variable into the
2939 // original variable. This works a bit differently to other reductions (e.g.
2940 // wsloop) because we don't need to call into the OpenMP runtime to handle
2941 // threads: everything happened in this one thread.
2942 for (auto [i, tuple] : llvm::enumerate(
2943 llvm::zip(reductionDecls, isByRef, simdOp.getReductionVars(),
2944 privateReductionVariables))) {
2945 auto [decl, byRef, reductionVar, privateReductionVar] = tuple;
2946
2947 OwningReductionGen gen = makeReductionGen(decl, builder, moduleTranslation);
2948 llvm::Value *originalVariable = moduleTranslation.lookupValue(reductionVar);
2949 llvm::Type *reductionType = moduleTranslation.convertType(decl.getType());
2950
2951 // We have one less load for by-ref case because that load is now inside of
2952 // the reduction region.
2953 llvm::Value *redValue = originalVariable;
2954 if (!byRef)
2955 redValue =
2956 builder.CreateLoad(reductionType, redValue, "red.value." + Twine(i));
2957 llvm::Value *privateRedValue = builder.CreateLoad(
2958 reductionType, privateReductionVar, "red.private.value." + Twine(i));
2959 llvm::Value *reduced;
2960
2961 auto res = gen(builder.saveIP(), redValue, privateRedValue, reduced);
2962 if (failed(handleError(res, opInst)))
2963 return failure();
2964 builder.restoreIP(res.get());
2965
2966 // For by-ref case, the store is inside of the reduction region.
2967 if (!byRef)
2968 builder.CreateStore(reduced, originalVariable);
2969 }
2970
2971 // After the construct, deallocate private reduction variables.
2972 SmallVector<Region *> reductionRegions;
2973 llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
2974 [](omp::DeclareReductionOp reductionDecl) {
2975 return &reductionDecl.getCleanupRegion();
2976 });
2977 if (failed(inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
2978 moduleTranslation, builder,
2979 "omp.reduction.cleanup")))
2980 return failure();
2981
2982 return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
2983 privateVarsInfo.llvmVars,
2984 privateVarsInfo.privatizers);
2985}
2986
2987/// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
2988static LogicalResult
2989convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
2990 LLVM::ModuleTranslation &moduleTranslation) {
2991 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2992 auto loopOp = cast<omp::LoopNestOp>(opInst);
2993
2994 // Set up the source location value for OpenMP runtime.
2995 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2996
2997 // Generator of the canonical loop body.
3000 auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip,
3001 llvm::Value *iv) -> llvm::Error {
3002 // Make sure further conversions know about the induction variable.
3003 moduleTranslation.mapValue(
3004 loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
3005
3006 // Capture the body insertion point for use in nested loops. BodyIP of the
3007 // CanonicalLoopInfo always points to the beginning of the entry block of
3008 // the body.
3009 bodyInsertPoints.push_back(ip);
3010
3011 if (loopInfos.size() != loopOp.getNumLoops() - 1)
3012 return llvm::Error::success();
3013
3014 // Convert the body of the loop.
3015 builder.restoreIP(ip);
3017 loopOp.getRegion(), "omp.loop_nest.region", builder, moduleTranslation);
3018 if (!regionBlock)
3019 return regionBlock.takeError();
3020
3021 builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
3022 return llvm::Error::success();
3023 };
3024
3025 // Delegate actual loop construction to the OpenMP IRBuilder.
3026 // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
3027 // loop, i.e. it has a positive step, uses signed integer semantics.
3028 // Reconsider this code when the nested loop operation clearly supports more
3029 // cases.
3030 for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
3031 llvm::Value *lowerBound =
3032 moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]);
3033 llvm::Value *upperBound =
3034 moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]);
3035 llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]);
3036
3037 // Make sure loop trip count are emitted in the preheader of the outermost
3038 // loop at the latest so that they are all available for the new collapsed
3039 // loop will be created below.
3040 llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
3041 llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
3042 if (i != 0) {
3043 loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
3044 ompLoc.DL);
3045 computeIP = loopInfos.front()->getPreheaderIP();
3046 }
3047
3049 ompBuilder->createCanonicalLoop(
3050 loc, bodyGen, lowerBound, upperBound, step,
3051 /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);
3052
3053 if (failed(handleError(loopResult, *loopOp)))
3054 return failure();
3055
3056 loopInfos.push_back(*loopResult);
3057 }
3058
3059 llvm::OpenMPIRBuilder::InsertPointTy afterIP =
3060 loopInfos.front()->getAfterIP();
3061
3062 // Do tiling.
3063 if (const auto &tiles = loopOp.getTileSizes()) {
3064 llvm::Type *ivType = loopInfos.front()->getIndVarType();
3066
3067 for (auto tile : tiles.value()) {
3068 llvm::Value *tileVal = llvm::ConstantInt::get(ivType, tile);
3069 tileSizes.push_back(tileVal);
3070 }
3071
3072 std::vector<llvm::CanonicalLoopInfo *> newLoops =
3073 ompBuilder->tileLoops(ompLoc.DL, loopInfos, tileSizes);
3074
3075 // Update afterIP to get the correct insertion point after
3076 // tiling.
3077 llvm::BasicBlock *afterBB = newLoops.front()->getAfter();
3078 llvm::BasicBlock *afterAfterBB = afterBB->getSingleSuccessor();
3079 afterIP = {afterAfterBB, afterAfterBB->begin()};
3080
3081 // Update the loop infos.
3082 loopInfos.clear();
3083 for (const auto &newLoop : newLoops)
3084 loopInfos.push_back(newLoop);
3085 } // Tiling done.
3086
3087 // Do collapse.
3088 const auto &numCollapse = loopOp.getCollapseNumLoops();
3090 loopInfos.begin(), loopInfos.begin() + (numCollapse));
3091
3092 auto newTopLoopInfo =
3093 ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {});
3094
3095 assert(newTopLoopInfo && "New top loop information is missing");
3096 moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
3097 [&](OpenMPLoopInfoStackFrame &frame) {
3098 frame.loopInfo = newTopLoopInfo;
3099 return WalkResult::interrupt();
3100 });
3101
3102 // Continue building IR after the loop. Note that the LoopInfo returned by
3103 // `collapseLoops` points inside the outermost loop and is intended for
3104 // potential further loop transformations. Use the insertion point stored
3105 // before collapsing loops instead.
3106 builder.restoreIP(afterIP);
3107 return success();
3108}
3109
3110/// Convert an omp.canonical_loop to LLVM-IR
3111static LogicalResult
3112convertOmpCanonicalLoopOp(omp::CanonicalLoopOp op, llvm::IRBuilderBase &builder,
3113 LLVM::ModuleTranslation &moduleTranslation) {
3114 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3115
3116 llvm::OpenMPIRBuilder::LocationDescription loopLoc(builder);
3117 Value loopIV = op.getInductionVar();
3118 Value loopTC = op.getTripCount();
3119
3120 llvm::Value *llvmTC = moduleTranslation.lookupValue(loopTC);
3121
3123 ompBuilder->createCanonicalLoop(
3124 loopLoc,
3125 [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *llvmIV) {
3126 // Register the mapping of MLIR induction variable to LLVM-IR
3127 // induction variable
3128 moduleTranslation.mapValue(loopIV, llvmIV);
3129
3130 builder.restoreIP(ip);
3132 convertOmpOpRegions(op.getRegion(), "omp.loop.region", builder,
3133 moduleTranslation);
3134
3135 return bodyGenStatus.takeError();
3136 },
3137 llvmTC, "omp.loop");
3138 if (!llvmOrError)
3139 return op.emitError(llvm::toString(llvmOrError.takeError()));
3140
3141 llvm::CanonicalLoopInfo *llvmCLI = *llvmOrError;
3142 llvm::IRBuilderBase::InsertPoint afterIP = llvmCLI->getAfterIP();
3143 builder.restoreIP(afterIP);
3144
3145 // Register the mapping of MLIR loop to LLVM-IR OpenMPIRBuilder loop
3146 if (Value cli = op.getCli())
3147 moduleTranslation.mapOmpLoop(cli, llvmCLI);
3148
3149 return success();
3150}
3151
3152/// Apply a `#pragma omp unroll` / "!$omp unroll" transformation using the
3153/// OpenMPIRBuilder.
3154static LogicalResult
3155applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder,
3156 LLVM::ModuleTranslation &moduleTranslation) {
3157 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3158
3159 Value applyee = op.getApplyee();
3160 assert(applyee && "Loop to apply unrolling on required");
3161
3162 llvm::CanonicalLoopInfo *consBuilderCLI =
3163 moduleTranslation.lookupOMPLoop(applyee);
3164 llvm::OpenMPIRBuilder::LocationDescription loc(builder);
3165 ompBuilder->unrollLoopHeuristic(loc.DL, consBuilderCLI);
3166
3167 moduleTranslation.invalidateOmpLoop(applyee);
3168 return success();
3169}
3170
3171/// Apply a `#pragma omp tile` / `!$omp tile` transformation using the
3172/// OpenMPIRBuilder.
3173static LogicalResult applyTile(omp::TileOp op, llvm::IRBuilderBase &builder,
3174 LLVM::ModuleTranslation &moduleTranslation) {
3175 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3176 llvm::OpenMPIRBuilder::LocationDescription loc(builder);
3177
3179 SmallVector<llvm::Value *> translatedSizes;
3180
3181 for (Value size : op.getSizes()) {
3182 llvm::Value *translatedSize = moduleTranslation.lookupValue(size);
3183 assert(translatedSize &&
3184 "sizes clause arguments must already be translated");
3185 translatedSizes.push_back(translatedSize);
3186 }
3187
3188 for (Value applyee : op.getApplyees()) {
3189 llvm::CanonicalLoopInfo *consBuilderCLI =
3190 moduleTranslation.lookupOMPLoop(applyee);
3191 assert(applyee && "Canonical loop must already been translated");
3192 translatedLoops.push_back(consBuilderCLI);
3193 }
3194
3195 auto generatedLoops =
3196 ompBuilder->tileLoops(loc.DL, translatedLoops, translatedSizes);
3197 if (!op.getGeneratees().empty()) {
3198 for (auto [mlirLoop, genLoop] :
3199 zip_equal(op.getGeneratees(), generatedLoops))
3200 moduleTranslation.mapOmpLoop(mlirLoop, genLoop);
3201 }
3202
3203 // CLIs can only be consumed once
3204 for (Value applyee : op.getApplyees())
3205 moduleTranslation.invalidateOmpLoop(applyee);
3206
3207 return success();
3208}
3209
3210/// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
3211static llvm::AtomicOrdering
3212convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
3213 if (!ao)
3214 return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
3215
3216 switch (*ao) {
3217 case omp::ClauseMemoryOrderKind::Seq_cst:
3218 return llvm::AtomicOrdering::SequentiallyConsistent;
3219 case omp::ClauseMemoryOrderKind::Acq_rel:
3220 return llvm::AtomicOrdering::AcquireRelease;
3221 case omp::ClauseMemoryOrderKind::Acquire:
3222 return llvm::AtomicOrdering::Acquire;
3223 case omp::ClauseMemoryOrderKind::Release:
3224 return llvm::AtomicOrdering::Release;
3225 case omp::ClauseMemoryOrderKind::Relaxed:
3226 return llvm::AtomicOrdering::Monotonic;
3227 }
3228 llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
3229}
3230
3231/// Convert omp.atomic.read operation to LLVM IR.
3232static LogicalResult
3233convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
3234 LLVM::ModuleTranslation &moduleTranslation) {
3235 auto readOp = cast<omp::AtomicReadOp>(opInst);
3236 if (failed(checkImplementationStatus(opInst)))
3237 return failure();
3238
3239 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3240 llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3241 findAllocaInsertPoint(builder, moduleTranslation);
3242
3243 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3244
3245 llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrder());
3246 llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
3247 llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
3248
3249 llvm::Type *elementType =
3250 moduleTranslation.convertType(readOp.getElementType());
3251
3252 llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
3253 llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
3254 builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO, allocaIP));
3255 return success();
3256}
3257
3258/// Converts an omp.atomic.write operation to LLVM IR.
3259static LogicalResult
3260convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
3261 LLVM::ModuleTranslation &moduleTranslation) {
3262 auto writeOp = cast<omp::AtomicWriteOp>(opInst);
3263 if (failed(checkImplementationStatus(opInst)))
3264 return failure();
3265
3266 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3267 llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3268 findAllocaInsertPoint(builder, moduleTranslation);
3269
3270 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3271 llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder());
3272 llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
3273 llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
3274 llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
3275 llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
3276 /*isVolatile=*/false};
3277 builder.restoreIP(
3278 ompBuilder->createAtomicWrite(ompLoc, x, expr, ao, allocaIP));
3279 return success();
3280}
3281
3282/// Converts an LLVM dialect binary operation to the corresponding enum value
3283/// for `atomicrmw` supported binary operation.
3284static llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
3286 .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
3287 .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
3288 .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
3289 .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
3290 .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
3291 .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
3292 .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
3293 .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
3294 .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
3295 .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
3296}
3297
3298static void extractAtomicControlFlags(omp::AtomicUpdateOp atomicUpdateOp,
3299 bool &isIgnoreDenormalMode,
3300 bool &isFineGrainedMemory,
3301 bool &isRemoteMemory) {
3302 isIgnoreDenormalMode = false;
3303 isFineGrainedMemory = false;
3304 isRemoteMemory = false;
3305 if (atomicUpdateOp &&
3306 atomicUpdateOp->hasAttr(atomicUpdateOp.getAtomicControlAttrName())) {
3307 mlir::omp::AtomicControlAttr atomicControlAttr =
3308 atomicUpdateOp.getAtomicControlAttr();
3309 isIgnoreDenormalMode = atomicControlAttr.getIgnoreDenormalMode();
3310 isFineGrainedMemory = atomicControlAttr.getFineGrainedMemory();
3311 isRemoteMemory = atomicControlAttr.getRemoteMemory();
3312 }
3313}
3314
3315/// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
3316static LogicalResult
3317convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
3318 llvm::IRBuilderBase &builder,
3319 LLVM::ModuleTranslation &moduleTranslation) {
3320 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3321 if (failed(checkImplementationStatus(*opInst)))
3322 return failure();
3323
3324 // Convert values and types.
3325 auto &innerOpList = opInst.getRegion().front().getOperations();
3326 bool isXBinopExpr{false};
3327 llvm::AtomicRMWInst::BinOp binop;
3328 mlir::Value mlirExpr;
3329 llvm::Value *llvmExpr = nullptr;
3330 llvm::Value *llvmX = nullptr;
3331 llvm::Type *llvmXElementType = nullptr;
3332 if (innerOpList.size() == 2) {
3333 // The two operations here are the update and the terminator.
3334 // Since we can identify the update operation, there is a possibility
3335 // that we can generate the atomicrmw instruction.
3336 mlir::Operation &innerOp = *opInst.getRegion().front().begin();
3337 if (!llvm::is_contained(innerOp.getOperands(),
3338 opInst.getRegion().getArgument(0))) {
3339 return opInst.emitError("no atomic update operation with region argument"
3340 " as operand found inside atomic.update region");
3341 }
3342 binop = convertBinOpToAtomic(innerOp);
3343 isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
3344 mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
3345 llvmExpr = moduleTranslation.lookupValue(mlirExpr);
3346 } else {
3347 // Since the update region includes more than one operation
3348 // we will resort to generating a cmpxchg loop.
3349 binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
3350 }
3351 llvmX = moduleTranslation.lookupValue(opInst.getX());
3352 llvmXElementType = moduleTranslation.convertType(
3353 opInst.getRegion().getArgument(0).getType());
3354 llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
3355 /*isSigned=*/false,
3356 /*isVolatile=*/false};
3357
3358 llvm::AtomicOrdering atomicOrdering =
3359 convertAtomicOrdering(opInst.getMemoryOrder());
3360
3361 // Generate update code.
3362 auto updateFn =
3363 [&opInst, &moduleTranslation](
3364 llvm::Value *atomicx,
3365 llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
3366 Block &bb = *opInst.getRegion().begin();
3367 moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
3368 moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
3369 if (failed(moduleTranslation.convertBlock(bb, true, builder)))
3370 return llvm::make_error<PreviouslyReportedError>();
3371
3372 omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
3373 assert(yieldop && yieldop.getResults().size() == 1 &&
3374 "terminator must be omp.yield op and it must have exactly one "
3375 "argument");
3376 return moduleTranslation.lookupValue(yieldop.getResults()[0]);
3377 };
3378
3379 bool isIgnoreDenormalMode;
3380 bool isFineGrainedMemory;
3381 bool isRemoteMemory;
3382 extractAtomicControlFlags(opInst, isIgnoreDenormalMode, isFineGrainedMemory,
3383 isRemoteMemory);
3384 // Handle ambiguous alloca, if any.
3385 auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
3386 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3387 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3388 ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
3389 atomicOrdering, binop, updateFn,
3390 isXBinopExpr, isIgnoreDenormalMode,
3391 isFineGrainedMemory, isRemoteMemory);
3392
3393 if (failed(handleError(afterIP, *opInst)))
3394 return failure();
3395
3396 builder.restoreIP(*afterIP);
3397 return success();
3398}
3399
/// Converts an omp.atomic.capture operation to LLVM IR using
/// OpenMPIRBuilder::createAtomicCapture. The capture pairs an
/// omp.atomic.read with either an omp.atomic.update or an omp.atomic.write
/// inner op (asserted below).
static LogicalResult
convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
                        llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  if (failed(checkImplementationStatus(*atomicCaptureOp)))
    return failure();

  mlir::Value mlirExpr;
  bool isXBinopExpr = false, isPostfixUpdate = false;
  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;

  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();

  assert((atomicUpdateOp || atomicWriteOp) &&
         "internal op must be an atomic.update or atomic.write op");

  if (atomicWriteOp) {
    // A write always captures the old value, so treat it as postfix.
    isPostfixUpdate = true;
    mlirExpr = atomicWriteOp.getExpr();
  } else {
    // Postfix update: the update op follows the read op inside the capture.
    isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
                      atomicCaptureOp.getAtomicUpdateOp().getOperation();
    auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
    // Find the binary update operation that uses the region argument
    // and get the expression to update
    if (innerOpList.size() == 2) {
      mlir::Operation &innerOp = *atomicUpdateOp.getRegion().front().begin();
      if (!llvm::is_contained(innerOp.getOperands(),
                              atomicUpdateOp.getRegion().getArgument(0))) {
        return atomicUpdateOp.emitError(
            "no atomic update operation with region argument"
            " as operand found inside atomic.update region");
      }
      binop = convertBinOpToAtomic(innerOp);
      // Whether the region argument is the left-hand operand of the binop.
      isXBinopExpr =
          innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
      mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    } else {
      // More than one update op: fall back to a cmpxchg loop.
      binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
    }
  }

  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  llvm::Value *llvmX =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
  llvm::Value *llvmV =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
  llvm::Type *llvmXElementType = moduleTranslation.convertType(
      atomicCaptureOp.getAtomicReadOp().getElementType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(atomicCaptureOp.getMemoryOrder());

  // Callback for the cmpxchg-loop path: for a write, the new value is simply
  // the written expression; for an update, translate the update region with
  // the region argument bound to the current atomic value and return the
  // value yielded by its omp.yield terminator.
  auto updateFn =
      [&](llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    if (atomicWriteOp)
      return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
    Block &bb = *atomicUpdateOp.getRegion().begin();
    moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
                               atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Flags derived from the update op's atomic_control attribute (if any);
  // extractAtomicControlFlags handles a null atomicUpdateOp.
  bool isIgnoreDenormalMode;
  bool isFineGrainedMemory;
  bool isRemoteMemory;
  extractAtomicControlFlags(atomicUpdateOp, isIgnoreDenormalMode,
                            isFineGrainedMemory, isRemoteMemory);
  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicCapture(
          ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
          binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr,
          isIgnoreDenormalMode, isFineGrainedMemory, isRemoteMemory);

  if (failed(handleError(afterIP, *atomicCaptureOp)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
3500
3501static llvm::omp::Directive convertCancellationConstructType(
3502 omp::ClauseCancellationConstructType directive) {
3503 switch (directive) {
3504 case omp::ClauseCancellationConstructType::Loop:
3505 return llvm::omp::Directive::OMPD_for;
3506 case omp::ClauseCancellationConstructType::Parallel:
3507 return llvm::omp::Directive::OMPD_parallel;
3508 case omp::ClauseCancellationConstructType::Sections:
3509 return llvm::omp::Directive::OMPD_sections;
3510 case omp::ClauseCancellationConstructType::Taskgroup:
3511 return llvm::omp::Directive::OMPD_taskgroup;
3512 }
3513 llvm_unreachable("Unhandled cancellation construct type");
3514}
3515
3516static LogicalResult
3517convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder,
3518 LLVM::ModuleTranslation &moduleTranslation) {
3519 if (failed(checkImplementationStatus(*op.getOperation())))
3520 return failure();
3521
3522 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3523 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3524
3525 llvm::Value *ifCond = nullptr;
3526 if (Value ifVar = op.getIfExpr())
3527 ifCond = moduleTranslation.lookupValue(ifVar);
3528
3529 llvm::omp::Directive cancelledDirective =
3530 convertCancellationConstructType(op.getCancelDirective());
3531
3532 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3533 ompBuilder->createCancel(ompLoc, ifCond, cancelledDirective);
3534
3535 if (failed(handleError(afterIP, *op.getOperation())))
3536 return failure();
3537
3538 builder.restoreIP(afterIP.get());
3539
3540 return success();
3541}
3542
3543static LogicalResult
3544convertOmpCancellationPoint(omp::CancellationPointOp op,
3545 llvm::IRBuilderBase &builder,
3546 LLVM::ModuleTranslation &moduleTranslation) {
3547 if (failed(checkImplementationStatus(*op.getOperation())))
3548 return failure();
3549
3550 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3551 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3552
3553 llvm::omp::Directive cancelledDirective =
3554 convertCancellationConstructType(op.getCancelDirective());
3555
3556 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3557 ompBuilder->createCancellationPoint(ompLoc, cancelledDirective);
3558
3559 if (failed(handleError(afterIP, *op.getOperation())))
3560 return failure();
3561
3562 builder.restoreIP(afterIP.get());
3563
3564 return success();
3565}
3566
3567/// Converts an OpenMP Threadprivate operation into LLVM IR using
3568/// OpenMPIRBuilder.
3569static LogicalResult
3570convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
3571 LLVM::ModuleTranslation &moduleTranslation) {
3572 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3573 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3574 auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
3575
3576 if (failed(checkImplementationStatus(opInst)))
3577 return failure();
3578
3579 Value symAddr = threadprivateOp.getSymAddr();
3580 auto *symOp = symAddr.getDefiningOp();
3581
3582 if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
3583 symOp = asCast.getOperand().getDefiningOp();
3584
3585 if (!isa<LLVM::AddressOfOp>(symOp))
3586 return opInst.emitError("Addressing symbol not found");
3587 LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
3588
3589 LLVM::GlobalOp global =
3590 addressOfOp.getGlobal(moduleTranslation.symbolTable());
3591 llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
3592
3593 if (!ompBuilder->Config.isTargetDevice()) {
3594 llvm::Type *type = globalValue->getValueType();
3595 llvm::TypeSize typeSize =
3596 builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
3597 type);
3598 llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
3599 llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
3600 ompLoc, globalValue, size, global.getSymName() + ".cache");
3601 moduleTranslation.mapValue(opInst.getResult(0), callInst);
3602 } else {
3603 moduleTranslation.mapValue(opInst.getResult(0), globalValue);
3604 }
3605
3606 return success();
3607}
3608
3609static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
3610convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
3611 switch (deviceClause) {
3612 case mlir::omp::DeclareTargetDeviceType::host:
3613 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
3614 break;
3615 case mlir::omp::DeclareTargetDeviceType::nohost:
3616 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
3617 break;
3618 case mlir::omp::DeclareTargetDeviceType::any:
3619 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
3620 break;
3621 }
3622 llvm_unreachable("unhandled device clause");
3623}
3624
3625static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
3627 mlir::omp::DeclareTargetCaptureClause captureClause) {
3628 switch (captureClause) {
3629 case mlir::omp::DeclareTargetCaptureClause::to:
3630 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
3631 case mlir::omp::DeclareTargetCaptureClause::link:
3632 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
3633 case mlir::omp::DeclareTargetCaptureClause::enter:
3634 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
3635 }
3636 llvm_unreachable("unhandled capture clause");
3637}
3638
3639static llvm::SmallString<64>
3640getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
3641 llvm::OpenMPIRBuilder &ompBuilder) {
3642 llvm::SmallString<64> suffix;
3643 llvm::raw_svector_ostream os(suffix);
3644 if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
3645 auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
3646 auto fileInfoCallBack = [&loc]() {
3647 return std::pair<std::string, uint64_t>(
3648 llvm::StringRef(loc.getFilename()), loc.getLine());
3649 };
3650
3651 auto vfs = llvm::vfs::getRealFileSystem();
3652 os << llvm::format(
3653 "_%x",
3654 ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack, *vfs).FileID);
3655 }
3656 os << "_decl_tgt_ref_ptr";
3657
3658 return suffix;
3659}
3660
3662 if (auto addressOfOp = value.getDefiningOp<LLVM::AddressOfOp>()) {
3663 auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
3664 Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
3665 if (auto declareTargetGlobal =
3666 llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
3667 if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3668 mlir::omp::DeclareTargetCaptureClause::link)
3669 return true;
3670 }
3671 return false;
3672}
3673
3674// Returns the reference pointer generated by the lowering of the declare target
3675// operation in cases where the link clause is used or the to clause is used in
3676// USM mode.
3677static llvm::Value *
3679 LLVM::ModuleTranslation &moduleTranslation) {
3680 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3681 Operation *op = value.getDefiningOp();
3682 if (auto addrCast = llvm::dyn_cast_if_present<LLVM::AddrSpaceCastOp>(op))
3683 op = addrCast->getOperand(0).getDefiningOp();
3684
3685 // An easier way to do this may just be to keep track of any pointer
3686 // references and their mapping to their respective operation
3687 if (auto addressOfOp = llvm::dyn_cast_if_present<LLVM::AddressOfOp>(op)) {
3688 if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
3689 addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
3690 addressOfOp.getGlobalName()))) {
3691
3692 if (auto declareTargetGlobal =
3693 llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
3694 gOp.getOperation())) {
3695
3696 // In this case, we must utilise the reference pointer generated by the
3697 // declare target operation, similar to Clang
3698 if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
3699 mlir::omp::DeclareTargetCaptureClause::link) ||
3700 (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3701 mlir::omp::DeclareTargetCaptureClause::to &&
3702 ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
3703 llvm::SmallString<64> suffix =
3704 getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
3705
3706 if (gOp.getSymName().contains(suffix))
3707 return moduleTranslation.getLLVMModule()->getNamedValue(
3708 gOp.getSymName());
3709
3710 return moduleTranslation.getLLVMModule()->getNamedValue(
3711 (gOp.getSymName().str() + suffix.str()).str());
3712 }
3713 }
3714 }
3715 }
3716
3717 return nullptr;
3718}
3719
namespace {
// Append customMappers information to existing MapInfosTy
struct MapInfosTy : llvm::OpenMPIRBuilder::MapInfosTy {
  // User-defined mapper ops (one per map entry; nullptr when no mapper).
  SmallVector<Operation *, 4> Mappers;

  /// Append arrays in \a curInfo.
  void append(MapInfosTy &curInfo) {
    Mappers.append(curInfo.Mappers.begin(), curInfo.Mappers.end());
    llvm::OpenMPIRBuilder::MapInfosTy::append(curInfo);
  }
};
// A small helper structure to contain data gathered
// for map lowering, coalescing it into one area and
// avoiding extra computations such as searches in the
// llvm module for lowered mapped variables or checking
// if something is declare target (and retrieving the
// value) more than necessary.
struct MapInfoData : MapInfosTy {
  llvm::SmallVector<bool, 4> IsDeclareTarget;
  llvm::SmallVector<bool, 4> IsAMember;
  // Identify if mapping was added by mapClause or use_device clauses.
  llvm::SmallVector<bool, 4> IsAMapping;
  llvm::SmallVector<mlir::Operation *, 4> MapClause;
  llvm::SmallVector<llvm::Value *, 4> OriginalValue;
  // Stripped off array/pointer to get the underlying
  // element type
  llvm::SmallVector<llvm::Type *, 4> BaseType;

  /// Append arrays in \a CurInfo.
  void append(MapInfoData &CurInfo) {
    IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
                           CurInfo.IsDeclareTarget.end());
    MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
    OriginalValue.append(CurInfo.OriginalValue.begin(),
                         CurInfo.OriginalValue.end());
    BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
    MapInfosTy::append(CurInfo);
  }
};
} // namespace
3760
3761static uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy,
3762 DataLayout &dl) {
3763 if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
3764 arrTy.getElementType()))
3765 return getArrayElementSizeInBits(nestedArrTy, dl);
3766 return dl.getTypeSizeInBits(arrTy.getElementType());
3767}
3768
// This function calculates the size to be offloaded for a specified type, given
// its associated map clause (which can contain bounds information which affects
// the total size), this size is calculated based on the underlying element type
// e.g. given a 1-D array of ints, we will calculate the size from the integer
// type * number of elements in the array. This size can be used in other
// calculations but is ultimately used as an argument to the OpenMP runtimes
// kernel argument structure which is generated through the combinedInfo data
// structures.
// This function is somewhat equivalent to Clang's getExprTypeSize inside of
// CGOpenMPRuntime.cpp.
static llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
                                   Operation *clauseOp,
                                   llvm::Value *basePointer,
                                   llvm::Type *baseType,
                                   llvm::IRBuilderBase &builder,
                                   LLVM::ModuleTranslation &moduleTranslation) {
  if (auto memberClause =
          mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
    // This calculates the size to transfer based on bounds and the underlying
    // element type, provided bounds have been specified (Fortran
    // pointers/allocatables/target and arrays that have sections specified fall
    // into this as well).
    if (!memberClause.getBounds().empty()) {
      // Accumulate the element count across all dimensions, starting at 1.
      llvm::Value *elementCount = builder.getInt64(1);
      for (auto bounds : memberClause.getBounds()) {
        if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
                bounds.getDefiningOp())) {
          // The below calculation for the size to be mapped calculated from the
          // map.info's bounds is: (elemCount * [UB - LB] + 1), later we
          // multiply by the underlying element types byte size to get the full
          // size to be offloaded based on the bounds
          elementCount = builder.CreateMul(
              elementCount,
              builder.CreateAdd(
                  builder.CreateSub(
                      moduleTranslation.lookupValue(boundOp.getUpperBound()),
                      moduleTranslation.lookupValue(boundOp.getLowerBound())),
                  builder.getInt64(1)));
        }
      }

      // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives
      // the size in inconsistent byte or bit format.
      uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
      // For arrays, the offload size is per-element, not the whole array.
      if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
        underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);

      // The size in bytes x number of elements, the sizeInBytes stored is
      // the underlying types size, e.g. if ptr<i32>, it'll be the i32's
      // size, so we do some on the fly runtime math to get the size in
      // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
      // some adjustment for members with more complex types.
      return builder.CreateMul(elementCount,
                               builder.getInt64(underlyingTypeSzInBits / 8));
    }
  }

  // No bounds: map the whole type's size in bytes.
  return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
}
3828
3829// Convert the MLIR map flag set to the runtime map flag set for embedding
3830// in LLVM-IR. This is important as the two bit-flag lists do not correspond
3831// 1-to-1 as there's flags the runtime doesn't care about and vice versa.
3832// Certain flags are discarded here such as RefPtee and co.
3833static llvm::omp::OpenMPOffloadMappingFlags
3834convertClauseMapFlags(omp::ClauseMapFlags mlirFlags) {
3835 auto mapTypeToBool = [&mlirFlags](omp::ClauseMapFlags flag) {
3836 return (mlirFlags & flag) == flag;
3837 };
3838
3839 llvm::omp::OpenMPOffloadMappingFlags mapType =
3840 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE;
3841
3842 if (mapTypeToBool(omp::ClauseMapFlags::to))
3843 mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
3844
3845 if (mapTypeToBool(omp::ClauseMapFlags::from))
3846 mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
3847
3848 if (mapTypeToBool(omp::ClauseMapFlags::always))
3849 mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
3850
3851 if (mapTypeToBool(omp::ClauseMapFlags::del))
3852 mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
3853
3854 if (mapTypeToBool(omp::ClauseMapFlags::return_param))
3855 mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
3856
3857 if (mapTypeToBool(omp::ClauseMapFlags::priv))
3858 mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE;
3859
3860 if (mapTypeToBool(omp::ClauseMapFlags::literal))
3861 mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
3862
3863 if (mapTypeToBool(omp::ClauseMapFlags::implicit))
3864 mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
3865
3866 if (mapTypeToBool(omp::ClauseMapFlags::close))
3867 mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
3868
3869 if (mapTypeToBool(omp::ClauseMapFlags::present))
3870 mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
3871
3872 if (mapTypeToBool(omp::ClauseMapFlags::ompx_hold))
3873 mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
3874
3875 if (mapTypeToBool(omp::ClauseMapFlags::attach))
3876 mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH;
3877
3878 return mapType;
3879}
3880
3882 MapInfoData &mapData, SmallVectorImpl<Value> &mapVars,
3883 LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl,
3884 llvm::IRBuilderBase &builder, ArrayRef<Value> useDevPtrOperands = {},
3885 ArrayRef<Value> useDevAddrOperands = {},
3886 ArrayRef<Value> hasDevAddrOperands = {}) {
3887 auto checkIsAMember = [](const auto &mapVars, auto mapOp) {
3888 // Check if this is a member mapping and correctly assign that it is, if
3889 // it is a member of a larger object.
3890 // TODO: Need better handling of members, and distinguishing of members
3891 // that are implicitly allocated on device vs explicitly passed in as
3892 // arguments.
3893 // TODO: May require some further additions to support nested record
3894 // types, i.e. member maps that can have member maps.
3895 for (Value mapValue : mapVars) {
3896 auto map = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3897 for (auto member : map.getMembers())
3898 if (member == mapOp)
3899 return true;
3900 }
3901 return false;
3902 };
3903
3904 // Process MapOperands
3905 for (Value mapValue : mapVars) {
3906 auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3907 Value offloadPtr =
3908 mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3909 mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
3910 mapData.Pointers.push_back(mapData.OriginalValue.back());
3911
3912 if (llvm::Value *refPtr =
3913 getRefPtrIfDeclareTarget(offloadPtr,
3914 moduleTranslation)) { // declare target
3915 mapData.IsDeclareTarget.push_back(true);
3916 mapData.BasePointers.push_back(refPtr);
3917 } else { // regular mapped variable
3918 mapData.IsDeclareTarget.push_back(false);
3919 mapData.BasePointers.push_back(mapData.OriginalValue.back());
3920 }
3921
3922 mapData.BaseType.push_back(
3923 moduleTranslation.convertType(mapOp.getVarType()));
3924 mapData.Sizes.push_back(
3925 getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
3926 mapData.BaseType.back(), builder, moduleTranslation));
3927 mapData.MapClause.push_back(mapOp.getOperation());
3928 mapData.Types.push_back(convertClauseMapFlags(mapOp.getMapType()));
3929 mapData.Names.push_back(LLVM::createMappingInformation(
3930 mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3931 mapData.DevicePointers.push_back(llvm::OpenMPIRBuilder::DeviceInfoTy::None);
3932 if (mapOp.getMapperId())
3933 mapData.Mappers.push_back(
3935 mapOp, mapOp.getMapperIdAttr()));
3936 else
3937 mapData.Mappers.push_back(nullptr);
3938 mapData.IsAMapping.push_back(true);
3939 mapData.IsAMember.push_back(checkIsAMember(mapVars, mapOp));
3940 }
3941
3942 auto findMapInfo = [&mapData](llvm::Value *val,
3943 llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
3944 unsigned index = 0;
3945 bool found = false;
3946 for (llvm::Value *basePtr : mapData.OriginalValue) {
3947 if (basePtr == val && mapData.IsAMapping[index]) {
3948 found = true;
3949 mapData.Types[index] |=
3950 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
3951 mapData.DevicePointers[index] = devInfoTy;
3952 }
3953 index++;
3954 }
3955 return found;
3956 };
3957
3958 // Process useDevPtr(Addr)Operands
3959 auto addDevInfos = [&](const llvm::ArrayRef<Value> &useDevOperands,
3960 llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
3961 for (Value mapValue : useDevOperands) {
3962 auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3963 Value offloadPtr =
3964 mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3965 llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
3966
3967 // Check if map info is already present for this entry.
3968 if (!findMapInfo(origValue, devInfoTy)) {
3969 mapData.OriginalValue.push_back(origValue);
3970 mapData.Pointers.push_back(mapData.OriginalValue.back());
3971 mapData.IsDeclareTarget.push_back(false);
3972 mapData.BasePointers.push_back(mapData.OriginalValue.back());
3973 mapData.BaseType.push_back(
3974 moduleTranslation.convertType(mapOp.getVarType()));
3975 mapData.Sizes.push_back(builder.getInt64(0));
3976 mapData.MapClause.push_back(mapOp.getOperation());
3977 mapData.Types.push_back(
3978 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
3979 mapData.Names.push_back(LLVM::createMappingInformation(
3980 mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3981 mapData.DevicePointers.push_back(devInfoTy);
3982 mapData.Mappers.push_back(nullptr);
3983 mapData.IsAMapping.push_back(false);
3984 mapData.IsAMember.push_back(checkIsAMember(useDevOperands, mapOp));
3985 }
3986 }
3987 };
3988
3989 addDevInfos(useDevAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
3990 addDevInfos(useDevPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
3991
3992 for (Value mapValue : hasDevAddrOperands) {
3993 auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3994 Value offloadPtr =
3995 mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3996 llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
3997 auto mapType = convertClauseMapFlags(mapOp.getMapType());
3998 auto mapTypeAlways = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
3999
4000 mapData.OriginalValue.push_back(origValue);
4001 mapData.BasePointers.push_back(origValue);
4002 mapData.Pointers.push_back(origValue);
4003 mapData.IsDeclareTarget.push_back(false);
4004 mapData.BaseType.push_back(
4005 moduleTranslation.convertType(mapOp.getVarType()));
4006 mapData.Sizes.push_back(
4007 builder.getInt64(dl.getTypeSize(mapOp.getVarType())));
4008 mapData.MapClause.push_back(mapOp.getOperation());
4009 if (llvm::to_underlying(mapType & mapTypeAlways)) {
4010 // Descriptors are mapped with the ALWAYS flag, since they can get
4011 // rematerialized, so the address of the decriptor for a given object
4012 // may change from one place to another.
4013 mapData.Types.push_back(mapType);
4014 // Technically it's possible for a non-descriptor mapping to have
4015 // both has-device-addr and ALWAYS, so lookup the mapper in case it
4016 // exists.
4017 if (mapOp.getMapperId()) {
4018 mapData.Mappers.push_back(
4020 mapOp, mapOp.getMapperIdAttr()));
4021 } else {
4022 mapData.Mappers.push_back(nullptr);
4023 }
4024 } else {
4025 mapData.Types.push_back(
4026 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
4027 mapData.Mappers.push_back(nullptr);
4028 }
4029 mapData.Names.push_back(LLVM::createMappingInformation(
4030 mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
4031 mapData.DevicePointers.push_back(
4032 llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
4033 mapData.IsAMapping.push_back(false);
4034 mapData.IsAMember.push_back(checkIsAMember(hasDevAddrOperands, mapOp));
4035 }
4036}
4037
4038static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
4039 auto *res = llvm::find(mapData.MapClause, memberOp);
4040 assert(res != mapData.MapClause.end() &&
4041 "MapInfoOp for member not found in MapData, cannot return index");
4042 return std::distance(mapData.MapClause.begin(), res);
4043}
4044
/// Return the member MapInfoOp of \p mapInfo whose member-index path (from
/// getMembersIndexAttr) is lexicographically smallest when \p first is true,
/// or largest when \p first is false — i.e. the first or last member in the
/// parent's layout order.
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
                                                    bool first) {
  ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
  // Only 1 member has been mapped, we can return it.
  if (indexAttr.size() == 1)
    return cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp());

  llvm::SmallVector<size_t> indices(indexAttr.size());
  std::iota(indices.begin(), indices.end(), 0);

  // Sort member positions by comparing their index paths element-wise; the
  // comparator is flipped via `first` so a single sort can surface either the
  // first or the last member at indices.front().
  llvm::sort(indices, [&](const size_t a, const size_t b) {
    auto memberIndicesA = cast<ArrayAttr>(indexAttr[a]);
    auto memberIndicesB = cast<ArrayAttr>(indexAttr[b]);
    for (const auto it : llvm::zip(memberIndicesA, memberIndicesB)) {
      int64_t aIndex = cast<IntegerAttr>(std::get<0>(it)).getInt();
      int64_t bIndex = cast<IntegerAttr>(std::get<1>(it)).getInt();

      if (aIndex == bIndex)
        continue;

      if (aIndex < bIndex)
        return first;

      if (aIndex > bIndex)
        return !first;
    }

    // Iterated up until the end of the shortest path and the paths were equal
    // to that point, so select the member with the lowest index count, i.e.
    // the "parent".
    return memberIndicesA.size() < memberIndicesB.size();
  });

  return llvm::cast<omp::MapInfoOp>(
      mapInfo.getMembers()[indices.front()].getDefiningOp());
}
4081
4082/// This function calculates the array/pointer offset for map data provided
4083/// with bounds operations, e.g. when provided something like the following:
4084///
4085/// Fortran
4086/// map(tofrom: array(2:5, 3:2))
4087///
4088/// We must calculate the initial pointer offset to pass across, this function
4089/// performs this using bounds.
4090///
4091/// TODO/WARNING: This only supports Fortran's column major indexing currently
4092/// as is noted in the note below and comments in the function, we must extend
4093/// this function when we add a C++ frontend.
4094/// NOTE: which while specified in row-major order it currently needs to be
4095/// flipped for Fortran's column order array allocation and access (as
4096/// opposed to C++'s row-major, hence the backwards processing where order is
4097/// important). This is likely important to keep in mind for the future when
4098/// we incorporate a C++ frontend, both frontends will need to agree on the
4099/// ordering of generated bounds operations (one may have to flip them) to
4100/// make the below lowering frontend agnostic. The offload size
/// calculation may also have to be adjusted for C++.
4102static std::vector<llvm::Value *>
4104 llvm::IRBuilderBase &builder, bool isArrayTy,
4105 OperandRange bounds) {
4106 std::vector<llvm::Value *> idx;
4107 // There's no bounds to calculate an offset from, we can safely
4108 // ignore and return no indices.
4109 if (bounds.empty())
4110 return idx;
4111
4112 // If we have an array type, then we have its type so can treat it as a
4113 // normal GEP instruction where the bounds operations are simply indexes
4114 // into the array. We currently do reverse order of the bounds, which
4115 // I believe leans more towards Fortran's column-major in memory.
4116 if (isArrayTy) {
4117 idx.push_back(builder.getInt64(0));
4118 for (int i = bounds.size() - 1; i >= 0; --i) {
4119 if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
4120 bounds[i].getDefiningOp())) {
4121 idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
4122 }
4123 }
4124 } else {
4125 // If we do not have an array type, but we have bounds, then we're dealing
4126 // with a pointer that's being treated like an array and we have the
4127 // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
4128 // address (pointer pointing to the actual data) so we must caclulate the
4129 // offset using a single index which the following loop attempts to
4130 // compute using the standard column-major algorithm e.g for a 3D array:
4131 //
4132 // ((((c_idx * b_len) + b_idx) * a_len) + a_idx)
4133 //
4134 // It is of note that it's doing column-major rather than row-major at the
4135 // moment, but having a way for the frontend to indicate which major format
4136 // to use or standardizing/canonicalizing the order of the bounds to compute
4137 // the offset may be useful in the future when there's other frontends with
4138 // different formats.
4139 std::vector<llvm::Value *> dimensionIndexSizeOffset;
4140 for (int i = bounds.size() - 1; i >= 0; --i) {
4141 if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
4142 bounds[i].getDefiningOp())) {
4143 if (i == ((int)bounds.size() - 1))
4144 idx.emplace_back(
4145 moduleTranslation.lookupValue(boundOp.getLowerBound()));
4146 else
4147 idx.back() = builder.CreateAdd(
4148 builder.CreateMul(idx.back(), moduleTranslation.lookupValue(
4149 boundOp.getExtent())),
4150 moduleTranslation.lookupValue(boundOp.getLowerBound()));
4151 }
4152 }
4153 }
4154
4155 return idx;
4156}
4157
4158// This creates two insertions into the MapInfosTy data structure for the
4159// "parent" of a set of members, (usually a container e.g.
4160// class/structure/derived type) when subsequent members have also been
4161// explicitly mapped on the same map clause. Certain types, such as Fortran
4162// descriptors are mapped like this as well, however, the members are
4163// implicit as far as a user is concerned, but we must explicitly map them
4164// internally.
4165//
4166// This function also returns the memberOfFlag for this particular parent,
// which is utilised in subsequent member mappings (by modifying their map type
4168// with it) to indicate that a member is part of this parent and should be
4169// treated by the runtime as such. Important to achieve the correct mapping.
4170//
4171// This function borrows a lot from Clang's emitCombinedEntry function
4172// inside of CGOpenMPRuntime.cpp
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
    LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
    llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
    MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams) {
  assert(!ompBuilder.Config.isTargetDevice() &&
         "function only supported for host device codegen");

  // Map the first segment of the parent. If a user-defined mapper is attached,
  // include the parent's to/from-style bits (and common modifiers) in this
  // base entry so the mapper receives correct copy semantics via its 'type'
  // parameter. Also keep TARGET_PARAM when required for kernel arguments.
  llvm::omp::OpenMPOffloadMappingFlags baseFlag =
      isTargetParams
          ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
          : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE;

  // Detect if this mapping uses a user-defined mapper.
  bool hasUserMapper = mapData.Mappers[mapDataIndex] != nullptr;
  if (hasUserMapper) {
    using mapFlags = llvm::omp::OpenMPOffloadMappingFlags;
    // Preserve relevant map-type bits from the parent clause. These include
    // the copy direction (TO/FROM), as well as commonly used modifiers that
    // should be visible to the mapper for correct behaviour.
    mapFlags parentFlags = mapData.Types[mapDataIndex];
    mapFlags preserve = mapFlags::OMP_MAP_TO | mapFlags::OMP_MAP_FROM |
                        mapFlags::OMP_MAP_ALWAYS | mapFlags::OMP_MAP_CLOSE |
                        mapFlags::OMP_MAP_PRESENT | mapFlags::OMP_MAP_OMPX_HOLD;
    baseFlag |= (parentFlags & preserve);
  }

  // Emit the parent's base entry into the combined info arrays.
  combinedInfo.Types.emplace_back(baseFlag);
  combinedInfo.DevicePointers.emplace_back(
      mapData.DevicePointers[mapDataIndex]);
  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIndex]);
  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
      mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);

  // Calculate size of the parent object being mapped based on the
  // addresses at runtime, highAddr - lowAddr = size. This of course
  // doesn't factor in allocated data like pointers, hence the further
  // processing of members specified by users, or in the case of
  // Fortran pointers and allocatables, the mapping of the pointed to
  // data by the descriptor (which itself, is a structure containing
  // runtime information on the dynamically allocated data).
  auto parentClause =
      llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);

  llvm::Value *lowAddr, *highAddr;
  if (!parentClause.getPartialMap()) {
    // Full map: the span is the parent object itself, [ptr, ptr + 1).
    lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
                                        builder.getPtrTy());
    highAddr = builder.CreatePointerCast(
        builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
                                   mapData.Pointers[mapDataIndex], 1),
        builder.getPtrTy());
    combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
  } else {
    // Partial map: the span covers only the explicitly mapped members, from
    // the first member's address to one past the last member.
    auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
    int firstMemberIdx = getMapDataMemberIdx(
        mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
    lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
                                        builder.getPtrTy());
    int lastMemberIdx = getMapDataMemberIdx(
        mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
    highAddr = builder.CreatePointerCast(
        builder.CreateGEP(mapData.BaseType[lastMemberIdx],
                          mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
        builder.getPtrTy());
    combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
  }

  llvm::Value *size = builder.CreateIntCast(
      builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
      builder.getInt64Ty(),
      /*isSigned=*/false);
  combinedInfo.Sizes.push_back(size);

  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
      ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);

  // This creates the initial MEMBER_OF mapping that consists of
  // the parent/top level container (same as above effectively, except
  // with a fixed initial compile time size and separate maptype which
  // indicates the true map type (tofrom etc.). This parent mapping is
  // only relevant if the structure in its totality is being mapped,
  // otherwise the above suffices.
  if (!parentClause.getPartialMap()) {
    // TODO: This will need to be expanded to include the whole host of logic
    // for the map flags that Clang currently supports (e.g. it should do some
    // further case specific flag modifications). For the moment, it handles
    // what we support as expected.
    llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
    ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
    combinedInfo.Types.emplace_back(mapFlag);
    combinedInfo.DevicePointers.emplace_back(
        llvm::OpenMPIRBuilder::DeviceInfoTy::None);
    combinedInfo.Mappers.emplace_back(nullptr);
    combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
        mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
    combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
    combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
    combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
  }
  return memberOfFlag;
}
4279
4280// The intent is to verify if the mapped data being passed is a
4281// pointer -> pointee that requires special handling in certain cases,
4282// e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
4283//
4284// There may be a better way to verify this, but unfortunately with
4285// opaque pointers we lose the ability to easily check if something is
4286// a pointer whilst maintaining access to the underlying type.
4287static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
4288 // If we have a varPtrPtr field assigned then the underlying type is a pointer
4289 if (mapOp.getVarPtrPtr())
4290 return true;
4291
4292 // If the map data is declare target with a link clause, then it's represented
4293 // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
4294 // no relation to pointers.
4295 if (isDeclareTargetLink(mapOp.getVarPtr()))
4296 return true;
4297
4298 return false;
4299}
4300
4301// This function is intended to add explicit mappings of members
4303 LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
4304 llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
4305 MapInfoData &mapData, uint64_t mapDataIndex,
4306 llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
4307 assert(!ompBuilder.Config.isTargetDevice() &&
4308 "function only supported for host device codegen");
4309
4310 auto parentClause =
4311 llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
4312
4313 for (auto mappedMembers : parentClause.getMembers()) {
4314 auto memberClause =
4315 llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp());
4316 int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
4317
4318 assert(memberDataIdx >= 0 && "could not find mapped member of structure");
4319
4320 // If we're currently mapping a pointer to a block of data, we must
4321 // initially map the pointer, and then attatch/bind the data with a
4322 // subsequent map to the pointer. This segment of code generates the
4323 // pointer mapping, which can in certain cases be optimised out as Clang
4324 // currently does in its lowering. However, for the moment we do not do so,
4325 // in part as we currently have substantially less information on the data
4326 // being mapped at this stage.
4327 if (checkIfPointerMap(memberClause)) {
4328 auto mapFlag = convertClauseMapFlags(memberClause.getMapType());
4329 mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4330 mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
4331 ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
4332 combinedInfo.Types.emplace_back(mapFlag);
4333 combinedInfo.DevicePointers.emplace_back(
4334 llvm::OpenMPIRBuilder::DeviceInfoTy::None);
4335 combinedInfo.Mappers.emplace_back(nullptr);
4336 combinedInfo.Names.emplace_back(
4337 LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
4338 combinedInfo.BasePointers.emplace_back(
4339 mapData.BasePointers[mapDataIndex]);
4340 combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
4341 combinedInfo.Sizes.emplace_back(builder.getInt64(
4342 moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
4343 }
4344
4345 // Same MemberOfFlag to indicate its link with parent and other members
4346 // of.
4347 auto mapFlag = convertClauseMapFlags(memberClause.getMapType());
4348 mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4349 mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
4350 ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
4351 if (checkIfPointerMap(memberClause))
4352 mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
4353
4354 combinedInfo.Types.emplace_back(mapFlag);
4355 combinedInfo.DevicePointers.emplace_back(
4356 mapData.DevicePointers[memberDataIdx]);
4357 combinedInfo.Mappers.emplace_back(mapData.Mappers[memberDataIdx]);
4358 combinedInfo.Names.emplace_back(
4359 LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
4360 uint64_t basePointerIndex =
4361 checkIfPointerMap(memberClause) ? memberDataIdx : mapDataIndex;
4362 combinedInfo.BasePointers.emplace_back(
4363 mapData.BasePointers[basePointerIndex]);
4364 combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
4365
4366 llvm::Value *size = mapData.Sizes[memberDataIdx];
4367 if (checkIfPointerMap(memberClause)) {
4368 size = builder.CreateSelect(
4369 builder.CreateIsNull(mapData.Pointers[memberDataIdx]),
4370 builder.getInt64(0), size);
4371 }
4372
4373 combinedInfo.Sizes.emplace_back(size);
4374 }
4375}
4376
4377static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
4378 MapInfosTy &combinedInfo, bool isTargetParams,
4379 int mapDataParentIdx = -1) {
4380 // Declare Target Mappings are excluded from being marked as
4381 // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
4382 // marked with OMP_MAP_PTR_AND_OBJ instead.
4383 auto mapFlag = mapData.Types[mapDataIdx];
4384 auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
4385
4386 bool isPtrTy = checkIfPointerMap(mapInfoOp);
4387 if (isPtrTy)
4388 mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
4389
4390 if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx])
4391 mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4392
4393 if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy &&
4394 !isPtrTy)
4395 mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
4396
4397 // if we're provided a mapDataParentIdx, then the data being mapped is
4398 // part of a larger object (in a parent <-> member mapping) and in this
4399 // case our BasePointer should be the parent.
4400 if (mapDataParentIdx >= 0)
4401 combinedInfo.BasePointers.emplace_back(
4402 mapData.BasePointers[mapDataParentIdx]);
4403 else
4404 combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
4405
4406 combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
4407 combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
4408 combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]);
4409 combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
4410 combinedInfo.Types.emplace_back(mapFlag);
4411 combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
4412}
4413
4415 llvm::IRBuilderBase &builder,
4416 llvm::OpenMPIRBuilder &ompBuilder,
4417 DataLayout &dl, MapInfosTy &combinedInfo,
4418 MapInfoData &mapData, uint64_t mapDataIndex,
4419 bool isTargetParams) {
4420 assert(!ompBuilder.Config.isTargetDevice() &&
4421 "function only supported for host device codegen");
4422
4423 auto parentClause =
4424 llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
4425
4426 // If we have a partial map (no parent referenced in the map clauses of the
4427 // directive, only members) and only a single member, we do not need to bind
4428 // the map of the member to the parent, we can pass the member separately.
4429 if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) {
4430 auto memberClause = llvm::cast<omp::MapInfoOp>(
4431 parentClause.getMembers()[0].getDefiningOp());
4432 int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
4433 // Note: Clang treats arrays with explicit bounds that fall into this
4434 // category as a parent with map case, however, it seems this isn't a
4435 // requirement, and processing them as an individual map is fine. So,
4436 // we will handle them as individual maps for the moment, as it's
4437 // difficult for us to check this as we always require bounds to be
4438 // specified currently and it's also marginally more optimal (single
4439 // map rather than two). The difference may come from the fact that
4440 // Clang maps array without bounds as pointers (which we do not
4441 // currently do), whereas we treat them as arrays in all cases
4442 // currently.
4443 processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams,
4444 mapDataIndex);
4445 return;
4446 }
4447
4448 llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
4449 mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
4450 combinedInfo, mapData, mapDataIndex, isTargetParams);
4451 processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
4452 combinedInfo, mapData, mapDataIndex,
4453 memberOfParentFlag);
4454}
4455
4456// This is a variation on Clang's GenerateOpenMPCapturedVars, which
4457// generates different operation (e.g. load/store) combinations for
4458// arguments to the kernel, based on map capture kinds which are then
4459// utilised in the combinedInfo in place of the original Map value.
/// Rewrite MapInfoData's Pointers/BasePointers in-place according to each
/// entry's capture kind: for ByRef, load through pointers and apply any
/// bounds-derived array offset; for ByCopy, load the value (spilling it to an
/// alloca when a pointer-shaped argument is required). Declare target entries
/// are left untouched.
static void
createAlteredByCaptureMap(MapInfoData &mapData,
                          LLVM::ModuleTranslation &moduleTranslation,
                          llvm::IRBuilderBase &builder) {
  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
         "function only supported for host device codegen");
  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
    // if it's declare target, skip it, it's handled separately.
    if (!mapData.IsDeclareTarget[i]) {
      auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
      omp::VariableCaptureKind captureKind = mapOp.getMapCaptureType();
      bool isPtrTy = checkIfPointerMap(mapOp);

      // Currently handles array sectioning lowerbound case, but more
      // logic may be required in the future. Clang invokes EmitLValue,
      // which has specialised logic for special Clang types such as
      // user-defined types, so it is possible we will have to extend this for
      // structures or other complex types. As the general idea is that this
      // function mimics some of the logic from Clang that we require for
      // kernel argument passing from host -> device.
      switch (captureKind) {
      case omp::VariableCaptureKind::ByRef: {
        llvm::Value *newV = mapData.Pointers[i];
        std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
            moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
            mapOp.getBounds());
        // Pointer-backed entries must be dereferenced once before the offset
        // is applied to the underlying data.
        if (isPtrTy)
          newV = builder.CreateLoad(builder.getPtrTy(), newV);

        if (!offsetIdx.empty())
          newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
                                           "array_offset");
        mapData.Pointers[i] = newV;
      } break;
      case omp::VariableCaptureKind::ByCopy: {
        llvm::Type *type = mapData.BaseType[i];
        llvm::Value *newV;
        if (mapData.Pointers[i]->getType()->isPointerTy())
          newV = builder.CreateLoad(type, mapData.Pointers[i]);
        else
          newV = mapData.Pointers[i];

        if (!isPtrTy) {
          // Spill the loaded value to a function-entry alloca so a pointer can
          // be passed; the insertion point and debug location are saved and
          // restored around the alloca emission.
          auto curInsert = builder.saveIP();
          llvm::DebugLoc DbgLoc = builder.getCurrentDebugLocation();
          builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
          auto *memTempAlloc =
              builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
          builder.SetCurrentDebugLocation(DbgLoc);
          builder.restoreIP(curInsert);

          builder.CreateStore(newV, memTempAlloc);
          newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
        }

        mapData.Pointers[i] = newV;
        mapData.BasePointers[i] = newV;
      } break;
      case omp::VariableCaptureKind::This:
      case omp::VariableCaptureKind::VLAType:
        mapData.MapClause[i]->emitOpError("Unhandled capture kind");
        break;
      }
    }
  }
}
4526
4527// Generate all map related information and fill the combinedInfo.
4528static void genMapInfos(llvm::IRBuilderBase &builder,
4529 LLVM::ModuleTranslation &moduleTranslation,
4530 DataLayout &dl, MapInfosTy &combinedInfo,
4531 MapInfoData &mapData, bool isTargetParams = false) {
4532 assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4533 "function only supported for host device codegen");
4534
4535 // We wish to modify some of the methods in which arguments are
4536 // passed based on their capture type by the target region, this can
4537 // involve generating new loads and stores, which changes the
4538 // MLIR value to LLVM value mapping, however, we only wish to do this
4539 // locally for the current function/target and also avoid altering
4540 // ModuleTranslation, so we remap the base pointer or pointer stored
4541 // in the map infos corresponding MapInfoData, which is later accessed
4542 // by genMapInfos and createTarget to help generate the kernel and
4543 // kernel arg structure. It primarily becomes relevant in cases like
4544 // bycopy, or byref range'd arrays. In the default case, we simply
4545 // pass thee pointer byref as both basePointer and pointer.
4546 createAlteredByCaptureMap(mapData, moduleTranslation, builder);
4547
4548 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4549
4550 // We operate under the assumption that all vectors that are
4551 // required in MapInfoData are of equal lengths (either filled with
4552 // default constructed data or appropiate information) so we can
4553 // utilise the size from any component of MapInfoData, if we can't
4554 // something is missing from the initial MapInfoData construction.
4555 for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
4556 // NOTE/TODO: We currently do not support arbitrary depth record
4557 // type mapping.
4558 if (mapData.IsAMember[i])
4559 continue;
4560
4561 auto mapInfoOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[i]);
4562 if (!mapInfoOp.getMembers().empty()) {
4563 processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
4564 combinedInfo, mapData, i, isTargetParams);
4565 continue;
4566 }
4567
4568 processIndividualMap(mapData, i, combinedInfo, isTargetParams);
4569 }
4570}
4571
4572static llvm::Expected<llvm::Function *>
4573emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder,
4574 LLVM::ModuleTranslation &moduleTranslation,
4575 llvm::StringRef mapperFuncName);
4576
4577static llvm::Expected<llvm::Function *>
4578getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder,
4579 LLVM::ModuleTranslation &moduleTranslation) {
4580 assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4581 "function only supported for host device codegen");
4582 auto declMapperOp = cast<omp::DeclareMapperOp>(op);
4583 std::string mapperFuncName =
4584 moduleTranslation.getOpenMPBuilder()->createPlatformSpecificName(
4585 {"omp_mapper", declMapperOp.getSymName()});
4586
4587 if (auto *lookupFunc = moduleTranslation.lookupFunction(mapperFuncName))
4588 return lookupFunc;
4589
4590 return emitUserDefinedMapper(declMapperOp, builder, moduleTranslation,
4591 mapperFuncName);
4592}
4593
/// Emit the LLVM function named \p mapperFuncName that implements the
/// user-defined mapper declared by \p op (an omp.declare_mapper), and record
/// it with the module translation. Map-info generation for the mapper's body
/// is delegated to the OpenMPIRBuilder via callbacks.
static llvm::Expected<llvm::Function *>
emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation,
                      llvm::StringRef mapperFuncName) {
  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
         "function only supported for host device codegen");
  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
  auto declMapperInfoOp = declMapperOp.getDeclareMapperInfo();
  DataLayout dl = DataLayout(declMapperOp->getParentOfType<ModuleOp>());
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  llvm::Type *varType = moduleTranslation.convertType(declMapperOp.getType());
  SmallVector<Value> mapVars = declMapperInfoOp.getMapVars();

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  // Fill up the arrays with all the mapped variables. The callback binds the
  // mapper's symbolic variable to the runtime-provided pointer PHI, converts
  // the declare-mapper region, then collects and generates the map infos.
  MapInfosTy combinedInfo;
  auto genMapInfoCB =
      [&](InsertPointTy codeGenIP, llvm::Value *ptrPHI,
          llvm::Value *unused2) -> llvm::OpenMPIRBuilder::MapInfosOrErrorTy {
    builder.restoreIP(codeGenIP);
    moduleTranslation.mapValue(declMapperOp.getSymVal(), ptrPHI);
    moduleTranslation.mapBlock(&declMapperOp.getRegion().front(),
                               builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(declMapperOp.getRegion().front(),
                                              /*ignoreArguments=*/true,
                                              builder)))
      return llvm::make_error<PreviouslyReportedError>();
    MapInfoData mapData;
    collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
                                  builder);
    genMapInfos(builder, moduleTranslation, dl, combinedInfo, mapData);

    // Drop the mapping that is no longer necessary so that the same region can
    // be processed multiple times.
    moduleTranslation.forgetMapping(declMapperOp.getRegion());
    return combinedInfo;
  };

  // Resolve nested user-defined mappers (reads combinedInfo filled above).
  auto customMapperCB = [&](unsigned i) -> llvm::Expected<llvm::Function *> {
    if (!combinedInfo.Mappers[i])
      return nullptr;
    return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
                                            moduleTranslation);
  };

  llvm::Expected<llvm::Function *> newFn = ompBuilder->emitUserDefinedMapper(
      genMapInfoCB, varType, mapperFuncName, customMapperCB);
  if (!newFn)
    return newFn.takeError();
  moduleTranslation.mapFunction(mapperFuncName, *newFn);
  return *newFn;
}
4647
4648static LogicalResult
4649convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
4650 LLVM::ModuleTranslation &moduleTranslation) {
4651 llvm::Value *ifCond = nullptr;
4652 int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
4653 SmallVector<Value> mapVars;
4654 SmallVector<Value> useDevicePtrVars;
4655 SmallVector<Value> useDeviceAddrVars;
4656 llvm::omp::RuntimeFunction RTLFn;
4657 DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
4658
4659 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4660 llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
4661 /*SeparateBeginEndCalls=*/true);
4662 bool isTargetDevice = ompBuilder->Config.isTargetDevice();
4663 bool isOffloadEntry =
4664 isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
4665
4666 LogicalResult result =
4668 .Case([&](omp::TargetDataOp dataOp) {
4669 if (failed(checkImplementationStatus(*dataOp)))
4670 return failure();
4671
4672 if (auto ifVar = dataOp.getIfExpr())
4673 ifCond = moduleTranslation.lookupValue(ifVar);
4674
4675 if (auto devId = dataOp.getDevice())
4676 if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
4677 if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4678 deviceID = intAttr.getInt();
4679
4680 mapVars = dataOp.getMapVars();
4681 useDevicePtrVars = dataOp.getUseDevicePtrVars();
4682 useDeviceAddrVars = dataOp.getUseDeviceAddrVars();
4683 return success();
4684 })
4685 .Case([&](omp::TargetEnterDataOp enterDataOp) -> LogicalResult {
4686 if (failed(checkImplementationStatus(*enterDataOp)))
4687 return failure();
4688
4689 if (auto ifVar = enterDataOp.getIfExpr())
4690 ifCond = moduleTranslation.lookupValue(ifVar);
4691
4692 if (auto devId = enterDataOp.getDevice())
4693 if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
4694 if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4695 deviceID = intAttr.getInt();
4696 RTLFn =
4697 enterDataOp.getNowait()
4698 ? llvm::omp::OMPRTL___tgt_target_data_begin_nowait_mapper
4699 : llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
4700 mapVars = enterDataOp.getMapVars();
4701 info.HasNoWait = enterDataOp.getNowait();
4702 return success();
4703 })
4704 .Case([&](omp::TargetExitDataOp exitDataOp) -> LogicalResult {
4705 if (failed(checkImplementationStatus(*exitDataOp)))
4706 return failure();
4707
4708 if (auto ifVar = exitDataOp.getIfExpr())
4709 ifCond = moduleTranslation.lookupValue(ifVar);
4710
4711 if (auto devId = exitDataOp.getDevice())
4712 if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
4713 if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4714 deviceID = intAttr.getInt();
4715
4716 RTLFn = exitDataOp.getNowait()
4717 ? llvm::omp::OMPRTL___tgt_target_data_end_nowait_mapper
4718 : llvm::omp::OMPRTL___tgt_target_data_end_mapper;
4719 mapVars = exitDataOp.getMapVars();
4720 info.HasNoWait = exitDataOp.getNowait();
4721 return success();
4722 })
4723 .Case([&](omp::TargetUpdateOp updateDataOp) -> LogicalResult {
4724 if (failed(checkImplementationStatus(*updateDataOp)))
4725 return failure();
4726
4727 if (auto ifVar = updateDataOp.getIfExpr())
4728 ifCond = moduleTranslation.lookupValue(ifVar);
4729
4730 if (auto devId = updateDataOp.getDevice())
4731 if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
4732 if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4733 deviceID = intAttr.getInt();
4734
4735 RTLFn =
4736 updateDataOp.getNowait()
4737 ? llvm::omp::OMPRTL___tgt_target_data_update_nowait_mapper
4738 : llvm::omp::OMPRTL___tgt_target_data_update_mapper;
4739 mapVars = updateDataOp.getMapVars();
4740 info.HasNoWait = updateDataOp.getNowait();
4741 return success();
4742 })
4743 .DefaultUnreachable("unexpected operation");
4744
4745 if (failed(result))
4746 return failure();
4747 // Pretend we have IF(false) if we're not doing offload.
4748 if (!isOffloadEntry)
4749 ifCond = builder.getFalse();
4750
4751 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4752 MapInfoData mapData;
4753 collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, DL,
4754 builder, useDevicePtrVars, useDeviceAddrVars);
4755
4756 // Fill up the arrays with all the mapped variables.
4757 MapInfosTy combinedInfo;
4758 auto genMapInfoCB = [&](InsertPointTy codeGenIP) -> MapInfosTy & {
4759 builder.restoreIP(codeGenIP);
4760 genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
4761 return combinedInfo;
4762 };
4763
4764 // Define a lambda to apply mappings between use_device_addr and
4765 // use_device_ptr base pointers, and their associated block arguments.
4766 auto mapUseDevice =
4767 [&moduleTranslation](
4768 llvm::OpenMPIRBuilder::DeviceInfoTy type,
4770 llvm::SmallVectorImpl<Value> &useDeviceVars, MapInfoData &mapInfoData,
4771 llvm::function_ref<llvm::Value *(llvm::Value *)> mapper = nullptr) {
4772 for (auto [arg, useDevVar] :
4773 llvm::zip_equal(blockArgs, useDeviceVars)) {
4774
4775 auto getMapBasePtr = [](omp::MapInfoOp mapInfoOp) {
4776 return mapInfoOp.getVarPtrPtr() ? mapInfoOp.getVarPtrPtr()
4777 : mapInfoOp.getVarPtr();
4778 };
4779
4780 auto useDevMap = cast<omp::MapInfoOp>(useDevVar.getDefiningOp());
4781 for (auto [mapClause, devicePointer, basePointer] : llvm::zip_equal(
4782 mapInfoData.MapClause, mapInfoData.DevicePointers,
4783 mapInfoData.BasePointers)) {
4784 auto mapOp = cast<omp::MapInfoOp>(mapClause);
4785 if (getMapBasePtr(mapOp) != getMapBasePtr(useDevMap) ||
4786 devicePointer != type)
4787 continue;
4788
4789 if (llvm::Value *devPtrInfoMap =
4790 mapper ? mapper(basePointer) : basePointer) {
4791 moduleTranslation.mapValue(arg, devPtrInfoMap);
4792 break;
4793 }
4794 }
4795 }
4796 };
4797
4798 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
4799 auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType)
4800 -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
4801 // We must always restoreIP regardless of doing anything the caller
4802 // does not restore it, leading to incorrect (no) branch generation.
4803 builder.restoreIP(codeGenIP);
4804 assert(isa<omp::TargetDataOp>(op) &&
4805 "BodyGen requested for non TargetDataOp");
4806 auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(op);
4807 Region &region = cast<omp::TargetDataOp>(op).getRegion();
4808 switch (bodyGenType) {
4809 case BodyGenTy::Priv:
4810 // Check if any device ptr/addr info is available
4811 if (!info.DevicePtrInfoMap.empty()) {
4812 mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4813 blockArgIface.getUseDeviceAddrBlockArgs(),
4814 useDeviceAddrVars, mapData,
4815 [&](llvm::Value *basePointer) -> llvm::Value * {
4816 if (!info.DevicePtrInfoMap[basePointer].second)
4817 return nullptr;
4818 return builder.CreateLoad(
4819 builder.getPtrTy(),
4820 info.DevicePtrInfoMap[basePointer].second);
4821 });
4822 mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4823 blockArgIface.getUseDevicePtrBlockArgs(), useDevicePtrVars,
4824 mapData, [&](llvm::Value *basePointer) {
4825 return info.DevicePtrInfoMap[basePointer].second;
4826 });
4827
4828 if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
4829 moduleTranslation)))
4830 return llvm::make_error<PreviouslyReportedError>();
4831 }
4832 break;
4833 case BodyGenTy::DupNoPriv:
4834 if (info.DevicePtrInfoMap.empty()) {
4835 // For host device we still need to do the mapping for codegen,
4836 // otherwise it may try to lookup a missing value.
4837 if (!ompBuilder->Config.IsTargetDevice.value_or(false)) {
4838 mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4839 blockArgIface.getUseDeviceAddrBlockArgs(),
4840 useDeviceAddrVars, mapData);
4841 mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4842 blockArgIface.getUseDevicePtrBlockArgs(),
4843 useDevicePtrVars, mapData);
4844 }
4845 }
4846 break;
4847 case BodyGenTy::NoPriv:
4848 // If device info is available then region has already been generated
4849 if (info.DevicePtrInfoMap.empty()) {
4850 // For device pass, if use_device_ptr(addr) mappings were present,
4851 // we need to link them here before codegen.
4852 if (ompBuilder->Config.IsTargetDevice.value_or(false)) {
4853 mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4854 blockArgIface.getUseDeviceAddrBlockArgs(),
4855 useDeviceAddrVars, mapData);
4856 mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4857 blockArgIface.getUseDevicePtrBlockArgs(),
4858 useDevicePtrVars, mapData);
4859 }
4860
4861 if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
4862 moduleTranslation)))
4863 return llvm::make_error<PreviouslyReportedError>();
4864 }
4865 break;
4866 }
4867 return builder.saveIP();
4868 };
4869
4870 auto customMapperCB =
4871 [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
4872 if (!combinedInfo.Mappers[i])
4873 return nullptr;
4874 info.HasMapper = true;
4875 return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
4876 moduleTranslation);
4877 };
4878
4879 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4880 llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4881 findAllocaInsertPoint(builder, moduleTranslation);
4882 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() {
4883 if (isa<omp::TargetDataOp>(op))
4884 return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
4885 builder.getInt64(deviceID), ifCond,
4886 info, genMapInfoCB, customMapperCB,
4887 /*MapperFunc=*/nullptr, bodyGenCB,
4888 /*DeviceAddrCB=*/nullptr);
4889 return ompBuilder->createTargetData(
4890 ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
4891 info, genMapInfoCB, customMapperCB, &RTLFn);
4892 }();
4893
4894 if (failed(handleError(afterIP, *op)))
4895 return failure();
4896
4897 builder.restoreIP(*afterIP);
4898 return success();
4899}
4900
4901static LogicalResult
4902convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
4903 LLVM::ModuleTranslation &moduleTranslation) {
4904 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4905 auto distributeOp = cast<omp::DistributeOp>(opInst);
4906 if (failed(checkImplementationStatus(opInst)))
4907 return failure();
4908
4909 /// Process teams op reduction in distribute if the reduction is contained in
4910 /// the distribute op.
4911 omp::TeamsOp teamsOp = opInst.getParentOfType<omp::TeamsOp>();
4912 bool doDistributeReduction =
4913 teamsOp ? teamsReductionContainedInDistribute(teamsOp) : false;
4914
4915 DenseMap<Value, llvm::Value *> reductionVariableMap;
4916 unsigned numReductionVars = teamsOp ? teamsOp.getNumReductionVars() : 0;
4918 SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
4919 llvm::ArrayRef<bool> isByRef;
4920
4921 if (doDistributeReduction) {
4922 isByRef = getIsByRef(teamsOp.getReductionByref());
4923 assert(isByRef.size() == teamsOp.getNumReductionVars());
4924
4925 collectReductionDecls(teamsOp, reductionDecls);
4926 llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4927 findAllocaInsertPoint(builder, moduleTranslation);
4928
4929 MutableArrayRef<BlockArgument> reductionArgs =
4930 llvm::cast<omp::BlockArgOpenMPOpInterface>(*teamsOp)
4931 .getReductionBlockArgs();
4932
4934 teamsOp, reductionArgs, builder, moduleTranslation, allocaIP,
4935 reductionDecls, privateReductionVariables, reductionVariableMap,
4936 isByRef)))
4937 return failure();
4938 }
4939
4940 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4941 auto bodyGenCB = [&](InsertPointTy allocaIP,
4942 InsertPointTy codeGenIP) -> llvm::Error {
4943 // Save the alloca insertion point on ModuleTranslation stack for use in
4944 // nested regions.
4946 moduleTranslation, allocaIP);
4947
4948 // DistributeOp has only one region associated with it.
4949 builder.restoreIP(codeGenIP);
4950 PrivateVarsInfo privVarsInfo(distributeOp);
4951
4953 allocatePrivateVars(builder, moduleTranslation, privVarsInfo, allocaIP);
4954 if (handleError(afterAllocas, opInst).failed())
4955 return llvm::make_error<PreviouslyReportedError>();
4956
4957 if (handleError(initPrivateVars(builder, moduleTranslation, privVarsInfo),
4958 opInst)
4959 .failed())
4960 return llvm::make_error<PreviouslyReportedError>();
4961
4962 if (failed(copyFirstPrivateVars(
4963 distributeOp, builder, moduleTranslation, privVarsInfo.mlirVars,
4964 privVarsInfo.llvmVars, privVarsInfo.privatizers,
4965 distributeOp.getPrivateNeedsBarrier())))
4966 return llvm::make_error<PreviouslyReportedError>();
4967
4968 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4969 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4971 convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
4972 builder, moduleTranslation);
4973 if (!regionBlock)
4974 return regionBlock.takeError();
4975 builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
4976
4977 // Skip applying a workshare loop below when translating 'distribute
4978 // parallel do' (it's been already handled by this point while translating
4979 // the nested omp.wsloop).
4980 if (!isa_and_present<omp::WsloopOp>(distributeOp.getNestedWrapper())) {
4981 // TODO: Add support for clauses which are valid for DISTRIBUTE
4982 // constructs. Static schedule is the default.
4983 auto schedule = omp::ClauseScheduleKind::Static;
4984 bool isOrdered = false;
4985 std::optional<omp::ScheduleModifier> scheduleMod;
4986 bool isSimd = false;
4987 llvm::omp::WorksharingLoopType workshareLoopType =
4988 llvm::omp::WorksharingLoopType::DistributeStaticLoop;
4989 bool loopNeedsBarrier = false;
4990 llvm::Value *chunk = nullptr;
4991
4992 llvm::CanonicalLoopInfo *loopInfo =
4993 findCurrentLoopInfo(moduleTranslation);
4994 llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
4995 ompBuilder->applyWorkshareLoop(
4996 ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
4997 convertToScheduleKind(schedule), chunk, isSimd,
4998 scheduleMod == omp::ScheduleModifier::monotonic,
4999 scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
5000 workshareLoopType);
5001
5002 if (!wsloopIP)
5003 return wsloopIP.takeError();
5004 }
5005
5006 if (failed(cleanupPrivateVars(builder, moduleTranslation,
5007 distributeOp.getLoc(), privVarsInfo.llvmVars,
5008 privVarsInfo.privatizers)))
5009 return llvm::make_error<PreviouslyReportedError>();
5010
5011 return llvm::Error::success();
5012 };
5013
5014 llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
5015 findAllocaInsertPoint(builder, moduleTranslation);
5016 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
5017 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
5018 ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
5019
5020 if (failed(handleError(afterIP, opInst)))
5021 return failure();
5022
5023 builder.restoreIP(*afterIP);
5024
5025 if (doDistributeReduction) {
5026 // Process the reductions if required.
5028 teamsOp, builder, moduleTranslation, allocaIP, reductionDecls,
5029 privateReductionVariables, isByRef,
5030 /*isNoWait*/ false, /*isTeamsReduction*/ true);
5031 }
5032 return success();
5033}
5034
5035/// Lowers the FlagsAttr which is applied to the module on the device
5036/// pass when offloading, this attribute contains OpenMP RTL globals that can
5037/// be passed as flags to the frontend, otherwise they are set to default
5038static LogicalResult
5039convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
5040 LLVM::ModuleTranslation &moduleTranslation) {
5041 if (!cast<mlir::ModuleOp>(op))
5042 return failure();
5043
5044 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5045
5046 ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
5047 attribute.getOpenmpDeviceVersion());
5048
5049 if (attribute.getNoGpuLib())
5050 return success();
5051
5052 ompBuilder->createGlobalFlag(
5053 attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
5054 "__omp_rtl_debug_kind");
5055 ompBuilder->createGlobalFlag(
5056 attribute
5057 .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
5058 ,
5059 "__omp_rtl_assume_teams_oversubscription");
5060 ompBuilder->createGlobalFlag(
5061 attribute
5062 .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
5063 ,
5064 "__omp_rtl_assume_threads_oversubscription");
5065 ompBuilder->createGlobalFlag(
5066 attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
5067 "__omp_rtl_assume_no_thread_state");
5068 ompBuilder->createGlobalFlag(
5069 attribute
5070 .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
5071 ,
5072 "__omp_rtl_assume_no_nested_parallelism");
5073 return success();
5074}
5075
5076static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
5077 omp::TargetOp targetOp,
5078 llvm::StringRef parentName = "") {
5079 auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
5080
5081 assert(fileLoc && "No file found from location");
5082 StringRef fileName = fileLoc.getFilename().getValue();
5083
5084 llvm::sys::fs::UniqueID id;
5085 uint64_t line = fileLoc.getLine();
5086 if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
5087 size_t fileHash = llvm::hash_value(fileName.str());
5088 size_t deviceId = 0xdeadf17e;
5089 targetInfo =
5090 llvm::TargetRegionEntryInfo(parentName, deviceId, fileHash, line);
5091 } else {
5092 targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
5093 id.getFile(), line);
5094 }
5095}
5096
5097static void
5098handleDeclareTargetMapVar(MapInfoData &mapData,
5099 LLVM::ModuleTranslation &moduleTranslation,
5100 llvm::IRBuilderBase &builder, llvm::Function *func) {
5101 assert(moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
5102 "function only supported for target device codegen");
5103 llvm::IRBuilderBase::InsertPointGuard guard(builder);
5104 for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
5105 // In the case of declare target mapped variables, the basePointer is
5106 // the reference pointer generated by the convertDeclareTargetAttr
5107 // method. Whereas the kernelValue is the original variable, so for
5108 // the device we must replace all uses of this original global variable
5109 // (stored in kernelValue) with the reference pointer (stored in
5110 // basePointer for declare target mapped variables), as for device the
5111 // data is mapped into this reference pointer and should be loaded
5112 // from it, the original variable is discarded. On host both exist and
5113 // metadata is generated (elsewhere in the convertDeclareTargetAttr)
5114 // function to link the two variables in the runtime and then both the
5115 // reference pointer and the pointer are assigned in the kernel argument
5116 // structure for the host.
5117 if (mapData.IsDeclareTarget[i]) {
5118 // If the original map value is a constant, then we have to make sure all
5119 // of it's uses within the current kernel/function that we are going to
5120 // rewrite are converted to instructions, as we will be altering the old
5121 // use (OriginalValue) from a constant to an instruction, which will be
5122 // illegal and ICE the compiler if the user is a constant expression of
5123 // some kind e.g. a constant GEP.
5124 if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
5125 convertUsersOfConstantsToInstructions(constant, func, false);
5126
5127 // The users iterator will get invalidated if we modify an element,
5128 // so we populate this vector of uses to alter each user on an
5129 // individual basis to emit its own load (rather than one load for
5130 // all).
5132 for (llvm::User *user : mapData.OriginalValue[i]->users())
5133 userVec.push_back(user);
5134
5135 for (llvm::User *user : userVec) {
5136 if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
5137 if (insn->getFunction() == func) {
5138 builder.SetCurrentDebugLocation(insn->getDebugLoc());
5139 auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
5140 mapData.BasePointers[i]);
5141 load->moveBefore(insn->getIterator());
5142 user->replaceUsesOfWith(mapData.OriginalValue[i], load);
5143 }
5144 }
5145 }
5146 }
5147 }
5148}
5149
5150// The createDeviceArgumentAccessor function generates
// instructions for retrieving (accessing) kernel
5152// arguments inside of the device kernel for use by
5153// the kernel. This enables different semantics such as
5154// the creation of temporary copies of data allowing
5155// semantics like read-only/no host write back kernel
5156// arguments.
5157//
5158// This currently implements a very light version of Clang's
5159// EmitParmDecl's handling of direct argument handling as well
5160// as a portion of the argument access generation based on
5161// capture types found at the end of emitOutlinedFunctionPrologue
5162// in Clang. The indirect path handling of EmitParmDecl's may be
5163// required for future work, but a direct 1-to-1 copy doesn't seem
5164// possible as the logic is rather scattered throughout Clang's
5165// lowering and perhaps we wish to deviate slightly.
5166//
5167// \param mapData - A container containing vectors of information
5168// corresponding to the input argument, which should have a
5169// corresponding entry in the MapInfoData containers
// OriginalValue's.
5171// \param arg - This is the generated kernel function argument that
5172// corresponds to the passed in input argument. We generated different
5173// accesses of this Argument, based on capture type and other Input
5174// related information.
5175// \param input - This is the host side value that will be passed to
5176// the kernel i.e. the kernel input, we rewrite all uses of this within
5177// the kernel (as we generate the kernel body based on the target's region
// which maintains references to the original input) to the retVal argument
// upon exit of this function inside of the OMPIRBuilder. This interlinks
// the kernel argument to future uses of it in the function providing
// appropriate "glue" instructions in between.
5182// \param retVal - This is the value that all uses of input inside of the
5183// kernel will be re-written to, the goal of this function is to generate
5184// an appropriate location for the kernel argument to be accessed from,
5185// e.g. ByRef will result in a temporary allocation location and then
5186// a store of the kernel argument into this allocated memory which
5187// will then be loaded from, ByCopy will use the allocated memory
5188// directly.
static llvm::IRBuilderBase::InsertPoint
createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
                             llvm::Value *input, llvm::Value *&retVal,
                             llvm::IRBuilderBase &builder,
                             llvm::OpenMPIRBuilder &ompBuilder,
                             LLVM::ModuleTranslation &moduleTranslation,
                             llvm::IRBuilderBase::InsertPoint allocaIP,
                             llvm::IRBuilderBase::InsertPoint codeGenIP) {
  assert(ompBuilder.Config.isTargetDevice() &&
         "function only supported for target device codegen");
  // Allocas must be emitted at the function-entry insertion point, so switch
  // there before creating the argument's backing storage.
  builder.restoreIP(allocaIP);

  // Default to ByRef: if no map clause matches `input` below, the argument is
  // treated as captured by reference.
  omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef;
  LLVM::TypeToLLVMIRTranslator typeToLLVMIRTranslator(
      ompBuilder.M.getContext());
  unsigned alignmentValue = 0;
  // Find the associated MapInfoData entry for the current input and take its
  // capture kind and the preferred alignment of the mapped type.
  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
    if (mapData.OriginalValue[i] == input) {
      auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
      capture = mapOp.getMapCaptureType();
      // Get information of alignment of mapped object
      alignmentValue = typeToLLVMIRTranslator.getPreferredAlignment(
          mapOp.getVarType(), ompBuilder.M.getDataLayout());
      break;
    }

  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
  unsigned int defaultAS =
      ompBuilder.M.getDataLayout().getProgramAddressSpace();

  // Create the alloca holding the kernel argument at the current (alloca)
  // insertion point.
  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);

  // On targets with a distinct alloca address space (e.g. AMDGPU), pointer
  // arguments must be cast back to the program address space before use.
  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
    v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS));

  // Spill the incoming argument into the alloca.
  builder.CreateStore(&arg, v);

  // Subsequent accesses belong in the kernel body, not the entry block.
  builder.restoreIP(codeGenIP);

  switch (capture) {
  case omp::VariableCaptureKind::ByCopy: {
    // ByCopy: the spilled slot itself is the accessor.
    retVal = v;
    break;
  }
  case omp::VariableCaptureKind::ByRef: {
    // ByRef: load the pointer back out of the spill slot.
    llvm::LoadInst *loadInst = builder.CreateAlignedLoad(
        v->getType(), v,
        ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
    // CreateAlignedLoad function creates similar LLVM IR:
    // %res = load ptr, ptr %input, align 8
    // This LLVM IR does not contain information about alignment
    // of the loaded value. We need to add !align metadata to unblock
    // optimizer. The existence of the !align metadata on the instruction
    // tells the optimizer that the value loaded is known to be aligned to
    // a boundary specified by the integer value in the metadata node.
    // Example:
    // %res = load ptr, ptr %input, align 8, !align !align_md_node
    //                 ^         ^
    //                 |         |
    //    alignment of %input address |
    //                                |
    //               alignment of %res object
    if (v->getType()->isPointerTy() && alignmentValue) {
      llvm::MDBuilder MDB(builder.getContext());
      loadInst->setMetadata(
          llvm::LLVMContext::MD_align,
          llvm::MDNode::get(builder.getContext(),
                            MDB.createConstant(llvm::ConstantInt::get(
                                llvm::Type::getInt64Ty(builder.getContext()),
                                alignmentValue))));
    }
    retVal = loadInst;

    break;
  }
  case omp::VariableCaptureKind::This:
  case omp::VariableCaptureKind::VLAType:
    // TODO: Consider returning error to use standard reporting for
    // unimplemented features.
    assert(false && "Currently unsupported capture kind");
    break;
  }

  return builder.saveIP();
}
5276
5277/// Follow uses of `host_eval`-defined block arguments of the given `omp.target`
5278/// operation and populate output variables with their corresponding host value
5279/// (i.e. operand evaluated outside of the target region), based on their uses
5280/// inside of the target region.
5281///
5282/// Loop bounds and steps are only optionally populated, if output vectors are
5283/// provided.
5284static void
5285extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
5286 Value &numTeamsLower, Value &numTeamsUpper,
5287 Value &threadLimit,
5288 llvm::SmallVectorImpl<Value> *lowerBounds = nullptr,
5289 llvm::SmallVectorImpl<Value> *upperBounds = nullptr,
5290 llvm::SmallVectorImpl<Value> *steps = nullptr) {
5291 auto blockArgIface = llvm::cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
5292 for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(),
5293 blockArgIface.getHostEvalBlockArgs())) {
5294 Value hostEvalVar = std::get<0>(item), blockArg = std::get<1>(item);
5295
5296 for (Operation *user : blockArg.getUsers()) {
5298 .Case([&](omp::TeamsOp teamsOp) {
5299 if (teamsOp.getNumTeamsLower() == blockArg)
5300 numTeamsLower = hostEvalVar;
5301 else if (teamsOp.getNumTeamsUpper() == blockArg)
5302 numTeamsUpper = hostEvalVar;
5303 else if (teamsOp.getThreadLimit() == blockArg)
5304 threadLimit = hostEvalVar;
5305 else
5306 llvm_unreachable("unsupported host_eval use");
5307 })
5308 .Case([&](omp::ParallelOp parallelOp) {
5309 if (parallelOp.getNumThreads() == blockArg)
5310 numThreads = hostEvalVar;
5311 else
5312 llvm_unreachable("unsupported host_eval use");
5313 })
5314 .Case([&](omp::LoopNestOp loopOp) {
5315 auto processBounds =
5316 [&](OperandRange opBounds,
5317 llvm::SmallVectorImpl<Value> *outBounds) -> bool {
5318 bool found = false;
5319 for (auto [i, lb] : llvm::enumerate(opBounds)) {
5320 if (lb == blockArg) {
5321 found = true;
5322 if (outBounds)
5323 (*outBounds)[i] = hostEvalVar;
5324 }
5325 }
5326 return found;
5327 };
5328 bool found =
5329 processBounds(loopOp.getLoopLowerBounds(), lowerBounds);
5330 found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) ||
5331 found;
5332 found = processBounds(loopOp.getLoopSteps(), steps) || found;
5333 (void)found;
5334 assert(found && "unsupported host_eval use");
5335 })
5336 .DefaultUnreachable("unsupported host_eval use");
5337 }
5338 }
5339}
5340
5341/// If \p op is of the given type parameter, return it casted to that type.
5342/// Otherwise, if its immediate parent operation (or some other higher-level
5343/// parent, if \p immediateParent is false) is of that type, return that parent
5344/// casted to the given type.
5345///
5346/// If \p op is \c null or neither it or its parent(s) are of the specified
5347/// type, return a \c null operation.
5348template <typename OpTy>
5349static OpTy castOrGetParentOfType(Operation *op, bool immediateParent = false) {
5350 if (!op)
5351 return OpTy();
5352
5353 if (OpTy casted = dyn_cast<OpTy>(op))
5354 return casted;
5355
5356 if (immediateParent)
5357 return dyn_cast_if_present<OpTy>(op->getParentOp());
5358
5359 return op->getParentOfType<OpTy>();
5360}
5361
5362/// If the given \p value is defined by an \c llvm.mlir.constant operation and
5363/// it is of an integer type, return its value.
5364static std::optional<int64_t> extractConstInteger(Value value) {
5365 if (!value)
5366 return std::nullopt;
5367
5368 if (auto constOp = value.getDefiningOp<LLVM::ConstantOp>())
5369 if (auto constAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
5370 return constAttr.getInt();
5371
5372 return std::nullopt;
5373}
5374
5375static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl) {
5376 uint64_t sizeInBits = dl.getTypeSizeInBits(type);
5377 uint64_t sizeInBytes = sizeInBits / 8;
5378 return sizeInBytes;
5379}
5380
5381template <typename OpTy>
5382static uint64_t getReductionDataSize(OpTy &op) {
5383 if (op.getNumReductionVars() > 0) {
5385 collectReductionDecls(op, reductions);
5386
5388 members.reserve(reductions.size());
5389 for (omp::DeclareReductionOp &red : reductions)
5390 members.push_back(red.getType());
5391 Operation *opp = op.getOperation();
5392 auto structType = mlir::LLVM::LLVMStructType::getLiteral(
5393 opp->getContext(), members, /*isPacked=*/false);
5394 DataLayout dl = DataLayout(opp->getParentOfType<ModuleOp>());
5395 return getTypeByteSize(structType, dl);
5396 }
5397 return 0;
5398}
5399
5400/// Populate default `MinTeams`, `MaxTeams` and `MaxThreads` to their default
5401/// values as stated by the corresponding clauses, if constant.
5402///
5403/// These default values must be set before the creation of the outlined LLVM
5404/// function for the target region, so that they can be used to initialize the
5405/// corresponding global `ConfigurationEnvironmentTy` structure.
5406static void
5407initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp,
5408 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs,
5409 bool isTargetDevice, bool isGPU) {
5410 // TODO: Handle constant 'if' clauses.
5411
5412 Value numThreads, numTeamsLower, numTeamsUpper, threadLimit;
5413 if (!isTargetDevice) {
5414 extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
5415 threadLimit);
5416 } else {
5417 // In the target device, values for these clauses are not passed as
5418 // host_eval, but instead evaluated prior to entry to the region. This
5419 // ensures values are mapped and available inside of the target region.
5420 if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
5421 numTeamsLower = teamsOp.getNumTeamsLower();
5422 numTeamsUpper = teamsOp.getNumTeamsUpper();
5423 threadLimit = teamsOp.getThreadLimit();
5424 }
5425
5426 if (auto parallelOp = castOrGetParentOfType<omp::ParallelOp>(capturedOp))
5427 numThreads = parallelOp.getNumThreads();
5428 }
5429
5430 // Handle clauses impacting the number of teams.
5431
5432 int32_t minTeamsVal = 1, maxTeamsVal = -1;
5433 if (castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
5434 // TODO: Use `hostNumTeamsLower` to initialize `minTeamsVal`. For now, match
5435 // clang and set min and max to the same value.
5436 if (numTeamsUpper) {
5437 if (auto val = extractConstInteger(numTeamsUpper))
5438 minTeamsVal = maxTeamsVal = *val;
5439 } else {
5440 minTeamsVal = maxTeamsVal = 0;
5441 }
5442 } else if (castOrGetParentOfType<omp::ParallelOp>(capturedOp,
5443 /*immediateParent=*/true) ||
5445 /*immediateParent=*/true)) {
5446 minTeamsVal = maxTeamsVal = 1;
5447 } else {
5448 minTeamsVal = maxTeamsVal = -1;
5449 }
5450
5451 // Handle clauses impacting the number of threads.
5452
5453 auto setMaxValueFromClause = [](Value clauseValue, int32_t &result) {
5454 if (!clauseValue)
5455 return;
5456
5457 if (auto val = extractConstInteger(clauseValue))
5458 result = *val;
5459
5460 // Found an applicable clause, so it's not undefined. Mark as unknown
5461 // because it's not constant.
5462 if (result < 0)
5463 result = 0;
5464 };
5465
5466 // Extract 'thread_limit' clause from 'target' and 'teams' directives.
5467 int32_t targetThreadLimitVal = -1, teamsThreadLimitVal = -1;
5468 setMaxValueFromClause(targetOp.getThreadLimit(), targetThreadLimitVal);
5469 setMaxValueFromClause(threadLimit, teamsThreadLimitVal);
5470
5471 // Extract 'max_threads' clause from 'parallel' or set to 1 if it's SIMD.
5472 int32_t maxThreadsVal = -1;
5474 setMaxValueFromClause(numThreads, maxThreadsVal);
5475 else if (castOrGetParentOfType<omp::SimdOp>(capturedOp,
5476 /*immediateParent=*/true))
5477 maxThreadsVal = 1;
5478
5479 // For max values, < 0 means unset, == 0 means set but unknown. Select the
5480 // minimum value between 'max_threads' and 'thread_limit' clauses that were
5481 // set.
5482 int32_t combinedMaxThreadsVal = targetThreadLimitVal;
5483 if (combinedMaxThreadsVal < 0 ||
5484 (teamsThreadLimitVal >= 0 && teamsThreadLimitVal < combinedMaxThreadsVal))
5485 combinedMaxThreadsVal = teamsThreadLimitVal;
5486
5487 if (combinedMaxThreadsVal < 0 ||
5488 (maxThreadsVal >= 0 && maxThreadsVal < combinedMaxThreadsVal))
5489 combinedMaxThreadsVal = maxThreadsVal;
5490
5491 int32_t reductionDataSize = 0;
5492 if (isGPU && capturedOp) {
5493 if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp))
5494 reductionDataSize = getReductionDataSize(teamsOp);
5495 }
5496
5497 // Update kernel bounds structure for the `OpenMPIRBuilder` to use.
5498 omp::TargetRegionFlags kernelFlags = targetOp.getKernelExecFlags(capturedOp);
5499 assert(
5500 omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic |
5501 omp::TargetRegionFlags::spmd) &&
5502 "invalid kernel flags");
5503 attrs.ExecFlags =
5504 omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic)
5505 ? omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::spmd)
5506 ? llvm::omp::OMP_TGT_EXEC_MODE_GENERIC_SPMD
5507 : llvm::omp::OMP_TGT_EXEC_MODE_GENERIC
5508 : llvm::omp::OMP_TGT_EXEC_MODE_SPMD;
5509 if (omp::bitEnumContainsAll(kernelFlags,
5510 omp::TargetRegionFlags::spmd |
5511 omp::TargetRegionFlags::no_loop) &&
5512 !omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic))
5513 attrs.ExecFlags = llvm::omp::OMP_TGT_EXEC_MODE_SPMD_NO_LOOP;
5514
5515 attrs.MinTeams = minTeamsVal;
5516 attrs.MaxTeams.front() = maxTeamsVal;
5517 attrs.MinThreads = 1;
5518 attrs.MaxThreads.front() = combinedMaxThreadsVal;
5519 attrs.ReductionDataSize = reductionDataSize;
5520 // TODO: Allow modified buffer length similar to
5521 // fopenmp-cuda-teams-reduction-recs-num flag in clang.
5522 if (attrs.ReductionDataSize != 0)
5523 attrs.ReductionBufferLength = 1024;
5524}
5525
5526/// Gather LLVM runtime values for all clauses evaluated in the host that are
5527/// passed to the kernel invocation.
5528///
5529/// This function must be called only when compiling for the host. Also, it will
5530/// only provide correct results if it's called after the body of \c targetOp
5531/// has been fully generated.
5532static void
5533initTargetRuntimeAttrs(llvm::IRBuilderBase &builder,
5534 LLVM::ModuleTranslation &moduleTranslation,
5535 omp::TargetOp targetOp, Operation *capturedOp,
5536 llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs) {
5537 omp::LoopNestOp loopOp = castOrGetParentOfType<omp::LoopNestOp>(capturedOp);
5538 unsigned numLoops = loopOp ? loopOp.getNumLoops() : 0;
5539
5540 Value numThreads, numTeamsLower, numTeamsUpper, teamsThreadLimit;
5541 llvm::SmallVector<Value> lowerBounds(numLoops), upperBounds(numLoops),
5542 steps(numLoops);
5543 extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
5544 teamsThreadLimit, &lowerBounds, &upperBounds, &steps);
5545
5546 // TODO: Handle constant 'if' clauses.
5547 if (Value targetThreadLimit = targetOp.getThreadLimit())
5548 attrs.TargetThreadLimit.front() =
5549 moduleTranslation.lookupValue(targetThreadLimit);
5550
5551 if (numTeamsLower)
5552 attrs.MinTeams = moduleTranslation.lookupValue(numTeamsLower);
5553
5554 if (numTeamsUpper)
5555 attrs.MaxTeams.front() = moduleTranslation.lookupValue(numTeamsUpper);
5556
5557 if (teamsThreadLimit)
5558 attrs.TeamsThreadLimit.front() =
5559 moduleTranslation.lookupValue(teamsThreadLimit);
5560
5561 if (numThreads)
5562 attrs.MaxThreads = moduleTranslation.lookupValue(numThreads);
5563
5564 if (omp::bitEnumContainsAny(targetOp.getKernelExecFlags(capturedOp),
5565 omp::TargetRegionFlags::trip_count)) {
5566 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5567 attrs.LoopTripCount = nullptr;
5568
5569 // To calculate the trip count, we multiply together the trip counts of
5570 // every collapsed canonical loop. We don't need to create the loop nests
5571 // here, since we're only interested in the trip count.
5572 for (auto [loopLower, loopUpper, loopStep] :
5573 llvm::zip_equal(lowerBounds, upperBounds, steps)) {
5574 llvm::Value *lowerBound = moduleTranslation.lookupValue(loopLower);
5575 llvm::Value *upperBound = moduleTranslation.lookupValue(loopUpper);
5576 llvm::Value *step = moduleTranslation.lookupValue(loopStep);
5577
5578 llvm::OpenMPIRBuilder::LocationDescription loc(builder);
5579 llvm::Value *tripCount = ompBuilder->calculateCanonicalLoopTripCount(
5580 loc, lowerBound, upperBound, step, /*IsSigned=*/true,
5581 loopOp.getLoopInclusive());
5582
5583 if (!attrs.LoopTripCount) {
5584 attrs.LoopTripCount = tripCount;
5585 continue;
5586 }
5587
5588 // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
5589 attrs.LoopTripCount = builder.CreateMul(attrs.LoopTripCount, tripCount,
5590 {}, /*HasNUW=*/true);
5591 }
5592 }
5593}
5594
5595static LogicalResult
5596convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
5597 LLVM::ModuleTranslation &moduleTranslation) {
5598 auto targetOp = cast<omp::TargetOp>(opInst);
5599 // The current debug location already has the DISubprogram for the outlined
5600 // function that will be created for the target op. We save it here so that
5601 // we can set it on the outlined function.
5602 llvm::DebugLoc outlinedFnLoc = builder.getCurrentDebugLocation();
5603 if (failed(checkImplementationStatus(opInst)))
5604 return failure();
5605
5606 // During the handling of target op, we will generate instructions in the
5607 // parent function like call to the oulined function or branch to a new
5608 // BasicBlock. We set the debug location here to parent function so that those
5609 // get the correct debug locations. For outlined functions, the normal MLIR op
5610 // conversion will automatically pick the correct location.
5611 llvm::BasicBlock *parentBB = builder.GetInsertBlock();
5612 assert(parentBB && "No insert block is set for the builder");
5613 llvm::Function *parentLLVMFn = parentBB->getParent();
5614 assert(parentLLVMFn && "Parent Function must be valid");
5615 if (llvm::DISubprogram *SP = parentLLVMFn->getSubprogram())
5616 builder.SetCurrentDebugLocation(llvm::DILocation::get(
5617 parentLLVMFn->getContext(), outlinedFnLoc.getLine(),
5618 outlinedFnLoc.getCol(), SP, outlinedFnLoc.getInlinedAt()));
5619
5620 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5621 bool isTargetDevice = ompBuilder->Config.isTargetDevice();
5622 bool isGPU = ompBuilder->Config.isGPU();
5623
5624 auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
5625 auto argIface = cast<omp::BlockArgOpenMPOpInterface>(opInst);
5626 auto &targetRegion = targetOp.getRegion();
5627 // Holds the private vars that have been mapped along with the block argument
5628 // that corresponds to the MapInfoOp corresponding to the private var in
5629 // question. So, for instance:
5630 //
5631 // %10 = omp.map.info var_ptr(%6#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, ..)
5632 // omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1)
5633 //
5634 // Then, %10 has been created so that the descriptor can be used by the
5635 // privatizer @box.privatizer on the device side. Here we'd record {%6#0,
5636 // %arg0} in the mappedPrivateVars map.
5637 llvm::DenseMap<Value, Value> mappedPrivateVars;
5638 DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
5639 SmallVector<Value> mapVars = targetOp.getMapVars();
5640 SmallVector<Value> hdaVars = targetOp.getHasDeviceAddrVars();
5641 ArrayRef<BlockArgument> mapBlockArgs = argIface.getMapBlockArgs();
5642 ArrayRef<BlockArgument> hdaBlockArgs = argIface.getHasDeviceAddrBlockArgs();
5643 llvm::Function *llvmOutlinedFn = nullptr;
5644
5645 // TODO: It can also be false if a compile-time constant `false` IF clause is
5646 // specified.
5647 bool isOffloadEntry =
5648 isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
5649
5650 // For some private variables, the MapsForPrivatizedVariablesPass
5651 // creates MapInfoOp instances. Go through the private variables and
5652 // the mapped variables so that during codegeneration we are able
5653 // to quickly look up the corresponding map variable, if any for each
5654 // private variable.
5655 if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) {
5656 OperandRange privateVars = targetOp.getPrivateVars();
5657 std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
5658 std::optional<DenseI64ArrayAttr> privateMapIndices =
5659 targetOp.getPrivateMapsAttr();
5660
5661 for (auto [privVarIdx, privVarSymPair] :
5662 llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) {
5663 auto privVar = std::get<0>(privVarSymPair);
5664 auto privSym = std::get<1>(privVarSymPair);
5665
5666 SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
5667 omp::PrivateClauseOp privatizer =
5668 findPrivatizer(targetOp, privatizerName);
5669
5670 if (!privatizer.needsMap())
5671 continue;
5672
5673 mlir::Value mappedValue =
5674 targetOp.getMappedValueForPrivateVar(privVarIdx);
5675 assert(mappedValue && "Expected to find mapped value for a privatized "
5676 "variable that needs mapping");
5677
5678 // The MapInfoOp defining the map var isn't really needed later.
5679 // So, we don't store it in any datastructure. Instead, we just
5680 // do some sanity checks on it right now.
5681 auto mapInfoOp = mappedValue.getDefiningOp<omp::MapInfoOp>();
5682 [[maybe_unused]] Type varType = mapInfoOp.getVarType();
5683
5684 // Check #1: Check that the type of the private variable matches
5685 // the type of the variable being mapped.
5686 if (!isa<LLVM::LLVMPointerType>(privVar.getType()))
5687 assert(
5688 varType == privVar.getType() &&
5689 "Type of private var doesn't match the type of the mapped value");
5690
5691 // Ok, only 1 sanity check for now.
5692 // Record the block argument corresponding to this mapvar.
5693 mappedPrivateVars.insert(
5694 {privVar,
5695 targetRegion.getArgument(argIface.getMapBlockArgsStart() +
5696 (*privateMapIndices)[privVarIdx])});
5697 }
5698 }
5699
5700 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5701 auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
5702 -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
5703 llvm::IRBuilderBase::InsertPointGuard guard(builder);
5704 builder.SetCurrentDebugLocation(llvm::DebugLoc());
5705 // Forward target-cpu and target-features function attributes from the
5706 // original function to the new outlined function.
5707 llvm::Function *llvmParentFn =
5708 moduleTranslation.lookupFunction(parentFn.getName());
5709 llvmOutlinedFn = codeGenIP.getBlock()->getParent();
5710 assert(llvmParentFn && llvmOutlinedFn &&
5711 "Both parent and outlined functions must exist at this point");
5712
5713 if (outlinedFnLoc && llvmParentFn->getSubprogram())
5714 llvmOutlinedFn->setSubprogram(outlinedFnLoc->getScope()->getSubprogram());
5715
5716 if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
5717 attr.isStringAttribute())
5718 llvmOutlinedFn->addFnAttr(attr);
5719
5720 if (auto attr = llvmParentFn->getFnAttribute("target-features");
5721 attr.isStringAttribute())
5722 llvmOutlinedFn->addFnAttr(attr);
5723
5724 for (auto [arg, mapOp] : llvm::zip_equal(mapBlockArgs, mapVars)) {
5725 auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
5726 llvm::Value *mapOpValue =
5727 moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
5728 moduleTranslation.mapValue(arg, mapOpValue);
5729 }
5730 for (auto [arg, mapOp] : llvm::zip_equal(hdaBlockArgs, hdaVars)) {
5731 auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
5732 llvm::Value *mapOpValue =
5733 moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
5734 moduleTranslation.mapValue(arg, mapOpValue);
5735 }
5736
5737 // Do privatization after moduleTranslation has already recorded
5738 // mapped values.
5739 PrivateVarsInfo privateVarsInfo(targetOp);
5740
5742 allocatePrivateVars(builder, moduleTranslation, privateVarsInfo,
5743 allocaIP, &mappedPrivateVars);
5744
5745 if (failed(handleError(afterAllocas, *targetOp)))
5746 return llvm::make_error<PreviouslyReportedError>();
5747
5748 builder.restoreIP(codeGenIP);
5749 if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo,
5750 &mappedPrivateVars),
5751 *targetOp)
5752 .failed())
5753 return llvm::make_error<PreviouslyReportedError>();
5754
5755 if (failed(copyFirstPrivateVars(
5756 targetOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
5757 privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
5758 targetOp.getPrivateNeedsBarrier(), &mappedPrivateVars)))
5759 return llvm::make_error<PreviouslyReportedError>();
5760
5761 SmallVector<Region *> privateCleanupRegions;
5762 llvm::transform(privateVarsInfo.privatizers,
5763 std::back_inserter(privateCleanupRegions),
5764 [](omp::PrivateClauseOp privatizer) {
5765 return &privatizer.getDeallocRegion();
5766 });
5767
5769 targetRegion, "omp.target", builder, moduleTranslation);
5770
5771 if (!exitBlock)
5772 return exitBlock.takeError();
5773
5774 builder.SetInsertPoint(*exitBlock);
5775 if (!privateCleanupRegions.empty()) {
5776 if (failed(inlineOmpRegionCleanup(
5777 privateCleanupRegions, privateVarsInfo.llvmVars,
5778 moduleTranslation, builder, "omp.targetop.private.cleanup",
5779 /*shouldLoadCleanupRegionArg=*/false))) {
5780 return llvm::createStringError(
5781 "failed to inline `dealloc` region of `omp.private` "
5782 "op in the target region");
5783 }
5784 return builder.saveIP();
5785 }
5786
5787 return InsertPointTy(exitBlock.get(), exitBlock.get()->end());
5788 };
5789
5790 StringRef parentName = parentFn.getName();
5791
5792 llvm::TargetRegionEntryInfo entryInfo;
5793
5794 getTargetEntryUniqueInfo(entryInfo, targetOp, parentName);
5795
5796 MapInfoData mapData;
5797 collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
5798 builder, /*useDevPtrOperands=*/{},
5799 /*useDevAddrOperands=*/{}, hdaVars);
5800
5801 MapInfosTy combinedInfos;
5802 auto genMapInfoCB =
5803 [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) -> MapInfosTy & {
5804 builder.restoreIP(codeGenIP);
5805 genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, true);
5806 return combinedInfos;
5807 };
5808
5809 auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
5810 llvm::Value *&retVal, InsertPointTy allocaIP,
5811 InsertPointTy codeGenIP)
5812 -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
5813 llvm::IRBuilderBase::InsertPointGuard guard(builder);
5814 builder.SetCurrentDebugLocation(llvm::DebugLoc());
5815 // We just return the unaltered argument for the host function
5816 // for now, some alterations may be required in the future to
5817 // keep host fallback functions working identically to the device
5818 // version (e.g. pass ByCopy values should be treated as such on
5819 // host and device, currently not always the case)
5820 if (!isTargetDevice) {
5821 retVal = cast<llvm::Value>(&arg);
5822 return codeGenIP;
5823 }
5824
5825 return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
5826 *ompBuilder, moduleTranslation,
5827 allocaIP, codeGenIP);
5828 };
5829
5830 llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs runtimeAttrs;
5831 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs defaultAttrs;
5832 Operation *targetCapturedOp = targetOp.getInnermostCapturedOmpOp();
5833 initTargetDefaultAttrs(targetOp, targetCapturedOp, defaultAttrs,
5834 isTargetDevice, isGPU);
5835
5836 // Collect host-evaluated values needed to properly launch the kernel from the
5837 // host.
5838 if (!isTargetDevice)
5839 initTargetRuntimeAttrs(builder, moduleTranslation, targetOp,
5840 targetCapturedOp, runtimeAttrs);
5841
5842 // Pass host-evaluated values as parameters to the kernel / host fallback,
5843 // except if they are constants. In any case, map the MLIR block argument to
5844 // the corresponding LLVM values.
5846 SmallVector<Value> hostEvalVars = targetOp.getHostEvalVars();
5847 ArrayRef<BlockArgument> hostEvalBlockArgs = argIface.getHostEvalBlockArgs();
5848 for (auto [arg, var] : llvm::zip_equal(hostEvalBlockArgs, hostEvalVars)) {
5849 llvm::Value *value = moduleTranslation.lookupValue(var);
5850 moduleTranslation.mapValue(arg, value);
5851
5852 if (!llvm::isa<llvm::Constant>(value))
5853 kernelInput.push_back(value);
5854 }
5855
5856 for (size_t i = 0, e = mapData.OriginalValue.size(); i != e; ++i) {
5857 // declare target arguments are not passed to kernels as arguments
5858 // TODO: We currently do not handle cases where a member is explicitly
5859 // passed in as an argument, this will likley need to be handled in
5860 // the near future, rather than using IsAMember, it may be better to
5861 // test if the relevant BlockArg is used within the target region and
5862 // then use that as a basis for exclusion in the kernel inputs.
5863 if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
5864 kernelInput.push_back(mapData.OriginalValue[i]);
5865 }
5866
5868 buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
5869 moduleTranslation, dds);
5870
5871 llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
5872 findAllocaInsertPoint(builder, moduleTranslation);
5873 llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
5874
5875 llvm::OpenMPIRBuilder::TargetDataInfo info(
5876 /*RequiresDevicePointerInfo=*/false,
5877 /*SeparateBeginEndCalls=*/true);
5878
5879 auto customMapperCB =
5880 [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
5881 if (!combinedInfos.Mappers[i])
5882 return nullptr;
5883 info.HasMapper = true;
5884 return getOrCreateUserDefinedMapperFunc(combinedInfos.Mappers[i], builder,
5885 moduleTranslation);
5886 };
5887
5888 llvm::Value *ifCond = nullptr;
5889 if (Value targetIfCond = targetOp.getIfExpr())
5890 ifCond = moduleTranslation.lookupValue(targetIfCond);
5891
5892 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
5893 moduleTranslation.getOpenMPBuilder()->createTarget(
5894 ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), info, entryInfo,
5895 defaultAttrs, runtimeAttrs, ifCond, kernelInput, genMapInfoCB, bodyCB,
5896 argAccessorCB, customMapperCB, dds, targetOp.getNowait());
5897
5898 if (failed(handleError(afterIP, opInst)))
5899 return failure();
5900
5901 builder.restoreIP(*afterIP);
5902
5903 // Remap access operations to declare target reference pointers for the
5904 // device, essentially generating extra loadop's as necessary
5905 if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
5906 handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
5907 llvmOutlinedFn);
5908
5909 return success();
5910}
5911
/// Handle the `omp.declare_target` attribute on \p op during translation.
///
/// For functions: when compiling for the device, erases the LLVM IR of
/// host-only wrapper functions. For globals: registers the variable with the
/// OpenMP offload runtime via the OpenMPIRBuilder.
static LogicalResult
convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
                         LLVM::ModuleTranslation &moduleTranslation) {
  // Amend omp.declare_target by deleting the IR of the outlined functions
  // created for target regions. They cannot be filtered out from MLIR earlier
  // because the omp.target operation inside must be translated to LLVM, but
  // the wrapper functions themselves must not remain at the end of the
  // process. We know that functions where omp.declare_target does not match
  // omp.is_target_device at this stage can only be wrapper functions because
  // those that aren't are removed earlier as an MLIR transformation pass.
  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
    if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
            op->getParentOfType<ModuleOp>().getOperation())) {
      // Host compilation: nothing to prune.
      if (!offloadMod.getIsTargetDevice())
        return success();

      omp::DeclareTargetDeviceType declareType =
          attribute.getDeviceType().getValue();

      // Device compilation of a host-only function: delete its LLVM IR.
      // dropAllReferences() first so erasure doesn't trip on remaining uses.
      if (declareType == omp::DeclareTargetDeviceType::host) {
        llvm::Function *llvmFunc =
            moduleTranslation.lookupFunction(funcOp.getName());
        llvmFunc->dropAllReferences();
        llvmFunc->eraseFromParent();
      }
    }
    return success();
  }

  // Global variables: register them with the OpenMP offload runtime.
  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
    llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
    if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
      llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
      bool isDeclaration = gOp.isDeclaration();
      bool isExternallyVisible =
          gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
      // May be null if the op's location carries no file/line information.
      auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
      llvm::StringRef mangledName = gOp.getSymName();
      auto captureClause =
          convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
      auto deviceClause =
          convertToDeviceClauseKind(attribute.getDeviceType().getValue());
      // unused for MLIR at the moment, required in Clang for book
      // keeping
      std::vector<llvm::GlobalVariable *> generatedRefs;

      // Target triple taken from the module attribute, if present.
      std::vector<llvm::Triple> targetTriple;
      auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>(
          op->getParentOfType<mlir::ModuleOp>()->getAttr(
              LLVM::LLVMDialect::getTargetTripleAttrName()));
      if (targetTripleAttr)
        targetTriple.emplace_back(targetTripleAttr.data());

      // Provides (filename, line) used to build a unique offload entry; falls
      // back to an empty name and line 0 when no FileLineColLoc is available.
      auto fileInfoCallBack = [&loc]() {
        std::string filename = "";
        std::uint64_t lineNo = 0;

        if (loc) {
          filename = loc.getFilename().str();
          lineNo = loc.getLine();
        }

        return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
                                                     lineNo);
      };

      auto vfs = llvm::vfs::getRealFileSystem();

      ompBuilder->registerTargetGlobalVariable(
          captureClause, deviceClause, isDeclaration, isExternallyVisible,
          ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack, *vfs),
          mangledName, generatedRefs, /*OpenMPSimd*/ false, targetTriple,
          /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
          gVal->getType(), gVal);

      // On the device, for captures other than 'to' (or under unified shared
      // memory), also materialize the declare-target reference pointer.
      // NOTE(review): mirrors clang's handling — confirm against
      // OpenMPIRBuilder::getAddrOfDeclareTargetVar semantics.
      if (ompBuilder->Config.isTargetDevice() &&
          (attribute.getCaptureClause().getValue() !=
               mlir::omp::DeclareTargetCaptureClause::to ||
           ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
        ompBuilder->getAddrOfDeclareTargetVar(
            captureClause, deviceClause, isDeclaration, isExternallyVisible,
            ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack, *vfs),
            mangledName, generatedRefs, /*OpenMPSimd*/ false, targetTriple,
            gVal->getType(), /*GlobalInitializer*/ nullptr,
            /*VariableLinkage*/ nullptr);
      }
    }
  }

  return success();
}
6003
6004// Returns true if the operation is inside a TargetOp or
6005// is part of a declare target function.
6006static bool isTargetDeviceOp(Operation *op) {
6007 // Assumes no reverse offloading
6008 if (op->getParentOfType<omp::TargetOp>())
6009 return true;
6010
6011 // Certain operations return results, and whether utilised in host or
6012 // target there is a chance an LLVM Dialect operation depends on it
6013 // by taking it in as an operand, so we must always lower these in
6014 // some manner or result in an ICE (whether they end up in a no-op
6015 // or otherwise).
6016 if (mlir::isa<omp::ThreadprivateOp>(op))
6017 return true;
6018
6019 if (mlir::isa<omp::TargetAllocMemOp>(op) ||
6020 mlir::isa<omp::TargetFreeMemOp>(op))
6021 return true;
6022
6023 if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
6024 if (auto declareTargetIface =
6025 llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
6026 parentFn.getOperation()))
6027 if (declareTargetIface.isDeclareTarget() &&
6028 declareTargetIface.getDeclareTargetDeviceType() !=
6029 mlir::omp::DeclareTargetDeviceType::host)
6030 return true;
6031
6032 return false;
6033}
6034
6035static llvm::Function *getOmpTargetAlloc(llvm::IRBuilderBase &builder,
6036 llvm::Module *llvmModule) {
6037 llvm::Type *i64Ty = builder.getInt64Ty();
6038 llvm::Type *i32Ty = builder.getInt32Ty();
6039 llvm::Type *returnType = builder.getPtrTy(0);
6040 llvm::FunctionType *fnType =
6041 llvm::FunctionType::get(returnType, {i64Ty, i32Ty}, false);
6042 llvm::Function *func = cast<llvm::Function>(
6043 llvmModule->getOrInsertFunction("omp_target_alloc", fnType).getCallee());
6044 return func;
6045}
6046
6047static LogicalResult
6048convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
6049 LLVM::ModuleTranslation &moduleTranslation) {
6050 auto allocMemOp = cast<omp::TargetAllocMemOp>(opInst);
6051 if (!allocMemOp)
6052 return failure();
6053
6054 // Get "omp_target_alloc" function
6055 llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
6056 llvm::Function *ompTargetAllocFunc = getOmpTargetAlloc(builder, llvmModule);
6057 // Get the corresponding device value in llvm
6058 mlir::Value deviceNum = allocMemOp.getDevice();
6059 llvm::Value *llvmDeviceNum = moduleTranslation.lookupValue(deviceNum);
6060 // Get the allocation size.
6061 llvm::DataLayout dataLayout = llvmModule->getDataLayout();
6062 mlir::Type heapTy = allocMemOp.getAllocatedType();
6063 llvm::Type *llvmHeapTy = moduleTranslation.convertType(heapTy);
6064 llvm::TypeSize typeSize = dataLayout.getTypeStoreSize(llvmHeapTy);
6065 llvm::Value *allocSize = builder.getInt64(typeSize.getFixedValue());
6066 for (auto typeParam : allocMemOp.getTypeparams())
6067 allocSize =
6068 builder.CreateMul(allocSize, moduleTranslation.lookupValue(typeParam));
6069 // Create call to "omp_target_alloc" with the args as translated llvm values.
6070 llvm::CallInst *call =
6071 builder.CreateCall(ompTargetAllocFunc, {allocSize, llvmDeviceNum});
6072 llvm::Value *resultI64 = builder.CreatePtrToInt(call, builder.getInt64Ty());
6073
6074 // Map the result
6075 moduleTranslation.mapValue(allocMemOp.getResult(), resultI64);
6076 return success();
6077}
6078
6079static llvm::Function *getOmpTargetFree(llvm::IRBuilderBase &builder,
6080 llvm::Module *llvmModule) {
6081 llvm::Type *ptrTy = builder.getPtrTy(0);
6082 llvm::Type *i32Ty = builder.getInt32Ty();
6083 llvm::Type *voidTy = builder.getVoidTy();
6084 llvm::FunctionType *fnType =
6085 llvm::FunctionType::get(voidTy, {ptrTy, i32Ty}, false);
6086 llvm::Function *func = dyn_cast<llvm::Function>(
6087 llvmModule->getOrInsertFunction("omp_target_free", fnType).getCallee());
6088 return func;
6089}
6090
6091static LogicalResult
6092convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
6093 LLVM::ModuleTranslation &moduleTranslation) {
6094 auto freeMemOp = cast<omp::TargetFreeMemOp>(opInst);
6095 if (!freeMemOp)
6096 return failure();
6097
6098 // Get "omp_target_free" function
6099 llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
6100 llvm::Function *ompTragetFreeFunc = getOmpTargetFree(builder, llvmModule);
6101 // Get the corresponding device value in llvm
6102 mlir::Value deviceNum = freeMemOp.getDevice();
6103 llvm::Value *llvmDeviceNum = moduleTranslation.lookupValue(deviceNum);
6104 // Get the corresponding heapref value in llvm
6105 mlir::Value heapref = freeMemOp.getHeapref();
6106 llvm::Value *llvmHeapref = moduleTranslation.lookupValue(heapref);
6107 // Convert heapref int to ptr and call "omp_target_free"
6108 llvm::Value *intToPtr =
6109 builder.CreateIntToPtr(llvmHeapref, builder.getPtrTy(0));
6110 builder.CreateCall(ompTragetFreeFunc, {intToPtr, llvmDeviceNum});
6111 return success();
6112}
6113
6114/// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
6115/// OpenMP runtime calls).
6116static LogicalResult
6117convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
6118 LLVM::ModuleTranslation &moduleTranslation) {
6119 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
6120
6121 // For each loop, introduce one stack frame to hold loop information. Ensure
6122 // this is only done for the outermost loop wrapper to prevent introducing
6123 // multiple stack frames for a single loop. Initially set to null, the loop
6124 // information structure is initialized during translation of the nested
6125 // omp.loop_nest operation, making it available to translation of all loop
6126 // wrappers after their body has been successfully translated.
6127 bool isOutermostLoopWrapper =
6128 isa_and_present<omp::LoopWrapperInterface>(op) &&
6129 !dyn_cast_if_present<omp::LoopWrapperInterface>(op->getParentOp());
6130
6131 if (isOutermostLoopWrapper)
6132 moduleTranslation.stackPush<OpenMPLoopInfoStackFrame>();
6133
6134 auto result =
6136 .Case([&](omp::BarrierOp op) -> LogicalResult {
6137 if (failed(checkImplementationStatus(*op)))
6138 return failure();
6139
6140 llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
6141 ompBuilder->createBarrier(builder.saveIP(),
6142 llvm::omp::OMPD_barrier);
6143 LogicalResult res = handleError(afterIP, *op);
6144 if (res.succeeded()) {
6145 // If the barrier generated a cancellation check, the insertion
6146 // point might now need to be changed to a new continuation block
6147 builder.restoreIP(*afterIP);
6148 }
6149 return res;
6150 })
6151 .Case([&](omp::TaskyieldOp op) {
6152 if (failed(checkImplementationStatus(*op)))
6153 return failure();
6154
6155 ompBuilder->createTaskyield(builder.saveIP());
6156 return success();
6157 })
6158 .Case([&](omp::FlushOp op) {
6159 if (failed(checkImplementationStatus(*op)))
6160 return failure();
6161
6162 // No support in Openmp runtime function (__kmpc_flush) to accept
6163 // the argument list.
6164 // OpenMP standard states the following:
6165 // "An implementation may implement a flush with a list by ignoring
6166 // the list, and treating it the same as a flush without a list."
6167 //
6168 // The argument list is discarded so that, flush with a list is
6169 // treated same as a flush without a list.
6170 ompBuilder->createFlush(builder.saveIP());
6171 return success();
6172 })
6173 .Case([&](omp::ParallelOp op) {
6174 return convertOmpParallel(op, builder, moduleTranslation);
6175 })
6176 .Case([&](omp::MaskedOp) {
6177 return convertOmpMasked(*op, builder, moduleTranslation);
6178 })
6179 .Case([&](omp::MasterOp) {
6180 return convertOmpMaster(*op, builder, moduleTranslation);
6181 })
6182 .Case([&](omp::CriticalOp) {
6183 return convertOmpCritical(*op, builder, moduleTranslation);
6184 })
6185 .Case([&](omp::OrderedRegionOp) {
6186 return convertOmpOrderedRegion(*op, builder, moduleTranslation);
6187 })
6188 .Case([&](omp::OrderedOp) {
6189 return convertOmpOrdered(*op, builder, moduleTranslation);
6190 })
6191 .Case([&](omp::WsloopOp) {
6192 return convertOmpWsloop(*op, builder, moduleTranslation);
6193 })
6194 .Case([&](omp::SimdOp) {
6195 return convertOmpSimd(*op, builder, moduleTranslation);
6196 })
6197 .Case([&](omp::AtomicReadOp) {
6198 return convertOmpAtomicRead(*op, builder, moduleTranslation);
6199 })
6200 .Case([&](omp::AtomicWriteOp) {
6201 return convertOmpAtomicWrite(*op, builder, moduleTranslation);
6202 })
6203 .Case([&](omp::AtomicUpdateOp op) {
6204 return convertOmpAtomicUpdate(op, builder, moduleTranslation);
6205 })
6206 .Case([&](omp::AtomicCaptureOp op) {
6207 return convertOmpAtomicCapture(op, builder, moduleTranslation);
6208 })
6209 .Case([&](omp::CancelOp op) {
6210 return convertOmpCancel(op, builder, moduleTranslation);
6211 })
6212 .Case([&](omp::CancellationPointOp op) {
6213 return convertOmpCancellationPoint(op, builder, moduleTranslation);
6214 })
6215 .Case([&](omp::SectionsOp) {
6216 return convertOmpSections(*op, builder, moduleTranslation);
6217 })
6218 .Case([&](omp::SingleOp op) {
6219 return convertOmpSingle(op, builder, moduleTranslation);
6220 })
6221 .Case([&](omp::TeamsOp op) {
6222 return convertOmpTeams(op, builder, moduleTranslation);
6223 })
6224 .Case([&](omp::TaskOp op) {
6225 return convertOmpTaskOp(op, builder, moduleTranslation);
6226 })
6227 .Case([&](omp::TaskgroupOp op) {
6228 return convertOmpTaskgroupOp(op, builder, moduleTranslation);
6229 })
6230 .Case([&](omp::TaskwaitOp op) {
6231 return convertOmpTaskwaitOp(op, builder, moduleTranslation);
6232 })
6233 .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareMapperOp,
6234 omp::DeclareMapperInfoOp, omp::DeclareReductionOp,
6235 omp::CriticalDeclareOp>([](auto op) {
6236 // `yield` and `terminator` can be just omitted. The block structure
6237 // was created in the region that handles their parent operation.
6238 // `declare_reduction` will be used by reductions and is not
6239 // converted directly, skip it.
6240 // `declare_mapper` and `declare_mapper.info` are handled whenever
6241 // they are referred to through a `map` clause.
6242 // `critical.declare` is only used to declare names of critical
6243 // sections which will be used by `critical` ops and hence can be
6244 // ignored for lowering. The OpenMP IRBuilder will create unique
6245 // name for critical section names.
6246 return success();
6247 })
6248 .Case([&](omp::ThreadprivateOp) {
6249 return convertOmpThreadprivate(*op, builder, moduleTranslation);
6250 })
6251 .Case<omp::TargetDataOp, omp::TargetEnterDataOp,
6252 omp::TargetExitDataOp, omp::TargetUpdateOp>([&](auto op) {
6253 return convertOmpTargetData(op, builder, moduleTranslation);
6254 })
6255 .Case([&](omp::TargetOp) {
6256 return convertOmpTarget(*op, builder, moduleTranslation);
6257 })
6258 .Case([&](omp::DistributeOp) {
6259 return convertOmpDistribute(*op, builder, moduleTranslation);
6260 })
6261 .Case([&](omp::LoopNestOp) {
6262 return convertOmpLoopNest(*op, builder, moduleTranslation);
6263 })
6264 .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
6265 [&](auto op) {
6266 // No-op, should be handled by relevant owning operations e.g.
6267 // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
6268 // etc. and then discarded
6269 return success();
6270 })
6271 .Case([&](omp::NewCliOp op) {
6272 // Meta-operation: Doesn't do anything by itself, but used to
6273 // identify a loop.
6274 return success();
6275 })
6276 .Case([&](omp::CanonicalLoopOp op) {
6277 return convertOmpCanonicalLoopOp(op, builder, moduleTranslation);
6278 })
6279 .Case([&](omp::UnrollHeuristicOp op) {
6280 // FIXME: Handling omp.unroll_heuristic as an executable requires
6281 // that the generator (e.g. omp.canonical_loop) has been seen first.
6282 // For constructs that require all codegen to occur inside a callback
6283 // (e.g. OpenMPIRBuilder::createParallel), all codegen of that
6284 // contained region including their transformations must occur at
6285 // the omp.canonical_loop.
6286 return applyUnrollHeuristic(op, builder, moduleTranslation);
6287 })
6288 .Case([&](omp::TileOp op) {
6289 return applyTile(op, builder, moduleTranslation);
6290 })
6291 .Case([&](omp::TargetAllocMemOp) {
6292 return convertTargetAllocMemOp(*op, builder, moduleTranslation);
6293 })
6294 .Case([&](omp::TargetFreeMemOp) {
6295 return convertTargetFreeMemOp(*op, builder, moduleTranslation);
6296 })
6297 .Default([&](Operation *inst) {
6298 return inst->emitError()
6299 << "not yet implemented: " << inst->getName();
6300 });
6301
6302 if (isOutermostLoopWrapper)
6303 moduleTranslation.stackPop();
6304
6305 return result;
6306}
6307
/// Translates an operation encountered while compiling for a target device.
/// Currently this is a thin wrapper that defers to the shared
/// host/target lowering path; the separate entry point keeps the
/// device-side dispatch in convertOperation explicit.
static LogicalResult
convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation) {
  return convertHostOrTargetOperation(op, builder, moduleTranslation);
}
6313
/// Translates `op` — and any target-related ops nested inside it — when
/// compiling for a target device. `omp.target` and `omp.target_data` ops are
/// lowered for real; other OpenMP region ops found on the walk are only given
/// a "fake" translation so that LLVM values their nested target regions
/// depend on become defined.
static LogicalResult
convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  // Fast paths: the operation itself is a target / target-data construct.
  if (isa<omp::TargetOp>(op))
    return convertOmpTarget(*op, builder, moduleTranslation);
  if (isa<omp::TargetDataOp>(op))
    return convertOmpTargetData(op, builder, moduleTranslation);
  bool interrupted =
      op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
          // Lower nested target / target-data ops directly and skip their
          // interiors — the conversion routines handle those regions.
          if (isa<omp::TargetOp>(oper)) {
            if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
              return WalkResult::interrupt();
            return WalkResult::skip();
          }
          if (isa<omp::TargetDataOp>(oper)) {
            if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
              return WalkResult::interrupt();
            return WalkResult::skip();
          }

          // Non-target ops might nest target-related ops, therefore, we
          // translate them as non-OpenMP scopes. Translating them is needed by
          // nested target-related ops since they might need LLVM values defined
          // in their parent non-target ops.
          if (isa<omp::OpenMPDialect>(oper->getDialect()) &&
              oper->getParentOfType<LLVM::LLVMFuncOp>() &&
              !oper->getRegions().empty()) {
            if (auto blockArgsIface =
                    dyn_cast<omp::BlockArgOpenMPOpInterface>(oper))
              forwardArgs(moduleTranslation, blockArgsIface);
            else {
              // Here we map entry block arguments of
              // non-BlockArgOpenMPOpInterface ops if they can be encountered
              // inside of a function and they define any of these arguments.
              if (isa<mlir::omp::AtomicUpdateOp>(oper))
                for (auto [operand, arg] :
                     llvm::zip_equal(oper->getOperands(),
                                     oper->getRegion(0).getArguments())) {
                  moduleTranslation.mapValue(
                      arg, builder.CreateLoad(
                               moduleTranslation.convertType(arg.getType()),
                               moduleTranslation.lookupValue(operand)));
                }
            }

            if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) {
              assert(builder.GetInsertBlock() &&
                     "No insert block is set for the builder");
              for (auto iv : loopNest.getIVs()) {
                // Map iv to an undefined value just to keep the IR validity.
                moduleTranslation.mapValue(
                    iv, llvm::PoisonValue::get(
                            moduleTranslation.convertType(iv.getType())));
              }
            }

            for (Region &region : oper->getRegions()) {
              // Regions are fake in the sense that they are not a truthful
              // translation of the OpenMP construct being converted (e.g. no
              // OpenMP runtime calls will be generated). We just need this to
              // prepare the kernel invocation args.
              // NOTE(review): this rendering drops two lines here (original
              // lines 6375-6376) — presumably the declarations of `phis` and
              // `result` initialized from a convertOmpOpRegions(...) call.
              // Verify against the original source file.
              region, oper->getName().getStringRef().str() + ".fake.region",
                  builder, moduleTranslation, &phis);
              if (failed(handleError(result, *oper)))
                return WalkResult::interrupt();

              builder.SetInsertPoint(result.get(), result.get()->end());
            }

            return WalkResult::skip();
          }

          return WalkResult::advance();
        }).wasInterrupted();
  // Any interrupt above signals a failed conversion.
  return failure(interrupted);
}
6392
namespace {

/// Implementation of the dialect interface that converts operations belonging
/// to the OpenMP dialect to LLVM IR.
class OpenMPDialectLLVMIRTranslationInterface
    : public LLVMTranslationDialectInterface {
public:
  // NOTE(review): this rendering skips original line 6400 here — presumably
  // the inheriting-constructor using-declaration for
  // LLVMTranslationDialectInterface. Verify against the original source file.

  /// Translates the given operation to LLVM IR using the provided IR builder
  /// and saving the state in `moduleTranslation`.
  LogicalResult
  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) const final;

  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
  /// runtime calls, or operation amendments
  LogicalResult
  amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
                 NamedAttribute attribute,
                 LLVM::ModuleTranslation &moduleTranslation) const final;
};

} // namespace
6417
6418LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
6419 Operation *op, ArrayRef<llvm::Instruction *> instructions,
6420 NamedAttribute attribute,
6421 LLVM::ModuleTranslation &moduleTranslation) const {
6422 return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
6423 attribute.getName())
6424 .Case("omp.is_target_device",
6425 [&](Attribute attr) {
6426 if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
6427 llvm::OpenMPIRBuilderConfig &config =
6428 moduleTranslation.getOpenMPBuilder()->Config;
6429 config.setIsTargetDevice(deviceAttr.getValue());
6430 return success();
6431 }
6432 return failure();
6433 })
6434 .Case("omp.is_gpu",
6435 [&](Attribute attr) {
6436 if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
6437 llvm::OpenMPIRBuilderConfig &config =
6438 moduleTranslation.getOpenMPBuilder()->Config;
6439 config.setIsGPU(gpuAttr.getValue());
6440 return success();
6441 }
6442 return failure();
6443 })
6444 .Case("omp.host_ir_filepath",
6445 [&](Attribute attr) {
6446 if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
6447 llvm::OpenMPIRBuilder *ompBuilder =
6448 moduleTranslation.getOpenMPBuilder();
6449 auto VFS = llvm::vfs::getRealFileSystem();
6450 ompBuilder->loadOffloadInfoMetadata(*VFS,
6451 filepathAttr.getValue());
6452 return success();
6453 }
6454 return failure();
6455 })
6456 .Case("omp.flags",
6457 [&](Attribute attr) {
6458 if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
6459 return convertFlagsAttr(op, rtlAttr, moduleTranslation);
6460 return failure();
6461 })
6462 .Case("omp.version",
6463 [&](Attribute attr) {
6464 if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
6465 llvm::OpenMPIRBuilder *ompBuilder =
6466 moduleTranslation.getOpenMPBuilder();
6467 ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
6468 versionAttr.getVersion());
6469 return success();
6470 }
6471 return failure();
6472 })
6473 .Case("omp.declare_target",
6474 [&](Attribute attr) {
6475 if (auto declareTargetAttr =
6476 dyn_cast<omp::DeclareTargetAttr>(attr))
6477 return convertDeclareTargetAttr(op, declareTargetAttr,
6478 moduleTranslation);
6479 return failure();
6480 })
6481 .Case("omp.requires",
6482 [&](Attribute attr) {
6483 if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
6484 using Requires = omp::ClauseRequires;
6485 Requires flags = requiresAttr.getValue();
6486 llvm::OpenMPIRBuilderConfig &config =
6487 moduleTranslation.getOpenMPBuilder()->Config;
6488 config.setHasRequiresReverseOffload(
6489 bitEnumContainsAll(flags, Requires::reverse_offload));
6490 config.setHasRequiresUnifiedAddress(
6491 bitEnumContainsAll(flags, Requires::unified_address));
6492 config.setHasRequiresUnifiedSharedMemory(
6493 bitEnumContainsAll(flags, Requires::unified_shared_memory));
6494 config.setHasRequiresDynamicAllocators(
6495 bitEnumContainsAll(flags, Requires::dynamic_allocators));
6496 return success();
6497 }
6498 return failure();
6499 })
6500 .Case("omp.target_triples",
6501 [&](Attribute attr) {
6502 if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
6503 llvm::OpenMPIRBuilderConfig &config =
6504 moduleTranslation.getOpenMPBuilder()->Config;
6505 config.TargetTriples.clear();
6506 config.TargetTriples.reserve(triplesAttr.size());
6507 for (Attribute tripleAttr : triplesAttr) {
6508 if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
6509 config.TargetTriples.emplace_back(tripleStrAttr.getValue());
6510 else
6511 return failure();
6512 }
6513 return success();
6514 }
6515 return failure();
6516 })
6517 .Default([](Attribute) {
6518 // Fall through for omp attributes that do not require lowering.
6519 return success();
6520 })(attribute.getValue());
6521
6522 return failure();
6523}
6524
6525/// Given an OpenMP MLIR operation, create the corresponding LLVM IR
6526/// (including OpenMP runtime calls).
6527LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
6528 Operation *op, llvm::IRBuilderBase &builder,
6529 LLVM::ModuleTranslation &moduleTranslation) const {
6530
6531 llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
6532 if (ompBuilder->Config.isTargetDevice()) {
6533 if (isTargetDeviceOp(op)) {
6534 return convertTargetDeviceOp(op, builder, moduleTranslation);
6535 }
6536 return convertTargetOpsInNest(op, builder, moduleTranslation);
6537 }
6538 return convertHostOrTargetOperation(op, builder, moduleTranslation);
6539}
6540
  // NOTE(review): the enclosing function signature (original line 6541,
  // presumably `void mlir::registerOpenMPDialectTranslation(DialectRegistry
  // &registry)`) is missing from this rendering — verify against the
  // original source file.
  // Register the OpenMP dialect and attach the LLVM-IR translation interface
  // to it whenever the dialect is loaded into a context.
  registry.insert<omp::OpenMPDialect>();
  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
    dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
  });
}
6547
  // NOTE(review): this rendering is missing the enclosing function signature
  // (original line 6548) and a body line (original line 6550, presumably the
  // call that populates `registry` via the DialectRegistry overload) —
  // verify against the original source file.
  DialectRegistry registry;
  context.appendDialectRegistry(registry);
}
for(Operation *op :ops)
return success()
lhs
static ze_device_handle_t getDevice(const uint32_t driverIdx=0, const int32_t devIdx=0)
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
ArrayAttr()
auto load
static bool mapTypeToBool(ClauseMapFlags value, ClauseMapFlags flag)
static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::Function *func)
static LogicalResult convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static llvm::omp::OrderKind convertOrderKind(std::optional< omp::ClauseOrderKind > o)
Convert Order attribute to llvm::omp::OrderKind.
static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp)
static llvm::Expected< llvm::Function * > emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::StringRef mapperFuncName)
static LogicalResult convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertToCaptureClauseKind(mlir::omp::DeclareTargetCaptureClause captureClause)
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, bool first)
static OpTy castOrGetParentOfType(Operation *op, bool immediateParent=false)
If op is of the given type parameter, return it casted to that type. Otherwise, if its immediate pare...
static LogicalResult convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered_region' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an omp.atomic.write operation to LLVM IR.
static OwningAtomicReductionGen makeAtomicReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible atomic reduction generator for the given reduction declaration.
static llvm::Expected< llvm::Value * > initPrivateVar(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg, llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Initialize a single (first)private variable. You probably want to use allocateAndInitPrivateVars inst...
static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Apply a #pragma omp unroll / "!$omp unroll" transformation using the OpenMPIRBuilder.
static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static llvm::Value * findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Return the llvm::Value * corresponding to the privateVar that is being privatized....
static ArrayRef< bool > getIsByRef(std::optional< ArrayRef< bool > > attr)
static LogicalResult convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
static void popCancelFinalizationCB(const ArrayRef< llvm::BranchInst * > cancelTerminators, llvm::OpenMPIRBuilder &ompBuilder, const llvm::OpenMPIRBuilder::InsertPointTy &afterIP)
If we cancelled the construct, we should branch to the finalization block of that construct....
static LogicalResult cleanupPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, Location loc, SmallVectorImpl< llvm::Value * > &llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls)
static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static llvm::AtomicOrdering convertAtomicOrdering(std::optional< omp::ClauseMemoryOrderKind > ao)
Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static void setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder, llvm::BasicBlock *block=nullptr)
static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCanonicalLoopOp(omp::CanonicalLoopOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert an omp.canonical_loop to LLVM-IR.
static LogicalResult convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static std::optional< int64_t > extractConstInteger(Value value)
If the given value is defined by an llvm.mlir.constant operation and it is of an integer type,...
static LogicalResult allocAndInitializeReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef)
static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::Function * getOmpTargetAlloc(llvm::IRBuilderBase &builder, llvm::Module *llvmModule)
static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, MapInfosTy &combinedInfo, bool isTargetParams, int mapDataParentIdx=-1)
static llvm::Expected< llvm::BasicBlock * > allocatePrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Allocate and initialize delayed private variables. Returns the basic block which comes after all of t...
static LogicalResult convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
static bool isTargetDeviceOp(Operation *op)
static LogicalResult inlineOmpRegionCleanup(llvm::SmallVectorImpl< Region * > &cleanupRegions, llvm::ArrayRef< llvm::Value * > privateVariables, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, StringRef regionName, bool shouldLoadCleanupRegionArg=true)
handling of DeclareReductionOp's cleanup region
static llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op)
Converts an LLVM dialect binary operation to the corresponding enum value for atomicrmw supported bin...
static void pushCancelFinalizationCB(SmallVectorImpl< llvm::BranchInst * > &cancelTerminators, llvm::IRBuilderBase &llvmBuilder, llvm::OpenMPIRBuilder &ompBuilder, mlir::Operation *op, llvm::omp::Directive cancelDirective)
Shared implementation of a callback which adds a terminator for the new block created for the branch t...
static void mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, DenseMap< Value, llvm::Value * > &reductionVariableMap, unsigned i)
Map input arguments to reduction initialization region.
static LogicalResult convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp)
static llvm::SmallString< 64 > getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, llvm::OpenMPIRBuilder &ompBuilder)
static llvm::Value * getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::Value *basePointer, llvm::Type *baseType, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::Error initPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static llvm::CanonicalLoopInfo * findCurrentLoopInfo(LLVM::ModuleTranslation &moduleTranslation)
Find the loop information structure for the loop nest being translated.
static llvm::Expected< llvm::Function * > getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static OwningReductionGen makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible reduction generator for the given reduction declaration.
static std::vector< llvm::Value * > calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, bool isArrayTy, OperandRange bounds)
This function calculates the array/pointer offset for map data provided with bounds operations,...
static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts the OpenMP parallel operation to LLVM IR.
static llvm::OpenMPIRBuilder::InsertPointTy findAllocaInsertPoint(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Find the insertion point for allocas given the current insertion point for normal operations in the b...
static LogicalResult inlineConvertOmpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::Value * > *continuationBlockArgs=nullptr)
Translates the blocks contained in the given region and appends them to at the current insertion poin...
static LogicalResult convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP Threadprivate operation into LLVM IR using OpenMPIRBuilder.
static omp::PrivateClauseOp findPrivatizer(Operation *from, SymbolRefAttr symbolName)
Looks up from the operation from and returns the PrivateClauseOp with name symbolName.
static LogicalResult convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, bool isTargetParams=false)
static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl)
static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, omp::TargetOp targetOp, llvm::StringRef parentName="")
static llvm::Value * getRefPtrIfDeclareTarget(mlir::Value value, LLVM::ModuleTranslation &moduleTranslation)
static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, Value &numTeamsLower, Value &numTeamsUpper, Value &threadLimit, llvm::SmallVectorImpl< Value > *lowerBounds=nullptr, llvm::SmallVectorImpl< Value > *upperBounds=nullptr, llvm::SmallVectorImpl< Value > *steps=nullptr)
Follow uses of host_eval-defined block arguments of the given omp.target operation and populate outpu...
static llvm::Expected< llvm::BasicBlock * > convertOmpOpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::PHINode * > *continuationBlockPHIs=nullptr)
Converts the given region that appears within an OpenMP dialect operation to LLVM IR,...
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation, omp::BlockArgOpenMPOpInterface blockArgIface)
Maps block arguments from blockArgIface (which are MLIR values) to the corresponding LLVM values of t...
static LogicalResult allocReductionVars(T loop, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, SmallVectorImpl< DeferredStore > &deferredStores, llvm::ArrayRef< bool > isByRefs)
Allocate space for privatized reduction variables.
static bool constructIsCancellable(Operation *op)
Returns true if the construct contains omp.cancel or omp.cancellation_point.
static llvm::omp::OpenMPOffloadMappingFlags convertClauseMapFlags(omp::ClauseMapFlags mlirFlags)
static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP)
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind)
Convert ProcBindKind from MLIR-generated enum to LLVM enum.
static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl)
static void collectReductionDecls(T op, SmallVectorImpl< omp::DeclareReductionOp > &reductions)
Populates reductions with reduction declarations used in the given op.
static LogicalResult handleError(llvm::Error error, Operation &op)
static LogicalResult copyFirstPrivateVars(mlir::Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< mlir::Value > &mlirPrivateVars, ArrayRef< llvm::Value * > llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls, bool insertBarrier, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static void processMapMembersWithParent(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag)
static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause)
static LogicalResult checkImplementationStatus(Operation &op)
Check whether translation to LLVM IR for the given operation is currently supported.
static LogicalResult createReductionsAndCleanup(OP op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, ArrayRef< llvm::Value * > privateReductionVariables, ArrayRef< bool > isByRef, bool isNowait=false, bool isTeamsReduction=false)
static LogicalResult convertOmpCancellationPoint(omp::CancellationPointOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static uint64_t getReductionDataSize(OpTy &op)
static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert omp.atomic.read operation to LLVM IR.
static llvm::omp::Directive convertCancellationConstructType(omp::ClauseCancellationConstructType directive)
static void initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs, bool isTargetDevice, bool isGPU)
Populate default MinTeams, MaxTeams and MaxThreads to their default values as stated by the correspon...
static void initTargetRuntimeAttrs(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs)
Gather LLVM runtime values for all clauses evaluated in the host that are passed to the kernel invoca...
static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Given an OpenMP MLIR operation, create the corresponding LLVM IR (including OpenMP runtime calls).
static LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
Lowers the FlagsAttr which is applied to the module on the device pass when offloading,...
static bool checkIfPointerMap(omp::MapInfoOp mapOp)
static LogicalResult applyTile(omp::TileOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Apply a #pragma omp tile / !$omp tile transformation using the OpenMPIRBuilder.
static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void buildDependData(std::optional< ArrayAttr > dependKinds, OperandRange dependVars, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::OpenMPIRBuilder::DependData > &dds)
static llvm::Function * getOmpTargetFree(llvm::IRBuilderBase &builder, llvm::Module *llvmModule)
static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
static void collectMapDataFromMapOperands(MapInfoData &mapData, SmallVectorImpl< Value > &mapVars, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::IRBuilderBase &builder, ArrayRef< Value > useDevPtrOperands={}, ArrayRef< Value > useDevAddrOperands={}, ArrayRef< Value > hasDevAddrOperands={})
static bool isDeclareTargetLink(mlir::Value value)
static void extractAtomicControlFlags(omp::AtomicUpdateOp atomicUpdateOp, bool &isIgnoreDenormalMode, bool &isFineGrainedMemory, bool &isRemoteMemory)
static Operation * genLoop(CodegenEnv &env, OpBuilder &builder, LoopId curr, unsigned numCases, bool needsUniv, ArrayRef< TensorLevel > tidLvls)
Generates a for-loop or a while-loop, depending on whether it implements singleton iteration or co-it...
#define MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CLASS_NAME)
Definition TypeID.h:331
This class represents an argument of a Block.
Definition Value.h:309
Block represents an ordered list of Operations.
Definition Block.h:33
BlockArgument getArgument(unsigned i)
Definition Block.h:129
unsigned getNumArguments()
Definition Block.h:128
Operation & front()
Definition Block.h:153
Operation & back()
Definition Block.h:152
Operation * getTerminator()
Get the terminator operation of this block.
Definition Block.cpp:244
iterator begin()
Definition Block.h:143
The main mechanism for performing data layout queries.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
An instance of this location represents a tuple of file, line number, and column number.
Definition Location.h:174
Implementation class for module translation.
llvm::BasicBlock * lookupBlock(Block *block) const
Finds an LLVM IR basic block that corresponds to the given MLIR block.
WalkResult stackWalk(llvm::function_ref< WalkResult(T &)> callback)
Calls callback for every ModuleTranslation stack frame of type T starting from the top of the stack.
void stackPush(Args &&...args)
Creates a stack frame of type T on ModuleTranslation stack.
LogicalResult convertBlock(Block &bb, bool ignoreArguments, llvm::IRBuilderBase &builder)
Translates the contents of the given block to LLVM IR using this translator.
SmallVector< llvm::Value * > lookupValues(ValueRange values)
Looks up a list of remapped values.
void mapFunction(StringRef name, llvm::Function *func)
Stores the mapping between a function name and its LLVM IR representation.
llvm::Value * lookupValue(Value value) const
Finds an LLVM IR value corresponding to the given MLIR value.
void invalidateOmpLoop(omp::NewCliOp mlir)
Mark an OpenMP loop as having been consumed.
SymbolTableCollection & symbolTable()
llvm::Type * convertType(Type type)
Converts the type from MLIR LLVM dialect to LLVM.
llvm::OpenMPIRBuilder * getOpenMPBuilder()
Returns the OpenMP IR builder associated with the LLVM IR module being constructed.
void mapOmpLoop(omp::NewCliOp mlir, llvm::CanonicalLoopInfo *llvm)
Map an MLIR OpenMP dialect CanonicalLoopInfo to its lowered LLVM-IR OpenMPIRBuilder CanonicalLoopInfo...
llvm::GlobalValue * lookupGlobal(Operation *op)
Finds an LLVM IR global value that corresponds to the given MLIR operation defining a global value.
SaveStateStack< T, ModuleTranslation > SaveStack
RAII object calling stackPush/stackPop on construction/destruction.
llvm::Function * lookupFunction(StringRef name) const
Finds an LLVM IR function by its name.
void mapBlock(Block *mlir, llvm::BasicBlock *llvm)
Stores the mapping between an MLIR block and LLVM IR basic block.
llvm::Module * getLLVMModule()
Returns the LLVM module in which the IR is being constructed.
void stackPop()
Pops the last element from the ModuleTranslation stack.
void forgetMapping(Region &region)
Removes the mapping for blocks contained in the region and values defined in these blocks.
void mapValue(Value mlir, llvm::Value *llvm)
Stores the mapping between an MLIR value and its LLVM IR counterpart.
llvm::CanonicalLoopInfo * lookupOMPLoop(omp::NewCliOp mlir) const
Find the LLVM-IR loop that represents an MLIR loop.
llvm::LLVMContext & getLLVMContext() const
Returns the LLVM context in which the IR is being constructed.
Utility class to translate MLIR LLVM dialect types to LLVM IR.
Definition TypeToLLVM.h:39
unsigned getPreferredAlignment(Type type, const llvm::DataLayout &layout)
Returns the preferred alignment for the type given the data layout.
T findInstanceOf()
Return an instance of the given location type if one is nested under the current location.
Definition Location.h:45
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
StringAttr getName() const
Return the name of the attribute.
Attribute getValue() const
Return the value of the attribute.
Definition Attributes.h:179
This class implements the operand iterators for the Operation class.
Definition ValueRange.h:43
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
Dialect * getDialect()
Return the dialect this operation is associated with, or nullptr if the associated dialect is not loa...
Definition Operation.h:220
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition Operation.h:686
Value getOperand(unsigned idx)
Definition Operation.h:350
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition Operation.h:407
Location getLoc()
The source location the operation was defined or derived from.
Definition Operation.h:223
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition Operation.h:234
unsigned getNumOperands()
Definition Operation.h:346
OperandRange operand_range
Definition Operation.h:371
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition Operation.h:238
OperationName getName()
The name of an operation is the key identifier for it.
Definition Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition Operation.h:378
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition Operation.h:797
user_range getUsers()
Returns a range of all users.
Definition Operation.h:873
MLIRContext * getContext()
Return the context this operation is associated with.
Definition Operation.h:216
void erase()
Remove this operation from its parent block and delete it.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
Block & front()
Definition Region.h:65
BlockArgListType getArguments()
Definition Region.h:81
bool empty()
Definition Region.h:60
unsigned getNumArguments()
Definition Region.h:123
iterator begin()
Definition Region.h:55
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition Region.h:200
BlockListType & getBlocks()
Definition Region.h:45
bool hasOneBlock()
Return true if this region has exactly one block.
Definition Region.h:68
Concrete CRTP base class for StateStack frames.
Definition StateStack.h:47
@ Private
The symbol is private and may only be referenced by SymbolRefAttrs local to the operations within the...
Definition SymbolTable.h:97
static Operation * lookupNearestSymbolFrom(Operation *from, StringAttr symbol)
Returns the operation registered with the given symbol name within the closest parent operation of,...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Type getType() const
Return the type of this value.
Definition Value.h:105
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
A utility result that is used to signal how to proceed with an ongoing walk:
Definition WalkResult.h:29
static WalkResult skip()
Definition WalkResult.h:48
static WalkResult advance()
Definition WalkResult.h:47
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition WalkResult.h:51
static WalkResult interrupt()
Definition WalkResult.h:46
The OpAsmOpInterface, see OpAsmInterface.td for more details.
Definition CallGraph.h:229
void connectPHINodes(Region &region, const ModuleTranslation &state)
For all blocks in the region that were converted to LLVM IR using the given ModuleTranslation,...
llvm::Constant * createMappingInformation(Location loc, llvm::OpenMPIRBuilder &builder)
Create a constant string representing the mapping information extracted from the MLIR location inform...
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:561
Include the generated interface declarations.
SetVector< Block * > getBlocksSortedByDominance(Region &region)
Gets a list of blocks that is sorted according to dominance.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition Utils.cpp:304
const FrozenRewritePatternSet GreedyRewriteConfig config
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
void registerOpenMPDialectTranslation(DialectRegistry &registry)
Register the OpenMP dialect and the translation from it to the LLVM IR in the given registry.
llvm::SetVector< T, Vector, Set, N > SetVector
Definition LLVM.h:131
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
Definition Utils.cpp:1293
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
Definition LLVM.h:126
A util to collect info needed to convert delayed privatizers from MLIR to LLVM.
SmallVector< mlir::Value > mlirVars
SmallVector< omp::PrivateClauseOp > privatizers
MutableArrayRef< BlockArgument > blockArgs
SmallVector< llvm::Value * > llvmVars