MLIR  22.0.0git
OpenMPToLLVMIRTranslation.cpp
Go to the documentation of this file.
1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
#include "mlir/Analysis/TopologicalSortUtils.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/Operation.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Support/StateStack.h"
#include "mlir/Target/LLVMIR/Dialect/OpenMPCommon.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <cstdint>
#include <iterator>
#include <numeric>
#include <optional>
#include <utility>

44 
45 using namespace mlir;
46 
47 namespace {
48 static llvm::omp::ScheduleKind
49 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
50  if (!schedKind.has_value())
51  return llvm::omp::OMP_SCHEDULE_Default;
52  switch (schedKind.value()) {
53  case omp::ClauseScheduleKind::Static:
54  return llvm::omp::OMP_SCHEDULE_Static;
55  case omp::ClauseScheduleKind::Dynamic:
56  return llvm::omp::OMP_SCHEDULE_Dynamic;
57  case omp::ClauseScheduleKind::Guided:
58  return llvm::omp::OMP_SCHEDULE_Guided;
59  case omp::ClauseScheduleKind::Auto:
60  return llvm::omp::OMP_SCHEDULE_Auto;
62  return llvm::omp::OMP_SCHEDULE_Runtime;
63  }
64  llvm_unreachable("unhandled schedule clause argument");
65 }
66 
/// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
/// insertion points for allocas.
class OpenMPAllocaStackFrame
    : public StateStackFrameBase<OpenMPAllocaStackFrame> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)

  /// \p allocaIP is the insertion point that nested operations should use for
  /// their allocas while this frame is on the stack.
  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
      : allocaInsertPoint(allocaIP) {}

  // Saved alloca insertion point, consumed by \c findAllocaInsertPoint.
  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
};
78 
/// Stack frame to hold a \see llvm::CanonicalLoopInfo representing the
/// collapsed canonical loop information corresponding to an \c omp.loop_nest
/// operation.
class OpenMPLoopInfoStackFrame
    : public StateStackFrameBase<OpenMPLoopInfoStackFrame> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPLoopInfoStackFrame)

  // Filled in by loop-nest translation and read back through
  // \c findCurrentLoopInfo; remains null until the loop body is translated.
  llvm::CanonicalLoopInfo *loopInfo = nullptr;
};
88 
89 /// Custom error class to signal translation errors that don't need reporting,
90 /// since encountering them will have already triggered relevant error messages.
91 ///
92 /// Its purpose is to serve as the glue between MLIR failures represented as
93 /// \see LogicalResult instances and \see llvm::Error instances used to
94 /// propagate errors through the \see llvm::OpenMPIRBuilder. Generally, when an
95 /// error of the first type is raised, a message is emitted directly (the \see
96 /// LogicalResult itself does not hold any information). If we need to forward
97 /// this error condition as an \see llvm::Error while avoiding triggering some
98 /// redundant error reporting later on, we need a custom \see llvm::ErrorInfo
99 /// class to just signal this situation has happened.
100 ///
101 /// For example, this class should be used to trigger errors from within
102 /// callbacks passed to the \see OpenMPIRBuilder when they were triggered by the
103 /// translation of their own regions. This unclutters the error log from
104 /// redundant messages.
105 class PreviouslyReportedError
106  : public llvm::ErrorInfo<PreviouslyReportedError> {
107 public:
108  void log(raw_ostream &) const override {
109  // Do not log anything.
110  }
111 
112  std::error_code convertToErrorCode() const override {
113  llvm_unreachable(
114  "PreviouslyReportedError doesn't support ECError conversion");
115  }
116 
117  // Used by ErrorInfo::classID.
118  static char ID;
119 };
120 
122 
123 /*
124  * Custom class for processing linear clause for omp.wsloop
125  * and omp.simd. Linear clause translation requires setup,
126  * initialization, update, and finalization at varying
127  * basic blocks in the IR. This class helps maintain
128  * internal state to allow consistent translation in
129  * each of these stages.
130  */
131 
132 class LinearClauseProcessor {
133 
134 private:
135  SmallVector<llvm::Value *> linearPreconditionVars;
136  SmallVector<llvm::Value *> linearLoopBodyTemps;
137  SmallVector<llvm::AllocaInst *> linearOrigVars;
138  SmallVector<llvm::Value *> linearOrigVal;
139  SmallVector<llvm::Value *> linearSteps;
140  llvm::BasicBlock *linearFinalizationBB;
141  llvm::BasicBlock *linearExitBB;
142  llvm::BasicBlock *linearLastIterExitBB;
143 
144 public:
145  // Allocate space for linear variabes
146  void createLinearVar(llvm::IRBuilderBase &builder,
147  LLVM::ModuleTranslation &moduleTranslation,
148  mlir::Value &linearVar) {
149  if (llvm::AllocaInst *linearVarAlloca = dyn_cast<llvm::AllocaInst>(
150  moduleTranslation.lookupValue(linearVar))) {
151  linearPreconditionVars.push_back(builder.CreateAlloca(
152  linearVarAlloca->getAllocatedType(), nullptr, ".linear_var"));
153  llvm::Value *linearLoopBodyTemp = builder.CreateAlloca(
154  linearVarAlloca->getAllocatedType(), nullptr, ".linear_result");
155  linearOrigVal.push_back(moduleTranslation.lookupValue(linearVar));
156  linearLoopBodyTemps.push_back(linearLoopBodyTemp);
157  linearOrigVars.push_back(linearVarAlloca);
158  }
159  }
160 
161  // Initialize linear step
162  inline void initLinearStep(LLVM::ModuleTranslation &moduleTranslation,
163  mlir::Value &linearStep) {
164  linearSteps.push_back(moduleTranslation.lookupValue(linearStep));
165  }
166 
167  // Emit IR for initialization of linear variables
168  llvm::OpenMPIRBuilder::InsertPointOrErrorTy
169  initLinearVar(llvm::IRBuilderBase &builder,
170  LLVM::ModuleTranslation &moduleTranslation,
171  llvm::BasicBlock *loopPreHeader) {
172  builder.SetInsertPoint(loopPreHeader->getTerminator());
173  for (size_t index = 0; index < linearOrigVars.size(); index++) {
174  llvm::LoadInst *linearVarLoad = builder.CreateLoad(
175  linearOrigVars[index]->getAllocatedType(), linearOrigVars[index]);
176  builder.CreateStore(linearVarLoad, linearPreconditionVars[index]);
177  }
178  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
179  moduleTranslation.getOpenMPBuilder()->createBarrier(
180  builder.saveIP(), llvm::omp::OMPD_barrier);
181  return afterBarrierIP;
182  }
183 
184  // Emit IR for updating Linear variables
185  void updateLinearVar(llvm::IRBuilderBase &builder, llvm::BasicBlock *loopBody,
186  llvm::Value *loopInductionVar) {
187  builder.SetInsertPoint(loopBody->getTerminator());
188  for (size_t index = 0; index < linearPreconditionVars.size(); index++) {
189  // Emit increments for linear vars
190  llvm::LoadInst *linearVarStart =
191  builder.CreateLoad(linearOrigVars[index]->getAllocatedType(),
192 
193  linearPreconditionVars[index]);
194  auto mulInst = builder.CreateMul(loopInductionVar, linearSteps[index]);
195  auto addInst = builder.CreateAdd(linearVarStart, mulInst);
196  builder.CreateStore(addInst, linearLoopBodyTemps[index]);
197  }
198  }
199 
200  // Linear variable finalization is conditional on the last logical iteration.
201  // Create BB splits to manage the same.
202  void outlineLinearFinalizationBB(llvm::IRBuilderBase &builder,
203  llvm::BasicBlock *loopExit) {
204  linearFinalizationBB = loopExit->splitBasicBlock(
205  loopExit->getTerminator(), "omp_loop.linear_finalization");
206  linearExitBB = linearFinalizationBB->splitBasicBlock(
207  linearFinalizationBB->getTerminator(), "omp_loop.linear_exit");
208  linearLastIterExitBB = linearFinalizationBB->splitBasicBlock(
209  linearFinalizationBB->getTerminator(), "omp_loop.linear_lastiter_exit");
210  }
211 
212  // Finalize the linear vars
213  llvm::OpenMPIRBuilder::InsertPointOrErrorTy
214  finalizeLinearVar(llvm::IRBuilderBase &builder,
215  LLVM::ModuleTranslation &moduleTranslation,
216  llvm::Value *lastIter) {
217  // Emit condition to check whether last logical iteration is being executed
218  builder.SetInsertPoint(linearFinalizationBB->getTerminator());
219  llvm::Value *loopLastIterLoad = builder.CreateLoad(
220  llvm::Type::getInt32Ty(builder.getContext()), lastIter);
221  llvm::Value *isLast =
222  builder.CreateCmp(llvm::CmpInst::ICMP_NE, loopLastIterLoad,
224  llvm::Type::getInt32Ty(builder.getContext()), 0));
225  // Store the linear variable values to original variables.
226  builder.SetInsertPoint(linearLastIterExitBB->getTerminator());
227  for (size_t index = 0; index < linearOrigVars.size(); index++) {
228  llvm::LoadInst *linearVarTemp =
229  builder.CreateLoad(linearOrigVars[index]->getAllocatedType(),
230  linearLoopBodyTemps[index]);
231  builder.CreateStore(linearVarTemp, linearOrigVars[index]);
232  }
233 
234  // Create conditional branch such that the linear variable
235  // values are stored to original variables only at the
236  // last logical iteration
237  builder.SetInsertPoint(linearFinalizationBB->getTerminator());
238  builder.CreateCondBr(isLast, linearLastIterExitBB, linearExitBB);
239  linearFinalizationBB->getTerminator()->eraseFromParent();
240  // Emit barrier
241  builder.SetInsertPoint(linearExitBB->getTerminator());
242  return moduleTranslation.getOpenMPBuilder()->createBarrier(
243  builder.saveIP(), llvm::omp::OMPD_barrier);
244  }
245 
246  // Rewrite all uses of the original variable in `BBName`
247  // with the linear variable in-place
248  void rewriteInPlace(llvm::IRBuilderBase &builder, std::string BBName,
249  size_t varIndex) {
251  for (llvm::User *user : linearOrigVal[varIndex]->users())
252  users.push_back(user);
253  for (auto *user : users) {
254  if (auto *userInst = dyn_cast<llvm::Instruction>(user)) {
255  if (userInst->getParent()->getName().str() == BBName)
256  user->replaceUsesOfWith(linearOrigVal[varIndex],
257  linearLoopBodyTemps[varIndex]);
258  }
259  }
260  }
261 };
262 
263 } // namespace
264 
265 /// Looks up from the operation from and returns the PrivateClauseOp with
266 /// name symbolName
267 static omp::PrivateClauseOp findPrivatizer(Operation *from,
268  SymbolRefAttr symbolName) {
269  omp::PrivateClauseOp privatizer =
270  SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
271  symbolName);
272  assert(privatizer && "privatizer not found in the symbol table");
273  return privatizer;
274 }
275 
276 /// Check whether translation to LLVM IR for the given operation is currently
277 /// supported. If not, descriptive diagnostics will be emitted to let users know
278 /// this is a not-yet-implemented feature.
279 ///
280 /// \returns success if no unimplemented features are needed to translate the
281 /// given operation.
282 static LogicalResult checkImplementationStatus(Operation &op) {
283  auto todo = [&op](StringRef clauseName) {
284  return op.emitError() << "not yet implemented: Unhandled clause "
285  << clauseName << " in " << op.getName()
286  << " operation";
287  };
288 
289  auto checkAllocate = [&todo](auto op, LogicalResult &result) {
290  if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
291  result = todo("allocate");
292  };
293  auto checkBare = [&todo](auto op, LogicalResult &result) {
294  if (op.getBare())
295  result = todo("ompx_bare");
296  };
297  auto checkCancelDirective = [&todo](auto op, LogicalResult &result) {
298  omp::ClauseCancellationConstructType cancelledDirective =
299  op.getCancelDirective();
300  // Cancelling a taskloop is not yet supported because we don't yet have LLVM
301  // IR conversion for taskloop
302  if (cancelledDirective == omp::ClauseCancellationConstructType::Taskgroup) {
303  Operation *parent = op->getParentOp();
304  while (parent) {
305  if (parent->getDialect() == op->getDialect())
306  break;
307  parent = parent->getParentOp();
308  }
309  if (isa_and_nonnull<omp::TaskloopOp>(parent))
310  result = todo("cancel directive inside of taskloop");
311  }
312  };
313  auto checkDepend = [&todo](auto op, LogicalResult &result) {
314  if (!op.getDependVars().empty() || op.getDependKinds())
315  result = todo("depend");
316  };
317  auto checkDevice = [&todo](auto op, LogicalResult &result) {
318  if (op.getDevice())
319  result = todo("device");
320  };
321  auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
322  if (op.getDistScheduleChunkSize())
323  result = todo("dist_schedule with chunk_size");
324  };
325  auto checkHint = [](auto op, LogicalResult &) {
326  if (op.getHint())
327  op.emitWarning("hint clause discarded");
328  };
329  auto checkInReduction = [&todo](auto op, LogicalResult &result) {
330  if (!op.getInReductionVars().empty() || op.getInReductionByref() ||
331  op.getInReductionSyms())
332  result = todo("in_reduction");
333  };
334  auto checkIsDevicePtr = [&todo](auto op, LogicalResult &result) {
335  if (!op.getIsDevicePtrVars().empty())
336  result = todo("is_device_ptr");
337  };
338  auto checkLinear = [&todo](auto op, LogicalResult &result) {
339  if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
340  result = todo("linear");
341  };
342  auto checkNowait = [&todo](auto op, LogicalResult &result) {
343  if (op.getNowait())
344  result = todo("nowait");
345  };
346  auto checkOrder = [&todo](auto op, LogicalResult &result) {
347  if (op.getOrder() || op.getOrderMod())
348  result = todo("order");
349  };
350  auto checkParLevelSimd = [&todo](auto op, LogicalResult &result) {
351  if (op.getParLevelSimd())
352  result = todo("parallelization-level");
353  };
354  auto checkPriority = [&todo](auto op, LogicalResult &result) {
355  if (op.getPriority())
356  result = todo("priority");
357  };
358  auto checkPrivate = [&todo](auto op, LogicalResult &result) {
359  if constexpr (std::is_same_v<std::decay_t<decltype(op)>, omp::TargetOp>) {
360  // Privatization is supported only for included target tasks.
361  if (!op.getPrivateVars().empty() && op.getNowait())
362  result = todo("privatization for deferred target tasks");
363  } else {
364  if (!op.getPrivateVars().empty() || op.getPrivateSyms())
365  result = todo("privatization");
366  }
367  };
368  auto checkReduction = [&todo](auto op, LogicalResult &result) {
369  if (isa<omp::TeamsOp>(op))
370  if (!op.getReductionVars().empty() || op.getReductionByref() ||
371  op.getReductionSyms())
372  result = todo("reduction");
373  if (op.getReductionMod() &&
374  op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
375  result = todo("reduction with modifier");
376  };
377  auto checkTaskReduction = [&todo](auto op, LogicalResult &result) {
378  if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() ||
379  op.getTaskReductionSyms())
380  result = todo("task_reduction");
381  };
382  auto checkUntied = [&todo](auto op, LogicalResult &result) {
383  if (op.getUntied())
384  result = todo("untied");
385  };
386 
387  LogicalResult result = success();
389  .Case([&](omp::CancelOp op) { checkCancelDirective(op, result); })
390  .Case([&](omp::CancellationPointOp op) {
391  checkCancelDirective(op, result);
392  })
393  .Case([&](omp::DistributeOp op) {
394  checkAllocate(op, result);
395  checkDistSchedule(op, result);
396  checkOrder(op, result);
397  })
398  .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
399  .Case([&](omp::SectionsOp op) {
400  checkAllocate(op, result);
401  checkPrivate(op, result);
402  checkReduction(op, result);
403  })
404  .Case([&](omp::SingleOp op) {
405  checkAllocate(op, result);
406  checkPrivate(op, result);
407  })
408  .Case([&](omp::TeamsOp op) {
409  checkAllocate(op, result);
410  checkPrivate(op, result);
411  })
412  .Case([&](omp::TaskOp op) {
413  checkAllocate(op, result);
414  checkInReduction(op, result);
415  })
416  .Case([&](omp::TaskgroupOp op) {
417  checkAllocate(op, result);
418  checkTaskReduction(op, result);
419  })
420  .Case([&](omp::TaskwaitOp op) {
421  checkDepend(op, result);
422  checkNowait(op, result);
423  })
424  .Case([&](omp::TaskloopOp op) {
425  // TODO: Add other clauses check
426  checkUntied(op, result);
427  checkPriority(op, result);
428  })
429  .Case([&](omp::WsloopOp op) {
430  checkAllocate(op, result);
431  checkLinear(op, result);
432  checkOrder(op, result);
433  checkReduction(op, result);
434  })
435  .Case([&](omp::ParallelOp op) {
436  checkAllocate(op, result);
437  checkReduction(op, result);
438  })
439  .Case([&](omp::SimdOp op) {
440  checkLinear(op, result);
441  checkReduction(op, result);
442  })
443  .Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp,
444  omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); })
445  .Case<omp::TargetEnterDataOp, omp::TargetExitDataOp, omp::TargetUpdateOp>(
446  [&](auto op) { checkDepend(op, result); })
447  .Case([&](omp::TargetOp op) {
448  checkAllocate(op, result);
449  checkBare(op, result);
450  checkDevice(op, result);
451  checkInReduction(op, result);
452  checkIsDevicePtr(op, result);
453  checkPrivate(op, result);
454  })
455  .Default([](Operation &) {
456  // Assume all clauses for an operation can be translated unless they are
457  // checked above.
458  });
459  return result;
460 }
461 
462 static LogicalResult handleError(llvm::Error error, Operation &op) {
463  LogicalResult result = success();
464  if (error) {
465  llvm::handleAllErrors(
466  std::move(error),
467  [&](const PreviouslyReportedError &) { result = failure(); },
468  [&](const llvm::ErrorInfoBase &err) {
469  result = op.emitError(err.message());
470  });
471  }
472  return result;
473 }
474 
475 template <typename T>
476 static LogicalResult handleError(llvm::Expected<T> &result, Operation &op) {
477  if (!result)
478  return handleError(result.takeError(), op);
479 
480  return success();
481 }
482 
483 /// Find the insertion point for allocas given the current insertion point for
484 /// normal operations in the builder.
485 static llvm::OpenMPIRBuilder::InsertPointTy
486 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
487  LLVM::ModuleTranslation &moduleTranslation) {
488  // If there is an alloca insertion point on stack, i.e. we are in a nested
489  // operation and a specific point was provided by some surrounding operation,
490  // use it.
491  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
492  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
493  [&](OpenMPAllocaStackFrame &frame) {
494  allocaInsertPoint = frame.allocaInsertPoint;
495  return WalkResult::interrupt();
496  });
497  // In cases with multiple levels of outlining, the tree walk might find an
498  // alloca insertion point that is inside the original function while the
499  // builder insertion point is inside the outlined function. We need to make
500  // sure that we do not use it in those cases.
501  if (walkResult.wasInterrupted() &&
502  allocaInsertPoint.getBlock()->getParent() ==
503  builder.GetInsertBlock()->getParent())
504  return allocaInsertPoint;
505 
506  // Otherwise, insert to the entry block of the surrounding function.
507  // If the current IRBuilder InsertPoint is the function's entry, it cannot
508  // also be used for alloca insertion which would result in insertion order
509  // confusion. Create a new BasicBlock for the Builder and use the entry block
510  // for the allocs.
511  // TODO: Create a dedicated alloca BasicBlock at function creation such that
512  // we do not need to move the current InertPoint here.
513  if (builder.GetInsertBlock() ==
514  &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
515  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
516  "Assuming end of basic block");
517  llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
518  builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
519  builder.GetInsertBlock()->getNextNode());
520  builder.CreateBr(entryBB);
521  builder.SetInsertPoint(entryBB);
522  }
523 
524  llvm::BasicBlock &funcEntryBlock =
525  builder.GetInsertBlock()->getParent()->getEntryBlock();
526  return llvm::OpenMPIRBuilder::InsertPointTy(
527  &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
528 }
529 
530 /// Find the loop information structure for the loop nest being translated. It
531 /// will return a `null` value unless called from the translation function for
532 /// a loop wrapper operation after successfully translating its body.
533 static llvm::CanonicalLoopInfo *
534 findCurrentLoopInfo(LLVM::ModuleTranslation &moduleTranslation) {
535  llvm::CanonicalLoopInfo *loopInfo = nullptr;
536  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
537  [&](OpenMPLoopInfoStackFrame &frame) {
538  loopInfo = frame.loopInfo;
539  return WalkResult::interrupt();
540  });
541  return loopInfo;
542 }
543 
544 /// Converts the given region that appears within an OpenMP dialect operation to
545 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
546 /// region, and a branch from any block with an successor-less OpenMP terminator
547 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
548 /// of the continuation block if provided.
550  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
551  LLVM::ModuleTranslation &moduleTranslation,
552  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
553  bool isLoopWrapper = isa<omp::LoopWrapperInterface>(region.getParentOp());
554 
555  llvm::BasicBlock *continuationBlock =
556  splitBB(builder, true, "omp.region.cont");
557  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
558 
559  llvm::LLVMContext &llvmContext = builder.getContext();
560  for (Block &bb : region) {
561  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
562  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
563  builder.GetInsertBlock()->getNextNode());
564  moduleTranslation.mapBlock(&bb, llvmBB);
565  }
566 
567  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
568 
569  // Terminators (namely YieldOp) may be forwarding values to the region that
570  // need to be available in the continuation block. Collect the types of these
571  // operands in preparation of creating PHI nodes. This is skipped for loop
572  // wrapper operations, for which we know in advance they have no terminators.
573  SmallVector<llvm::Type *> continuationBlockPHITypes;
574  unsigned numYields = 0;
575 
576  if (!isLoopWrapper) {
577  bool operandsProcessed = false;
578  for (Block &bb : region.getBlocks()) {
579  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
580  if (!operandsProcessed) {
581  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
582  continuationBlockPHITypes.push_back(
583  moduleTranslation.convertType(yield->getOperand(i).getType()));
584  }
585  operandsProcessed = true;
586  } else {
587  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
588  "mismatching number of values yielded from the region");
589  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
590  llvm::Type *operandType =
591  moduleTranslation.convertType(yield->getOperand(i).getType());
592  (void)operandType;
593  assert(continuationBlockPHITypes[i] == operandType &&
594  "values of mismatching types yielded from the region");
595  }
596  }
597  numYields++;
598  }
599  }
600  }
601 
602  // Insert PHI nodes in the continuation block for any values forwarded by the
603  // terminators in this region.
604  if (!continuationBlockPHITypes.empty())
605  assert(
606  continuationBlockPHIs &&
607  "expected continuation block PHIs if converted regions yield values");
608  if (continuationBlockPHIs) {
609  llvm::IRBuilderBase::InsertPointGuard guard(builder);
610  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
611  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
612  for (llvm::Type *ty : continuationBlockPHITypes)
613  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
614  }
615 
616  // Convert blocks one by one in topological order to ensure
617  // defs are converted before uses.
619  for (Block *bb : blocks) {
620  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
621  // Retarget the branch of the entry block to the entry block of the
622  // converted region (regions are single-entry).
623  if (bb->isEntryBlock()) {
624  assert(sourceTerminator->getNumSuccessors() == 1 &&
625  "provided entry block has multiple successors");
626  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
627  "ContinuationBlock is not the successor of the entry block");
628  sourceTerminator->setSuccessor(0, llvmBB);
629  }
630 
631  llvm::IRBuilderBase::InsertPointGuard guard(builder);
632  if (failed(
633  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder)))
634  return llvm::make_error<PreviouslyReportedError>();
635 
636  // Create a direct branch here for loop wrappers to prevent their lack of a
637  // terminator from causing a crash below.
638  if (isLoopWrapper) {
639  builder.CreateBr(continuationBlock);
640  continue;
641  }
642 
643  // Special handling for `omp.yield` and `omp.terminator` (we may have more
644  // than one): they return the control to the parent OpenMP dialect operation
645  // so replace them with the branch to the continuation block. We handle this
646  // here to avoid relying inter-function communication through the
647  // ModuleTranslation class to set up the correct insertion point. This is
648  // also consistent with MLIR's idiom of handling special region terminators
649  // in the same code that handles the region-owning operation.
650  Operation *terminator = bb->getTerminator();
651  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
652  builder.CreateBr(continuationBlock);
653 
654  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
655  (*continuationBlockPHIs)[i]->addIncoming(
656  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
657  }
658  }
659  // After all blocks have been traversed and values mapped, connect the PHI
660  // nodes to the results of preceding blocks.
661  LLVM::detail::connectPHINodes(region, moduleTranslation);
662 
663  // Remove the blocks and values defined in this region from the mapping since
664  // they are not visible outside of this region. This allows the same region to
665  // be converted several times, that is cloned, without clashes, and slightly
666  // speeds up the lookups.
667  moduleTranslation.forgetMapping(region);
668 
669  return continuationBlock;
670 }
671 
672 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
673 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
674  switch (kind) {
675  case omp::ClauseProcBindKind::Close:
676  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
677  case omp::ClauseProcBindKind::Master:
678  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
679  case omp::ClauseProcBindKind::Primary:
680  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
681  case omp::ClauseProcBindKind::Spread:
682  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
683  }
684  llvm_unreachable("Unknown ClauseProcBindKind kind");
685 }
686 
687 /// Maps block arguments from \p blockArgIface (which are MLIR values) to the
688 /// corresponding LLVM values of \p the interface's operands. This is useful
689 /// when an OpenMP region with entry block arguments is converted to LLVM. In
690 /// this case the block arguments are (part of) of the OpenMP region's entry
691 /// arguments and the operands are (part of) of the operands to the OpenMP op
692 /// containing the region.
693 static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation,
694  omp::BlockArgOpenMPOpInterface blockArgIface) {
696  blockArgIface.getBlockArgsPairs(blockArgsPairs);
697  for (auto [var, arg] : blockArgsPairs)
698  moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
699 }
700 
701 /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
702 static LogicalResult
703 convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
704  LLVM::ModuleTranslation &moduleTranslation) {
705  auto maskedOp = cast<omp::MaskedOp>(opInst);
706  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
707 
708  if (failed(checkImplementationStatus(opInst)))
709  return failure();
710 
711  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
712  // MaskedOp has only one region associated with it.
713  auto &region = maskedOp.getRegion();
714  builder.restoreIP(codeGenIP);
715  return convertOmpOpRegions(region, "omp.masked.region", builder,
716  moduleTranslation)
717  .takeError();
718  };
719 
720  // TODO: Perform finalization actions for variables. This has to be
721  // called for variables which have destructors/finalizers.
722  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
723 
724  llvm::Value *filterVal = nullptr;
725  if (auto filterVar = maskedOp.getFilteredThreadId()) {
726  filterVal = moduleTranslation.lookupValue(filterVar);
727  } else {
728  llvm::LLVMContext &llvmContext = builder.getContext();
729  filterVal =
730  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), /*V=*/0);
731  }
732  assert(filterVal != nullptr);
733  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
734  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
735  moduleTranslation.getOpenMPBuilder()->createMasked(ompLoc, bodyGenCB,
736  finiCB, filterVal);
737 
738  if (failed(handleError(afterIP, opInst)))
739  return failure();
740 
741  builder.restoreIP(*afterIP);
742  return success();
743 }
744 
745 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
746 static LogicalResult
747 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
748  LLVM::ModuleTranslation &moduleTranslation) {
749  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
750  auto masterOp = cast<omp::MasterOp>(opInst);
751 
752  if (failed(checkImplementationStatus(opInst)))
753  return failure();
754 
755  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
756  // MasterOp has only one region associated with it.
757  auto &region = masterOp.getRegion();
758  builder.restoreIP(codeGenIP);
759  return convertOmpOpRegions(region, "omp.master.region", builder,
760  moduleTranslation)
761  .takeError();
762  };
763 
764  // TODO: Perform finalization actions for variables. This has to be
765  // called for variables which have destructors/finalizers.
766  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
767 
768  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
769  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
770  moduleTranslation.getOpenMPBuilder()->createMaster(ompLoc, bodyGenCB,
771  finiCB);
772 
773  if (failed(handleError(afterIP, opInst)))
774  return failure();
775 
776  builder.restoreIP(*afterIP);
777  return success();
778 }
779 
780 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
781 static LogicalResult
782 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
783  LLVM::ModuleTranslation &moduleTranslation) {
784  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
785  auto criticalOp = cast<omp::CriticalOp>(opInst);
786 
787  if (failed(checkImplementationStatus(opInst)))
788  return failure();
789 
790  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
791  // CriticalOp has only one region associated with it.
792  auto &region = cast<omp::CriticalOp>(opInst).getRegion();
793  builder.restoreIP(codeGenIP);
794  return convertOmpOpRegions(region, "omp.critical.region", builder,
795  moduleTranslation)
796  .takeError();
797  };
798 
799  // TODO: Perform finalization actions for variables. This has to be
800  // called for variables which have destructors/finalizers.
801  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
802 
803  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
804  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
805  llvm::Constant *hint = nullptr;
806 
807  // If it has a name, it probably has a hint too.
808  if (criticalOp.getNameAttr()) {
809  // The verifiers in OpenMP Dialect guarentee that all the pointers are
810  // non-null
811  auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
812  auto criticalDeclareOp =
813  SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
814  symbolRef);
815  hint =
816  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
817  static_cast<int>(criticalDeclareOp.getHint()));
818  }
819  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
820  moduleTranslation.getOpenMPBuilder()->createCritical(
821  ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint);
822 
823  if (failed(handleError(afterIP, opInst)))
824  return failure();
825 
826  builder.restoreIP(*afterIP);
827  return success();
828 }
829 
/// A util to collect info needed to convert delayed privatizers from MLIR to
/// LLVM.
  // Constructor: seeds the parallel vectors from any op implementing
  // BlockArgOpenMPOpInterface. NOTE(review): the struct header, constructor
  // name line, and member declarations are not visible in this listing.
  template <typename OP>
      : blockArgs(
            cast<omp::BlockArgOpenMPOpInterface>(*op).getPrivateBlockArgs()) {
    // Pre-size the parallel vectors to the number of private block args.
    mlirVars.reserve(blockArgs.size());
    llvmVars.reserve(blockArgs.size());
    // Resolve the privatizer declarations referenced by the op.
    collectPrivatizationDecls<OP>(op);

    // Record the MLIR values being privatized, in clause order.
    for (mlir::Value privateVar : op.getPrivateVars())
      mlirVars.push_back(privateVar);
  }


private:
  /// Populates `privatizers` with privatization declarations used for the
  /// given op.
  template <class OP>
  void collectPrivatizationDecls(OP op) {
    // No `private_syms` attribute means nothing is privatized.
    std::optional<ArrayAttr> attr = op.getPrivateSyms();
    if (!attr)
      return;

    privatizers.reserve(privatizers.size() + attr->size());
    // Resolve each symbol reference to its privatizer declaration.
    for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
      privatizers.push_back(findPrivatizer(op, symbolRef));
    }
  }
};
865 
/// Populates `reductions` with reduction declarations used in the given op.
// NOTE(review): the function signature line is not visible in this listing.
template <typename T>
static void
  // No `reduction_syms` attribute: the op performs no reductions.
  std::optional<ArrayAttr> attr = op.getReductionSyms();
  if (!attr)
    return;

  reductions.reserve(reductions.size() + op.getNumReductionVars());
  // Resolve each symbol to its omp.declare_reduction definition, searching
  // symbol tables outward from the op itself.
  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
    reductions.push_back(
        SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
            op, symbolRef));
  }
}
882 
/// Translates the blocks contained in the given region and appends them to at
/// the current insertion point of `builder`. The operations of the entry block
/// are appended to the current insertion block. If set, `continuationBlockArgs`
/// is populated with translated values that correspond to the values
/// omp.yield'ed from the region.
static LogicalResult inlineConvertOmpRegions(
    Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
  // Nothing to translate for an empty region.
  if (region.empty())
    return success();

  // Special case for single-block regions that don't create additional blocks:
  // insert operations without creating additional blocks.
  if (region.hasOneBlock()) {
    // Temporarily detach the insertion block's terminator (if any) so the
    // region's operations can be appended before it.
    llvm::Instruction *potentialTerminator =
        builder.GetInsertBlock()->empty() ? nullptr
                                          : &builder.GetInsertBlock()->back();

    if (potentialTerminator && potentialTerminator->isTerminator())
      potentialTerminator->removeFromParent();
    moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());

    if (failed(moduleTranslation.convertBlock(
            region.front(), /*ignoreArguments=*/true, builder)))
      return failure();

    // The continuation arguments are simply the translated terminator operands.
    if (continuationBlockArgs)
      llvm::append_range(
          *continuationBlockArgs,
          moduleTranslation.lookupValues(region.front().back().getOperands()));

    // Drop the mapping that is no longer necessary so that the same region can
    // be processed multiple times.
    moduleTranslation.forgetMapping(region);

    // Re-attach the detached terminator at the end of the (possibly grown)
    // insertion block.
    if (potentialTerminator && potentialTerminator->isTerminator()) {
      llvm::BasicBlock *block = builder.GetInsertBlock();
      if (block->empty()) {
        // this can happen for really simple reduction init regions e.g.
        // %0 = llvm.mlir.constant(0 : i32) : i32
        // omp.yield(%0 : i32)
        // because the llvm.mlir.constant (MLIR op) isn't converted into any
        // llvm op
        potentialTerminator->insertInto(block, block->begin());
      } else {
        potentialTerminator->insertAfter(&block->back());
      }
    }

    return success();
  }

  // Multi-block path: delegate to convertOmpOpRegions, which creates the
  // blocks and collects yielded values into `phis`.
  // NOTE(review): the declaration of `phis` is not visible in this listing.
  llvm::Expected<llvm::BasicBlock *> continuationBlock =
      convertOmpOpRegions(region, blockName, builder, moduleTranslation, &phis);

  if (failed(handleError(continuationBlock, *region.getParentOp())))
    return failure();

  if (continuationBlockArgs)
    llvm::append_range(*continuationBlockArgs, phis);
  // Leave the builder positioned in the continuation block so the caller can
  // keep emitting code.
  builder.SetInsertPoint(*continuationBlock,
                         (*continuationBlock)->getFirstInsertionPt());
  return success();
}
950 
namespace {
/// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
/// store lambdas with capture. std::function owns its callable, so these can
/// outlive the scope that created the lambda (presumably unlike the builder's
/// own callback types — confirm against OMPIRBuilder.h).
using OwningReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
        llvm::Value *&)>;
using OwningAtomicReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
        llvm::Value *)>;
} // namespace
963 
/// Create an OpenMPIRBuilder-compatible reduction generator for the given
/// reduction declaration. The generator uses `builder` but ignores its
/// insertion point.
static OwningReductionGen
makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  // The lambda is mutable because we need access to non-const methods of decl
  // (which aren't actually mutating it), and we must capture decl by-value to
  // avoid the dangling reference after the parent function returns.
  OwningReductionGen gen =
      [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
                llvm::Value *lhs, llvm::Value *rhs,
                llvm::Value *&result) mutable
      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
    // Bind the combiner region's block arguments to the incoming LLVM values.
    moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs);
    moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs);
    builder.restoreIP(insertPoint);
    // Inline the combiner body at the requested insertion point; the yielded
    // value becomes the reduction result.
    // NOTE(review): the declaration of `phis` is not visible in this listing.
    if (failed(inlineConvertOmpRegions(decl.getReductionRegion(),
                                       "omp.reduction.nonatomic.body", builder,
                                       moduleTranslation, &phis)))
      return llvm::createStringError(
          "failed to inline `combiner` region of `omp.declare_reduction`");
    result = llvm::getSingleElement(phis);
    return builder.saveIP();
  };
  return gen;
}
992 
/// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
/// given reduction declaration. The generator uses `builder` but ignores its
/// insertion point. Returns null if there is no atomic region available in the
/// reduction declaration.
static OwningAtomicReductionGen
makeAtomicReductionGen(omp::DeclareReductionOp decl,
                       llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  // A default-constructed (empty) std::function signals "no atomic variant".
  if (decl.getAtomicReductionRegion().empty())
    return OwningAtomicReductionGen();

  // The lambda is mutable because we need access to non-const methods of decl
  // (which aren't actually mutating it), and we must capture decl by-value to
  // avoid the dangling reference after the parent function returns.
  OwningAtomicReductionGen atomicGen =
      [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
                llvm::Value *lhs, llvm::Value *rhs) mutable
      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
    // Bind the atomic region's block arguments to the incoming LLVM values.
    moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs);
    moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs);
    builder.restoreIP(insertPoint);
    // NOTE(review): the declaration of `phis` is not visible in this listing.
    if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(),
                                       "omp.reduction.atomic.body", builder,
                                       moduleTranslation, &phis)))
      return llvm::createStringError(
          "failed to inline `atomic` region of `omp.declare_reduction`");
    // The atomic region yields nothing; it updates memory in place.
    assert(phis.empty());
    return builder.saveIP();
  };
  return atomicGen;
}
1025 
1026 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
1027 static LogicalResult
1028 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
1029  LLVM::ModuleTranslation &moduleTranslation) {
1030  auto orderedOp = cast<omp::OrderedOp>(opInst);
1031 
1032  if (failed(checkImplementationStatus(opInst)))
1033  return failure();
1034 
1035  omp::ClauseDepend dependType = *orderedOp.getDoacrossDependType();
1036  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
1037  unsigned numLoops = *orderedOp.getDoacrossNumLoops();
1038  SmallVector<llvm::Value *> vecValues =
1039  moduleTranslation.lookupValues(orderedOp.getDoacrossDependVars());
1040 
1041  size_t indexVecValues = 0;
1042  while (indexVecValues < vecValues.size()) {
1043  SmallVector<llvm::Value *> storeValues;
1044  storeValues.reserve(numLoops);
1045  for (unsigned i = 0; i < numLoops; i++) {
1046  storeValues.push_back(vecValues[indexVecValues]);
1047  indexVecValues++;
1048  }
1049  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1050  findAllocaInsertPoint(builder, moduleTranslation);
1051  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1052  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
1053  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
1054  }
1055  return success();
1056 }
1057 
1058 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
1059 /// OpenMPIRBuilder.
1060 static LogicalResult
1061 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
1062  LLVM::ModuleTranslation &moduleTranslation) {
1063  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1064  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
1065 
1066  if (failed(checkImplementationStatus(opInst)))
1067  return failure();
1068 
1069  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1070  // OrderedOp has only one region associated with it.
1071  auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
1072  builder.restoreIP(codeGenIP);
1073  return convertOmpOpRegions(region, "omp.ordered.region", builder,
1074  moduleTranslation)
1075  .takeError();
1076  };
1077 
1078  // TODO: Perform finalization actions for variables. This has to be
1079  // called for variables which have destructors/finalizers.
1080  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1081 
1082  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1083  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1084  moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
1085  ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd());
1086 
1087  if (failed(handleError(afterIP, opInst)))
1088  return failure();
1089 
1090  builder.restoreIP(*afterIP);
1091  return success();
1092 }
1093 
namespace {
/// Contains the arguments for an LLVM store operation
struct DeferredStore {
  DeferredStore(llvm::Value *value, llvm::Value *address)
      : value(value), address(address) {}

  llvm::Value *value;   // Value to store.
  llvm::Value *address; // Destination address of the store.
};
} // namespace
1104 
/// Allocate space for privatized reduction variables.
/// `deferredStores` contains information to create store operations which needs
/// to be inserted after all allocas
// NOTE(review): the function name line and the `reductionDecls` parameter line
// are not visible in this listing.
template <typename T>
static LogicalResult
                   llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation,
                   const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
                   SmallVectorImpl<llvm::Value *> &privateReductionVariables,
                   DenseMap<Value, llvm::Value *> &reductionVariableMap,
                   SmallVectorImpl<DeferredStore> &deferredStores,
                   llvm::ArrayRef<bool> isByRefs) {
  // Restore the caller's insertion point on scope exit.
  llvm::IRBuilderBase::InsertPointGuard guard(builder);
  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());

  // delay creating stores until after all allocas
  deferredStores.reserve(loop.getNumReductionVars());

  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
    Region &allocRegion = reductionDecls[i].getAllocRegion();
    if (isByRefs[i]) {
      if (allocRegion.empty())
        continue;

      // Inline the `alloc` region; its yielded value is the storage for the
      // real reduction variable.
      // NOTE(review): the declaration of `phis` is not visible in this
      // listing.
      if (failed(inlineConvertOmpRegions(allocRegion, "omp.reduction.alloc",
                                         builder, moduleTranslation, &phis)))
        return loop.emitError(
            "failed to inline `alloc` region of `omp.declare_reduction`");

      assert(phis.size() == 1 && "expected one allocation to be yielded");
      builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());

      // Allocate reduction variable (which is a pointer to the real reduction
      // variable allocated in the inlined region)
      llvm::Value *var = builder.CreateAlloca(
          moduleTranslation.convertType(reductionDecls[i].getType()));

      // Normalize both pointers to the default pointer type before recording
      // the deferred store.
      llvm::Type *ptrTy = builder.getPtrTy();
      llvm::Value *castVar =
          builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
      llvm::Value *castPhi =
          builder.CreatePointerBitCastOrAddrSpaceCast(phis[0], ptrTy);

      deferredStores.emplace_back(castPhi, castVar);

      privateReductionVariables[i] = castVar;
      moduleTranslation.mapValue(reductionArgs[i], castPhi);
      reductionVariableMap.try_emplace(loop.getReductionVars()[i], castPhi);
    } else {
      assert(allocRegion.empty() &&
             "allocaction is implicit for by-val reduction");
      // By-value: allocate the private copy directly.
      llvm::Value *var = builder.CreateAlloca(
          moduleTranslation.convertType(reductionDecls[i].getType()));

      llvm::Type *ptrTy = builder.getPtrTy();
      llvm::Value *castVar =
          builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);

      moduleTranslation.mapValue(reductionArgs[i], castVar);
      privateReductionVariables[i] = castVar;
      reductionVariableMap.try_emplace(loop.getReductionVars()[i], castVar);
    }
  }

  return success();
}
1174 
/// Map input arguments to reduction initialization region
// NOTE(review): the `reductionDecls` parameter line is not visible in this
// listing.
template <typename T>
static void
mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation,
                      DenseMap<Value, llvm::Value *> &reductionVariableMap,
                      unsigned i) {
  // map input argument to the initialization region
  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
  Region &initializerRegion = reduction.getInitializerRegion();
  Block &entry = initializerRegion.front();

  // Bind the "mold" argument to the LLVM value of the reduction variable.
  mlir::Value mlirSource = loop.getReductionVars()[i];
  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
  assert(llvmSource && "lookup reduction var");
  moduleTranslation.mapValue(reduction.getInitializerMoldArg(), llvmSource);

  // A second entry argument, when present, receives the allocation produced
  // by the `alloc` region.
  if (entry.getNumArguments() > 1) {
    llvm::Value *allocation =
        reductionVariableMap.lookup(loop.getReductionVars()[i]);
    moduleTranslation.mapValue(reduction.getInitializerAllocArg(), allocation);
  }
}
1198 
1199 static void
1200 setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder,
1201  llvm::BasicBlock *block = nullptr) {
1202  if (block == nullptr)
1203  block = builder.GetInsertBlock();
1204 
1205  if (block->empty() || block->getTerminator() == nullptr)
1206  builder.SetInsertPoint(block);
1207  else
1208  builder.SetInsertPoint(block->getTerminator());
1209 }
1210 
/// Inline reductions' `init` regions. This function assumes that the
/// `builder`'s insertion point is where the user wants the `init` regions to be
/// inlined; i.e. it does not try to find a proper insertion location for the
/// `init` regions. It also leaves the `builder`'s insertion point in a state
/// where the user can continue the code-gen directly afterwards.
// NOTE(review): the function name line and the `reductionDecls` parameter line
// are not visible in this listing.
template <typename OP>
static LogicalResult
                  llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation,
                  llvm::BasicBlock *latestAllocaBlock,
                  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
                  DenseMap<Value, llvm::Value *> &reductionVariableMap,
                  llvm::ArrayRef<bool> isByRef,
                  SmallVectorImpl<DeferredStore> &deferredStores) {
  if (op.getNumReductionVars() == 0)
    return success();

  // Split off a dedicated block for the initialization code, and point the
  // builder at the alloca block for the allocations emitted below.
  llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
  auto allocaIP = llvm::IRBuilderBase::InsertPoint(
      latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
  builder.restoreIP(allocaIP);
  SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars());

  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
    if (isByRef[i]) {
      if (!reductionDecls[i].getAllocRegion().empty())
        continue;

      // TODO: remove after all users of by-ref are updated to use the alloc
      // region: Allocate reduction variable (which is a pointer to the real
      // reduciton variable allocated in the inlined region)
      byRefVars[i] = builder.CreateAlloca(
          moduleTranslation.convertType(reductionDecls[i].getType()));
    }
  }

  setInsertPointForPossiblyEmptyBlock(builder, initBlock);

  // store result of the alloc region to the allocated pointer to the real
  // reduction variable
  for (auto [data, addr] : deferredStores)
    builder.CreateStore(data, addr);

  // Before the loop, store the initial values of reductions into reduction
  // variables. Although this could be done after allocas, we don't want to mess
  // up with the alloca insertion point.
  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
    // NOTE(review): a declaration of `phis` appears to be elided from this
    // listing at this point.

    // map block argument to initializer region
    mapInitializationArgs(op, moduleTranslation, reductionDecls,
                          reductionVariableMap, i);

    // TODO In some cases (specially on the GPU), the init regions may
    // contains stack alloctaions. If the region is inlined in a loop, this is
    // problematic. Instead of just inlining the region, handle allocations by
    // hoisting fixed length allocations to the function entry and using
    // stacksave and restore for variable length ones.
    if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
                                       "omp.reduction.neutral", builder,
                                       moduleTranslation, &phis)))
      return failure();

    assert(phis.size() == 1 && "expected one value to be yielded from the "
                               "reduction neutral element declaration region");


    if (isByRef[i]) {
      if (!reductionDecls[i].getAllocRegion().empty())
        // done in allocReductionVars
        continue;

      // TODO: this path can be removed once all users of by-ref are updated to
      // use an alloc region

      // Store the result of the inlined region to the allocated reduction var
      // ptr
      builder.CreateStore(phis[0], byRefVars[i]);

      privateReductionVariables[i] = byRefVars[i];
      moduleTranslation.mapValue(reductionArgs[i], phis[0]);
      reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
    } else {
      // for by-ref case the store is inside of the reduction region
      builder.CreateStore(phis[0], privateReductionVariables[i]);
      // the rest was handled in allocByValReductionVars
    }

    // forget the mapping for the initializer region because we might need a
    // different mapping if this reduction declaration is re-used for a
    // different variable
    moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
  }

  return success();
}
1310 
/// Collect reduction info
// NOTE(review): the function name line, the `reductionDecls` parameter line
// and the `reductionInfos` out-parameter line are not visible in this listing.
template <typename T>
    T loop, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    SmallVectorImpl<OwningReductionGen> &owningReductionGens,
    SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens,
    const ArrayRef<llvm::Value *> privateReductionVariables,
  unsigned numReductions = loop.getNumReductionVars();

  // Build one owning (atomic) generator per reduction; these must outlive the
  // ReductionInfo entries that reference them.
  for (unsigned i = 0; i < numReductions; ++i) {
    owningReductionGens.push_back(
        makeReductionGen(reductionDecls[i], builder, moduleTranslation));
    owningAtomicReductionGens.push_back(
        makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
  }

  // Collect the reduction information.
  reductionInfos.reserve(numReductions);
  for (unsigned i = 0; i < numReductions; ++i) {
    // An empty OwningAtomicReductionGen means no atomic variant is available.
    llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
    if (owningAtomicReductionGens[i])
      atomicGen = owningAtomicReductionGens[i];
    llvm::Value *variable =
        moduleTranslation.lookupValue(loop.getReductionVars()[i]);
    reductionInfos.push_back(
        {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
         privateReductionVariables[i],
         /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
         owningReductionGens[i],
         /*ReductionGenClang=*/nullptr, atomicGen});
  }
}
1346 
/// handling of DeclareReductionOp's cleanup region
// NOTE(review): the function name line (taking the `cleanupRegions` parameter)
// is not visible in this listing.
static LogicalResult
                       llvm::ArrayRef<llvm::Value *> privateVariables,
                       LLVM::ModuleTranslation &moduleTranslation,
                       llvm::IRBuilderBase &builder, StringRef regionName,
                       bool shouldLoadCleanupRegionArg = true) {
  for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
    // Declarations without a cleanup region need no work.
    if (cleanupRegion->empty())
      continue;

    // map the argument to the cleanup region
    Block &entry = cleanupRegion->front();

    // If the current insertion block already ends in a terminator, emit the
    // cleanup code before it.
    llvm::Instruction *potentialTerminator =
        builder.GetInsertBlock()->empty() ? nullptr
                                          : &builder.GetInsertBlock()->back();
    if (potentialTerminator && potentialTerminator->isTerminator())
      builder.SetInsertPoint(potentialTerminator);
    // Depending on the caller, the region argument is either the private
    // variable's address or the value loaded from it.
    llvm::Value *privateVarValue =
        shouldLoadCleanupRegionArg
            ? builder.CreateLoad(
                  moduleTranslation.convertType(entry.getArgument(0).getType()),
                  privateVariables[i])
            : privateVariables[i];

    moduleTranslation.mapValue(entry.getArgument(0), privateVarValue);

    if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
                                       moduleTranslation)))
      return failure();

    // clear block argument mapping in case it needs to be re-created with a
    // different source for another use of the same reduction decl
    moduleTranslation.forgetMapping(*cleanupRegion);
  }
  return success();
}
1385 
1386 // TODO: not used by ParallelOp
1387 template <class OP>
1388 static LogicalResult createReductionsAndCleanup(
1389  OP op, llvm::IRBuilderBase &builder,
1390  LLVM::ModuleTranslation &moduleTranslation,
1391  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1393  ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef,
1394  bool isNowait = false, bool isTeamsReduction = false) {
1395  // Process the reductions if required.
1396  if (op.getNumReductionVars() == 0)
1397  return success();
1398 
1399  SmallVector<OwningReductionGen> owningReductionGens;
1400  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1402 
1403  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1404 
1405  // Create the reduction generators. We need to own them here because
1406  // ReductionInfo only accepts references to the generators.
1407  collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
1408  owningReductionGens, owningAtomicReductionGens,
1409  privateReductionVariables, reductionInfos);
1410 
1411  // The call to createReductions below expects the block to have a
1412  // terminator. Create an unreachable instruction to serve as terminator
1413  // and remove it later.
1414  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1415  builder.SetInsertPoint(tempTerminator);
1416  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
1417  ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
1418  isByRef, isNowait, isTeamsReduction);
1419 
1420  if (failed(handleError(contInsertPoint, *op)))
1421  return failure();
1422 
1423  if (!contInsertPoint->getBlock())
1424  return op->emitOpError() << "failed to convert reductions";
1425 
1426  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1427  ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);
1428 
1429  if (failed(handleError(afterIP, *op)))
1430  return failure();
1431 
1432  tempTerminator->eraseFromParent();
1433  builder.restoreIP(*afterIP);
1434 
1435  // after the construct, deallocate private reduction variables
1436  SmallVector<Region *> reductionRegions;
1437  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
1438  [](omp::DeclareReductionOp reductionDecl) {
1439  return &reductionDecl.getCleanupRegion();
1440  });
1441  return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
1442  moduleTranslation, builder,
1443  "omp.reduction.cleanup");
1444  return success();
1445 }
1446 
1447 static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
1448  if (!attr)
1449  return {};
1450  return *attr;
1451 }
1452 
// TODO: not used by omp.parallel
// NOTE(review): the `reductionDecls` parameter line is not visible in this
// listing.
template <typename OP>
static LogicalResult allocAndInitializeReductionVars(
    OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
    SmallVectorImpl<llvm::Value *> &privateReductionVariables,
    DenseMap<Value, llvm::Value *> &reductionVariableMap,
    llvm::ArrayRef<bool> isByRef) {
  // Nothing to do for ops without reduction clauses.
  if (op.getNumReductionVars() == 0)
    return success();

  SmallVector<DeferredStore> deferredStores;

  // First allocate storage for all private reduction variables (collecting
  // stores to be emitted after the allocas)...
  if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
                                allocaIP, reductionDecls,
                                privateReductionVariables, reductionVariableMap,
                                deferredStores, isByRef)))
    return failure();

  // ...then inline the `init` regions to fill them with the neutral elements.
  return initReductionVars(op, reductionArgs, builder, moduleTranslation,
                           allocaIP.getBlock(), reductionDecls,
                           privateReductionVariables, reductionVariableMap,
                           isByRef, deferredStores);
}
1479 
1480 /// Return the llvm::Value * corresponding to the `privateVar` that
1481 /// is being privatized. It isn't always as simple as looking up
1482 /// moduleTranslation with privateVar. For instance, in case of
1483 /// an allocatable, the descriptor for the allocatable is privatized.
1484 /// This descriptor is mapped using an MapInfoOp. So, this function
1485 /// will return a pointer to the llvm::Value corresponding to the
1486 /// block argument for the mapped descriptor.
1487 static llvm::Value *
1488 findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
1489  LLVM::ModuleTranslation &moduleTranslation,
1490  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1491  if (mappedPrivateVars == nullptr || !mappedPrivateVars->contains(privateVar))
1492  return moduleTranslation.lookupValue(privateVar);
1493 
1494  Value blockArg = (*mappedPrivateVars)[privateVar];
1495  Type privVarType = privateVar.getType();
1496  Type blockArgType = blockArg.getType();
1497  assert(isa<LLVM::LLVMPointerType>(blockArgType) &&
1498  "A block argument corresponding to a mapped var should have "
1499  "!llvm.ptr type");
1500 
1501  if (privVarType == blockArgType)
1502  return moduleTranslation.lookupValue(blockArg);
1503 
1504  // This typically happens when the privatized type is lowered from
1505  // boxchar<KIND> and gets lowered to !llvm.struct<(ptr, i64)>. That is the
1506  // struct/pair is passed by value. But, mapped values are passed only as
1507  // pointers, so before we privatize, we must load the pointer.
1508  if (!isa<LLVM::LLVMPointerType>(privVarType))
1509  return builder.CreateLoad(moduleTranslation.convertType(privVarType),
1510  moduleTranslation.lookupValue(blockArg));
1511 
1512  return moduleTranslation.lookupValue(privateVar);
1513 }
1514 
/// Initialize a single (first)private variable. You probably want to use
/// allocateAndInitPrivateVars instead of this.
/// This returns the private variable which has been initialized. This
/// variable should be mapped before constructing the body of the Op.
// NOTE(review): the return-type/function-name line is not visible in this
// listing.
    llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
    omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg,
    llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock,
    llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
  // Without an `init` region the allocated variable is used as-is.
  Region &initRegion = privDecl.getInitRegion();
  if (initRegion.empty())
    return llvmPrivateVar;

  // map initialization region block arguments
  llvm::Value *nonPrivateVar = findAssociatedValue(
      mlirPrivVar, builder, moduleTranslation, mappedPrivateVars);
  assert(nonPrivateVar);
  moduleTranslation.mapValue(privDecl.getInitMoldArg(), nonPrivateVar);
  moduleTranslation.mapValue(privDecl.getInitPrivateArg(), llvmPrivateVar);

  // in-place convert the private initialization region
  // NOTE(review): the declaration of `phis` is not visible in this listing.
  if (failed(inlineConvertOmpRegions(initRegion, "omp.private.init", builder,
                                     moduleTranslation, &phis)))
    return llvm::createStringError(
        "failed to inline `init` region of `omp.private`");

  assert(phis.size() == 1 && "expected one allocation to be yielded");

  // clear init region block argument mapping in case it needs to be
  // re-created with a different source for another use of the same
  // reduction decl
  moduleTranslation.forgetMapping(initRegion);

  // Prefer the value yielded from the init region to the allocated private
  // variable in case the region is operating on arguments by-value (e.g.
  // Fortran character boxes).
  return phis[0];
}
1554 
/// Inline the `init` region of every delayed privatizer in `privateVarsInfo`,
/// updating its llvmVars and the value mapping as each variable is
/// initialized.
static llvm::Error
initPrivateVars(llvm::IRBuilderBase &builder,
                LLVM::ModuleTranslation &moduleTranslation,
                PrivateVarsInfo &privateVarsInfo,
                llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
  // No private block arguments: nothing to initialize.
  if (privateVarsInfo.blockArgs.empty())
    return llvm::Error::success();

  // Split off a dedicated block for all privatizer `init` regions.
  llvm::BasicBlock *privInitBlock = splitBB(builder, true, "omp.private.init");
  setInsertPointForPossiblyEmptyBlock(builder, privInitBlock);

  // Walk the privatizers together with their MLIR variables, block args and
  // LLVM storage.
  for (auto [idx, zip] : llvm::enumerate(llvm::zip_equal(
           privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
           privateVarsInfo.blockArgs, privateVarsInfo.llvmVars))) {
    auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVar] = zip;
    // NOTE(review): the call line producing `privVarOrErr` (initPrivateVar)
    // is not visible in this listing.
        builder, moduleTranslation, privDecl, mlirPrivVar, blockArg,
        llvmPrivateVar, privInitBlock, mappedPrivateVars);

    if (!privVarOrErr)
      return privVarOrErr.takeError();

    // Record the initialized value and make the block argument resolve to it.
    llvmPrivateVar = privVarOrErr.get();
    moduleTranslation.mapValue(blockArg, llvmPrivateVar);

    // NOTE(review): a statement elided from this listing follows here.
  }

  return llvm::Error::success();
}
1585 
1586 /// Allocate and initialize delayed private variables. Returns the basic block
1587 /// which comes after all of these allocations. llvm::Value * for each of these
1588 /// private variables are populated in llvmPrivateVars.
 // NOTE(review): the return-type line of this function is not visible in this
 // listing; from the `return afterAllocas;` below it returns a basic block
 // (presumably wrapped in llvm::Expected) — confirm against the original file.
1590 allocatePrivateVars(llvm::IRBuilderBase &builder,
1591  LLVM::ModuleTranslation &moduleTranslation,
1592  PrivateVarsInfo &privateVarsInfo,
1593  const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1594  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1595  // Allocate private vars
1596  llvm::Instruction *allocaTerminator = allocaIP.getBlock()->getTerminator();
 // Split the alloca block so that everything after the allocas lands in a
 // separate "omp.region.after_alloca" block.
1597  splitBB(llvm::OpenMPIRBuilder::InsertPointTy(allocaIP.getBlock(),
1598  allocaTerminator->getIterator()),
1599  true, allocaTerminator->getStableDebugLoc(),
1600  "omp.region.after_alloca");
1601 
1602  llvm::IRBuilderBase::InsertPointGuard guard(builder);
1603  // Update the allocaTerminator since the alloca block was split above.
1604  allocaTerminator = allocaIP.getBlock()->getTerminator();
1605  builder.SetInsertPoint(allocaTerminator);
1606  // The new terminator is an uncondition branch created by the splitBB above.
1607  assert(allocaTerminator->getNumSuccessors() == 1 &&
1608  "This is an unconditional branch created by splitBB");
1609 
1610  llvm::DataLayout dataLayout = builder.GetInsertBlock()->getDataLayout();
1611  llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);
1612 
1613  unsigned int allocaAS =
1614  moduleTranslation.getLLVMModule()->getDataLayout().getAllocaAddrSpace();
1615  unsigned int defaultAS = moduleTranslation.getLLVMModule()
1616  ->getDataLayout()
1617  .getProgramAddressSpace();
1618 
 // One alloca per privatizer; the results are collected (in declaration
 // order) into privateVarsInfo.llvmVars.
1619  for (auto [privDecl, mlirPrivVar, blockArg] :
1620  llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
1621  privateVarsInfo.blockArgs)) {
1622  llvm::Type *llvmAllocType =
1623  moduleTranslation.convertType(privDecl.getType());
1624  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1625  llvm::Value *llvmPrivateVar = builder.CreateAlloca(
1626  llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
 // When allocas live in a different address space than the program default
 // (e.g. on some GPU targets), cast the pointer to the default AS so later
 // uses see a uniform pointer type.
1627  if (allocaAS != defaultAS)
1628  llvmPrivateVar = builder.CreateAddrSpaceCast(llvmPrivateVar,
1629  builder.getPtrTy(defaultAS));
1630 
1631  privateVarsInfo.llvmVars.push_back(llvmPrivateVar);
1632  }
1633 
1634  return afterAllocas;
1635 }
1636 
/// Inlines the `copy` region of every firstprivate privatizer in
/// `privateDecls`, copying the original (host) value into the corresponding
/// private allocation. Optionally emits a barrier afterwards. No-op (success)
/// when no privatizer is firstprivate.
1637 static LogicalResult copyFirstPrivateVars(
1638  mlir::Operation *op, llvm::IRBuilderBase &builder,
1639  LLVM::ModuleTranslation &moduleTranslation,
1640  SmallVectorImpl<mlir::Value> &mlirPrivateVars,
1641  ArrayRef<llvm::Value *> llvmPrivateVars,
1642  SmallVectorImpl<omp::PrivateClauseOp> &privateDecls, bool insertBarrier,
1643  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1644  // Apply copy region for firstprivate.
1645  bool needsFirstprivate =
1646  llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1647  return privOp.getDataSharingType() ==
1648  omp::DataSharingClauseType::FirstPrivate;
1649  });
1650 
1651  if (!needsFirstprivate)
1652  return success();
1653 
 // All copy regions are inlined into a dedicated "omp.private.copy" block.
1654  llvm::BasicBlock *copyBlock =
1655  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1656  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
1657 
1658  for (auto [decl, mlirVar, llvmVar] :
1659  llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1660  if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1661  continue;
1662 
1663  // copyRegion implements `lhs = rhs`
1664  Region &copyRegion = decl.getCopyRegion();
1665 
1666  // map copyRegion rhs arg
1667  llvm::Value *nonPrivateVar = findAssociatedValue(
1668  mlirVar, builder, moduleTranslation, mappedPrivateVars);
1669  assert(nonPrivateVar);
1670  moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1671 
1672  // map copyRegion lhs arg
1673  moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1674 
1675  // in-place convert copy region
1676  if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
1677  moduleTranslation)))
1678  return decl.emitError("failed to inline `copy` region of `omp.private`");
1679 
1681 
1682  // ignore unused value yielded from copy region
1683 
1684  // clear copy region block argument mapping in case it needs to be
1685  // re-created with different sources for reuse of the same reduction
1686  // decl
1687  moduleTranslation.forgetMapping(copyRegion);
1688  }
1689 
 // A barrier is required when another thread could observe the privatized
 // values before all copies complete (requested by the caller).
1690  if (insertBarrier) {
1691  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1692  llvm::OpenMPIRBuilder::InsertPointOrErrorTy res =
1693  ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
1694  if (failed(handleError(res, *op)))
1695  return failure();
1696  }
1697 
1698  return success();
1699 }
1700 
/// Inlines the `dealloc` region of every privatizer in `privateDecls` to
/// release resources owned by the corresponding private variables.
1701 static LogicalResult
1702 cleanupPrivateVars(llvm::IRBuilderBase &builder,
1703  LLVM::ModuleTranslation &moduleTranslation, Location loc,
1704  SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
1705  SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
1706  // private variable deallocation
1707  SmallVector<Region *> privateCleanupRegions;
1708  llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
1709  [](omp::PrivateClauseOp privatizer) {
1710  return &privatizer.getDeallocRegion();
1711  });
1712 
 // NOTE(review): the opening of this call (presumably a helper that inlines
 // the collected cleanup regions) is on a line not visible in this listing —
 // confirm against the original file.
1714  privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1715  "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1716  return mlir::emitError(loc, "failed to inline `dealloc` region of an "
1717  "`omp.private` op in");
1718 
1719  return success();
1720 }
1721 
1722 /// Returns true if the construct contains omp.cancel or omp.cancellation_point
 // NOTE(review): the function signature line is not visible in this listing;
 // from the body it takes an `Operation *op` — confirm against the original.
1724  // omp.cancel and omp.cancellation_point must be "closely nested" so they will
1725  // be visible and not inside of function calls. This is enforced by the
1726  // verifier.
1727  return op
1728  ->walk([](Operation *child) {
1729  if (mlir::isa<omp::CancelOp, omp::CancellationPointOp>(child))
1730  return WalkResult::interrupt();
1731  return WalkResult::advance();
1732  })
1733  .wasInterrupted();
1734 }
1735 
/// Converts an `omp.sections` construct (and its nested `omp.section` regions)
/// to LLVM IR via OpenMPIRBuilder::createSections, including reduction
/// variable allocation/initialization and post-construct reduction processing.
1736 static LogicalResult
1737 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
1738  LLVM::ModuleTranslation &moduleTranslation) {
1739  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1740  using StorableBodyGenCallbackTy =
1741  llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
1742 
1743  auto sectionsOp = cast<omp::SectionsOp>(opInst);
1744 
 // Bail out early on clauses the translation does not support yet.
1745  if (failed(checkImplementationStatus(opInst)))
1746  return failure();
1747 
1748  llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionByref());
1749  assert(isByRef.size() == sectionsOp.getNumReductionVars());
1750 
1751  SmallVector<omp::DeclareReductionOp> reductionDecls;
1752  collectReductionDecls(sectionsOp, reductionDecls);
1753  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1754  findAllocaInsertPoint(builder, moduleTranslation);
1755 
1756  SmallVector<llvm::Value *> privateReductionVariables(
1757  sectionsOp.getNumReductionVars());
1758  DenseMap<Value, llvm::Value *> reductionVariableMap;
1759 
1760  MutableArrayRef<BlockArgument> reductionArgs =
1761  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
1762 
 // NOTE(review): the opening of this call (reduction-variable allocation and
 // initialization) is on a line not visible in this listing.
1764  sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
1765  reductionDecls, privateReductionVariables, reductionVariableMap,
1766  isByRef)))
1767  return failure();
1768 
 // NOTE(review): the declaration of `sectionCBs` (a vector of
 // StorableBodyGenCallbackTy, populated below) is not visible in this listing.
1770 
 // Build one body-generation callback per nested omp.section.
1771  for (Operation &op : *sectionsOp.getRegion().begin()) {
1772  auto sectionOp = dyn_cast<omp::SectionOp>(op);
1773  if (!sectionOp) // omp.terminator
1774  continue;
1775 
1776  Region &region = sectionOp.getRegion();
1777  auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation](
1778  InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1779  builder.restoreIP(codeGenIP);
1780 
1781  // map the omp.section reduction block argument to the omp.sections block
1782  // arguments
1783  // TODO: this assumes that the only block arguments are reduction
1784  // variables
1785  assert(region.getNumArguments() ==
1786  sectionsOp.getRegion().getNumArguments());
1787  for (auto [sectionsArg, sectionArg] : llvm::zip_equal(
1788  sectionsOp.getRegion().getArguments(), region.getArguments())) {
1789  llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg);
1790  assert(llvmVal);
1791  moduleTranslation.mapValue(sectionArg, llvmVal);
1792  }
1793 
1794  return convertOmpOpRegions(region, "omp.section.region", builder,
1795  moduleTranslation)
1796  .takeError();
1797  };
1798  sectionCBs.push_back(sectionCB);
1799  }
1800 
1801  // No sections within omp.sections operation - skip generation. This situation
1802  // is only possible if there is only a terminator operation inside the
1803  // sections operation
1804  if (sectionCBs.empty())
1805  return success();
1806 
1807  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
1808 
1809  // TODO: Perform appropriate actions according to the data-sharing
1810  // attribute (shared, private, firstprivate, ...) of variables.
1811  // Currently defaults to shared.
1812  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
1813  llvm::Value &vPtr, llvm::Value *&replacementValue)
1814  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
1815  replacementValue = &vPtr;
1816  return codeGenIP;
1817  };
1818 
1819  // TODO: Perform finalization actions for variables. This has to be
1820  // called for variables which have destructors/finalizers.
1821  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1822 
1823  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1824  bool isCancellable = constructIsCancellable(sectionsOp);
1825  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1826  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1827  moduleTranslation.getOpenMPBuilder()->createSections(
1828  ompLoc, allocaIP, sectionCBs, privCB, finiCB, isCancellable,
1829  sectionsOp.getNowait());
1830 
1831  if (failed(handleError(afterIP, opInst)))
1832  return failure();
1833 
1834  builder.restoreIP(*afterIP);
1835 
1836  // Process the reductions if required.
 // NOTE(review): the opening of this (tail) call is on a line not visible in
 // this listing; its result is returned directly.
1838  sectionsOp, builder, moduleTranslation, allocaIP, reductionDecls,
1839  privateReductionVariables, isByRef, sectionsOp.getNowait());
1840 }
1841 
1842 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
1843 static LogicalResult
1844 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
1845  LLVM::ModuleTranslation &moduleTranslation) {
1846  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1847  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1848 
1849  if (failed(checkImplementationStatus(*singleOp)))
1850  return failure();
1851 
 // Body callback: translate the single region at the codegen point.
1852  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1853  builder.restoreIP(codegenIP);
1854  return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region",
1855  builder, moduleTranslation)
1856  .takeError();
1857  };
1858  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1859 
1860  // Handle copyprivate
1861  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
1862  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateSyms();
 // NOTE(review): the declarations of `llvmCPVars` and `llvmCPFuncs`
 // (populated below) are on lines not visible in this listing.
 // Resolve each copyprivate variable and its associated copy function.
1865  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
1866  llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
1867  auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
1868  singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
1869  llvmCPFuncs.push_back(
1870  moduleTranslation.lookupFunction(llvmFuncOp.getName()));
1871  }
1872 
1873  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1874  moduleTranslation.getOpenMPBuilder()->createSingle(
1875  ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars,
1876  llvmCPFuncs);
1877 
1878  if (failed(handleError(afterIP, *singleOp)))
1879  return failure();
1880 
1881  builder.restoreIP(*afterIP);
1882  return success();
1883 }
1884 
/// Returns true when every (non-debug) use of the teams op's reduction block
/// arguments lives inside a single nested `omp.distribute` op — in that case
/// the distribute op performs the reduction instead of the teams op. As a side
/// effect, erases debug uses of the reduction arguments when returning true so
/// they do not reference unmapped values later.
1885 static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp) {
1886  auto iface =
1887  llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(teamsOp.getOperation());
1888  // Check that all uses of the reduction block arg has the same distribute op
1889  // parent.
 // NOTE(review): the declaration of `debugUses` (collects debug-intrinsic
 // users) is on a line not visible in this listing.
1891  Operation *distOp = nullptr;
1892  for (auto ra : iface.getReductionBlockArgs())
1893  for (auto &use : ra.getUses()) {
1894  auto *useOp = use.getOwner();
1895  // Ignore debug uses.
1896  if (mlir::isa<LLVM::DbgDeclareOp, LLVM::DbgValueOp>(useOp)) {
1897  debugUses.push_back(useOp);
1898  continue;
1899  }
1900 
1901  auto currentDistOp = useOp->getParentOfType<omp::DistributeOp>();
1902  // Use is not inside a distribute op - return false
1903  if (!currentDistOp)
1904  return false;
1905  // Multiple distribute operations - return false
1906  Operation *currentOp = currentDistOp.getOperation();
1907  if (distOp && (distOp != currentOp))
1908  return false;
1909 
1910  distOp = currentOp;
1911  }
1912 
1913  // If we are going to use distribute reduction then remove any debug uses of
1914  // the reduction parameters in teamsOp. Otherwise they will be left without
1915  // any mapped value in moduleTranslation and will eventually error out.
1916  for (auto use : debugUses)
1917  use->erase();
1918  return true;
1919 }
1920 
1921 // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder
1922 static LogicalResult
1923 convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
1924  LLVM::ModuleTranslation &moduleTranslation) {
1925  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
 // NOTE(review): the implementation-status check guarding this early return
 // is on a line not visible in this listing.
1927  return failure();
1928 
1929  DenseMap<Value, llvm::Value *> reductionVariableMap;
1930  unsigned numReductionVars = op.getNumReductionVars();
1931  SmallVector<omp::DeclareReductionOp> reductionDecls;
1932  SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
1933  llvm::ArrayRef<bool> isByRef;
1934  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1935  findAllocaInsertPoint(builder, moduleTranslation);
1936 
1937  // Only do teams reduction if there is no distribute op that captures the
1938  // reduction instead.
1939  bool doTeamsReduction = !teamsReductionContainedInDistribute(op);
1940  if (doTeamsReduction) {
1941  isByRef = getIsByRef(op.getReductionByref());
1942 
1943  assert(isByRef.size() == op.getNumReductionVars());
1944 
1945  MutableArrayRef<BlockArgument> reductionArgs =
1946  llvm::cast<omp::BlockArgOpenMPOpInterface>(*op).getReductionBlockArgs();
1947 
1948  collectReductionDecls(op, reductionDecls);
1949 
 // NOTE(review): the opening of this call (reduction variable allocation
 // and initialization) is on a line not visible in this listing.
1951  op, reductionArgs, builder, moduleTranslation, allocaIP,
1952  reductionDecls, privateReductionVariables, reductionVariableMap,
1953  isByRef)))
1954  return failure();
1955  }
1956 
1957  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
 // NOTE(review): the statement saving the alloca insertion point for nested
 // regions begins on a line not visible in this listing.
1959  moduleTranslation, allocaIP);
1960  builder.restoreIP(codegenIP);
1961  return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
1962  moduleTranslation)
1963  .takeError();
1964  };
1965 
 // Translate the optional clause operands to LLVM values (null = absent).
1966  llvm::Value *numTeamsLower = nullptr;
1967  if (Value numTeamsLowerVar = op.getNumTeamsLower())
1968  numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);
1969 
1970  llvm::Value *numTeamsUpper = nullptr;
1971  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
1972  numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);
1973 
1974  llvm::Value *threadLimit = nullptr;
1975  if (Value threadLimitVar = op.getThreadLimit())
1976  threadLimit = moduleTranslation.lookupValue(threadLimitVar);
1977 
1978  llvm::Value *ifExpr = nullptr;
1979  if (Value ifVar = op.getIfExpr())
1980  ifExpr = moduleTranslation.lookupValue(ifVar);
1981 
1982  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1983  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1984  moduleTranslation.getOpenMPBuilder()->createTeams(
1985  ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr);
1986 
1987  if (failed(handleError(afterIP, *op)))
1988  return failure();
1989 
1990  builder.restoreIP(*afterIP);
1991  if (doTeamsReduction) {
1992  // Process the reductions if required.
 // NOTE(review): the opening of the reduction-finalization call is on a
 // line not visible in this listing.
1994  op, builder, moduleTranslation, allocaIP, reductionDecls,
1995  privateReductionVariables, isByRef,
1996  /*isNoWait*/ false, /*isTeamsReduction*/ true);
1997  }
1998  return success();
1999 }
2000 
/// Translates the MLIR `depend` clause operands/kinds into OpenMPIRBuilder
/// DependData entries (appended to the out-parameter declared on the missing
/// final signature line).
2001 static void
2002 buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
2003  LLVM::ModuleTranslation &moduleTranslation,
 // NOTE(review): the final parameter line (the `dds` output vector of
 // llvm::OpenMPIRBuilder::DependData) is not visible in this listing.
2005  if (dependVars.empty())
2006  return;
2007  for (auto dep : llvm::zip(dependVars, dependKinds->getValue())) {
2008  llvm::omp::RTLDependenceKindTy type;
2009  switch (
2010  cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
2011  case mlir::omp::ClauseTaskDepend::taskdependin:
2012  type = llvm::omp::RTLDependenceKindTy::DepIn;
2013  break;
2014  // The OpenMP runtime requires that the codegen for 'depend' clause for
2015  // 'out' dependency kind must be the same as codegen for 'depend' clause
2016  // with 'inout' dependency.
2017  case mlir::omp::ClauseTaskDepend::taskdependout:
2018  case mlir::omp::ClauseTaskDepend::taskdependinout:
2019  type = llvm::omp::RTLDependenceKindTy::DepInOut;
2020  break;
2021  case mlir::omp::ClauseTaskDepend::taskdependmutexinoutset:
2022  type = llvm::omp::RTLDependenceKindTy::DepMutexInOutSet;
2023  break;
2024  case mlir::omp::ClauseTaskDepend::taskdependinoutset:
2025  type = llvm::omp::RTLDependenceKindTy::DepInOutSet;
2026  break;
2027  };
2028  llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
2029  llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
2030  dds.emplace_back(dd);
2031  }
2032 }
2033 
2034 /// Shared implementation of a callback which adds a termiator for the new block
2035 /// created for the branch taken when an openmp construct is cancelled. The
2036 /// terminator is saved in \p cancelTerminators. This callback is invoked only
2037 /// if there is cancellation inside of the taskgroup body.
2038 /// The terminator will need to be fixed to branch to the correct block to
2039 /// cleanup the construct.
2040 static void
 // NOTE(review): the first signature line (taking the `cancelTerminators`
 // vector) is not visible in this listing.
2042  llvm::IRBuilderBase &llvmBuilder,
2043  llvm::OpenMPIRBuilder &ompBuilder, mlir::Operation *op,
2044  llvm::omp::Directive cancelDirective) {
2045  auto finiCB = [&](llvm::OpenMPIRBuilder::InsertPointTy ip) -> llvm::Error {
2046  llvm::IRBuilderBase::InsertPointGuard guard(llvmBuilder);
2047 
2048  // ip is currently in the block branched to if cancellation occured.
2049  // We need to create a branch to terminate that block.
2050  llvmBuilder.restoreIP(ip);
2051 
2052  // We must still clean up the construct after cancelling it, so we need to
2053  // branch to the block that finalizes the taskgroup.
2054  // That block has not been created yet so use this block as a dummy for now
2055  // and fix this after creating the operation.
2056  cancelTerminators.push_back(llvmBuilder.CreateBr(ip.getBlock()));
2057  return llvm::Error::success();
2058  };
2059  // We have to add the cleanup to the OpenMPIRBuilder before the body gets
2060  // created in case the body contains omp.cancel (which will then expect to be
2061  // able to find this cleanup callback).
2062  ompBuilder.pushFinalizationCB(
2063  {finiCB, cancelDirective, constructIsCancellable(op)});
2064 }
2065 
2066 /// If we cancelled the construct, we should branch to the finalization block of
2067 /// that construct. OMPIRBuilder structures the CFG such that the cleanup block
2068 /// is immediately before the continuation block. Now this finalization has
2069 /// been created we can fix the branch.
2070 static void
 // NOTE(review): the first signature line (taking the `cancelTerminators`
 // vector) is not visible in this listing.
2072  llvm::OpenMPIRBuilder &ompBuilder,
2073  const llvm::OpenMPIRBuilder::InsertPointTy &afterIP) {
2074  ompBuilder.popFinalizationCB();
 // The cleanup block is the single predecessor of the continuation block.
2075  llvm::BasicBlock *constructFini = afterIP.getBlock()->getSinglePredecessor();
2076  for (llvm::BranchInst *cancelBranch : cancelTerminators) {
2077  assert(cancelBranch->getNumSuccessors() == 1 &&
2078  "cancel branch should have one target");
 // Retarget the placeholder branch created in pushCancelFinalizationCB.
2079  cancelBranch->setSuccessor(0, constructFini);
2080  }
2081 }
2082 
2083 namespace {
2084 /// TaskContextStructManager takes care of creating and freeing a structure
2085 /// containing information needed by the task body to execute.
2086 class TaskContextStructManager {
2087 public:
2088  TaskContextStructManager(llvm::IRBuilderBase &builder,
2089  LLVM::ModuleTranslation &moduleTranslation,
 // NOTE(review): the final constructor parameter line (the privateDecls
 // vector) is not visible in this listing.
2091  : builder{builder}, moduleTranslation{moduleTranslation},
2092  privateDecls{privateDecls} {}
2093 
2094  /// Creates a heap allocated struct containing space for each private
2095  /// variable. Invariant: privateVarTypes, privateDecls, and the elements of
2096  /// the structure should all have the same order (although privateDecls which
2097  /// do not read from the mold argument are skipped).
2098  void generateTaskContextStruct();
2099 
2100  /// Create GEPs to access each member of the structure representing a private
2101  /// variable, adding them to llvmPrivateVars. Null values are added where
2102  /// private decls were skipped so that the ordering continues to match the
2103  /// private decls.
2104  void createGEPsToPrivateVars();
2105 
2106  /// De-allocate the task context structure.
2107  void freeStructPtr();
2108 
2109  MutableArrayRef<llvm::Value *> getLLVMPrivateVarGEPs() {
2110  return llvmPrivateVarGEPs;
2111  }
2112 
2113  llvm::Value *getStructPtr() { return structPtr; }
2114 
2115 private:
2116  llvm::IRBuilderBase &builder;
2117  LLVM::ModuleTranslation &moduleTranslation;
 // NOTE(review): the `privateDecls` member declaration is not visible in
 // this listing.
2119 
2120  /// The type of each member of the structure, in order.
2121  SmallVector<llvm::Type *> privateVarTypes;
2122 
2123  /// LLVM values for each private variable, or null if that private variable is
2124  /// not included in the task context structure
2125  SmallVector<llvm::Value *> llvmPrivateVarGEPs;
2126 
2127  /// A pointer to the structure containing context for this task.
2128  llvm::Value *structPtr = nullptr;
2129  /// The type of the structure
2130  llvm::Type *structTy = nullptr;
2131 };
2132 } // namespace
2133 
/// Heap-allocates the task context structure: one member per privatizer that
/// reads from its mold argument (others are allocated inside the task body).
/// No-op when there are no privatizers.
2134 void TaskContextStructManager::generateTaskContextStruct() {
2135  if (privateDecls.empty())
2136  return;
2137  privateVarTypes.reserve(privateDecls.size());
2138 
2139  for (omp::PrivateClauseOp &privOp : privateDecls) {
2140  // Skip private variables which can safely be allocated and initialised
2141  // inside of the task
2142  if (!privOp.readsFromMold())
2143  continue;
2144  Type mlirType = privOp.getType();
2145  privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
2146  }
2147 
2148  structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(),
2149  privateVarTypes);
2150 
2151  llvm::DataLayout dataLayout =
2152  builder.GetInsertBlock()->getModule()->getDataLayout();
2153  llvm::Type *intPtrTy = builder.getIntPtrTy(dataLayout);
2154  llvm::Constant *allocSize = llvm::ConstantExpr::getSizeOf(structTy);
2155 
2156  // Heap allocate the structure
 // Heap allocation (rather than stack) because the task may outlive the
 // current stack frame; see freeStructPtr() for the matching free().
2157  structPtr = builder.CreateMalloc(intPtrTy, structTy, allocSize,
2158  /*ArraySize=*/nullptr, /*MallocF=*/nullptr,
2159  "omp.task.context_ptr");
2160 }
2161 
2162 void TaskContextStructManager::createGEPsToPrivateVars() {
2163  if (!structPtr) {
2164  assert(privateVarTypes.empty());
2165  return;
2166  }
2167 
2168  // Create GEPs for each struct member
2169  llvmPrivateVarGEPs.clear();
2170  llvmPrivateVarGEPs.reserve(privateDecls.size());
2171  llvm::Value *zero = builder.getInt32(0);
2172  unsigned i = 0;
2173  for (auto privDecl : privateDecls) {
2174  if (!privDecl.readsFromMold()) {
2175  // Handle this inside of the task so we don't pass unnessecary vars in
2176  llvmPrivateVarGEPs.push_back(nullptr);
2177  continue;
2178  }
2179  llvm::Value *iVal = builder.getInt32(i);
2180  llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
2181  llvmPrivateVarGEPs.push_back(gep);
2182  i += 1;
2183  }
2184 }
2185 
2186 void TaskContextStructManager::freeStructPtr() {
2187  if (!structPtr)
2188  return;
2189 
2190  llvm::IRBuilderBase::InsertPointGuard guard{builder};
2191  // Ensure we don't put the call to free() after the terminator
2192  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
2193  builder.CreateFree(structPtr);
2194 }
2195 
2196 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
2197 static LogicalResult
2198 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
2199  LLVM::ModuleTranslation &moduleTranslation) {
2200  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2201  if (failed(checkImplementationStatus(*taskOp)))
2202  return failure();
2203 
2204  PrivateVarsInfo privateVarsInfo(taskOp);
2205  TaskContextStructManager taskStructMgr{builder, moduleTranslation,
2206  privateVarsInfo.privatizers};
2207 
2208  // Allocate and copy private variables before creating the task. This avoids
2209  // accessing invalid memory if (after this scope ends) the private variables
2210  // are initialized from host variables or if the variables are copied into
2211  // from host variables (firstprivate). The insertion point is just before
2212  // where the code for creating and scheduling the task will go. That puts this
2213  // code outside of the outlined task region, which is what we want because
2214  // this way the initialization and copy regions are executed immediately while
2215  // the host variable data are still live.
2216 
2217  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2218  findAllocaInsertPoint(builder, moduleTranslation);
2219 
2220  // Not using splitBB() because that requires the current block to have a
2221  // terminator.
2222  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
2223  llvm::BasicBlock *taskStartBlock = llvm::BasicBlock::Create(
2224  builder.getContext(), "omp.task.start",
2225  /*Parent=*/builder.GetInsertBlock()->getParent());
2226  llvm::Instruction *branchToTaskStartBlock = builder.CreateBr(taskStartBlock);
2227  builder.SetInsertPoint(branchToTaskStartBlock);
2228 
2229  // Now do this again to make the initialization and copy blocks
2230  llvm::BasicBlock *copyBlock =
2231  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
2232  llvm::BasicBlock *initBlock =
2233  splitBB(builder, /*CreateBranch=*/true, "omp.private.init");
2234 
2235  // Now the control flow graph should look like
2236  // starter_block:
2237  // <---- where we started when convertOmpTaskOp was called
2238  // br %omp.private.init
2239  // omp.private.init:
2240  // br %omp.private.copy
2241  // omp.private.copy:
2242  // br %omp.task.start
2243  // omp.task.start:
2244  // <---- where we want the insertion point to be when we call createTask()
2245 
2246  // Save the alloca insertion point on ModuleTranslation stack for use in
2247  // nested regions.
2249  moduleTranslation, allocaIP);
2250 
2251  // Allocate and initialize private variables
2252  builder.SetInsertPoint(initBlock->getTerminator());
2253 
2254  // Create task variable structure
2255  taskStructMgr.generateTaskContextStruct();
2256  // GEPs so that we can initialize the variables. Don't use these GEPs inside
2257  // of the body otherwise it will be the GEP not the struct which is fowarded
2258  // to the outlined function. GEPs forwarded in this way are passed in a
2259  // stack-allocated (by OpenMPIRBuilder) structure which is not safe for tasks
2260  // which may not be executed until after the current stack frame goes out of
2261  // scope.
2262  taskStructMgr.createGEPsToPrivateVars();
2263 
2264  for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
2265  llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
2266  privateVarsInfo.blockArgs,
2267  taskStructMgr.getLLVMPrivateVarGEPs())) {
2268  // To be handled inside the task.
2269  if (!privDecl.readsFromMold())
2270  continue;
2271  assert(llvmPrivateVarAlloc &&
2272  "reads from mold so shouldn't have been skipped");
2273 
2274  llvm::Expected<llvm::Value *> privateVarOrErr =
2275  initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2276  blockArg, llvmPrivateVarAlloc, initBlock);
2277  if (!privateVarOrErr)
2278  return handleError(privateVarOrErr, *taskOp.getOperation());
2279 
2281 
2282  // TODO: this is a bit of a hack for Fortran character boxes.
2283  // Character boxes are passed by value into the init region and then the
2284  // initialized character box is yielded by value. Here we need to store the
2285  // yielded value into the private allocation, and load the private
2286  // allocation to match the type expected by region block arguments.
2287  if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
2288  !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
2289  builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);
2290  // Load it so we have the value pointed to by the GEP
2291  llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(),
2292  llvmPrivateVarAlloc);
2293  }
2294  assert(llvmPrivateVarAlloc->getType() ==
2295  moduleTranslation.convertType(blockArg.getType()));
2296 
2297  // Mapping blockArg -> llvmPrivateVarAlloc is done inside the body callback
2298  // so that OpenMPIRBuilder doesn't try to pass each GEP address through a
2299  // stack allocated structure.
2300  }
2301 
2302  // firstprivate copy region
2303  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
2305  taskOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
2306  taskStructMgr.getLLVMPrivateVarGEPs(), privateVarsInfo.privatizers,
2307  taskOp.getPrivateNeedsBarrier())))
2308  return llvm::failure();
2309 
2310  // Set up for call to createTask()
2311  builder.SetInsertPoint(taskStartBlock);
2312 
2313  auto bodyCB = [&](InsertPointTy allocaIP,
2314  InsertPointTy codegenIP) -> llvm::Error {
2315  // Save the alloca insertion point on ModuleTranslation stack for use in
2316  // nested regions.
2318  moduleTranslation, allocaIP);
2319 
2320  // translate the body of the task:
2321  builder.restoreIP(codegenIP);
2322 
2323  llvm::BasicBlock *privInitBlock = nullptr;
2324  privateVarsInfo.llvmVars.resize(privateVarsInfo.blockArgs.size());
2325  for (auto [i, zip] : llvm::enumerate(llvm::zip_equal(
2326  privateVarsInfo.blockArgs, privateVarsInfo.privatizers,
2327  privateVarsInfo.mlirVars))) {
2328  auto [blockArg, privDecl, mlirPrivVar] = zip;
2329  // This is handled before the task executes
2330  if (privDecl.readsFromMold())
2331  continue;
2332 
2333  llvm::IRBuilderBase::InsertPointGuard guard(builder);
2334  llvm::Type *llvmAllocType =
2335  moduleTranslation.convertType(privDecl.getType());
2336  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
2337  llvm::Value *llvmPrivateVar = builder.CreateAlloca(
2338  llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
2339 
2340  llvm::Expected<llvm::Value *> privateVarOrError =
2341  initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2342  blockArg, llvmPrivateVar, privInitBlock);
2343  if (!privateVarOrError)
2344  return privateVarOrError.takeError();
2345  moduleTranslation.mapValue(blockArg, privateVarOrError.get());
2346  privateVarsInfo.llvmVars[i] = privateVarOrError.get();
2347  }
2348 
2349  taskStructMgr.createGEPsToPrivateVars();
2350  for (auto [i, llvmPrivVar] :
2351  llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) {
2352  if (!llvmPrivVar) {
2353  assert(privateVarsInfo.llvmVars[i] &&
2354  "This is added in the loop above");
2355  continue;
2356  }
2357  privateVarsInfo.llvmVars[i] = llvmPrivVar;
2358  }
2359 
2360  // Find and map the addresses of each variable within the task context
2361  // structure
2362  for (auto [blockArg, llvmPrivateVar, privateDecl] :
2363  llvm::zip_equal(privateVarsInfo.blockArgs, privateVarsInfo.llvmVars,
2364  privateVarsInfo.privatizers)) {
2365  // This was handled above.
2366  if (!privateDecl.readsFromMold())
2367  continue;
2368  // Fix broken pass-by-value case for Fortran character boxes
2369  if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
2370  llvmPrivateVar = builder.CreateLoad(
2371  moduleTranslation.convertType(blockArg.getType()), llvmPrivateVar);
2372  }
2373  assert(llvmPrivateVar->getType() ==
2374  moduleTranslation.convertType(blockArg.getType()));
2375  moduleTranslation.mapValue(blockArg, llvmPrivateVar);
2376  }
2377 
2378  auto continuationBlockOrError = convertOmpOpRegions(
2379  taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
2380  if (failed(handleError(continuationBlockOrError, *taskOp)))
2381  return llvm::make_error<PreviouslyReportedError>();
2382 
2383  builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
2384 
2385  if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(),
2386  privateVarsInfo.llvmVars,
2387  privateVarsInfo.privatizers)))
2388  return llvm::make_error<PreviouslyReportedError>();
2389 
2390  // Free heap allocated task context structure at the end of the task.
2391  taskStructMgr.freeStructPtr();
2392 
2393  return llvm::Error::success();
2394  };
2395 
2396  llvm::OpenMPIRBuilder &ompBuilder = *moduleTranslation.getOpenMPBuilder();
2397  SmallVector<llvm::BranchInst *> cancelTerminators;
2398  // The directive to match here is OMPD_taskgroup because it is the taskgroup
2399  // which is canceled. This is handled here because it is the task's cleanup
2400  // block which should be branched to.
2401  pushCancelFinalizationCB(cancelTerminators, builder, ompBuilder, taskOp,
2402  llvm::omp::Directive::OMPD_taskgroup);
2403 
2405  buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
2406  moduleTranslation, dds);
2407 
2408  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2409  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2410  moduleTranslation.getOpenMPBuilder()->createTask(
2411  ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
2412  moduleTranslation.lookupValue(taskOp.getFinal()),
2413  moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
2414  taskOp.getMergeable(),
2415  moduleTranslation.lookupValue(taskOp.getEventHandle()),
2416  moduleTranslation.lookupValue(taskOp.getPriority()));
2417 
2418  if (failed(handleError(afterIP, *taskOp)))
2419  return failure();
2420 
2421  // Set the correct branch target for task cancellation
2422  popCancelFinalizationCB(cancelTerminators, ompBuilder, afterIP.get());
2423 
2424  builder.restoreIP(*afterIP);
2425  return success();
2426 }
2427 
2428 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
2429 static LogicalResult
2430 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
2431  LLVM::ModuleTranslation &moduleTranslation) {
2432  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2433  if (failed(checkImplementationStatus(*tgOp)))
2434  return failure();
2435 
2436  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
2437  builder.restoreIP(codegenIP);
2438  return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region",
2439  builder, moduleTranslation)
2440  .takeError();
2441  };
2442 
2443  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
2444  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2445  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2446  moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP,
2447  bodyCB);
2448 
2449  if (failed(handleError(afterIP, *tgOp)))
2450  return failure();
2451 
2452  builder.restoreIP(*afterIP);
2453  return success();
2454 }
2455 
2456 static LogicalResult
2457 convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder,
2458  LLVM::ModuleTranslation &moduleTranslation) {
2459  if (failed(checkImplementationStatus(*twOp)))
2460  return failure();
2461 
2462  moduleTranslation.getOpenMPBuilder()->createTaskwait(builder.saveIP());
2463  return success();
2464 }
2465 
2466 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
// NOTE(review): this listing retains the upstream file's own line numbers and
// the extraction dropped some of them (2503, 2526, 2570, 2624), so four
// statements below are missing their opening line. The surviving text is
// reproduced unchanged; only comments have been added.
2467 static LogicalResult
2468 convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
2469  LLVM::ModuleTranslation &moduleTranslation) {
2470  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2471  auto wsloopOp = cast<omp::WsloopOp>(opInst);
// Reject clause combinations that are not implemented yet.
2472  if (failed(checkImplementationStatus(opInst)))
2473  return failure();
2474 
2475  auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
2476  llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref());
2477  assert(isByRef.size() == wsloopOp.getNumReductionVars());
2478 
2479  // Static is the default.
2480  auto schedule =
2481  wsloopOp.getScheduleKind().value_or(omp::ClauseScheduleKind::Static);
2482 
2483  // Find the loop configuration.
2484  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]);
2485  llvm::Type *ivType = step->getType();
2486  llvm::Value *chunk = nullptr;
2487  if (wsloopOp.getScheduleChunk()) {
// The chunk size is coerced to the induction variable's integer type.
2488  llvm::Value *chunkVar =
2489  moduleTranslation.lookupValue(wsloopOp.getScheduleChunk());
2490  chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
2491  }
2492 
2493  PrivateVarsInfo privateVarsInfo(wsloopOp);
2494 
2495  SmallVector<omp::DeclareReductionOp> reductionDecls;
2496  collectReductionDecls(wsloopOp, reductionDecls);
2497  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2498  findAllocaInsertPoint(builder, moduleTranslation);
2499 
2500  SmallVector<llvm::Value *> privateReductionVariables(
2501  wsloopOp.getNumReductionVars());
2502 
// NOTE(review): line 2503 is missing from this dump; judging by the uses
// below it declared `afterAllocas` (the private-variable allocation result).
2504  builder, moduleTranslation, privateVarsInfo, allocaIP);
2505  if (handleError(afterAllocas, opInst).failed())
2506  return failure();
2507 
2508  DenseMap<Value, llvm::Value *> reductionVariableMap;
2509 
2510  MutableArrayRef<BlockArgument> reductionArgs =
2511  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
2512 
2513  SmallVector<DeferredStore> deferredStores;
2514 
// Allocate the per-thread reduction variables; some initializing stores are
// collected into `deferredStores` to be emitted later.
2515  if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
2516  moduleTranslation, allocaIP, reductionDecls,
2517  privateReductionVariables, reductionVariableMap,
2518  deferredStores, isByRef)))
2519  return failure();
2520 
2521  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
2522  opInst)
2523  .failed())
2524  return failure();
2525 
// NOTE(review): line 2526 is missing from this dump; the trailing arguments
// below suggest the first-private copy step for the privatized variables.
2527  wsloopOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
2528  privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
2529  wsloopOp.getPrivateNeedsBarrier())))
2530  return failure();
2531 
2532  assert(afterAllocas.get()->getSinglePredecessor());
2533  if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
2534  moduleTranslation,
2535  afterAllocas.get()->getSinglePredecessor(),
2536  reductionDecls, privateReductionVariables,
2537  reductionVariableMap, isByRef, deferredStores)))
2538  return failure();
2539 
2540  // TODO: Handle doacross loops when the ordered clause has a parameter.
2541  bool isOrdered = wsloopOp.getOrdered().has_value();
2542  std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
2543  bool isSimd = wsloopOp.getScheduleSimd();
2544  bool loopNeedsBarrier = !wsloopOp.getNowait();
2545 
2546  // The only legal way for the direct parent to be omp.distribute is that this
2547  // represents 'distribute parallel do'. Otherwise, this is a regular
2548  // worksharing loop.
2549  llvm::omp::WorksharingLoopType workshareLoopType =
2550  llvm::isa_and_present<omp::DistributeOp>(opInst.getParentOp())
2551  ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop
2552  : llvm::omp::WorksharingLoopType::ForStaticLoop;
2553 
// Collect cancellation branch terminators now; they are retargeted to the
// correct cleanup block once the loop has been fully built (see pop below).
2554  SmallVector<llvm::BranchInst *> cancelTerminators;
2555  pushCancelFinalizationCB(cancelTerminators, builder, *ompBuilder, wsloopOp,
2556  llvm::omp::Directive::OMPD_for);
2557 
2558  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2559 
2560  // Initialize linear variables and linear step
2561  LinearClauseProcessor linearClauseProcessor;
2562  if (wsloopOp.getLinearVars().size()) {
2563  for (mlir::Value linearVar : wsloopOp.getLinearVars())
2564  linearClauseProcessor.createLinearVar(builder, moduleTranslation,
2565  linearVar);
2566  for (mlir::Value linearStep : wsloopOp.getLinearStepVars())
2567  linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);
2568  }
2569 
// NOTE(review): line 2570 is missing from this dump; `regionBlock`, consumed
// a few lines below, was presumably declared here from converting the region.
2571  wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation);
2572 
2573  if (failed(handleError(regionBlock, opInst)))
2574  return failure();
2575 
2576  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
2577 
2578  // Emit Initialization and Update IR for linear variables
2579  if (wsloopOp.getLinearVars().size()) {
2580  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
2581  linearClauseProcessor.initLinearVar(builder, moduleTranslation,
2582  loopInfo->getPreheader());
2583  if (failed(handleError(afterBarrierIP, *loopOp)))
2584  return failure();
2585  builder.restoreIP(*afterBarrierIP);
2586  linearClauseProcessor.updateLinearVar(builder, loopInfo->getBody(),
2587  loopInfo->getIndVar());
2588  linearClauseProcessor.outlineLinearFinalizationBB(builder,
2589  loopInfo->getExit());
2590  }
2591 
// Apply the worksharing-loop transformation to the canonical loop built from
// the region, honoring schedule kind/modifiers, chunking and ordering.
2592  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
2593  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
2594  ompBuilder->applyWorkshareLoop(
2595  ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
2596  convertToScheduleKind(schedule), chunk, isSimd,
2597  scheduleMod == omp::ScheduleModifier::monotonic,
2598  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
2599  workshareLoopType);
2600 
2601  if (failed(handleError(wsloopIP, opInst)))
2602  return failure();
2603 
2604  // Emit finalization and in-place rewrites for linear vars.
2605  if (wsloopOp.getLinearVars().size()) {
2606  llvm::OpenMPIRBuilder::InsertPointTy oldIP = builder.saveIP();
2607  assert(loopInfo->getLastIter() &&
2608  "`lastiter` in CanonicalLoopInfo is nullptr");
2609  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
2610  linearClauseProcessor.finalizeLinearVar(builder, moduleTranslation,
2611  loopInfo->getLastIter());
2612  if (failed(handleError(afterBarrierIP, *loopOp)))
2613  return failure();
2614  for (size_t index = 0; index < wsloopOp.getLinearVars().size(); index++)
2615  linearClauseProcessor.rewriteInPlace(builder, "omp.loop_nest.region",
2616  index);
2617  builder.restoreIP(oldIP);
2618  }
2619 
2620  // Set the correct branch target for task cancellation
2621  popCancelFinalizationCB(cancelTerminators, *ompBuilder, wsloopIP.get());
2622 
2623  // Process the reductions if required.
// NOTE(review): line 2624 is missing from this dump; the trailing arguments
// below suggest the `if (failed(...` opener of the reduction finalization call.
2625  wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
2626  privateReductionVariables, isByRef, wsloopOp.getNowait(),
2627  /*isTeamsReduction=*/false)))
2628  return failure();
2629 
// Finally, clean up the privatized variables.
2630  return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
2631  privateVarsInfo.llvmVars,
2632  privateVarsInfo.privatizers);
2633 }
2634 
2635 /// Converts the OpenMP parallel operation to LLVM IR.
// NOTE(review): this listing retains the upstream file's own line numbers and
// the extraction dropped some of them (2658, 2673, 2688, 2703, 2707, 2717,
// 2767), so several statements below are missing their opening line. The
// surviving text is reproduced unchanged; only comments have been added.
2636 static LogicalResult
2637 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
2638  LLVM::ModuleTranslation &moduleTranslation) {
2639  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2640  ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionByref());
2641  assert(isByRef.size() == opInst.getNumReductionVars());
2642  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2643 
// Reject clause combinations that are not implemented yet.
2644  if (failed(checkImplementationStatus(*opInst)))
2645  return failure();
2646 
2647  PrivateVarsInfo privateVarsInfo(opInst);
2648 
2649  // Collect reduction declarations
2650  SmallVector<omp::DeclareReductionOp> reductionDecls;
2651  collectReductionDecls(opInst, reductionDecls);
2652  SmallVector<llvm::Value *> privateReductionVariables(
2653  opInst.getNumReductionVars());
2654  SmallVector<DeferredStore> deferredStores;
2655 
// Body callback: privatization, reduction setup, and region translation all
// happen inside the outlined parallel body.
2656  auto bodyGenCB = [&](InsertPointTy allocaIP,
2657  InsertPointTy codeGenIP) -> llvm::Error {
// NOTE(review): line 2658 is missing from this dump; judging by the uses
// below it declared `afterAllocas` (the private-variable allocation result).
2659  builder, moduleTranslation, privateVarsInfo, allocaIP);
2660  if (handleError(afterAllocas, *opInst).failed())
2661  return llvm::make_error<PreviouslyReportedError>();
2662 
2663  // Allocate reduction vars
2664  DenseMap<Value, llvm::Value *> reductionVariableMap;
2665 
2666  MutableArrayRef<BlockArgument> reductionArgs =
2667  cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs();
2668 
// Re-anchor the alloca insertion point at the block terminator so that new
// allocas land after the ones already emitted.
2669  allocaIP =
2670  InsertPointTy(allocaIP.getBlock(),
2671  allocaIP.getBlock()->getTerminator()->getIterator());
2672 
// NOTE(review): line 2673 is missing from this dump; the trailing arguments
// below suggest the `if (failed(allocReductionVars(` opener.
2674  opInst, reductionArgs, builder, moduleTranslation, allocaIP,
2675  reductionDecls, privateReductionVariables, reductionVariableMap,
2676  deferredStores, isByRef)))
2677  return llvm::make_error<PreviouslyReportedError>();
2678 
2679  assert(afterAllocas.get()->getSinglePredecessor());
2680  builder.restoreIP(codeGenIP);
2681 
2682  if (handleError(
2683  initPrivateVars(builder, moduleTranslation, privateVarsInfo),
2684  *opInst)
2685  .failed())
2686  return llvm::make_error<PreviouslyReportedError>();
2687 
// NOTE(review): line 2688 is missing from this dump; the trailing arguments
// below suggest the first-private copy step for the privatized variables.
2689  opInst, builder, moduleTranslation, privateVarsInfo.mlirVars,
2690  privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
2691  opInst.getPrivateNeedsBarrier())))
2692  return llvm::make_error<PreviouslyReportedError>();
2693 
2694  if (failed(
2695  initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
2696  afterAllocas.get()->getSinglePredecessor(),
2697  reductionDecls, privateReductionVariables,
2698  reductionVariableMap, isByRef, deferredStores)))
2699  return llvm::make_error<PreviouslyReportedError>();
2700 
2701  // Save the alloca insertion point on ModuleTranslation stack for use in
2702  // nested regions.
// NOTE(review): line 2703 is missing from this dump; presumably the stack
// frame guard object holding the saved insertion point.
2704  moduleTranslation, allocaIP);
2705 
2706  // ParallelOp has only one region associated with it.
// NOTE(review): line 2707 is missing from this dump; `regionBlock`, used
// below, was presumably declared here from converting the region.
2708  opInst.getRegion(), "omp.par.region", builder, moduleTranslation);
2709  if (!regionBlock)
2710  return regionBlock.takeError();
2711 
2712  // Process the reductions if required.
2713  if (opInst.getNumReductionVars() > 0) {
2714  // Collect reduction info
2715  SmallVector<OwningReductionGen> owningReductionGens;
2716  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
// NOTE(review): line 2717 is missing from this dump; presumably the
// declaration of `reductionInfos` filled by collectReductionInfo below.
2718  collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
2719  owningReductionGens, owningAtomicReductionGens,
2720  privateReductionVariables, reductionInfos);
2721 
2722  // Move to region cont block
2723  builder.SetInsertPoint((*regionBlock)->getTerminator());
2724 
2725  // Generate reductions from info
// A placeholder terminator marks the split point; it is erased once the
// reduction code has been emitted.
2726  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
2727  builder.SetInsertPoint(tempTerminator);
2728 
2729  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
2730  ompBuilder->createReductions(
2731  builder.saveIP(), allocaIP, reductionInfos, isByRef,
2732  /*IsNoWait=*/false, /*IsTeamsReduction=*/false);
2733  if (!contInsertPoint)
2734  return contInsertPoint.takeError();
2735 
2736  if (!contInsertPoint->getBlock())
2737  return llvm::make_error<PreviouslyReportedError>();
2738 
2739  tempTerminator->eraseFromParent();
2740  builder.restoreIP(*contInsertPoint);
2741  }
2742 
2743  return llvm::Error::success();
2744  };
2745 
2746  auto privCB = [](InsertPointTy allocaIP, InsertPointTy codeGenIP,
2747  llvm::Value &, llvm::Value &val, llvm::Value *&replVal) {
2748  // tell OpenMPIRBuilder not to do anything. We handled Privatisation in
2749  // bodyGenCB.
2750  replVal = &val;
2751  return codeGenIP;
2752  };
2753 
2754  // TODO: Perform finalization actions for variables. This has to be
2755  // called for variables which have destructors/finalizers.
2756  auto finiCB = [&](InsertPointTy codeGenIP) -> llvm::Error {
2757  InsertPointTy oldIP = builder.saveIP();
2758  builder.restoreIP(codeGenIP);
2759 
2760  // if the reduction has a cleanup region, inline it here to finalize the
2761  // reduction variables
2762  SmallVector<Region *> reductionCleanupRegions;
2763  llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions),
2764  [](omp::DeclareReductionOp reductionDecl) {
2765  return &reductionDecl.getCleanupRegion();
2766  });
// NOTE(review): line 2767 is missing from this dump; the trailing arguments
// below suggest the `if (failed(inlineOmpRegionCleanup(` opener.
2768  reductionCleanupRegions, privateReductionVariables,
2769  moduleTranslation, builder, "omp.reduction.cleanup")))
2770  return llvm::createStringError(
2771  "failed to inline `cleanup` region of `omp.declare_reduction`");
2772 
2773  if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(),
2774  privateVarsInfo.llvmVars,
2775  privateVarsInfo.privatizers)))
2776  return llvm::make_error<PreviouslyReportedError>();
2777 
2778  builder.restoreIP(oldIP);
2779  return llvm::Error::success();
2780  };
2781 
// Translate the optional `if`, `num_threads` and `proc_bind` clauses.
2782  llvm::Value *ifCond = nullptr;
2783  if (auto ifVar = opInst.getIfExpr())
2784  ifCond = moduleTranslation.lookupValue(ifVar);
2785  llvm::Value *numThreads = nullptr;
2786  if (auto numThreadsVar = opInst.getNumThreads())
2787  numThreads = moduleTranslation.lookupValue(numThreadsVar);
2788  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
2789  if (auto bind = opInst.getProcBindKind())
2790  pbKind = getProcBindKind(*bind);
2791  bool isCancellable = constructIsCancellable(opInst);
2792 
2793  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2794  findAllocaInsertPoint(builder, moduleTranslation);
2795  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2796 
2797  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2798  ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
2799  ifCond, numThreads, pbKind, isCancellable);
2800 
2801  if (failed(handleError(afterIP, *opInst)))
2802  return failure();
2803 
2804  builder.restoreIP(*afterIP);
2805  return success();
2806 }
2807 
2808 /// Convert Order attribute to llvm::omp::OrderKind.
2809 static llvm::omp::OrderKind
2810 convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
2811  if (!o)
2812  return llvm::omp::OrderKind::OMP_ORDER_unknown;
2813  switch (*o) {
2814  case omp::ClauseOrderKind::Concurrent:
2815  return llvm::omp::OrderKind::OMP_ORDER_concurrent;
2816  }
2817  llvm_unreachable("Unknown ClauseOrderKind kind");
2818 }
2819 
2820 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
// NOTE(review): this listing retains the upstream file's own line numbers and
// the extraction dropped lines 2846 and 2909, so two statements below are
// missing their opening line. The surviving text is reproduced unchanged;
// only comments have been added.
2821 static LogicalResult
2822 convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
2823  LLVM::ModuleTranslation &moduleTranslation) {
2824  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2825  auto simdOp = cast<omp::SimdOp>(opInst);
2826 
// Reject clause combinations that are not implemented yet.
2827  if (failed(checkImplementationStatus(opInst)))
2828  return failure();
2829 
2830  PrivateVarsInfo privateVarsInfo(simdOp);
2831 
2832  MutableArrayRef<BlockArgument> reductionArgs =
2833  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
2834  DenseMap<Value, llvm::Value *> reductionVariableMap;
2835  SmallVector<llvm::Value *> privateReductionVariables(
2836  simdOp.getNumReductionVars());
2837  SmallVector<DeferredStore> deferredStores;
2838  SmallVector<omp::DeclareReductionOp> reductionDecls;
2839  collectReductionDecls(simdOp, reductionDecls);
2840  llvm::ArrayRef<bool> isByRef = getIsByRef(simdOp.getReductionByref());
2841  assert(isByRef.size() == simdOp.getNumReductionVars());
2842 
2843  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2844  findAllocaInsertPoint(builder, moduleTranslation);
2845 
// NOTE(review): line 2846 is missing from this dump; judging by the uses
// below it declared `afterAllocas` (the private-variable allocation result).
2847  builder, moduleTranslation, privateVarsInfo, allocaIP);
2848  if (handleError(afterAllocas, opInst).failed())
2849  return failure();
2850 
2851  if (failed(allocReductionVars(simdOp, reductionArgs, builder,
2852  moduleTranslation, allocaIP, reductionDecls,
2853  privateReductionVariables, reductionVariableMap,
2854  deferredStores, isByRef)))
2855  return failure();
2856 
2857  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
2858  opInst)
2859  .failed())
2860  return failure();
2861 
2862  // No call to copyFirstPrivateVars because FIRSTPRIVATE is not allowed for
2863  // SIMD.
2864 
2865  assert(afterAllocas.get()->getSinglePredecessor());
2866  if (failed(initReductionVars(simdOp, reductionArgs, builder,
2867  moduleTranslation,
2868  afterAllocas.get()->getSinglePredecessor(),
2869  reductionDecls, privateReductionVariables,
2870  reductionVariableMap, isByRef, deferredStores)))
2871  return failure();
2872 
// Translate the optional simdlen/safelen clauses into i64 constants.
2873  llvm::ConstantInt *simdlen = nullptr;
2874  if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
2875  simdlen = builder.getInt64(simdlenVar.value());
2876 
2877  llvm::ConstantInt *safelen = nullptr;
2878  if (std::optional<uint64_t> safelenVar = simdOp.getSafelen())
2879  safelen = builder.getInt64(safelenVar.value());
2880 
2881  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
2882  llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder());
2883 
// Collect the aligned variables together with their alignment constants.
2884  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
2885  std::optional<ArrayAttr> alignmentValues = simdOp.getAlignments();
2886  mlir::OperandRange operands = simdOp.getAlignedVars();
2887  for (size_t i = 0; i < operands.size(); ++i) {
2888  llvm::Value *alignment = nullptr;
2889  llvm::Value *llvmVal = moduleTranslation.lookupValue(operands[i]);
2890  llvm::Type *ty = llvmVal->getType();
2891 
2892  auto intAttr = cast<IntegerAttr>((*alignmentValues)[i]);
2893  alignment = builder.getInt64(intAttr.getInt());
2894  assert(ty->isPointerTy() && "Invalid type for aligned variable");
2895  assert(alignment && "Invalid alignment value");
2896 
2897  // Check if the alignment value is not a power of 2. If so, skip emitting
2898  // alignment.
2899  if (!intAttr.getValue().isPowerOf2())
2900  continue;
2901 
// Load the pointer value in the source block so the alignment assumption
// applies before the loop body runs.
2902  auto curInsert = builder.saveIP();
2903  builder.SetInsertPoint(sourceBlock);
2904  llvmVal = builder.CreateLoad(ty, llvmVal);
2905  builder.restoreIP(curInsert);
2906  alignedVars[llvmVal] = alignment;
2907  }
2908 
// NOTE(review): line 2909 is missing from this dump; `regionBlock`, used
// below, was presumably declared here from converting the region.
2910  simdOp.getRegion(), "omp.simd.region", builder, moduleTranslation);
2911 
2912  if (failed(handleError(regionBlock, opInst)))
2913  return failure();
2914 
2915  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
2916  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
2917  ompBuilder->applySimd(loopInfo, alignedVars,
2918  simdOp.getIfExpr()
2919  ? moduleTranslation.lookupValue(simdOp.getIfExpr())
2920  : nullptr,
2921  order, simdlen, safelen);
2922 
2923  // We now need to reduce the per-simd-lane reduction variable into the
2924  // original variable. This works a bit differently to other reductions (e.g.
2925  // wsloop) because we don't need to call into the OpenMP runtime to handle
2926  // threads: everything happened in this one thread.
2927  for (auto [i, tuple] : llvm::enumerate(
2928  llvm::zip(reductionDecls, isByRef, simdOp.getReductionVars(),
2929  privateReductionVariables))) {
2930  auto [decl, byRef, reductionVar, privateReductionVar] = tuple;
2931 
2932  OwningReductionGen gen = makeReductionGen(decl, builder, moduleTranslation);
2933  llvm::Value *originalVariable = moduleTranslation.lookupValue(reductionVar);
2934  llvm::Type *reductionType = moduleTranslation.convertType(decl.getType());
2935 
2936  // We have one less load for by-ref case because that load is now inside of
2937  // the reduction region.
2938  llvm::Value *redValue = originalVariable;
2939  if (!byRef)
2940  redValue =
2941  builder.CreateLoad(reductionType, redValue, "red.value." + Twine(i));
2942  llvm::Value *privateRedValue = builder.CreateLoad(
2943  reductionType, privateReductionVar, "red.private.value." + Twine(i));
2944  llvm::Value *reduced;
2945 
2946  auto res = gen(builder.saveIP(), redValue, privateRedValue, reduced);
2947  if (failed(handleError(res, opInst)))
2948  return failure();
2949  builder.restoreIP(res.get());
2950 
2951  // For by-ref case, the store is inside of the reduction region.
2952  if (!byRef)
2953  builder.CreateStore(reduced, originalVariable);
2954  }
2955 
2956  // After the construct, deallocate private reduction variables.
2957  SmallVector<Region *> reductionRegions;
2958  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
2959  [](omp::DeclareReductionOp reductionDecl) {
2960  return &reductionDecl.getCleanupRegion();
2961  });
2962  if (failed(inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
2963  moduleTranslation, builder,
2964  "omp.reduction.cleanup")))
2965  return failure();
2966 
// Finally, clean up the privatized variables.
2967  return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
2968  privateVarsInfo.llvmVars,
2969  privateVarsInfo.privatizers);
2970 }
2971 
2972 /// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
// NOTE(review): this listing retains the upstream file's own line numbers and
// the extraction dropped lines 2983-2984, 3001 and 3033, so some declarations
// below (the loop-info and body-insert-point vectors, `regionBlock`, and
// `loopResult`) are missing. The surviving text is reproduced unchanged; only
// comments have been added.
2973 static LogicalResult
2974 convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
2975  LLVM::ModuleTranslation &moduleTranslation) {
2976  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2977  auto loopOp = cast<omp::LoopNestOp>(opInst);
2978 
2979  // Set up the source location value for OpenMP runtime.
2980  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2981 
2982  // Generator of the canonical loop body.
// NOTE(review): lines 2983-2984 are missing from this dump; judging by the
// uses below they declared the `loopInfos` and `bodyInsertPoints` vectors.
2985  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip,
2986  llvm::Value *iv) -> llvm::Error {
2987  // Make sure further conversions know about the induction variable.
2988  moduleTranslation.mapValue(
2989  loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
2990 
2991  // Capture the body insertion point for use in nested loops. BodyIP of the
2992  // CanonicalLoopInfo always points to the beginning of the entry block of
2993  // the body.
2994  bodyInsertPoints.push_back(ip);
2995 
// Only the innermost loop's body generator translates the region; outer
// invocations just record their insertion points.
2996  if (loopInfos.size() != loopOp.getNumLoops() - 1)
2997  return llvm::Error::success();
2998 
2999  // Convert the body of the loop.
3000  builder.restoreIP(ip);
// NOTE(review): line 3001 is missing from this dump; `regionBlock`, used
// below, was presumably declared here from converting the region.
3002  loopOp.getRegion(), "omp.loop_nest.region", builder, moduleTranslation);
3003  if (!regionBlock)
3004  return regionBlock.takeError();
3005 
3006  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
3007  return llvm::Error::success();
3008  };
3009 
3010  // Delegate actual loop construction to the OpenMP IRBuilder.
3011  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
3012  // loop, i.e. it has a positive step, uses signed integer semantics.
3013  // Reconsider this code when the nested loop operation clearly supports more
3014  // cases.
3015  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
3016  llvm::Value *lowerBound =
3017  moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]);
3018  llvm::Value *upperBound =
3019  moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]);
3020  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]);
3021 
3022  // Make sure loop trip count are emitted in the preheader of the outermost
3023  // loop at the latest so that they are all available for the new collapsed
3024  // loop will be created below.
3025  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
3026  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
3027  if (i != 0) {
3028  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
3029  ompLoc.DL);
3030  computeIP = loopInfos.front()->getPreheaderIP();
3031  }
3032 
// NOTE(review): line 3033 is missing from this dump; presumably the
// declaration of `loopResult` receiving the created canonical loop.
3034  ompBuilder->createCanonicalLoop(
3035  loc, bodyGen, lowerBound, upperBound, step,
3036  /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);
3037 
3038  if (failed(handleError(loopResult, *loopOp)))
3039  return failure();
3040 
3041  loopInfos.push_back(*loopResult);
3042  }
3043 
3044  // Collapse loops. Store the insertion point because LoopInfos may get
3045  // invalidated.
3046  llvm::OpenMPIRBuilder::InsertPointTy afterIP =
3047  loopInfos.front()->getAfterIP();
3048 
3049  // Update the stack frame created for this loop to point to the resulting loop
3050  // after applying transformations.
3051  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
3052  [&](OpenMPLoopInfoStackFrame &frame) {
3053  frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
3054  return WalkResult::interrupt();
3055  });
3056 
3057  // Continue building IR after the loop. Note that the LoopInfo returned by
3058  // `collapseLoops` points inside the outermost loop and is intended for
3059  // potential further loop transformations. Use the insertion point stored
3060  // before collapsing loops instead.
3061  builder.restoreIP(afterIP);
3062  return success();
3063 }
3064 
3065 /// Convert an omp.canonical_loop to LLVM-IR
// NOTE(review): this listing retains the upstream file's own line numbers and
// the extraction dropped line 3077, which presumably declared `llvmOrError`
// (the result of createCanonicalLoop used below). The surviving text is
// reproduced unchanged; only comments have been added.
3066 static LogicalResult
3067 convertOmpCanonicalLoopOp(omp::CanonicalLoopOp op, llvm::IRBuilderBase &builder,
3068  LLVM::ModuleTranslation &moduleTranslation) {
3069  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3070 
3071  llvm::OpenMPIRBuilder::LocationDescription loopLoc(builder);
3072  Value loopIV = op.getInductionVar();
3073  Value loopTC = op.getTripCount();
3074 
3075  llvm::Value *llvmTC = moduleTranslation.lookupValue(loopTC);
3076 
// NOTE(review): line 3077 (the `llvmOrError` declaration) is missing here.
3078  ompBuilder->createCanonicalLoop(
3079  loopLoc,
3080  [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *llvmIV) {
3081  // Register the mapping of MLIR induction variable to LLVM-IR
3082  // induction variable
3083  moduleTranslation.mapValue(loopIV, llvmIV);
3084 
3085  builder.restoreIP(ip);
3086  llvm::Expected<llvm::BasicBlock *> bodyGenStatus =
3087  convertOmpOpRegions(op.getRegion(), "omp.loop.region", builder,
3088  moduleTranslation);
3089 
3090  return bodyGenStatus.takeError();
3091  },
3092  llvmTC, "omp.loop");
3093  if (!llvmOrError)
3094  return op.emitError(llvm::toString(llvmOrError.takeError()));
3095 
// Continue emitting IR after the loop construct.
3096  llvm::CanonicalLoopInfo *llvmCLI = *llvmOrError;
3097  llvm::IRBuilderBase::InsertPoint afterIP = llvmCLI->getAfterIP();
3098  builder.restoreIP(afterIP);
3099 
3100  // Register the mapping of MLIR loop to LLVM-IR OpenMPIRBuilder loop
3101  if (Value cli = op.getCli())
3102  moduleTranslation.mapOmpLoop(cli, llvmCLI);
3103 
3104  return success();
3105 }
3106 
3107 /// Apply a `#pragma omp unroll` / "!$omp unroll" transformation using the
3108 /// OpenMPIRBuilder.
3109 static LogicalResult
3110 applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder,
3111  LLVM::ModuleTranslation &moduleTranslation) {
3112  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3113 
3114  Value applyee = op.getApplyee();
3115  assert(applyee && "Loop to apply unrolling on required");
3116 
3117  llvm::CanonicalLoopInfo *consBuilderCLI =
3118  moduleTranslation.lookupOMPLoop(applyee);
3119  llvm::OpenMPIRBuilder::LocationDescription loc(builder);
3120  ompBuilder->unrollLoopHeuristic(loc.DL, consBuilderCLI);
3121 
3122  moduleTranslation.invalidateOmpLoop(applyee);
3123  return success();
3124 }
3125 
3126 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
3127 static llvm::AtomicOrdering
3128 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
3129  if (!ao)
3130  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
3131 
3132  switch (*ao) {
3133  case omp::ClauseMemoryOrderKind::Seq_cst:
3134  return llvm::AtomicOrdering::SequentiallyConsistent;
3135  case omp::ClauseMemoryOrderKind::Acq_rel:
3136  return llvm::AtomicOrdering::AcquireRelease;
3137  case omp::ClauseMemoryOrderKind::Acquire:
3138  return llvm::AtomicOrdering::Acquire;
3139  case omp::ClauseMemoryOrderKind::Release:
3140  return llvm::AtomicOrdering::Release;
3141  case omp::ClauseMemoryOrderKind::Relaxed:
3142  return llvm::AtomicOrdering::Monotonic;
3143  }
3144  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
3145 }
3146 
3147 /// Convert omp.atomic.read operation to LLVM IR.
3148 static LogicalResult
3149 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
3150  LLVM::ModuleTranslation &moduleTranslation) {
3151  auto readOp = cast<omp::AtomicReadOp>(opInst);
3152  if (failed(checkImplementationStatus(opInst)))
3153  return failure();
3154 
3155  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3156  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3157  findAllocaInsertPoint(builder, moduleTranslation);
3158 
3159  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3160 
3161  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrder());
3162  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
3163  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
3164 
3165  llvm::Type *elementType =
3166  moduleTranslation.convertType(readOp.getElementType());
3167 
3168  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
3169  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
3170  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO, allocaIP));
3171  return success();
3172 }
3173 
3174 /// Converts an omp.atomic.write operation to LLVM IR.
3175 static LogicalResult
3176 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
3177  LLVM::ModuleTranslation &moduleTranslation) {
3178  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
3179  if (failed(checkImplementationStatus(opInst)))
3180  return failure();
3181 
3182  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3183  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3184  findAllocaInsertPoint(builder, moduleTranslation);
3185 
3186  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3187  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder());
3188  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
3189  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
3190  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
3191  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
3192  /*isVolatile=*/false};
3193  builder.restoreIP(
3194  ompBuilder->createAtomicWrite(ompLoc, x, expr, ao, allocaIP));
3195  return success();
3196 }
3197 
3198 /// Converts an LLVM dialect binary operation to the corresponding enum value
3199 /// for `atomicrmw` supported binary operation.
3200 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
3202  .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
3203  .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
3204  .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
3205  .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
3206  .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
3207  .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
3208  .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
3209  .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
3210  .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
3211  .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
3212 }
3213 
3214 void extractAtomicControlFlags(omp::AtomicUpdateOp atomicUpdateOp,
3215  bool &isIgnoreDenormalMode,
3216  bool &isFineGrainedMemory,
3217  bool &isRemoteMemory) {
3218  isIgnoreDenormalMode = false;
3219  isFineGrainedMemory = false;
3220  isRemoteMemory = false;
3221  if (atomicUpdateOp &&
3222  atomicUpdateOp->hasAttr(atomicUpdateOp.getAtomicControlAttrName())) {
3223  mlir::omp::AtomicControlAttr atomicControlAttr =
3224  atomicUpdateOp.getAtomicControlAttr();
3225  isIgnoreDenormalMode = atomicControlAttr.getIgnoreDenormalMode();
3226  isFineGrainedMemory = atomicControlAttr.getFineGrainedMemory();
3227  isRemoteMemory = atomicControlAttr.getRemoteMemory();
3228  }
3229 }
3230 
3231 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
3232 static LogicalResult
3233 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
3234  llvm::IRBuilderBase &builder,
3235  LLVM::ModuleTranslation &moduleTranslation) {
3236  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3237  if (failed(checkImplementationStatus(*opInst)))
3238  return failure();
3239 
3240  // Convert values and types.
3241  auto &innerOpList = opInst.getRegion().front().getOperations();
3242  bool isXBinopExpr{false};
3243  llvm::AtomicRMWInst::BinOp binop;
3244  mlir::Value mlirExpr;
3245  llvm::Value *llvmExpr = nullptr;
3246  llvm::Value *llvmX = nullptr;
3247  llvm::Type *llvmXElementType = nullptr;
3248  if (innerOpList.size() == 2) {
3249  // The two operations here are the update and the terminator.
3250  // Since we can identify the update operation, there is a possibility
3251  // that we can generate the atomicrmw instruction.
3252  mlir::Operation &innerOp = *opInst.getRegion().front().begin();
3253  if (!llvm::is_contained(innerOp.getOperands(),
3254  opInst.getRegion().getArgument(0))) {
3255  return opInst.emitError("no atomic update operation with region argument"
3256  " as operand found inside atomic.update region");
3257  }
3258  binop = convertBinOpToAtomic(innerOp);
3259  isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
3260  mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
3261  llvmExpr = moduleTranslation.lookupValue(mlirExpr);
3262  } else {
3263  // Since the update region includes more than one operation
3264  // we will resort to generating a cmpxchg loop.
3265  binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
3266  }
3267  llvmX = moduleTranslation.lookupValue(opInst.getX());
3268  llvmXElementType = moduleTranslation.convertType(
3269  opInst.getRegion().getArgument(0).getType());
3270  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
3271  /*isSigned=*/false,
3272  /*isVolatile=*/false};
3273 
3274  llvm::AtomicOrdering atomicOrdering =
3275  convertAtomicOrdering(opInst.getMemoryOrder());
3276 
3277  // Generate update code.
3278  auto updateFn =
3279  [&opInst, &moduleTranslation](
3280  llvm::Value *atomicx,
3281  llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
3282  Block &bb = *opInst.getRegion().begin();
3283  moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
3284  moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
3285  if (failed(moduleTranslation.convertBlock(bb, true, builder)))
3286  return llvm::make_error<PreviouslyReportedError>();
3287 
3288  omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
3289  assert(yieldop && yieldop.getResults().size() == 1 &&
3290  "terminator must be omp.yield op and it must have exactly one "
3291  "argument");
3292  return moduleTranslation.lookupValue(yieldop.getResults()[0]);
3293  };
3294 
3295  bool isIgnoreDenormalMode;
3296  bool isFineGrainedMemory;
3297  bool isRemoteMemory;
3298  extractAtomicControlFlags(opInst, isIgnoreDenormalMode, isFineGrainedMemory,
3299  isRemoteMemory);
3300  // Handle ambiguous alloca, if any.
3301  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
3302  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3303  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3304  ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
3305  atomicOrdering, binop, updateFn,
3306  isXBinopExpr, isIgnoreDenormalMode,
3307  isFineGrainedMemory, isRemoteMemory);
3308 
3309  if (failed(handleError(afterIP, *opInst)))
3310  return failure();
3311 
3312  builder.restoreIP(*afterIP);
3313  return success();
3314 }
3315 
3316 static LogicalResult
3317 convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
3318  llvm::IRBuilderBase &builder,
3319  LLVM::ModuleTranslation &moduleTranslation) {
3320  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3321  if (failed(checkImplementationStatus(*atomicCaptureOp)))
3322  return failure();
3323 
3324  mlir::Value mlirExpr;
3325  bool isXBinopExpr = false, isPostfixUpdate = false;
3326  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
3327 
3328  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
3329  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();
3330 
3331  assert((atomicUpdateOp || atomicWriteOp) &&
3332  "internal op must be an atomic.update or atomic.write op");
3333 
3334  if (atomicWriteOp) {
3335  isPostfixUpdate = true;
3336  mlirExpr = atomicWriteOp.getExpr();
3337  } else {
3338  isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
3339  atomicCaptureOp.getAtomicUpdateOp().getOperation();
3340  auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
3341  // Find the binary update operation that uses the region argument
3342  // and get the expression to update
3343  if (innerOpList.size() == 2) {
3344  mlir::Operation &innerOp = *atomicUpdateOp.getRegion().front().begin();
3345  if (!llvm::is_contained(innerOp.getOperands(),
3346  atomicUpdateOp.getRegion().getArgument(0))) {
3347  return atomicUpdateOp.emitError(
3348  "no atomic update operation with region argument"
3349  " as operand found inside atomic.update region");
3350  }
3351  binop = convertBinOpToAtomic(innerOp);
3352  isXBinopExpr =
3353  innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
3354  mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
3355  } else {
3356  binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
3357  }
3358  }
3359 
3360  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
3361  llvm::Value *llvmX =
3362  moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
3363  llvm::Value *llvmV =
3364  moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
3365  llvm::Type *llvmXElementType = moduleTranslation.convertType(
3366  atomicCaptureOp.getAtomicReadOp().getElementType());
3367  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
3368  /*isSigned=*/false,
3369  /*isVolatile=*/false};
3370  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
3371  /*isSigned=*/false,
3372  /*isVolatile=*/false};
3373 
3374  llvm::AtomicOrdering atomicOrdering =
3375  convertAtomicOrdering(atomicCaptureOp.getMemoryOrder());
3376 
3377  auto updateFn =
3378  [&](llvm::Value *atomicx,
3379  llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
3380  if (atomicWriteOp)
3381  return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
3382  Block &bb = *atomicUpdateOp.getRegion().begin();
3383  moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
3384  atomicx);
3385  moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
3386  if (failed(moduleTranslation.convertBlock(bb, true, builder)))
3387  return llvm::make_error<PreviouslyReportedError>();
3388 
3389  omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
3390  assert(yieldop && yieldop.getResults().size() == 1 &&
3391  "terminator must be omp.yield op and it must have exactly one "
3392  "argument");
3393  return moduleTranslation.lookupValue(yieldop.getResults()[0]);
3394  };
3395 
3396  bool isIgnoreDenormalMode;
3397  bool isFineGrainedMemory;
3398  bool isRemoteMemory;
3399  extractAtomicControlFlags(atomicUpdateOp, isIgnoreDenormalMode,
3400  isFineGrainedMemory, isRemoteMemory);
3401  // Handle ambiguous alloca, if any.
3402  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
3403  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3404  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3405  ompBuilder->createAtomicCapture(
3406  ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
3407  binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr,
3408  isIgnoreDenormalMode, isFineGrainedMemory, isRemoteMemory);
3409 
3410  if (failed(handleError(afterIP, *atomicCaptureOp)))
3411  return failure();
3412 
3413  builder.restoreIP(*afterIP);
3414  return success();
3415 }
3416 
3417 static llvm::omp::Directive convertCancellationConstructType(
3418  omp::ClauseCancellationConstructType directive) {
3419  switch (directive) {
3420  case omp::ClauseCancellationConstructType::Loop:
3421  return llvm::omp::Directive::OMPD_for;
3422  case omp::ClauseCancellationConstructType::Parallel:
3423  return llvm::omp::Directive::OMPD_parallel;
3424  case omp::ClauseCancellationConstructType::Sections:
3425  return llvm::omp::Directive::OMPD_sections;
3426  case omp::ClauseCancellationConstructType::Taskgroup:
3427  return llvm::omp::Directive::OMPD_taskgroup;
3428  }
3429  llvm_unreachable("Unhandled cancellation construct type");
3430 }
3431 
3432 static LogicalResult
3433 convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder,
3434  LLVM::ModuleTranslation &moduleTranslation) {
3435  if (failed(checkImplementationStatus(*op.getOperation())))
3436  return failure();
3437 
3438  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3439  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3440 
3441  llvm::Value *ifCond = nullptr;
3442  if (Value ifVar = op.getIfExpr())
3443  ifCond = moduleTranslation.lookupValue(ifVar);
3444 
3445  llvm::omp::Directive cancelledDirective =
3446  convertCancellationConstructType(op.getCancelDirective());
3447 
3448  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3449  ompBuilder->createCancel(ompLoc, ifCond, cancelledDirective);
3450 
3451  if (failed(handleError(afterIP, *op.getOperation())))
3452  return failure();
3453 
3454  builder.restoreIP(afterIP.get());
3455 
3456  return success();
3457 }
3458 
3459 static LogicalResult
3460 convertOmpCancellationPoint(omp::CancellationPointOp op,
3461  llvm::IRBuilderBase &builder,
3462  LLVM::ModuleTranslation &moduleTranslation) {
3463  if (failed(checkImplementationStatus(*op.getOperation())))
3464  return failure();
3465 
3466  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3467  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3468 
3469  llvm::omp::Directive cancelledDirective =
3470  convertCancellationConstructType(op.getCancelDirective());
3471 
3472  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3473  ompBuilder->createCancellationPoint(ompLoc, cancelledDirective);
3474 
3475  if (failed(handleError(afterIP, *op.getOperation())))
3476  return failure();
3477 
3478  builder.restoreIP(afterIP.get());
3479 
3480  return success();
3481 }
3482 
3483 /// Converts an OpenMP Threadprivate operation into LLVM IR using
3484 /// OpenMPIRBuilder.
3485 static LogicalResult
3486 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
3487  LLVM::ModuleTranslation &moduleTranslation) {
3488  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3489  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3490  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
3491 
3492  if (failed(checkImplementationStatus(opInst)))
3493  return failure();
3494 
3495  Value symAddr = threadprivateOp.getSymAddr();
3496  auto *symOp = symAddr.getDefiningOp();
3497 
3498  if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
3499  symOp = asCast.getOperand().getDefiningOp();
3500 
3501  if (!isa<LLVM::AddressOfOp>(symOp))
3502  return opInst.emitError("Addressing symbol not found");
3503  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
3504 
3505  LLVM::GlobalOp global =
3506  addressOfOp.getGlobal(moduleTranslation.symbolTable());
3507  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
3508 
3509  if (!ompBuilder->Config.isTargetDevice()) {
3510  llvm::Type *type = globalValue->getValueType();
3511  llvm::TypeSize typeSize =
3512  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
3513  type);
3514  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
3515  llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
3516  ompLoc, globalValue, size, global.getSymName() + ".cache");
3517  moduleTranslation.mapValue(opInst.getResult(0), callInst);
3518  } else {
3519  moduleTranslation.mapValue(opInst.getResult(0), globalValue);
3520  }
3521 
3522  return success();
3523 }
3524 
3525 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
3526 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
3527  switch (deviceClause) {
3528  case mlir::omp::DeclareTargetDeviceType::host:
3529  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
3530  break;
3531  case mlir::omp::DeclareTargetDeviceType::nohost:
3532  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
3533  break;
3534  case mlir::omp::DeclareTargetDeviceType::any:
3535  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
3536  break;
3537  }
3538  llvm_unreachable("unhandled device clause");
3539 }
3540 
3541 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
3543  mlir::omp::DeclareTargetCaptureClause captureClause) {
3544  switch (captureClause) {
3545  case mlir::omp::DeclareTargetCaptureClause::to:
3546  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
3547  case mlir::omp::DeclareTargetCaptureClause::link:
3548  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
3549  case mlir::omp::DeclareTargetCaptureClause::enter:
3550  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
3551  }
3552  llvm_unreachable("unhandled capture clause");
3553 }
3554 
3555 static llvm::SmallString<64>
3556 getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
3557  llvm::OpenMPIRBuilder &ompBuilder) {
3558  llvm::SmallString<64> suffix;
3559  llvm::raw_svector_ostream os(suffix);
3560  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
3561  auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
3562  auto fileInfoCallBack = [&loc]() {
3563  return std::pair<std::string, uint64_t>(
3564  llvm::StringRef(loc.getFilename()), loc.getLine());
3565  };
3566 
3567  os << llvm::format(
3568  "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
3569  }
3570  os << "_decl_tgt_ref_ptr";
3571 
3572  return suffix;
3573 }
3574 
3575 static bool isDeclareTargetLink(mlir::Value value) {
3576  if (auto addressOfOp = value.getDefiningOp<LLVM::AddressOfOp>()) {
3577  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
3578  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
3579  if (auto declareTargetGlobal =
3580  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
3581  if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3582  mlir::omp::DeclareTargetCaptureClause::link)
3583  return true;
3584  }
3585  return false;
3586 }
3587 
3588 // Returns the reference pointer generated by the lowering of the declare target
3589 // operation in cases where the link clause is used or the to clause is used in
3590 // USM mode.
3591 static llvm::Value *
3593  LLVM::ModuleTranslation &moduleTranslation) {
3594  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3595  Operation *op = value.getDefiningOp();
3596  if (auto addrCast = llvm::dyn_cast_if_present<LLVM::AddrSpaceCastOp>(op))
3597  op = addrCast->getOperand(0).getDefiningOp();
3598 
3599  // An easier way to do this may just be to keep track of any pointer
3600  // references and their mapping to their respective operation
3601  if (auto addressOfOp = llvm::dyn_cast_if_present<LLVM::AddressOfOp>(op)) {
3602  if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
3603  addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
3604  addressOfOp.getGlobalName()))) {
3605 
3606  if (auto declareTargetGlobal =
3607  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
3608  gOp.getOperation())) {
3609 
3610  // In this case, we must utilise the reference pointer generated by the
3611  // declare target operation, similar to Clang
3612  if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
3613  mlir::omp::DeclareTargetCaptureClause::link) ||
3614  (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3615  mlir::omp::DeclareTargetCaptureClause::to &&
3616  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
3617  llvm::SmallString<64> suffix =
3618  getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
3619 
3620  if (gOp.getSymName().contains(suffix))
3621  return moduleTranslation.getLLVMModule()->getNamedValue(
3622  gOp.getSymName());
3623 
3624  return moduleTranslation.getLLVMModule()->getNamedValue(
3625  (gOp.getSymName().str() + suffix.str()).str());
3626  }
3627  }
3628  }
3629  }
3630 
3631  return nullptr;
3632 }
3633 
3634 namespace {
3635 // Append customMappers information to existing MapInfosTy
3636 struct MapInfosTy : llvm::OpenMPIRBuilder::MapInfosTy {
3638 
3639  /// Append arrays in \a CurInfo.
3640  void append(MapInfosTy &curInfo) {
3641  Mappers.append(curInfo.Mappers.begin(), curInfo.Mappers.end());
3642  llvm::OpenMPIRBuilder::MapInfosTy::append(curInfo);
3643  }
3644 };
3645 // A small helper structure to contain data gathered
3646 // for map lowering and coalese it into one area and
3647 // avoiding extra computations such as searches in the
3648 // llvm module for lowered mapped variables or checking
3649 // if something is declare target (and retrieving the
3650 // value) more than neccessary.
3651 struct MapInfoData : MapInfosTy {
3652  llvm::SmallVector<bool, 4> IsDeclareTarget;
3653  llvm::SmallVector<bool, 4> IsAMember;
3654  // Identify if mapping was added by mapClause or use_device clauses.
3655  llvm::SmallVector<bool, 4> IsAMapping;
3658  // Stripped off array/pointer to get the underlying
3659  // element type
3661 
3662  /// Append arrays in \a CurInfo.
3663  void append(MapInfoData &CurInfo) {
3664  IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
3665  CurInfo.IsDeclareTarget.end());
3666  MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
3667  OriginalValue.append(CurInfo.OriginalValue.begin(),
3668  CurInfo.OriginalValue.end());
3669  BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
3670  MapInfosTy::append(CurInfo);
3671  }
3672 };
3673 } // namespace
3674 
3675 uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
3676  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
3677  arrTy.getElementType()))
3678  return getArrayElementSizeInBits(nestedArrTy, dl);
3679  return dl.getTypeSizeInBits(arrTy.getElementType());
3680 }
3681 
3682 // This function calculates the size to be offloaded for a specified type, given
3683 // its associated map clause (which can contain bounds information which affects
3684 // the total size), this size is calculated based on the underlying element type
3685 // e.g. given a 1-D array of ints, we will calculate the size from the integer
3686 // type * number of elements in the array. This size can be used in other
3687 // calculations but is ultimately used as an argument to the OpenMP runtimes
3688 // kernel argument structure which is generated through the combinedInfo data
3689 // structures.
3690 // This function is somewhat equivalent to Clang's getExprTypeSize inside of
3691 // CGOpenMPRuntime.cpp.
3692 llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
3693  Operation *clauseOp, llvm::Value *basePointer,
3694  llvm::Type *baseType, llvm::IRBuilderBase &builder,
3695  LLVM::ModuleTranslation &moduleTranslation) {
3696  if (auto memberClause =
3697  mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
3698  // This calculates the size to transfer based on bounds and the underlying
3699  // element type, provided bounds have been specified (Fortran
3700  // pointers/allocatables/target and arrays that have sections specified fall
3701  // into this as well).
3702  if (!memberClause.getBounds().empty()) {
3703  llvm::Value *elementCount = builder.getInt64(1);
3704  for (auto bounds : memberClause.getBounds()) {
3705  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
3706  bounds.getDefiningOp())) {
3707  // The below calculation for the size to be mapped calculated from the
3708  // map.info's bounds is: (elemCount * [UB - LB] + 1), later we
3709  // multiply by the underlying element types byte size to get the full
3710  // size to be offloaded based on the bounds
3711  elementCount = builder.CreateMul(
3712  elementCount,
3713  builder.CreateAdd(
3714  builder.CreateSub(
3715  moduleTranslation.lookupValue(boundOp.getUpperBound()),
3716  moduleTranslation.lookupValue(boundOp.getLowerBound())),
3717  builder.getInt64(1)));
3718  }
3719  }
3720 
3721  // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives
3722  // the size in inconsistent byte or bit format.
3723  uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
3724  if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
3725  underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);
3726 
3727  // The size in bytes x number of elements, the sizeInBytes stored is
3728  // the underyling types size, e.g. if ptr<i32>, it'll be the i32's
3729  // size, so we do some on the fly runtime math to get the size in
3730  // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
3731  // some adjustment for members with more complex types.
3732  return builder.CreateMul(elementCount,
3733  builder.getInt64(underlyingTypeSzInBits / 8));
3734  }
3735  }
3736 
3737  return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
3738 }
3739 
3741  MapInfoData &mapData, SmallVectorImpl<Value> &mapVars,
3742  LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl,
3743  llvm::IRBuilderBase &builder, ArrayRef<Value> useDevPtrOperands = {},
3744  ArrayRef<Value> useDevAddrOperands = {},
3745  ArrayRef<Value> hasDevAddrOperands = {}) {
3746  auto checkIsAMember = [](const auto &mapVars, auto mapOp) {
3747  // Check if this is a member mapping and correctly assign that it is, if
3748  // it is a member of a larger object.
3749  // TODO: Need better handling of members, and distinguishing of members
3750  // that are implicitly allocated on device vs explicitly passed in as
3751  // arguments.
3752  // TODO: May require some further additions to support nested record
3753  // types, i.e. member maps that can have member maps.
3754  for (Value mapValue : mapVars) {
3755  auto map = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3756  for (auto member : map.getMembers())
3757  if (member == mapOp)
3758  return true;
3759  }
3760  return false;
3761  };
3762 
3763  // Process MapOperands
3764  for (Value mapValue : mapVars) {
3765  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3766  Value offloadPtr =
3767  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3768  mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
3769  mapData.Pointers.push_back(mapData.OriginalValue.back());
3770 
3771  if (llvm::Value *refPtr =
3772  getRefPtrIfDeclareTarget(offloadPtr,
3773  moduleTranslation)) { // declare target
3774  mapData.IsDeclareTarget.push_back(true);
3775  mapData.BasePointers.push_back(refPtr);
3776  } else { // regular mapped variable
3777  mapData.IsDeclareTarget.push_back(false);
3778  mapData.BasePointers.push_back(mapData.OriginalValue.back());
3779  }
3780 
3781  mapData.BaseType.push_back(
3782  moduleTranslation.convertType(mapOp.getVarType()));
3783  mapData.Sizes.push_back(
3784  getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
3785  mapData.BaseType.back(), builder, moduleTranslation));
3786  mapData.MapClause.push_back(mapOp.getOperation());
3787  mapData.Types.push_back(
3788  llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType()));
3789  mapData.Names.push_back(LLVM::createMappingInformation(
3790  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3791  mapData.DevicePointers.push_back(llvm::OpenMPIRBuilder::DeviceInfoTy::None);
3792  if (mapOp.getMapperId())
3793  mapData.Mappers.push_back(
3794  SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>(
3795  mapOp, mapOp.getMapperIdAttr()));
3796  else
3797  mapData.Mappers.push_back(nullptr);
3798  mapData.IsAMapping.push_back(true);
3799  mapData.IsAMember.push_back(checkIsAMember(mapVars, mapOp));
3800  }
3801 
3802  auto findMapInfo = [&mapData](llvm::Value *val,
3803  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
3804  unsigned index = 0;
3805  bool found = false;
3806  for (llvm::Value *basePtr : mapData.OriginalValue) {
3807  if (basePtr == val && mapData.IsAMapping[index]) {
3808  found = true;
3809  mapData.Types[index] |=
3810  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
3811  mapData.DevicePointers[index] = devInfoTy;
3812  }
3813  index++;
3814  }
3815  return found;
3816  };
3817 
3818  // Process useDevPtr(Addr)Operands
3819  auto addDevInfos = [&](const llvm::ArrayRef<Value> &useDevOperands,
3820  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
3821  for (Value mapValue : useDevOperands) {
3822  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3823  Value offloadPtr =
3824  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3825  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
3826 
3827  // Check if map info is already present for this entry.
3828  if (!findMapInfo(origValue, devInfoTy)) {
3829  mapData.OriginalValue.push_back(origValue);
3830  mapData.Pointers.push_back(mapData.OriginalValue.back());
3831  mapData.IsDeclareTarget.push_back(false);
3832  mapData.BasePointers.push_back(mapData.OriginalValue.back());
3833  mapData.BaseType.push_back(
3834  moduleTranslation.convertType(mapOp.getVarType()));
3835  mapData.Sizes.push_back(builder.getInt64(0));
3836  mapData.MapClause.push_back(mapOp.getOperation());
3837  mapData.Types.push_back(
3838  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
3839  mapData.Names.push_back(LLVM::createMappingInformation(
3840  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3841  mapData.DevicePointers.push_back(devInfoTy);
3842  mapData.Mappers.push_back(nullptr);
3843  mapData.IsAMapping.push_back(false);
3844  mapData.IsAMember.push_back(checkIsAMember(useDevOperands, mapOp));
3845  }
3846  }
3847  };
3848 
3849  addDevInfos(useDevAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
3850  addDevInfos(useDevPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
3851 
3852  for (Value mapValue : hasDevAddrOperands) {
3853  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3854  Value offloadPtr =
3855  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3856  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
3857  auto mapType =
3858  static_cast<llvm::omp::OpenMPOffloadMappingFlags>(mapOp.getMapType());
3859  auto mapTypeAlways = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
3860 
3861  mapData.OriginalValue.push_back(origValue);
3862  mapData.BasePointers.push_back(origValue);
3863  mapData.Pointers.push_back(origValue);
3864  mapData.IsDeclareTarget.push_back(false);
3865  mapData.BaseType.push_back(
3866  moduleTranslation.convertType(mapOp.getVarType()));
3867  mapData.Sizes.push_back(
3868  builder.getInt64(dl.getTypeSize(mapOp.getVarType())));
3869  mapData.MapClause.push_back(mapOp.getOperation());
3870  if (llvm::to_underlying(mapType & mapTypeAlways)) {
3871  // Descriptors are mapped with the ALWAYS flag, since they can get
3872  // rematerialized, so the address of the descriptor for a given object
3873  // may change from one place to another.
3874  mapData.Types.push_back(mapType);
3875  // Technically it's possible for a non-descriptor mapping to have
3876  // both has-device-addr and ALWAYS, so lookup the mapper in case it
3877  // exists.
3878  if (mapOp.getMapperId()) {
3879  mapData.Mappers.push_back(
3880  SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>(
3881  mapOp, mapOp.getMapperIdAttr()));
3882  } else {
3883  mapData.Mappers.push_back(nullptr);
3884  }
3885  } else {
3886  mapData.Types.push_back(
3887  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
3888  mapData.Mappers.push_back(nullptr);
3889  }
3890  mapData.Names.push_back(LLVM::createMappingInformation(
3891  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3892  mapData.DevicePointers.push_back(
3893  llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
3894  mapData.IsAMapping.push_back(false);
3895  mapData.IsAMember.push_back(checkIsAMember(hasDevAddrOperands, mapOp));
3896  }
3897 }
3898 
3899 static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
3900  auto *res = llvm::find(mapData.MapClause, memberOp);
3901  assert(res != mapData.MapClause.end() &&
3902  "MapInfoOp for member not found in MapData, cannot return index");
3903  return std::distance(mapData.MapClause.begin(), res);
3904 }
3905 
3906 static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
3907  bool first) {
3908  ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
3909  // Only 1 member has been mapped, we can return it.
3910  if (indexAttr.size() == 1)
3911  return cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp());
3912 
3913  llvm::SmallVector<size_t> indices(indexAttr.size());
3914  std::iota(indices.begin(), indices.end(), 0);
3915 
3916  llvm::sort(indices, [&](const size_t a, const size_t b) {
3917  auto memberIndicesA = cast<ArrayAttr>(indexAttr[a]);
3918  auto memberIndicesB = cast<ArrayAttr>(indexAttr[b]);
3919  for (const auto it : llvm::zip(memberIndicesA, memberIndicesB)) {
3920  int64_t aIndex = cast<IntegerAttr>(std::get<0>(it)).getInt();
3921  int64_t bIndex = cast<IntegerAttr>(std::get<1>(it)).getInt();
3922 
3923  if (aIndex == bIndex)
3924  continue;
3925 
3926  if (aIndex < bIndex)
3927  return first;
3928 
3929  if (aIndex > bIndex)
3930  return !first;
3931  }
3932 
3933  // Iterated the up until the end of the smallest member and
3934  // they were found to be equal up to that point, so select
3935  // the member with the lowest index count, so the "parent"
3936  return memberIndicesA.size() < memberIndicesB.size();
3937  });
3938 
3939  return llvm::cast<omp::MapInfoOp>(
3940  mapInfo.getMembers()[indices.front()].getDefiningOp());
3941 }
3942 
3943 /// This function calculates the array/pointer offset for map data provided
3944 /// with bounds operations, e.g. when provided something like the following:
3945 ///
3946 /// Fortran
3947 /// map(tofrom: array(2:5, 3:2))
3948 /// or
3949 /// C++
3950 /// map(tofrom: array[1:4][2:3])
3951 /// We must calculate the initial pointer offset to pass across, this function
3952 /// performs this using bounds.
3953 ///
3954 /// NOTE: which while specified in row-major order it currently needs to be
3955 /// flipped for Fortran's column order array allocation and access (as
3956 /// opposed to C++'s row-major, hence the backwards processing where order is
3957 /// important). This is likely important to keep in mind for the future when
3958 /// we incorporate a C++ frontend, both frontends will need to agree on the
3959 /// ordering of generated bounds operations (one may have to flip them) to
3960 /// make the below lowering frontend agnostic. The offload size
3961 /// calcualtion may also have to be adjusted for C++.
3962 std::vector<llvm::Value *>
3963 calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
3964  llvm::IRBuilderBase &builder, bool isArrayTy,
3965  OperandRange bounds) {
3966  std::vector<llvm::Value *> idx;
3967  // There's no bounds to calculate an offset from, we can safely
3968  // ignore and return no indices.
3969  if (bounds.empty())
3970  return idx;
3971 
3972  // If we have an array type, then we have its type so can treat it as a
3973  // normal GEP instruction where the bounds operations are simply indexes
3974  // into the array. We currently do reverse order of the bounds, which
3975  // I believe leans more towards Fortran's column-major in memory.
3976  if (isArrayTy) {
3977  idx.push_back(builder.getInt64(0));
3978  for (int i = bounds.size() - 1; i >= 0; --i) {
3979  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3980  bounds[i].getDefiningOp())) {
3981  idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
3982  }
3983  }
3984  } else {
3985  // If we do not have an array type, but we have bounds, then we're dealing
3986  // with a pointer that's being treated like an array and we have the
3987  // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
3988  // address (pointer pointing to the actual data) so we must caclulate the
3989  // offset using a single index which the following two loops attempts to
3990  // compute.
3991 
3992  // Calculates the size offset we need to make per row e.g. first row or
3993  // column only needs to be offset by one, but the next would have to be
3994  // the previous row/column offset multiplied by the extent of current row.
3995  //
3996  // For example ([1][10][100]):
3997  //
3998  // - First row/column we move by 1 for each index increment
3999  // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
4000  // current) for 10 for each index increment
4001  // - Third row/column we would move by 10 (second row/column) *
4002  // (extent/size of current) 100 for 1000 for each index increment
4003  std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
4004  for (size_t i = 1; i < bounds.size(); ++i) {
4005  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
4006  bounds[i].getDefiningOp())) {
4007  dimensionIndexSizeOffset.push_back(builder.CreateMul(
4008  moduleTranslation.lookupValue(boundOp.getExtent()),
4009  dimensionIndexSizeOffset[i - 1]));
4010  }
4011  }
4012 
4013  // Now that we have calculated how much we move by per index, we must
4014  // multiply each lower bound offset in indexes by the size offset we
4015  // have calculated in the previous and accumulate the results to get
4016  // our final resulting offset.
4017  for (int i = bounds.size() - 1; i >= 0; --i) {
4018  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
4019  bounds[i].getDefiningOp())) {
4020  if (idx.empty())
4021  idx.emplace_back(builder.CreateMul(
4022  moduleTranslation.lookupValue(boundOp.getLowerBound()),
4023  dimensionIndexSizeOffset[i]));
4024  else
4025  idx.back() = builder.CreateAdd(
4026  idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
4027  boundOp.getLowerBound()),
4028  dimensionIndexSizeOffset[i]));
4029  }
4030  }
4031  }
4032 
4033  return idx;
4034 }
4035 
4036 // This creates two insertions into the MapInfosTy data structure for the
4037 // "parent" of a set of members, (usually a container e.g.
4038 // class/structure/derived type) when subsequent members have also been
4039 // explicitly mapped on the same map clause. Certain types, such as Fortran
4040 // descriptors are mapped like this as well, however, the members are
4041 // implicit as far as a user is concerned, but we must explicitly map them
4042 // internally.
4043 //
4044 // This function also returns the memberOfFlag for this particular parent,
4045 // which is utilised in subsequent member mappings (by modifying their map type
4046 // with it) to indicate that a member is part of this parent and should be
4047 // treated by the runtime as such. Important to achieve the correct mapping.
4048 //
4049 // This function borrows a lot from Clang's emitCombinedEntry function
4050 // inside of CGOpenMPRuntime.cpp
4051 static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
4052  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
4053  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
4054  MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams) {
4055  assert(!ompBuilder.Config.isTargetDevice() &&
4056  "function only supported for host device codegen");
4057 
4058  // Map the first segment of our structure
4059  combinedInfo.Types.emplace_back(
4060  isTargetParams
4061  ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
4062  : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE)
4063  combinedInfo.DevicePointers.emplace_back(
4064  mapData.DevicePointers[mapDataIndex]);
4065  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIndex]);
4066  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
4067  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
4068  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
4069 
4070  // Calculate size of the parent object being mapped based on the
4071  // addresses at runtime, highAddr - lowAddr = size. This of course
4072  // doesn't factor in allocated data like pointers, hence the further
4073  // processing of members specified by users, or in the case of
4074  // Fortran pointers and allocatables, the mapping of the pointed to
4075  // data by the descriptor (which itself, is a structure containing
4076  // runtime information on the dynamically allocated data).
4077  auto parentClause =
4078  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
4079 
  // Runtime endpoints of the parent's span. For a full map they come from
  // the parent itself; for a partial map they come from the first/last
  // explicitly mapped members instead.
4080  llvm::Value *lowAddr, *highAddr;
4081  if (!parentClause.getPartialMap()) {
4082  lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
4083  builder.getPtrTy());
4084  highAddr = builder.CreatePointerCast(
4085  builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
4086  mapData.Pointers[mapDataIndex], 1),
4087  builder.getPtrTy());
4088  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
4089  } else {
4090  auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
4091  int firstMemberIdx = getMapDataMemberIdx(
4092  mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
4093  lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
4094  builder.getPtrTy());
4095  int lastMemberIdx = getMapDataMemberIdx(
4096  mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
4097  highAddr = builder.CreatePointerCast(
4098  builder.CreateGEP(mapData.BaseType[lastMemberIdx],
4099  mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
4100  builder.getPtrTy());
4101  combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
4102  }
4103 
4104  llvm::Value *size = builder.CreateIntCast(
4105  builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
4106  builder.getInt64Ty(),
4107  /*isSigned=*/false);
4108  combinedInfo.Sizes.push_back(size);
4109 
  // Encode the position of the parent entry just added, so subsequent member
  // entries can reference it through their MEMBER_OF bits.
4110  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
4111  ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
4112 
4113  // This creates the initial MEMBER_OF mapping that consists of
4114  // the parent/top level container (same as above effectively, except
4115  // with a fixed initial compile time size and separate maptype which
4116  // indicates the true map type (tofrom etc.). This parent mapping is
4117  // only relevant if the structure in its totality is being mapped,
4118  // otherwise the above suffices.
4119  if (!parentClause.getPartialMap()) {
4120  // TODO: This will need to be expanded to include the whole host of logic
4121  // for the map flags that Clang currently supports (e.g. it should do some
4122  // further case specific flag modifications). For the moment, it handles
4123  // what we support as expected.
4124  llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
4125  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
4126  combinedInfo.Types.emplace_back(mapFlag);
4127  combinedInfo.DevicePointers.emplace_back(
4129  combinedInfo.Mappers.emplace_back(nullptr);
4130  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
4131  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
4132  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
4133  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
4134  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
4135  }
4136  return memberOfFlag;
4137 }
4138 
4139 // The intent is to verify if the mapped data being passed is a
4140 // pointer -> pointee that requires special handling in certain cases,
4141 // e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
4142 //
4143 // There may be a better way to verify this, but unfortunately with
4144 // opaque pointers we lose the ability to easily check if something is
4145 // a pointer whilst maintaining access to the underlying type.
4146 static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
4147  // If we have a varPtrPtr field assigned then the underlying type is a pointer
4148  if (mapOp.getVarPtrPtr())
4149  return true;
4150 
4151  // If the map data is declare target with a link clause, then it's represented
4152  // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
4153  // no relation to pointers.
4154  if (isDeclareTargetLink(mapOp.getVarPtr()))
4155  return true;
4156 
4157  return false;
4158 }
4159 
4160 // This function is intended to add explicit mappings of members
// of a parent container (e.g. a class/structure/derived type), one
// combined-info entry per member, each linked to its parent entry via the
// provided memberOfFlag.
4162  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
4163  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
4164  MapInfoData &mapData, uint64_t mapDataIndex,
4165  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
4166  assert(!ompBuilder.Config.isTargetDevice() &&
4167  "function only supported for host device codegen");
4168 
4169  auto parentClause =
4170  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
4171 
4172  for (auto mappedMembers : parentClause.getMembers()) {
4173  auto memberClause =
4174  llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp())
4175  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
4176 
4177  assert(memberDataIdx >= 0 && "could not find mapped member of structure");
4178 
4179  // If we're currently mapping a pointer to a block of data, we must
4180  // initially map the pointer, and then attach/bind the data with a
4181  // subsequent map to the pointer. This segment of code generates the
4182  // pointer mapping, which can in certain cases be optimised out as Clang
4183  // currently does in its lowering. However, for the moment we do not do so,
4184  // in part as we currently have substantially less information on the data
4185  // being mapped at this stage.
4186  if (checkIfPointerMap(memberClause)) {
4187  auto mapFlag =
4188  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
4189  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4190  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
4191  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
4192  combinedInfo.Types.emplace_back(mapFlag);
4193  combinedInfo.DevicePointers.emplace_back(
4195  combinedInfo.Mappers.emplace_back(nullptr);
4196  combinedInfo.Names.emplace_back(
4197  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
4198  combinedInfo.BasePointers.emplace_back(
4199  mapData.BasePointers[mapDataIndex]);
4200  combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
4201  combinedInfo.Sizes.emplace_back(builder.getInt64(
4202  moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
4203  }
4204 
4205  // Apply the same memberOfFlag to indicate the member's link with the
4206  // parent and the other members of the same aggregate.
4207  auto mapFlag =
4208  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
4209  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4210  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
4211  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
4212  if (checkIfPointerMap(memberClause))
4213  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
4214 
4215  combinedInfo.Types.emplace_back(mapFlag);
4216  combinedInfo.DevicePointers.emplace_back(
4217  mapData.DevicePointers[memberDataIdx]);
4218  combinedInfo.Mappers.emplace_back(mapData.Mappers[memberDataIdx]);
4219  combinedInfo.Names.emplace_back(
4220  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
4221  uint64_t basePointerIndex =
4222  checkIfPointerMap(memberClause) ? memberDataIdx : mapDataIndex;
4223  combinedInfo.BasePointers.emplace_back(
4224  mapData.BasePointers[basePointerIndex]);
4225  combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
4226 
  // For pointer members, guard the size with a select so that a null member
  // pointer maps zero bytes (NOTE(review): presumably covering
  // unassociated Fortran pointers/allocatables — confirm with callers).
4227  llvm::Value *size = mapData.Sizes[memberDataIdx];
4228  if (checkIfPointerMap(memberClause)) {
4229  size = builder.CreateSelect(
4230  builder.CreateIsNull(mapData.Pointers[memberDataIdx]),
4231  builder.getInt64(0), size);
4232  }
4233 
4234  combinedInfo.Sizes.emplace_back(size);
4235  }
4236 }
4237 
4238 static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
4239  MapInfosTy &combinedInfo, bool isTargetParams,
4240  int mapDataParentIdx = -1) {
4241  // Declare Target Mappings are excluded from being marked as
4242  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
4243  // marked with OMP_MAP_PTR_AND_OBJ instead.
4244  auto mapFlag = mapData.Types[mapDataIdx];
4245  auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
4246 
4247  bool isPtrTy = checkIfPointerMap(mapInfoOp);
4248  if (isPtrTy)
4249  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
4250 
4251  if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx])
4252  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4253 
4254  if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy &&
4255  !isPtrTy)
4256  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
4257 
4258  // if we're provided a mapDataParentIdx, then the data being mapped is
4259  // part of a larger object (in a parent <-> member mapping) and in this
4260  // case our BasePointer should be the parent.
4261  if (mapDataParentIdx >= 0)
4262  combinedInfo.BasePointers.emplace_back(
4263  mapData.BasePointers[mapDataParentIdx]);
4264  else
4265  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
4266 
4267  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
4268  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
4269  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]);
4270  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
4271  combinedInfo.Types.emplace_back(mapFlag);
4272  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
4273 }
4274 
4275 static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation,
4276  llvm::IRBuilderBase &builder,
4277  llvm::OpenMPIRBuilder &ompBuilder,
4278  DataLayout &dl, MapInfosTy &combinedInfo,
4279  MapInfoData &mapData, uint64_t mapDataIndex,
4280  bool isTargetParams) {
4281  assert(!ompBuilder.Config.isTargetDevice() &&
4282  "function only supported for host device codegen");
4283 
4284  auto parentClause =
4285  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
4286 
4287  // If we have a partial map (no parent referenced in the map clauses of the
4288  // directive, only members) and only a single member, we do not need to bind
4289  // the map of the member to the parent, we can pass the member separately.
4290  if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) {
4291  auto memberClause = llvm::cast<omp::MapInfoOp>(
4292  parentClause.getMembers()[0].getDefiningOp());
4293  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
4294  // Note: Clang treats arrays with explicit bounds that fall into this
4295  // category as a parent with map case, however, it seems this isn't a
4296  // requirement, and processing them as an individual map is fine. So,
4297  // we will handle them as individual maps for the moment, as it's
4298  // difficult for us to check this as we always require bounds to be
4299  // specified currently and it's also marginally more optimal (single
4300  // map rather than two). The difference may come from the fact that
4301  // Clang maps array without bounds as pointers (which we do not
4302  // currently do), whereas we treat them as arrays in all cases
4303  // currently.
4304  processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams,
4305  mapDataIndex);
4306  return;
4307  }
4308 
4309  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
4310  mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
4311  combinedInfo, mapData, mapDataIndex, isTargetParams);
4312  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
4313  combinedInfo, mapData, mapDataIndex,
4314  memberOfParentFlag);
4315 }
4316 
4317 // This is a variation on Clang's GenerateOpenMPCapturedVars, which
4318 // generates different operation (e.g. load/store) combinations for
4319 // arguments to the kernel, based on map capture kinds which are then
4320 // utilised in the combinedInfo in place of the original Map value.
4321 static void
4322 createAlteredByCaptureMap(MapInfoData &mapData,
4323  LLVM::ModuleTranslation &moduleTranslation,
4324  llvm::IRBuilderBase &builder) {
4325  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4326  "function only supported for host device codegen");
4327  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
4328  // if it's declare target, skip it, it's handled separately.
4329  if (!mapData.IsDeclareTarget[i]) {
4330  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
4331  omp::VariableCaptureKind captureKind = mapOp.getMapCaptureType();
4332  bool isPtrTy = checkIfPointerMap(mapOp);
4333 
4334  // Currently handles array sectioning lowerbound case, but more
4335  // logic may be required in the future. Clang invokes EmitLValue,
4336  // which has specialised logic for special Clang types such as user
4337  // defines, so it is possible we will have to extend this for
4338  // structures or other complex types. As the general idea is that this
4339  // function mimics some of the logic from Clang that we require for
4340  // kernel argument passing from host -> device.
4341  switch (captureKind) {
4342  case omp::VariableCaptureKind::ByRef: {
4343  llvm::Value *newV = mapData.Pointers[i];
4344  std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
4345  moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
4346  mapOp.getBounds());
4347  if (isPtrTy)
4348  newV = builder.CreateLoad(builder.getPtrTy(), newV);
4349 
4350  if (!offsetIdx.empty())
4351  newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
4352  "array_offset");
4353  mapData.Pointers[i] = newV;
4354  } break;
4355  case omp::VariableCaptureKind::ByCopy: {
4356  llvm::Type *type = mapData.BaseType[i];
4357  llvm::Value *newV;
4358  if (mapData.Pointers[i]->getType()->isPointerTy())
4359  newV = builder.CreateLoad(type, mapData.Pointers[i]);
4360  else
4361  newV = mapData.Pointers[i];
4362 
4363  if (!isPtrTy) {
4364  auto curInsert = builder.saveIP();
4365  llvm::DebugLoc DbgLoc = builder.getCurrentDebugLocation();
4366  builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
4367  auto *memTempAlloc =
4368  builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
4369  builder.SetCurrentDebugLocation(DbgLoc);
4370  builder.restoreIP(curInsert);
4371 
4372  builder.CreateStore(newV, memTempAlloc);
4373  newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
4374  }
4375 
4376  mapData.Pointers[i] = newV;
4377  mapData.BasePointers[i] = newV;
4378  } break;
4379  case omp::VariableCaptureKind::This:
4380  case omp::VariableCaptureKind::VLAType:
4381  mapData.MapClause[i]->emitOpError("Unhandled capture kind");
4382  break;
4383  }
4384  }
4385  }
4386 }
4387 
4388 // Generate all map related information and fill the combinedInfo.
4389 static void genMapInfos(llvm::IRBuilderBase &builder,
4390  LLVM::ModuleTranslation &moduleTranslation,
4391  DataLayout &dl, MapInfosTy &combinedInfo,
4392  MapInfoData &mapData, bool isTargetParams = false) {
4393  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4394  "function only supported for host device codegen");
4395 
4396  // We wish to modify some of the methods in which arguments are
4397  // passed based on their capture type by the target region, this can
4398  // involve generating new loads and stores, which changes the
4399  // MLIR value to LLVM value mapping, however, we only wish to do this
4400  // locally for the current function/target and also avoid altering
4401  // ModuleTranslation, so we remap the base pointer or pointer stored
4402  // in the map infos corresponding MapInfoData, which is later accessed
4403  // by genMapInfos and createTarget to help generate the kernel and
4404  // kernel arg structure. It primarily becomes relevant in cases like
4405  // bycopy, or byref range'd arrays. In the default case, we simply
4406  // pass thee pointer byref as both basePointer and pointer.
4407  createAlteredByCaptureMap(mapData, moduleTranslation, builder);
4408 
4409  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4410 
4411  // We operate under the assumption that all vectors that are
4412  // required in MapInfoData are of equal lengths (either filled with
4413  // default constructed data or appropiate information) so we can
4414  // utilise the size from any component of MapInfoData, if we can't
4415  // something is missing from the initial MapInfoData construction.
4416  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
4417  // NOTE/TODO: We currently do not support arbitrary depth record
4418  // type mapping.
4419  if (mapData.IsAMember[i])
4420  continue;
4421 
4422  auto mapInfoOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[i]);
4423  if (!mapInfoOp.getMembers().empty()) {
4424  processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
4425  combinedInfo, mapData, i, isTargetParams);
4426  continue;
4427  }
4428 
4429  processIndividualMap(mapData, i, combinedInfo, isTargetParams);
4430  }
4431 }
4432 
4434 emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder,
4435  LLVM::ModuleTranslation &moduleTranslation,
4436  llvm::StringRef mapperFuncName);
4437 
// Return the LLVM function implementing the user-defined mapper described by
// \p op (an omp.declare_mapper): reuse the function previously registered
// under the platform-specific "omp_mapper..." name, or emit it on first use.
4439 getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder,
4440  LLVM::ModuleTranslation &moduleTranslation) {
4441  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4442  "function only supported for host device codegen");
4443  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
4444  std::string mapperFuncName =
4445  moduleTranslation.getOpenMPBuilder()->createPlatformSpecificName(
4446  {"omp_mapper", declMapperOp.getSymName()});
4447 
  // Reuse a previously emitted mapper function if one exists for this name.
4448  if (auto *lookupFunc = moduleTranslation.lookupFunction(mapperFuncName))
4449  return lookupFunc;
4450 
4451  return emitUserDefinedMapper(declMapperOp, builder, moduleTranslation,
4452  mapperFuncName);
4453 }
4454 
// Emit the LLVM function that implements the user-defined mapper described
// by \p op (an omp.declare_mapper) under the name \p mapperFuncName, and
// register it with the module translation.
4456 emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder,
4457  LLVM::ModuleTranslation &moduleTranslation,
4458  llvm::StringRef mapperFuncName) {
4459  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4460  "function only supported for host device codegen");
4461  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
4462  auto declMapperInfoOp = declMapperOp.getDeclareMapperInfo();
4463  DataLayout dl = DataLayout(declMapperOp->getParentOfType<ModuleOp>());
4464  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4465  llvm::Type *varType = moduleTranslation.convertType(declMapperOp.getType());
4466  SmallVector<Value> mapVars = declMapperInfoOp.getMapVars();
4467 
4468  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4469 
4470  // Fill up the arrays with all the mapped variables.
4471  MapInfosTy combinedInfo;
  // Callback run by the OpenMPIRBuilder to materialise the mapper body: it
  // converts the declare-mapper region (with its symbol bound to ptrPHI) and
  // collects the resulting map information into combinedInfo.
4472  auto genMapInfoCB =
4473  [&](InsertPointTy codeGenIP, llvm::Value *ptrPHI,
4474  llvm::Value *unused2) -> llvm::OpenMPIRBuilder::MapInfosOrErrorTy {
4475  builder.restoreIP(codeGenIP);
4476  moduleTranslation.mapValue(declMapperOp.getSymVal(), ptrPHI);
4477  moduleTranslation.mapBlock(&declMapperOp.getRegion().front(),
4478  builder.GetInsertBlock());
4479  if (failed(moduleTranslation.convertBlock(declMapperOp.getRegion().front(),
4480  /*ignoreArguments=*/true,
4481  builder)))
4482  return llvm::make_error<PreviouslyReportedError>();
4483  MapInfoData mapData;
4484  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
4485  builder);
4486  genMapInfos(builder, moduleTranslation, dl, combinedInfo, mapData);
4487 
4488  // Drop the mapping that is no longer necessary so that the same region can
4489  // be processed multiple times.
4490  moduleTranslation.forgetMapping(declMapperOp.getRegion());
4491  return combinedInfo;
4492  };
4493 
  // Resolve nested user-defined mappers, if any, for combined-info entry i;
  // a null entry means no custom mapper applies.
4494  auto customMapperCB = [&](unsigned i) -> llvm::Expected<llvm::Function *> {
4495  if (!combinedInfo.Mappers[i])
4496  return nullptr;
4497  return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
4498  moduleTranslation);
4499  };
4500 
4501  llvm::Expected<llvm::Function *> newFn = ompBuilder->emitUserDefinedMapper(
4502  genMapInfoCB, varType, mapperFuncName, customMapperCB);
4503  if (!newFn)
4504  return newFn.takeError();
  // Cache the emitted function so later lookups by name can reuse it.
4505  moduleTranslation.mapFunction(mapperFuncName, *newFn);
4506  return *newFn;
4507 }
4508 
/// Translates one of the OpenMP data-movement operations (`omp.target_data`,
/// `omp.target_enter_data`, `omp.target_exit_data` or `omp.target_update`) to
/// LLVM IR by emitting the corresponding `__tgt_target_data_*` runtime calls
/// through the OpenMPIRBuilder.
static LogicalResult
convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
                     LLVM::ModuleTranslation &moduleTranslation) {
  llvm::Value *ifCond = nullptr;
  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
  SmallVector<Value> mapVars;
  SmallVector<Value> useDevicePtrVars;
  SmallVector<Value> useDeviceAddrVars;
  // Runtime entry point used by the standalone enter/exit/update forms
  // (selected in the TypeSwitch below; unused for omp.target_data).
  llvm::omp::RuntimeFunction RTLFn;
  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());

  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
                                             /*SeparateBeginEndCalls=*/true);
  bool isTargetDevice = ompBuilder->Config.isTargetDevice();
  bool isOffloadEntry =
      isTargetDevice || !ompBuilder->Config.TargetTriples.empty();

  // Collect the clauses (if, device, map, use_device_ptr/addr, nowait) from
  // whichever of the four data-movement operations this is.
  LogicalResult result =
      llvm::TypeSwitch<Operation *, LogicalResult>(op)
          .Case([&](omp::TargetDataOp dataOp) {
            if (failed(checkImplementationStatus(*dataOp)))
              return failure();

            if (auto ifVar = dataOp.getIfExpr())
              ifCond = moduleTranslation.lookupValue(ifVar);

            // Only a compile-time constant device clause is honored here;
            // otherwise deviceID stays OMP_DEVICEID_UNDEF.
            if (auto devId = dataOp.getDevice())
              if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
                if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
                  deviceID = intAttr.getInt();

            mapVars = dataOp.getMapVars();
            useDevicePtrVars = dataOp.getUseDevicePtrVars();
            useDeviceAddrVars = dataOp.getUseDeviceAddrVars();
            return success();
          })
          .Case([&](omp::TargetEnterDataOp enterDataOp) -> LogicalResult {
            if (failed(checkImplementationStatus(*enterDataOp)))
              return failure();

            if (auto ifVar = enterDataOp.getIfExpr())
              ifCond = moduleTranslation.lookupValue(ifVar);

            if (auto devId = enterDataOp.getDevice())
              if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
                if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
                  deviceID = intAttr.getInt();
            RTLFn =
                enterDataOp.getNowait()
                    ? llvm::omp::OMPRTL___tgt_target_data_begin_nowait_mapper
                    : llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
            mapVars = enterDataOp.getMapVars();
            info.HasNoWait = enterDataOp.getNowait();
            return success();
          })
          .Case([&](omp::TargetExitDataOp exitDataOp) -> LogicalResult {
            if (failed(checkImplementationStatus(*exitDataOp)))
              return failure();

            if (auto ifVar = exitDataOp.getIfExpr())
              ifCond = moduleTranslation.lookupValue(ifVar);

            if (auto devId = exitDataOp.getDevice())
              if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
                if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
                  deviceID = intAttr.getInt();

            RTLFn = exitDataOp.getNowait()
                        ? llvm::omp::OMPRTL___tgt_target_data_end_nowait_mapper
                        : llvm::omp::OMPRTL___tgt_target_data_end_mapper;
            mapVars = exitDataOp.getMapVars();
            info.HasNoWait = exitDataOp.getNowait();
            return success();
          })
          .Case([&](omp::TargetUpdateOp updateDataOp) -> LogicalResult {
            if (failed(checkImplementationStatus(*updateDataOp)))
              return failure();

            if (auto ifVar = updateDataOp.getIfExpr())
              ifCond = moduleTranslation.lookupValue(ifVar);

            if (auto devId = updateDataOp.getDevice())
              if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
                if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
                  deviceID = intAttr.getInt();

            RTLFn =
                updateDataOp.getNowait()
                    ? llvm::omp::OMPRTL___tgt_target_data_update_nowait_mapper
                    : llvm::omp::OMPRTL___tgt_target_data_update_mapper;
            mapVars = updateDataOp.getMapVars();
            info.HasNoWait = updateDataOp.getNowait();
            return success();
          })
          .Default([&](Operation *op) {
            llvm_unreachable("unexpected operation");
            return failure();
          });

  if (failed(result))
    return failure();
  // Pretend we have IF(false) if we're not doing offload.
  if (!isOffloadEntry)
    ifCond = builder.getFalse();

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  MapInfoData mapData;
  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, DL,
                                builder, useDevicePtrVars, useDeviceAddrVars);

  // Fill up the arrays with all the mapped variables.
  MapInfosTy combinedInfo;
  auto genMapInfoCB = [&](InsertPointTy codeGenIP) -> MapInfosTy & {
    builder.restoreIP(codeGenIP);
    genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
    return combinedInfo;
  };

  // Define a lambda to apply mappings between use_device_addr and
  // use_device_ptr base pointers, and their associated block arguments.
  auto mapUseDevice =
      [&moduleTranslation](
          llvm::OpenMPIRBuilder::DeviceInfoTy type,
          llvm::ArrayRef<BlockArgument> blockArgs,
          llvm::SmallVectorImpl<Value> &useDeviceVars, MapInfoData &mapInfoData,
          llvm::function_ref<llvm::Value *(llvm::Value *)> mapper = nullptr) {
        for (auto [arg, useDevVar] :
             llvm::zip_equal(blockArgs, useDeviceVars)) {

          // The base pointer of a map is the pointee pointer when present
          // (pointer members), otherwise the mapped variable itself.
          auto getMapBasePtr = [](omp::MapInfoOp mapInfoOp) {
            return mapInfoOp.getVarPtrPtr() ? mapInfoOp.getVarPtrPtr()
                                            : mapInfoOp.getVarPtr();
          };

          // Find the map entry matching this use_device_* variable and bind
          // its (optionally mapper-transformed) base pointer to the block
          // argument.
          auto useDevMap = cast<omp::MapInfoOp>(useDevVar.getDefiningOp());
          for (auto [mapClause, devicePointer, basePointer] : llvm::zip_equal(
                   mapInfoData.MapClause, mapInfoData.DevicePointers,
                   mapInfoData.BasePointers)) {
            auto mapOp = cast<omp::MapInfoOp>(mapClause);
            if (getMapBasePtr(mapOp) != getMapBasePtr(useDevMap) ||
                devicePointer != type)
              continue;

            if (llvm::Value *devPtrInfoMap =
                    mapper ? mapper(basePointer) : basePointer) {
              moduleTranslation.mapValue(arg, devPtrInfoMap);
              break;
            }
          }
        }
      };

  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType)
      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
    // We must always restoreIP regardless of doing anything the caller
    // does not restore it, leading to incorrect (no) branch generation.
    builder.restoreIP(codeGenIP);
    assert(isa<omp::TargetDataOp>(op) &&
           "BodyGen requested for non TargetDataOp");
    auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(op);
    Region &region = cast<omp::TargetDataOp>(op).getRegion();
    switch (bodyGenType) {
    case BodyGenTy::Priv:
      // Check if any device ptr/addr info is available
      if (!info.DevicePtrInfoMap.empty()) {
        // use_device_addr values are loaded from the runtime-provided
        // location; use_device_ptr values are used directly.
        mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
                     blockArgIface.getUseDeviceAddrBlockArgs(),
                     useDeviceAddrVars, mapData,
                     [&](llvm::Value *basePointer) -> llvm::Value * {
                       if (!info.DevicePtrInfoMap[basePointer].second)
                         return nullptr;
                       return builder.CreateLoad(
                           builder.getPtrTy(),
                           info.DevicePtrInfoMap[basePointer].second);
                     });
        mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
                     blockArgIface.getUseDevicePtrBlockArgs(), useDevicePtrVars,
                     mapData, [&](llvm::Value *basePointer) {
                       return info.DevicePtrInfoMap[basePointer].second;
                     });

        if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
                                           moduleTranslation)))
          return llvm::make_error<PreviouslyReportedError>();
      }
      break;
    case BodyGenTy::DupNoPriv:
      if (info.DevicePtrInfoMap.empty()) {
        // For host device we still need to do the mapping for codegen,
        // otherwise it may try to lookup a missing value.
        if (!ompBuilder->Config.IsTargetDevice.value_or(false)) {
          mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
                       blockArgIface.getUseDeviceAddrBlockArgs(),
                       useDeviceAddrVars, mapData);
          mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
                       blockArgIface.getUseDevicePtrBlockArgs(),
                       useDevicePtrVars, mapData);
        }
      }
      break;
    case BodyGenTy::NoPriv:
      // If device info is available then region has already been generated
      if (info.DevicePtrInfoMap.empty()) {
        // For device pass, if use_device_ptr(addr) mappings were present,
        // we need to link them here before codegen.
        if (ompBuilder->Config.IsTargetDevice.value_or(false)) {
          mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
                       blockArgIface.getUseDeviceAddrBlockArgs(),
                       useDeviceAddrVars, mapData);
          mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
                       blockArgIface.getUseDevicePtrBlockArgs(),
                       useDevicePtrVars, mapData);
        }

        if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
                                           moduleTranslation)))
          return llvm::make_error<PreviouslyReportedError>();
      }
      break;
    }
    return builder.saveIP();
  };

  // Lazily materialize user-defined mapper functions for map clauses that
  // reference one.
  auto customMapperCB =
      [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
    if (!combinedInfo.Mappers[i])
      return nullptr;
    info.HasMapper = true;
    return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
                                            moduleTranslation);
  };

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);
  // omp.target_data has a body and therefore needs the body-gen callback;
  // the standalone forms only emit the selected runtime call (RTLFn).
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() {
    if (isa<omp::TargetDataOp>(op))
      return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
                                          builder.getInt64(deviceID), ifCond,
                                          info, genMapInfoCB, customMapperCB,
                                          /*MapperFunc=*/nullptr, bodyGenCB,
                                          /*DeviceAddrCB=*/nullptr);
    return ompBuilder->createTargetData(
        ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
        info, genMapInfoCB, customMapperCB, &RTLFn);
  }();

  if (failed(handleError(afterIP, *op)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
4764 
/// Translates `omp.distribute` to LLVM IR. When the parent `omp.teams`
/// reduction is fully contained in this distribute, the teams reduction is
/// also processed here (allocation/initialization before, combination and
/// cleanup after the distribute region).
static LogicalResult
convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
                     LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  auto distributeOp = cast<omp::DistributeOp>(opInst);
  if (failed(checkImplementationStatus(opInst)))
    return failure();

  /// Process teams op reduction in distribute if the reduction is contained in
  /// the distribute op.
  omp::TeamsOp teamsOp = opInst.getParentOfType<omp::TeamsOp>();
  bool doDistributeReduction =
      teamsOp ? teamsReductionContainedInDistribute(teamsOp) : false;

  DenseMap<Value, llvm::Value *> reductionVariableMap;
  unsigned numReductionVars = teamsOp ? teamsOp.getNumReductionVars() : 0;
  SmallVector<omp::DeclareReductionOp> reductionDecls;
  SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
  llvm::ArrayRef<bool> isByRef;

  if (doDistributeReduction) {
    isByRef = getIsByRef(teamsOp.getReductionByref());
    assert(isByRef.size() == teamsOp.getNumReductionVars());

    collectReductionDecls(teamsOp, reductionDecls);
    llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
        findAllocaInsertPoint(builder, moduleTranslation);

    MutableArrayRef<BlockArgument> reductionArgs =
        llvm::cast<omp::BlockArgOpenMPOpInterface>(*teamsOp)
            .getReductionBlockArgs();

    // Allocate and initialize the private copies of the teams reduction
    // variables before entering the distribute region.
    if (failed(allocAndInitializeReductionVars(
            teamsOp, reductionArgs, builder, moduleTranslation, allocaIP,
            reductionDecls, privateReductionVariables, reductionVariableMap,
            isByRef)))
      return failure();
  }

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  auto bodyGenCB = [&](InsertPointTy allocaIP,
                       InsertPointTy codeGenIP) -> llvm::Error {
    // Save the alloca insertion point on ModuleTranslation stack for use in
    // nested regions.
    LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
        moduleTranslation, allocaIP);

    // DistributeOp has only one region associated with it.
    builder.restoreIP(codeGenIP);
    PrivateVarsInfo privVarsInfo(distributeOp);

    llvm::Expected<llvm::BasicBlock *> afterAllocas =
        allocatePrivateVars(builder, moduleTranslation, privVarsInfo, allocaIP);
    if (handleError(afterAllocas, opInst).failed())
      return llvm::make_error<PreviouslyReportedError>();

    if (handleError(initPrivateVars(builder, moduleTranslation, privVarsInfo),
                    opInst)
            .failed())
      return llvm::make_error<PreviouslyReportedError>();

    // Copy-in initial values for firstprivate variables.
    if (failed(copyFirstPrivateVars(
            distributeOp, builder, moduleTranslation, privVarsInfo.mlirVars,
            privVarsInfo.llvmVars, privVarsInfo.privatizers,
            distributeOp.getPrivateNeedsBarrier())))
      return llvm::make_error<PreviouslyReportedError>();

    llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
    llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
    llvm::Expected<llvm::BasicBlock *> regionBlock =
        convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
                            builder, moduleTranslation);
    if (!regionBlock)
      return regionBlock.takeError();
    builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());

    // Skip applying a workshare loop below when translating 'distribute
    // parallel do' (it's been already handled by this point while translating
    // the nested omp.wsloop).
    if (!isa_and_present<omp::WsloopOp>(distributeOp.getNestedWrapper())) {
      // TODO: Add support for clauses which are valid for DISTRIBUTE
      // constructs. Static schedule is the default.
      auto schedule = omp::ClauseScheduleKind::Static;
      bool isOrdered = false;
      std::optional<omp::ScheduleModifier> scheduleMod;
      bool isSimd = false;
      llvm::omp::WorksharingLoopType workshareLoopType =
          llvm::omp::WorksharingLoopType::DistributeStaticLoop;
      bool loopNeedsBarrier = false;
      llvm::Value *chunk = nullptr;

      llvm::CanonicalLoopInfo *loopInfo =
          findCurrentLoopInfo(moduleTranslation);
      llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
          ompBuilder->applyWorkshareLoop(
              ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
              convertToScheduleKind(schedule), chunk, isSimd,
              scheduleMod == omp::ScheduleModifier::monotonic,
              scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
              workshareLoopType);

      if (!wsloopIP)
        return wsloopIP.takeError();
    }

    if (failed(cleanupPrivateVars(builder, moduleTranslation,
                                  distributeOp.getLoc(), privVarsInfo.llvmVars,
                                  privVarsInfo.privatizers)))
      return llvm::make_error<PreviouslyReportedError>();

    return llvm::Error::success();
  };

  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);

  if (failed(handleError(afterIP, opInst)))
    return failure();

  builder.restoreIP(*afterIP);

  if (doDistributeReduction) {
    // Process the reductions if required.
    return createReductionsAndCleanup(
        teamsOp, builder, moduleTranslation, allocaIP, reductionDecls,
        privateReductionVariables, isByRef,
        /*isNoWait*/ false, /*isTeamsReduction*/ true);
  }
  return success();
}
4898 
4899 /// Lowers the FlagsAttr which is applied to the module on the device
4900 /// pass when offloading, this attribute contains OpenMP RTL globals that can
4901 /// be passed as flags to the frontend, otherwise they are set to default
4902 LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
4903  LLVM::ModuleTranslation &moduleTranslation) {
4904  if (!cast<mlir::ModuleOp>(op))
4905  return failure();
4906 
4907  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4908 
4909  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
4910  attribute.getOpenmpDeviceVersion());
4911 
4912  if (attribute.getNoGpuLib())
4913  return success();
4914 
4915  ompBuilder->createGlobalFlag(
4916  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
4917  "__omp_rtl_debug_kind");
4918  ompBuilder->createGlobalFlag(
4919  attribute
4920  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
4921  ,
4922  "__omp_rtl_assume_teams_oversubscription");
4923  ompBuilder->createGlobalFlag(
4924  attribute
4925  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
4926  ,
4927  "__omp_rtl_assume_threads_oversubscription");
4928  ompBuilder->createGlobalFlag(
4929  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
4930  "__omp_rtl_assume_no_thread_state");
4931  ompBuilder->createGlobalFlag(
4932  attribute
4933  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
4934  ,
4935  "__omp_rtl_assume_no_nested_parallelism");
4936  return success();
4937 }
4938 
4939 static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
4940  omp::TargetOp targetOp,
4941  llvm::StringRef parentName = "") {
4942  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
4943 
4944  assert(fileLoc && "No file found from location");
4945  StringRef fileName = fileLoc.getFilename().getValue();
4946 
4947  llvm::sys::fs::UniqueID id;
4948  uint64_t line = fileLoc.getLine();
4949  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
4950  size_t fileHash = llvm::hash_value(fileName.str());
4951  size_t deviceId = 0xdeadf17e;
4952  targetInfo =
4953  llvm::TargetRegionEntryInfo(parentName, deviceId, fileHash, line);
4954  } else {
4955  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
4956  id.getFile(), line);
4957  }
4958 }
4959 
/// On the device, rewrites uses of declare-target globals inside \p func so
/// they are loaded from the mapped reference pointer instead of referring to
/// the original global directly.
static void
handleDeclareTargetMapVar(MapInfoData &mapData,
                          LLVM::ModuleTranslation &moduleTranslation,
                          llvm::IRBuilderBase &builder, llvm::Function *func) {
  assert(moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
         "function only supported for target device codegen");
  llvm::IRBuilderBase::InsertPointGuard guard(builder);
  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
    // In the case of declare target mapped variables, the basePointer is
    // the reference pointer generated by the convertDeclareTargetAttr
    // method. Whereas the kernelValue is the original variable, so for
    // the device we must replace all uses of this original global variable
    // (stored in kernelValue) with the reference pointer (stored in
    // basePointer for declare target mapped variables), as for device the
    // data is mapped into this reference pointer and should be loaded
    // from it, the original variable is discarded. On host both exist and
    // metadata is generated (elsewhere in the convertDeclareTargetAttr)
    // function to link the two variables in the runtime and then both the
    // reference pointer and the pointer are assigned in the kernel argument
    // structure for the host.
    if (mapData.IsDeclareTarget[i]) {
      // If the original map value is a constant, then we have to make sure all
      // of it's uses within the current kernel/function that we are going to
      // rewrite are converted to instructions, as we will be altering the old
      // use (OriginalValue) from a constant to an instruction, which will be
      // illegal and ICE the compiler if the user is a constant expression of
      // some kind e.g. a constant GEP.
      if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
        convertUsersOfConstantsToInstructions(constant, func, false);

      // The users iterator will get invalidated if we modify an element,
      // so we populate this vector of uses to alter each user on an
      // individual basis to emit its own load (rather than one load for
      // all).
      llvm::SmallVector<llvm::User *> userVec;
      for (llvm::User *user : mapData.OriginalValue[i]->users())
        userVec.push_back(user);

      for (llvm::User *user : userVec) {
        // Only rewrite instruction uses that live inside the kernel being
        // processed; uses in other functions are left untouched.
        if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
          if (insn->getFunction() == func) {
            builder.SetCurrentDebugLocation(insn->getDebugLoc());
            auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
                                            mapData.BasePointers[i]);
            load->moveBefore(insn->getIterator());
            user->replaceUsesOfWith(mapData.OriginalValue[i], load);
          }
        }
      }
    }
  }
}
5012 
5013 // The createDeviceArgumentAccessor function generates
// instructions for retrieving (accessing) kernel
5015 // arguments inside of the device kernel for use by
5016 // the kernel. This enables different semantics such as
5017 // the creation of temporary copies of data allowing
5018 // semantics like read-only/no host write back kernel
5019 // arguments.
5020 //
5021 // This currently implements a very light version of Clang's
5022 // EmitParmDecl's handling of direct argument handling as well
5023 // as a portion of the argument access generation based on
5024 // capture types found at the end of emitOutlinedFunctionPrologue
5025 // in Clang. The indirect path handling of EmitParmDecl's may be
5026 // required for future work, but a direct 1-to-1 copy doesn't seem
5027 // possible as the logic is rather scattered throughout Clang's
5028 // lowering and perhaps we wish to deviate slightly.
5029 //
// \param mapData - A container containing vectors of information
// corresponding to the input argument, which should have a
// corresponding entry in the MapInfoData containers
// OriginalValue's.
5034 // \param arg - This is the generated kernel function argument that
5035 // corresponds to the passed in input argument. We generated different
5036 // accesses of this Argument, based on capture type and other Input
5037 // related information.
// \param input - This is the host side value that will be passed to
// the kernel i.e. the kernel input; we rewrite all uses of this within
// the kernel (as we generate the kernel body based on the target's region
// which maintains references to the original input) to the retVal argument
// upon exit of this function inside of the OMPIRBuilder. This interlinks
// the kernel argument to future uses of it in the function providing
// appropriate "glue" instructions in between.
5045 // \param retVal - This is the value that all uses of input inside of the
5046 // kernel will be re-written to, the goal of this function is to generate
5047 // an appropriate location for the kernel argument to be accessed from,
5048 // e.g. ByRef will result in a temporary allocation location and then
5049 // a store of the kernel argument into this allocated memory which
5050 // will then be loaded from, ByCopy will use the allocated memory
5051 // directly.
static llvm::IRBuilderBase::InsertPoint
createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
                             llvm::Value *input, llvm::Value *&retVal,
                             llvm::IRBuilderBase &builder,
                             llvm::OpenMPIRBuilder &ompBuilder,
                             LLVM::ModuleTranslation &moduleTranslation,
                             llvm::IRBuilderBase::InsertPoint allocaIP,
                             llvm::IRBuilderBase::InsertPoint codeGenIP) {
  assert(ompBuilder.Config.isTargetDevice() &&
         "function only supported for target device codegen");
  builder.restoreIP(allocaIP);

  // Default to by-reference capture if no matching map clause is found.
  omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef;
  LLVM::TypeToLLVMIRTranslator typeToLLVMIRTranslator(
      ompBuilder.M.getContext());
  unsigned alignmentValue = 0;
  // Find the associated MapInfoData entry for the current input
  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
    if (mapData.OriginalValue[i] == input) {
      auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
      capture = mapOp.getMapCaptureType();
      // Get information of alignment of mapped object
      alignmentValue = typeToLLVMIRTranslator.getPreferredAlignment(
          mapOp.getVarType(), ompBuilder.M.getDataLayout());
      break;
    }

  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
  unsigned int defaultAS =
      ompBuilder.M.getDataLayout().getProgramAddressSpace();

  // Create the alloca for the argument the current point.
  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);

  // On targets where allocas live in a non-default address space (e.g. GPU
  // private memory), cast the pointer back to the program address space.
  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
    v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS));

  // Spill the incoming kernel argument into the alloca.
  builder.CreateStore(&arg, v);

  builder.restoreIP(codeGenIP);

  switch (capture) {
  case omp::VariableCaptureKind::ByCopy: {
    // By-copy: the spilled argument's storage is used directly.
    retVal = v;
    break;
  }
  case omp::VariableCaptureKind::ByRef: {
    // By-ref: load the pointer back out of the alloca.
    llvm::LoadInst *loadInst = builder.CreateAlignedLoad(
        v->getType(), v,
        ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
    // CreateAlignedLoad function creates similar LLVM IR:
    // %res = load ptr, ptr %input, align 8
    // This LLVM IR does not contain information about alignment
    // of the loaded value. We need to add !align metadata to unblock
    // optimizer. The existence of the !align metadata on the instruction
    // tells the optimizer that the value loaded is known to be aligned to
    // a boundary specified by the integer value in the metadata node.
    // Example:
    // %res = load ptr, ptr %input, align 8, !align !align_md_node
    // ^ ^
    // | |
    // alignment of %input address |
    // |
    // alignment of %res object
    if (v->getType()->isPointerTy() && alignmentValue) {
      llvm::MDBuilder MDB(builder.getContext());
      loadInst->setMetadata(
          llvm::LLVMContext::MD_align,
          llvm::MDNode::get(builder.getContext(),
                            MDB.createConstant(llvm::ConstantInt::get(
                                llvm::Type::getInt64Ty(builder.getContext()),
                                alignmentValue))));
    }
    retVal = loadInst;

    break;
  }
  case omp::VariableCaptureKind::This:
  case omp::VariableCaptureKind::VLAType:
    // TODO: Consider returning error to use standard reporting for
    // unimplemented features.
    assert(false && "Currently unsupported capture kind");
    break;
  }

  return builder.saveIP();
}
5139 
5140 /// Follow uses of `host_eval`-defined block arguments of the given `omp.target`
5141 /// operation and populate output variables with their corresponding host value
5142 /// (i.e. operand evaluated outside of the target region), based on their uses
5143 /// inside of the target region.
5144 ///
5145 /// Loop bounds and steps are only optionally populated, if output vectors are
5146 /// provided.
static void
extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
                       Value &numTeamsLower, Value &numTeamsUpper,
                       Value &threadLimit,
                       llvm::SmallVectorImpl<Value> *lowerBounds = nullptr,
                       llvm::SmallVectorImpl<Value> *upperBounds = nullptr,
                       llvm::SmallVectorImpl<Value> *steps = nullptr) {
  auto blockArgIface = llvm::cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
  // Walk each host_eval operand paired with its entry block argument and
  // classify the argument by how it is consumed inside the region.
  for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(),
                                   blockArgIface.getHostEvalBlockArgs())) {
    Value hostEvalVar = std::get<0>(item), blockArg = std::get<1>(item);

    for (Operation *user : blockArg.getUsers()) {
      llvm::TypeSwitch<Operation *>(user)
          .Case([&](omp::TeamsOp teamsOp) {
            if (teamsOp.getNumTeamsLower() == blockArg)
              numTeamsLower = hostEvalVar;
            else if (teamsOp.getNumTeamsUpper() == blockArg)
              numTeamsUpper = hostEvalVar;
            else if (teamsOp.getThreadLimit() == blockArg)
              threadLimit = hostEvalVar;
            else
              llvm_unreachable("unsupported host_eval use");
          })
          .Case([&](omp::ParallelOp parallelOp) {
            if (parallelOp.getNumThreads() == blockArg)
              numThreads = hostEvalVar;
            else
              llvm_unreachable("unsupported host_eval use");
          })
          .Case([&](omp::LoopNestOp loopOp) {
            // A block argument may feed several bound positions; record the
            // host value at each matching index of the output vector.
            auto processBounds =
                [&](OperandRange opBounds,
                    llvm::SmallVectorImpl<Value> *outBounds) -> bool {
              bool found = false;
              for (auto [i, lb] : llvm::enumerate(opBounds)) {
                if (lb == blockArg) {
                  found = true;
                  if (outBounds)
                    (*outBounds)[i] = hostEvalVar;
                }
              }
              return found;
            };
            bool found =
                processBounds(loopOp.getLoopLowerBounds(), lowerBounds);
            found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) ||
                    found;
            found = processBounds(loopOp.getLoopSteps(), steps) || found;
            (void)found;
            assert(found && "unsupported host_eval use");
          })
          .Default([](Operation *) {
            llvm_unreachable("unsupported host_eval use");
          });
    }
  }
}
5205 
5206 /// If \p op is of the given type parameter, return it casted to that type.
5207 /// Otherwise, if its immediate parent operation (or some other higher-level
5208 /// parent, if \p immediateParent is false) is of that type, return that parent
5209 /// casted to the given type.
5210 ///
5211 /// If \p op is \c null or neither it or its parent(s) are of the specified
5212 /// type, return a \c null operation.
5213 template <typename OpTy>
5214 static OpTy castOrGetParentOfType(Operation *op, bool immediateParent = false) {
5215  if (!op)
5216  return OpTy();
5217 
5218  if (OpTy casted = dyn_cast<OpTy>(op))
5219  return casted;
5220 
5221  if (immediateParent)
5222  return dyn_cast_if_present<OpTy>(op->getParentOp());
5223 
5224  return op->getParentOfType<OpTy>();
5225 }
5226 
5227 /// If the given \p value is defined by an \c llvm.mlir.constant operation and
5228 /// it is of an integer type, return its value.
5229 static std::optional<int64_t> extractConstInteger(Value value) {
5230  if (!value)
5231  return std::nullopt;
5232 
5233  if (auto constOp = value.getDefiningOp<LLVM::ConstantOp>())
5234  if (auto constAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
5235  return constAttr.getInt();
5236 
5237  return std::nullopt;
5238 }
5239 
5240 static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl) {
5241  uint64_t sizeInBits = dl.getTypeSizeInBits(type);
5242  uint64_t sizeInBytes = sizeInBits / 8;
5243  return sizeInBytes;
5244 }
5245 
/// Computes the byte size of the reduction data for \p op: the size of a
/// non-packed struct whose members are the types of all declared reductions,
/// or 0 when the operation has no reduction variables.
template <typename OpTy>
static uint64_t getReductionDataSize(OpTy &op) {
  if (op.getNumReductionVars() > 0) {
    SmallVector<omp::DeclareReductionOp> reductions;
    collectReductionDecls(op, reductions);

    // Model the combined reduction storage as a literal (non-packed) struct
    // of all reduction element types.
    llvm::SmallVector<mlir::Type> members;
    members.reserve(reductions.size());
    for (omp::DeclareReductionOp &red : reductions)
      members.push_back(red.getType());
    Operation *opp = op.getOperation();
    auto structType = mlir::LLVM::LLVMStructType::getLiteral(
        opp->getContext(), members, /*isPacked=*/false);
    DataLayout dl = DataLayout(opp->getParentOfType<ModuleOp>());
    return getTypeByteSize(structType, dl);
  }
  return 0;
}
5264 
5265 /// Populate default `MinTeams`, `MaxTeams` and `MaxThreads` to their default
5266 /// values as stated by the corresponding clauses, if constant.
5267 ///
5268 /// These default values must be set before the creation of the outlined LLVM
5269 /// function for the target region, so that they can be used to initialize the
5270 /// corresponding global `ConfigurationEnvironmentTy` structure.
5271 static void
5272 initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp,
5273  llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs,
5274  bool isTargetDevice, bool isGPU) {
5275  // TODO: Handle constant 'if' clauses.
5276 
5277  Value numThreads, numTeamsLower, numTeamsUpper, threadLimit;
5278  if (!isTargetDevice) {
5279  extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
5280  threadLimit);
5281  } else {
5282  // In the target device, values for these clauses are not passed as
5283  // host_eval, but instead evaluated prior to entry to the region. This
5284  // ensures values are mapped and available inside of the target region.
5285  if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
5286  numTeamsLower = teamsOp.getNumTeamsLower();
5287  numTeamsUpper = teamsOp.getNumTeamsUpper();
5288  threadLimit = teamsOp.getThreadLimit();
5289  }
5290 
5291  if (auto parallelOp = castOrGetParentOfType<omp::ParallelOp>(capturedOp))
5292  numThreads = parallelOp.getNumThreads();
5293  }
5294 
5295  // Handle clauses impacting the number of teams.
5296 
5297  int32_t minTeamsVal = 1, maxTeamsVal = -1;
5298  if (castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
5299  // TODO: Use `hostNumTeamsLower` to initialize `minTeamsVal`. For now, match
5300  // clang and set min and max to the same value.
5301  if (numTeamsUpper) {
5302  if (auto val = extractConstInteger(numTeamsUpper))
5303  minTeamsVal = maxTeamsVal = *val;
5304  } else {
5305  minTeamsVal = maxTeamsVal = 0;
5306  }
5307  } else if (castOrGetParentOfType<omp::ParallelOp>(capturedOp,
5308  /*immediateParent=*/true) ||
5309  castOrGetParentOfType<omp::SimdOp>(capturedOp,
5310  /*immediateParent=*/true)) {
5311  minTeamsVal = maxTeamsVal = 1;
5312  } else {
5313  minTeamsVal = maxTeamsVal = -1;
5314  }
5315 
5316  // Handle clauses impacting the number of threads.
5317 
5318  auto setMaxValueFromClause = [](Value clauseValue, int32_t &result) {
5319  if (!clauseValue)
5320  return;
5321 
5322  if (auto val = extractConstInteger(clauseValue))
5323  result = *val;
5324 
5325  // Found an applicable clause, so it's not undefined. Mark as unknown
5326  // because it's not constant.
5327  if (result < 0)
5328  result = 0;
5329  };
5330 
5331  // Extract 'thread_limit' clause from 'target' and 'teams' directives.
5332  int32_t targetThreadLimitVal = -1, teamsThreadLimitVal = -1;
5333  setMaxValueFromClause(targetOp.getThreadLimit(), targetThreadLimitVal);
5334  setMaxValueFromClause(threadLimit, teamsThreadLimitVal);
5335 
5336  // Extract 'max_threads' clause from 'parallel' or set to 1 if it's SIMD.
5337  int32_t maxThreadsVal = -1;
5338  if (castOrGetParentOfType<omp::ParallelOp>(capturedOp))
5339  setMaxValueFromClause(numThreads, maxThreadsVal);
5340  else if (castOrGetParentOfType<omp::SimdOp>(capturedOp,
5341  /*immediateParent=*/true))
5342  maxThreadsVal = 1;
5343 
5344  // For max values, < 0 means unset, == 0 means set but unknown. Select the
5345  // minimum value between 'max_threads' and 'thread_limit' clauses that were
5346  // set.
5347  int32_t combinedMaxThreadsVal = targetThreadLimitVal;
5348  if (combinedMaxThreadsVal < 0 ||
5349  (teamsThreadLimitVal >= 0 && teamsThreadLimitVal < combinedMaxThreadsVal))
5350  combinedMaxThreadsVal = teamsThreadLimitVal;
5351 
5352  if (combinedMaxThreadsVal < 0 ||
5353  (maxThreadsVal >= 0 && maxThreadsVal < combinedMaxThreadsVal))
5354  combinedMaxThreadsVal = maxThreadsVal;
5355 
5356  int32_t reductionDataSize = 0;
5357  if (isGPU && capturedOp) {
5358  if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp))
5359  reductionDataSize = getReductionDataSize(teamsOp);
5360  }
5361 
5362  // Update kernel bounds structure for the `OpenMPIRBuilder` to use.
5363  omp::TargetRegionFlags kernelFlags = targetOp.getKernelExecFlags(capturedOp);
5364  assert(
5365  omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic |
5366  omp::TargetRegionFlags::spmd) &&
5367  "invalid kernel flags");
5368  attrs.ExecFlags =
5369  omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic)
5370  ? omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::spmd)
5371  ? llvm::omp::OMP_TGT_EXEC_MODE_GENERIC_SPMD
5372  : llvm::omp::OMP_TGT_EXEC_MODE_GENERIC
5373  : llvm::omp::OMP_TGT_EXEC_MODE_SPMD;
5374  attrs.MinTeams = minTeamsVal;
5375  attrs.MaxTeams.front() = maxTeamsVal;
5376  attrs.MinThreads = 1;
5377  attrs.MaxThreads.front() = combinedMaxThreadsVal;
5378  attrs.ReductionDataSize = reductionDataSize;
5379  // TODO: Allow modified buffer length similar to
5380  // fopenmp-cuda-teams-reduction-recs-num flag in clang.
5381  if (attrs.ReductionDataSize != 0)
5382  attrs.ReductionBufferLength = 1024;
5383 }
5384 
5385 /// Gather LLVM runtime values for all clauses evaluated in the host that are
5386 /// passed to the kernel invocation.
5387 ///
5388 /// This function must be called only when compiling for the host. Also, it will
5389 /// only provide correct results if it's called after the body of \c targetOp
5390 /// has been fully generated.
5391 static void
5392 initTargetRuntimeAttrs(llvm::IRBuilderBase &builder,
5393  LLVM::ModuleTranslation &moduleTranslation,
5394  omp::TargetOp targetOp, Operation *capturedOp,
5395  llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs) {
5396  omp::LoopNestOp loopOp = castOrGetParentOfType<omp::LoopNestOp>(capturedOp);
5397  unsigned numLoops = loopOp ? loopOp.getNumLoops() : 0;
5398 
5399  Value numThreads, numTeamsLower, numTeamsUpper, teamsThreadLimit;
5400  llvm::SmallVector<Value> lowerBounds(numLoops), upperBounds(numLoops),
5401  steps(numLoops);
5402  extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
5403  teamsThreadLimit, &lowerBounds, &upperBounds, &steps);
5404 
5405  // TODO: Handle constant 'if' clauses.
5406  if (Value targetThreadLimit = targetOp.getThreadLimit())
5407  attrs.TargetThreadLimit.front() =
5408  moduleTranslation.lookupValue(targetThreadLimit);
5409 
5410  if (numTeamsLower)
5411  attrs.MinTeams = moduleTranslation.lookupValue(numTeamsLower);
5412 
5413  if (numTeamsUpper)
5414  attrs.MaxTeams.front() = moduleTranslation.lookupValue(numTeamsUpper);
5415 
5416  if (teamsThreadLimit)
5417  attrs.TeamsThreadLimit.front() =
5418  moduleTranslation.lookupValue(teamsThreadLimit);
5419 
5420  if (numThreads)
5421  attrs.MaxThreads = moduleTranslation.lookupValue(numThreads);
5422 
5423  if (omp::bitEnumContainsAny(targetOp.getKernelExecFlags(capturedOp),
5424  omp::TargetRegionFlags::trip_count)) {
5425  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5426  attrs.LoopTripCount = nullptr;
5427 
5428  // To calculate the trip count, we multiply together the trip counts of
5429  // every collapsed canonical loop. We don't need to create the loop nests
5430  // here, since we're only interested in the trip count.
5431  for (auto [loopLower, loopUpper, loopStep] :
5432  llvm::zip_equal(lowerBounds, upperBounds, steps)) {
5433  llvm::Value *lowerBound = moduleTranslation.lookupValue(loopLower);
5434  llvm::Value *upperBound = moduleTranslation.lookupValue(loopUpper);
5435  llvm::Value *step = moduleTranslation.lookupValue(loopStep);
5436 
5437  llvm::OpenMPIRBuilder::LocationDescription loc(builder);
5438  llvm::Value *tripCount = ompBuilder->calculateCanonicalLoopTripCount(
5439  loc, lowerBound, upperBound, step, /*IsSigned=*/true,
5440  loopOp.getLoopInclusive());
5441 
5442  if (!attrs.LoopTripCount) {
5443  attrs.LoopTripCount = tripCount;
5444  continue;
5445  }
5446 
5447  // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
5448  attrs.LoopTripCount = builder.CreateMul(attrs.LoopTripCount, tripCount,
5449  {}, /*HasNUW=*/true);
5450  }
5451  }
5452 }
5453 
5454 static LogicalResult
5455 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
5456  LLVM::ModuleTranslation &moduleTranslation) {
5457  auto targetOp = cast<omp::TargetOp>(opInst);
5458  // The current debug location already has the DISubprogram for the outlined
5459  // function that will be created for the target op. We save it here so that
5460  // we can set it on the outlined function.
5461  llvm::DebugLoc outlinedFnLoc = builder.getCurrentDebugLocation();
5462  if (failed(checkImplementationStatus(opInst)))
5463  return failure();
5464 
5465  // During the handling of target op, we will generate instructions in the
5466  // parent function like call to the oulined function or branch to a new
5467  // BasicBlock. We set the debug location here to parent function so that those
5468  // get the correct debug locations. For outlined functions, the normal MLIR op
5469  // conversion will automatically pick the correct location.
5470  llvm::BasicBlock *parentBB = builder.GetInsertBlock();
5471  assert(parentBB && "No insert block is set for the builder");
5472  llvm::Function *parentLLVMFn = parentBB->getParent();
5473  assert(parentLLVMFn && "Parent Function must be valid");
5474  if (llvm::DISubprogram *SP = parentLLVMFn->getSubprogram())
5475  builder.SetCurrentDebugLocation(llvm::DILocation::get(
5476  parentLLVMFn->getContext(), outlinedFnLoc.getLine(),
5477  outlinedFnLoc.getCol(), SP, outlinedFnLoc.getInlinedAt()));
5478 
5479  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5480  bool isTargetDevice = ompBuilder->Config.isTargetDevice();
5481  bool isGPU = ompBuilder->Config.isGPU();
5482 
5483  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
5484  auto argIface = cast<omp::BlockArgOpenMPOpInterface>(opInst);
5485  auto &targetRegion = targetOp.getRegion();
5486  // Holds the private vars that have been mapped along with the block argument
5487  // that corresponds to the MapInfoOp corresponding to the private var in
5488  // question. So, for instance:
5489  //
5490  // %10 = omp.map.info var_ptr(%6#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, ..)
5491  // omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1)
5492  //
5493  // Then, %10 has been created so that the descriptor can be used by the
5494  // privatizer @box.privatizer on the device side. Here we'd record {%6#0,
5495  // %arg0} in the mappedPrivateVars map.
5496  llvm::DenseMap<Value, Value> mappedPrivateVars;
5497  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
5498  SmallVector<Value> mapVars = targetOp.getMapVars();
5499  SmallVector<Value> hdaVars = targetOp.getHasDeviceAddrVars();
5500  ArrayRef<BlockArgument> mapBlockArgs = argIface.getMapBlockArgs();
5501  ArrayRef<BlockArgument> hdaBlockArgs = argIface.getHasDeviceAddrBlockArgs();
5502  llvm::Function *llvmOutlinedFn = nullptr;
5503 
5504  // TODO: It can also be false if a compile-time constant `false` IF clause is
5505  // specified.
5506  bool isOffloadEntry =
5507  isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
5508 
5509  // For some private variables, the MapsForPrivatizedVariablesPass
5510  // creates MapInfoOp instances. Go through the private variables and
5511  // the mapped variables so that during codegeneration we are able
5512  // to quickly look up the corresponding map variable, if any for each
5513  // private variable.
5514  if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) {
5515  OperandRange privateVars = targetOp.getPrivateVars();
5516  std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
5517  std::optional<DenseI64ArrayAttr> privateMapIndices =
5518  targetOp.getPrivateMapsAttr();
5519 
5520  for (auto [privVarIdx, privVarSymPair] :
5521  llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) {
5522  auto privVar = std::get<0>(privVarSymPair);
5523  auto privSym = std::get<1>(privVarSymPair);
5524 
5525  SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
5526  omp::PrivateClauseOp privatizer =
5527  findPrivatizer(targetOp, privatizerName);
5528 
5529  if (!privatizer.needsMap())
5530  continue;
5531 
5532  mlir::Value mappedValue =
5533  targetOp.getMappedValueForPrivateVar(privVarIdx);
5534  assert(mappedValue && "Expected to find mapped value for a privatized "
5535  "variable that needs mapping");
5536 
5537  // The MapInfoOp defining the map var isn't really needed later.
5538  // So, we don't store it in any datastructure. Instead, we just
5539  // do some sanity checks on it right now.
5540  auto mapInfoOp = mappedValue.getDefiningOp<omp::MapInfoOp>();
5541  [[maybe_unused]] Type varType = mapInfoOp.getVarType();
5542 
5543  // Check #1: Check that the type of the private variable matches
5544  // the type of the variable being mapped.
5545  if (!isa<LLVM::LLVMPointerType>(privVar.getType()))
5546  assert(
5547  varType == privVar.getType() &&
5548  "Type of private var doesn't match the type of the mapped value");
5549 
5550  // Ok, only 1 sanity check for now.
5551  // Record the block argument corresponding to this mapvar.
5552  mappedPrivateVars.insert(
5553  {privVar,
5554  targetRegion.getArgument(argIface.getMapBlockArgsStart() +
5555  (*privateMapIndices)[privVarIdx])});
5556  }
5557  }
5558 
5559  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5560  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
5561  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
5562  llvm::IRBuilderBase::InsertPointGuard guard(builder);
5563  builder.SetCurrentDebugLocation(llvm::DebugLoc());
5564  // Forward target-cpu and target-features function attributes from the
5565  // original function to the new outlined function.
5566  llvm::Function *llvmParentFn =
5567  moduleTranslation.lookupFunction(parentFn.getName());
5568  llvmOutlinedFn = codeGenIP.getBlock()->getParent();
5569  assert(llvmParentFn && llvmOutlinedFn &&
5570  "Both parent and outlined functions must exist at this point");
5571 
5572  if (outlinedFnLoc && llvmParentFn->getSubprogram())
5573  llvmOutlinedFn->setSubprogram(outlinedFnLoc->getScope()->getSubprogram());
5574 
5575  if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
5576  attr.isStringAttribute())
5577  llvmOutlinedFn->addFnAttr(attr);
5578 
5579  if (auto attr = llvmParentFn->getFnAttribute("target-features");
5580  attr.isStringAttribute())
5581  llvmOutlinedFn->addFnAttr(attr);
5582 
5583  for (auto [arg, mapOp] : llvm::zip_equal(mapBlockArgs, mapVars)) {
5584  auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
5585  llvm::Value *mapOpValue =
5586  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
5587  moduleTranslation.mapValue(arg, mapOpValue);
5588  }
5589  for (auto [arg, mapOp] : llvm::zip_equal(hdaBlockArgs, hdaVars)) {
5590  auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
5591  llvm::Value *mapOpValue =
5592  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
5593  moduleTranslation.mapValue(arg, mapOpValue);
5594  }
5595 
5596  // Do privatization after moduleTranslation has already recorded
5597  // mapped values.
5598  PrivateVarsInfo privateVarsInfo(targetOp);
5599 
5600  llvm::Expected<llvm::BasicBlock *> afterAllocas =
5601  allocatePrivateVars(builder, moduleTranslation, privateVarsInfo,
5602  allocaIP, &mappedPrivateVars);
5603 
5604  if (failed(handleError(afterAllocas, *targetOp)))
5605  return llvm::make_error<PreviouslyReportedError>();
5606 
5607  builder.restoreIP(codeGenIP);
5608  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo,
5609  &mappedPrivateVars),
5610  *targetOp)
5611  .failed())
5612  return llvm::make_error<PreviouslyReportedError>();
5613 
5615  targetOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
5616  privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
5617  targetOp.getPrivateNeedsBarrier(), &mappedPrivateVars)))
5618  return llvm::make_error<PreviouslyReportedError>();
5619 
5620  SmallVector<Region *> privateCleanupRegions;
5621  llvm::transform(privateVarsInfo.privatizers,
5622  std::back_inserter(privateCleanupRegions),
5623  [](omp::PrivateClauseOp privatizer) {
5624  return &privatizer.getDeallocRegion();
5625  });
5626 
5628  targetRegion, "omp.target", builder, moduleTranslation);
5629 
5630  if (!exitBlock)
5631  return exitBlock.takeError();
5632 
5633  builder.SetInsertPoint(*exitBlock);
5634  if (!privateCleanupRegions.empty()) {
5636  privateCleanupRegions, privateVarsInfo.llvmVars,
5637  moduleTranslation, builder, "omp.targetop.private.cleanup",
5638  /*shouldLoadCleanupRegionArg=*/false))) {
5639  return llvm::createStringError(
5640  "failed to inline `dealloc` region of `omp.private` "
5641  "op in the target region");
5642  }
5643  return builder.saveIP();
5644  }
5645 
5646  return InsertPointTy(exitBlock.get(), exitBlock.get()->end());
5647  };
5648 
5649  StringRef parentName = parentFn.getName();
5650 
5651  llvm::TargetRegionEntryInfo entryInfo;
5652 
5653  getTargetEntryUniqueInfo(entryInfo, targetOp, parentName);
5654 
5655  MapInfoData mapData;
5656  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
5657  builder, /*useDevPtrOperands=*/{},
5658  /*useDevAddrOperands=*/{}, hdaVars);
5659 
5660  MapInfosTy combinedInfos;
5661  auto genMapInfoCB =
5662  [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) -> MapInfosTy & {
5663  builder.restoreIP(codeGenIP);
5664  genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, true);
5665  return combinedInfos;
5666  };
5667 
5668  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
5669  llvm::Value *&retVal, InsertPointTy allocaIP,
5670  InsertPointTy codeGenIP)
5671  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
5672  llvm::IRBuilderBase::InsertPointGuard guard(builder);
5673  builder.SetCurrentDebugLocation(llvm::DebugLoc());
5674  // We just return the unaltered argument for the host function
5675  // for now, some alterations may be required in the future to
5676  // keep host fallback functions working identically to the device
5677  // version (e.g. pass ByCopy values should be treated as such on
5678  // host and device, currently not always the case)
5679  if (!isTargetDevice) {
5680  retVal = cast<llvm::Value>(&arg);
5681  return codeGenIP;
5682  }
5683 
5684  return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
5685  *ompBuilder, moduleTranslation,
5686  allocaIP, codeGenIP);
5687  };
5688 
5689  llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs runtimeAttrs;
5690  llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs defaultAttrs;
5691  Operation *targetCapturedOp = targetOp.getInnermostCapturedOmpOp();
5692  initTargetDefaultAttrs(targetOp, targetCapturedOp, defaultAttrs,
5693  isTargetDevice, isGPU);
5694 
5695  // Collect host-evaluated values needed to properly launch the kernel from the
5696  // host.
5697  if (!isTargetDevice)
5698  initTargetRuntimeAttrs(builder, moduleTranslation, targetOp,
5699  targetCapturedOp, runtimeAttrs);
5700 
5701  // Pass host-evaluated values as parameters to the kernel / host fallback,
5702  // except if they are constants. In any case, map the MLIR block argument to
5703  // the corresponding LLVM values.
5705  SmallVector<Value> hostEvalVars = targetOp.getHostEvalVars();
5706  ArrayRef<BlockArgument> hostEvalBlockArgs = argIface.getHostEvalBlockArgs();
5707  for (auto [arg, var] : llvm::zip_equal(hostEvalBlockArgs, hostEvalVars)) {
5708  llvm::Value *value = moduleTranslation.lookupValue(var);
5709  moduleTranslation.mapValue(arg, value);
5710 
5711  if (!llvm::isa<llvm::Constant>(value))
5712  kernelInput.push_back(value);
5713  }
5714 
5715  for (size_t i = 0, e = mapData.OriginalValue.size(); i != e; ++i) {
5716  // declare target arguments are not passed to kernels as arguments
5717  // TODO: We currently do not handle cases where a member is explicitly
5718  // passed in as an argument, this will likley need to be handled in
5719  // the near future, rather than using IsAMember, it may be better to
5720  // test if the relevant BlockArg is used within the target region and
5721  // then use that as a basis for exclusion in the kernel inputs.
5722  if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
5723  kernelInput.push_back(mapData.OriginalValue[i]);
5724  }
5725 
5727  buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
5728  moduleTranslation, dds);
5729 
5730  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
5731  findAllocaInsertPoint(builder, moduleTranslation);
5732  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
5733 
5734  llvm::OpenMPIRBuilder::TargetDataInfo info(
5735  /*RequiresDevicePointerInfo=*/false,
5736  /*SeparateBeginEndCalls=*/true);
5737 
5738  auto customMapperCB =
5739  [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
5740  if (!combinedInfos.Mappers[i])
5741  return nullptr;
5742  info.HasMapper = true;
5743  return getOrCreateUserDefinedMapperFunc(combinedInfos.Mappers[i], builder,
5744  moduleTranslation);
5745  };
5746 
5747  llvm::Value *ifCond = nullptr;
5748  if (Value targetIfCond = targetOp.getIfExpr())
5749  ifCond = moduleTranslation.lookupValue(targetIfCond);
5750 
5751  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
5752  moduleTranslation.getOpenMPBuilder()->createTarget(
5753  ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), info, entryInfo,
5754  defaultAttrs, runtimeAttrs, ifCond, kernelInput, genMapInfoCB, bodyCB,
5755  argAccessorCB, customMapperCB, dds, targetOp.getNowait());
5756 
5757  if (failed(handleError(afterIP, opInst)))
5758  return failure();
5759 
5760  builder.restoreIP(*afterIP);
5761 
5762  // Remap access operations to declare target reference pointers for the
5763  // device, essentially generating extra loadop's as necessary
5764  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
5765  handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
5766  llvmOutlinedFn);
5767 
5768  return success();
5769 }
5770 
/// Process the `omp.declare_target` attribute attached to \p op, either a
/// function or an `llvm.mlir.global`, during translation to LLVM IR.
static LogicalResult
convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
                         LLVM::ModuleTranslation &moduleTranslation) {
  // Amend omp.declare_target by deleting the IR of the outlined functions
  // created for target regions. They cannot be filtered out from MLIR earlier
  // because the omp.target operation inside must be translated to LLVM, but
  // the wrapper functions themselves must not remain at the end of the
  // process. We know that functions where omp.declare_target does not match
  // omp.is_target_device at this stage can only be wrapper functions because
  // those that aren't are removed earlier as an MLIR transformation pass.
  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
    if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
            op->getParentOfType<ModuleOp>().getOperation())) {
      // Host compilation: nothing to delete.
      if (!offloadMod.getIsTargetDevice())
        return success();

      omp::DeclareTargetDeviceType declareType =
          attribute.getDeviceType().getValue();

      // Device compilation of a host-only function: erase the translated LLVM
      // function. References are dropped first so erasure is safe even if the
      // function body still points at other IR.
      if (declareType == omp::DeclareTargetDeviceType::host) {
        llvm::Function *llvmFunc =
            moduleTranslation.lookupFunction(funcOp.getName());
        llvmFunc->dropAllReferences();
        llvmFunc->eraseFromParent();
      }
    }
    return success();
  }

  // Globals marked declare-target are registered with the OpenMPIRBuilder so
  // the offload runtime can find them.
  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
    llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
    if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
      llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
      bool isDeclaration = gOp.isDeclaration();
      bool isExternallyVisible =
          gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
      // May be null if the op's location carries no file/line information.
      auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
      llvm::StringRef mangledName = gOp.getSymName();
      auto captureClause =
          convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
      auto deviceClause =
          convertToDeviceClauseKind(attribute.getDeviceType().getValue());
      // unused for MLIR at the moment, required in Clang for book
      // keeping
      std::vector<llvm::GlobalVariable *> generatedRefs;

      // Target triple list for the registration call, taken from the module's
      // LLVM target-triple attribute when present.
      std::vector<llvm::Triple> targetTriple;
      auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>(
          op->getParentOfType<mlir::ModuleOp>()->getAttr(
              LLVM::LLVMDialect::getTargetTripleAttrName()));
      if (targetTripleAttr)
        targetTriple.emplace_back(targetTripleAttr.data());

      // Supplies (filename, line) for building the unique offload entry info;
      // falls back to ("", 0) when no source location is available.
      auto fileInfoCallBack = [&loc]() {
        std::string filename = "";
        std::uint64_t lineNo = 0;

        if (loc) {
          filename = loc.getFilename().str();
          lineNo = loc.getLine();
        }

        return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
                                                     lineNo);
      };

      ompBuilder->registerTargetGlobalVariable(
          captureClause, deviceClause, isDeclaration, isExternallyVisible,
          ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
          generatedRefs, /*OpenMPSimd*/ false, targetTriple,
          /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
          gVal->getType(), gVal);

      // On the device, non-`to` captures (and `to` under unified shared
      // memory) additionally need the reference-pointer indirection created by
      // getAddrOfDeclareTargetVar.
      if (ompBuilder->Config.isTargetDevice() &&
          (attribute.getCaptureClause().getValue() !=
               mlir::omp::DeclareTargetCaptureClause::to ||
           ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
        ompBuilder->getAddrOfDeclareTargetVar(
            captureClause, deviceClause, isDeclaration, isExternallyVisible,
            ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
            generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
            /*GlobalInitializer*/ nullptr,
            /*VariableLinkage*/ nullptr);
      }
    }
  }

  return success();
}
5860 
5861 // Returns true if the operation is inside a TargetOp or
5862 // is part of a declare target function.
5863 static bool isTargetDeviceOp(Operation *op) {
5864  // Assumes no reverse offloading
5865  if (op->getParentOfType<omp::TargetOp>())
5866  return true;
5867 
5868  // Certain operations return results, and whether utilised in host or
5869  // target there is a chance an LLVM Dialect operation depends on it
5870  // by taking it in as an operand, so we must always lower these in
5871  // some manner or result in an ICE (whether they end up in a no-op
5872  // or otherwise).
5873  if (mlir::isa<omp::ThreadprivateOp>(op))
5874  return true;
5875 
5876  if (mlir::isa<omp::TargetAllocMemOp>(op) ||
5877  mlir::isa<omp::TargetFreeMemOp>(op))
5878  return true;
5879 
5880  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
5881  if (auto declareTargetIface =
5882  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
5883  parentFn.getOperation()))
5884  if (declareTargetIface.isDeclareTarget() &&
5885  declareTargetIface.getDeclareTargetDeviceType() !=
5886  mlir::omp::DeclareTargetDeviceType::host)
5887  return true;
5888 
5889  return false;
5890 }
5891 
5892 static llvm::Function *getOmpTargetAlloc(llvm::IRBuilderBase &builder,
5893  llvm::Module *llvmModule) {
5894  llvm::Type *i64Ty = builder.getInt64Ty();
5895  llvm::Type *i32Ty = builder.getInt32Ty();
5896  llvm::Type *returnType = builder.getPtrTy(0);
5897  llvm::FunctionType *fnType =
5898  llvm::FunctionType::get(returnType, {i64Ty, i32Ty}, false);
5899  llvm::Function *func = cast<llvm::Function>(
5900  llvmModule->getOrInsertFunction("omp_target_alloc", fnType).getCallee());
5901  return func;
5902 }
5903 
5904 static LogicalResult
5905 convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
5906  LLVM::ModuleTranslation &moduleTranslation) {
5907  auto allocMemOp = cast<omp::TargetAllocMemOp>(opInst);
5908  if (!allocMemOp)
5909  return failure();
5910 
5911  // Get "omp_target_alloc" function
5912  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
5913  llvm::Function *ompTargetAllocFunc = getOmpTargetAlloc(builder, llvmModule);
5914  // Get the corresponding device value in llvm
5915  mlir::Value deviceNum = allocMemOp.getDevice();
5916  llvm::Value *llvmDeviceNum = moduleTranslation.lookupValue(deviceNum);
5917  // Get the allocation size.
5918  llvm::DataLayout dataLayout = llvmModule->getDataLayout();
5919  mlir::Type heapTy = allocMemOp.getAllocatedType();
5920  llvm::Type *llvmHeapTy = moduleTranslation.convertType(heapTy);
5921  llvm::TypeSize typeSize = dataLayout.getTypeStoreSize(llvmHeapTy);
5922  llvm::Value *allocSize = builder.getInt64(typeSize.getFixedValue());
5923  for (auto typeParam : allocMemOp.getTypeparams())
5924  allocSize =
5925  builder.CreateMul(allocSize, moduleTranslation.lookupValue(typeParam));
5926  // Create call to "omp_target_alloc" with the args as translated llvm values.
5927  llvm::CallInst *call =
5928  builder.CreateCall(ompTargetAllocFunc, {allocSize, llvmDeviceNum});
5929  llvm::Value *resultI64 = builder.CreatePtrToInt(call, builder.getInt64Ty());
5930 
5931  // Map the result
5932  moduleTranslation.mapValue(allocMemOp.getResult(), resultI64);
5933  return success();
5934 }
5935 
5936 static llvm::Function *getOmpTargetFree(llvm::IRBuilderBase &builder,
5937  llvm::Module *llvmModule) {
5938  llvm::Type *ptrTy = builder.getPtrTy(0);
5939  llvm::Type *i32Ty = builder.getInt32Ty();
5940  llvm::Type *voidTy = builder.getVoidTy();
5941  llvm::FunctionType *fnType =
5942  llvm::FunctionType::get(voidTy, {ptrTy, i32Ty}, false);
5943  llvm::Function *func = dyn_cast<llvm::Function>(
5944  llvmModule->getOrInsertFunction("omp_target_free", fnType).getCallee());
5945  return func;
5946 }
5947 
5948 static LogicalResult
5949 convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
5950  LLVM::ModuleTranslation &moduleTranslation) {
5951  auto freeMemOp = cast<omp::TargetFreeMemOp>(opInst);
5952  if (!freeMemOp)
5953  return failure();
5954 
5955  // Get "omp_target_free" function
5956  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
5957  llvm::Function *ompTragetFreeFunc = getOmpTargetFree(builder, llvmModule);
5958  // Get the corresponding device value in llvm
5959  mlir::Value deviceNum = freeMemOp.getDevice();
5960  llvm::Value *llvmDeviceNum = moduleTranslation.lookupValue(deviceNum);
5961  // Get the corresponding heapref value in llvm
5962  mlir::Value heapref = freeMemOp.getHeapref();
5963  llvm::Value *llvmHeapref = moduleTranslation.lookupValue(heapref);
5964  // Convert heapref int to ptr and call "omp_target_free"
5965  llvm::Value *intToPtr =
5966  builder.CreateIntToPtr(llvmHeapref, builder.getPtrTy(0));
5967  builder.CreateCall(ompTragetFreeFunc, {intToPtr, llvmDeviceNum});
5968  return success();
5969 }
5970 
5971 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
5972 /// OpenMP runtime calls).
5973 static LogicalResult
5974 convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
5975  LLVM::ModuleTranslation &moduleTranslation) {
5976  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5977 
5978  // For each loop, introduce one stack frame to hold loop information. Ensure
5979  // this is only done for the outermost loop wrapper to prevent introducing
5980  // multiple stack frames for a single loop. Initially set to null, the loop
5981  // information structure is initialized during translation of the nested
5982  // omp.loop_nest operation, making it available to translation of all loop
5983  // wrappers after their body has been successfully translated.
5984  bool isOutermostLoopWrapper =
5985  isa_and_present<omp::LoopWrapperInterface>(op) &&
5986  !dyn_cast_if_present<omp::LoopWrapperInterface>(op->getParentOp());
5987 
5988  if (isOutermostLoopWrapper)
5989  moduleTranslation.stackPush<OpenMPLoopInfoStackFrame>();
5990 
5991  auto result =
5993  .Case([&](omp::BarrierOp op) -> LogicalResult {
5995  return failure();
5996 
5997  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
5998  ompBuilder->createBarrier(builder.saveIP(),
5999  llvm::omp::OMPD_barrier);
6000  LogicalResult res = handleError(afterIP, *op);
6001  if (res.succeeded()) {
6002  // If the barrier generated a cancellation check, the insertion
6003  // point might now need to be changed to a new continuation block
6004  builder.restoreIP(*afterIP);
6005  }
6006  return res;
6007  })
6008  .Case([&](omp::TaskyieldOp op) {
6010  return failure();
6011 
6012  ompBuilder->createTaskyield(builder.saveIP());
6013  return success();
6014  })
6015  .Case([&](omp::FlushOp op) {
6017  return failure();
6018 
6019  // No support in Openmp runtime function (__kmpc_flush) to accept
6020  // the argument list.
6021  // OpenMP standard states the following:
6022  // "An implementation may implement a flush with a list by ignoring
6023  // the list, and treating it the same as a flush without a list."
6024  //
6025  // The argument list is discarded so that, flush with a list is
6026  // treated same as a flush without a list.
6027  ompBuilder->createFlush(builder.saveIP());
6028  return success();
6029  })
6030  .Case([&](omp::ParallelOp op) {
6031  return convertOmpParallel(op, builder, moduleTranslation);
6032  })
6033  .Case([&](omp::MaskedOp) {
6034  return convertOmpMasked(*op, builder, moduleTranslation);
6035  })
6036  .Case([&](omp::MasterOp) {
6037  return convertOmpMaster(*op, builder, moduleTranslation);
6038  })
6039  .Case([&](omp::CriticalOp) {
6040  return convertOmpCritical(*op, builder, moduleTranslation);
6041  })
6042  .Case([&](omp::OrderedRegionOp) {
6043  return convertOmpOrderedRegion(*op, builder, moduleTranslation);
6044  })
6045  .Case([&](omp::OrderedOp) {
6046  return convertOmpOrdered(*op, builder, moduleTranslation);
6047  })
6048  .Case([&](omp::WsloopOp) {
6049  return convertOmpWsloop(*op, builder, moduleTranslation);
6050  })
6051  .Case([&](omp::SimdOp) {
6052  return convertOmpSimd(*op, builder, moduleTranslation);
6053  })
6054  .Case([&](omp::AtomicReadOp) {
6055  return convertOmpAtomicRead(*op, builder, moduleTranslation);
6056  })
6057  .Case([&](omp::AtomicWriteOp) {
6058  return convertOmpAtomicWrite(*op, builder, moduleTranslation);
6059  })
6060  .Case([&](omp::AtomicUpdateOp op) {
6061  return convertOmpAtomicUpdate(op, builder, moduleTranslation);
6062  })
6063  .Case([&](omp::AtomicCaptureOp op) {
6064  return convertOmpAtomicCapture(op, builder, moduleTranslation);
6065  })
6066  .Case([&](omp::CancelOp op) {
6067  return convertOmpCancel(op, builder, moduleTranslation);
6068  })
6069  .Case([&](omp::CancellationPointOp op) {
6070  return convertOmpCancellationPoint(op, builder, moduleTranslation);
6071  })
6072  .Case([&](omp::SectionsOp) {
6073  return convertOmpSections(*op, builder, moduleTranslation);
6074  })
6075  .Case([&](omp::SingleOp op) {
6076  return convertOmpSingle(op, builder, moduleTranslation);
6077  })
6078  .Case([&](omp::TeamsOp op) {
6079  return convertOmpTeams(op, builder, moduleTranslation);
6080  })
6081  .Case([&](omp::TaskOp op) {
6082  return convertOmpTaskOp(op, builder, moduleTranslation);
6083  })
6084  .Case([&](omp::TaskgroupOp op) {
6085  return convertOmpTaskgroupOp(op, builder, moduleTranslation);
6086  })
6087  .Case([&](omp::TaskwaitOp op) {
6088  return convertOmpTaskwaitOp(op, builder, moduleTranslation);
6089  })
6090  .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareMapperOp,
6091  omp::DeclareMapperInfoOp, omp::DeclareReductionOp,
6092  omp::CriticalDeclareOp>([](auto op) {
6093  // `yield` and `terminator` can be just omitted. The block structure
6094  // was created in the region that handles their parent operation.
6095  // `declare_reduction` will be used by reductions and is not
6096  // converted directly, skip it.
6097  // `declare_mapper` and `declare_mapper.info` are handled whenever
6098  // they are referred to through a `map` clause.
6099  // `critical.declare` is only used to declare names of critical
6100  // sections which will be used by `critical` ops and hence can be
6101  // ignored for lowering. The OpenMP IRBuilder will create unique
6102  // name for critical section names.
6103  return success();
6104  })
6105  .Case([&](omp::ThreadprivateOp) {
6106  return convertOmpThreadprivate(*op, builder, moduleTranslation);
6107  })
6108  .Case<omp::TargetDataOp, omp::TargetEnterDataOp,
6109  omp::TargetExitDataOp, omp::TargetUpdateOp>([&](auto op) {
6110  return convertOmpTargetData(op, builder, moduleTranslation);
6111  })
6112  .Case([&](omp::TargetOp) {
6113  return convertOmpTarget(*op, builder, moduleTranslation);
6114  })
6115  .Case([&](omp::DistributeOp) {
6116  return convertOmpDistribute(*op, builder, moduleTranslation);
6117  })
6118  .Case([&](omp::LoopNestOp) {
6119  return convertOmpLoopNest(*op, builder, moduleTranslation);
6120  })
6121  .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
6122  [&](auto op) {
6123  // No-op, should be handled by relevant owning operations e.g.
6124  // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
6125  // etc. and then discarded
6126  return success();
6127  })
6128  .Case([&](omp::NewCliOp op) {
6129  // Meta-operation: Doesn't do anything by itself, but used to
6130  // identify a loop.
6131  return success();
6132  })
6133  .Case([&](omp::CanonicalLoopOp op) {
6134  return convertOmpCanonicalLoopOp(op, builder, moduleTranslation);
6135  })
6136  .Case([&](omp::UnrollHeuristicOp op) {
6137  // FIXME: Handling omp.unroll_heuristic as an executable requires
6138  // that the generator (e.g. omp.canonical_loop) has been seen first.
6139  // For construct that require all codegen to occur inside a callback
6140  // (e.g. OpenMPIRBilder::createParallel), all codegen of that
6141  // contained region including their transformations must occur at
6142  // the omp.canonical_loop.
6143  return applyUnrollHeuristic(op, builder, moduleTranslation);
6144  })
6145  .Case([&](omp::TargetAllocMemOp) {
6146  return convertTargetAllocMemOp(*op, builder, moduleTranslation);
6147  })
6148  .Case([&](omp::TargetFreeMemOp) {
6149  return convertTargetFreeMemOp(*op, builder, moduleTranslation);
6150  })
6151  .Default([&](Operation *inst) {
6152  return inst->emitError()
6153  << "not yet implemented: " << inst->getName();
6154  });
6155 
6156  if (isOutermostLoopWrapper)
6157  moduleTranslation.stackPop();
6158 
6159  return result;
6160 }
6161 
6162 static LogicalResult
6163 convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
6164  LLVM::ModuleTranslation &moduleTranslation) {
6165  return convertHostOrTargetOperation(op, builder, moduleTranslation);
6166 }
6167 
6168 static LogicalResult
6169 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
6170  LLVM::ModuleTranslation &moduleTranslation) {
6171  if (isa<omp::TargetOp>(op))
6172  return convertOmpTarget(*op, builder, moduleTranslation);
6173  if (isa<omp::TargetDataOp>(op))
6174  return convertOmpTargetData(op, builder, moduleTranslation);
6175  bool interrupted =
6176  op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
6177  if (isa<omp::TargetOp>(oper)) {
6178  if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
6179  return WalkResult::interrupt();
6180  return WalkResult::skip();
6181  }
6182  if (isa<omp::TargetDataOp>(oper)) {
6183  if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
6184  return WalkResult::interrupt();
6185  return WalkResult::skip();
6186  }
6187 
6188  // Non-target ops might nest target-related ops, therefore, we
6189  // translate them as non-OpenMP scopes. Translating them is needed by
6190  // nested target-related ops since they might need LLVM values defined
6191  // in their parent non-target ops.
6192  if (isa<omp::OpenMPDialect>(oper->getDialect()) &&
6193  oper->getParentOfType<LLVM::LLVMFuncOp>() &&
6194  !oper->getRegions().empty()) {
6195  if (auto blockArgsIface =
6196  dyn_cast<omp::BlockArgOpenMPOpInterface>(oper))
6197  forwardArgs(moduleTranslation, blockArgsIface);
6198  else {
6199  // Here we map entry block arguments of
6200  // non-BlockArgOpenMPOpInterface ops if they can be encountered
6201  // inside of a function and they define any of these arguments.
6202  if (isa<mlir::omp::AtomicUpdateOp>(oper))
6203  for (auto [operand, arg] :
6204  llvm::zip_equal(oper->getOperands(),
6205  oper->getRegion(0).getArguments())) {
6206  moduleTranslation.mapValue(
6207  arg, builder.CreateLoad(
6208  moduleTranslation.convertType(arg.getType()),
6209  moduleTranslation.lookupValue(operand)));
6210  }
6211  }
6212 
6213  if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) {
6214  assert(builder.GetInsertBlock() &&
6215  "No insert block is set for the builder");
6216  for (auto iv : loopNest.getIVs()) {
6217  // Map iv to an undefined value just to keep the IR validity.
6218  moduleTranslation.mapValue(
6220  moduleTranslation.convertType(iv.getType())));
6221  }
6222  }
6223 
6224  for (Region &region : oper->getRegions()) {
6225  // Regions are fake in the sense that they are not a truthful
6226  // translation of the OpenMP construct being converted (e.g. no
6227  // OpenMP runtime calls will be generated). We just need this to
6228  // prepare the kernel invocation args.
6230  auto result = convertOmpOpRegions(
6231  region, oper->getName().getStringRef().str() + ".fake.region",
6232  builder, moduleTranslation, &phis);
6233  if (failed(handleError(result, *oper)))
6234  return WalkResult::interrupt();
6235 
6236  builder.SetInsertPoint(result.get(), result.get()->end());
6237  }
6238 
6239  return WalkResult::skip();
6240  }
6241 
6242  return WalkResult::advance();
6243  }).wasInterrupted();
6244  return failure(interrupted);
6245 }
6246 
6247 namespace {
6248 
6249 /// Implementation of the dialect interface that converts operations belonging
6250 /// to the OpenMP dialect to LLVM IR.
6251 class OpenMPDialectLLVMIRTranslationInterface
6253 public:
6255 
6256  /// Translates the given operation to LLVM IR using the provided IR builder
6257  /// and saving the state in `moduleTranslation`.
6258  LogicalResult
6259  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
6260  LLVM::ModuleTranslation &moduleTranslation) const final;
6261 
6262  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
6263  /// runtime calls, or operation amendments
6264  LogicalResult
6266  NamedAttribute attribute,
6267  LLVM::ModuleTranslation &moduleTranslation) const final;
6268 };
6269 
6270 } // namespace
6271 
6272 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
6273  Operation *op, ArrayRef<llvm::Instruction *> instructions,
6274  NamedAttribute attribute,
6275  LLVM::ModuleTranslation &moduleTranslation) const {
6276  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
6277  attribute.getName())
6278  .Case("omp.is_target_device",
6279  [&](Attribute attr) {
6280  if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
6281  llvm::OpenMPIRBuilderConfig &config =
6282  moduleTranslation.getOpenMPBuilder()->Config;
6283  config.setIsTargetDevice(deviceAttr.getValue());
6284  return success();
6285  }
6286  return failure();
6287  })
6288  .Case("omp.is_gpu",
6289  [&](Attribute attr) {
6290  if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
6291  llvm::OpenMPIRBuilderConfig &config =
6292  moduleTranslation.getOpenMPBuilder()->Config;
6293  config.setIsGPU(gpuAttr.getValue());
6294  return success();
6295  }
6296  return failure();
6297  })
6298  .Case("omp.host_ir_filepath",
6299  [&](Attribute attr) {
6300  if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
6301  llvm::OpenMPIRBuilder *ompBuilder =
6302  moduleTranslation.getOpenMPBuilder();
6303  ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
6304  return success();
6305  }
6306  return failure();
6307  })
6308  .Case("omp.flags",
6309  [&](Attribute attr) {
6310  if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
6311  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
6312  return failure();
6313  })
6314  .Case("omp.version",
6315  [&](Attribute attr) {
6316  if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
6317  llvm::OpenMPIRBuilder *ompBuilder =
6318  moduleTranslation.getOpenMPBuilder();
6319  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
6320  versionAttr.getVersion());
6321  return success();
6322  }
6323  return failure();
6324  })
6325  .Case("omp.declare_target",
6326  [&](Attribute attr) {
6327  if (auto declareTargetAttr =
6328  dyn_cast<omp::DeclareTargetAttr>(attr))
6329  return convertDeclareTargetAttr(op, declareTargetAttr,
6330  moduleTranslation);
6331  return failure();
6332  })
6333  .Case("omp.requires",
6334  [&](Attribute attr) {
6335  if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
6336  using Requires = omp::ClauseRequires;
6337  Requires flags = requiresAttr.getValue();
6338  llvm::OpenMPIRBuilderConfig &config =
6339  moduleTranslation.getOpenMPBuilder()->Config;
6340  config.setHasRequiresReverseOffload(
6341  bitEnumContainsAll(flags, Requires::reverse_offload));
6342  config.setHasRequiresUnifiedAddress(
6343  bitEnumContainsAll(flags, Requires::unified_address));
6344  config.setHasRequiresUnifiedSharedMemory(
6345  bitEnumContainsAll(flags, Requires::unified_shared_memory));
6346  config.setHasRequiresDynamicAllocators(
6347  bitEnumContainsAll(flags, Requires::dynamic_allocators));
6348  return success();
6349  }
6350  return failure();
6351  })
6352  .Case("omp.target_triples",
6353  [&](Attribute attr) {
6354  if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
6355  llvm::OpenMPIRBuilderConfig &config =
6356  moduleTranslation.getOpenMPBuilder()->Config;
6357  config.TargetTriples.clear();
6358  config.TargetTriples.reserve(triplesAttr.size());
6359  for (Attribute tripleAttr : triplesAttr) {
6360  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
6361  config.TargetTriples.emplace_back(tripleStrAttr.getValue());
6362  else
6363  return failure();
6364  }
6365  return success();
6366  }
6367  return failure();
6368  })
6369  .Default([](Attribute) {
6370  // Fall through for omp attributes that do not require lowering.
6371  return success();
6372  })(attribute.getValue());
6373 
6374  return failure();
6375 }
6376 
6377 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
6378 /// (including OpenMP runtime calls).
6379 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
6380  Operation *op, llvm::IRBuilderBase &builder,
6381  LLVM::ModuleTranslation &moduleTranslation) const {
6382 
6383  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
6384  if (ompBuilder->Config.isTargetDevice()) {
6385  if (isTargetDeviceOp(op)) {
6386  return convertTargetDeviceOp(op, builder, moduleTranslation);
6387  }
6388  return convertTargetOpsInNest(op, builder, moduleTranslation);
6389  }
6390  return convertHostOrTargetOperation(op, builder, moduleTranslation);
6391 }
6392 
6394  registry.insert<omp::OpenMPDialect>();
6395  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
6396  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
6397  });
6398 }
6399 
6401  DialectRegistry registry;
6403  context.appendDialectRegistry(registry);
6404 }
static std::string toString(bytecode::Section::ID sectionID)
Stringify the given section ID.
static ze_device_handle_t getDevice(const uint32_t driverIdx=0, const int32_t devIdx=0)
union mlir::linalg::@1244::ArityGroupAndKind::Kind kind
@ None
static llvm::Value * getRefPtrIfDeclareTarget(mlir::Value value, LLVM::ModuleTranslation &moduleTranslation)
static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::Function *func)
static LogicalResult convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static llvm::omp::OrderKind convertOrderKind(std::optional< omp::ClauseOrderKind > o)
Convert Order attribute to llvm::omp::OrderKind.
static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp)
static LogicalResult convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::Expected< llvm::Function * > emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::StringRef mapperFuncName)
static llvm::Expected< llvm::Value * > initPrivateVar(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg, llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Initialize a single (first)private variable.
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertToCaptureClauseKind(mlir::omp::DeclareTargetCaptureClause captureClause)
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, bool first)
static OpTy castOrGetParentOfType(Operation *op, bool immediateParent=false)
If op is of the given type parameter, return it casted to that type.
static LogicalResult convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered_region' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::Function * getOmpTargetAlloc(llvm::IRBuilderBase &builder, llvm::Module *llvmModule)
static LogicalResult convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an omp.atomic.write operation to LLVM IR.
static OwningAtomicReductionGen makeAtomicReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible atomic reduction generator for the given reduction declaration.
static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Apply a #pragma omp unroll / "!$omp unroll" transformation using the OpenMPIRBuilder.
static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static llvm::Value * findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Return the llvm::Value * corresponding to the privateVar that is being privatized.
static LogicalResult convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
static void popCancelFinalizationCB(const ArrayRef< llvm::BranchInst * > cancelTerminators, llvm::OpenMPIRBuilder &ompBuilder, const llvm::OpenMPIRBuilder::InsertPointTy &afterIP)
If we cancelled the construct, we should branch to the finalization block of that construct.
static LogicalResult cleanupPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, Location loc, SmallVectorImpl< llvm::Value * > &llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls)
static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static llvm::Expected< llvm::BasicBlock * > allocatePrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Allocate and initialize delayed private variables.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op)
Converts an LLVM dialect binary operation to the corresponding enum value for atomicrmw supported bin...
static llvm::AtomicOrdering convertAtomicOrdering(std::optional< omp::ClauseMemoryOrderKind > ao)
Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static void setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder, llvm::BasicBlock *block=nullptr)
static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCanonicalLoopOp(omp::CanonicalLoopOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert an omp.canonical_loop to LLVM-IR.
static LogicalResult convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult allocAndInitializeReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef)
static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, MapInfosTy &combinedInfo, bool isTargetParams, int mapDataParentIdx=-1)
static LogicalResult convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
static bool isTargetDeviceOp(Operation *op)
static LogicalResult inlineOmpRegionCleanup(llvm::SmallVectorImpl< Region * > &cleanupRegions, llvm::ArrayRef< llvm::Value * > privateVariables, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, StringRef regionName, bool shouldLoadCleanupRegionArg=true)
handling of DeclareReductionOp's cleanup region
static void pushCancelFinalizationCB(SmallVectorImpl< llvm::BranchInst * > &cancelTerminators, llvm::IRBuilderBase &llvmBuilder, llvm::OpenMPIRBuilder &ompBuilder, mlir::Operation *op, llvm::omp::Directive cancelDirective)
Shared implementation of a callback which adds a terminator for the new block created for the branch t...
static void mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, DenseMap< Value, llvm::Value * > &reductionVariableMap, unsigned i)
Map input arguments to reduction initialization region.
static LogicalResult initReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::BasicBlock *latestAllocaBlock, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef, SmallVectorImpl< DeferredStore > &deferredStores)
Inline reductions' init regions.
static LogicalResult convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp)
static llvm::Error initPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static llvm::SmallString< 64 > getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, llvm::OpenMPIRBuilder &ompBuilder)
static OwningReductionGen makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible reduction generator for the given reduction declaration.
static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts the OpenMP parallel operation to LLVM IR.
static llvm::OpenMPIRBuilder::InsertPointTy findAllocaInsertPoint(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Find the insertion point for allocas given the current insertion point for normal operations in the b...
static LogicalResult inlineConvertOmpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::Value * > *continuationBlockArgs=nullptr)
Translates the blocks contained in the given region and appends them to at the current insertion poin...
static LogicalResult convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP Threadprivate operation into LLVM IR using OpenMPIRBuilder.
static omp::PrivateClauseOp findPrivatizer(Operation *from, SymbolRefAttr symbolName)
Looks up from the operation from and returns the PrivateClauseOp with name symbolName.
static LogicalResult convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, bool isTargetParams=false)
LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
Lowers the FlagsAttr which is applied to the module on the device pass when offloading,...
static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl)
static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, omp::TargetOp targetOp, llvm::StringRef parentName="")
static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, Value &numTeamsLower, Value &numTeamsUpper, Value &threadLimit, llvm::SmallVectorImpl< Value > *lowerBounds=nullptr, llvm::SmallVectorImpl< Value > *upperBounds=nullptr, llvm::SmallVectorImpl< Value > *steps=nullptr)
Follow uses of host_eval-defined block arguments of the given omp.target operation and populate outpu...
uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl)
std::vector< llvm::Value * > calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, bool isArrayTy, OperandRange bounds)
This function calculates the array/pointer offset for map data provided with bounds operations,...
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static llvm::Expected< llvm::Function * > getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation, omp::BlockArgOpenMPOpInterface blockArgIface)
Maps block arguments from blockArgIface (which are MLIR values) to the corresponding LLVM values of t...
static LogicalResult allocReductionVars(T loop, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, SmallVectorImpl< DeferredStore > &deferredStores, llvm::ArrayRef< bool > isByRefs)
Allocate space for privatized reduction variables.
static ArrayRef< bool > getIsByRef(std::optional< ArrayRef< bool >> attr)
static bool constructIsCancellable(Operation *op)
Returns true if the construct contains omp.cancel or omp.cancellation_point.
void extractAtomicControlFlags(omp::AtomicUpdateOp atomicUpdateOp, bool &isIgnoreDenormalMode, bool &isFineGrainedMemory, bool &isRemoteMemory)
static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP)
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind)
Convert ProcBindKind from MLIR-generated enum to LLVM enum.
static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static void collectReductionDecls(T op, SmallVectorImpl< omp::DeclareReductionOp > &reductions)
Populates reductions with reduction declarations used in the given op.
static LogicalResult handleError(llvm::Error error, Operation &op)
static LogicalResult copyFirstPrivateVars(mlir::Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< mlir::Value > &mlirPrivateVars, ArrayRef< llvm::Value * > llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls, bool insertBarrier, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static void processMapMembersWithParent(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag)
static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause)
static LogicalResult checkImplementationStatus(Operation &op)
Check whether translation to LLVM IR for the given operation is currently supported.
static LogicalResult createReductionsAndCleanup(OP op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, ArrayRef< llvm::Value * > privateReductionVariables, ArrayRef< bool > isByRef, bool isNowait=false, bool isTeamsReduction=false)
static LogicalResult convertOmpCancellationPoint(omp::CancellationPointOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static uint64_t getReductionDataSize(OpTy &op)
static llvm::CanonicalLoopInfo * findCurrentLoopInfo(LLVM::ModuleTranslation &moduleTranslation)
Find the loop information structure for the loop nest being translated.
static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert omp.atomic.read operation to LLVM IR.
static llvm::omp::Directive convertCancellationConstructType(omp::ClauseCancellationConstructType directive)
static void initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs, bool isTargetDevice, bool isGPU)
Populate default MinTeams, MaxTeams and MaxThreads to their default values as stated by the correspon...
static std::optional< int64_t > extractConstInteger(Value value)
If the given value is defined by an llvm.mlir.constant operation and it is of an integer type,...
static void initTargetRuntimeAttrs(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs)
Gather LLVM runtime values for all clauses evaluated in the host that are passed to the kernel invoca...
static void collectReductionInfo(T loop, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< OwningReductionGen > &owningReductionGens, SmallVectorImpl< OwningAtomicReductionGen > &owningAtomicReductionGens, const ArrayRef< llvm::Value * > privateReductionVariables, SmallVectorImpl< llvm::OpenMPIRBuilder::ReductionInfo > &reductionInfos)
Collect reduction info.
static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Given an OpenMP MLIR operation, create the corresponding LLVM IR (including OpenMP runtime calls).
static bool checkIfPointerMap(omp::MapInfoOp mapOp)
static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void buildDependData(std::optional< ArrayAttr > dependKinds, OperandRange dependVars, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::OpenMPIRBuilder::DependData > &dds)
static llvm::Expected< llvm::BasicBlock * > convertOmpOpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::PHINode * > *continuationBlockPHIs=nullptr)
Converts the given region that appears within an OpenMP dialect operation to LLVM IR,...
static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
llvm::Value * getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::Value *basePointer, llvm::Type *baseType, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void collectMapDataFromMapOperands(MapInfoData &mapData, SmallVectorImpl< Value > &mapVars, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::IRBuilderBase &builder, ArrayRef< Value > useDevPtrOperands={}, ArrayRef< Value > useDevAddrOperands={}, ArrayRef< Value > hasDevAddrOperands={})
static bool isDeclareTargetLink(mlir::Value value)
static llvm::Function * getOmpTargetFree(llvm::IRBuilderBase &builder, llvm::Module *llvmModule)
#define MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CLASS_NAME)
Definition: TypeID.h:331
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class represents an argument of a Block.
Definition: Value.h:309
Block represents an ordered list of Operations.
Definition: Block.h:33
BlockArgument getArgument(unsigned i)
Definition: Block.h:129
unsigned getNumArguments()
Definition: Block.h:128
Operation & back()
Definition: Block.h:152
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:244
Operation & front()
Definition: Block.h:153
iterator begin()
Definition: Block.h:143
The main mechanism for performing data layout queries.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
An instance of this location represents a tuple of file, line number, and column number.
Definition: Location.h:174
Base class for dialect interfaces providing translation to LLVM IR.
virtual LogicalResult amendOperation(Operation *op, ArrayRef< llvm::Instruction * > instructions, NamedAttribute attribute, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to act on an operation that has dialect attributes from the derive...
virtual LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to provide translation of the operations to LLVM IR.
T findInstanceOf()
Return an instance of the given location type if one is nested under the current location.
Definition: Location.h:45
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:63
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:164
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:55
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:179
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:43
type_range getType() const
Definition: ValueRange.cpp:32
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:350
Dialect * getDialect()
Return the dialect this operation is associated with, or nullptr if the associated dialect is not loaded.
Definition: Operation.h:220
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:279
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:797
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:346
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation, or nullptr if this is a top-level operation.
Definition: Operation.h:234
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:267
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:686
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:873
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:538
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
BlockArgListType getArguments()
Definition: Region.h:81
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition: Region.h:200
bool empty()
Definition: Region.h:60
unsigned getNumArguments()
Definition: Region.h:123
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
Block & front()
Definition: Region.h:65
bool hasOneBlock()
Return true if this region has exactly one block.
Definition: Region.h:68
Concrete CRTP base class for StateStack frames.
Definition: StateStack.h:47
@ Private
The symbol is private and may only be referenced by SymbolRefAttrs local to the operations within the...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:18
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: WalkResult.h:29
static WalkResult advance()
Definition: WalkResult.h:47
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: WalkResult.h:51
static WalkResult interrupt()
Definition: WalkResult.h:46
The OpAsmOpInterface, see OpAsmInterface.td for more details.
Definition: CallGraph.h:229
void connectPHINodes(Region &region, const ModuleTranslation &state)
For all blocks in the region that were converted to LLVM IR using the given ModuleTranslation,...
llvm::Constant * createMappingInformation(Location loc, llvm::OpenMPIRBuilder &builder)
Create a constant string representing the mapping information extracted from the MLIR location inform...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Runtime
Potential runtimes for AMD GPU kernels.
Definition: Runtimes.h:15
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition: Remarks.h:491
llvm::hash_code hash_value(const StructType::MemberDecorationInfo &memberDecorationInfo)
llvm::PointerUnion< NamedAttribute *, NamedProperty *, NamedTypeConstraint * > Argument
Definition: Argument.h:64
Include the generated interface declarations.
SetVector< Block * > getBlocksSortedByDominance(Region &region)
Gets a list of blocks that is sorted according to dominance.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:304
const FrozenRewritePatternSet GreedyRewriteConfig config
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
void registerOpenMPDialectTranslation(DialectRegistry &registry)
Register the OpenMP dialect and the translation from it to the LLVM IR in the given registry.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
A util to collect info needed to convert delayed privatizers from MLIR to LLVM.
SmallVector< mlir::Value > mlirVars
SmallVector< omp::PrivateClauseOp > privatizers
MutableArrayRef< BlockArgument > blockArgs
SmallVector< llvm::Value * > llvmVars
RAII object calling stackPush/stackPop on construction/destruction.
Definition: StateStack.h:106