// OpenMPToLLVMIRTranslation.cpp — extracted from the MLIR (22.0.0git)
// generated documentation page for this file.
1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
#include "mlir/Analysis/TopologicalSortUtils.h"
#include "mlir/IR/Operation.h"
#include "mlir/Support/LLVM.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
39 
40 #include <cstdint>
41 #include <iterator>
42 #include <numeric>
43 #include <optional>
44 #include <utility>
45 
46 using namespace mlir;
47 
48 namespace {
49 static llvm::omp::ScheduleKind
50 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
51  if (!schedKind.has_value())
52  return llvm::omp::OMP_SCHEDULE_Default;
53  switch (schedKind.value()) {
54  case omp::ClauseScheduleKind::Static:
55  return llvm::omp::OMP_SCHEDULE_Static;
56  case omp::ClauseScheduleKind::Dynamic:
57  return llvm::omp::OMP_SCHEDULE_Dynamic;
58  case omp::ClauseScheduleKind::Guided:
59  return llvm::omp::OMP_SCHEDULE_Guided;
60  case omp::ClauseScheduleKind::Auto:
61  return llvm::omp::OMP_SCHEDULE_Auto;
63  return llvm::omp::OMP_SCHEDULE_Runtime;
64  }
65  llvm_unreachable("unhandled schedule clause argument");
66 }
67 
68 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
69 /// insertion points for allocas.
70 class OpenMPAllocaStackFrame
71  : public StateStackFrameBase<OpenMPAllocaStackFrame> {
72 public:
73  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)
74 
75  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
76  : allocaInsertPoint(allocaIP) {}
77  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
78 };
79 
80 /// Stack frame to hold a \see llvm::CanonicalLoopInfo representing the
81 /// collapsed canonical loop information corresponding to an \c omp.loop_nest
82 /// operation.
class OpenMPLoopInfoStackFrame
    : public StateStackFrameBase<OpenMPLoopInfoStackFrame> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPLoopInfoStackFrame)
  /// Filled in by the translation of the enclosed loop; stays null until the
  /// loop body has been successfully translated.
  llvm::CanonicalLoopInfo *loopInfo = nullptr;
};
89 
90 /// Custom error class to signal translation errors that don't need reporting,
91 /// since encountering them will have already triggered relevant error messages.
92 ///
93 /// Its purpose is to serve as the glue between MLIR failures represented as
94 /// \see LogicalResult instances and \see llvm::Error instances used to
95 /// propagate errors through the \see llvm::OpenMPIRBuilder. Generally, when an
96 /// error of the first type is raised, a message is emitted directly (the \see
97 /// LogicalResult itself does not hold any information). If we need to forward
98 /// this error condition as an \see llvm::Error while avoiding triggering some
99 /// redundant error reporting later on, we need a custom \see llvm::ErrorInfo
100 /// class to just signal this situation has happened.
101 ///
102 /// For example, this class should be used to trigger errors from within
103 /// callbacks passed to the \see OpenMPIRBuilder when they were triggered by the
104 /// translation of their own regions. This unclutters the error log from
105 /// redundant messages.
106 class PreviouslyReportedError
107  : public llvm::ErrorInfo<PreviouslyReportedError> {
108 public:
109  void log(raw_ostream &) const override {
110  // Do not log anything.
111  }
112 
113  std::error_code convertToErrorCode() const override {
114  llvm_unreachable(
115  "PreviouslyReportedError doesn't support ECError conversion");
116  }
117 
118  // Used by ErrorInfo::classID.
119  static char ID;
120 };
121 
123 
124 /*
125  * Custom class for processing linear clause for omp.wsloop
126  * and omp.simd. Linear clause translation requires setup,
127  * initialization, update, and finalization at varying
128  * basic blocks in the IR. This class helps maintain
129  * internal state to allow consistent translation in
130  * each of these stages.
131  */
132 
133 class LinearClauseProcessor {
134 
135 private:
136  SmallVector<llvm::Value *> linearPreconditionVars;
137  SmallVector<llvm::Value *> linearLoopBodyTemps;
138  SmallVector<llvm::AllocaInst *> linearOrigVars;
139  SmallVector<llvm::Value *> linearOrigVal;
140  SmallVector<llvm::Value *> linearSteps;
141  llvm::BasicBlock *linearFinalizationBB;
142  llvm::BasicBlock *linearExitBB;
143  llvm::BasicBlock *linearLastIterExitBB;
144 
145 public:
146  // Allocate space for linear variabes
147  void createLinearVar(llvm::IRBuilderBase &builder,
148  LLVM::ModuleTranslation &moduleTranslation,
149  mlir::Value &linearVar) {
150  if (llvm::AllocaInst *linearVarAlloca = dyn_cast<llvm::AllocaInst>(
151  moduleTranslation.lookupValue(linearVar))) {
152  linearPreconditionVars.push_back(builder.CreateAlloca(
153  linearVarAlloca->getAllocatedType(), nullptr, ".linear_var"));
154  llvm::Value *linearLoopBodyTemp = builder.CreateAlloca(
155  linearVarAlloca->getAllocatedType(), nullptr, ".linear_result");
156  linearOrigVal.push_back(moduleTranslation.lookupValue(linearVar));
157  linearLoopBodyTemps.push_back(linearLoopBodyTemp);
158  linearOrigVars.push_back(linearVarAlloca);
159  }
160  }
161 
162  // Initialize linear step
163  inline void initLinearStep(LLVM::ModuleTranslation &moduleTranslation,
164  mlir::Value &linearStep) {
165  linearSteps.push_back(moduleTranslation.lookupValue(linearStep));
166  }
167 
168  // Emit IR for initialization of linear variables
169  llvm::OpenMPIRBuilder::InsertPointOrErrorTy
170  initLinearVar(llvm::IRBuilderBase &builder,
171  LLVM::ModuleTranslation &moduleTranslation,
172  llvm::BasicBlock *loopPreHeader) {
173  builder.SetInsertPoint(loopPreHeader->getTerminator());
174  for (size_t index = 0; index < linearOrigVars.size(); index++) {
175  llvm::LoadInst *linearVarLoad = builder.CreateLoad(
176  linearOrigVars[index]->getAllocatedType(), linearOrigVars[index]);
177  builder.CreateStore(linearVarLoad, linearPreconditionVars[index]);
178  }
179  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
180  moduleTranslation.getOpenMPBuilder()->createBarrier(
181  builder.saveIP(), llvm::omp::OMPD_barrier);
182  return afterBarrierIP;
183  }
184 
185  // Emit IR for updating Linear variables
186  void updateLinearVar(llvm::IRBuilderBase &builder, llvm::BasicBlock *loopBody,
187  llvm::Value *loopInductionVar) {
188  builder.SetInsertPoint(loopBody->getTerminator());
189  for (size_t index = 0; index < linearPreconditionVars.size(); index++) {
190  // Emit increments for linear vars
191  llvm::LoadInst *linearVarStart =
192  builder.CreateLoad(linearOrigVars[index]->getAllocatedType(),
193 
194  linearPreconditionVars[index]);
195  auto mulInst = builder.CreateMul(loopInductionVar, linearSteps[index]);
196  auto addInst = builder.CreateAdd(linearVarStart, mulInst);
197  builder.CreateStore(addInst, linearLoopBodyTemps[index]);
198  }
199  }
200 
201  // Linear variable finalization is conditional on the last logical iteration.
202  // Create BB splits to manage the same.
203  void outlineLinearFinalizationBB(llvm::IRBuilderBase &builder,
204  llvm::BasicBlock *loopExit) {
205  linearFinalizationBB = loopExit->splitBasicBlock(
206  loopExit->getTerminator(), "omp_loop.linear_finalization");
207  linearExitBB = linearFinalizationBB->splitBasicBlock(
208  linearFinalizationBB->getTerminator(), "omp_loop.linear_exit");
209  linearLastIterExitBB = linearFinalizationBB->splitBasicBlock(
210  linearFinalizationBB->getTerminator(), "omp_loop.linear_lastiter_exit");
211  }
212 
213  // Finalize the linear vars
214  llvm::OpenMPIRBuilder::InsertPointOrErrorTy
215  finalizeLinearVar(llvm::IRBuilderBase &builder,
216  LLVM::ModuleTranslation &moduleTranslation,
217  llvm::Value *lastIter) {
218  // Emit condition to check whether last logical iteration is being executed
219  builder.SetInsertPoint(linearFinalizationBB->getTerminator());
220  llvm::Value *loopLastIterLoad = builder.CreateLoad(
221  llvm::Type::getInt32Ty(builder.getContext()), lastIter);
222  llvm::Value *isLast =
223  builder.CreateCmp(llvm::CmpInst::ICMP_NE, loopLastIterLoad,
225  llvm::Type::getInt32Ty(builder.getContext()), 0));
226  // Store the linear variable values to original variables.
227  builder.SetInsertPoint(linearLastIterExitBB->getTerminator());
228  for (size_t index = 0; index < linearOrigVars.size(); index++) {
229  llvm::LoadInst *linearVarTemp =
230  builder.CreateLoad(linearOrigVars[index]->getAllocatedType(),
231  linearLoopBodyTemps[index]);
232  builder.CreateStore(linearVarTemp, linearOrigVars[index]);
233  }
234 
235  // Create conditional branch such that the linear variable
236  // values are stored to original variables only at the
237  // last logical iteration
238  builder.SetInsertPoint(linearFinalizationBB->getTerminator());
239  builder.CreateCondBr(isLast, linearLastIterExitBB, linearExitBB);
240  linearFinalizationBB->getTerminator()->eraseFromParent();
241  // Emit barrier
242  builder.SetInsertPoint(linearExitBB->getTerminator());
243  return moduleTranslation.getOpenMPBuilder()->createBarrier(
244  builder.saveIP(), llvm::omp::OMPD_barrier);
245  }
246 
247  // Rewrite all uses of the original variable in `BBName`
248  // with the linear variable in-place
249  void rewriteInPlace(llvm::IRBuilderBase &builder, std::string BBName,
250  size_t varIndex) {
252  for (llvm::User *user : linearOrigVal[varIndex]->users())
253  users.push_back(user);
254  for (auto *user : users) {
255  if (auto *userInst = dyn_cast<llvm::Instruction>(user)) {
256  if (userInst->getParent()->getName().str() == BBName)
257  user->replaceUsesOfWith(linearOrigVal[varIndex],
258  linearLoopBodyTemps[varIndex]);
259  }
260  }
261  }
262 };
263 
264 } // namespace
265 
266 /// Looks up from the operation from and returns the PrivateClauseOp with
267 /// name symbolName
268 static omp::PrivateClauseOp findPrivatizer(Operation *from,
269  SymbolRefAttr symbolName) {
270  omp::PrivateClauseOp privatizer =
271  SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
272  symbolName);
273  assert(privatizer && "privatizer not found in the symbol table");
274  return privatizer;
275 }
276 
277 /// Check whether translation to LLVM IR for the given operation is currently
278 /// supported. If not, descriptive diagnostics will be emitted to let users know
279 /// this is a not-yet-implemented feature.
280 ///
281 /// \returns success if no unimplemented features are needed to translate the
282 /// given operation.
283 static LogicalResult checkImplementationStatus(Operation &op) {
284  auto todo = [&op](StringRef clauseName) {
285  return op.emitError() << "not yet implemented: Unhandled clause "
286  << clauseName << " in " << op.getName()
287  << " operation";
288  };
289 
290  auto checkAllocate = [&todo](auto op, LogicalResult &result) {
291  if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
292  result = todo("allocate");
293  };
294  auto checkBare = [&todo](auto op, LogicalResult &result) {
295  if (op.getBare())
296  result = todo("ompx_bare");
297  };
298  auto checkCancelDirective = [&todo](auto op, LogicalResult &result) {
299  omp::ClauseCancellationConstructType cancelledDirective =
300  op.getCancelDirective();
301  // Cancelling a taskloop is not yet supported because we don't yet have LLVM
302  // IR conversion for taskloop
303  if (cancelledDirective == omp::ClauseCancellationConstructType::Taskgroup) {
304  Operation *parent = op->getParentOp();
305  while (parent) {
306  if (parent->getDialect() == op->getDialect())
307  break;
308  parent = parent->getParentOp();
309  }
310  if (isa_and_nonnull<omp::TaskloopOp>(parent))
311  result = todo("cancel directive inside of taskloop");
312  }
313  };
314  auto checkDepend = [&todo](auto op, LogicalResult &result) {
315  if (!op.getDependVars().empty() || op.getDependKinds())
316  result = todo("depend");
317  };
318  auto checkDevice = [&todo](auto op, LogicalResult &result) {
319  if (op.getDevice())
320  result = todo("device");
321  };
322  auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
323  if (op.getDistScheduleChunkSize())
324  result = todo("dist_schedule with chunk_size");
325  };
326  auto checkHint = [](auto op, LogicalResult &) {
327  if (op.getHint())
328  op.emitWarning("hint clause discarded");
329  };
330  auto checkInReduction = [&todo](auto op, LogicalResult &result) {
331  if (!op.getInReductionVars().empty() || op.getInReductionByref() ||
332  op.getInReductionSyms())
333  result = todo("in_reduction");
334  };
335  auto checkIsDevicePtr = [&todo](auto op, LogicalResult &result) {
336  if (!op.getIsDevicePtrVars().empty())
337  result = todo("is_device_ptr");
338  };
339  auto checkLinear = [&todo](auto op, LogicalResult &result) {
340  if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
341  result = todo("linear");
342  };
343  auto checkNowait = [&todo](auto op, LogicalResult &result) {
344  if (op.getNowait())
345  result = todo("nowait");
346  };
347  auto checkOrder = [&todo](auto op, LogicalResult &result) {
348  if (op.getOrder() || op.getOrderMod())
349  result = todo("order");
350  };
351  auto checkParLevelSimd = [&todo](auto op, LogicalResult &result) {
352  if (op.getParLevelSimd())
353  result = todo("parallelization-level");
354  };
355  auto checkPriority = [&todo](auto op, LogicalResult &result) {
356  if (op.getPriority())
357  result = todo("priority");
358  };
359  auto checkPrivate = [&todo](auto op, LogicalResult &result) {
360  if constexpr (std::is_same_v<std::decay_t<decltype(op)>, omp::TargetOp>) {
361  // Privatization is supported only for included target tasks.
362  if (!op.getPrivateVars().empty() && op.getNowait())
363  result = todo("privatization for deferred target tasks");
364  } else {
365  if (!op.getPrivateVars().empty() || op.getPrivateSyms())
366  result = todo("privatization");
367  }
368  };
369  auto checkReduction = [&todo](auto op, LogicalResult &result) {
370  if (isa<omp::TeamsOp>(op))
371  if (!op.getReductionVars().empty() || op.getReductionByref() ||
372  op.getReductionSyms())
373  result = todo("reduction");
374  if (op.getReductionMod() &&
375  op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
376  result = todo("reduction with modifier");
377  };
378  auto checkTaskReduction = [&todo](auto op, LogicalResult &result) {
379  if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() ||
380  op.getTaskReductionSyms())
381  result = todo("task_reduction");
382  };
383  auto checkUntied = [&todo](auto op, LogicalResult &result) {
384  if (op.getUntied())
385  result = todo("untied");
386  };
387 
388  LogicalResult result = success();
390  .Case([&](omp::CancelOp op) { checkCancelDirective(op, result); })
391  .Case([&](omp::CancellationPointOp op) {
392  checkCancelDirective(op, result);
393  })
394  .Case([&](omp::DistributeOp op) {
395  checkAllocate(op, result);
396  checkDistSchedule(op, result);
397  checkOrder(op, result);
398  })
399  .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
400  .Case([&](omp::SectionsOp op) {
401  checkAllocate(op, result);
402  checkPrivate(op, result);
403  checkReduction(op, result);
404  })
405  .Case([&](omp::SingleOp op) {
406  checkAllocate(op, result);
407  checkPrivate(op, result);
408  })
409  .Case([&](omp::TeamsOp op) {
410  checkAllocate(op, result);
411  checkPrivate(op, result);
412  })
413  .Case([&](omp::TaskOp op) {
414  checkAllocate(op, result);
415  checkInReduction(op, result);
416  })
417  .Case([&](omp::TaskgroupOp op) {
418  checkAllocate(op, result);
419  checkTaskReduction(op, result);
420  })
421  .Case([&](omp::TaskwaitOp op) {
422  checkDepend(op, result);
423  checkNowait(op, result);
424  })
425  .Case([&](omp::TaskloopOp op) {
426  // TODO: Add other clauses check
427  checkUntied(op, result);
428  checkPriority(op, result);
429  })
430  .Case([&](omp::WsloopOp op) {
431  checkAllocate(op, result);
432  checkLinear(op, result);
433  checkOrder(op, result);
434  checkReduction(op, result);
435  })
436  .Case([&](omp::ParallelOp op) {
437  checkAllocate(op, result);
438  checkReduction(op, result);
439  })
440  .Case([&](omp::SimdOp op) {
441  checkLinear(op, result);
442  checkReduction(op, result);
443  })
444  .Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp,
445  omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); })
446  .Case<omp::TargetEnterDataOp, omp::TargetExitDataOp, omp::TargetUpdateOp>(
447  [&](auto op) { checkDepend(op, result); })
448  .Case([&](omp::TargetOp op) {
449  checkAllocate(op, result);
450  checkBare(op, result);
451  checkDevice(op, result);
452  checkInReduction(op, result);
453  checkIsDevicePtr(op, result);
454  checkPrivate(op, result);
455  })
456  .Default([](Operation &) {
457  // Assume all clauses for an operation can be translated unless they are
458  // checked above.
459  });
460  return result;
461 }
462 
463 static LogicalResult handleError(llvm::Error error, Operation &op) {
464  LogicalResult result = success();
465  if (error) {
466  llvm::handleAllErrors(
467  std::move(error),
468  [&](const PreviouslyReportedError &) { result = failure(); },
469  [&](const llvm::ErrorInfoBase &err) {
470  result = op.emitError(err.message());
471  });
472  }
473  return result;
474 }
475 
476 template <typename T>
477 static LogicalResult handleError(llvm::Expected<T> &result, Operation &op) {
478  if (!result)
479  return handleError(result.takeError(), op);
480 
481  return success();
482 }
483 
484 /// Find the insertion point for allocas given the current insertion point for
485 /// normal operations in the builder.
static llvm::OpenMPIRBuilder::InsertPointTy
findAllocaInsertPoint(llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation) {
  // If there is an alloca insertion point on stack, i.e. we are in a nested
  // operation and a specific point was provided by some surrounding operation,
  // use it. The walk stops at the innermost (most recently pushed) frame.
  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
      [&](OpenMPAllocaStackFrame &frame) {
        allocaInsertPoint = frame.allocaInsertPoint;
        return WalkResult::interrupt();
      });
  // In cases with multiple levels of outlining, the tree walk might find an
  // alloca insertion point that is inside the original function while the
  // builder insertion point is inside the outlined function. We need to make
  // sure that we do not use it in those cases.
  if (walkResult.wasInterrupted() &&
      allocaInsertPoint.getBlock()->getParent() ==
          builder.GetInsertBlock()->getParent())
    return allocaInsertPoint;

  // Otherwise, insert to the entry block of the surrounding function.
  // If the current IRBuilder InsertPoint is the function's entry, it cannot
  // also be used for alloca insertion which would result in insertion order
  // confusion. Create a new BasicBlock for the Builder and use the entry block
  // for the allocs.
  // TODO: Create a dedicated alloca BasicBlock at function creation such that
  // we do not need to move the current InsertPoint here.
  if (builder.GetInsertBlock() ==
      &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
    assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
           "Assuming end of basic block");
    llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
        builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
        builder.GetInsertBlock()->getNextNode());
    builder.CreateBr(entryBB);
    builder.SetInsertPoint(entryBB);
  }

  llvm::BasicBlock &funcEntryBlock =
      builder.GetInsertBlock()->getParent()->getEntryBlock();
  return llvm::OpenMPIRBuilder::InsertPointTy(
      &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
}
530 
531 /// Find the loop information structure for the loop nest being translated. It
532 /// will return a `null` value unless called from the translation function for
533 /// a loop wrapper operation after successfully translating its body.
534 static llvm::CanonicalLoopInfo *
535 findCurrentLoopInfo(LLVM::ModuleTranslation &moduleTranslation) {
536  llvm::CanonicalLoopInfo *loopInfo = nullptr;
537  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
538  [&](OpenMPLoopInfoStackFrame &frame) {
539  loopInfo = frame.loopInfo;
540  return WalkResult::interrupt();
541  });
542  return loopInfo;
543 }
544 
545 /// Converts the given region that appears within an OpenMP dialect operation to
546 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
547 /// region, and a branch from any block with an successor-less OpenMP terminator
548 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
549 /// of the continuation block if provided.
551  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
552  LLVM::ModuleTranslation &moduleTranslation,
553  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
554  bool isLoopWrapper = isa<omp::LoopWrapperInterface>(region.getParentOp());
555 
556  llvm::BasicBlock *continuationBlock =
557  splitBB(builder, true, "omp.region.cont");
558  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
559 
560  llvm::LLVMContext &llvmContext = builder.getContext();
561  for (Block &bb : region) {
562  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
563  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
564  builder.GetInsertBlock()->getNextNode());
565  moduleTranslation.mapBlock(&bb, llvmBB);
566  }
567 
568  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
569 
570  // Terminators (namely YieldOp) may be forwarding values to the region that
571  // need to be available in the continuation block. Collect the types of these
572  // operands in preparation of creating PHI nodes. This is skipped for loop
573  // wrapper operations, for which we know in advance they have no terminators.
574  SmallVector<llvm::Type *> continuationBlockPHITypes;
575  unsigned numYields = 0;
576 
577  if (!isLoopWrapper) {
578  bool operandsProcessed = false;
579  for (Block &bb : region.getBlocks()) {
580  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
581  if (!operandsProcessed) {
582  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
583  continuationBlockPHITypes.push_back(
584  moduleTranslation.convertType(yield->getOperand(i).getType()));
585  }
586  operandsProcessed = true;
587  } else {
588  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
589  "mismatching number of values yielded from the region");
590  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
591  llvm::Type *operandType =
592  moduleTranslation.convertType(yield->getOperand(i).getType());
593  (void)operandType;
594  assert(continuationBlockPHITypes[i] == operandType &&
595  "values of mismatching types yielded from the region");
596  }
597  }
598  numYields++;
599  }
600  }
601  }
602 
603  // Insert PHI nodes in the continuation block for any values forwarded by the
604  // terminators in this region.
605  if (!continuationBlockPHITypes.empty())
606  assert(
607  continuationBlockPHIs &&
608  "expected continuation block PHIs if converted regions yield values");
609  if (continuationBlockPHIs) {
610  llvm::IRBuilderBase::InsertPointGuard guard(builder);
611  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
612  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
613  for (llvm::Type *ty : continuationBlockPHITypes)
614  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
615  }
616 
617  // Convert blocks one by one in topological order to ensure
618  // defs are converted before uses.
620  for (Block *bb : blocks) {
621  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
622  // Retarget the branch of the entry block to the entry block of the
623  // converted region (regions are single-entry).
624  if (bb->isEntryBlock()) {
625  assert(sourceTerminator->getNumSuccessors() == 1 &&
626  "provided entry block has multiple successors");
627  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
628  "ContinuationBlock is not the successor of the entry block");
629  sourceTerminator->setSuccessor(0, llvmBB);
630  }
631 
632  llvm::IRBuilderBase::InsertPointGuard guard(builder);
633  if (failed(
634  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder)))
635  return llvm::make_error<PreviouslyReportedError>();
636 
637  // Create a direct branch here for loop wrappers to prevent their lack of a
638  // terminator from causing a crash below.
639  if (isLoopWrapper) {
640  builder.CreateBr(continuationBlock);
641  continue;
642  }
643 
644  // Special handling for `omp.yield` and `omp.terminator` (we may have more
645  // than one): they return the control to the parent OpenMP dialect operation
646  // so replace them with the branch to the continuation block. We handle this
647  // here to avoid relying inter-function communication through the
648  // ModuleTranslation class to set up the correct insertion point. This is
649  // also consistent with MLIR's idiom of handling special region terminators
650  // in the same code that handles the region-owning operation.
651  Operation *terminator = bb->getTerminator();
652  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
653  builder.CreateBr(continuationBlock);
654 
655  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
656  (*continuationBlockPHIs)[i]->addIncoming(
657  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
658  }
659  }
660  // After all blocks have been traversed and values mapped, connect the PHI
661  // nodes to the results of preceding blocks.
662  LLVM::detail::connectPHINodes(region, moduleTranslation);
663 
664  // Remove the blocks and values defined in this region from the mapping since
665  // they are not visible outside of this region. This allows the same region to
666  // be converted several times, that is cloned, without clashes, and slightly
667  // speeds up the lookups.
668  moduleTranslation.forgetMapping(region);
669 
670  return continuationBlock;
671 }
672 
673 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
674 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
675  switch (kind) {
676  case omp::ClauseProcBindKind::Close:
677  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
678  case omp::ClauseProcBindKind::Master:
679  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
680  case omp::ClauseProcBindKind::Primary:
681  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
682  case omp::ClauseProcBindKind::Spread:
683  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
684  }
685  llvm_unreachable("Unknown ClauseProcBindKind kind");
686 }
687 
688 /// Maps block arguments from \p blockArgIface (which are MLIR values) to the
689 /// corresponding LLVM values of \p the interface's operands. This is useful
690 /// when an OpenMP region with entry block arguments is converted to LLVM. In
691 /// this case the block arguments are (part of) of the OpenMP region's entry
692 /// arguments and the operands are (part of) of the operands to the OpenMP op
693 /// containing the region.
694 static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation,
695  omp::BlockArgOpenMPOpInterface blockArgIface) {
697  blockArgIface.getBlockArgsPairs(blockArgsPairs);
698  for (auto [var, arg] : blockArgsPairs)
699  moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
700 }
701 
702 /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
703 static LogicalResult
704 convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
705  LLVM::ModuleTranslation &moduleTranslation) {
706  auto maskedOp = cast<omp::MaskedOp>(opInst);
707  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
708 
709  if (failed(checkImplementationStatus(opInst)))
710  return failure();
711 
712  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
713  // MaskedOp has only one region associated with it.
714  auto &region = maskedOp.getRegion();
715  builder.restoreIP(codeGenIP);
716  return convertOmpOpRegions(region, "omp.masked.region", builder,
717  moduleTranslation)
718  .takeError();
719  };
720 
721  // TODO: Perform finalization actions for variables. This has to be
722  // called for variables which have destructors/finalizers.
723  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
724 
725  llvm::Value *filterVal = nullptr;
726  if (auto filterVar = maskedOp.getFilteredThreadId()) {
727  filterVal = moduleTranslation.lookupValue(filterVar);
728  } else {
729  llvm::LLVMContext &llvmContext = builder.getContext();
730  filterVal =
731  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), /*V=*/0);
732  }
733  assert(filterVal != nullptr);
734  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
735  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
736  moduleTranslation.getOpenMPBuilder()->createMasked(ompLoc, bodyGenCB,
737  finiCB, filterVal);
738 
739  if (failed(handleError(afterIP, opInst)))
740  return failure();
741 
742  builder.restoreIP(*afterIP);
743  return success();
744 }
745 
746 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
747 static LogicalResult
748 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
749  LLVM::ModuleTranslation &moduleTranslation) {
750  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
751  auto masterOp = cast<omp::MasterOp>(opInst);
752 
753  if (failed(checkImplementationStatus(opInst)))
754  return failure();
755 
756  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
757  // MasterOp has only one region associated with it.
758  auto &region = masterOp.getRegion();
759  builder.restoreIP(codeGenIP);
760  return convertOmpOpRegions(region, "omp.master.region", builder,
761  moduleTranslation)
762  .takeError();
763  };
764 
765  // TODO: Perform finalization actions for variables. This has to be
766  // called for variables which have destructors/finalizers.
767  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
768 
769  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
770  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
771  moduleTranslation.getOpenMPBuilder()->createMaster(ompLoc, bodyGenCB,
772  finiCB);
773 
774  if (failed(handleError(afterIP, opInst)))
775  return failure();
776 
777  builder.restoreIP(*afterIP);
778  return success();
779 }
780 
781 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
782 static LogicalResult
783 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
784  LLVM::ModuleTranslation &moduleTranslation) {
785  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
786  auto criticalOp = cast<omp::CriticalOp>(opInst);
787 
788  if (failed(checkImplementationStatus(opInst)))
789  return failure();
790 
791  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
792  // CriticalOp has only one region associated with it.
793  auto &region = cast<omp::CriticalOp>(opInst).getRegion();
794  builder.restoreIP(codeGenIP);
795  return convertOmpOpRegions(region, "omp.critical.region", builder,
796  moduleTranslation)
797  .takeError();
798  };
799 
800  // TODO: Perform finalization actions for variables. This has to be
801  // called for variables which have destructors/finalizers.
802  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
803 
804  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
805  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
806  llvm::Constant *hint = nullptr;
807 
808  // If it has a name, it probably has a hint too.
809  if (criticalOp.getNameAttr()) {
810  // The verifiers in OpenMP Dialect guarentee that all the pointers are
811  // non-null
812  auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
813  auto criticalDeclareOp =
814  SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
815  symbolRef);
816  hint =
817  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
818  static_cast<int>(criticalDeclareOp.getHint()));
819  }
820  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
821  moduleTranslation.getOpenMPBuilder()->createCritical(
822  ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint);
823 
824  if (failed(handleError(afterIP, opInst)))
825  return failure();
826 
827  builder.restoreIP(*afterIP);
828  return success();
829 }
830 
831 /// A util to collect info needed to convert delayed privatizers from MLIR to
832 /// LLVM.
834  template <typename OP>
836  : blockArgs(
837  cast<omp::BlockArgOpenMPOpInterface>(*op).getPrivateBlockArgs()) {
838  mlirVars.reserve(blockArgs.size());
839  llvmVars.reserve(blockArgs.size());
840  collectPrivatizationDecls<OP>(op);
841 
842  for (mlir::Value privateVar : op.getPrivateVars())
843  mlirVars.push_back(privateVar);
844  }
845 
850 
851 private:
852  /// Populates `privatizations` with privatization declarations used for the
853  /// given op.
854  template <class OP>
855  void collectPrivatizationDecls(OP op) {
856  std::optional<ArrayAttr> attr = op.getPrivateSyms();
857  if (!attr)
858  return;
859 
860  privatizers.reserve(privatizers.size() + attr->size());
861  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
862  privatizers.push_back(findPrivatizer(op, symbolRef));
863  }
864  }
865 };
866 
867 /// Populates `reductions` with reduction declarations used in the given op.
868 template <typename T>
869 static void
872  std::optional<ArrayAttr> attr = op.getReductionSyms();
873  if (!attr)
874  return;
875 
876  reductions.reserve(reductions.size() + op.getNumReductionVars());
877  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
878  reductions.push_back(
879  SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
880  op, symbolRef));
881  }
882 }
883 
884 /// Translates the blocks contained in the given region and appends them to at
885 /// the current insertion point of `builder`. The operations of the entry block
886 /// are appended to the current insertion block. If set, `continuationBlockArgs`
887 /// is populated with translated values that correspond to the values
888 /// omp.yield'ed from the region.
889 static LogicalResult inlineConvertOmpRegions(
890  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
891  LLVM::ModuleTranslation &moduleTranslation,
892  SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
893  if (region.empty())
894  return success();
895 
896  // Special case for single-block regions that don't create additional blocks:
897  // insert operations without creating additional blocks.
898  if (region.hasOneBlock()) {
899  llvm::Instruction *potentialTerminator =
900  builder.GetInsertBlock()->empty() ? nullptr
901  : &builder.GetInsertBlock()->back();
902 
903  if (potentialTerminator && potentialTerminator->isTerminator())
904  potentialTerminator->removeFromParent();
905  moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
906 
907  if (failed(moduleTranslation.convertBlock(
908  region.front(), /*ignoreArguments=*/true, builder)))
909  return failure();
910 
911  // The continuation arguments are simply the translated terminator operands.
912  if (continuationBlockArgs)
913  llvm::append_range(
914  *continuationBlockArgs,
915  moduleTranslation.lookupValues(region.front().back().getOperands()));
916 
917  // Drop the mapping that is no longer necessary so that the same region can
918  // be processed multiple times.
919  moduleTranslation.forgetMapping(region);
920 
921  if (potentialTerminator && potentialTerminator->isTerminator()) {
922  llvm::BasicBlock *block = builder.GetInsertBlock();
923  if (block->empty()) {
924  // this can happen for really simple reduction init regions e.g.
925  // %0 = llvm.mlir.constant(0 : i32) : i32
926  // omp.yield(%0 : i32)
927  // because the llvm.mlir.constant (MLIR op) isn't converted into any
928  // llvm op
929  potentialTerminator->insertInto(block, block->begin());
930  } else {
931  potentialTerminator->insertAfter(&block->back());
932  }
933  }
934 
935  return success();
936  }
937 
939  llvm::Expected<llvm::BasicBlock *> continuationBlock =
940  convertOmpOpRegions(region, blockName, builder, moduleTranslation, &phis);
941 
942  if (failed(handleError(continuationBlock, *region.getParentOp())))
943  return failure();
944 
945  if (continuationBlockArgs)
946  llvm::append_range(*continuationBlockArgs, phis);
947  builder.SetInsertPoint(*continuationBlock,
948  (*continuationBlock)->getFirstInsertionPt());
949  return success();
950 }
951 
952 namespace {
953 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
954 /// store lambdas with capture.
// std::function (rather than a non-owning callable reference) is used so the
// stored callback owns its captured state — the generators below capture the
// reduction declaration by value.
955 using OwningReductionGen =
956     std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
957         llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
958         llvm::Value *&)>;
959 using OwningAtomicReductionGen =
960     std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
961         llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
962         llvm::Value *)>;
963 } // namespace
964 
965 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
966 /// reduction declaration. The generator uses `builder` but ignores its
967 /// insertion point.
968 static OwningReductionGen
969 makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
970  LLVM::ModuleTranslation &moduleTranslation) {
971  // The lambda is mutable because we need access to non-const methods of decl
972  // (which aren't actually mutating it), and we must capture decl by-value to
973  // avoid the dangling reference after the parent function returns.
974  OwningReductionGen gen =
975  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
976  llvm::Value *lhs, llvm::Value *rhs,
977  llvm::Value *&result) mutable
978  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
979  moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs);
980  moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs);
981  builder.restoreIP(insertPoint);
983  if (failed(inlineConvertOmpRegions(decl.getReductionRegion(),
984  "omp.reduction.nonatomic.body", builder,
985  moduleTranslation, &phis)))
986  return llvm::createStringError(
987  "failed to inline `combiner` region of `omp.declare_reduction`");
988  result = llvm::getSingleElement(phis);
989  return builder.saveIP();
990  };
991  return gen;
992 }
993 
994 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
995 /// given reduction declaration. The generator uses `builder` but ignores its
996 /// insertion point. Returns null if there is no atomic region available in the
997 /// reduction declaration.
998 static OwningAtomicReductionGen
999 makeAtomicReductionGen(omp::DeclareReductionOp decl,
1000  llvm::IRBuilderBase &builder,
1001  LLVM::ModuleTranslation &moduleTranslation) {
1002  if (decl.getAtomicReductionRegion().empty())
1003  return OwningAtomicReductionGen();
1004 
1005  // The lambda is mutable because we need access to non-const methods of decl
1006  // (which aren't actually mutating it), and we must capture decl by-value to
1007  // avoid the dangling reference after the parent function returns.
1008  OwningAtomicReductionGen atomicGen =
1009  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
1010  llvm::Value *lhs, llvm::Value *rhs) mutable
1011  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
1012  moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs);
1013  moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs);
1014  builder.restoreIP(insertPoint);
1016  if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(),
1017  "omp.reduction.atomic.body", builder,
1018  moduleTranslation, &phis)))
1019  return llvm::createStringError(
1020  "failed to inline `atomic` region of `omp.declare_reduction`");
1021  assert(phis.empty());
1022  return builder.saveIP();
1023  };
1024  return atomicGen;
1025 }
1026 
1027 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
1028 static LogicalResult
1029 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
1030  LLVM::ModuleTranslation &moduleTranslation) {
1031  auto orderedOp = cast<omp::OrderedOp>(opInst);
1032 
1033  if (failed(checkImplementationStatus(opInst)))
1034  return failure();
1035 
1036  omp::ClauseDepend dependType = *orderedOp.getDoacrossDependType();
1037  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
1038  unsigned numLoops = *orderedOp.getDoacrossNumLoops();
1039  SmallVector<llvm::Value *> vecValues =
1040  moduleTranslation.lookupValues(orderedOp.getDoacrossDependVars());
1041 
1042  size_t indexVecValues = 0;
1043  while (indexVecValues < vecValues.size()) {
1044  SmallVector<llvm::Value *> storeValues;
1045  storeValues.reserve(numLoops);
1046  for (unsigned i = 0; i < numLoops; i++) {
1047  storeValues.push_back(vecValues[indexVecValues]);
1048  indexVecValues++;
1049  }
1050  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1051  findAllocaInsertPoint(builder, moduleTranslation);
1052  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1053  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
1054  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
1055  }
1056  return success();
1057 }
1058 
1059 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
1060 /// OpenMPIRBuilder.
1061 static LogicalResult
1062 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
1063  LLVM::ModuleTranslation &moduleTranslation) {
1064  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1065  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
1066 
1067  if (failed(checkImplementationStatus(opInst)))
1068  return failure();
1069 
1070  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1071  // OrderedOp has only one region associated with it.
1072  auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
1073  builder.restoreIP(codeGenIP);
1074  return convertOmpOpRegions(region, "omp.ordered.region", builder,
1075  moduleTranslation)
1076  .takeError();
1077  };
1078 
1079  // TODO: Perform finalization actions for variables. This has to be
1080  // called for variables which have destructors/finalizers.
1081  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1082 
1083  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1084  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1085  moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
1086  ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd());
1087 
1088  if (failed(handleError(afterIP, opInst)))
1089  return failure();
1090 
1091  builder.restoreIP(*afterIP);
1092  return success();
1093 }
1094 
1095 namespace {
1096 /// Contains the arguments for an LLVM store operation
1097 struct DeferredStore {
1098  DeferredStore(llvm::Value *value, llvm::Value *address)
1099  : value(value), address(address) {}
1100 
1101  llvm::Value *value;
1102  llvm::Value *address;
1103 };
1104 } // namespace
1105 
1106 /// Allocate space for privatized reduction variables.
1107 /// `deferredStores` contains information to create store operations which needs
1108 /// to be inserted after all allocas
1109 template <typename T>
1110 static LogicalResult
1112  llvm::IRBuilderBase &builder,
1113  LLVM::ModuleTranslation &moduleTranslation,
1114  const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1116  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
1117  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1118  SmallVectorImpl<DeferredStore> &deferredStores,
1119  llvm::ArrayRef<bool> isByRefs) {
1120  llvm::IRBuilderBase::InsertPointGuard guard(builder);
1121  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1122 
1123  // delay creating stores until after all allocas
1124  deferredStores.reserve(loop.getNumReductionVars());
1125 
1126  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
1127  Region &allocRegion = reductionDecls[i].getAllocRegion();
1128  if (isByRefs[i]) {
1129  if (allocRegion.empty())
1130  continue;
1131 
1133  if (failed(inlineConvertOmpRegions(allocRegion, "omp.reduction.alloc",
1134  builder, moduleTranslation, &phis)))
1135  return loop.emitError(
1136  "failed to inline `alloc` region of `omp.declare_reduction`");
1137 
1138  assert(phis.size() == 1 && "expected one allocation to be yielded");
1139  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1140 
1141  // Allocate reduction variable (which is a pointer to the real reduction
1142  // variable allocated in the inlined region)
1143  llvm::Value *var = builder.CreateAlloca(
1144  moduleTranslation.convertType(reductionDecls[i].getType()));
1145 
1146  llvm::Type *ptrTy = builder.getPtrTy();
1147  llvm::Value *castVar =
1148  builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
1149  llvm::Value *castPhi =
1150  builder.CreatePointerBitCastOrAddrSpaceCast(phis[0], ptrTy);
1151 
1152  deferredStores.emplace_back(castPhi, castVar);
1153 
1154  privateReductionVariables[i] = castVar;
1155  moduleTranslation.mapValue(reductionArgs[i], castPhi);
1156  reductionVariableMap.try_emplace(loop.getReductionVars()[i], castPhi);
1157  } else {
1158  assert(allocRegion.empty() &&
1159  "allocaction is implicit for by-val reduction");
1160  llvm::Value *var = builder.CreateAlloca(
1161  moduleTranslation.convertType(reductionDecls[i].getType()));
1162 
1163  llvm::Type *ptrTy = builder.getPtrTy();
1164  llvm::Value *castVar =
1165  builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
1166 
1167  moduleTranslation.mapValue(reductionArgs[i], castVar);
1168  privateReductionVariables[i] = castVar;
1169  reductionVariableMap.try_emplace(loop.getReductionVars()[i], castVar);
1170  }
1171  }
1172 
1173  return success();
1174 }
1175 
1176 /// Map input arguments to reduction initialization region
1177 template <typename T>
1178 static void
1179 mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation,
1181  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1182  unsigned i) {
1183  // map input argument to the initialization region
1184  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
1185  Region &initializerRegion = reduction.getInitializerRegion();
1186  Block &entry = initializerRegion.front();
1187 
1188  mlir::Value mlirSource = loop.getReductionVars()[i];
1189  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
1190  assert(llvmSource && "lookup reduction var");
1191  moduleTranslation.mapValue(reduction.getInitializerMoldArg(), llvmSource);
1192 
1193  if (entry.getNumArguments() > 1) {
1194  llvm::Value *allocation =
1195  reductionVariableMap.lookup(loop.getReductionVars()[i]);
1196  moduleTranslation.mapValue(reduction.getInitializerAllocArg(), allocation);
1197  }
1198 }
1199 
1200 static void
1201 setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder,
1202  llvm::BasicBlock *block = nullptr) {
1203  if (block == nullptr)
1204  block = builder.GetInsertBlock();
1205 
1206  if (block->empty() || block->getTerminator() == nullptr)
1207  builder.SetInsertPoint(block);
1208  else
1209  builder.SetInsertPoint(block->getTerminator());
1210 }
1211 
1212 /// Inline reductions' `init` regions. This functions assumes that the
1213 /// `builder`'s insertion point is where the user wants the `init` regions to be
1214 /// inlined; i.e. it does not try to find a proper insertion location for the
1215 /// `init` regions. It also leaves the `builder's insertions point in a state
1216 /// where the user can continue the code-gen directly afterwards.
1217 template <typename OP>
1218 static LogicalResult
1220  llvm::IRBuilderBase &builder,
1221  LLVM::ModuleTranslation &moduleTranslation,
1222  llvm::BasicBlock *latestAllocaBlock,
1224  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
1225  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1226  llvm::ArrayRef<bool> isByRef,
1227  SmallVectorImpl<DeferredStore> &deferredStores) {
1228  if (op.getNumReductionVars() == 0)
1229  return success();
1230 
1231  llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
1232  auto allocaIP = llvm::IRBuilderBase::InsertPoint(
1233  latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
1234  builder.restoreIP(allocaIP);
1235  SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars());
1236 
1237  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
1238  if (isByRef[i]) {
1239  if (!reductionDecls[i].getAllocRegion().empty())
1240  continue;
1241 
1242  // TODO: remove after all users of by-ref are updated to use the alloc
1243  // region: Allocate reduction variable (which is a pointer to the real
1244  // reduciton variable allocated in the inlined region)
1245  byRefVars[i] = builder.CreateAlloca(
1246  moduleTranslation.convertType(reductionDecls[i].getType()));
1247  }
1248  }
1249 
1250  setInsertPointForPossiblyEmptyBlock(builder, initBlock);
1251 
1252  // store result of the alloc region to the allocated pointer to the real
1253  // reduction variable
1254  for (auto [data, addr] : deferredStores)
1255  builder.CreateStore(data, addr);
1256 
1257  // Before the loop, store the initial values of reductions into reduction
1258  // variables. Although this could be done after allocas, we don't want to mess
1259  // up with the alloca insertion point.
1260  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
1262 
1263  // map block argument to initializer region
1264  mapInitializationArgs(op, moduleTranslation, reductionDecls,
1265  reductionVariableMap, i);
1266 
1267  // TODO In some cases (specially on the GPU), the init regions may
1268  // contains stack alloctaions. If the region is inlined in a loop, this is
1269  // problematic. Instead of just inlining the region, handle allocations by
1270  // hoisting fixed length allocations to the function entry and using
1271  // stacksave and restore for variable length ones.
1272  if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
1273  "omp.reduction.neutral", builder,
1274  moduleTranslation, &phis)))
1275  return failure();
1276 
1277  assert(phis.size() == 1 && "expected one value to be yielded from the "
1278  "reduction neutral element declaration region");
1279 
1281 
1282  if (isByRef[i]) {
1283  if (!reductionDecls[i].getAllocRegion().empty())
1284  // done in allocReductionVars
1285  continue;
1286 
1287  // TODO: this path can be removed once all users of by-ref are updated to
1288  // use an alloc region
1289 
1290  // Store the result of the inlined region to the allocated reduction var
1291  // ptr
1292  builder.CreateStore(phis[0], byRefVars[i]);
1293 
1294  privateReductionVariables[i] = byRefVars[i];
1295  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
1296  reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
1297  } else {
1298  // for by-ref case the store is inside of the reduction region
1299  builder.CreateStore(phis[0], privateReductionVariables[i]);
1300  // the rest was handled in allocByValReductionVars
1301  }
1302 
1303  // forget the mapping for the initializer region because we might need a
1304  // different mapping if this reduction declaration is re-used for a
1305  // different variable
1306  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
1307  }
1308 
1309  return success();
1310 }
1311 
1312 /// Collect reduction info
1313 template <typename T>
1315  T loop, llvm::IRBuilderBase &builder,
1316  LLVM::ModuleTranslation &moduleTranslation,
1318  SmallVectorImpl<OwningReductionGen> &owningReductionGens,
1319  SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens,
1320  const ArrayRef<llvm::Value *> privateReductionVariables,
1322  unsigned numReductions = loop.getNumReductionVars();
1323 
1324  for (unsigned i = 0; i < numReductions; ++i) {
1325  owningReductionGens.push_back(
1326  makeReductionGen(reductionDecls[i], builder, moduleTranslation));
1327  owningAtomicReductionGens.push_back(
1328  makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
1329  }
1330 
1331  // Collect the reduction information.
1332  reductionInfos.reserve(numReductions);
1333  for (unsigned i = 0; i < numReductions; ++i) {
1334  llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
1335  if (owningAtomicReductionGens[i])
1336  atomicGen = owningAtomicReductionGens[i];
1337  llvm::Value *variable =
1338  moduleTranslation.lookupValue(loop.getReductionVars()[i]);
1339  reductionInfos.push_back(
1340  {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
1341  privateReductionVariables[i],
1342  /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
1343  owningReductionGens[i],
1344  /*ReductionGenClang=*/nullptr, atomicGen});
1345  }
1346 }
1347 
1348 /// handling of DeclareReductionOp's cleanup region
1349 static LogicalResult
1351  llvm::ArrayRef<llvm::Value *> privateVariables,
1352  LLVM::ModuleTranslation &moduleTranslation,
1353  llvm::IRBuilderBase &builder, StringRef regionName,
1354  bool shouldLoadCleanupRegionArg = true) {
1355  for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
1356  if (cleanupRegion->empty())
1357  continue;
1358 
1359  // map the argument to the cleanup region
1360  Block &entry = cleanupRegion->front();
1361 
1362  llvm::Instruction *potentialTerminator =
1363  builder.GetInsertBlock()->empty() ? nullptr
1364  : &builder.GetInsertBlock()->back();
1365  if (potentialTerminator && potentialTerminator->isTerminator())
1366  builder.SetInsertPoint(potentialTerminator);
1367  llvm::Value *privateVarValue =
1368  shouldLoadCleanupRegionArg
1369  ? builder.CreateLoad(
1370  moduleTranslation.convertType(entry.getArgument(0).getType()),
1371  privateVariables[i])
1372  : privateVariables[i];
1373 
1374  moduleTranslation.mapValue(entry.getArgument(0), privateVarValue);
1375 
1376  if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
1377  moduleTranslation)))
1378  return failure();
1379 
1380  // clear block argument mapping in case it needs to be re-created with a
1381  // different source for another use of the same reduction decl
1382  moduleTranslation.forgetMapping(*cleanupRegion);
1383  }
1384  return success();
1385 }
1386 
1387 // TODO: not used by ParallelOp
1388 template <class OP>
1389 static LogicalResult createReductionsAndCleanup(
1390  OP op, llvm::IRBuilderBase &builder,
1391  LLVM::ModuleTranslation &moduleTranslation,
1392  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1394  ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef,
1395  bool isNowait = false, bool isTeamsReduction = false) {
1396  // Process the reductions if required.
1397  if (op.getNumReductionVars() == 0)
1398  return success();
1399 
1400  SmallVector<OwningReductionGen> owningReductionGens;
1401  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1403 
1404  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1405 
1406  // Create the reduction generators. We need to own them here because
1407  // ReductionInfo only accepts references to the generators.
1408  collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
1409  owningReductionGens, owningAtomicReductionGens,
1410  privateReductionVariables, reductionInfos);
1411 
1412  // The call to createReductions below expects the block to have a
1413  // terminator. Create an unreachable instruction to serve as terminator
1414  // and remove it later.
1415  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1416  builder.SetInsertPoint(tempTerminator);
1417  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
1418  ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
1419  isByRef, isNowait, isTeamsReduction);
1420 
1421  if (failed(handleError(contInsertPoint, *op)))
1422  return failure();
1423 
1424  if (!contInsertPoint->getBlock())
1425  return op->emitOpError() << "failed to convert reductions";
1426 
1427  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1428  ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);
1429 
1430  if (failed(handleError(afterIP, *op)))
1431  return failure();
1432 
1433  tempTerminator->eraseFromParent();
1434  builder.restoreIP(*afterIP);
1435 
1436  // after the construct, deallocate private reduction variables
1437  SmallVector<Region *> reductionRegions;
1438  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
1439  [](omp::DeclareReductionOp reductionDecl) {
1440  return &reductionDecl.getCleanupRegion();
1441  });
1442  return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
1443  moduleTranslation, builder,
1444  "omp.reduction.cleanup");
1445  return success();
1446 }
1447 
1448 static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
1449  if (!attr)
1450  return {};
1451  return *attr;
1452 }
1453 
1454 // TODO: not used by omp.parallel
1455 template <typename OP>
1456 static LogicalResult allocAndInitializeReductionVars(
1457  OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
1458  LLVM::ModuleTranslation &moduleTranslation,
1459  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1461  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
1462  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1463  llvm::ArrayRef<bool> isByRef) {
1464  if (op.getNumReductionVars() == 0)
1465  return success();
1466 
1467  SmallVector<DeferredStore> deferredStores;
1468 
1469  if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
1470  allocaIP, reductionDecls,
1471  privateReductionVariables, reductionVariableMap,
1472  deferredStores, isByRef)))
1473  return failure();
1474 
1475  return initReductionVars(op, reductionArgs, builder, moduleTranslation,
1476  allocaIP.getBlock(), reductionDecls,
1477  privateReductionVariables, reductionVariableMap,
1478  isByRef, deferredStores);
1479 }
1480 
1481 /// Return the llvm::Value * corresponding to the `privateVar` that
1482 /// is being privatized. It isn't always as simple as looking up
1483 /// moduleTranslation with privateVar. For instance, in case of
1484 /// an allocatable, the descriptor for the allocatable is privatized.
1485 /// This descriptor is mapped using an MapInfoOp. So, this function
1486 /// will return a pointer to the llvm::Value corresponding to the
1487 /// block argument for the mapped descriptor.
1488 static llvm::Value *
1489 findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
1490  LLVM::ModuleTranslation &moduleTranslation,
1491  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1492  if (mappedPrivateVars == nullptr || !mappedPrivateVars->contains(privateVar))
1493  return moduleTranslation.lookupValue(privateVar);
1494 
1495  Value blockArg = (*mappedPrivateVars)[privateVar];
1496  Type privVarType = privateVar.getType();
1497  Type blockArgType = blockArg.getType();
1498  assert(isa<LLVM::LLVMPointerType>(blockArgType) &&
1499  "A block argument corresponding to a mapped var should have "
1500  "!llvm.ptr type");
1501 
1502  if (privVarType == blockArgType)
1503  return moduleTranslation.lookupValue(blockArg);
1504 
1505  // This typically happens when the privatized type is lowered from
1506  // boxchar<KIND> and gets lowered to !llvm.struct<(ptr, i64)>. That is the
1507  // struct/pair is passed by value. But, mapped values are passed only as
1508  // pointers, so before we privatize, we must load the pointer.
1509  if (!isa<LLVM::LLVMPointerType>(privVarType))
1510  return builder.CreateLoad(moduleTranslation.convertType(privVarType),
1511  moduleTranslation.lookupValue(blockArg));
1512 
1513  return moduleTranslation.lookupValue(privateVar);
1514 }
1515 
1516 /// Initialize a single (first)private variable. You probably want to use
1517 /// allocateAndInitPrivateVars instead of this.
1518 /// This returns the private variable which has been initialized. This
1519 /// variable should be mapped before constructing the body of the Op.
1521  llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
1522  omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg,
1523  llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock,
1524  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1525  Region &initRegion = privDecl.getInitRegion();
1526  if (initRegion.empty())
1527  return llvmPrivateVar;
1528 
1529  // map initialization region block arguments
1530  llvm::Value *nonPrivateVar = findAssociatedValue(
1531  mlirPrivVar, builder, moduleTranslation, mappedPrivateVars);
1532  assert(nonPrivateVar);
1533  moduleTranslation.mapValue(privDecl.getInitMoldArg(), nonPrivateVar);
1534  moduleTranslation.mapValue(privDecl.getInitPrivateArg(), llvmPrivateVar);
1535 
1536  // in-place convert the private initialization region
1538  if (failed(inlineConvertOmpRegions(initRegion, "omp.private.init", builder,
1539  moduleTranslation, &phis)))
1540  return llvm::createStringError(
1541  "failed to inline `init` region of `omp.private`");
1542 
1543  assert(phis.size() == 1 && "expected one allocation to be yielded");
1544 
1545  // clear init region block argument mapping in case it needs to be
1546  // re-created with a different source for another use of the same
1547  // reduction decl
1548  moduleTranslation.forgetMapping(initRegion);
1549 
1550  // Prefer the value yielded from the init region to the allocated private
1551  // variable in case the region is operating on arguments by-value (e.g.
1552  // Fortran character boxes).
1553  return phis[0];
1554 }
1555 
1556 static llvm::Error
1557 initPrivateVars(llvm::IRBuilderBase &builder,
1558  LLVM::ModuleTranslation &moduleTranslation,
1559  PrivateVarsInfo &privateVarsInfo,
1560  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1561  if (privateVarsInfo.blockArgs.empty())
1562  return llvm::Error::success();
1563 
1564  llvm::BasicBlock *privInitBlock = splitBB(builder, true, "omp.private.init");
1565  setInsertPointForPossiblyEmptyBlock(builder, privInitBlock);
1566 
1567  for (auto [idx, zip] : llvm::enumerate(llvm::zip_equal(
1568  privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
1569  privateVarsInfo.blockArgs, privateVarsInfo.llvmVars))) {
1570  auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVar] = zip;
1572  builder, moduleTranslation, privDecl, mlirPrivVar, blockArg,
1573  llvmPrivateVar, privInitBlock, mappedPrivateVars);
1574 
1575  if (!privVarOrErr)
1576  return privVarOrErr.takeError();
1577 
1578  llvmPrivateVar = privVarOrErr.get();
1579  moduleTranslation.mapValue(blockArg, llvmPrivateVar);
1580 
1582  }
1583 
1584  return llvm::Error::success();
1585 }
1586 
1587 /// Allocate and initialize delayed private variables. Returns the basic block
1588 /// which comes after all of these allocations. llvm::Value * for each of these
1589 /// private variables are populated in llvmPrivateVars.
1591 allocatePrivateVars(llvm::IRBuilderBase &builder,
1592  LLVM::ModuleTranslation &moduleTranslation,
1593  PrivateVarsInfo &privateVarsInfo,
1594  const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1595  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1596  // Allocate private vars
1597  llvm::Instruction *allocaTerminator = allocaIP.getBlock()->getTerminator();
1598  splitBB(llvm::OpenMPIRBuilder::InsertPointTy(allocaIP.getBlock(),
1599  allocaTerminator->getIterator()),
1600  true, allocaTerminator->getStableDebugLoc(),
1601  "omp.region.after_alloca");
1602 
1603  llvm::IRBuilderBase::InsertPointGuard guard(builder);
1604  // Update the allocaTerminator since the alloca block was split above.
1605  allocaTerminator = allocaIP.getBlock()->getTerminator();
1606  builder.SetInsertPoint(allocaTerminator);
1607  // The new terminator is an uncondition branch created by the splitBB above.
1608  assert(allocaTerminator->getNumSuccessors() == 1 &&
1609  "This is an unconditional branch created by splitBB");
1610 
1611  llvm::DataLayout dataLayout = builder.GetInsertBlock()->getDataLayout();
1612  llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);
1613 
1614  unsigned int allocaAS =
1615  moduleTranslation.getLLVMModule()->getDataLayout().getAllocaAddrSpace();
1616  unsigned int defaultAS = moduleTranslation.getLLVMModule()
1617  ->getDataLayout()
1618  .getProgramAddressSpace();
1619 
1620  for (auto [privDecl, mlirPrivVar, blockArg] :
1621  llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
1622  privateVarsInfo.blockArgs)) {
1623  llvm::Type *llvmAllocType =
1624  moduleTranslation.convertType(privDecl.getType());
1625  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1626  llvm::Value *llvmPrivateVar = builder.CreateAlloca(
1627  llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
1628  if (allocaAS != defaultAS)
1629  llvmPrivateVar = builder.CreateAddrSpaceCast(llvmPrivateVar,
1630  builder.getPtrTy(defaultAS));
1631 
1632  privateVarsInfo.llvmVars.push_back(llvmPrivateVar);
1633  }
1634 
1635  return afterAllocas;
1636 }
1637 
1638 static LogicalResult copyFirstPrivateVars(
1639  mlir::Operation *op, llvm::IRBuilderBase &builder,
1640  LLVM::ModuleTranslation &moduleTranslation,
1641  SmallVectorImpl<mlir::Value> &mlirPrivateVars,
1642  ArrayRef<llvm::Value *> llvmPrivateVars,
1643  SmallVectorImpl<omp::PrivateClauseOp> &privateDecls, bool insertBarrier,
1644  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1645  // Apply copy region for firstprivate.
1646  bool needsFirstprivate =
1647  llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1648  return privOp.getDataSharingType() ==
1649  omp::DataSharingClauseType::FirstPrivate;
1650  });
1651 
1652  if (!needsFirstprivate)
1653  return success();
1654 
1655  llvm::BasicBlock *copyBlock =
1656  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1657  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
1658 
1659  for (auto [decl, mlirVar, llvmVar] :
1660  llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1661  if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1662  continue;
1663 
1664  // copyRegion implements `lhs = rhs`
1665  Region &copyRegion = decl.getCopyRegion();
1666 
1667  // map copyRegion rhs arg
1668  llvm::Value *nonPrivateVar = findAssociatedValue(
1669  mlirVar, builder, moduleTranslation, mappedPrivateVars);
1670  assert(nonPrivateVar);
1671  moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1672 
1673  // map copyRegion lhs arg
1674  moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1675 
1676  // in-place convert copy region
1677  if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
1678  moduleTranslation)))
1679  return decl.emitError("failed to inline `copy` region of `omp.private`");
1680 
1682 
1683  // ignore unused value yielded from copy region
1684 
1685  // clear copy region block argument mapping in case it needs to be
1686  // re-created with different sources for reuse of the same reduction
1687  // decl
1688  moduleTranslation.forgetMapping(copyRegion);
1689  }
1690 
1691  if (insertBarrier) {
1692  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1693  llvm::OpenMPIRBuilder::InsertPointOrErrorTy res =
1694  ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
1695  if (failed(handleError(res, *op)))
1696  return failure();
1697  }
1698 
1699  return success();
1700 }
1701 
1702 static LogicalResult
1703 cleanupPrivateVars(llvm::IRBuilderBase &builder,
1704  LLVM::ModuleTranslation &moduleTranslation, Location loc,
1705  SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
1706  SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
1707  // private variable deallocation
1708  SmallVector<Region *> privateCleanupRegions;
1709  llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
1710  [](omp::PrivateClauseOp privatizer) {
1711  return &privatizer.getDeallocRegion();
1712  });
1713 
1715  privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1716  "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1717  return mlir::emitError(loc, "failed to inline `dealloc` region of an "
1718  "`omp.private` op in");
1719 
1720  return success();
1721 }
1722 
1723 /// Returns true if the construct contains omp.cancel or omp.cancellation_point
1725  // omp.cancel and omp.cancellation_point must be "closely nested" so they will
1726  // be visible and not inside of function calls. This is enforced by the
1727  // verifier.
1728  return op
1729  ->walk([](Operation *child) {
1730  if (mlir::isa<omp::CancelOp, omp::CancellationPointOp>(child))
1731  return WalkResult::interrupt();
1732  return WalkResult::advance();
1733  })
1734  .wasInterrupted();
1735 }
1736 
1737 static LogicalResult
1738 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
1739  LLVM::ModuleTranslation &moduleTranslation) {
1740  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1741  using StorableBodyGenCallbackTy =
1742  llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
1743 
1744  auto sectionsOp = cast<omp::SectionsOp>(opInst);
1745 
1746  if (failed(checkImplementationStatus(opInst)))
1747  return failure();
1748 
1749  llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionByref());
1750  assert(isByRef.size() == sectionsOp.getNumReductionVars());
1751 
1752  SmallVector<omp::DeclareReductionOp> reductionDecls;
1753  collectReductionDecls(sectionsOp, reductionDecls);
1754  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1755  findAllocaInsertPoint(builder, moduleTranslation);
1756 
1757  SmallVector<llvm::Value *> privateReductionVariables(
1758  sectionsOp.getNumReductionVars());
1759  DenseMap<Value, llvm::Value *> reductionVariableMap;
1760 
1761  MutableArrayRef<BlockArgument> reductionArgs =
1762  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
1763 
1765  sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
1766  reductionDecls, privateReductionVariables, reductionVariableMap,
1767  isByRef)))
1768  return failure();
1769 
1771 
1772  for (Operation &op : *sectionsOp.getRegion().begin()) {
1773  auto sectionOp = dyn_cast<omp::SectionOp>(op);
1774  if (!sectionOp) // omp.terminator
1775  continue;
1776 
1777  Region &region = sectionOp.getRegion();
1778  auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation](
1779  InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1780  builder.restoreIP(codeGenIP);
1781 
1782  // map the omp.section reduction block argument to the omp.sections block
1783  // arguments
1784  // TODO: this assumes that the only block arguments are reduction
1785  // variables
1786  assert(region.getNumArguments() ==
1787  sectionsOp.getRegion().getNumArguments());
1788  for (auto [sectionsArg, sectionArg] : llvm::zip_equal(
1789  sectionsOp.getRegion().getArguments(), region.getArguments())) {
1790  llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg);
1791  assert(llvmVal);
1792  moduleTranslation.mapValue(sectionArg, llvmVal);
1793  }
1794 
1795  return convertOmpOpRegions(region, "omp.section.region", builder,
1796  moduleTranslation)
1797  .takeError();
1798  };
1799  sectionCBs.push_back(sectionCB);
1800  }
1801 
1802  // No sections within omp.sections operation - skip generation. This situation
1803  // is only possible if there is only a terminator operation inside the
1804  // sections operation
1805  if (sectionCBs.empty())
1806  return success();
1807 
1808  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
1809 
1810  // TODO: Perform appropriate actions according to the data-sharing
1811  // attribute (shared, private, firstprivate, ...) of variables.
1812  // Currently defaults to shared.
1813  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
1814  llvm::Value &vPtr, llvm::Value *&replacementValue)
1815  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
1816  replacementValue = &vPtr;
1817  return codeGenIP;
1818  };
1819 
1820  // TODO: Perform finalization actions for variables. This has to be
1821  // called for variables which have destructors/finalizers.
1822  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1823 
1824  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1825  bool isCancellable = constructIsCancellable(sectionsOp);
1826  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1827  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1828  moduleTranslation.getOpenMPBuilder()->createSections(
1829  ompLoc, allocaIP, sectionCBs, privCB, finiCB, isCancellable,
1830  sectionsOp.getNowait());
1831 
1832  if (failed(handleError(afterIP, opInst)))
1833  return failure();
1834 
1835  builder.restoreIP(*afterIP);
1836 
1837  // Process the reductions if required.
1839  sectionsOp, builder, moduleTranslation, allocaIP, reductionDecls,
1840  privateReductionVariables, isByRef, sectionsOp.getNowait());
1841 }
1842 
1843 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
1844 static LogicalResult
1845 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
1846  LLVM::ModuleTranslation &moduleTranslation) {
1847  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1848  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1849 
1850  if (failed(checkImplementationStatus(*singleOp)))
1851  return failure();
1852 
1853  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1854  builder.restoreIP(codegenIP);
1855  return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region",
1856  builder, moduleTranslation)
1857  .takeError();
1858  };
1859  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1860 
1861  // Handle copyprivate
1862  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
1863  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateSyms();
1866  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
1867  llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
1868  auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
1869  singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
1870  llvmCPFuncs.push_back(
1871  moduleTranslation.lookupFunction(llvmFuncOp.getName()));
1872  }
1873 
1874  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1875  moduleTranslation.getOpenMPBuilder()->createSingle(
1876  ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars,
1877  llvmCPFuncs);
1878 
1879  if (failed(handleError(afterIP, *singleOp)))
1880  return failure();
1881 
1882  builder.restoreIP(*afterIP);
1883  return success();
1884 }
1885 
1886 static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp) {
1887  auto iface =
1888  llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(teamsOp.getOperation());
1889  // Check that all uses of the reduction block arg has the same distribute op
1890  // parent.
1892  Operation *distOp = nullptr;
1893  for (auto ra : iface.getReductionBlockArgs())
1894  for (auto &use : ra.getUses()) {
1895  auto *useOp = use.getOwner();
1896  // Ignore debug uses.
1897  if (mlir::isa<LLVM::DbgDeclareOp, LLVM::DbgValueOp>(useOp)) {
1898  debugUses.push_back(useOp);
1899  continue;
1900  }
1901 
1902  auto currentDistOp = useOp->getParentOfType<omp::DistributeOp>();
1903  // Use is not inside a distribute op - return false
1904  if (!currentDistOp)
1905  return false;
1906  // Multiple distribute operations - return false
1907  Operation *currentOp = currentDistOp.getOperation();
1908  if (distOp && (distOp != currentOp))
1909  return false;
1910 
1911  distOp = currentOp;
1912  }
1913 
1914  // If we are going to use distribute reduction then remove any debug uses of
1915  // the reduction parameters in teamsOp. Otherwise they will be left without
1916  // any mapped value in moduleTranslation and will eventually error out.
1917  for (auto use : debugUses)
1918  use->erase();
1919  return true;
1920 }
1921 
1922 // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder
1923 static LogicalResult
1924 convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
1925  LLVM::ModuleTranslation &moduleTranslation) {
1926  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1928  return failure();
1929 
1930  DenseMap<Value, llvm::Value *> reductionVariableMap;
1931  unsigned numReductionVars = op.getNumReductionVars();
1932  SmallVector<omp::DeclareReductionOp> reductionDecls;
1933  SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
1934  llvm::ArrayRef<bool> isByRef;
1935  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1936  findAllocaInsertPoint(builder, moduleTranslation);
1937 
1938  // Only do teams reduction if there is no distribute op that captures the
1939  // reduction instead.
1940  bool doTeamsReduction = !teamsReductionContainedInDistribute(op);
1941  if (doTeamsReduction) {
1942  isByRef = getIsByRef(op.getReductionByref());
1943 
1944  assert(isByRef.size() == op.getNumReductionVars());
1945 
1946  MutableArrayRef<BlockArgument> reductionArgs =
1947  llvm::cast<omp::BlockArgOpenMPOpInterface>(*op).getReductionBlockArgs();
1948 
1949  collectReductionDecls(op, reductionDecls);
1950 
1952  op, reductionArgs, builder, moduleTranslation, allocaIP,
1953  reductionDecls, privateReductionVariables, reductionVariableMap,
1954  isByRef)))
1955  return failure();
1956  }
1957 
1958  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1960  moduleTranslation, allocaIP);
1961  builder.restoreIP(codegenIP);
1962  return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
1963  moduleTranslation)
1964  .takeError();
1965  };
1966 
1967  llvm::Value *numTeamsLower = nullptr;
1968  if (Value numTeamsLowerVar = op.getNumTeamsLower())
1969  numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);
1970 
1971  llvm::Value *numTeamsUpper = nullptr;
1972  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
1973  numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);
1974 
1975  llvm::Value *threadLimit = nullptr;
1976  if (Value threadLimitVar = op.getThreadLimit())
1977  threadLimit = moduleTranslation.lookupValue(threadLimitVar);
1978 
1979  llvm::Value *ifExpr = nullptr;
1980  if (Value ifVar = op.getIfExpr())
1981  ifExpr = moduleTranslation.lookupValue(ifVar);
1982 
1983  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1984  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1985  moduleTranslation.getOpenMPBuilder()->createTeams(
1986  ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr);
1987 
1988  if (failed(handleError(afterIP, *op)))
1989  return failure();
1990 
1991  builder.restoreIP(*afterIP);
1992  if (doTeamsReduction) {
1993  // Process the reductions if required.
1995  op, builder, moduleTranslation, allocaIP, reductionDecls,
1996  privateReductionVariables, isByRef,
1997  /*isNoWait*/ false, /*isTeamsReduction*/ true);
1998  }
1999  return success();
2000 }
2001 
2002 static void
2003 buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
2004  LLVM::ModuleTranslation &moduleTranslation,
2006  if (dependVars.empty())
2007  return;
2008  for (auto dep : llvm::zip(dependVars, dependKinds->getValue())) {
2009  llvm::omp::RTLDependenceKindTy type;
2010  switch (
2011  cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
2012  case mlir::omp::ClauseTaskDepend::taskdependin:
2013  type = llvm::omp::RTLDependenceKindTy::DepIn;
2014  break;
2015  // The OpenMP runtime requires that the codegen for 'depend' clause for
2016  // 'out' dependency kind must be the same as codegen for 'depend' clause
2017  // with 'inout' dependency.
2018  case mlir::omp::ClauseTaskDepend::taskdependout:
2019  case mlir::omp::ClauseTaskDepend::taskdependinout:
2020  type = llvm::omp::RTLDependenceKindTy::DepInOut;
2021  break;
2022  case mlir::omp::ClauseTaskDepend::taskdependmutexinoutset:
2023  type = llvm::omp::RTLDependenceKindTy::DepMutexInOutSet;
2024  break;
2025  case mlir::omp::ClauseTaskDepend::taskdependinoutset:
2026  type = llvm::omp::RTLDependenceKindTy::DepInOutSet;
2027  break;
2028  };
2029  llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
2030  llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
2031  dds.emplace_back(dd);
2032  }
2033 }
2034 
2035 /// Shared implementation of a callback which adds a termiator for the new block
2036 /// created for the branch taken when an openmp construct is cancelled. The
2037 /// terminator is saved in \p cancelTerminators. This callback is invoked only
2038 /// if there is cancellation inside of the taskgroup body.
2039 /// The terminator will need to be fixed to branch to the correct block to
2040 /// cleanup the construct.
2041 static void
2043  llvm::IRBuilderBase &llvmBuilder,
2044  llvm::OpenMPIRBuilder &ompBuilder, mlir::Operation *op,
2045  llvm::omp::Directive cancelDirective) {
2046  auto finiCB = [&](llvm::OpenMPIRBuilder::InsertPointTy ip) -> llvm::Error {
2047  llvm::IRBuilderBase::InsertPointGuard guard(llvmBuilder);
2048 
2049  // ip is currently in the block branched to if cancellation occured.
2050  // We need to create a branch to terminate that block.
2051  llvmBuilder.restoreIP(ip);
2052 
2053  // We must still clean up the construct after cancelling it, so we need to
2054  // branch to the block that finalizes the taskgroup.
2055  // That block has not been created yet so use this block as a dummy for now
2056  // and fix this after creating the operation.
2057  cancelTerminators.push_back(llvmBuilder.CreateBr(ip.getBlock()));
2058  return llvm::Error::success();
2059  };
2060  // We have to add the cleanup to the OpenMPIRBuilder before the body gets
2061  // created in case the body contains omp.cancel (which will then expect to be
2062  // able to find this cleanup callback).
2063  ompBuilder.pushFinalizationCB(
2064  {finiCB, cancelDirective, constructIsCancellable(op)});
2065 }
2066 
2067 /// If we cancelled the construct, we should branch to the finalization block of
2068 /// that construct. OMPIRBuilder structures the CFG such that the cleanup block
2069 /// is immediately before the continuation block. Now this finalization has
2070 /// been created we can fix the branch.
2071 static void
2073  llvm::OpenMPIRBuilder &ompBuilder,
2074  const llvm::OpenMPIRBuilder::InsertPointTy &afterIP) {
2075  ompBuilder.popFinalizationCB();
2076  llvm::BasicBlock *constructFini = afterIP.getBlock()->getSinglePredecessor();
2077  for (llvm::BranchInst *cancelBranch : cancelTerminators) {
2078  assert(cancelBranch->getNumSuccessors() == 1 &&
2079  "cancel branch should have one target");
2080  cancelBranch->setSuccessor(0, constructFini);
2081  }
2082 }
2083 
2084 namespace {
2085 /// TaskContextStructManager takes care of creating and freeing a structure
2086 /// containing information needed by the task body to execute.
2087 class TaskContextStructManager {
2088 public:
2089  TaskContextStructManager(llvm::IRBuilderBase &builder,
2090  LLVM::ModuleTranslation &moduleTranslation,
2092  : builder{builder}, moduleTranslation{moduleTranslation},
2093  privateDecls{privateDecls} {}
2094 
2095  /// Creates a heap allocated struct containing space for each private
2096  /// variable. Invariant: privateVarTypes, privateDecls, and the elements of
2097  /// the structure should all have the same order (although privateDecls which
2098  /// do not read from the mold argument are skipped).
2099  void generateTaskContextStruct();
2100 
2101  /// Create GEPs to access each member of the structure representing a private
2102  /// variable, adding them to llvmPrivateVars. Null values are added where
2103  /// private decls were skipped so that the ordering continues to match the
2104  /// private decls.
2105  void createGEPsToPrivateVars();
2106 
2107  /// De-allocate the task context structure.
2108  void freeStructPtr();
2109 
2110  MutableArrayRef<llvm::Value *> getLLVMPrivateVarGEPs() {
2111  return llvmPrivateVarGEPs;
2112  }
2113 
2114  llvm::Value *getStructPtr() { return structPtr; }
2115 
2116 private:
2117  llvm::IRBuilderBase &builder;
2118  LLVM::ModuleTranslation &moduleTranslation;
2120 
2121  /// The type of each member of the structure, in order.
2122  SmallVector<llvm::Type *> privateVarTypes;
2123 
2124  /// LLVM values for each private variable, or null if that private variable is
2125  /// not included in the task context structure
2126  SmallVector<llvm::Value *> llvmPrivateVarGEPs;
2127 
2128  /// A pointer to the structure containing context for this task.
2129  llvm::Value *structPtr = nullptr;
2130  /// The type of the structure
2131  llvm::Type *structTy = nullptr;
2132 };
2133 } // namespace
2134 
2135 void TaskContextStructManager::generateTaskContextStruct() {
2136  if (privateDecls.empty())
2137  return;
2138  privateVarTypes.reserve(privateDecls.size());
2139 
2140  for (omp::PrivateClauseOp &privOp : privateDecls) {
2141  // Skip private variables which can safely be allocated and initialised
2142  // inside of the task
2143  if (!privOp.readsFromMold())
2144  continue;
2145  Type mlirType = privOp.getType();
2146  privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
2147  }
2148 
2149  structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(),
2150  privateVarTypes);
2151 
2152  llvm::DataLayout dataLayout =
2153  builder.GetInsertBlock()->getModule()->getDataLayout();
2154  llvm::Type *intPtrTy = builder.getIntPtrTy(dataLayout);
2155  llvm::Constant *allocSize = llvm::ConstantExpr::getSizeOf(structTy);
2156 
2157  // Heap allocate the structure
2158  structPtr = builder.CreateMalloc(intPtrTy, structTy, allocSize,
2159  /*ArraySize=*/nullptr, /*MallocF=*/nullptr,
2160  "omp.task.context_ptr");
2161 }
2162 
2163 void TaskContextStructManager::createGEPsToPrivateVars() {
2164  if (!structPtr) {
2165  assert(privateVarTypes.empty());
2166  return;
2167  }
2168 
2169  // Create GEPs for each struct member
2170  llvmPrivateVarGEPs.clear();
2171  llvmPrivateVarGEPs.reserve(privateDecls.size());
2172  llvm::Value *zero = builder.getInt32(0);
2173  unsigned i = 0;
2174  for (auto privDecl : privateDecls) {
2175  if (!privDecl.readsFromMold()) {
2176  // Handle this inside of the task so we don't pass unnessecary vars in
2177  llvmPrivateVarGEPs.push_back(nullptr);
2178  continue;
2179  }
2180  llvm::Value *iVal = builder.getInt32(i);
2181  llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
2182  llvmPrivateVarGEPs.push_back(gep);
2183  i += 1;
2184  }
2185 }
2186 
2187 void TaskContextStructManager::freeStructPtr() {
2188  if (!structPtr)
2189  return;
2190 
2191  llvm::IRBuilderBase::InsertPointGuard guard{builder};
2192  // Ensure we don't put the call to free() after the terminator
2193  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
2194  builder.CreateFree(structPtr);
2195 }
2196 
2197 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
2198 static LogicalResult
2199 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
2200  LLVM::ModuleTranslation &moduleTranslation) {
2201  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2202  if (failed(checkImplementationStatus(*taskOp)))
2203  return failure();
2204 
2205  PrivateVarsInfo privateVarsInfo(taskOp);
2206  TaskContextStructManager taskStructMgr{builder, moduleTranslation,
2207  privateVarsInfo.privatizers};
2208 
2209  // Allocate and copy private variables before creating the task. This avoids
2210  // accessing invalid memory if (after this scope ends) the private variables
2211  // are initialized from host variables or if the variables are copied into
2212  // from host variables (firstprivate). The insertion point is just before
2213  // where the code for creating and scheduling the task will go. That puts this
2214  // code outside of the outlined task region, which is what we want because
2215  // this way the initialization and copy regions are executed immediately while
2216  // the host variable data are still live.
2217 
2218  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2219  findAllocaInsertPoint(builder, moduleTranslation);
2220 
2221  // Not using splitBB() because that requires the current block to have a
2222  // terminator.
2223  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
2224  llvm::BasicBlock *taskStartBlock = llvm::BasicBlock::Create(
2225  builder.getContext(), "omp.task.start",
2226  /*Parent=*/builder.GetInsertBlock()->getParent());
2227  llvm::Instruction *branchToTaskStartBlock = builder.CreateBr(taskStartBlock);
2228  builder.SetInsertPoint(branchToTaskStartBlock);
2229 
2230  // Now do this again to make the initialization and copy blocks
2231  llvm::BasicBlock *copyBlock =
2232  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
2233  llvm::BasicBlock *initBlock =
2234  splitBB(builder, /*CreateBranch=*/true, "omp.private.init");
2235 
2236  // Now the control flow graph should look like
2237  // starter_block:
2238  // <---- where we started when convertOmpTaskOp was called
2239  // br %omp.private.init
2240  // omp.private.init:
2241  // br %omp.private.copy
2242  // omp.private.copy:
2243  // br %omp.task.start
2244  // omp.task.start:
2245  // <---- where we want the insertion point to be when we call createTask()
2246 
2247  // Save the alloca insertion point on ModuleTranslation stack for use in
2248  // nested regions.
2250  moduleTranslation, allocaIP);
2251 
2252  // Allocate and initialize private variables
2253  builder.SetInsertPoint(initBlock->getTerminator());
2254 
2255  // Create task variable structure
2256  taskStructMgr.generateTaskContextStruct();
2257  // GEPs so that we can initialize the variables. Don't use these GEPs inside
2258  // of the body otherwise it will be the GEP not the struct which is fowarded
2259  // to the outlined function. GEPs forwarded in this way are passed in a
2260  // stack-allocated (by OpenMPIRBuilder) structure which is not safe for tasks
2261  // which may not be executed until after the current stack frame goes out of
2262  // scope.
2263  taskStructMgr.createGEPsToPrivateVars();
2264 
2265  for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
2266  llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
2267  privateVarsInfo.blockArgs,
2268  taskStructMgr.getLLVMPrivateVarGEPs())) {
2269  // To be handled inside the task.
2270  if (!privDecl.readsFromMold())
2271  continue;
2272  assert(llvmPrivateVarAlloc &&
2273  "reads from mold so shouldn't have been skipped");
2274 
2275  llvm::Expected<llvm::Value *> privateVarOrErr =
2276  initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2277  blockArg, llvmPrivateVarAlloc, initBlock);
2278  if (!privateVarOrErr)
2279  return handleError(privateVarOrErr, *taskOp.getOperation());
2280 
2282 
2283  // TODO: this is a bit of a hack for Fortran character boxes.
2284  // Character boxes are passed by value into the init region and then the
2285  // initialized character box is yielded by value. Here we need to store the
2286  // yielded value into the private allocation, and load the private
2287  // allocation to match the type expected by region block arguments.
2288  if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
2289  !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
2290  builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);
2291  // Load it so we have the value pointed to by the GEP
2292  llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(),
2293  llvmPrivateVarAlloc);
2294  }
2295  assert(llvmPrivateVarAlloc->getType() ==
2296  moduleTranslation.convertType(blockArg.getType()));
2297 
2298  // Mapping blockArg -> llvmPrivateVarAlloc is done inside the body callback
2299  // so that OpenMPIRBuilder doesn't try to pass each GEP address through a
2300  // stack allocated structure.
2301  }
2302 
2303  // firstprivate copy region
2304  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
2306  taskOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
2307  taskStructMgr.getLLVMPrivateVarGEPs(), privateVarsInfo.privatizers,
2308  taskOp.getPrivateNeedsBarrier())))
2309  return llvm::failure();
2310 
2311  // Set up for call to createTask()
2312  builder.SetInsertPoint(taskStartBlock);
2313 
2314  auto bodyCB = [&](InsertPointTy allocaIP,
2315  InsertPointTy codegenIP) -> llvm::Error {
2316  // Save the alloca insertion point on ModuleTranslation stack for use in
2317  // nested regions.
2319  moduleTranslation, allocaIP);
2320 
2321  // translate the body of the task:
2322  builder.restoreIP(codegenIP);
2323 
2324  llvm::BasicBlock *privInitBlock = nullptr;
2325  privateVarsInfo.llvmVars.resize(privateVarsInfo.blockArgs.size());
2326  for (auto [i, zip] : llvm::enumerate(llvm::zip_equal(
2327  privateVarsInfo.blockArgs, privateVarsInfo.privatizers,
2328  privateVarsInfo.mlirVars))) {
2329  auto [blockArg, privDecl, mlirPrivVar] = zip;
2330  // This is handled before the task executes
2331  if (privDecl.readsFromMold())
2332  continue;
2333 
2334  llvm::IRBuilderBase::InsertPointGuard guard(builder);
2335  llvm::Type *llvmAllocType =
2336  moduleTranslation.convertType(privDecl.getType());
2337  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
2338  llvm::Value *llvmPrivateVar = builder.CreateAlloca(
2339  llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
2340 
2341  llvm::Expected<llvm::Value *> privateVarOrError =
2342  initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2343  blockArg, llvmPrivateVar, privInitBlock);
2344  if (!privateVarOrError)
2345  return privateVarOrError.takeError();
2346  moduleTranslation.mapValue(blockArg, privateVarOrError.get());
2347  privateVarsInfo.llvmVars[i] = privateVarOrError.get();
2348  }
2349 
2350  taskStructMgr.createGEPsToPrivateVars();
2351  for (auto [i, llvmPrivVar] :
2352  llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) {
2353  if (!llvmPrivVar) {
2354  assert(privateVarsInfo.llvmVars[i] &&
2355  "This is added in the loop above");
2356  continue;
2357  }
2358  privateVarsInfo.llvmVars[i] = llvmPrivVar;
2359  }
2360 
2361  // Find and map the addresses of each variable within the task context
2362  // structure
2363  for (auto [blockArg, llvmPrivateVar, privateDecl] :
2364  llvm::zip_equal(privateVarsInfo.blockArgs, privateVarsInfo.llvmVars,
2365  privateVarsInfo.privatizers)) {
2366  // This was handled above.
2367  if (!privateDecl.readsFromMold())
2368  continue;
2369  // Fix broken pass-by-value case for Fortran character boxes
2370  if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
2371  llvmPrivateVar = builder.CreateLoad(
2372  moduleTranslation.convertType(blockArg.getType()), llvmPrivateVar);
2373  }
2374  assert(llvmPrivateVar->getType() ==
2375  moduleTranslation.convertType(blockArg.getType()));
2376  moduleTranslation.mapValue(blockArg, llvmPrivateVar);
2377  }
2378 
2379  auto continuationBlockOrError = convertOmpOpRegions(
2380  taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
2381  if (failed(handleError(continuationBlockOrError, *taskOp)))
2382  return llvm::make_error<PreviouslyReportedError>();
2383 
2384  builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
2385 
2386  if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(),
2387  privateVarsInfo.llvmVars,
2388  privateVarsInfo.privatizers)))
2389  return llvm::make_error<PreviouslyReportedError>();
2390 
2391  // Free heap allocated task context structure at the end of the task.
2392  taskStructMgr.freeStructPtr();
2393 
2394  return llvm::Error::success();
2395  };
2396 
2397  llvm::OpenMPIRBuilder &ompBuilder = *moduleTranslation.getOpenMPBuilder();
2398  SmallVector<llvm::BranchInst *> cancelTerminators;
2399  // The directive to match here is OMPD_taskgroup because it is the taskgroup
2400  // which is canceled. This is handled here because it is the task's cleanup
2401  // block which should be branched to.
2402  pushCancelFinalizationCB(cancelTerminators, builder, ompBuilder, taskOp,
2403  llvm::omp::Directive::OMPD_taskgroup);
2404 
2406  buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
2407  moduleTranslation, dds);
2408 
2409  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2410  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2411  moduleTranslation.getOpenMPBuilder()->createTask(
2412  ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
2413  moduleTranslation.lookupValue(taskOp.getFinal()),
2414  moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
2415  taskOp.getMergeable(),
2416  moduleTranslation.lookupValue(taskOp.getEventHandle()),
2417  moduleTranslation.lookupValue(taskOp.getPriority()));
2418 
2419  if (failed(handleError(afterIP, *taskOp)))
2420  return failure();
2421 
2422  // Set the correct branch target for task cancellation
2423  popCancelFinalizationCB(cancelTerminators, ompBuilder, afterIP.get());
2424 
2425  builder.restoreIP(*afterIP);
2426  return success();
2427 }
2428 
2429 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
2430 static LogicalResult
2431 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
2432  LLVM::ModuleTranslation &moduleTranslation) {
2433  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2434  if (failed(checkImplementationStatus(*tgOp)))
2435  return failure();
2436 
2437  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
2438  builder.restoreIP(codegenIP);
2439  return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region",
2440  builder, moduleTranslation)
2441  .takeError();
2442  };
2443 
2444  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
2445  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2446  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2447  moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP,
2448  bodyCB);
2449 
2450  if (failed(handleError(afterIP, *tgOp)))
2451  return failure();
2452 
2453  builder.restoreIP(*afterIP);
2454  return success();
2455 }
2456 
2457 static LogicalResult
2458 convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder,
2459  LLVM::ModuleTranslation &moduleTranslation) {
2460  if (failed(checkImplementationStatus(*twOp)))
2461  return failure();
2462 
2463  moduleTranslation.getOpenMPBuilder()->createTaskwait(builder.saveIP());
2464  return success();
2465 }
2466 
2467 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
2468 static LogicalResult
2469 convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
2470  LLVM::ModuleTranslation &moduleTranslation) {
2471  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2472  auto wsloopOp = cast<omp::WsloopOp>(opInst);
2473  if (failed(checkImplementationStatus(opInst)))
2474  return failure();
2475 
2476  auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
2477  llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref());
2478  assert(isByRef.size() == wsloopOp.getNumReductionVars());
2479 
2480  // Static is the default.
2481  auto schedule =
2482  wsloopOp.getScheduleKind().value_or(omp::ClauseScheduleKind::Static);
2483 
2484  // Find the loop configuration.
2485  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]);
2486  llvm::Type *ivType = step->getType();
2487  llvm::Value *chunk = nullptr;
2488  if (wsloopOp.getScheduleChunk()) {
2489  llvm::Value *chunkVar =
2490  moduleTranslation.lookupValue(wsloopOp.getScheduleChunk());
2491  chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
2492  }
2493 
2494  PrivateVarsInfo privateVarsInfo(wsloopOp);
2495 
2496  SmallVector<omp::DeclareReductionOp> reductionDecls;
2497  collectReductionDecls(wsloopOp, reductionDecls);
2498  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2499  findAllocaInsertPoint(builder, moduleTranslation);
2500 
2501  SmallVector<llvm::Value *> privateReductionVariables(
2502  wsloopOp.getNumReductionVars());
2503 
2505  builder, moduleTranslation, privateVarsInfo, allocaIP);
2506  if (handleError(afterAllocas, opInst).failed())
2507  return failure();
2508 
2509  DenseMap<Value, llvm::Value *> reductionVariableMap;
2510 
2511  MutableArrayRef<BlockArgument> reductionArgs =
2512  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
2513 
2514  SmallVector<DeferredStore> deferredStores;
2515 
2516  if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
2517  moduleTranslation, allocaIP, reductionDecls,
2518  privateReductionVariables, reductionVariableMap,
2519  deferredStores, isByRef)))
2520  return failure();
2521 
2522  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
2523  opInst)
2524  .failed())
2525  return failure();
2526 
2528  wsloopOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
2529  privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
2530  wsloopOp.getPrivateNeedsBarrier())))
2531  return failure();
2532 
2533  assert(afterAllocas.get()->getSinglePredecessor());
2534  if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
2535  moduleTranslation,
2536  afterAllocas.get()->getSinglePredecessor(),
2537  reductionDecls, privateReductionVariables,
2538  reductionVariableMap, isByRef, deferredStores)))
2539  return failure();
2540 
2541  // TODO: Handle doacross loops when the ordered clause has a parameter.
2542  bool isOrdered = wsloopOp.getOrdered().has_value();
2543  std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
2544  bool isSimd = wsloopOp.getScheduleSimd();
2545  bool loopNeedsBarrier = !wsloopOp.getNowait();
2546 
2547  // The only legal way for the direct parent to be omp.distribute is that this
2548  // represents 'distribute parallel do'. Otherwise, this is a regular
2549  // worksharing loop.
2550  llvm::omp::WorksharingLoopType workshareLoopType =
2551  llvm::isa_and_present<omp::DistributeOp>(opInst.getParentOp())
2552  ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop
2553  : llvm::omp::WorksharingLoopType::ForStaticLoop;
2554 
2555  SmallVector<llvm::BranchInst *> cancelTerminators;
2556  pushCancelFinalizationCB(cancelTerminators, builder, *ompBuilder, wsloopOp,
2557  llvm::omp::Directive::OMPD_for);
2558 
2559  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2560 
2561  // Initialize linear variables and linear step
2562  LinearClauseProcessor linearClauseProcessor;
2563  if (!wsloopOp.getLinearVars().empty()) {
2564  for (mlir::Value linearVar : wsloopOp.getLinearVars())
2565  linearClauseProcessor.createLinearVar(builder, moduleTranslation,
2566  linearVar);
2567  for (mlir::Value linearStep : wsloopOp.getLinearStepVars())
2568  linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);
2569  }
2570 
2572  wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation);
2573 
2574  if (failed(handleError(regionBlock, opInst)))
2575  return failure();
2576 
2577  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
2578 
2579  // Emit Initialization and Update IR for linear variables
2580  if (!wsloopOp.getLinearVars().empty()) {
2581  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
2582  linearClauseProcessor.initLinearVar(builder, moduleTranslation,
2583  loopInfo->getPreheader());
2584  if (failed(handleError(afterBarrierIP, *loopOp)))
2585  return failure();
2586  builder.restoreIP(*afterBarrierIP);
2587  linearClauseProcessor.updateLinearVar(builder, loopInfo->getBody(),
2588  loopInfo->getIndVar());
2589  linearClauseProcessor.outlineLinearFinalizationBB(builder,
2590  loopInfo->getExit());
2591  }
2592 
2593  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
2594 
2595  // Check if we can generate no-loop kernel
2596  bool noLoopMode = false;
2597  omp::TargetOp targetOp = wsloopOp->getParentOfType<mlir::omp::TargetOp>();
2598  if (targetOp) {
2599  Operation *targetCapturedOp = targetOp.getInnermostCapturedOmpOp();
2600  // We need this check because, without it, noLoopMode would be set to true
2601  // for every omp.wsloop nested inside a no-loop SPMD target region, even if
2602  // that loop is not the top-level SPMD one.
2603  if (loopOp == targetCapturedOp) {
2604  omp::TargetRegionFlags kernelFlags =
2605  targetOp.getKernelExecFlags(targetCapturedOp);
2606  if (omp::bitEnumContainsAll(kernelFlags,
2607  omp::TargetRegionFlags::spmd |
2608  omp::TargetRegionFlags::no_loop) &&
2609  !omp::bitEnumContainsAny(kernelFlags,
2610  omp::TargetRegionFlags::generic))
2611  noLoopMode = true;
2612  }
2613  }
2614 
2615  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
2616  ompBuilder->applyWorkshareLoop(
2617  ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
2618  convertToScheduleKind(schedule), chunk, isSimd,
2619  scheduleMod == omp::ScheduleModifier::monotonic,
2620  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
2621  workshareLoopType, noLoopMode);
2622 
2623  if (failed(handleError(wsloopIP, opInst)))
2624  return failure();
2625 
2626  // Emit finalization and in-place rewrites for linear vars.
2627  if (!wsloopOp.getLinearVars().empty()) {
2628  llvm::OpenMPIRBuilder::InsertPointTy oldIP = builder.saveIP();
2629  assert(loopInfo->getLastIter() &&
2630  "`lastiter` in CanonicalLoopInfo is nullptr");
2631  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
2632  linearClauseProcessor.finalizeLinearVar(builder, moduleTranslation,
2633  loopInfo->getLastIter());
2634  if (failed(handleError(afterBarrierIP, *loopOp)))
2635  return failure();
2636  for (size_t index = 0; index < wsloopOp.getLinearVars().size(); index++)
2637  linearClauseProcessor.rewriteInPlace(builder, "omp.loop_nest.region",
2638  index);
2639  builder.restoreIP(oldIP);
2640  }
2641 
2642  // Set the correct branch target for task cancellation
2643  popCancelFinalizationCB(cancelTerminators, *ompBuilder, wsloopIP.get());
2644 
2645  // Process the reductions if required.
2647  wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
2648  privateReductionVariables, isByRef, wsloopOp.getNowait(),
2649  /*isTeamsReduction=*/false)))
2650  return failure();
2651 
2652  return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
2653  privateVarsInfo.llvmVars,
2654  privateVarsInfo.privatizers);
2655 }
2656 
2657 /// Converts the OpenMP parallel operation to LLVM IR.
2658 static LogicalResult
2659 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
2660  LLVM::ModuleTranslation &moduleTranslation) {
2661  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2662  ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionByref());
2663  assert(isByRef.size() == opInst.getNumReductionVars());
2664  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2665 
2666  if (failed(checkImplementationStatus(*opInst)))
2667  return failure();
2668 
2669  PrivateVarsInfo privateVarsInfo(opInst);
2670 
2671  // Collect reduction declarations
2672  SmallVector<omp::DeclareReductionOp> reductionDecls;
2673  collectReductionDecls(opInst, reductionDecls);
2674  SmallVector<llvm::Value *> privateReductionVariables(
2675  opInst.getNumReductionVars());
2676  SmallVector<DeferredStore> deferredStores;
2677 
2678  auto bodyGenCB = [&](InsertPointTy allocaIP,
2679  InsertPointTy codeGenIP) -> llvm::Error {
2681  builder, moduleTranslation, privateVarsInfo, allocaIP);
2682  if (handleError(afterAllocas, *opInst).failed())
2683  return llvm::make_error<PreviouslyReportedError>();
2684 
2685  // Allocate reduction vars
2686  DenseMap<Value, llvm::Value *> reductionVariableMap;
2687 
2688  MutableArrayRef<BlockArgument> reductionArgs =
2689  cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs();
2690 
2691  allocaIP =
2692  InsertPointTy(allocaIP.getBlock(),
2693  allocaIP.getBlock()->getTerminator()->getIterator());
2694 
2696  opInst, reductionArgs, builder, moduleTranslation, allocaIP,
2697  reductionDecls, privateReductionVariables, reductionVariableMap,
2698  deferredStores, isByRef)))
2699  return llvm::make_error<PreviouslyReportedError>();
2700 
2701  assert(afterAllocas.get()->getSinglePredecessor());
2702  builder.restoreIP(codeGenIP);
2703 
2704  if (handleError(
2705  initPrivateVars(builder, moduleTranslation, privateVarsInfo),
2706  *opInst)
2707  .failed())
2708  return llvm::make_error<PreviouslyReportedError>();
2709 
2711  opInst, builder, moduleTranslation, privateVarsInfo.mlirVars,
2712  privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
2713  opInst.getPrivateNeedsBarrier())))
2714  return llvm::make_error<PreviouslyReportedError>();
2715 
2716  if (failed(
2717  initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
2718  afterAllocas.get()->getSinglePredecessor(),
2719  reductionDecls, privateReductionVariables,
2720  reductionVariableMap, isByRef, deferredStores)))
2721  return llvm::make_error<PreviouslyReportedError>();
2722 
2723  // Save the alloca insertion point on ModuleTranslation stack for use in
2724  // nested regions.
2726  moduleTranslation, allocaIP);
2727 
2728  // ParallelOp has only one region associated with it.
2730  opInst.getRegion(), "omp.par.region", builder, moduleTranslation);
2731  if (!regionBlock)
2732  return regionBlock.takeError();
2733 
2734  // Process the reductions if required.
2735  if (opInst.getNumReductionVars() > 0) {
2736  // Collect reduction info
2737  SmallVector<OwningReductionGen> owningReductionGens;
2738  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
2740  collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
2741  owningReductionGens, owningAtomicReductionGens,
2742  privateReductionVariables, reductionInfos);
2743 
2744  // Move to region cont block
2745  builder.SetInsertPoint((*regionBlock)->getTerminator());
2746 
2747  // Generate reductions from info
2748  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
2749  builder.SetInsertPoint(tempTerminator);
2750 
2751  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
2752  ompBuilder->createReductions(
2753  builder.saveIP(), allocaIP, reductionInfos, isByRef,
2754  /*IsNoWait=*/false, /*IsTeamsReduction=*/false);
2755  if (!contInsertPoint)
2756  return contInsertPoint.takeError();
2757 
2758  if (!contInsertPoint->getBlock())
2759  return llvm::make_error<PreviouslyReportedError>();
2760 
2761  tempTerminator->eraseFromParent();
2762  builder.restoreIP(*contInsertPoint);
2763  }
2764 
2765  return llvm::Error::success();
2766  };
2767 
2768  auto privCB = [](InsertPointTy allocaIP, InsertPointTy codeGenIP,
2769  llvm::Value &, llvm::Value &val, llvm::Value *&replVal) {
2770  // tell OpenMPIRBuilder not to do anything. We handled Privatisation in
2771  // bodyGenCB.
2772  replVal = &val;
2773  return codeGenIP;
2774  };
2775 
2776  // TODO: Perform finalization actions for variables. This has to be
2777  // called for variables which have destructors/finalizers.
2778  auto finiCB = [&](InsertPointTy codeGenIP) -> llvm::Error {
2779  InsertPointTy oldIP = builder.saveIP();
2780  builder.restoreIP(codeGenIP);
2781 
2782  // if the reduction has a cleanup region, inline it here to finalize the
2783  // reduction variables
2784  SmallVector<Region *> reductionCleanupRegions;
2785  llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions),
2786  [](omp::DeclareReductionOp reductionDecl) {
2787  return &reductionDecl.getCleanupRegion();
2788  });
2790  reductionCleanupRegions, privateReductionVariables,
2791  moduleTranslation, builder, "omp.reduction.cleanup")))
2792  return llvm::createStringError(
2793  "failed to inline `cleanup` region of `omp.declare_reduction`");
2794 
2795  if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(),
2796  privateVarsInfo.llvmVars,
2797  privateVarsInfo.privatizers)))
2798  return llvm::make_error<PreviouslyReportedError>();
2799 
2800  builder.restoreIP(oldIP);
2801  return llvm::Error::success();
2802  };
2803 
2804  llvm::Value *ifCond = nullptr;
2805  if (auto ifVar = opInst.getIfExpr())
2806  ifCond = moduleTranslation.lookupValue(ifVar);
2807  llvm::Value *numThreads = nullptr;
2808  if (auto numThreadsVar = opInst.getNumThreads())
2809  numThreads = moduleTranslation.lookupValue(numThreadsVar);
2810  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
2811  if (auto bind = opInst.getProcBindKind())
2812  pbKind = getProcBindKind(*bind);
2813  bool isCancellable = constructIsCancellable(opInst);
2814 
2815  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2816  findAllocaInsertPoint(builder, moduleTranslation);
2817  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2818 
2819  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2820  ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
2821  ifCond, numThreads, pbKind, isCancellable);
2822 
2823  if (failed(handleError(afterIP, *opInst)))
2824  return failure();
2825 
2826  builder.restoreIP(*afterIP);
2827  return success();
2828 }
2829 
2830 /// Convert Order attribute to llvm::omp::OrderKind.
2831 static llvm::omp::OrderKind
2832 convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
2833  if (!o)
2834  return llvm::omp::OrderKind::OMP_ORDER_unknown;
2835  switch (*o) {
2836  case omp::ClauseOrderKind::Concurrent:
2837  return llvm::omp::OrderKind::OMP_ORDER_concurrent;
2838  }
2839  llvm_unreachable("Unknown ClauseOrderKind kind");
2840 }
2841 
2842 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
2843 static LogicalResult
2844 convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
2845  LLVM::ModuleTranslation &moduleTranslation) {
2846  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2847  auto simdOp = cast<omp::SimdOp>(opInst);
2848 
2849  if (failed(checkImplementationStatus(opInst)))
2850  return failure();
2851 
2852  PrivateVarsInfo privateVarsInfo(simdOp);
2853 
2854  MutableArrayRef<BlockArgument> reductionArgs =
2855  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
2856  DenseMap<Value, llvm::Value *> reductionVariableMap;
2857  SmallVector<llvm::Value *> privateReductionVariables(
2858  simdOp.getNumReductionVars());
2859  SmallVector<DeferredStore> deferredStores;
2860  SmallVector<omp::DeclareReductionOp> reductionDecls;
2861  collectReductionDecls(simdOp, reductionDecls);
2862  llvm::ArrayRef<bool> isByRef = getIsByRef(simdOp.getReductionByref());
2863  assert(isByRef.size() == simdOp.getNumReductionVars());
2864 
2865  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2866  findAllocaInsertPoint(builder, moduleTranslation);
2867 
2869  builder, moduleTranslation, privateVarsInfo, allocaIP);
2870  if (handleError(afterAllocas, opInst).failed())
2871  return failure();
2872 
2873  if (failed(allocReductionVars(simdOp, reductionArgs, builder,
2874  moduleTranslation, allocaIP, reductionDecls,
2875  privateReductionVariables, reductionVariableMap,
2876  deferredStores, isByRef)))
2877  return failure();
2878 
2879  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
2880  opInst)
2881  .failed())
2882  return failure();
2883 
2884  // No call to copyFirstPrivateVars because FIRSTPRIVATE is not allowed for
2885  // SIMD.
2886 
2887  assert(afterAllocas.get()->getSinglePredecessor());
2888  if (failed(initReductionVars(simdOp, reductionArgs, builder,
2889  moduleTranslation,
2890  afterAllocas.get()->getSinglePredecessor(),
2891  reductionDecls, privateReductionVariables,
2892  reductionVariableMap, isByRef, deferredStores)))
2893  return failure();
2894 
2895  llvm::ConstantInt *simdlen = nullptr;
2896  if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
2897  simdlen = builder.getInt64(simdlenVar.value());
2898 
2899  llvm::ConstantInt *safelen = nullptr;
2900  if (std::optional<uint64_t> safelenVar = simdOp.getSafelen())
2901  safelen = builder.getInt64(safelenVar.value());
2902 
2903  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
2904  llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder());
2905 
2906  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
2907  std::optional<ArrayAttr> alignmentValues = simdOp.getAlignments();
2908  mlir::OperandRange operands = simdOp.getAlignedVars();
2909  for (size_t i = 0; i < operands.size(); ++i) {
2910  llvm::Value *alignment = nullptr;
2911  llvm::Value *llvmVal = moduleTranslation.lookupValue(operands[i]);
2912  llvm::Type *ty = llvmVal->getType();
2913 
2914  auto intAttr = cast<IntegerAttr>((*alignmentValues)[i]);
2915  alignment = builder.getInt64(intAttr.getInt());
2916  assert(ty->isPointerTy() && "Invalid type for aligned variable");
2917  assert(alignment && "Invalid alignment value");
2918 
2919  // Check if the alignment value is not a power of 2. If so, skip emitting
2920  // alignment.
2921  if (!intAttr.getValue().isPowerOf2())
2922  continue;
2923 
2924  auto curInsert = builder.saveIP();
2925  builder.SetInsertPoint(sourceBlock);
2926  llvmVal = builder.CreateLoad(ty, llvmVal);
2927  builder.restoreIP(curInsert);
2928  alignedVars[llvmVal] = alignment;
2929  }
2930 
2932  simdOp.getRegion(), "omp.simd.region", builder, moduleTranslation);
2933 
2934  if (failed(handleError(regionBlock, opInst)))
2935  return failure();
2936 
2937  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
2938  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
2939  ompBuilder->applySimd(loopInfo, alignedVars,
2940  simdOp.getIfExpr()
2941  ? moduleTranslation.lookupValue(simdOp.getIfExpr())
2942  : nullptr,
2943  order, simdlen, safelen);
2944 
2945  // We now need to reduce the per-simd-lane reduction variable into the
2946  // original variable. This works a bit differently to other reductions (e.g.
2947  // wsloop) because we don't need to call into the OpenMP runtime to handle
2948  // threads: everything happened in this one thread.
2949  for (auto [i, tuple] : llvm::enumerate(
2950  llvm::zip(reductionDecls, isByRef, simdOp.getReductionVars(),
2951  privateReductionVariables))) {
2952  auto [decl, byRef, reductionVar, privateReductionVar] = tuple;
2953 
2954  OwningReductionGen gen = makeReductionGen(decl, builder, moduleTranslation);
2955  llvm::Value *originalVariable = moduleTranslation.lookupValue(reductionVar);
2956  llvm::Type *reductionType = moduleTranslation.convertType(decl.getType());
2957 
2958  // We have one less load for by-ref case because that load is now inside of
2959  // the reduction region.
2960  llvm::Value *redValue = originalVariable;
2961  if (!byRef)
2962  redValue =
2963  builder.CreateLoad(reductionType, redValue, "red.value." + Twine(i));
2964  llvm::Value *privateRedValue = builder.CreateLoad(
2965  reductionType, privateReductionVar, "red.private.value." + Twine(i));
2966  llvm::Value *reduced;
2967 
2968  auto res = gen(builder.saveIP(), redValue, privateRedValue, reduced);
2969  if (failed(handleError(res, opInst)))
2970  return failure();
2971  builder.restoreIP(res.get());
2972 
2973  // For by-ref case, the store is inside of the reduction region.
2974  if (!byRef)
2975  builder.CreateStore(reduced, originalVariable);
2976  }
2977 
2978  // After the construct, deallocate private reduction variables.
2979  SmallVector<Region *> reductionRegions;
2980  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
2981  [](omp::DeclareReductionOp reductionDecl) {
2982  return &reductionDecl.getCleanupRegion();
2983  });
2984  if (failed(inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
2985  moduleTranslation, builder,
2986  "omp.reduction.cleanup")))
2987  return failure();
2988 
2989  return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
2990  privateVarsInfo.llvmVars,
2991  privateVarsInfo.privatizers);
2992 }
2993 
2994 /// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
2995 static LogicalResult
2996 convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
2997  LLVM::ModuleTranslation &moduleTranslation) {
2998  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2999  auto loopOp = cast<omp::LoopNestOp>(opInst);
3000 
3001  // Set up the source location value for OpenMP runtime.
3002  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3003 
3004  // Generator of the canonical loop body.
3007  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip,
3008  llvm::Value *iv) -> llvm::Error {
3009  // Make sure further conversions know about the induction variable.
3010  moduleTranslation.mapValue(
3011  loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
3012 
3013  // Capture the body insertion point for use in nested loops. BodyIP of the
3014  // CanonicalLoopInfo always points to the beginning of the entry block of
3015  // the body.
3016  bodyInsertPoints.push_back(ip);
3017 
3018  if (loopInfos.size() != loopOp.getNumLoops() - 1)
3019  return llvm::Error::success();
3020 
3021  // Convert the body of the loop.
3022  builder.restoreIP(ip);
3024  loopOp.getRegion(), "omp.loop_nest.region", builder, moduleTranslation);
3025  if (!regionBlock)
3026  return regionBlock.takeError();
3027 
3028  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
3029  return llvm::Error::success();
3030  };
3031 
3032  // Delegate actual loop construction to the OpenMP IRBuilder.
3033  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
3034  // loop, i.e. it has a positive step, uses signed integer semantics.
3035  // Reconsider this code when the nested loop operation clearly supports more
3036  // cases.
3037  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
3038  llvm::Value *lowerBound =
3039  moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]);
3040  llvm::Value *upperBound =
3041  moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]);
3042  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]);
3043 
3044  // Make sure loop trip count are emitted in the preheader of the outermost
3045  // loop at the latest so that they are all available for the new collapsed
3046  // loop will be created below.
3047  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
3048  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
3049  if (i != 0) {
3050  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
3051  ompLoc.DL);
3052  computeIP = loopInfos.front()->getPreheaderIP();
3053  }
3054 
3056  ompBuilder->createCanonicalLoop(
3057  loc, bodyGen, lowerBound, upperBound, step,
3058  /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);
3059 
3060  if (failed(handleError(loopResult, *loopOp)))
3061  return failure();
3062 
3063  loopInfos.push_back(*loopResult);
3064  }
3065 
3066  llvm::OpenMPIRBuilder::InsertPointTy afterIP =
3067  loopInfos.front()->getAfterIP();
3068 
3069  // Do tiling.
3070  if (const auto &tiles = loopOp.getTileSizes()) {
3071  llvm::Type *ivType = loopInfos.front()->getIndVarType();
3072  SmallVector<llvm::Value *> tileSizes;
3073 
3074  for (auto tile : tiles.value()) {
3075  llvm::Value *tileVal = llvm::ConstantInt::get(ivType, tile);
3076  tileSizes.push_back(tileVal);
3077  }
3078 
3079  std::vector<llvm::CanonicalLoopInfo *> newLoops =
3080  ompBuilder->tileLoops(ompLoc.DL, loopInfos, tileSizes);
3081 
3082  // Update afterIP to get the correct insertion point after
3083  // tiling.
3084  llvm::BasicBlock *afterBB = newLoops.front()->getAfter();
3085  llvm::BasicBlock *afterAfterBB = afterBB->getSingleSuccessor();
3086  afterIP = {afterAfterBB, afterAfterBB->begin()};
3087 
3088  // Update the loop infos.
3089  loopInfos.clear();
3090  for (const auto &newLoop : newLoops)
3091  loopInfos.push_back(newLoop);
3092  } // Tiling done.
3093 
3094  // Do collapse.
3095  const auto &numCollapse = loopOp.getCollapseNumLoops();
3096  SmallVector<llvm::CanonicalLoopInfo *> collapseLoopInfos(
3097  loopInfos.begin(), loopInfos.begin() + (numCollapse));
3098 
3099  auto newTopLoopInfo =
3100  ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {});
3101 
3102  assert(newTopLoopInfo && "New top loop information is missing");
3103  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
3104  [&](OpenMPLoopInfoStackFrame &frame) {
3105  frame.loopInfo = newTopLoopInfo;
3106  return WalkResult::interrupt();
3107  });
3108 
3109  // Continue building IR after the loop. Note that the LoopInfo returned by
3110  // `collapseLoops` points inside the outermost loop and is intended for
3111  // potential further loop transformations. Use the insertion point stored
3112  // before collapsing loops instead.
3113  builder.restoreIP(afterIP);
3114  return success();
3115 }
3116 
3117 /// Convert an omp.canonical_loop to LLVM-IR
3118 static LogicalResult
3119 convertOmpCanonicalLoopOp(omp::CanonicalLoopOp op, llvm::IRBuilderBase &builder,
3120  LLVM::ModuleTranslation &moduleTranslation) {
3121  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3122 
3123  llvm::OpenMPIRBuilder::LocationDescription loopLoc(builder);
3124  Value loopIV = op.getInductionVar();
3125  Value loopTC = op.getTripCount();
3126 
3127  llvm::Value *llvmTC = moduleTranslation.lookupValue(loopTC);
3128 
3130  ompBuilder->createCanonicalLoop(
3131  loopLoc,
3132  [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *llvmIV) {
3133  // Register the mapping of MLIR induction variable to LLVM-IR
3134  // induction variable
3135  moduleTranslation.mapValue(loopIV, llvmIV);
3136 
3137  builder.restoreIP(ip);
3138  llvm::Expected<llvm::BasicBlock *> bodyGenStatus =
3139  convertOmpOpRegions(op.getRegion(), "omp.loop.region", builder,
3140  moduleTranslation);
3141 
3142  return bodyGenStatus.takeError();
3143  },
3144  llvmTC, "omp.loop");
3145  if (!llvmOrError)
3146  return op.emitError(llvm::toString(llvmOrError.takeError()));
3147 
3148  llvm::CanonicalLoopInfo *llvmCLI = *llvmOrError;
3149  llvm::IRBuilderBase::InsertPoint afterIP = llvmCLI->getAfterIP();
3150  builder.restoreIP(afterIP);
3151 
3152  // Register the mapping of MLIR loop to LLVM-IR OpenMPIRBuilder loop
3153  if (Value cli = op.getCli())
3154  moduleTranslation.mapOmpLoop(cli, llvmCLI);
3155 
3156  return success();
3157 }
3158 
3159 /// Apply a `#pragma omp unroll` / "!$omp unroll" transformation using the
3160 /// OpenMPIRBuilder.
3161 static LogicalResult
3162 applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder,
3163  LLVM::ModuleTranslation &moduleTranslation) {
3164  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3165 
3166  Value applyee = op.getApplyee();
3167  assert(applyee && "Loop to apply unrolling on required");
3168 
3169  llvm::CanonicalLoopInfo *consBuilderCLI =
3170  moduleTranslation.lookupOMPLoop(applyee);
3171  llvm::OpenMPIRBuilder::LocationDescription loc(builder);
3172  ompBuilder->unrollLoopHeuristic(loc.DL, consBuilderCLI);
3173 
3174  moduleTranslation.invalidateOmpLoop(applyee);
3175  return success();
3176 }
3177 
3178 /// Apply a `#pragma omp tile` / `!$omp tile` transformation using the
3179 /// OpenMPIRBuilder.
3180 static LogicalResult applyTile(omp::TileOp op, llvm::IRBuilderBase &builder,
3181  LLVM::ModuleTranslation &moduleTranslation) {
3182  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3183  llvm::OpenMPIRBuilder::LocationDescription loc(builder);
3184 
3186  SmallVector<llvm::Value *> translatedSizes;
3187 
3188  for (Value size : op.getSizes()) {
3189  llvm::Value *translatedSize = moduleTranslation.lookupValue(size);
3190  assert(translatedSize &&
3191  "sizes clause arguments must already be translated");
3192  translatedSizes.push_back(translatedSize);
3193  }
3194 
3195  for (Value applyee : op.getApplyees()) {
3196  llvm::CanonicalLoopInfo *consBuilderCLI =
3197  moduleTranslation.lookupOMPLoop(applyee);
3198  assert(applyee && "Canonical loop must already been translated");
3199  translatedLoops.push_back(consBuilderCLI);
3200  }
3201 
3202  auto generatedLoops =
3203  ompBuilder->tileLoops(loc.DL, translatedLoops, translatedSizes);
3204  if (!op.getGeneratees().empty()) {
3205  for (auto [mlirLoop, genLoop] :
3206  zip_equal(op.getGeneratees(), generatedLoops))
3207  moduleTranslation.mapOmpLoop(mlirLoop, genLoop);
3208  }
3209 
3210  // CLIs can only be consumed once
3211  for (Value applyee : op.getApplyees())
3212  moduleTranslation.invalidateOmpLoop(applyee);
3213 
3214  return success();
3215 }
3216 
3217 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
3218 static llvm::AtomicOrdering
3219 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
3220  if (!ao)
3221  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
3222 
3223  switch (*ao) {
3224  case omp::ClauseMemoryOrderKind::Seq_cst:
3225  return llvm::AtomicOrdering::SequentiallyConsistent;
3226  case omp::ClauseMemoryOrderKind::Acq_rel:
3227  return llvm::AtomicOrdering::AcquireRelease;
3228  case omp::ClauseMemoryOrderKind::Acquire:
3229  return llvm::AtomicOrdering::Acquire;
3230  case omp::ClauseMemoryOrderKind::Release:
3231  return llvm::AtomicOrdering::Release;
3232  case omp::ClauseMemoryOrderKind::Relaxed:
3233  return llvm::AtomicOrdering::Monotonic;
3234  }
3235  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
3236 }
3237 
3238 /// Convert omp.atomic.read operation to LLVM IR.
3239 static LogicalResult
3240 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
3241  LLVM::ModuleTranslation &moduleTranslation) {
3242  auto readOp = cast<omp::AtomicReadOp>(opInst);
3243  if (failed(checkImplementationStatus(opInst)))
3244  return failure();
3245 
3246  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3247  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3248  findAllocaInsertPoint(builder, moduleTranslation);
3249 
3250  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3251 
3252  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrder());
3253  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
3254  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
3255 
3256  llvm::Type *elementType =
3257  moduleTranslation.convertType(readOp.getElementType());
3258 
3259  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
3260  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
3261  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO, allocaIP));
3262  return success();
3263 }
3264 
3265 /// Converts an omp.atomic.write operation to LLVM IR.
3266 static LogicalResult
3267 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
3268  LLVM::ModuleTranslation &moduleTranslation) {
3269  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
3270  if (failed(checkImplementationStatus(opInst)))
3271  return failure();
3272 
3273  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3274  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3275  findAllocaInsertPoint(builder, moduleTranslation);
3276 
3277  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3278  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder());
3279  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
3280  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
3281  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
3282  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
3283  /*isVolatile=*/false};
3284  builder.restoreIP(
3285  ompBuilder->createAtomicWrite(ompLoc, x, expr, ao, allocaIP));
3286  return success();
3287 }
3288 
3289 /// Converts an LLVM dialect binary operation to the corresponding enum value
3290 /// for `atomicrmw` supported binary operation.
3291 static llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
3293  .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
3294  .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
3295  .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
3296  .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
3297  .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
3298  .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
3299  .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
3300  .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
3301  .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
3302  .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
3303 }
3304 
3305 static void extractAtomicControlFlags(omp::AtomicUpdateOp atomicUpdateOp,
3306  bool &isIgnoreDenormalMode,
3307  bool &isFineGrainedMemory,
3308  bool &isRemoteMemory) {
3309  isIgnoreDenormalMode = false;
3310  isFineGrainedMemory = false;
3311  isRemoteMemory = false;
3312  if (atomicUpdateOp &&
3313  atomicUpdateOp->hasAttr(atomicUpdateOp.getAtomicControlAttrName())) {
3314  mlir::omp::AtomicControlAttr atomicControlAttr =
3315  atomicUpdateOp.getAtomicControlAttr();
3316  isIgnoreDenormalMode = atomicControlAttr.getIgnoreDenormalMode();
3317  isFineGrainedMemory = atomicControlAttr.getFineGrainedMemory();
3318  isRemoteMemory = atomicControlAttr.getRemoteMemory();
3319  }
3320 }
3321 
/// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
///
/// When the update region holds exactly one operation (plus the terminator)
/// whose kind maps to an `atomicrmw` binop, the builder can emit a single
/// atomicrmw; otherwise it falls back to a cmpxchg loop driven by the
/// `updateFn` callback below.
static LogicalResult
convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
                       llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  if (failed(checkImplementationStatus(*opInst)))
    return failure();

  // Convert values and types.
  auto &innerOpList = opInst.getRegion().front().getOperations();
  bool isXBinopExpr{false};
  llvm::AtomicRMWInst::BinOp binop;
  mlir::Value mlirExpr;
  llvm::Value *llvmExpr = nullptr;
  llvm::Value *llvmX = nullptr;
  llvm::Type *llvmXElementType = nullptr;
  if (innerOpList.size() == 2) {
    // The two operations here are the update and the terminator.
    // Since we can identify the update operation, there is a possibility
    // that we can generate the atomicrmw instruction.
    mlir::Operation &innerOp = *opInst.getRegion().front().begin();
    if (!llvm::is_contained(innerOp.getOperands(),
                            opInst.getRegion().getArgument(0))) {
      return opInst.emitError("no atomic update operation with region argument"
                              " as operand found inside atomic.update region");
    }
    binop = convertBinOpToAtomic(innerOp);
    // isXBinopExpr is true for `x = x op expr` (region argument on the LHS),
    // false for `x = expr op x`.
    isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
    mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  } else {
    // Since the update region includes more than one operation
    // we will resort to generating a cmpxchg loop.
    binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
  }
  // The element type of x is taken from the region argument's MLIR type.
  llvmX = moduleTranslation.lookupValue(opInst.getX());
  llvmXElementType = moduleTranslation.convertType(
      opInst.getRegion().getArgument(0).getType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(opInst.getMemoryOrder());

  // Generate update code.
  // Callback invoked by the OpenMPIRBuilder when it needs the updated value:
  // it inlines the MLIR update region with the region argument mapped to
  // `atomicx`, and returns the value yielded by the region's terminator.
  auto updateFn =
      [&opInst, &moduleTranslation](
          llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    Block &bb = *opInst.getRegion().begin();
    moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Atomic-control hints taken from the op's attribute, if present
  // (see extractAtomicControlFlags); all false otherwise.
  bool isIgnoreDenormalMode;
  bool isFineGrainedMemory;
  bool isRemoteMemory;
  extractAtomicControlFlags(opInst, isIgnoreDenormalMode, isFineGrainedMemory,
                            isRemoteMemory);
  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
                                     atomicOrdering, binop, updateFn,
                                     isXBinopExpr, isIgnoreDenormalMode,
                                     isFineGrainedMemory, isRemoteMemory);

  if (failed(handleError(afterIP, *opInst)))
    return failure();

  // Continue emitting IR after the atomic construct.
  builder.restoreIP(*afterIP);
  return success();
}
3406 
/// Converts an omp.atomic.capture operation (an atomic.read paired with
/// either an atomic.update or an atomic.write on the same location) to LLVM
/// IR using the OpenMPIRBuilder.
static LogicalResult
convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
                        llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  if (failed(checkImplementationStatus(*atomicCaptureOp)))
    return failure();

  mlir::Value mlirExpr;
  bool isXBinopExpr = false, isPostfixUpdate = false;
  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;

  // Exactly one of these is present inside the capture region.
  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();

  assert((atomicUpdateOp || atomicWriteOp) &&
         "internal op must be an atomic.update or atomic.write op");

  if (atomicWriteOp) {
    // Capture-with-write: isPostfixUpdate is set so the captured value is
    // the old x.
    isPostfixUpdate = true;
    mlirExpr = atomicWriteOp.getExpr();
  } else {
    // Postfix capture (`v = x; x op= expr`) iff the update op is the second
    // op inside the capture region.
    isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
                      atomicCaptureOp.getAtomicUpdateOp().getOperation();
    auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
    // Find the binary update operation that uses the region argument
    // and get the expression to update
    if (innerOpList.size() == 2) {
      // Exactly the update op plus the terminator: candidate for atomicrmw.
      mlir::Operation &innerOp = *atomicUpdateOp.getRegion().front().begin();
      if (!llvm::is_contained(innerOp.getOperands(),
                              atomicUpdateOp.getRegion().getArgument(0))) {
        return atomicUpdateOp.emitError(
            "no atomic update operation with region argument"
            " as operand found inside atomic.update region");
      }
      binop = convertBinOpToAtomic(innerOp);
      // True when the update has the form `x = x op expr`.
      isXBinopExpr =
          innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
      mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    } else {
      // Complex update region: BAD_BINOP forces a cmpxchg loop.
      binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
    }
  }

  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  llvm::Value *llvmX =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
  llvm::Value *llvmV =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
  // Both x and v use the element type declared on the atomic.read op.
  llvm::Type *llvmXElementType = moduleTranslation.convertType(
      atomicCaptureOp.getAtomicReadOp().getElementType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(atomicCaptureOp.getMemoryOrder());

  // Callback producing the new value of x: either the written expression
  // (capture-with-write) or the result of inlining the update region with
  // its argument mapped to `atomicx`.
  auto updateFn =
      [&](llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    if (atomicWriteOp)
      return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
    Block &bb = *atomicUpdateOp.getRegion().begin();
    moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
                               atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Atomic-control hints come from the (possibly null) update op's attribute.
  bool isIgnoreDenormalMode;
  bool isFineGrainedMemory;
  bool isRemoteMemory;
  extractAtomicControlFlags(atomicUpdateOp, isIgnoreDenormalMode,
                            isFineGrainedMemory, isRemoteMemory);
  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicCapture(
          ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
          binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr,
          isIgnoreDenormalMode, isFineGrainedMemory, isRemoteMemory);

  if (failed(handleError(afterIP, *atomicCaptureOp)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
3507 
3508 static llvm::omp::Directive convertCancellationConstructType(
3509  omp::ClauseCancellationConstructType directive) {
3510  switch (directive) {
3511  case omp::ClauseCancellationConstructType::Loop:
3512  return llvm::omp::Directive::OMPD_for;
3513  case omp::ClauseCancellationConstructType::Parallel:
3514  return llvm::omp::Directive::OMPD_parallel;
3515  case omp::ClauseCancellationConstructType::Sections:
3516  return llvm::omp::Directive::OMPD_sections;
3517  case omp::ClauseCancellationConstructType::Taskgroup:
3518  return llvm::omp::Directive::OMPD_taskgroup;
3519  }
3520  llvm_unreachable("Unhandled cancellation construct type");
3521 }
3522 
3523 static LogicalResult
3524 convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder,
3525  LLVM::ModuleTranslation &moduleTranslation) {
3526  if (failed(checkImplementationStatus(*op.getOperation())))
3527  return failure();
3528 
3529  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3530  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3531 
3532  llvm::Value *ifCond = nullptr;
3533  if (Value ifVar = op.getIfExpr())
3534  ifCond = moduleTranslation.lookupValue(ifVar);
3535 
3536  llvm::omp::Directive cancelledDirective =
3537  convertCancellationConstructType(op.getCancelDirective());
3538 
3539  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3540  ompBuilder->createCancel(ompLoc, ifCond, cancelledDirective);
3541 
3542  if (failed(handleError(afterIP, *op.getOperation())))
3543  return failure();
3544 
3545  builder.restoreIP(afterIP.get());
3546 
3547  return success();
3548 }
3549 
3550 static LogicalResult
3551 convertOmpCancellationPoint(omp::CancellationPointOp op,
3552  llvm::IRBuilderBase &builder,
3553  LLVM::ModuleTranslation &moduleTranslation) {
3554  if (failed(checkImplementationStatus(*op.getOperation())))
3555  return failure();
3556 
3557  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3558  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3559 
3560  llvm::omp::Directive cancelledDirective =
3561  convertCancellationConstructType(op.getCancelDirective());
3562 
3563  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3564  ompBuilder->createCancellationPoint(ompLoc, cancelledDirective);
3565 
3566  if (failed(handleError(afterIP, *op.getOperation())))
3567  return failure();
3568 
3569  builder.restoreIP(afterIP.get());
3570 
3571  return success();
3572 }
3573 
3574 /// Converts an OpenMP Threadprivate operation into LLVM IR using
3575 /// OpenMPIRBuilder.
3576 static LogicalResult
3577 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
3578  LLVM::ModuleTranslation &moduleTranslation) {
3579  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3580  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3581  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
3582 
3583  if (failed(checkImplementationStatus(opInst)))
3584  return failure();
3585 
3586  Value symAddr = threadprivateOp.getSymAddr();
3587  auto *symOp = symAddr.getDefiningOp();
3588 
3589  if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
3590  symOp = asCast.getOperand().getDefiningOp();
3591 
3592  if (!isa<LLVM::AddressOfOp>(symOp))
3593  return opInst.emitError("Addressing symbol not found");
3594  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
3595 
3596  LLVM::GlobalOp global =
3597  addressOfOp.getGlobal(moduleTranslation.symbolTable());
3598  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
3599 
3600  if (!ompBuilder->Config.isTargetDevice()) {
3601  llvm::Type *type = globalValue->getValueType();
3602  llvm::TypeSize typeSize =
3603  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
3604  type);
3605  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
3606  llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
3607  ompLoc, globalValue, size, global.getSymName() + ".cache");
3608  moduleTranslation.mapValue(opInst.getResult(0), callInst);
3609  } else {
3610  moduleTranslation.mapValue(opInst.getResult(0), globalValue);
3611  }
3612 
3613  return success();
3614 }
3615 
3616 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
3617 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
3618  switch (deviceClause) {
3619  case mlir::omp::DeclareTargetDeviceType::host:
3620  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
3621  break;
3622  case mlir::omp::DeclareTargetDeviceType::nohost:
3623  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
3624  break;
3625  case mlir::omp::DeclareTargetDeviceType::any:
3626  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
3627  break;
3628  }
3629  llvm_unreachable("unhandled device clause");
3630 }
3631 
3632 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
3634  mlir::omp::DeclareTargetCaptureClause captureClause) {
3635  switch (captureClause) {
3636  case mlir::omp::DeclareTargetCaptureClause::to:
3637  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
3638  case mlir::omp::DeclareTargetCaptureClause::link:
3639  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
3640  case mlir::omp::DeclareTargetCaptureClause::enter:
3641  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
3642  }
3643  llvm_unreachable("unhandled capture clause");
3644 }
3645 
/// Build the "_decl_tgt_ref_ptr" suffix appended to a declare-target global's
/// reference pointer name. Private globals additionally embed a per-file
/// unique id so identically named private symbols from different files do
/// not collide.
static llvm::SmallString<64>
getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
                             llvm::OpenMPIRBuilder &ompBuilder) {
  llvm::SmallString<64> suffix;
  llvm::raw_svector_ostream os(suffix);
  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
    // NOTE(review): assumes the global's location wraps a FileLineColLoc;
    // `loc` would be null otherwise — confirm callers guarantee this.
    auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
    auto fileInfoCallBack = [&loc]() {
      return std::pair<std::string, uint64_t>(
          llvm::StringRef(loc.getFilename()), loc.getLine());
    };

    // The FileID is derived from the file's identity on the real filesystem.
    auto vfs = llvm::vfs::getRealFileSystem();
    os << llvm::format(
        "_%x",
        ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack, *vfs).FileID);
  }
  os << "_decl_tgt_ref_ptr";

  return suffix;
}
3667 
3668 static bool isDeclareTargetLink(mlir::Value value) {
3669  if (auto addressOfOp = value.getDefiningOp<LLVM::AddressOfOp>()) {
3670  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
3671  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
3672  if (auto declareTargetGlobal =
3673  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
3674  if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3675  mlir::omp::DeclareTargetCaptureClause::link)
3676  return true;
3677  }
3678  return false;
3679 }
3680 
3681 // Returns the reference pointer generated by the lowering of the declare target
3682 // operation in cases where the link clause is used or the to clause is used in
3683 // USM mode.
3684 static llvm::Value *
3686  LLVM::ModuleTranslation &moduleTranslation) {
3687  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3688  Operation *op = value.getDefiningOp();
3689  if (auto addrCast = llvm::dyn_cast_if_present<LLVM::AddrSpaceCastOp>(op))
3690  op = addrCast->getOperand(0).getDefiningOp();
3691 
3692  // An easier way to do this may just be to keep track of any pointer
3693  // references and their mapping to their respective operation
3694  if (auto addressOfOp = llvm::dyn_cast_if_present<LLVM::AddressOfOp>(op)) {
3695  if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
3696  addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
3697  addressOfOp.getGlobalName()))) {
3698 
3699  if (auto declareTargetGlobal =
3700  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
3701  gOp.getOperation())) {
3702 
3703  // In this case, we must utilise the reference pointer generated by the
3704  // declare target operation, similar to Clang
3705  if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
3706  mlir::omp::DeclareTargetCaptureClause::link) ||
3707  (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3708  mlir::omp::DeclareTargetCaptureClause::to &&
3709  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
3710  llvm::SmallString<64> suffix =
3711  getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
3712 
3713  if (gOp.getSymName().contains(suffix))
3714  return moduleTranslation.getLLVMModule()->getNamedValue(
3715  gOp.getSymName());
3716 
3717  return moduleTranslation.getLLVMModule()->getNamedValue(
3718  (gOp.getSymName().str() + suffix.str()).str());
3719  }
3720  }
3721  }
3722  }
3723 
3724  return nullptr;
3725 }
3726 
3727 namespace {
3728 // Append customMappers information to existing MapInfosTy
3729 struct MapInfosTy : llvm::OpenMPIRBuilder::MapInfosTy {
3731 
3732  /// Append arrays in \a CurInfo.
3733  void append(MapInfosTy &curInfo) {
3734  Mappers.append(curInfo.Mappers.begin(), curInfo.Mappers.end());
3735  llvm::OpenMPIRBuilder::MapInfosTy::append(curInfo);
3736  }
3737 };
3738 // A small helper structure to contain data gathered
3739 // for map lowering and coalese it into one area and
3740 // avoiding extra computations such as searches in the
3741 // llvm module for lowered mapped variables or checking
3742 // if something is declare target (and retrieving the
3743 // value) more than neccessary.
3744 struct MapInfoData : MapInfosTy {
3745  llvm::SmallVector<bool, 4> IsDeclareTarget;
3746  llvm::SmallVector<bool, 4> IsAMember;
3747  // Identify if mapping was added by mapClause or use_device clauses.
3748  llvm::SmallVector<bool, 4> IsAMapping;
3751  // Stripped off array/pointer to get the underlying
3752  // element type
3754 
3755  /// Append arrays in \a CurInfo.
3756  void append(MapInfoData &CurInfo) {
3757  IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
3758  CurInfo.IsDeclareTarget.end());
3759  MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
3760  OriginalValue.append(CurInfo.OriginalValue.begin(),
3761  CurInfo.OriginalValue.end());
3762  BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
3763  MapInfosTy::append(CurInfo);
3764  }
3765 };
3766 } // namespace
3767 
3768 static uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy,
3769  DataLayout &dl) {
3770  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
3771  arrTy.getElementType()))
3772  return getArrayElementSizeInBits(nestedArrTy, dl);
3773  return dl.getTypeSizeInBits(arrTy.getElementType());
3774 }
3775 
// This function calculates the size to be offloaded for a specified type, given
// its associated map clause (which can contain bounds information which affects
// the total size), this size is calculated based on the underlying element type
// e.g. given a 1-D array of ints, we will calculate the size from the integer
// type * number of elements in the array. This size can be used in other
// calculations but is ultimately used as an argument to the OpenMP runtimes
// kernel argument structure which is generated through the combinedInfo data
// structures.
// This function is somewhat equivalent to Clang's getExprTypeSize inside of
// CGOpenMPRuntime.cpp.
//
// \p type       MLIR type of the mapped entity.
// \p clauseOp   The defining map op (expected to be an omp.map.info).
// \p basePointer / \p baseType are accepted for interface symmetry but are
//               not consulted by the current computation.
// Returns an i64 llvm::Value holding the byte count.
static llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
                                   Operation *clauseOp,
                                   llvm::Value *basePointer,
                                   llvm::Type *baseType,
                                   llvm::IRBuilderBase &builder,
                                   LLVM::ModuleTranslation &moduleTranslation) {
  if (auto memberClause =
          mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
    // This calculates the size to transfer based on bounds and the underlying
    // element type, provided bounds have been specified (Fortran
    // pointers/allocatables/target and arrays that have sections specified fall
    // into this as well).
    if (!memberClause.getBounds().empty()) {
      llvm::Value *elementCount = builder.getInt64(1);
      for (auto bounds : memberClause.getBounds()) {
        if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
                bounds.getDefiningOp())) {
          // The below calculation for the size to be mapped calculated from the
          // map.info's bounds is: (elemCount * [UB - LB] + 1), later we
          // multiply by the underlying element types byte size to get the full
          // size to be offloaded based on the bounds
          elementCount = builder.CreateMul(
              elementCount,
              builder.CreateAdd(
                  builder.CreateSub(
                      moduleTranslation.lookupValue(boundOp.getUpperBound()),
                      moduleTranslation.lookupValue(boundOp.getLowerBound())),
                  builder.getInt64(1)));
        }
      }

      // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives
      // the size in inconsistent byte or bit format.
      uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
      if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
        underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);

      // The size in bytes x number of elements, the sizeInBytes stored is
      // the underyling types size, e.g. if ptr<i32>, it'll be the i32's
      // size, so we do some on the fly runtime math to get the size in
      // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
      // some adjustment for members with more complex types.
      return builder.CreateMul(elementCount,
                               builder.getInt64(underlyingTypeSzInBits / 8));
    }
  }

  // No usable bounds: fall back to the static size of the type itself.
  return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
}
3835 
3837  MapInfoData &mapData, SmallVectorImpl<Value> &mapVars,
3838  LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl,
3839  llvm::IRBuilderBase &builder, ArrayRef<Value> useDevPtrOperands = {},
3840  ArrayRef<Value> useDevAddrOperands = {},
3841  ArrayRef<Value> hasDevAddrOperands = {}) {
3842  auto checkIsAMember = [](const auto &mapVars, auto mapOp) {
3843  // Check if this is a member mapping and correctly assign that it is, if
3844  // it is a member of a larger object.
3845  // TODO: Need better handling of members, and distinguishing of members
3846  // that are implicitly allocated on device vs explicitly passed in as
3847  // arguments.
3848  // TODO: May require some further additions to support nested record
3849  // types, i.e. member maps that can have member maps.
3850  for (Value mapValue : mapVars) {
3851  auto map = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3852  for (auto member : map.getMembers())
3853  if (member == mapOp)
3854  return true;
3855  }
3856  return false;
3857  };
3858 
3859  // Process MapOperands
3860  for (Value mapValue : mapVars) {
3861  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3862  Value offloadPtr =
3863  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3864  mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
3865  mapData.Pointers.push_back(mapData.OriginalValue.back());
3866 
3867  if (llvm::Value *refPtr =
3868  getRefPtrIfDeclareTarget(offloadPtr,
3869  moduleTranslation)) { // declare target
3870  mapData.IsDeclareTarget.push_back(true);
3871  mapData.BasePointers.push_back(refPtr);
3872  } else { // regular mapped variable
3873  mapData.IsDeclareTarget.push_back(false);
3874  mapData.BasePointers.push_back(mapData.OriginalValue.back());
3875  }
3876 
3877  mapData.BaseType.push_back(
3878  moduleTranslation.convertType(mapOp.getVarType()));
3879  mapData.Sizes.push_back(
3880  getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
3881  mapData.BaseType.back(), builder, moduleTranslation));
3882  mapData.MapClause.push_back(mapOp.getOperation());
3883  mapData.Types.push_back(
3884  llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType()));
3885  mapData.Names.push_back(LLVM::createMappingInformation(
3886  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3887  mapData.DevicePointers.push_back(llvm::OpenMPIRBuilder::DeviceInfoTy::None);
3888  if (mapOp.getMapperId())
3889  mapData.Mappers.push_back(
3890  SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>(
3891  mapOp, mapOp.getMapperIdAttr()));
3892  else
3893  mapData.Mappers.push_back(nullptr);
3894  mapData.IsAMapping.push_back(true);
3895  mapData.IsAMember.push_back(checkIsAMember(mapVars, mapOp));
3896  }
3897 
3898  auto findMapInfo = [&mapData](llvm::Value *val,
3899  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
3900  unsigned index = 0;
3901  bool found = false;
3902  for (llvm::Value *basePtr : mapData.OriginalValue) {
3903  if (basePtr == val && mapData.IsAMapping[index]) {
3904  found = true;
3905  mapData.Types[index] |=
3906  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
3907  mapData.DevicePointers[index] = devInfoTy;
3908  }
3909  index++;
3910  }
3911  return found;
3912  };
3913 
3914  // Process useDevPtr(Addr)Operands
3915  auto addDevInfos = [&](const llvm::ArrayRef<Value> &useDevOperands,
3916  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
3917  for (Value mapValue : useDevOperands) {
3918  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3919  Value offloadPtr =
3920  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3921  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
3922 
3923  // Check if map info is already present for this entry.
3924  if (!findMapInfo(origValue, devInfoTy)) {
3925  mapData.OriginalValue.push_back(origValue);
3926  mapData.Pointers.push_back(mapData.OriginalValue.back());
3927  mapData.IsDeclareTarget.push_back(false);
3928  mapData.BasePointers.push_back(mapData.OriginalValue.back());
3929  mapData.BaseType.push_back(
3930  moduleTranslation.convertType(mapOp.getVarType()));
3931  mapData.Sizes.push_back(builder.getInt64(0));
3932  mapData.MapClause.push_back(mapOp.getOperation());
3933  mapData.Types.push_back(
3934  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
3935  mapData.Names.push_back(LLVM::createMappingInformation(
3936  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3937  mapData.DevicePointers.push_back(devInfoTy);
3938  mapData.Mappers.push_back(nullptr);
3939  mapData.IsAMapping.push_back(false);
3940  mapData.IsAMember.push_back(checkIsAMember(useDevOperands, mapOp));
3941  }
3942  }
3943  };
3944 
3945  addDevInfos(useDevAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
3946  addDevInfos(useDevPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
3947 
3948  for (Value mapValue : hasDevAddrOperands) {
3949  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3950  Value offloadPtr =
3951  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3952  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
3953  auto mapType =
3954  static_cast<llvm::omp::OpenMPOffloadMappingFlags>(mapOp.getMapType());
3955  auto mapTypeAlways = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
3956 
3957  mapData.OriginalValue.push_back(origValue);
3958  mapData.BasePointers.push_back(origValue);
3959  mapData.Pointers.push_back(origValue);
3960  mapData.IsDeclareTarget.push_back(false);
3961  mapData.BaseType.push_back(
3962  moduleTranslation.convertType(mapOp.getVarType()));
3963  mapData.Sizes.push_back(
3964  builder.getInt64(dl.getTypeSize(mapOp.getVarType())));
3965  mapData.MapClause.push_back(mapOp.getOperation());
3966  if (llvm::to_underlying(mapType & mapTypeAlways)) {
3967  // Descriptors are mapped with the ALWAYS flag, since they can get
3968  // rematerialized, so the address of the decriptor for a given object
3969  // may change from one place to another.
3970  mapData.Types.push_back(mapType);
3971  // Technically it's possible for a non-descriptor mapping to have
3972  // both has-device-addr and ALWAYS, so lookup the mapper in case it
3973  // exists.
3974  if (mapOp.getMapperId()) {
3975  mapData.Mappers.push_back(
3976  SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>(
3977  mapOp, mapOp.getMapperIdAttr()));
3978  } else {
3979  mapData.Mappers.push_back(nullptr);
3980  }
3981  } else {
3982  mapData.Types.push_back(
3983  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
3984  mapData.Mappers.push_back(nullptr);
3985  }
3986  mapData.Names.push_back(LLVM::createMappingInformation(
3987  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3988  mapData.DevicePointers.push_back(
3989  llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
3990  mapData.IsAMapping.push_back(false);
3991  mapData.IsAMember.push_back(checkIsAMember(hasDevAddrOperands, mapOp));
3992  }
3993 }
3994 
3995 static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
3996  auto *res = llvm::find(mapData.MapClause, memberOp);
3997  assert(res != mapData.MapClause.end() &&
3998  "MapInfoOp for member not found in MapData, cannot return index");
3999  return std::distance(mapData.MapClause.begin(), res);
4000 }
4001 
/// Given a parent omp.map.info operation with mapped members, return the
/// member MapInfoOp whose index path (from the members_index attribute)
/// sorts lowest (\p first == true) or highest (\p first == false).
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
                                                    bool first) {
  ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
  // Only 1 member has been mapped, we can return it.
  if (indexAttr.size() == 1)
    return cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp());

  llvm::SmallVector<size_t> indices(indexAttr.size());
  std::iota(indices.begin(), indices.end(), 0);

  // Sort member positions by lexicographic comparison of their index paths;
  // `first` flips the comparison so the desired member ends up at the front
  // of `indices` in both cases.
  llvm::sort(indices, [&](const size_t a, const size_t b) {
    auto memberIndicesA = cast<ArrayAttr>(indexAttr[a]);
    auto memberIndicesB = cast<ArrayAttr>(indexAttr[b]);
    for (const auto it : llvm::zip(memberIndicesA, memberIndicesB)) {
      int64_t aIndex = cast<IntegerAttr>(std::get<0>(it)).getInt();
      int64_t bIndex = cast<IntegerAttr>(std::get<1>(it)).getInt();

      if (aIndex == bIndex)
        continue;

      if (aIndex < bIndex)
        return first;

      if (aIndex > bIndex)
        return !first;
    }

    // Iterated up until the end of the smallest member and they were
    // found to be equal up to that point, so select the member with the
    // lowest index count, so the "parent".
    return memberIndicesA.size() < memberIndicesB.size();
  });

  return llvm::cast<omp::MapInfoOp>(
      mapInfo.getMembers()[indices.front()].getDefiningOp());
}
4038 
/// This function calculates the array/pointer offset for map data provided
/// with bounds operations, e.g. when provided something like the following:
///
/// Fortran
///     map(tofrom: array(2:5, 3:2))
/// or
/// C++
///     map(tofrom: array[1:4][2:3])
/// We must calculate the initial pointer offset to pass across, this function
/// performs this using bounds.
///
/// NOTE: which while specified in row-major order it currently needs to be
/// flipped for Fortran's column order array allocation and access (as
/// opposed to C++'s row-major, hence the backwards processing where order is
/// important). This is likely important to keep in mind for the future when
/// we incorporate a C++ frontend, both frontends will need to agree on the
/// ordering of generated bounds operations (one may have to flip them) to
/// make the below lowering frontend agnostic. The offload size
/// calculation may also have to be adjusted for C++.
static std::vector<llvm::Value *>
calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
                      llvm::IRBuilderBase &builder, bool isArrayTy,
                      OperandRange bounds) {
  std::vector<llvm::Value *> idx;
  // There's no bounds to calculate an offset from, we can safely
  // ignore and return no indices.
  if (bounds.empty())
    return idx;

  // If we have an array type, then we have its type so can treat it as a
  // normal GEP instruction where the bounds operations are simply indexes
  // into the array. We currently do reverse order of the bounds, which
  // I believe leans more towards Fortran's column-major in memory.
  if (isArrayTy) {
    // Leading zero index dereferences the pointer-to-array itself.
    idx.push_back(builder.getInt64(0));
    for (int i = bounds.size() - 1; i >= 0; --i) {
      if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
              bounds[i].getDefiningOp())) {
        idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
      }
    }
  } else {
    // If we do not have an array type, but we have bounds, then we're dealing
    // with a pointer that's being treated like an array and we have the
    // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
    // address (pointer pointing to the actual data) so we must calculate the
    // offset using a single index which the following two loops attempt to
    // compute.

    // Calculates the size offset we need to make per row e.g. first row or
    // column only needs to be offset by one, but the next would have to be
    // the previous row/column offset multiplied by the extent of current row.
    //
    // For example ([1][10][100]):
    //
    // - First row/column we move by 1 for each index increment
    // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
    // current) for 10 for each index increment
    // - Third row/column we would move by 10 (second row/column) *
    // (extent/size of current) 100 for 1000 for each index increment
    std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
    for (size_t i = 1; i < bounds.size(); ++i) {
      if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
              bounds[i].getDefiningOp())) {
        dimensionIndexSizeOffset.push_back(builder.CreateMul(
            moduleTranslation.lookupValue(boundOp.getExtent()),
            dimensionIndexSizeOffset[i - 1]));
      }
    }

    // Now that we have calculated how much we move by per index, we must
    // multiply each lower bound offset in indexes by the size offset we
    // have calculated in the previous and accumulate the results to get
    // our final resulting offset.
    for (int i = bounds.size() - 1; i >= 0; --i) {
      if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
              bounds[i].getDefiningOp())) {
        if (idx.empty())
          idx.emplace_back(builder.CreateMul(
              moduleTranslation.lookupValue(boundOp.getLowerBound()),
              dimensionIndexSizeOffset[i]));
        else
          idx.back() = builder.CreateAdd(
              idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
                                                boundOp.getLowerBound()),
                                            dimensionIndexSizeOffset[i]));
      }
    }
  }

  return idx;
}
4131 
4132 // This creates two insertions into the MapInfosTy data structure for the
4133 // "parent" of a set of members, (usually a container e.g.
4134 // class/structure/derived type) when subsequent members have also been
4135 // explicitly mapped on the same map clause. Certain types, such as Fortran
4136 // descriptors are mapped like this as well, however, the members are
4137 // implicit as far as a user is concerned, but we must explicitly map them
4138 // internally.
4139 //
4140 // This function also returns the memberOfFlag for this particular parent,
4141 // which is utilised in subsequent member mappings (by modifying there map type
4142 // with it) to indicate that a member is part of this parent and should be
4143 // treated by the runtime as such. Important to achieve the correct mapping.
4144 //
4145 // This function borrows a lot from Clang's emitCombinedEntry function
4146 // inside of CGOpenMPRuntime.cpp
4147 static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
4148  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
4149  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
4150  MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams) {
4151  assert(!ompBuilder.Config.isTargetDevice() &&
4152  "function only supported for host device codegen");
4153 
4154  // Map the first segment of the parent. If a user-defined mapper is attached,
4155  // include the parent's to/from-style bits (and common modifiers) in this
4156  // base entry so the mapper receives correct copy semantics via its 'type'
4157  // parameter. Also keep TARGET_PARAM when required for kernel arguments.
4158  llvm::omp::OpenMPOffloadMappingFlags baseFlag =
4159  isTargetParams
4160  ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
4161  : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE;
4162 
4163  // Detect if this mapping uses a user-defined mapper.
4164  bool hasUserMapper = mapData.Mappers[mapDataIndex] != nullptr;
4165  if (hasUserMapper) {
4166  using mapFlags = llvm::omp::OpenMPOffloadMappingFlags;
4167  // Preserve relevant map-type bits from the parent clause. These include
4168  // the copy direction (TO/FROM), as well as commonly used modifiers that
4169  // should be visible to the mapper for correct behaviour.
4170  mapFlags parentFlags = mapData.Types[mapDataIndex];
4171  mapFlags preserve = mapFlags::OMP_MAP_TO | mapFlags::OMP_MAP_FROM |
4172  mapFlags::OMP_MAP_ALWAYS | mapFlags::OMP_MAP_CLOSE |
4173  mapFlags::OMP_MAP_PRESENT | mapFlags::OMP_MAP_OMPX_HOLD;
4174  baseFlag |= (parentFlags & preserve);
4175  }
4176 
4177  combinedInfo.Types.emplace_back(baseFlag);
4178  combinedInfo.DevicePointers.emplace_back(
4179  mapData.DevicePointers[mapDataIndex]);
4180  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIndex]);
4181  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
4182  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
4183  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
4184 
4185  // Calculate size of the parent object being mapped based on the
4186  // addresses at runtime, highAddr - lowAddr = size. This of course
4187  // doesn't factor in allocated data like pointers, hence the further
4188  // processing of members specified by users, or in the case of
4189  // Fortran pointers and allocatables, the mapping of the pointed to
4190  // data by the descriptor (which itself, is a structure containing
4191  // runtime information on the dynamically allocated data).
4192  auto parentClause =
4193  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
4194 
4195  llvm::Value *lowAddr, *highAddr;
4196  if (!parentClause.getPartialMap()) {
4197  lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
4198  builder.getPtrTy());
4199  highAddr = builder.CreatePointerCast(
4200  builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
4201  mapData.Pointers[mapDataIndex], 1),
4202  builder.getPtrTy());
4203  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
4204  } else {
4205  auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
4206  int firstMemberIdx = getMapDataMemberIdx(
4207  mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
4208  lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
4209  builder.getPtrTy());
4210  int lastMemberIdx = getMapDataMemberIdx(
4211  mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
4212  highAddr = builder.CreatePointerCast(
4213  builder.CreateGEP(mapData.BaseType[lastMemberIdx],
4214  mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
4215  builder.getPtrTy());
4216  combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
4217  }
4218 
4219  llvm::Value *size = builder.CreateIntCast(
4220  builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
4221  builder.getInt64Ty(),
4222  /*isSigned=*/false);
4223  combinedInfo.Sizes.push_back(size);
4224 
4225  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
4226  ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
4227 
4228  // This creates the initial MEMBER_OF mapping that consists of
4229  // the parent/top level container (same as above effectively, except
4230  // with a fixed initial compile time size and separate maptype which
4231  // indicates the true mape type (tofrom etc.). This parent mapping is
4232  // only relevant if the structure in its totality is being mapped,
4233  // otherwise the above suffices.
4234  if (!parentClause.getPartialMap()) {
4235  // TODO: This will need to be expanded to include the whole host of logic
4236  // for the map flags that Clang currently supports (e.g. it should do some
4237  // further case specific flag modifications). For the moment, it handles
4238  // what we support as expected.
4239  llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
4240  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
4241  combinedInfo.Types.emplace_back(mapFlag);
4242  combinedInfo.DevicePointers.emplace_back(
4244  combinedInfo.Mappers.emplace_back(nullptr);
4245  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
4246  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
4247  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
4248  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
4249  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
4250  }
4251  return memberOfFlag;
4252 }
4253 
4254 // The intent is to verify if the mapped data being passed is a
4255 // pointer -> pointee that requires special handling in certain cases,
4256 // e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
4257 //
4258 // There may be a better way to verify this, but unfortunately with
4259 // opaque pointers we lose the ability to easily check if something is
4260 // a pointer whilst maintaining access to the underlying type.
4261 static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
4262  // If we have a varPtrPtr field assigned then the underlying type is a pointer
4263  if (mapOp.getVarPtrPtr())
4264  return true;
4265 
4266  // If the map data is declare target with a link clause, then it's represented
4267  // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
4268  // no relation to pointers.
4269  if (isDeclareTargetLink(mapOp.getVarPtr()))
4270  return true;
4271 
4272  return false;
4273 }
4274 
4275 // This function is intended to add explicit mappings of members
4277  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
4278  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
4279  MapInfoData &mapData, uint64_t mapDataIndex,
4280  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
4281  assert(!ompBuilder.Config.isTargetDevice() &&
4282  "function only supported for host device codegen");
4283 
4284  auto parentClause =
4285  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
4286 
4287  for (auto mappedMembers : parentClause.getMembers()) {
4288  auto memberClause =
4289  llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp());
4290  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
4291 
4292  assert(memberDataIdx >= 0 && "could not find mapped member of structure");
4293 
4294  // If we're currently mapping a pointer to a block of data, we must
4295  // initially map the pointer, and then attatch/bind the data with a
4296  // subsequent map to the pointer. This segment of code generates the
4297  // pointer mapping, which can in certain cases be optimised out as Clang
4298  // currently does in its lowering. However, for the moment we do not do so,
4299  // in part as we currently have substantially less information on the data
4300  // being mapped at this stage.
4301  if (checkIfPointerMap(memberClause)) {
4302  auto mapFlag =
4303  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
4304  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4305  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
4306  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
4307  combinedInfo.Types.emplace_back(mapFlag);
4308  combinedInfo.DevicePointers.emplace_back(
4310  combinedInfo.Mappers.emplace_back(nullptr);
4311  combinedInfo.Names.emplace_back(
4312  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
4313  combinedInfo.BasePointers.emplace_back(
4314  mapData.BasePointers[mapDataIndex]);
4315  combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
4316  combinedInfo.Sizes.emplace_back(builder.getInt64(
4317  moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
4318  }
4319 
4320  // Same MemberOfFlag to indicate its link with parent and other members
4321  // of.
4322  auto mapFlag =
4323  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
4324  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4325  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
4326  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
4327  if (checkIfPointerMap(memberClause))
4328  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
4329 
4330  combinedInfo.Types.emplace_back(mapFlag);
4331  combinedInfo.DevicePointers.emplace_back(
4332  mapData.DevicePointers[memberDataIdx]);
4333  combinedInfo.Mappers.emplace_back(mapData.Mappers[memberDataIdx]);
4334  combinedInfo.Names.emplace_back(
4335  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
4336  uint64_t basePointerIndex =
4337  checkIfPointerMap(memberClause) ? memberDataIdx : mapDataIndex;
4338  combinedInfo.BasePointers.emplace_back(
4339  mapData.BasePointers[basePointerIndex]);
4340  combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
4341 
4342  llvm::Value *size = mapData.Sizes[memberDataIdx];
4343  if (checkIfPointerMap(memberClause)) {
4344  size = builder.CreateSelect(
4345  builder.CreateIsNull(mapData.Pointers[memberDataIdx]),
4346  builder.getInt64(0), size);
4347  }
4348 
4349  combinedInfo.Sizes.emplace_back(size);
4350  }
4351 }
4352 
4353 static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
4354  MapInfosTy &combinedInfo, bool isTargetParams,
4355  int mapDataParentIdx = -1) {
4356  // Declare Target Mappings are excluded from being marked as
4357  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
4358  // marked with OMP_MAP_PTR_AND_OBJ instead.
4359  auto mapFlag = mapData.Types[mapDataIdx];
4360  auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
4361 
4362  bool isPtrTy = checkIfPointerMap(mapInfoOp);
4363  if (isPtrTy)
4364  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
4365 
4366  if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx])
4367  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4368 
4369  if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy &&
4370  !isPtrTy)
4371  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
4372 
4373  // if we're provided a mapDataParentIdx, then the data being mapped is
4374  // part of a larger object (in a parent <-> member mapping) and in this
4375  // case our BasePointer should be the parent.
4376  if (mapDataParentIdx >= 0)
4377  combinedInfo.BasePointers.emplace_back(
4378  mapData.BasePointers[mapDataParentIdx]);
4379  else
4380  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
4381 
4382  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
4383  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
4384  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]);
4385  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
4386  combinedInfo.Types.emplace_back(mapFlag);
4387  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
4388 }
4389 
/// Emit map entries for a map clause that has explicitly mapped members:
/// either a single combined entry (when only one member of a partial map is
/// involved) or a parent entry followed by MEMBER_OF-tagged member entries.
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation,
                                    llvm::IRBuilderBase &builder,
                                    llvm::OpenMPIRBuilder &ompBuilder,
                                    DataLayout &dl, MapInfosTy &combinedInfo,
                                    MapInfoData &mapData, uint64_t mapDataIndex,
                                    bool isTargetParams) {
  assert(!ompBuilder.Config.isTargetDevice() &&
         "function only supported for host device codegen");

  auto parentClause =
      llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);

  // If we have a partial map (no parent referenced in the map clauses of the
  // directive, only members) and only a single member, we do not need to bind
  // the map of the member to the parent, we can pass the member separately.
  if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) {
    auto memberClause = llvm::cast<omp::MapInfoOp>(
        parentClause.getMembers()[0].getDefiningOp());
    int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
    // Note: Clang treats arrays with explicit bounds that fall into this
    // category as a parent with map case, however, it seems this isn't a
    // requirement, and processing them as an individual map is fine. So,
    // we will handle them as individual maps for the moment, as it's
    // difficult for us to check this as we always require bounds to be
    // specified currently and it's also marginally more optimal (single
    // map rather than two). The difference may come from the fact that
    // Clang maps array without bounds as pointers (which we do not
    // currently do), whereas we treat them as arrays in all cases
    // currently.
    processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams,
                         mapDataIndex);
    return;
  }

  // Otherwise, map the parent first and then each member, linking the
  // members to the parent through the returned MEMBER_OF flag.
  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
      mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
                           combinedInfo, mapData, mapDataIndex, isTargetParams);
  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
                              combinedInfo, mapData, mapDataIndex,
                              memberOfParentFlag);
}
4431 
// This is a variation on Clang's GenerateOpenMPCapturedVars, which
// generates different operation (e.g. load/store) combinations for
// arguments to the kernel, based on map capture kinds which are then
// utilised in the combinedInfo in place of the original Map value.
static void
createAlteredByCaptureMap(MapInfoData &mapData,
                          LLVM::ModuleTranslation &moduleTranslation,
                          llvm::IRBuilderBase &builder) {
  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
         "function only supported for host device codegen");
  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
    // if it's declare target, skip it, it's handled separately.
    if (!mapData.IsDeclareTarget[i]) {
      auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
      omp::VariableCaptureKind captureKind = mapOp.getMapCaptureType();
      bool isPtrTy = checkIfPointerMap(mapOp);

      // Currently handles array sectioning lowerbound case, but more
      // logic may be required in the future. Clang invokes EmitLValue,
      // which has specialised logic for special Clang types such as user
      // defines, so it is possible we will have to extend this for
      // structures or other complex types. As the general idea is that this
      // function mimics some of the logic from Clang that we require for
      // kernel argument passing from host -> device.
      switch (captureKind) {
      case omp::VariableCaptureKind::ByRef: {
        llvm::Value *newV = mapData.Pointers[i];
        std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
            moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
            mapOp.getBounds());
        // Load pointer-like data first so the GEP below indexes into the
        // pointee rather than the pointer itself.
        if (isPtrTy)
          newV = builder.CreateLoad(builder.getPtrTy(), newV);

        if (!offsetIdx.empty())
          newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
                                           "array_offset");
        mapData.Pointers[i] = newV;
      } break;
      case omp::VariableCaptureKind::ByCopy: {
        llvm::Type *type = mapData.BaseType[i];
        llvm::Value *newV;
        if (mapData.Pointers[i]->getType()->isPointerTy())
          newV = builder.CreateLoad(type, mapData.Pointers[i]);
        else
          newV = mapData.Pointers[i];

        // Non-pointer by-copy values are spilled to a temporary alloca (at
        // the function's alloca insertion point) and reloaded as a pointer,
        // so they can be passed through the kernel argument structure.
        if (!isPtrTy) {
          auto curInsert = builder.saveIP();
          llvm::DebugLoc DbgLoc = builder.getCurrentDebugLocation();
          builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
          auto *memTempAlloc =
              builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
          builder.SetCurrentDebugLocation(DbgLoc);
          builder.restoreIP(curInsert);

          builder.CreateStore(newV, memTempAlloc);
          newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
        }

        mapData.Pointers[i] = newV;
        mapData.BasePointers[i] = newV;
      } break;
      case omp::VariableCaptureKind::This:
      case omp::VariableCaptureKind::VLAType:
        mapData.MapClause[i]->emitOpError("Unhandled capture kind");
        break;
      }
    }
  }
}
4502 
// Generate all map related information and fill the combinedInfo.
static void genMapInfos(llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation,
                        DataLayout &dl, MapInfosTy &combinedInfo,
                        MapInfoData &mapData, bool isTargetParams = false) {
  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
         "function only supported for host device codegen");

  // We wish to modify some of the methods in which arguments are
  // passed based on their capture type by the target region, this can
  // involve generating new loads and stores, which changes the
  // MLIR value to LLVM value mapping, however, we only wish to do this
  // locally for the current function/target and also avoid altering
  // ModuleTranslation, so we remap the base pointer or pointer stored
  // in the map infos corresponding MapInfoData, which is later accessed
  // by genMapInfos and createTarget to help generate the kernel and
  // kernel arg structure. It primarily becomes relevant in cases like
  // bycopy, or byref range'd arrays. In the default case, we simply
  // pass the pointer byref as both basePointer and pointer.
  createAlteredByCaptureMap(mapData, moduleTranslation, builder);

  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

  // We operate under the assumption that all vectors that are
  // required in MapInfoData are of equal lengths (either filled with
  // default constructed data or appropriate information) so we can
  // utilise the size from any component of MapInfoData, if we can't
  // something is missing from the initial MapInfoData construction.
  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
    // NOTE/TODO: We currently do not support arbitrary depth record
    // type mapping. Members are emitted alongside their parent below,
    // so standalone member entries are skipped here.
    if (mapData.IsAMember[i])
      continue;

    auto mapInfoOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[i]);
    if (!mapInfoOp.getMembers().empty()) {
      processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
                              combinedInfo, mapData, i, isTargetParams);
      continue;
    }

    processIndividualMap(mapData, i, combinedInfo, isTargetParams);
  }
}
4547 
4549 emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder,
4550  LLVM::ModuleTranslation &moduleTranslation,
4551  llvm::StringRef mapperFuncName);
4552 
4554 getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder,
4555  LLVM::ModuleTranslation &moduleTranslation) {
4556  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4557  "function only supported for host device codegen");
4558  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
4559  std::string mapperFuncName =
4560  moduleTranslation.getOpenMPBuilder()->createPlatformSpecificName(
4561  {"omp_mapper", declMapperOp.getSymName()});
4562 
4563  if (auto *lookupFunc = moduleTranslation.lookupFunction(mapperFuncName))
4564  return lookupFunc;
4565 
4566  return emitUserDefinedMapper(declMapperOp, builder, moduleTranslation,
4567  mapperFuncName);
4568 }
4569 
4571 emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder,
4572  LLVM::ModuleTranslation &moduleTranslation,
4573  llvm::StringRef mapperFuncName) {
4574  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4575  "function only supported for host device codegen");
4576  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
4577  auto declMapperInfoOp = declMapperOp.getDeclareMapperInfo();
4578  DataLayout dl = DataLayout(declMapperOp->getParentOfType<ModuleOp>());
4579  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4580  llvm::Type *varType = moduleTranslation.convertType(declMapperOp.getType());
4581  SmallVector<Value> mapVars = declMapperInfoOp.getMapVars();
4582 
4583  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4584 
4585  // Fill up the arrays with all the mapped variables.
4586  MapInfosTy combinedInfo;
4587  auto genMapInfoCB =
4588  [&](InsertPointTy codeGenIP, llvm::Value *ptrPHI,
4589  llvm::Value *unused2) -> llvm::OpenMPIRBuilder::MapInfosOrErrorTy {
4590  builder.restoreIP(codeGenIP);
4591  moduleTranslation.mapValue(declMapperOp.getSymVal(), ptrPHI);
4592  moduleTranslation.mapBlock(&declMapperOp.getRegion().front(),
4593  builder.GetInsertBlock());
4594  if (failed(moduleTranslation.convertBlock(declMapperOp.getRegion().front(),
4595  /*ignoreArguments=*/true,
4596  builder)))
4597  return llvm::make_error<PreviouslyReportedError>();
4598  MapInfoData mapData;
4599  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
4600  builder);
4601  genMapInfos(builder, moduleTranslation, dl, combinedInfo, mapData);
4602 
4603  // Drop the mapping that is no longer necessary so that the same region can
4604  // be processed multiple times.
4605  moduleTranslation.forgetMapping(declMapperOp.getRegion());
4606  return combinedInfo;
4607  };
4608 
4609  auto customMapperCB = [&](unsigned i) -> llvm::Expected<llvm::Function *> {
4610  if (!combinedInfo.Mappers[i])
4611  return nullptr;
4612  return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
4613  moduleTranslation);
4614  };
4615 
4616  llvm::Expected<llvm::Function *> newFn = ompBuilder->emitUserDefinedMapper(
4617  genMapInfoCB, varType, mapperFuncName, customMapperCB);
4618  if (!newFn)
4619  return newFn.takeError();
4620  moduleTranslation.mapFunction(mapperFuncName, *newFn);
4621  return *newFn;
4622 }
4623 
4624 static LogicalResult
4625 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
4626  LLVM::ModuleTranslation &moduleTranslation) {
4627  llvm::Value *ifCond = nullptr;
4628  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
4629  SmallVector<Value> mapVars;
4630  SmallVector<Value> useDevicePtrVars;
4631  SmallVector<Value> useDeviceAddrVars;
4632  llvm::omp::RuntimeFunction RTLFn;
4633  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
4634 
4635  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4636  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
4637  /*SeparateBeginEndCalls=*/true);
4638  bool isTargetDevice = ompBuilder->Config.isTargetDevice();
4639  bool isOffloadEntry =
4640  isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
4641 
4642  LogicalResult result =
4644  .Case([&](omp::TargetDataOp dataOp) {
4645  if (failed(checkImplementationStatus(*dataOp)))
4646  return failure();
4647 
4648  if (auto ifVar = dataOp.getIfExpr())
4649  ifCond = moduleTranslation.lookupValue(ifVar);
4650 
4651  if (auto devId = dataOp.getDevice())
4652  if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
4653  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4654  deviceID = intAttr.getInt();
4655 
4656  mapVars = dataOp.getMapVars();
4657  useDevicePtrVars = dataOp.getUseDevicePtrVars();
4658  useDeviceAddrVars = dataOp.getUseDeviceAddrVars();
4659  return success();
4660  })
4661  .Case([&](omp::TargetEnterDataOp enterDataOp) -> LogicalResult {
4662  if (failed(checkImplementationStatus(*enterDataOp)))
4663  return failure();
4664 
4665  if (auto ifVar = enterDataOp.getIfExpr())
4666  ifCond = moduleTranslation.lookupValue(ifVar);
4667 
4668  if (auto devId = enterDataOp.getDevice())
4669  if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
4670  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4671  deviceID = intAttr.getInt();
4672  RTLFn =
4673  enterDataOp.getNowait()
4674  ? llvm::omp::OMPRTL___tgt_target_data_begin_nowait_mapper
4675  : llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
4676  mapVars = enterDataOp.getMapVars();
4677  info.HasNoWait = enterDataOp.getNowait();
4678  return success();
4679  })
4680  .Case([&](omp::TargetExitDataOp exitDataOp) -> LogicalResult {
4681  if (failed(checkImplementationStatus(*exitDataOp)))
4682  return failure();
4683 
4684  if (auto ifVar = exitDataOp.getIfExpr())
4685  ifCond = moduleTranslation.lookupValue(ifVar);
4686 
4687  if (auto devId = exitDataOp.getDevice())
4688  if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
4689  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4690  deviceID = intAttr.getInt();
4691 
4692  RTLFn = exitDataOp.getNowait()
4693  ? llvm::omp::OMPRTL___tgt_target_data_end_nowait_mapper
4694  : llvm::omp::OMPRTL___tgt_target_data_end_mapper;
4695  mapVars = exitDataOp.getMapVars();
4696  info.HasNoWait = exitDataOp.getNowait();
4697  return success();
4698  })
4699  .Case([&](omp::TargetUpdateOp updateDataOp) -> LogicalResult {
4700  if (failed(checkImplementationStatus(*updateDataOp)))
4701  return failure();
4702 
4703  if (auto ifVar = updateDataOp.getIfExpr())
4704  ifCond = moduleTranslation.lookupValue(ifVar);
4705 
4706  if (auto devId = updateDataOp.getDevice())
4707  if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
4708  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4709  deviceID = intAttr.getInt();
4710 
4711  RTLFn =
4712  updateDataOp.getNowait()
4713  ? llvm::omp::OMPRTL___tgt_target_data_update_nowait_mapper
4714  : llvm::omp::OMPRTL___tgt_target_data_update_mapper;
4715  mapVars = updateDataOp.getMapVars();
4716  info.HasNoWait = updateDataOp.getNowait();
4717  return success();
4718  })
4719  .DefaultUnreachable("unexpected operation");
4720 
4721  if (failed(result))
4722  return failure();
4723  // Pretend we have IF(false) if we're not doing offload.
4724  if (!isOffloadEntry)
4725  ifCond = builder.getFalse();
4726 
4727  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4728  MapInfoData mapData;
4729  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, DL,
4730  builder, useDevicePtrVars, useDeviceAddrVars);
4731 
4732  // Fill up the arrays with all the mapped variables.
4733  MapInfosTy combinedInfo;
4734  auto genMapInfoCB = [&](InsertPointTy codeGenIP) -> MapInfosTy & {
4735  builder.restoreIP(codeGenIP);
4736  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
4737  return combinedInfo;
4738  };
4739 
4740  // Define a lambda to apply mappings between use_device_addr and
4741  // use_device_ptr base pointers, and their associated block arguments.
4742  auto mapUseDevice =
4743  [&moduleTranslation](
4744  llvm::OpenMPIRBuilder::DeviceInfoTy type,
4746  llvm::SmallVectorImpl<Value> &useDeviceVars, MapInfoData &mapInfoData,
4747  llvm::function_ref<llvm::Value *(llvm::Value *)> mapper = nullptr) {
4748  for (auto [arg, useDevVar] :
4749  llvm::zip_equal(blockArgs, useDeviceVars)) {
4750 
4751  auto getMapBasePtr = [](omp::MapInfoOp mapInfoOp) {
4752  return mapInfoOp.getVarPtrPtr() ? mapInfoOp.getVarPtrPtr()
4753  : mapInfoOp.getVarPtr();
4754  };
4755 
4756  auto useDevMap = cast<omp::MapInfoOp>(useDevVar.getDefiningOp());
4757  for (auto [mapClause, devicePointer, basePointer] : llvm::zip_equal(
4758  mapInfoData.MapClause, mapInfoData.DevicePointers,
4759  mapInfoData.BasePointers)) {
4760  auto mapOp = cast<omp::MapInfoOp>(mapClause);
4761  if (getMapBasePtr(mapOp) != getMapBasePtr(useDevMap) ||
4762  devicePointer != type)
4763  continue;
4764 
4765  if (llvm::Value *devPtrInfoMap =
4766  mapper ? mapper(basePointer) : basePointer) {
4767  moduleTranslation.mapValue(arg, devPtrInfoMap);
4768  break;
4769  }
4770  }
4771  }
4772  };
4773 
4774  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
4775  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType)
4776  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
4777  // We must always restoreIP regardless of doing anything the caller
4778  // does not restore it, leading to incorrect (no) branch generation.
4779  builder.restoreIP(codeGenIP);
4780  assert(isa<omp::TargetDataOp>(op) &&
4781  "BodyGen requested for non TargetDataOp");
4782  auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(op);
4783  Region &region = cast<omp::TargetDataOp>(op).getRegion();
4784  switch (bodyGenType) {
4785  case BodyGenTy::Priv:
4786  // Check if any device ptr/addr info is available
4787  if (!info.DevicePtrInfoMap.empty()) {
4788  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4789  blockArgIface.getUseDeviceAddrBlockArgs(),
4790  useDeviceAddrVars, mapData,
4791  [&](llvm::Value *basePointer) -> llvm::Value * {
4792  if (!info.DevicePtrInfoMap[basePointer].second)
4793  return nullptr;
4794  return builder.CreateLoad(
4795  builder.getPtrTy(),
4796  info.DevicePtrInfoMap[basePointer].second);
4797  });
4798  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4799  blockArgIface.getUseDevicePtrBlockArgs(), useDevicePtrVars,
4800  mapData, [&](llvm::Value *basePointer) {
4801  return info.DevicePtrInfoMap[basePointer].second;
4802  });
4803 
4804  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
4805  moduleTranslation)))
4806  return llvm::make_error<PreviouslyReportedError>();
4807  }
4808  break;
4809  case BodyGenTy::DupNoPriv:
4810  if (info.DevicePtrInfoMap.empty()) {
4811  // For host device we still need to do the mapping for codegen,
4812  // otherwise it may try to lookup a missing value.
4813  if (!ompBuilder->Config.IsTargetDevice.value_or(false)) {
4814  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4815  blockArgIface.getUseDeviceAddrBlockArgs(),
4816  useDeviceAddrVars, mapData);
4817  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4818  blockArgIface.getUseDevicePtrBlockArgs(),
4819  useDevicePtrVars, mapData);
4820  }
4821  }
4822  break;
4823  case BodyGenTy::NoPriv:
4824  // If device info is available then region has already been generated
4825  if (info.DevicePtrInfoMap.empty()) {
4826  // For device pass, if use_device_ptr(addr) mappings were present,
4827  // we need to link them here before codegen.
4828  if (ompBuilder->Config.IsTargetDevice.value_or(false)) {
4829  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4830  blockArgIface.getUseDeviceAddrBlockArgs(),
4831  useDeviceAddrVars, mapData);
4832  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4833  blockArgIface.getUseDevicePtrBlockArgs(),
4834  useDevicePtrVars, mapData);
4835  }
4836 
4837  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
4838  moduleTranslation)))
4839  return llvm::make_error<PreviouslyReportedError>();
4840  }
4841  break;
4842  }
4843  return builder.saveIP();
4844  };
4845 
4846  auto customMapperCB =
4847  [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
4848  if (!combinedInfo.Mappers[i])
4849  return nullptr;
4850  info.HasMapper = true;
4851  return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
4852  moduleTranslation);
4853  };
4854 
4855  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4856  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4857  findAllocaInsertPoint(builder, moduleTranslation);
4858  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() {
4859  if (isa<omp::TargetDataOp>(op))
4860  return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
4861  builder.getInt64(deviceID), ifCond,
4862  info, genMapInfoCB, customMapperCB,
4863  /*MapperFunc=*/nullptr, bodyGenCB,
4864  /*DeviceAddrCB=*/nullptr);
4865  return ompBuilder->createTargetData(
4866  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
4867  info, genMapInfoCB, customMapperCB, &RTLFn);
4868  }();
4869 
4870  if (failed(handleError(afterIP, *op)))
4871  return failure();
4872 
4873  builder.restoreIP(*afterIP);
4874  return success();
4875 }
4876 
4877 static LogicalResult
4878 convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
4879  LLVM::ModuleTranslation &moduleTranslation) {
4880  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4881  auto distributeOp = cast<omp::DistributeOp>(opInst);
4882  if (failed(checkImplementationStatus(opInst)))
4883  return failure();
4884 
4885  /// Process teams op reduction in distribute if the reduction is contained in
4886  /// the distribute op.
4887  omp::TeamsOp teamsOp = opInst.getParentOfType<omp::TeamsOp>();
4888  bool doDistributeReduction =
4889  teamsOp ? teamsReductionContainedInDistribute(teamsOp) : false;
4890 
4891  DenseMap<Value, llvm::Value *> reductionVariableMap;
4892  unsigned numReductionVars = teamsOp ? teamsOp.getNumReductionVars() : 0;
4893  SmallVector<omp::DeclareReductionOp> reductionDecls;
4894  SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
4895  llvm::ArrayRef<bool> isByRef;
4896 
4897  if (doDistributeReduction) {
4898  isByRef = getIsByRef(teamsOp.getReductionByref());
4899  assert(isByRef.size() == teamsOp.getNumReductionVars());
4900 
4901  collectReductionDecls(teamsOp, reductionDecls);
4902  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4903  findAllocaInsertPoint(builder, moduleTranslation);
4904 
4905  MutableArrayRef<BlockArgument> reductionArgs =
4906  llvm::cast<omp::BlockArgOpenMPOpInterface>(*teamsOp)
4907  .getReductionBlockArgs();
4908 
4910  teamsOp, reductionArgs, builder, moduleTranslation, allocaIP,
4911  reductionDecls, privateReductionVariables, reductionVariableMap,
4912  isByRef)))
4913  return failure();
4914  }
4915 
4916  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4917  auto bodyGenCB = [&](InsertPointTy allocaIP,
4918  InsertPointTy codeGenIP) -> llvm::Error {
4919  // Save the alloca insertion point on ModuleTranslation stack for use in
4920  // nested regions.
4922  moduleTranslation, allocaIP);
4923 
4924  // DistributeOp has only one region associated with it.
4925  builder.restoreIP(codeGenIP);
4926  PrivateVarsInfo privVarsInfo(distributeOp);
4927 
4928  llvm::Expected<llvm::BasicBlock *> afterAllocas =
4929  allocatePrivateVars(builder, moduleTranslation, privVarsInfo, allocaIP);
4930  if (handleError(afterAllocas, opInst).failed())
4931  return llvm::make_error<PreviouslyReportedError>();
4932 
4933  if (handleError(initPrivateVars(builder, moduleTranslation, privVarsInfo),
4934  opInst)
4935  .failed())
4936  return llvm::make_error<PreviouslyReportedError>();
4937 
4939  distributeOp, builder, moduleTranslation, privVarsInfo.mlirVars,
4940  privVarsInfo.llvmVars, privVarsInfo.privatizers,
4941  distributeOp.getPrivateNeedsBarrier())))
4942  return llvm::make_error<PreviouslyReportedError>();
4943 
4944  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4945  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4947  convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
4948  builder, moduleTranslation);
4949  if (!regionBlock)
4950  return regionBlock.takeError();
4951  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
4952 
4953  // Skip applying a workshare loop below when translating 'distribute
4954  // parallel do' (it's been already handled by this point while translating
4955  // the nested omp.wsloop).
4956  if (!isa_and_present<omp::WsloopOp>(distributeOp.getNestedWrapper())) {
4957  // TODO: Add support for clauses which are valid for DISTRIBUTE
4958  // constructs. Static schedule is the default.
4959  auto schedule = omp::ClauseScheduleKind::Static;
4960  bool isOrdered = false;
4961  std::optional<omp::ScheduleModifier> scheduleMod;
4962  bool isSimd = false;
4963  llvm::omp::WorksharingLoopType workshareLoopType =
4964  llvm::omp::WorksharingLoopType::DistributeStaticLoop;
4965  bool loopNeedsBarrier = false;
4966  llvm::Value *chunk = nullptr;
4967 
4968  llvm::CanonicalLoopInfo *loopInfo =
4969  findCurrentLoopInfo(moduleTranslation);
4970  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
4971  ompBuilder->applyWorkshareLoop(
4972  ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
4973  convertToScheduleKind(schedule), chunk, isSimd,
4974  scheduleMod == omp::ScheduleModifier::monotonic,
4975  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
4976  workshareLoopType);
4977 
4978  if (!wsloopIP)
4979  return wsloopIP.takeError();
4980  }
4981 
4982  if (failed(cleanupPrivateVars(builder, moduleTranslation,
4983  distributeOp.getLoc(), privVarsInfo.llvmVars,
4984  privVarsInfo.privatizers)))
4985  return llvm::make_error<PreviouslyReportedError>();
4986 
4987  return llvm::Error::success();
4988  };
4989 
4990  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4991  findAllocaInsertPoint(builder, moduleTranslation);
4992  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4993  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
4994  ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
4995 
4996  if (failed(handleError(afterIP, opInst)))
4997  return failure();
4998 
4999  builder.restoreIP(*afterIP);
5000 
5001  if (doDistributeReduction) {
5002  // Process the reductions if required.
5004  teamsOp, builder, moduleTranslation, allocaIP, reductionDecls,
5005  privateReductionVariables, isByRef,
5006  /*isNoWait*/ false, /*isTeamsReduction*/ true);
5007  }
5008  return success();
5009 }
5010 
5011 /// Lowers the FlagsAttr which is applied to the module on the device
5012 /// pass when offloading, this attribute contains OpenMP RTL globals that can
5013 /// be passed as flags to the frontend, otherwise they are set to default
5014 static LogicalResult
5015 convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
5016  LLVM::ModuleTranslation &moduleTranslation) {
5017  if (!cast<mlir::ModuleOp>(op))
5018  return failure();
5019 
5020  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5021 
5022  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
5023  attribute.getOpenmpDeviceVersion());
5024 
5025  if (attribute.getNoGpuLib())
5026  return success();
5027 
5028  ompBuilder->createGlobalFlag(
5029  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
5030  "__omp_rtl_debug_kind");
5031  ompBuilder->createGlobalFlag(
5032  attribute
5033  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
5034  ,
5035  "__omp_rtl_assume_teams_oversubscription");
5036  ompBuilder->createGlobalFlag(
5037  attribute
5038  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
5039  ,
5040  "__omp_rtl_assume_threads_oversubscription");
5041  ompBuilder->createGlobalFlag(
5042  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
5043  "__omp_rtl_assume_no_thread_state");
5044  ompBuilder->createGlobalFlag(
5045  attribute
5046  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
5047  ,
5048  "__omp_rtl_assume_no_nested_parallelism");
5049  return success();
5050 }
5051 
5052 static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
5053  omp::TargetOp targetOp,
5054  llvm::StringRef parentName = "") {
5055  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
5056 
5057  assert(fileLoc && "No file found from location");
5058  StringRef fileName = fileLoc.getFilename().getValue();
5059 
5060  llvm::sys::fs::UniqueID id;
5061  uint64_t line = fileLoc.getLine();
5062  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
5063  size_t fileHash = llvm::hash_value(fileName.str());
5064  size_t deviceId = 0xdeadf17e;
5065  targetInfo =
5066  llvm::TargetRegionEntryInfo(parentName, deviceId, fileHash, line);
5067  } else {
5068  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
5069  id.getFile(), line);
5070  }
5071 }
5072 
5073 static void
5074 handleDeclareTargetMapVar(MapInfoData &mapData,
5075  LLVM::ModuleTranslation &moduleTranslation,
5076  llvm::IRBuilderBase &builder, llvm::Function *func) {
5077  assert(moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
5078  "function only supported for target device codegen");
5079  llvm::IRBuilderBase::InsertPointGuard guard(builder);
5080  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
5081  // In the case of declare target mapped variables, the basePointer is
5082  // the reference pointer generated by the convertDeclareTargetAttr
5083  // method. Whereas the kernelValue is the original variable, so for
5084  // the device we must replace all uses of this original global variable
5085  // (stored in kernelValue) with the reference pointer (stored in
5086  // basePointer for declare target mapped variables), as for device the
5087  // data is mapped into this reference pointer and should be loaded
5088  // from it, the original variable is discarded. On host both exist and
5089  // metadata is generated (elsewhere in the convertDeclareTargetAttr)
5090  // function to link the two variables in the runtime and then both the
5091  // reference pointer and the pointer are assigned in the kernel argument
5092  // structure for the host.
5093  if (mapData.IsDeclareTarget[i]) {
5094  // If the original map value is a constant, then we have to make sure all
5095  // of it's uses within the current kernel/function that we are going to
5096  // rewrite are converted to instructions, as we will be altering the old
5097  // use (OriginalValue) from a constant to an instruction, which will be
5098  // illegal and ICE the compiler if the user is a constant expression of
5099  // some kind e.g. a constant GEP.
5100  if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
5101  convertUsersOfConstantsToInstructions(constant, func, false);
5102 
5103  // The users iterator will get invalidated if we modify an element,
5104  // so we populate this vector of uses to alter each user on an
5105  // individual basis to emit its own load (rather than one load for
5106  // all).
5108  for (llvm::User *user : mapData.OriginalValue[i]->users())
5109  userVec.push_back(user);
5110 
5111  for (llvm::User *user : userVec) {
5112  if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
5113  if (insn->getFunction() == func) {
5114  builder.SetCurrentDebugLocation(insn->getDebugLoc());
5115  auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
5116  mapData.BasePointers[i]);
5117  load->moveBefore(insn->getIterator());
5118  user->replaceUsesOfWith(mapData.OriginalValue[i], load);
5119  }
5120  }
5121  }
5122  }
5123  }
5124 }
5125 
5126 // The createDeviceArgumentAccessor function generates
5127 // instructions for retrieving (acessing) kernel
5128 // arguments inside of the device kernel for use by
5129 // the kernel. This enables different semantics such as
5130 // the creation of temporary copies of data allowing
5131 // semantics like read-only/no host write back kernel
5132 // arguments.
5133 //
5134 // This currently implements a very light version of Clang's
5135 // EmitParmDecl's handling of direct argument handling as well
5136 // as a portion of the argument access generation based on
5137 // capture types found at the end of emitOutlinedFunctionPrologue
5138 // in Clang. The indirect path handling of EmitParmDecl's may be
5139 // required for future work, but a direct 1-to-1 copy doesn't seem
5140 // possible as the logic is rather scattered throughout Clang's
5141 // lowering and perhaps we wish to deviate slightly.
5142 //
5143 // \param mapData - A container containing vectors of information
5144 // corresponding to the input argument, which should have a
5145 // corresponding entry in the MapInfoData containers
5146 // OrigialValue's.
5147 // \param arg - This is the generated kernel function argument that
5148 // corresponds to the passed in input argument. We generated different
5149 // accesses of this Argument, based on capture type and other Input
5150 // related information.
5151 // \param input - This is the host side value that will be passed to
5152 // the kernel i.e. the kernel input, we rewrite all uses of this within
5153 // the kernel (as we generate the kernel body based on the target's region
5154 // which maintians references to the original input) to the retVal argument
5155 // apon exit of this function inside of the OMPIRBuilder. This interlinks
5156 // the kernel argument to future uses of it in the function providing
5157 // appropriate "glue" instructions inbetween.
5158 // \param retVal - This is the value that all uses of input inside of the
5159 // kernel will be re-written to, the goal of this function is to generate
5160 // an appropriate location for the kernel argument to be accessed from,
5161 // e.g. ByRef will result in a temporary allocation location and then
5162 // a store of the kernel argument into this allocated memory which
5163 // will then be loaded from, ByCopy will use the allocated memory
5164 // directly.
5165 static llvm::IRBuilderBase::InsertPoint
5167  llvm::Value *input, llvm::Value *&retVal,
5168  llvm::IRBuilderBase &builder,
5169  llvm::OpenMPIRBuilder &ompBuilder,
5170  LLVM::ModuleTranslation &moduleTranslation,
5171  llvm::IRBuilderBase::InsertPoint allocaIP,
5172  llvm::IRBuilderBase::InsertPoint codeGenIP) {
5173  assert(ompBuilder.Config.isTargetDevice() &&
5174  "function only supported for target device codegen");
5175  builder.restoreIP(allocaIP);
5176 
5177  omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef;
5178  LLVM::TypeToLLVMIRTranslator typeToLLVMIRTranslator(
5179  ompBuilder.M.getContext());
5180  unsigned alignmentValue = 0;
5181  // Find the associated MapInfoData entry for the current input
5182  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
5183  if (mapData.OriginalValue[i] == input) {
5184  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
5185  capture = mapOp.getMapCaptureType();
5186  // Get information of alignment of mapped object
5187  alignmentValue = typeToLLVMIRTranslator.getPreferredAlignment(
5188  mapOp.getVarType(), ompBuilder.M.getDataLayout());
5189  break;
5190  }
5191 
5192  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
5193  unsigned int defaultAS =
5194  ompBuilder.M.getDataLayout().getProgramAddressSpace();
5195 
5196  // Create the alloca for the argument the current point.
5197  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
5198 
5199  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
5200  v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS));
5201 
5202  builder.CreateStore(&arg, v);
5203 
5204  builder.restoreIP(codeGenIP);
5205 
5206  switch (capture) {
5207  case omp::VariableCaptureKind::ByCopy: {
5208  retVal = v;
5209  break;
5210  }
5211  case omp::VariableCaptureKind::ByRef: {
5212  llvm::LoadInst *loadInst = builder.CreateAlignedLoad(
5213  v->getType(), v,
5214  ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
5215  // CreateAlignedLoad function creates similar LLVM IR:
5216  // %res = load ptr, ptr %input, align 8
5217  // This LLVM IR does not contain information about alignment
5218  // of the loaded value. We need to add !align metadata to unblock
5219  // optimizer. The existence of the !align metadata on the instruction
5220  // tells the optimizer that the value loaded is known to be aligned to
5221  // a boundary specified by the integer value in the metadata node.
5222  // Example:
5223  // %res = load ptr, ptr %input, align 8, !align !align_md_node
5224  // ^ ^
5225  // | |
5226  // alignment of %input address |
5227  // |
5228  // alignment of %res object
5229  if (v->getType()->isPointerTy() && alignmentValue) {
5230  llvm::MDBuilder MDB(builder.getContext());
5231  loadInst->setMetadata(
5232  llvm::LLVMContext::MD_align,
5233  llvm::MDNode::get(builder.getContext(),
5234  MDB.createConstant(llvm::ConstantInt::get(
5235  llvm::Type::getInt64Ty(builder.getContext()),
5236  alignmentValue))));
5237  }
5238  retVal = loadInst;
5239 
5240  break;
5241  }
5242  case omp::VariableCaptureKind::This:
5243  case omp::VariableCaptureKind::VLAType:
5244  // TODO: Consider returning error to use standard reporting for
5245  // unimplemented features.
5246  assert(false && "Currently unsupported capture kind");
5247  break;
5248  }
5249 
5250  return builder.saveIP();
5251 }
5252 
5253 /// Follow uses of `host_eval`-defined block arguments of the given `omp.target`
5254 /// operation and populate output variables with their corresponding host value
5255 /// (i.e. operand evaluated outside of the target region), based on their uses
5256 /// inside of the target region.
5257 ///
5258 /// Loop bounds and steps are only optionally populated, if output vectors are
5259 /// provided.
5260 static void
5261 extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
5262  Value &numTeamsLower, Value &numTeamsUpper,
5263  Value &threadLimit,
5264  llvm::SmallVectorImpl<Value> *lowerBounds = nullptr,
5265  llvm::SmallVectorImpl<Value> *upperBounds = nullptr,
5266  llvm::SmallVectorImpl<Value> *steps = nullptr) {
5267  auto blockArgIface = llvm::cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
5268  for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(),
5269  blockArgIface.getHostEvalBlockArgs())) {
5270  Value hostEvalVar = std::get<0>(item), blockArg = std::get<1>(item);
5271 
5272  for (Operation *user : blockArg.getUsers()) {
5274  .Case([&](omp::TeamsOp teamsOp) {
5275  if (teamsOp.getNumTeamsLower() == blockArg)
5276  numTeamsLower = hostEvalVar;
5277  else if (teamsOp.getNumTeamsUpper() == blockArg)
5278  numTeamsUpper = hostEvalVar;
5279  else if (teamsOp.getThreadLimit() == blockArg)
5280  threadLimit = hostEvalVar;
5281  else
5282  llvm_unreachable("unsupported host_eval use");
5283  })
5284  .Case([&](omp::ParallelOp parallelOp) {
5285  if (parallelOp.getNumThreads() == blockArg)
5286  numThreads = hostEvalVar;
5287  else
5288  llvm_unreachable("unsupported host_eval use");
5289  })
5290  .Case([&](omp::LoopNestOp loopOp) {
5291  auto processBounds =
5292  [&](OperandRange opBounds,
5293  llvm::SmallVectorImpl<Value> *outBounds) -> bool {
5294  bool found = false;
5295  for (auto [i, lb] : llvm::enumerate(opBounds)) {
5296  if (lb == blockArg) {
5297  found = true;
5298  if (outBounds)
5299  (*outBounds)[i] = hostEvalVar;
5300  }
5301  }
5302  return found;
5303  };
5304  bool found =
5305  processBounds(loopOp.getLoopLowerBounds(), lowerBounds);
5306  found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) ||
5307  found;
5308  found = processBounds(loopOp.getLoopSteps(), steps) || found;
5309  (void)found;
5310  assert(found && "unsupported host_eval use");
5311  })
5312  .DefaultUnreachable("unsupported host_eval use");
5313  }
5314  }
5315 }
5316 
5317 /// If \p op is of the given type parameter, return it casted to that type.
5318 /// Otherwise, if its immediate parent operation (or some other higher-level
5319 /// parent, if \p immediateParent is false) is of that type, return that parent
5320 /// casted to the given type.
5321 ///
5322 /// If \p op is \c null or neither it or its parent(s) are of the specified
5323 /// type, return a \c null operation.
5324 template <typename OpTy>
5325 static OpTy castOrGetParentOfType(Operation *op, bool immediateParent = false) {
5326  if (!op)
5327  return OpTy();
5328 
5329  if (OpTy casted = dyn_cast<OpTy>(op))
5330  return casted;
5331 
5332  if (immediateParent)
5333  return dyn_cast_if_present<OpTy>(op->getParentOp());
5334 
5335  return op->getParentOfType<OpTy>();
5336 }
5337 
5338 /// If the given \p value is defined by an \c llvm.mlir.constant operation and
5339 /// it is of an integer type, return its value.
5340 static std::optional<int64_t> extractConstInteger(Value value) {
5341  if (!value)
5342  return std::nullopt;
5343 
5344  if (auto constOp = value.getDefiningOp<LLVM::ConstantOp>())
5345  if (auto constAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
5346  return constAttr.getInt();
5347 
5348  return std::nullopt;
5349 }
5350 
5351 static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl) {
5352  uint64_t sizeInBits = dl.getTypeSizeInBits(type);
5353  uint64_t sizeInBytes = sizeInBits / 8;
5354  return sizeInBytes;
5355 }
5356 
5357 template <typename OpTy>
5358 static uint64_t getReductionDataSize(OpTy &op) {
5359  if (op.getNumReductionVars() > 0) {
5361  collectReductionDecls(op, reductions);
5362 
5364  members.reserve(reductions.size());
5365  for (omp::DeclareReductionOp &red : reductions)
5366  members.push_back(red.getType());
5367  Operation *opp = op.getOperation();
5368  auto structType = mlir::LLVM::LLVMStructType::getLiteral(
5369  opp->getContext(), members, /*isPacked=*/false);
5370  DataLayout dl = DataLayout(opp->getParentOfType<ModuleOp>());
5371  return getTypeByteSize(structType, dl);
5372  }
5373  return 0;
5374 }
5375 
/// Populate default `MinTeams`, `MaxTeams` and `MaxThreads` to their default
/// values as stated by the corresponding clauses, if constant.
///
/// These default values must be set before the creation of the outlined LLVM
/// function for the target region, so that they can be used to initialize the
/// corresponding global `ConfigurationEnvironmentTy` structure.
///
/// \p capturedOp is the innermost OpenMP construct captured by the target
/// region (may be null — the code below guards for that); \p attrs receives
/// the computed defaults.
static void
initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp,
                       llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs,
                       bool isTargetDevice, bool isGPU) {
  // TODO: Handle constant 'if' clauses.

  Value numThreads, numTeamsLower, numTeamsUpper, threadLimit;
  if (!isTargetDevice) {
    // On the host, clause values are forwarded through host_eval arguments.
    extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
                           threadLimit);
  } else {
    // In the target device, values for these clauses are not passed as
    // host_eval, but instead evaluated prior to entry to the region. This
    // ensures values are mapped and available inside of the target region.
    if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
      numTeamsLower = teamsOp.getNumTeamsLower();
      numTeamsUpper = teamsOp.getNumTeamsUpper();
      threadLimit = teamsOp.getThreadLimit();
    }

    if (auto parallelOp = castOrGetParentOfType<omp::ParallelOp>(capturedOp))
      numThreads = parallelOp.getNumThreads();
  }

  // Handle clauses impacting the number of teams.
  // Sentinels: -1 = unset, 0 = set but not a compile-time constant.

  int32_t minTeamsVal = 1, maxTeamsVal = -1;
  if (castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
    // TODO: Use `hostNumTeamsLower` to initialize `minTeamsVal`. For now, match
    // clang and set min and max to the same value.
    if (numTeamsUpper) {
      if (auto val = extractConstInteger(numTeamsUpper))
        minTeamsVal = maxTeamsVal = *val;
    } else {
      minTeamsVal = maxTeamsVal = 0;
    }
  } else if (castOrGetParentOfType<omp::ParallelOp>(capturedOp,
                                                    /*immediateParent=*/true) ||
             castOrGetParentOfType<omp::SimdOp>(capturedOp,
                                                /*immediateParent=*/true)) {
    // No teams construct: a directly-nested parallel/simd runs as one team.
    minTeamsVal = maxTeamsVal = 1;
  } else {
    minTeamsVal = maxTeamsVal = -1;
  }

  // Handle clauses impacting the number of threads.

  // Writes the constant clause value into `result` if there is one; if the
  // clause is present but non-constant, leaves `result` as 0 ("unknown").
  auto setMaxValueFromClause = [](Value clauseValue, int32_t &result) {
    if (!clauseValue)
      return;

    if (auto val = extractConstInteger(clauseValue))
      result = *val;

    // Found an applicable clause, so it's not undefined. Mark as unknown
    // because it's not constant.
    if (result < 0)
      result = 0;
  };

  // Extract 'thread_limit' clause from 'target' and 'teams' directives.
  int32_t targetThreadLimitVal = -1, teamsThreadLimitVal = -1;
  setMaxValueFromClause(targetOp.getThreadLimit(), targetThreadLimitVal);
  setMaxValueFromClause(threadLimit, teamsThreadLimitVal);

  // Extract 'max_threads' clause from 'parallel' or set to 1 if it's SIMD.
  int32_t maxThreadsVal = -1;
  if (castOrGetParentOfType<omp::ParallelOp>(capturedOp))
    setMaxValueFromClause(numThreads, maxThreadsVal);
  else if (castOrGetParentOfType<omp::SimdOp>(capturedOp,
                                              /*immediateParent=*/true))
    maxThreadsVal = 1;

  // For max values, < 0 means unset, == 0 means set but unknown. Select the
  // minimum value between 'max_threads' and 'thread_limit' clauses that were
  // set.
  int32_t combinedMaxThreadsVal = targetThreadLimitVal;
  if (combinedMaxThreadsVal < 0 ||
      (teamsThreadLimitVal >= 0 && teamsThreadLimitVal < combinedMaxThreadsVal))
    combinedMaxThreadsVal = teamsThreadLimitVal;

  if (combinedMaxThreadsVal < 0 ||
      (maxThreadsVal >= 0 && maxThreadsVal < combinedMaxThreadsVal))
    combinedMaxThreadsVal = maxThreadsVal;

  // Reduction buffer sizing is only relevant for GPU teams reductions.
  int32_t reductionDataSize = 0;
  if (isGPU && capturedOp) {
    if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp))
      reductionDataSize = getReductionDataSize(teamsOp);
  }

  // Update kernel bounds structure for the `OpenMPIRBuilder` to use.
  omp::TargetRegionFlags kernelFlags = targetOp.getKernelExecFlags(capturedOp);
  assert(
      omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic |
                                               omp::TargetRegionFlags::spmd) &&
      "invalid kernel flags");
  // generic+spmd -> GENERIC_SPMD, generic only -> GENERIC, spmd only -> SPMD.
  attrs.ExecFlags =
      omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic)
          ? omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::spmd)
                ? llvm::omp::OMP_TGT_EXEC_MODE_GENERIC_SPMD
                : llvm::omp::OMP_TGT_EXEC_MODE_GENERIC
          : llvm::omp::OMP_TGT_EXEC_MODE_SPMD;
  // Pure SPMD kernels additionally flagged no_loop get the NO_LOOP mode.
  if (omp::bitEnumContainsAll(kernelFlags,
                              omp::TargetRegionFlags::spmd |
                                  omp::TargetRegionFlags::no_loop) &&
      !omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic))
    attrs.ExecFlags = llvm::omp::OMP_TGT_EXEC_MODE_SPMD_NO_LOOP;

  attrs.MinTeams = minTeamsVal;
  attrs.MaxTeams.front() = maxTeamsVal;
  attrs.MinThreads = 1;
  attrs.MaxThreads.front() = combinedMaxThreadsVal;
  attrs.ReductionDataSize = reductionDataSize;
  // TODO: Allow modified buffer length similar to
  // fopenmp-cuda-teams-reduction-recs-num flag in clang.
  if (attrs.ReductionDataSize != 0)
    attrs.ReductionBufferLength = 1024;
}
5501 
/// Gather LLVM runtime values for all clauses evaluated in the host that are
/// passed to the kernel invocation.
///
/// This function must be called only when compiling for the host. Also, it will
/// only provide correct results if it's called after the body of \c targetOp
/// has been fully generated.
static void
initTargetRuntimeAttrs(llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation,
                       omp::TargetOp targetOp, Operation *capturedOp,
                       llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs) {
  // Number of collapsed loops in the captured loop nest (0 if none).
  omp::LoopNestOp loopOp = castOrGetParentOfType<omp::LoopNestOp>(capturedOp);
  unsigned numLoops = loopOp ? loopOp.getNumLoops() : 0;

  // Recover which host_eval values correspond to which clause / loop bound.
  Value numThreads, numTeamsLower, numTeamsUpper, teamsThreadLimit;
  llvm::SmallVector<Value> lowerBounds(numLoops), upperBounds(numLoops),
      steps(numLoops);
  extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
                         teamsThreadLimit, &lowerBounds, &upperBounds, &steps);

  // TODO: Handle constant 'if' clauses.
  if (Value targetThreadLimit = targetOp.getThreadLimit())
    attrs.TargetThreadLimit.front() =
        moduleTranslation.lookupValue(targetThreadLimit);

  if (numTeamsLower)
    attrs.MinTeams = moduleTranslation.lookupValue(numTeamsLower);

  if (numTeamsUpper)
    attrs.MaxTeams.front() = moduleTranslation.lookupValue(numTeamsUpper);

  if (teamsThreadLimit)
    attrs.TeamsThreadLimit.front() =
        moduleTranslation.lookupValue(teamsThreadLimit);

  if (numThreads)
    attrs.MaxThreads = moduleTranslation.lookupValue(numThreads);

  // Only kernels flagged for trip-count evaluation need `LoopTripCount`.
  if (omp::bitEnumContainsAny(targetOp.getKernelExecFlags(capturedOp),
                              omp::TargetRegionFlags::trip_count)) {
    llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
    attrs.LoopTripCount = nullptr;

    // To calculate the trip count, we multiply together the trip counts of
    // every collapsed canonical loop. We don't need to create the loop nests
    // here, since we're only interested in the trip count.
    for (auto [loopLower, loopUpper, loopStep] :
         llvm::zip_equal(lowerBounds, upperBounds, steps)) {
      llvm::Value *lowerBound = moduleTranslation.lookupValue(loopLower);
      llvm::Value *upperBound = moduleTranslation.lookupValue(loopUpper);
      llvm::Value *step = moduleTranslation.lookupValue(loopStep);

      llvm::OpenMPIRBuilder::LocationDescription loc(builder);
      llvm::Value *tripCount = ompBuilder->calculateCanonicalLoopTripCount(
          loc, lowerBound, upperBound, step, /*IsSigned=*/true,
          loopOp.getLoopInclusive());

      // First loop: seed the accumulator instead of multiplying.
      if (!attrs.LoopTripCount) {
        attrs.LoopTripCount = tripCount;
        continue;
      }

      // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
      attrs.LoopTripCount = builder.CreateMul(attrs.LoopTripCount, tripCount,
                                              {}, /*HasNUW=*/true);
    }
  }
}
5570 
5571 static LogicalResult
5572 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
5573  LLVM::ModuleTranslation &moduleTranslation) {
5574  auto targetOp = cast<omp::TargetOp>(opInst);
5575  // The current debug location already has the DISubprogram for the outlined
5576  // function that will be created for the target op. We save it here so that
5577  // we can set it on the outlined function.
5578  llvm::DebugLoc outlinedFnLoc = builder.getCurrentDebugLocation();
5579  if (failed(checkImplementationStatus(opInst)))
5580  return failure();
5581 
5582  // During the handling of target op, we will generate instructions in the
5583  // parent function like call to the oulined function or branch to a new
5584  // BasicBlock. We set the debug location here to parent function so that those
5585  // get the correct debug locations. For outlined functions, the normal MLIR op
5586  // conversion will automatically pick the correct location.
5587  llvm::BasicBlock *parentBB = builder.GetInsertBlock();
5588  assert(parentBB && "No insert block is set for the builder");
5589  llvm::Function *parentLLVMFn = parentBB->getParent();
5590  assert(parentLLVMFn && "Parent Function must be valid");
5591  if (llvm::DISubprogram *SP = parentLLVMFn->getSubprogram())
5592  builder.SetCurrentDebugLocation(llvm::DILocation::get(
5593  parentLLVMFn->getContext(), outlinedFnLoc.getLine(),
5594  outlinedFnLoc.getCol(), SP, outlinedFnLoc.getInlinedAt()));
5595 
5596  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5597  bool isTargetDevice = ompBuilder->Config.isTargetDevice();
5598  bool isGPU = ompBuilder->Config.isGPU();
5599 
5600  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
5601  auto argIface = cast<omp::BlockArgOpenMPOpInterface>(opInst);
5602  auto &targetRegion = targetOp.getRegion();
5603  // Holds the private vars that have been mapped along with the block argument
5604  // that corresponds to the MapInfoOp corresponding to the private var in
5605  // question. So, for instance:
5606  //
5607  // %10 = omp.map.info var_ptr(%6#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, ..)
5608  // omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1)
5609  //
5610  // Then, %10 has been created so that the descriptor can be used by the
5611  // privatizer @box.privatizer on the device side. Here we'd record {%6#0,
5612  // %arg0} in the mappedPrivateVars map.
5613  llvm::DenseMap<Value, Value> mappedPrivateVars;
5614  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
5615  SmallVector<Value> mapVars = targetOp.getMapVars();
5616  SmallVector<Value> hdaVars = targetOp.getHasDeviceAddrVars();
5617  ArrayRef<BlockArgument> mapBlockArgs = argIface.getMapBlockArgs();
5618  ArrayRef<BlockArgument> hdaBlockArgs = argIface.getHasDeviceAddrBlockArgs();
5619  llvm::Function *llvmOutlinedFn = nullptr;
5620 
5621  // TODO: It can also be false if a compile-time constant `false` IF clause is
5622  // specified.
5623  bool isOffloadEntry =
5624  isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
5625 
5626  // For some private variables, the MapsForPrivatizedVariablesPass
5627  // creates MapInfoOp instances. Go through the private variables and
5628  // the mapped variables so that during codegeneration we are able
5629  // to quickly look up the corresponding map variable, if any for each
5630  // private variable.
5631  if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) {
5632  OperandRange privateVars = targetOp.getPrivateVars();
5633  std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
5634  std::optional<DenseI64ArrayAttr> privateMapIndices =
5635  targetOp.getPrivateMapsAttr();
5636 
5637  for (auto [privVarIdx, privVarSymPair] :
5638  llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) {
5639  auto privVar = std::get<0>(privVarSymPair);
5640  auto privSym = std::get<1>(privVarSymPair);
5641 
5642  SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
5643  omp::PrivateClauseOp privatizer =
5644  findPrivatizer(targetOp, privatizerName);
5645 
5646  if (!privatizer.needsMap())
5647  continue;
5648 
5649  mlir::Value mappedValue =
5650  targetOp.getMappedValueForPrivateVar(privVarIdx);
5651  assert(mappedValue && "Expected to find mapped value for a privatized "
5652  "variable that needs mapping");
5653 
5654  // The MapInfoOp defining the map var isn't really needed later.
5655  // So, we don't store it in any datastructure. Instead, we just
5656  // do some sanity checks on it right now.
5657  auto mapInfoOp = mappedValue.getDefiningOp<omp::MapInfoOp>();
5658  [[maybe_unused]] Type varType = mapInfoOp.getVarType();
5659 
5660  // Check #1: Check that the type of the private variable matches
5661  // the type of the variable being mapped.
5662  if (!isa<LLVM::LLVMPointerType>(privVar.getType()))
5663  assert(
5664  varType == privVar.getType() &&
5665  "Type of private var doesn't match the type of the mapped value");
5666 
5667  // Ok, only 1 sanity check for now.
5668  // Record the block argument corresponding to this mapvar.
5669  mappedPrivateVars.insert(
5670  {privVar,
5671  targetRegion.getArgument(argIface.getMapBlockArgsStart() +
5672  (*privateMapIndices)[privVarIdx])});
5673  }
5674  }
5675 
5676  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5677  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
5678  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
5679  llvm::IRBuilderBase::InsertPointGuard guard(builder);
5680  builder.SetCurrentDebugLocation(llvm::DebugLoc());
5681  // Forward target-cpu and target-features function attributes from the
5682  // original function to the new outlined function.
5683  llvm::Function *llvmParentFn =
5684  moduleTranslation.lookupFunction(parentFn.getName());
5685  llvmOutlinedFn = codeGenIP.getBlock()->getParent();
5686  assert(llvmParentFn && llvmOutlinedFn &&
5687  "Both parent and outlined functions must exist at this point");
5688 
5689  if (outlinedFnLoc && llvmParentFn->getSubprogram())
5690  llvmOutlinedFn->setSubprogram(outlinedFnLoc->getScope()->getSubprogram());
5691 
5692  if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
5693  attr.isStringAttribute())
5694  llvmOutlinedFn->addFnAttr(attr);
5695 
5696  if (auto attr = llvmParentFn->getFnAttribute("target-features");
5697  attr.isStringAttribute())
5698  llvmOutlinedFn->addFnAttr(attr);
5699 
5700  for (auto [arg, mapOp] : llvm::zip_equal(mapBlockArgs, mapVars)) {
5701  auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
5702  llvm::Value *mapOpValue =
5703  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
5704  moduleTranslation.mapValue(arg, mapOpValue);
5705  }
5706  for (auto [arg, mapOp] : llvm::zip_equal(hdaBlockArgs, hdaVars)) {
5707  auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
5708  llvm::Value *mapOpValue =
5709  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
5710  moduleTranslation.mapValue(arg, mapOpValue);
5711  }
5712 
5713  // Do privatization after moduleTranslation has already recorded
5714  // mapped values.
5715  PrivateVarsInfo privateVarsInfo(targetOp);
5716 
5717  llvm::Expected<llvm::BasicBlock *> afterAllocas =
5718  allocatePrivateVars(builder, moduleTranslation, privateVarsInfo,
5719  allocaIP, &mappedPrivateVars);
5720 
5721  if (failed(handleError(afterAllocas, *targetOp)))
5722  return llvm::make_error<PreviouslyReportedError>();
5723 
5724  builder.restoreIP(codeGenIP);
5725  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo,
5726  &mappedPrivateVars),
5727  *targetOp)
5728  .failed())
5729  return llvm::make_error<PreviouslyReportedError>();
5730 
5732  targetOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
5733  privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
5734  targetOp.getPrivateNeedsBarrier(), &mappedPrivateVars)))
5735  return llvm::make_error<PreviouslyReportedError>();
5736 
5737  SmallVector<Region *> privateCleanupRegions;
5738  llvm::transform(privateVarsInfo.privatizers,
5739  std::back_inserter(privateCleanupRegions),
5740  [](omp::PrivateClauseOp privatizer) {
5741  return &privatizer.getDeallocRegion();
5742  });
5743 
5745  targetRegion, "omp.target", builder, moduleTranslation);
5746 
5747  if (!exitBlock)
5748  return exitBlock.takeError();
5749 
5750  builder.SetInsertPoint(*exitBlock);
5751  if (!privateCleanupRegions.empty()) {
5753  privateCleanupRegions, privateVarsInfo.llvmVars,
5754  moduleTranslation, builder, "omp.targetop.private.cleanup",
5755  /*shouldLoadCleanupRegionArg=*/false))) {
5756  return llvm::createStringError(
5757  "failed to inline `dealloc` region of `omp.private` "
5758  "op in the target region");
5759  }
5760  return builder.saveIP();
5761  }
5762 
5763  return InsertPointTy(exitBlock.get(), exitBlock.get()->end());
5764  };
5765 
5766  StringRef parentName = parentFn.getName();
5767 
5768  llvm::TargetRegionEntryInfo entryInfo;
5769 
5770  getTargetEntryUniqueInfo(entryInfo, targetOp, parentName);
5771 
5772  MapInfoData mapData;
5773  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
5774  builder, /*useDevPtrOperands=*/{},
5775  /*useDevAddrOperands=*/{}, hdaVars);
5776 
5777  MapInfosTy combinedInfos;
5778  auto genMapInfoCB =
5779  [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) -> MapInfosTy & {
5780  builder.restoreIP(codeGenIP);
5781  genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, true);
5782  return combinedInfos;
5783  };
5784 
5785  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
5786  llvm::Value *&retVal, InsertPointTy allocaIP,
5787  InsertPointTy codeGenIP)
5788  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
5789  llvm::IRBuilderBase::InsertPointGuard guard(builder);
5790  builder.SetCurrentDebugLocation(llvm::DebugLoc());
5791  // We just return the unaltered argument for the host function
5792  // for now, some alterations may be required in the future to
5793  // keep host fallback functions working identically to the device
5794  // version (e.g. pass ByCopy values should be treated as such on
5795  // host and device, currently not always the case)
5796  if (!isTargetDevice) {
5797  retVal = cast<llvm::Value>(&arg);
5798  return codeGenIP;
5799  }
5800 
5801  return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
5802  *ompBuilder, moduleTranslation,
5803  allocaIP, codeGenIP);
5804  };
5805 
5806  llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs runtimeAttrs;
5807  llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs defaultAttrs;
5808  Operation *targetCapturedOp = targetOp.getInnermostCapturedOmpOp();
5809  initTargetDefaultAttrs(targetOp, targetCapturedOp, defaultAttrs,
5810  isTargetDevice, isGPU);
5811 
5812  // Collect host-evaluated values needed to properly launch the kernel from the
5813  // host.
5814  if (!isTargetDevice)
5815  initTargetRuntimeAttrs(builder, moduleTranslation, targetOp,
5816  targetCapturedOp, runtimeAttrs);
5817 
5818  // Pass host-evaluated values as parameters to the kernel / host fallback,
5819  // except if they are constants. In any case, map the MLIR block argument to
5820  // the corresponding LLVM values.
5822  SmallVector<Value> hostEvalVars = targetOp.getHostEvalVars();
5823  ArrayRef<BlockArgument> hostEvalBlockArgs = argIface.getHostEvalBlockArgs();
5824  for (auto [arg, var] : llvm::zip_equal(hostEvalBlockArgs, hostEvalVars)) {
5825  llvm::Value *value = moduleTranslation.lookupValue(var);
5826  moduleTranslation.mapValue(arg, value);
5827 
5828  if (!llvm::isa<llvm::Constant>(value))
5829  kernelInput.push_back(value);
5830  }
5831 
5832  for (size_t i = 0, e = mapData.OriginalValue.size(); i != e; ++i) {
5833  // declare target arguments are not passed to kernels as arguments
5834  // TODO: We currently do not handle cases where a member is explicitly
5835  // passed in as an argument, this will likley need to be handled in
5836  // the near future, rather than using IsAMember, it may be better to
5837  // test if the relevant BlockArg is used within the target region and
5838  // then use that as a basis for exclusion in the kernel inputs.
5839  if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
5840  kernelInput.push_back(mapData.OriginalValue[i]);
5841  }
5842 
5844  buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
5845  moduleTranslation, dds);
5846 
5847  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
5848  findAllocaInsertPoint(builder, moduleTranslation);
5849  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
5850 
5851  llvm::OpenMPIRBuilder::TargetDataInfo info(
5852  /*RequiresDevicePointerInfo=*/false,
5853  /*SeparateBeginEndCalls=*/true);
5854 
5855  auto customMapperCB =
5856  [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
5857  if (!combinedInfos.Mappers[i])
5858  return nullptr;
5859  info.HasMapper = true;
5860  return getOrCreateUserDefinedMapperFunc(combinedInfos.Mappers[i], builder,
5861  moduleTranslation);
5862  };
5863 
5864  llvm::Value *ifCond = nullptr;
5865  if (Value targetIfCond = targetOp.getIfExpr())
5866  ifCond = moduleTranslation.lookupValue(targetIfCond);
5867 
5868  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
5869  moduleTranslation.getOpenMPBuilder()->createTarget(
5870  ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), info, entryInfo,
5871  defaultAttrs, runtimeAttrs, ifCond, kernelInput, genMapInfoCB, bodyCB,
5872  argAccessorCB, customMapperCB, dds, targetOp.getNowait());
5873 
5874  if (failed(handleError(afterIP, opInst)))
5875  return failure();
5876 
5877  builder.restoreIP(*afterIP);
5878 
5879  // Remap access operations to declare target reference pointers for the
5880  // device, essentially generating extra loadop's as necessary
5881  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
5882  handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
5883  llvmOutlinedFn);
5884 
5885  return success();
5886 }
5887 
/// Handle the `omp.declare_target` attribute on functions and globals:
/// erase host-only wrapper functions when compiling for the device, and
/// register declare-target globals with the OpenMPIRBuilder.
static LogicalResult
convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
                         LLVM::ModuleTranslation &moduleTranslation) {
  // Amend omp.declare_target by deleting the IR of the outlined functions
  // created for target regions. They cannot be filtered out from MLIR earlier
  // because the omp.target operation inside must be translated to LLVM, but
  // the wrapper functions themselves must not remain at the end of the
  // process. We know that functions where omp.declare_target does not match
  // omp.is_target_device at this stage can only be wrapper functions because
  // those that aren't are removed earlier as an MLIR transformation pass.
  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
    if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
            op->getParentOfType<ModuleOp>().getOperation())) {
      if (!offloadMod.getIsTargetDevice())
        return success();

      omp::DeclareTargetDeviceType declareType =
          attribute.getDeviceType().getValue();

      if (declareType == omp::DeclareTargetDeviceType::host) {
        // NOTE(review): lookupFunction is presumed to return non-null here
        // (the wrapper was translated before this point); a null result
        // would crash — confirm against the translation driver.
        llvm::Function *llvmFunc =
            moduleTranslation.lookupFunction(funcOp.getName());
        llvmFunc->dropAllReferences();
        llvmFunc->eraseFromParent();
      }
    }
    return success();
  }

  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
    llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
    if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
      llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
      bool isDeclaration = gOp.isDeclaration();
      bool isExternallyVisible =
          gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
      // Source location is only used by fileInfoCallBack below for offload
      // entry bookkeeping.
      auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
      llvm::StringRef mangledName = gOp.getSymName();
      auto captureClause =
          convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
      auto deviceClause =
          convertToDeviceClauseKind(attribute.getDeviceType().getValue());
      // unused for MLIR at the moment, required in Clang for book
      // keeping
      std::vector<llvm::GlobalVariable *> generatedRefs;

      // Target triple taken from the enclosing module, when present.
      std::vector<llvm::Triple> targetTriple;
      auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>(
          op->getParentOfType<mlir::ModuleOp>()->getAttr(
              LLVM::LLVMDialect::getTargetTripleAttrName()));
      if (targetTripleAttr)
        targetTriple.emplace_back(targetTripleAttr.data());

      // Supplies (filename, line) for unique offload-entry naming; falls back
      // to an empty filename and line 0 when no FileLineColLoc is available.
      auto fileInfoCallBack = [&loc]() {
        std::string filename = "";
        std::uint64_t lineNo = 0;

        if (loc) {
          filename = loc.getFilename().str();
          lineNo = loc.getLine();
        }

        return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
                                                     lineNo);
      };

      auto vfs = llvm::vfs::getRealFileSystem();

      ompBuilder->registerTargetGlobalVariable(
          captureClause, deviceClause, isDeclaration, isExternallyVisible,
          ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack, *vfs),
          mangledName, generatedRefs, /*OpenMPSimd*/ false, targetTriple,
          /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
          gVal->getType(), gVal);

      // On the device, a capture clause other than `to` (or `to` under
      // required unified shared memory) additionally needs a declare-target
      // reference pointer.
      if (ompBuilder->Config.isTargetDevice() &&
          (attribute.getCaptureClause().getValue() !=
               mlir::omp::DeclareTargetCaptureClause::to ||
           ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
        ompBuilder->getAddrOfDeclareTargetVar(
            captureClause, deviceClause, isDeclaration, isExternallyVisible,
            ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack, *vfs),
            mangledName, generatedRefs, /*OpenMPSimd*/ false, targetTriple,
            gVal->getType(), /*GlobalInitializer*/ nullptr,
            /*VariableLinkage*/ nullptr);
      }
    }
  }

  return success();
}
5979 
5980 // Returns true if the operation is inside a TargetOp or
5981 // is part of a declare target function.
5982 static bool isTargetDeviceOp(Operation *op) {
5983  // Assumes no reverse offloading
5984  if (op->getParentOfType<omp::TargetOp>())
5985  return true;
5986 
5987  // Certain operations return results, and whether utilised in host or
5988  // target there is a chance an LLVM Dialect operation depends on it
5989  // by taking it in as an operand, so we must always lower these in
5990  // some manner or result in an ICE (whether they end up in a no-op
5991  // or otherwise).
5992  if (mlir::isa<omp::ThreadprivateOp>(op))
5993  return true;
5994 
5995  if (mlir::isa<omp::TargetAllocMemOp>(op) ||
5996  mlir::isa<omp::TargetFreeMemOp>(op))
5997  return true;
5998 
5999  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
6000  if (auto declareTargetIface =
6001  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
6002  parentFn.getOperation()))
6003  if (declareTargetIface.isDeclareTarget() &&
6004  declareTargetIface.getDeclareTargetDeviceType() !=
6005  mlir::omp::DeclareTargetDeviceType::host)
6006  return true;
6007 
6008  return false;
6009 }
6010 
6011 static llvm::Function *getOmpTargetAlloc(llvm::IRBuilderBase &builder,
6012  llvm::Module *llvmModule) {
6013  llvm::Type *i64Ty = builder.getInt64Ty();
6014  llvm::Type *i32Ty = builder.getInt32Ty();
6015  llvm::Type *returnType = builder.getPtrTy(0);
6016  llvm::FunctionType *fnType =
6017  llvm::FunctionType::get(returnType, {i64Ty, i32Ty}, false);
6018  llvm::Function *func = cast<llvm::Function>(
6019  llvmModule->getOrInsertFunction("omp_target_alloc", fnType).getCallee());
6020  return func;
6021 }
6022 
6023 static LogicalResult
6024 convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
6025  LLVM::ModuleTranslation &moduleTranslation) {
6026  auto allocMemOp = cast<omp::TargetAllocMemOp>(opInst);
6027  if (!allocMemOp)
6028  return failure();
6029 
6030  // Get "omp_target_alloc" function
6031  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
6032  llvm::Function *ompTargetAllocFunc = getOmpTargetAlloc(builder, llvmModule);
6033  // Get the corresponding device value in llvm
6034  mlir::Value deviceNum = allocMemOp.getDevice();
6035  llvm::Value *llvmDeviceNum = moduleTranslation.lookupValue(deviceNum);
6036  // Get the allocation size.
6037  llvm::DataLayout dataLayout = llvmModule->getDataLayout();
6038  mlir::Type heapTy = allocMemOp.getAllocatedType();
6039  llvm::Type *llvmHeapTy = moduleTranslation.convertType(heapTy);
6040  llvm::TypeSize typeSize = dataLayout.getTypeStoreSize(llvmHeapTy);
6041  llvm::Value *allocSize = builder.getInt64(typeSize.getFixedValue());
6042  for (auto typeParam : allocMemOp.getTypeparams())
6043  allocSize =
6044  builder.CreateMul(allocSize, moduleTranslation.lookupValue(typeParam));
6045  // Create call to "omp_target_alloc" with the args as translated llvm values.
6046  llvm::CallInst *call =
6047  builder.CreateCall(ompTargetAllocFunc, {allocSize, llvmDeviceNum});
6048  llvm::Value *resultI64 = builder.CreatePtrToInt(call, builder.getInt64Ty());
6049 
6050  // Map the result
6051  moduleTranslation.mapValue(allocMemOp.getResult(), resultI64);
6052  return success();
6053 }
6054 
6055 static llvm::Function *getOmpTargetFree(llvm::IRBuilderBase &builder,
6056  llvm::Module *llvmModule) {
6057  llvm::Type *ptrTy = builder.getPtrTy(0);
6058  llvm::Type *i32Ty = builder.getInt32Ty();
6059  llvm::Type *voidTy = builder.getVoidTy();
6060  llvm::FunctionType *fnType =
6061  llvm::FunctionType::get(voidTy, {ptrTy, i32Ty}, false);
6062  llvm::Function *func = dyn_cast<llvm::Function>(
6063  llvmModule->getOrInsertFunction("omp_target_free", fnType).getCallee());
6064  return func;
6065 }
6066 
6067 static LogicalResult
6068 convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
6069  LLVM::ModuleTranslation &moduleTranslation) {
6070  auto freeMemOp = cast<omp::TargetFreeMemOp>(opInst);
6071  if (!freeMemOp)
6072  return failure();
6073 
6074  // Get "omp_target_free" function
6075  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
6076  llvm::Function *ompTragetFreeFunc = getOmpTargetFree(builder, llvmModule);
6077  // Get the corresponding device value in llvm
6078  mlir::Value deviceNum = freeMemOp.getDevice();
6079  llvm::Value *llvmDeviceNum = moduleTranslation.lookupValue(deviceNum);
6080  // Get the corresponding heapref value in llvm
6081  mlir::Value heapref = freeMemOp.getHeapref();
6082  llvm::Value *llvmHeapref = moduleTranslation.lookupValue(heapref);
6083  // Convert heapref int to ptr and call "omp_target_free"
6084  llvm::Value *intToPtr =
6085  builder.CreateIntToPtr(llvmHeapref, builder.getPtrTy(0));
6086  builder.CreateCall(ompTragetFreeFunc, {intToPtr, llvmDeviceNum});
6087  return success();
6088 }
6089 
6090 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
6091 /// OpenMP runtime calls).
6092 static LogicalResult
6093 convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
6094  LLVM::ModuleTranslation &moduleTranslation) {
6095  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
6096 
6097  // For each loop, introduce one stack frame to hold loop information. Ensure
6098  // this is only done for the outermost loop wrapper to prevent introducing
6099  // multiple stack frames for a single loop. Initially set to null, the loop
6100  // information structure is initialized during translation of the nested
6101  // omp.loop_nest operation, making it available to translation of all loop
6102  // wrappers after their body has been successfully translated.
6103  bool isOutermostLoopWrapper =
6104  isa_and_present<omp::LoopWrapperInterface>(op) &&
6105  !dyn_cast_if_present<omp::LoopWrapperInterface>(op->getParentOp());
6106 
6107  if (isOutermostLoopWrapper)
6108  moduleTranslation.stackPush<OpenMPLoopInfoStackFrame>();
6109 
6110  auto result =
6112  .Case([&](omp::BarrierOp op) -> LogicalResult {
6114  return failure();
6115 
6116  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
6117  ompBuilder->createBarrier(builder.saveIP(),
6118  llvm::omp::OMPD_barrier);
6119  LogicalResult res = handleError(afterIP, *op);
6120  if (res.succeeded()) {
6121  // If the barrier generated a cancellation check, the insertion
6122  // point might now need to be changed to a new continuation block
6123  builder.restoreIP(*afterIP);
6124  }
6125  return res;
6126  })
6127  .Case([&](omp::TaskyieldOp op) {
6129  return failure();
6130 
6131  ompBuilder->createTaskyield(builder.saveIP());
6132  return success();
6133  })
6134  .Case([&](omp::FlushOp op) {
6136  return failure();
6137 
6138  // No support in Openmp runtime function (__kmpc_flush) to accept
6139  // the argument list.
6140  // OpenMP standard states the following:
6141  // "An implementation may implement a flush with a list by ignoring
6142  // the list, and treating it the same as a flush without a list."
6143  //
6144  // The argument list is discarded so that, flush with a list is
6145  // treated same as a flush without a list.
6146  ompBuilder->createFlush(builder.saveIP());
6147  return success();
6148  })
6149  .Case([&](omp::ParallelOp op) {
6150  return convertOmpParallel(op, builder, moduleTranslation);
6151  })
6152  .Case([&](omp::MaskedOp) {
6153  return convertOmpMasked(*op, builder, moduleTranslation);
6154  })
6155  .Case([&](omp::MasterOp) {
6156  return convertOmpMaster(*op, builder, moduleTranslation);
6157  })
6158  .Case([&](omp::CriticalOp) {
6159  return convertOmpCritical(*op, builder, moduleTranslation);
6160  })
6161  .Case([&](omp::OrderedRegionOp) {
6162  return convertOmpOrderedRegion(*op, builder, moduleTranslation);
6163  })
6164  .Case([&](omp::OrderedOp) {
6165  return convertOmpOrdered(*op, builder, moduleTranslation);
6166  })
6167  .Case([&](omp::WsloopOp) {
6168  return convertOmpWsloop(*op, builder, moduleTranslation);
6169  })
6170  .Case([&](omp::SimdOp) {
6171  return convertOmpSimd(*op, builder, moduleTranslation);
6172  })
6173  .Case([&](omp::AtomicReadOp) {
6174  return convertOmpAtomicRead(*op, builder, moduleTranslation);
6175  })
6176  .Case([&](omp::AtomicWriteOp) {
6177  return convertOmpAtomicWrite(*op, builder, moduleTranslation);
6178  })
6179  .Case([&](omp::AtomicUpdateOp op) {
6180  return convertOmpAtomicUpdate(op, builder, moduleTranslation);
6181  })
6182  .Case([&](omp::AtomicCaptureOp op) {
6183  return convertOmpAtomicCapture(op, builder, moduleTranslation);
6184  })
6185  .Case([&](omp::CancelOp op) {
6186  return convertOmpCancel(op, builder, moduleTranslation);
6187  })
6188  .Case([&](omp::CancellationPointOp op) {
6189  return convertOmpCancellationPoint(op, builder, moduleTranslation);
6190  })
6191  .Case([&](omp::SectionsOp) {
6192  return convertOmpSections(*op, builder, moduleTranslation);
6193  })
6194  .Case([&](omp::SingleOp op) {
6195  return convertOmpSingle(op, builder, moduleTranslation);
6196  })
6197  .Case([&](omp::TeamsOp op) {
6198  return convertOmpTeams(op, builder, moduleTranslation);
6199  })
6200  .Case([&](omp::TaskOp op) {
6201  return convertOmpTaskOp(op, builder, moduleTranslation);
6202  })
6203  .Case([&](omp::TaskgroupOp op) {
6204  return convertOmpTaskgroupOp(op, builder, moduleTranslation);
6205  })
6206  .Case([&](omp::TaskwaitOp op) {
6207  return convertOmpTaskwaitOp(op, builder, moduleTranslation);
6208  })
6209  .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareMapperOp,
6210  omp::DeclareMapperInfoOp, omp::DeclareReductionOp,
6211  omp::CriticalDeclareOp>([](auto op) {
6212  // `yield` and `terminator` can be just omitted. The block structure
6213  // was created in the region that handles their parent operation.
6214  // `declare_reduction` will be used by reductions and is not
6215  // converted directly, skip it.
6216  // `declare_mapper` and `declare_mapper.info` are handled whenever
6217  // they are referred to through a `map` clause.
6218  // `critical.declare` is only used to declare names of critical
6219  // sections which will be used by `critical` ops and hence can be
6220  // ignored for lowering. The OpenMP IRBuilder will create unique
6221  // name for critical section names.
6222  return success();
6223  })
6224  .Case([&](omp::ThreadprivateOp) {
6225  return convertOmpThreadprivate(*op, builder, moduleTranslation);
6226  })
6227  .Case<omp::TargetDataOp, omp::TargetEnterDataOp,
6228  omp::TargetExitDataOp, omp::TargetUpdateOp>([&](auto op) {
6229  return convertOmpTargetData(op, builder, moduleTranslation);
6230  })
6231  .Case([&](omp::TargetOp) {
6232  return convertOmpTarget(*op, builder, moduleTranslation);
6233  })
6234  .Case([&](omp::DistributeOp) {
6235  return convertOmpDistribute(*op, builder, moduleTranslation);
6236  })
6237  .Case([&](omp::LoopNestOp) {
6238  return convertOmpLoopNest(*op, builder, moduleTranslation);
6239  })
6240  .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
6241  [&](auto op) {
6242  // No-op, should be handled by relevant owning operations e.g.
6243  // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
6244  // etc. and then discarded
6245  return success();
6246  })
6247  .Case([&](omp::NewCliOp op) {
6248  // Meta-operation: Doesn't do anything by itself, but used to
6249  // identify a loop.
6250  return success();
6251  })
6252  .Case([&](omp::CanonicalLoopOp op) {
6253  return convertOmpCanonicalLoopOp(op, builder, moduleTranslation);
6254  })
6255  .Case([&](omp::UnrollHeuristicOp op) {
6256  // FIXME: Handling omp.unroll_heuristic as an executable requires
6257  // that the generator (e.g. omp.canonical_loop) has been seen first.
6258  // For construct that require all codegen to occur inside a callback
6259  // (e.g. OpenMPIRBilder::createParallel), all codegen of that
6260  // contained region including their transformations must occur at
6261  // the omp.canonical_loop.
6262  return applyUnrollHeuristic(op, builder, moduleTranslation);
6263  })
6264  .Case([&](omp::TileOp op) {
6265  return applyTile(op, builder, moduleTranslation);
6266  })
6267  .Case([&](omp::TargetAllocMemOp) {
6268  return convertTargetAllocMemOp(*op, builder, moduleTranslation);
6269  })
6270  .Case([&](omp::TargetFreeMemOp) {
6271  return convertTargetFreeMemOp(*op, builder, moduleTranslation);
6272  })
6273  .Default([&](Operation *inst) {
6274  return inst->emitError()
6275  << "not yet implemented: " << inst->getName();
6276  });
6277 
6278  if (isOutermostLoopWrapper)
6279  moduleTranslation.stackPop();
6280 
6281  return result;
6282 }
6283 
6284 static LogicalResult
6285 convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
6286  LLVM::ModuleTranslation &moduleTranslation) {
6287  return convertHostOrTargetOperation(op, builder, moduleTranslation);
6288 }
6289 
6290 static LogicalResult
6291 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
6292  LLVM::ModuleTranslation &moduleTranslation) {
6293  if (isa<omp::TargetOp>(op))
6294  return convertOmpTarget(*op, builder, moduleTranslation);
6295  if (isa<omp::TargetDataOp>(op))
6296  return convertOmpTargetData(op, builder, moduleTranslation);
6297  bool interrupted =
6298  op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
6299  if (isa<omp::TargetOp>(oper)) {
6300  if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
6301  return WalkResult::interrupt();
6302  return WalkResult::skip();
6303  }
6304  if (isa<omp::TargetDataOp>(oper)) {
6305  if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
6306  return WalkResult::interrupt();
6307  return WalkResult::skip();
6308  }
6309 
6310  // Non-target ops might nest target-related ops, therefore, we
6311  // translate them as non-OpenMP scopes. Translating them is needed by
6312  // nested target-related ops since they might need LLVM values defined
6313  // in their parent non-target ops.
6314  if (isa<omp::OpenMPDialect>(oper->getDialect()) &&
6315  oper->getParentOfType<LLVM::LLVMFuncOp>() &&
6316  !oper->getRegions().empty()) {
6317  if (auto blockArgsIface =
6318  dyn_cast<omp::BlockArgOpenMPOpInterface>(oper))
6319  forwardArgs(moduleTranslation, blockArgsIface);
6320  else {
6321  // Here we map entry block arguments of
6322  // non-BlockArgOpenMPOpInterface ops if they can be encountered
6323  // inside of a function and they define any of these arguments.
6324  if (isa<mlir::omp::AtomicUpdateOp>(oper))
6325  for (auto [operand, arg] :
6326  llvm::zip_equal(oper->getOperands(),
6327  oper->getRegion(0).getArguments())) {
6328  moduleTranslation.mapValue(
6329  arg, builder.CreateLoad(
6330  moduleTranslation.convertType(arg.getType()),
6331  moduleTranslation.lookupValue(operand)));
6332  }
6333  }
6334 
6335  if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) {
6336  assert(builder.GetInsertBlock() &&
6337  "No insert block is set for the builder");
6338  for (auto iv : loopNest.getIVs()) {
6339  // Map iv to an undefined value just to keep the IR validity.
6340  moduleTranslation.mapValue(
6342  moduleTranslation.convertType(iv.getType())));
6343  }
6344  }
6345 
6346  for (Region &region : oper->getRegions()) {
6347  // Regions are fake in the sense that they are not a truthful
6348  // translation of the OpenMP construct being converted (e.g. no
6349  // OpenMP runtime calls will be generated). We just need this to
6350  // prepare the kernel invocation args.
6352  auto result = convertOmpOpRegions(
6353  region, oper->getName().getStringRef().str() + ".fake.region",
6354  builder, moduleTranslation, &phis);
6355  if (failed(handleError(result, *oper)))
6356  return WalkResult::interrupt();
6357 
6358  builder.SetInsertPoint(result.get(), result.get()->end());
6359  }
6360 
6361  return WalkResult::skip();
6362  }
6363 
6364  return WalkResult::advance();
6365  }).wasInterrupted();
6366  return failure(interrupted);
6367 }
6368 
6369 namespace {
6370 
6371 /// Implementation of the dialect interface that converts operations belonging
6372 /// to the OpenMP dialect to LLVM IR.
6373 class OpenMPDialectLLVMIRTranslationInterface
6375 public:
6377 
6378  /// Translates the given operation to LLVM IR using the provided IR builder
6379  /// and saving the state in `moduleTranslation`.
6380  LogicalResult
6381  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
6382  LLVM::ModuleTranslation &moduleTranslation) const final;
6383 
6384  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
6385  /// runtime calls, or operation amendments
6386  LogicalResult
6387  amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
6388  NamedAttribute attribute,
6389  LLVM::ModuleTranslation &moduleTranslation) const final;
6390 };
6391 
6392 } // namespace
6393 
6394 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
6395  Operation *op, ArrayRef<llvm::Instruction *> instructions,
6396  NamedAttribute attribute,
6397  LLVM::ModuleTranslation &moduleTranslation) const {
6398  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
6399  attribute.getName())
6400  .Case("omp.is_target_device",
6401  [&](Attribute attr) {
6402  if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
6403  llvm::OpenMPIRBuilderConfig &config =
6404  moduleTranslation.getOpenMPBuilder()->Config;
6405  config.setIsTargetDevice(deviceAttr.getValue());
6406  return success();
6407  }
6408  return failure();
6409  })
6410  .Case("omp.is_gpu",
6411  [&](Attribute attr) {
6412  if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
6413  llvm::OpenMPIRBuilderConfig &config =
6414  moduleTranslation.getOpenMPBuilder()->Config;
6415  config.setIsGPU(gpuAttr.getValue());
6416  return success();
6417  }
6418  return failure();
6419  })
6420  .Case("omp.host_ir_filepath",
6421  [&](Attribute attr) {
6422  if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
6423  llvm::OpenMPIRBuilder *ompBuilder =
6424  moduleTranslation.getOpenMPBuilder();
6425  auto VFS = llvm::vfs::getRealFileSystem();
6426  ompBuilder->loadOffloadInfoMetadata(*VFS,
6427  filepathAttr.getValue());
6428  return success();
6429  }
6430  return failure();
6431  })
6432  .Case("omp.flags",
6433  [&](Attribute attr) {
6434  if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
6435  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
6436  return failure();
6437  })
6438  .Case("omp.version",
6439  [&](Attribute attr) {
6440  if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
6441  llvm::OpenMPIRBuilder *ompBuilder =
6442  moduleTranslation.getOpenMPBuilder();
6443  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
6444  versionAttr.getVersion());
6445  return success();
6446  }
6447  return failure();
6448  })
6449  .Case("omp.declare_target",
6450  [&](Attribute attr) {
6451  if (auto declareTargetAttr =
6452  dyn_cast<omp::DeclareTargetAttr>(attr))
6453  return convertDeclareTargetAttr(op, declareTargetAttr,
6454  moduleTranslation);
6455  return failure();
6456  })
6457  .Case("omp.requires",
6458  [&](Attribute attr) {
6459  if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
6460  using Requires = omp::ClauseRequires;
6461  Requires flags = requiresAttr.getValue();
6462  llvm::OpenMPIRBuilderConfig &config =
6463  moduleTranslation.getOpenMPBuilder()->Config;
6464  config.setHasRequiresReverseOffload(
6465  bitEnumContainsAll(flags, Requires::reverse_offload));
6466  config.setHasRequiresUnifiedAddress(
6467  bitEnumContainsAll(flags, Requires::unified_address));
6468  config.setHasRequiresUnifiedSharedMemory(
6469  bitEnumContainsAll(flags, Requires::unified_shared_memory));
6470  config.setHasRequiresDynamicAllocators(
6471  bitEnumContainsAll(flags, Requires::dynamic_allocators));
6472  return success();
6473  }
6474  return failure();
6475  })
6476  .Case("omp.target_triples",
6477  [&](Attribute attr) {
6478  if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
6479  llvm::OpenMPIRBuilderConfig &config =
6480  moduleTranslation.getOpenMPBuilder()->Config;
6481  config.TargetTriples.clear();
6482  config.TargetTriples.reserve(triplesAttr.size());
6483  for (Attribute tripleAttr : triplesAttr) {
6484  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
6485  config.TargetTriples.emplace_back(tripleStrAttr.getValue());
6486  else
6487  return failure();
6488  }
6489  return success();
6490  }
6491  return failure();
6492  })
6493  .Default([](Attribute) {
6494  // Fall through for omp attributes that do not require lowering.
6495  return success();
6496  })(attribute.getValue());
6497 
6498  return failure();
6499 }
6500 
6501 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
6502 /// (including OpenMP runtime calls).
6503 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
6504  Operation *op, llvm::IRBuilderBase &builder,
6505  LLVM::ModuleTranslation &moduleTranslation) const {
6506 
6507  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
6508  if (ompBuilder->Config.isTargetDevice()) {
6509  if (isTargetDeviceOp(op)) {
6510  return convertTargetDeviceOp(op, builder, moduleTranslation);
6511  }
6512  return convertTargetOpsInNest(op, builder, moduleTranslation);
6513  }
6514  return convertHostOrTargetOperation(op, builder, moduleTranslation);
6515 }
6516 
6518  registry.insert<omp::OpenMPDialect>();
6519  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
6520  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
6521  });
6522 }
6523 
6525  DialectRegistry registry;
6527  context.appendDialectRegistry(registry);
6528 }
static std::string toString(bytecode::Section::ID sectionID)
Stringify the given section ID.
static ze_device_handle_t getDevice(const uint32_t driverIdx=0, const int32_t devIdx=0)
union mlir::linalg::@1252::ArityGroupAndKind::Kind kind
@ None
static llvm::Value * getRefPtrIfDeclareTarget(mlir::Value value, LLVM::ModuleTranslation &moduleTranslation)
static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::Function *func)
static LogicalResult convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static llvm::omp::OrderKind convertOrderKind(std::optional< omp::ClauseOrderKind > o)
Convert Order attribute to llvm::omp::OrderKind.
static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp)
static LogicalResult convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::Expected< llvm::Function * > emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::StringRef mapperFuncName)
static llvm::Expected< llvm::Value * > initPrivateVar(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg, llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Initialize a single (first)private variable.
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertToCaptureClauseKind(mlir::omp::DeclareTargetCaptureClause captureClause)
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, bool first)
static OpTy castOrGetParentOfType(Operation *op, bool immediateParent=false)
If op is of the given type parameter, return it casted to that type.
static LogicalResult convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered_region' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::Function * getOmpTargetAlloc(llvm::IRBuilderBase &builder, llvm::Module *llvmModule)
static LogicalResult convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an omp.atomic.write operation to LLVM IR.
static OwningAtomicReductionGen makeAtomicReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible atomic reduction generator for the given reduction declaration.
static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Apply a #pragma omp unroll / "!$omp unroll" transformation using the OpenMPIRBuilder.
static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static llvm::Value * findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Return the llvm::Value * corresponding to the privateVar that is being privatized.
static std::vector< llvm::Value * > calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, bool isArrayTy, OperandRange bounds)
This function calculates the array/pointer offset for map data provided with bounds operations,...
static LogicalResult convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::Value * getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::Value *basePointer, llvm::Type *baseType, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
static void popCancelFinalizationCB(const ArrayRef< llvm::BranchInst * > cancelTerminators, llvm::OpenMPIRBuilder &ompBuilder, const llvm::OpenMPIRBuilder::InsertPointTy &afterIP)
If we cancelled the construct, we should branch to the finalization block of that construct.
static LogicalResult cleanupPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, Location loc, SmallVectorImpl< llvm::Value * > &llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls)
static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static llvm::Expected< llvm::BasicBlock * > allocatePrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Allocate and initialize delayed private variables.
static llvm::AtomicOrdering convertAtomicOrdering(std::optional< omp::ClauseMemoryOrderKind > ao)
Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static void setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder, llvm::BasicBlock *block=nullptr)
static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCanonicalLoopOp(omp::CanonicalLoopOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert an omp.canonical_loop to LLVM-IR.
static LogicalResult convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult allocAndInitializeReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef)
static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, MapInfosTy &combinedInfo, bool isTargetParams, int mapDataParentIdx=-1)
static LogicalResult convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
static bool isTargetDeviceOp(Operation *op)
static LogicalResult inlineOmpRegionCleanup(llvm::SmallVectorImpl< Region * > &cleanupRegions, llvm::ArrayRef< llvm::Value * > privateVariables, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, StringRef regionName, bool shouldLoadCleanupRegionArg=true)
handling of DeclareReductionOp's cleanup region
static llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op)
Converts an LLVM dialect binary operation to the corresponding enum value for atomicrmw supported bin...
static void pushCancelFinalizationCB(SmallVectorImpl< llvm::BranchInst * > &cancelTerminators, llvm::IRBuilderBase &llvmBuilder, llvm::OpenMPIRBuilder &ompBuilder, mlir::Operation *op, llvm::omp::Directive cancelDirective)
Shared implementation of a callback which adds a termiator for the new block created for the branch t...
static void mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, DenseMap< Value, llvm::Value * > &reductionVariableMap, unsigned i)
Map input arguments to reduction initialization region.
static LogicalResult initReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::BasicBlock *latestAllocaBlock, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef, SmallVectorImpl< DeferredStore > &deferredStores)
Inline reductions' init regions.
static LogicalResult convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp)
static llvm::Error initPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static llvm::SmallString< 64 > getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, llvm::OpenMPIRBuilder &ompBuilder)
static OwningReductionGen makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible reduction generator for the given reduction declaration.
static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts the OpenMP parallel operation to LLVM IR.
static llvm::OpenMPIRBuilder::InsertPointTy findAllocaInsertPoint(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Find the insertion point for allocas given the current insertion point for normal operations in the b...
static LogicalResult inlineConvertOmpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::Value * > *continuationBlockArgs=nullptr)
Translates the blocks contained in the given region and appends them to at the current insertion poin...
static LogicalResult convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP Threadprivate operation into LLVM IR using OpenMPIRBuilder.
static omp::PrivateClauseOp findPrivatizer(Operation *from, SymbolRefAttr symbolName)
Looks up from the operation from and returns the PrivateClauseOp with name symbolName.
static LogicalResult convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, bool isTargetParams=false)
static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl)
static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, omp::TargetOp targetOp, llvm::StringRef parentName="")
static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, Value &numTeamsLower, Value &numTeamsUpper, Value &threadLimit, llvm::SmallVectorImpl< Value > *lowerBounds=nullptr, llvm::SmallVectorImpl< Value > *upperBounds=nullptr, llvm::SmallVectorImpl< Value > *steps=nullptr)
Follow uses of host_eval-defined block arguments of the given omp.target operation and populate outpu...
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static llvm::Expected< llvm::Function * > getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation, omp::BlockArgOpenMPOpInterface blockArgIface)
Maps block arguments from blockArgIface (which are MLIR values) to the corresponding LLVM values of t...
static LogicalResult allocReductionVars(T loop, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, SmallVectorImpl< DeferredStore > &deferredStores, llvm::ArrayRef< bool > isByRefs)
Allocate space for privatized reduction variables.
static ArrayRef< bool > getIsByRef(std::optional< ArrayRef< bool >> attr)
static bool constructIsCancellable(Operation *op)
Returns true if the construct contains omp.cancel or omp.cancellation_point.
static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP)
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind)
Convert ProcBindKind from MLIR-generated enum to LLVM enum.
static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl)
static void collectReductionDecls(T op, SmallVectorImpl< omp::DeclareReductionOp > &reductions)
Populates reductions with reduction declarations used in the given op.
static LogicalResult handleError(llvm::Error error, Operation &op)
static LogicalResult copyFirstPrivateVars(mlir::Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< mlir::Value > &mlirPrivateVars, ArrayRef< llvm::Value * > llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls, bool insertBarrier, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static void processMapMembersWithParent(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag)
static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause)
static LogicalResult checkImplementationStatus(Operation &op)
Check whether translation to LLVM IR for the given operation is currently supported.
static LogicalResult createReductionsAndCleanup(OP op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, ArrayRef< llvm::Value * > privateReductionVariables, ArrayRef< bool > isByRef, bool isNowait=false, bool isTeamsReduction=false)
static LogicalResult convertOmpCancellationPoint(omp::CancellationPointOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static uint64_t getReductionDataSize(OpTy &op)
static llvm::CanonicalLoopInfo * findCurrentLoopInfo(LLVM::ModuleTranslation &moduleTranslation)
Find the loop information structure for the loop nest being translated.
static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert omp.atomic.read operation to LLVM IR.
static llvm::omp::Directive convertCancellationConstructType(omp::ClauseCancellationConstructType directive)
static void initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs, bool isTargetDevice, bool isGPU)
Populate default MinTeams, MaxTeams and MaxThreads to their default values as stated by the correspon...
static std::optional< int64_t > extractConstInteger(Value value)
If the given value is defined by an llvm.mlir.constant operation and it is of an integer type,...
static void initTargetRuntimeAttrs(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs)
Gather LLVM runtime values for all clauses evaluated in the host that are passed to the kernel invoca...
static void collectReductionInfo(T loop, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< OwningReductionGen > &owningReductionGens, SmallVectorImpl< OwningAtomicReductionGen > &owningAtomicReductionGens, const ArrayRef< llvm::Value * > privateReductionVariables, SmallVectorImpl< llvm::OpenMPIRBuilder::ReductionInfo > &reductionInfos)
Collect reduction info.
static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Given an OpenMP MLIR operation, create the corresponding LLVM IR (including OpenMP runtime calls).
static LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
Lowers the FlagsAttr which is applied to the module on the device pass when offloading,...
static bool checkIfPointerMap(omp::MapInfoOp mapOp)
static LogicalResult applyTile(omp::TileOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Apply a #pragma omp tile / !$omp tile transformation using the OpenMPIRBuilder.
static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void buildDependData(std::optional< ArrayAttr > dependKinds, OperandRange dependVars, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::OpenMPIRBuilder::DependData > &dds)
static llvm::Expected< llvm::BasicBlock * > convertOmpOpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::PHINode * > *continuationBlockPHIs=nullptr)
Converts the given region that appears within an OpenMP dialect operation to LLVM IR,...
static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
static void collectMapDataFromMapOperands(MapInfoData &mapData, SmallVectorImpl< Value > &mapVars, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::IRBuilderBase &builder, ArrayRef< Value > useDevPtrOperands={}, ArrayRef< Value > useDevAddrOperands={}, ArrayRef< Value > hasDevAddrOperands={})
static bool isDeclareTargetLink(mlir::Value value)
static llvm::Function * getOmpTargetFree(llvm::IRBuilderBase &builder, llvm::Module *llvmModule)
static void extractAtomicControlFlags(omp::AtomicUpdateOp atomicUpdateOp, bool &isIgnoreDenormalMode, bool &isFineGrainedMemory, bool &isRemoteMemory)
static Operation * genLoop(CodegenEnv &env, OpBuilder &builder, LoopId curr, unsigned numCases, bool needsUniv, ArrayRef< TensorLevel > tidLvls)
Generates a for-loop or a while-loop, depending on whether it implements singleton iteration or co-it...
#define MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CLASS_NAME)
Definition: TypeID.h:331
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class represents an argument of a Block.
Definition: Value.h:309
Block represents an ordered list of Operations.
Definition: Block.h:33
BlockArgument getArgument(unsigned i)
Definition: Block.h:129
unsigned getNumArguments()
Definition: Block.h:128
Operation & back()
Definition: Block.h:152
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:244
Operation & front()
Definition: Block.h:153
iterator begin()
Definition: Block.h:143
The main mechanism for performing data layout queries.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
An instance of this location represents a tuple of file, line number, and column number.
Definition: Location.h:174
Base class for dialect interfaces providing translation to LLVM IR.
T findInstanceOf()
Return an instance of the given location type if one is nested under the current location.
Definition: Location.h:45
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:63
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:164
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:55
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:179
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:43
type_range getType() const
Definition: ValueRange.cpp:32
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:350
Dialect * getDialect()
Return the dialect this operation is associated with, or nullptr if the associated dialect is not loa...
Definition: Operation.h:220
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:280
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:797
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:346
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:686
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:873
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:539
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
BlockArgListType getArguments()
Definition: Region.h:81
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition: Region.h:200
bool empty()
Definition: Region.h:60
unsigned getNumArguments()
Definition: Region.h:123
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
Block & front()
Definition: Region.h:65
bool hasOneBlock()
Return true if this region has exactly one block.
Definition: Region.h:68
Concrete CRTP base class for StateStack frames.
Definition: StateStack.h:47
@ Private
The symbol is private and may only be referenced by SymbolRefAttrs local to the operations within the...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:18
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: WalkResult.h:29
static WalkResult skip()
Definition: WalkResult.h:48
static WalkResult advance()
Definition: WalkResult.h:47
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: WalkResult.h:51
static WalkResult interrupt()
Definition: WalkResult.h:46
The OpAsmOpInterface, see OpAsmInterface.td for more details.
Definition: CallGraph.h:229
void connectPHINodes(Region &region, const ModuleTranslation &state)
For all blocks in the region that were converted to LLVM IR using the given ModuleTranslation,...
llvm::Constant * createMappingInformation(Location loc, llvm::OpenMPIRBuilder &builder)
Create a constant string representing the mapping information extracted from the MLIR location inform...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Runtime
Potential runtimes for AMD GPU kernels.
Definition: Runtimes.h:15
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition: Remarks.h:561
llvm::hash_code hash_value(const StructType::MemberDecorationInfo &memberDecorationInfo)
llvm::PointerUnion< NamedAttribute *, NamedProperty *, NamedTypeConstraint * > Argument
Definition: Argument.h:64
Include the generated interface declarations.
SetVector< Block * > getBlocksSortedByDominance(Region &region)
Gets a list of blocks that is sorted according to dominance.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:304
const FrozenRewritePatternSet GreedyRewriteConfig config
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
void registerOpenMPDialectTranslation(DialectRegistry &registry)
Register the OpenMP dialect and the translation from it to the LLVM IR in the given registry.
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
Definition: Utils.cpp:1279
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
A util to collect info needed to convert delayed privatizers from MLIR to LLVM.
SmallVector< mlir::Value > mlirVars
SmallVector< omp::PrivateClauseOp > privatizers
MutableArrayRef< BlockArgument > blockArgs
SmallVector< llvm::Value * > llvmVars
RAII object calling stackPush/stackPop on construction/destruction.
Definition: StateStack.h:106