MLIR  21.0.0git
OpenMPToLLVMIRTranslation.cpp
Go to the documentation of this file.
1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
19 #include "mlir/IR/IRMapping.h"
20 #include "mlir/IR/Operation.h"
21 #include "mlir/Support/LLVM.h"
25 
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/TypeSwitch.h"
30 #include "llvm/Frontend/OpenMP/OMPConstants.h"
31 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DebugInfoMetadata.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/MDBuilder.h"
37 #include "llvm/IR/ReplaceConstant.h"
38 #include "llvm/Support/FileSystem.h"
39 #include "llvm/TargetParser/Triple.h"
40 #include "llvm/Transforms/Utils/ModuleUtils.h"
41 
42 #include <any>
43 #include <cstdint>
44 #include <iterator>
45 #include <numeric>
46 #include <optional>
47 #include <utility>
48 
49 using namespace mlir;
50 
51 namespace {
52 static llvm::omp::ScheduleKind
53 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
54  if (!schedKind.has_value())
55  return llvm::omp::OMP_SCHEDULE_Default;
56  switch (schedKind.value()) {
57  case omp::ClauseScheduleKind::Static:
58  return llvm::omp::OMP_SCHEDULE_Static;
59  case omp::ClauseScheduleKind::Dynamic:
60  return llvm::omp::OMP_SCHEDULE_Dynamic;
61  case omp::ClauseScheduleKind::Guided:
62  return llvm::omp::OMP_SCHEDULE_Guided;
63  case omp::ClauseScheduleKind::Auto:
64  return llvm::omp::OMP_SCHEDULE_Auto;
66  return llvm::omp::OMP_SCHEDULE_Runtime;
67  }
68  llvm_unreachable("unhandled schedule clause argument");
69 }
70 
/// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
/// insertion points for allocas.
///
/// Frames of this type are searched by \see findAllocaInsertPoint to locate
/// the innermost alloca insertion point provided by a surrounding operation.
class OpenMPAllocaStackFrame
    : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)

  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
      : allocaInsertPoint(allocaIP) {}

  /// Insertion point that nested translations should use for their allocas.
  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
};
82 
/// Stack frame to hold a \see llvm::CanonicalLoopInfo representing the
/// collapsed canonical loop information corresponding to an \c omp.loop_nest
/// operation.
class OpenMPLoopInfoStackFrame
    : public LLVM::ModuleTranslation::StackFrameBase<OpenMPLoopInfoStackFrame> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPLoopInfoStackFrame)

  /// Canonical loop information for the translated loop nest; remains null
  /// until the loop body has been successfully translated.
  llvm::CanonicalLoopInfo *loopInfo = nullptr;
};
92 
93 /// Custom error class to signal translation errors that don't need reporting,
94 /// since encountering them will have already triggered relevant error messages.
95 ///
96 /// Its purpose is to serve as the glue between MLIR failures represented as
97 /// \see LogicalResult instances and \see llvm::Error instances used to
98 /// propagate errors through the \see llvm::OpenMPIRBuilder. Generally, when an
99 /// error of the first type is raised, a message is emitted directly (the \see
100 /// LogicalResult itself does not hold any information). If we need to forward
101 /// this error condition as an \see llvm::Error while avoiding triggering some
102 /// redundant error reporting later on, we need a custom \see llvm::ErrorInfo
103 /// class to just signal this situation has happened.
104 ///
105 /// For example, this class should be used to trigger errors from within
106 /// callbacks passed to the \see OpenMPIRBuilder when they were triggered by the
107 /// translation of their own regions. This unclutters the error log from
108 /// redundant messages.
109 class PreviouslyReportedError
110  : public llvm::ErrorInfo<PreviouslyReportedError> {
111 public:
112  void log(raw_ostream &) const override {
113  // Do not log anything.
114  }
115 
116  std::error_code convertToErrorCode() const override {
117  llvm_unreachable(
118  "PreviouslyReportedError doesn't support ECError conversion");
119  }
120 
121  // Used by ErrorInfo::classID.
122  static char ID;
123 };
124 
126 
127 /*
128  * Custom class for processing linear clause for omp.wsloop
129  * and omp.simd. Linear clause translation requires setup,
130  * initialization, update, and finalization at varying
131  * basic blocks in the IR. This class helps maintain
132  * internal state to allow consistent translation in
133  * each of these stages.
134  */
135 
136 class LinearClauseProcessor {
137 
138 private:
139  SmallVector<llvm::Value *> linearPreconditionVars;
140  SmallVector<llvm::Value *> linearLoopBodyTemps;
141  SmallVector<llvm::AllocaInst *> linearOrigVars;
142  SmallVector<llvm::Value *> linearOrigVal;
143  SmallVector<llvm::Value *> linearSteps;
144  llvm::BasicBlock *linearFinalizationBB;
145  llvm::BasicBlock *linearExitBB;
146  llvm::BasicBlock *linearLastIterExitBB;
147 
148 public:
149  // Allocate space for linear variabes
150  void createLinearVar(llvm::IRBuilderBase &builder,
151  LLVM::ModuleTranslation &moduleTranslation,
152  mlir::Value &linearVar) {
153  if (llvm::AllocaInst *linearVarAlloca = dyn_cast<llvm::AllocaInst>(
154  moduleTranslation.lookupValue(linearVar))) {
155  linearPreconditionVars.push_back(builder.CreateAlloca(
156  linearVarAlloca->getAllocatedType(), nullptr, ".linear_var"));
157  llvm::Value *linearLoopBodyTemp = builder.CreateAlloca(
158  linearVarAlloca->getAllocatedType(), nullptr, ".linear_result");
159  linearOrigVal.push_back(moduleTranslation.lookupValue(linearVar));
160  linearLoopBodyTemps.push_back(linearLoopBodyTemp);
161  linearOrigVars.push_back(linearVarAlloca);
162  }
163  }
164 
165  // Initialize linear step
166  inline void initLinearStep(LLVM::ModuleTranslation &moduleTranslation,
167  mlir::Value &linearStep) {
168  linearSteps.push_back(moduleTranslation.lookupValue(linearStep));
169  }
170 
171  // Emit IR for initialization of linear variables
172  llvm::OpenMPIRBuilder::InsertPointOrErrorTy
173  initLinearVar(llvm::IRBuilderBase &builder,
174  LLVM::ModuleTranslation &moduleTranslation,
175  llvm::BasicBlock *loopPreHeader) {
176  builder.SetInsertPoint(loopPreHeader->getTerminator());
177  for (size_t index = 0; index < linearOrigVars.size(); index++) {
178  llvm::LoadInst *linearVarLoad = builder.CreateLoad(
179  linearOrigVars[index]->getAllocatedType(), linearOrigVars[index]);
180  builder.CreateStore(linearVarLoad, linearPreconditionVars[index]);
181  }
182  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
183  moduleTranslation.getOpenMPBuilder()->createBarrier(
184  builder.saveIP(), llvm::omp::OMPD_barrier);
185  return afterBarrierIP;
186  }
187 
188  // Emit IR for updating Linear variables
189  void updateLinearVar(llvm::IRBuilderBase &builder, llvm::BasicBlock *loopBody,
190  llvm::Value *loopInductionVar) {
191  builder.SetInsertPoint(loopBody->getTerminator());
192  for (size_t index = 0; index < linearPreconditionVars.size(); index++) {
193  // Emit increments for linear vars
194  llvm::LoadInst *linearVarStart =
195  builder.CreateLoad(linearOrigVars[index]->getAllocatedType(),
196 
197  linearPreconditionVars[index]);
198  auto mulInst = builder.CreateMul(loopInductionVar, linearSteps[index]);
199  auto addInst = builder.CreateAdd(linearVarStart, mulInst);
200  builder.CreateStore(addInst, linearLoopBodyTemps[index]);
201  }
202  }
203 
204  // Linear variable finalization is conditional on the last logical iteration.
205  // Create BB splits to manage the same.
206  void outlineLinearFinalizationBB(llvm::IRBuilderBase &builder,
207  llvm::BasicBlock *loopExit) {
208  linearFinalizationBB = loopExit->splitBasicBlock(
209  loopExit->getTerminator(), "omp_loop.linear_finalization");
210  linearExitBB = linearFinalizationBB->splitBasicBlock(
211  linearFinalizationBB->getTerminator(), "omp_loop.linear_exit");
212  linearLastIterExitBB = linearFinalizationBB->splitBasicBlock(
213  linearFinalizationBB->getTerminator(), "omp_loop.linear_lastiter_exit");
214  }
215 
216  // Finalize the linear vars
217  llvm::OpenMPIRBuilder::InsertPointOrErrorTy
218  finalizeLinearVar(llvm::IRBuilderBase &builder,
219  LLVM::ModuleTranslation &moduleTranslation,
220  llvm::Value *lastIter) {
221  // Emit condition to check whether last logical iteration is being executed
222  builder.SetInsertPoint(linearFinalizationBB->getTerminator());
223  llvm::Value *loopLastIterLoad = builder.CreateLoad(
224  llvm::Type::getInt32Ty(builder.getContext()), lastIter);
225  llvm::Value *isLast =
226  builder.CreateCmp(llvm::CmpInst::ICMP_NE, loopLastIterLoad,
228  llvm::Type::getInt32Ty(builder.getContext()), 0));
229  // Store the linear variable values to original variables.
230  builder.SetInsertPoint(linearLastIterExitBB->getTerminator());
231  for (size_t index = 0; index < linearOrigVars.size(); index++) {
232  llvm::LoadInst *linearVarTemp =
233  builder.CreateLoad(linearOrigVars[index]->getAllocatedType(),
234  linearLoopBodyTemps[index]);
235  builder.CreateStore(linearVarTemp, linearOrigVars[index]);
236  }
237 
238  // Create conditional branch such that the linear variable
239  // values are stored to original variables only at the
240  // last logical iteration
241  builder.SetInsertPoint(linearFinalizationBB->getTerminator());
242  builder.CreateCondBr(isLast, linearLastIterExitBB, linearExitBB);
243  linearFinalizationBB->getTerminator()->eraseFromParent();
244  // Emit barrier
245  builder.SetInsertPoint(linearExitBB->getTerminator());
246  return moduleTranslation.getOpenMPBuilder()->createBarrier(
247  builder.saveIP(), llvm::omp::OMPD_barrier);
248  }
249 
250  // Rewrite all uses of the original variable in `BBName`
251  // with the linear variable in-place
252  void rewriteInPlace(llvm::IRBuilderBase &builder, std::string BBName,
253  size_t varIndex) {
255  for (llvm::User *user : linearOrigVal[varIndex]->users())
256  users.push_back(user);
257  for (auto *user : users) {
258  if (auto *userInst = dyn_cast<llvm::Instruction>(user)) {
259  if (userInst->getParent()->getName().str() == BBName)
260  user->replaceUsesOfWith(linearOrigVal[varIndex],
261  linearLoopBodyTemps[varIndex]);
262  }
263  }
264  }
265 };
266 
267 } // namespace
268 
269 /// Looks up from the operation from and returns the PrivateClauseOp with
270 /// name symbolName
271 static omp::PrivateClauseOp findPrivatizer(Operation *from,
272  SymbolRefAttr symbolName) {
273  omp::PrivateClauseOp privatizer =
274  SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
275  symbolName);
276  assert(privatizer && "privatizer not found in the symbol table");
277  return privatizer;
278 }
279 
280 /// Check whether translation to LLVM IR for the given operation is currently
281 /// supported. If not, descriptive diagnostics will be emitted to let users know
282 /// this is a not-yet-implemented feature.
283 ///
284 /// \returns success if no unimplemented features are needed to translate the
285 /// given operation.
286 static LogicalResult checkImplementationStatus(Operation &op) {
287  auto todo = [&op](StringRef clauseName) {
288  return op.emitError() << "not yet implemented: Unhandled clause "
289  << clauseName << " in " << op.getName()
290  << " operation";
291  };
292 
293  auto checkAllocate = [&todo](auto op, LogicalResult &result) {
294  if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
295  result = todo("allocate");
296  };
297  auto checkBare = [&todo](auto op, LogicalResult &result) {
298  if (op.getBare())
299  result = todo("ompx_bare");
300  };
301  auto checkCancelDirective = [&todo](auto op, LogicalResult &result) {
302  omp::ClauseCancellationConstructType cancelledDirective =
303  op.getCancelDirective();
304  // Cancelling a taskloop is not yet supported because we don't yet have LLVM
305  // IR conversion for taskloop
306  if (cancelledDirective == omp::ClauseCancellationConstructType::Taskgroup) {
307  Operation *parent = op->getParentOp();
308  while (parent) {
309  if (parent->getDialect() == op->getDialect())
310  break;
311  parent = parent->getParentOp();
312  }
313  if (isa_and_nonnull<omp::TaskloopOp>(parent))
314  result = todo("cancel directive inside of taskloop");
315  }
316  };
317  auto checkDepend = [&todo](auto op, LogicalResult &result) {
318  if (!op.getDependVars().empty() || op.getDependKinds())
319  result = todo("depend");
320  };
321  auto checkDevice = [&todo](auto op, LogicalResult &result) {
322  if (op.getDevice())
323  result = todo("device");
324  };
325  auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
326  if (op.getDistScheduleChunkSize())
327  result = todo("dist_schedule with chunk_size");
328  };
329  auto checkHint = [](auto op, LogicalResult &) {
330  if (op.getHint())
331  op.emitWarning("hint clause discarded");
332  };
333  auto checkInReduction = [&todo](auto op, LogicalResult &result) {
334  if (!op.getInReductionVars().empty() || op.getInReductionByref() ||
335  op.getInReductionSyms())
336  result = todo("in_reduction");
337  };
338  auto checkIsDevicePtr = [&todo](auto op, LogicalResult &result) {
339  if (!op.getIsDevicePtrVars().empty())
340  result = todo("is_device_ptr");
341  };
342  auto checkLinear = [&todo](auto op, LogicalResult &result) {
343  if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
344  result = todo("linear");
345  };
346  auto checkNowait = [&todo](auto op, LogicalResult &result) {
347  if (op.getNowait())
348  result = todo("nowait");
349  };
350  auto checkOrder = [&todo](auto op, LogicalResult &result) {
351  if (op.getOrder() || op.getOrderMod())
352  result = todo("order");
353  };
354  auto checkParLevelSimd = [&todo](auto op, LogicalResult &result) {
355  if (op.getParLevelSimd())
356  result = todo("parallelization-level");
357  };
358  auto checkPriority = [&todo](auto op, LogicalResult &result) {
359  if (op.getPriority())
360  result = todo("priority");
361  };
362  auto checkPrivate = [&todo](auto op, LogicalResult &result) {
363  if constexpr (std::is_same_v<std::decay_t<decltype(op)>, omp::TargetOp>) {
364  // Privatization is supported only for included target tasks.
365  if (!op.getPrivateVars().empty() && op.getNowait())
366  result = todo("privatization for deferred target tasks");
367  } else {
368  if (!op.getPrivateVars().empty() || op.getPrivateSyms())
369  result = todo("privatization");
370  }
371  };
372  auto checkReduction = [&todo](auto op, LogicalResult &result) {
373  if (isa<omp::TeamsOp>(op) || isa<omp::SimdOp>(op))
374  if (!op.getReductionVars().empty() || op.getReductionByref() ||
375  op.getReductionSyms())
376  result = todo("reduction");
377  if (op.getReductionMod() &&
378  op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
379  result = todo("reduction with modifier");
380  };
381  auto checkTaskReduction = [&todo](auto op, LogicalResult &result) {
382  if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() ||
383  op.getTaskReductionSyms())
384  result = todo("task_reduction");
385  };
386  auto checkUntied = [&todo](auto op, LogicalResult &result) {
387  if (op.getUntied())
388  result = todo("untied");
389  };
390 
391  LogicalResult result = success();
393  .Case([&](omp::CancelOp op) { checkCancelDirective(op, result); })
394  .Case([&](omp::CancellationPointOp op) {
395  checkCancelDirective(op, result);
396  })
397  .Case([&](omp::DistributeOp op) {
398  checkAllocate(op, result);
399  checkDistSchedule(op, result);
400  checkOrder(op, result);
401  })
402  .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
403  .Case([&](omp::SectionsOp op) {
404  checkAllocate(op, result);
405  checkPrivate(op, result);
406  checkReduction(op, result);
407  })
408  .Case([&](omp::SingleOp op) {
409  checkAllocate(op, result);
410  checkPrivate(op, result);
411  })
412  .Case([&](omp::TeamsOp op) {
413  checkAllocate(op, result);
414  checkPrivate(op, result);
415  })
416  .Case([&](omp::TaskOp op) {
417  checkAllocate(op, result);
418  checkInReduction(op, result);
419  })
420  .Case([&](omp::TaskgroupOp op) {
421  checkAllocate(op, result);
422  checkTaskReduction(op, result);
423  })
424  .Case([&](omp::TaskwaitOp op) {
425  checkDepend(op, result);
426  checkNowait(op, result);
427  })
428  .Case([&](omp::TaskloopOp op) {
429  // TODO: Add other clauses check
430  checkUntied(op, result);
431  checkPriority(op, result);
432  })
433  .Case([&](omp::WsloopOp op) {
434  checkAllocate(op, result);
435  checkLinear(op, result);
436  checkOrder(op, result);
437  checkReduction(op, result);
438  })
439  .Case([&](omp::ParallelOp op) {
440  checkAllocate(op, result);
441  checkReduction(op, result);
442  })
443  .Case([&](omp::SimdOp op) {
444  checkLinear(op, result);
445  checkReduction(op, result);
446  })
447  .Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp,
448  omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); })
449  .Case<omp::TargetEnterDataOp, omp::TargetExitDataOp, omp::TargetUpdateOp>(
450  [&](auto op) { checkDepend(op, result); })
451  .Case([&](omp::TargetOp op) {
452  checkAllocate(op, result);
453  checkBare(op, result);
454  checkDevice(op, result);
455  checkInReduction(op, result);
456  checkIsDevicePtr(op, result);
457  checkPrivate(op, result);
458  })
459  .Default([](Operation &) {
460  // Assume all clauses for an operation can be translated unless they are
461  // checked above.
462  });
463  return result;
464 }
465 
466 static LogicalResult handleError(llvm::Error error, Operation &op) {
467  LogicalResult result = success();
468  if (error) {
469  llvm::handleAllErrors(
470  std::move(error),
471  [&](const PreviouslyReportedError &) { result = failure(); },
472  [&](const llvm::ErrorInfoBase &err) {
473  result = op.emitError(err.message());
474  });
475  }
476  return result;
477 }
478 
479 template <typename T>
480 static LogicalResult handleError(llvm::Expected<T> &result, Operation &op) {
481  if (!result)
482  return handleError(result.takeError(), op);
483 
484  return success();
485 }
486 
487 /// Find the insertion point for allocas given the current insertion point for
488 /// normal operations in the builder.
489 static llvm::OpenMPIRBuilder::InsertPointTy
490 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
491  LLVM::ModuleTranslation &moduleTranslation) {
492  // If there is an alloca insertion point on stack, i.e. we are in a nested
493  // operation and a specific point was provided by some surrounding operation,
494  // use it.
495  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
496  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
497  [&](OpenMPAllocaStackFrame &frame) {
498  allocaInsertPoint = frame.allocaInsertPoint;
499  return WalkResult::interrupt();
500  });
501  if (walkResult.wasInterrupted())
502  return allocaInsertPoint;
503 
504  // Otherwise, insert to the entry block of the surrounding function.
505  // If the current IRBuilder InsertPoint is the function's entry, it cannot
506  // also be used for alloca insertion which would result in insertion order
507  // confusion. Create a new BasicBlock for the Builder and use the entry block
508  // for the allocs.
509  // TODO: Create a dedicated alloca BasicBlock at function creation such that
510  // we do not need to move the current InertPoint here.
511  if (builder.GetInsertBlock() ==
512  &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
513  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
514  "Assuming end of basic block");
515  llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
516  builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
517  builder.GetInsertBlock()->getNextNode());
518  builder.CreateBr(entryBB);
519  builder.SetInsertPoint(entryBB);
520  }
521 
522  llvm::BasicBlock &funcEntryBlock =
523  builder.GetInsertBlock()->getParent()->getEntryBlock();
524  return llvm::OpenMPIRBuilder::InsertPointTy(
525  &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
526 }
527 
528 /// Find the loop information structure for the loop nest being translated. It
529 /// will return a `null` value unless called from the translation function for
530 /// a loop wrapper operation after successfully translating its body.
531 static llvm::CanonicalLoopInfo *
533  llvm::CanonicalLoopInfo *loopInfo = nullptr;
534  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
535  [&](OpenMPLoopInfoStackFrame &frame) {
536  loopInfo = frame.loopInfo;
537  return WalkResult::interrupt();
538  });
539  return loopInfo;
540 }
541 
542 /// Converts the given region that appears within an OpenMP dialect operation to
543 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
544 /// region, and a branch from any block with an successor-less OpenMP terminator
545 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
546 /// of the continuation block if provided.
548  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
549  LLVM::ModuleTranslation &moduleTranslation,
550  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
551  bool isLoopWrapper = isa<omp::LoopWrapperInterface>(region.getParentOp());
552 
553  llvm::BasicBlock *continuationBlock =
554  splitBB(builder, true, "omp.region.cont");
555  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
556 
557  llvm::LLVMContext &llvmContext = builder.getContext();
558  for (Block &bb : region) {
559  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
560  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
561  builder.GetInsertBlock()->getNextNode());
562  moduleTranslation.mapBlock(&bb, llvmBB);
563  }
564 
565  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
566 
567  // Terminators (namely YieldOp) may be forwarding values to the region that
568  // need to be available in the continuation block. Collect the types of these
569  // operands in preparation of creating PHI nodes. This is skipped for loop
570  // wrapper operations, for which we know in advance they have no terminators.
571  SmallVector<llvm::Type *> continuationBlockPHITypes;
572  unsigned numYields = 0;
573 
574  if (!isLoopWrapper) {
575  bool operandsProcessed = false;
576  for (Block &bb : region.getBlocks()) {
577  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
578  if (!operandsProcessed) {
579  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
580  continuationBlockPHITypes.push_back(
581  moduleTranslation.convertType(yield->getOperand(i).getType()));
582  }
583  operandsProcessed = true;
584  } else {
585  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
586  "mismatching number of values yielded from the region");
587  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
588  llvm::Type *operandType =
589  moduleTranslation.convertType(yield->getOperand(i).getType());
590  (void)operandType;
591  assert(continuationBlockPHITypes[i] == operandType &&
592  "values of mismatching types yielded from the region");
593  }
594  }
595  numYields++;
596  }
597  }
598  }
599 
600  // Insert PHI nodes in the continuation block for any values forwarded by the
601  // terminators in this region.
602  if (!continuationBlockPHITypes.empty())
603  assert(
604  continuationBlockPHIs &&
605  "expected continuation block PHIs if converted regions yield values");
606  if (continuationBlockPHIs) {
607  llvm::IRBuilderBase::InsertPointGuard guard(builder);
608  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
609  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
610  for (llvm::Type *ty : continuationBlockPHITypes)
611  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
612  }
613 
614  // Convert blocks one by one in topological order to ensure
615  // defs are converted before uses.
617  for (Block *bb : blocks) {
618  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
619  // Retarget the branch of the entry block to the entry block of the
620  // converted region (regions are single-entry).
621  if (bb->isEntryBlock()) {
622  assert(sourceTerminator->getNumSuccessors() == 1 &&
623  "provided entry block has multiple successors");
624  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
625  "ContinuationBlock is not the successor of the entry block");
626  sourceTerminator->setSuccessor(0, llvmBB);
627  }
628 
629  llvm::IRBuilderBase::InsertPointGuard guard(builder);
630  if (failed(
631  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder)))
632  return llvm::make_error<PreviouslyReportedError>();
633 
634  // Create a direct branch here for loop wrappers to prevent their lack of a
635  // terminator from causing a crash below.
636  if (isLoopWrapper) {
637  builder.CreateBr(continuationBlock);
638  continue;
639  }
640 
641  // Special handling for `omp.yield` and `omp.terminator` (we may have more
642  // than one): they return the control to the parent OpenMP dialect operation
643  // so replace them with the branch to the continuation block. We handle this
644  // here to avoid relying inter-function communication through the
645  // ModuleTranslation class to set up the correct insertion point. This is
646  // also consistent with MLIR's idiom of handling special region terminators
647  // in the same code that handles the region-owning operation.
648  Operation *terminator = bb->getTerminator();
649  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
650  builder.CreateBr(continuationBlock);
651 
652  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
653  (*continuationBlockPHIs)[i]->addIncoming(
654  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
655  }
656  }
657  // After all blocks have been traversed and values mapped, connect the PHI
658  // nodes to the results of preceding blocks.
659  LLVM::detail::connectPHINodes(region, moduleTranslation);
660 
661  // Remove the blocks and values defined in this region from the mapping since
662  // they are not visible outside of this region. This allows the same region to
663  // be converted several times, that is cloned, without clashes, and slightly
664  // speeds up the lookups.
665  moduleTranslation.forgetMapping(region);
666 
667  return continuationBlock;
668 }
669 
670 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
671 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
672  switch (kind) {
673  case omp::ClauseProcBindKind::Close:
674  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
675  case omp::ClauseProcBindKind::Master:
676  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
677  case omp::ClauseProcBindKind::Primary:
678  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
679  case omp::ClauseProcBindKind::Spread:
680  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
681  }
682  llvm_unreachable("Unknown ClauseProcBindKind kind");
683 }
684 
685 /// Maps block arguments from \p blockArgIface (which are MLIR values) to the
686 /// corresponding LLVM values of \p the interface's operands. This is useful
687 /// when an OpenMP region with entry block arguments is converted to LLVM. In
688 /// this case the block arguments are (part of) of the OpenMP region's entry
689 /// arguments and the operands are (part of) of the operands to the OpenMP op
690 /// containing the region.
691 static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation,
692  omp::BlockArgOpenMPOpInterface blockArgIface) {
694  blockArgIface.getBlockArgsPairs(blockArgsPairs);
695  for (auto [var, arg] : blockArgsPairs)
696  moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
697 }
698 
699 /// Helper function to map block arguments defined by ignored loop wrappers to
700 /// LLVM values and prevent any uses of those from triggering null pointer
701 /// dereferences.
702 ///
703 /// This must be called after block arguments of parent wrappers have already
704 /// been mapped to LLVM IR values.
705 static LogicalResult
706 convertIgnoredWrapper(omp::LoopWrapperInterface opInst,
707  LLVM::ModuleTranslation &moduleTranslation) {
708  // Map block arguments directly to the LLVM value associated to the
709  // corresponding operand. This is semantically equivalent to this wrapper not
710  // being present.
712  .Case([&](omp::SimdOp op) {
713  forwardArgs(moduleTranslation,
714  cast<omp::BlockArgOpenMPOpInterface>(*op));
715  op.emitWarning() << "simd information on composite construct discarded";
716  return success();
717  })
718  .Default([&](Operation *op) {
719  return op->emitError() << "cannot ignore wrapper";
720  });
721 }
722 
723 /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
724 static LogicalResult
725 convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
726  LLVM::ModuleTranslation &moduleTranslation) {
727  auto maskedOp = cast<omp::MaskedOp>(opInst);
728  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
729 
730  if (failed(checkImplementationStatus(opInst)))
731  return failure();
732 
733  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
734  // MaskedOp has only one region associated with it.
735  auto &region = maskedOp.getRegion();
736  builder.restoreIP(codeGenIP);
737  return convertOmpOpRegions(region, "omp.masked.region", builder,
738  moduleTranslation)
739  .takeError();
740  };
741 
742  // TODO: Perform finalization actions for variables. This has to be
743  // called for variables which have destructors/finalizers.
744  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
745 
746  llvm::Value *filterVal = nullptr;
747  if (auto filterVar = maskedOp.getFilteredThreadId()) {
748  filterVal = moduleTranslation.lookupValue(filterVar);
749  } else {
750  llvm::LLVMContext &llvmContext = builder.getContext();
751  filterVal =
752  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), /*V=*/0);
753  }
754  assert(filterVal != nullptr);
755  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
756  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
757  moduleTranslation.getOpenMPBuilder()->createMasked(ompLoc, bodyGenCB,
758  finiCB, filterVal);
759 
760  if (failed(handleError(afterIP, opInst)))
761  return failure();
762 
763  builder.restoreIP(*afterIP);
764  return success();
765 }
766 
767 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
768 static LogicalResult
769 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
770  LLVM::ModuleTranslation &moduleTranslation) {
771  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
772  auto masterOp = cast<omp::MasterOp>(opInst);
773 
774  if (failed(checkImplementationStatus(opInst)))
775  return failure();
776 
777  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
778  // MasterOp has only one region associated with it.
779  auto &region = masterOp.getRegion();
780  builder.restoreIP(codeGenIP);
781  return convertOmpOpRegions(region, "omp.master.region", builder,
782  moduleTranslation)
783  .takeError();
784  };
785 
786  // TODO: Perform finalization actions for variables. This has to be
787  // called for variables which have destructors/finalizers.
788  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
789 
790  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
791  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
792  moduleTranslation.getOpenMPBuilder()->createMaster(ompLoc, bodyGenCB,
793  finiCB);
794 
795  if (failed(handleError(afterIP, opInst)))
796  return failure();
797 
798  builder.restoreIP(*afterIP);
799  return success();
800 }
801 
802 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
803 static LogicalResult
804 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
805  LLVM::ModuleTranslation &moduleTranslation) {
806  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
807  auto criticalOp = cast<omp::CriticalOp>(opInst);
808 
809  if (failed(checkImplementationStatus(opInst)))
810  return failure();
811 
812  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
813  // CriticalOp has only one region associated with it.
814  auto &region = cast<omp::CriticalOp>(opInst).getRegion();
815  builder.restoreIP(codeGenIP);
816  return convertOmpOpRegions(region, "omp.critical.region", builder,
817  moduleTranslation)
818  .takeError();
819  };
820 
821  // TODO: Perform finalization actions for variables. This has to be
822  // called for variables which have destructors/finalizers.
823  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
824 
825  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
826  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
827  llvm::Constant *hint = nullptr;
828 
829  // If it has a name, it probably has a hint too.
830  if (criticalOp.getNameAttr()) {
831  // The verifiers in OpenMP Dialect guarentee that all the pointers are
832  // non-null
833  auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
834  auto criticalDeclareOp =
835  SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
836  symbolRef);
837  hint =
838  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
839  static_cast<int>(criticalDeclareOp.getHint()));
840  }
841  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
842  moduleTranslation.getOpenMPBuilder()->createCritical(
843  ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint);
844 
845  if (failed(handleError(afterIP, opInst)))
846  return failure();
847 
848  builder.restoreIP(*afterIP);
849  return success();
850 }
851 
/// A util to collect info needed to convert delayed privatizers from MLIR to
/// LLVM.
  // NOTE(review): the struct declaration line and the member-field
  // declarations are not visible in this view (source lines are missing);
  // the constructor below gathers the op's private block arguments, MLIR
  // values, and privatizer declarations into parallel containers.
  template <typename OP>
      : blockArgs(
            cast<omp::BlockArgOpenMPOpInterface>(*op).getPrivateBlockArgs()) {
    // Pre-size the parallel vectors: one entry per private block argument.
    mlirVars.reserve(blockArgs.size());
    llvmVars.reserve(blockArgs.size());
    collectPrivatizationDecls<OP>(op);

    // Record the MLIR values being privatized, in clause order.
    for (mlir::Value privateVar : op.getPrivateVars())
      mlirVars.push_back(privateVar);
  }

private:
  /// Populates `privatizations` with privatization declarations used for the
  /// given op. Ops without a `private_syms` attribute contribute nothing.
  template <class OP>
  void collectPrivatizationDecls(OP op) {
    std::optional<ArrayAttr> attr = op.getPrivateSyms();
    if (!attr)
      return;

    // One privatizer symbol per private variable; resolve each symbol to its
    // declaration via nearest-symbol lookup.
    privatizers.reserve(privatizers.size() + attr->size());
    for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
      privatizers.push_back(findPrivatizer(op, symbolRef));
    }
  }
};
887 
/// Populates `reductions` with reduction declarations used in the given op.
template <typename T>
static void
  // NOTE(review): the line(s) naming this function and its parameters are
  // not visible in this view (source lines are missing here).
  std::optional<ArrayAttr> attr = op.getReductionSyms();
  if (!attr)
    return;

  // One declaration symbol per reduction variable; resolve each symbol to
  // its omp.declare_reduction op via nearest-symbol lookup.
  reductions.reserve(reductions.size() + op.getNumReductionVars());
  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
    reductions.push_back(
        SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
            op, symbolRef));
  }
}
904 
/// Translates the blocks contained in the given region and appends them to at
/// the current insertion point of `builder`. The operations of the entry block
/// are appended to the current insertion block. If set, `continuationBlockArgs`
/// is populated with translated values that correspond to the values
/// omp.yield'ed from the region.
static LogicalResult inlineConvertOmpRegions(
    Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
  // Nothing to translate for an empty region.
  if (region.empty())
    return success();

  // Special case for single-block regions that don't create additional blocks:
  // insert operations without creating additional blocks.
  if (llvm::hasSingleElement(region)) {
    // Remember the current block's terminator (if any); it is temporarily
    // detached so the region's operations are emitted before it.
    llvm::Instruction *potentialTerminator =
        builder.GetInsertBlock()->empty() ? nullptr
                                          : &builder.GetInsertBlock()->back();

    if (potentialTerminator && potentialTerminator->isTerminator())
      potentialTerminator->removeFromParent();
    moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());

    if (failed(moduleTranslation.convertBlock(
            region.front(), /*ignoreArguments=*/true, builder)))
      return failure();

    // The continuation arguments are simply the translated terminator operands.
    if (continuationBlockArgs)
      llvm::append_range(
          *continuationBlockArgs,
          moduleTranslation.lookupValues(region.front().back().getOperands()));

    // Drop the mapping that is no longer necessary so that the same region can
    // be processed multiple times.
    moduleTranslation.forgetMapping(region);

    // Re-attach the previously detached terminator at the end of the block.
    if (potentialTerminator && potentialTerminator->isTerminator()) {
      llvm::BasicBlock *block = builder.GetInsertBlock();
      if (block->empty()) {
        // this can happen for really simple reduction init regions e.g.
        // %0 = llvm.mlir.constant(0 : i32) : i32
        // omp.yield(%0 : i32)
        // because the llvm.mlir.constant (MLIR op) isn't converted into any
        // llvm op
        potentialTerminator->insertInto(block, block->begin());
      } else {
        potentialTerminator->insertAfter(&block->back());
      }
    }

    return success();
  }

  // General case: materialize the region as real basic blocks and continue
  // emitting at the continuation block's first insertion point.
  // NOTE(review): the declaration of `phis` is not visible in this view
  // (a source line is missing here).
  llvm::Expected<llvm::BasicBlock *> continuationBlock =
      convertOmpOpRegions(region, blockName, builder, moduleTranslation, &phis);

  if (failed(handleError(continuationBlock, *region.getParentOp())))
    return failure();

  if (continuationBlockArgs)
    llvm::append_range(*continuationBlockArgs, phis);
  builder.SetInsertPoint(*continuationBlock,
                         (*continuationBlock)->getFirstInsertionPt());
  return success();
}
972 
namespace {
/// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
/// store lambdas with capture.
// Non-atomic combiner: given an insertion point and the lhs/rhs operands,
// emits the combine code and reports the combined value through the
// `llvm::Value *&` out-parameter (see makeReductionGen below).
using OwningReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
        llvm::Value *&)>;
// Atomic variant: additionally receives the element type; the two values are
// forwarded to the declaration's atomic region (see makeAtomicReductionGen).
using OwningAtomicReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
        llvm::Value *)>;
} // namespace
985 
/// Create an OpenMPIRBuilder-compatible reduction generator for the given
/// reduction declaration. The generator uses `builder` but ignores its
/// insertion point.
static OwningReductionGen
makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  // The lambda is mutable because we need access to non-const methods of decl
  // (which aren't actually mutating it), and we must capture decl by-value to
  // avoid the dangling reference after the parent function returns.
  OwningReductionGen gen =
      [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
                llvm::Value *lhs, llvm::Value *rhs,
                llvm::Value *&result) mutable
      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
    // Bind the combiner region's block arguments to the incoming operands.
    moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs);
    moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs);
    builder.restoreIP(insertPoint);
    // NOTE(review): the declaration of `phis` is not visible in this view
    // (a source line is missing here).
    if (failed(inlineConvertOmpRegions(decl.getReductionRegion(),
                                       "omp.reduction.nonatomic.body", builder,
                                       moduleTranslation, &phis)))
      return llvm::createStringError(
          "failed to inline `combiner` region of `omp.declare_reduction`");
    // The combiner yields exactly one value: the combined reduction value.
    result = llvm::getSingleElement(phis);
    return builder.saveIP();
  };
  return gen;
}
1014 
/// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
/// given reduction declaration. The generator uses `builder` but ignores its
/// insertion point. Returns null if there is no atomic region available in the
/// reduction declaration.
static OwningAtomicReductionGen
makeAtomicReductionGen(omp::DeclareReductionOp decl,
                       llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  // No atomic region: signal "unavailable" with a default-constructed (empty)
  // generator; callers check for this before using it.
  if (decl.getAtomicReductionRegion().empty())
    return OwningAtomicReductionGen();

  // The lambda is mutable because we need access to non-const methods of decl
  // (which aren't actually mutating it), and we must capture decl by-value to
  // avoid the dangling reference after the parent function returns.
  OwningAtomicReductionGen atomicGen =
      [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
                llvm::Value *lhs, llvm::Value *rhs) mutable
      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
    // Bind the atomic region's block arguments to the incoming operands.
    moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs);
    moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs);
    builder.restoreIP(insertPoint);
    // NOTE(review): the declaration of `phis` is not visible in this view
    // (a source line is missing here).
    if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(),
                                       "omp.reduction.atomic.body", builder,
                                       moduleTranslation, &phis)))
      return llvm::createStringError(
          "failed to inline `atomic` region of `omp.declare_reduction`");
    // The atomic region yields no values.
    assert(phis.empty());
    return builder.saveIP();
  };
  return atomicGen;
}
1047 
1048 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
1049 static LogicalResult
1050 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
1051  LLVM::ModuleTranslation &moduleTranslation) {
1052  auto orderedOp = cast<omp::OrderedOp>(opInst);
1053 
1054  if (failed(checkImplementationStatus(opInst)))
1055  return failure();
1056 
1057  omp::ClauseDepend dependType = *orderedOp.getDoacrossDependType();
1058  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
1059  unsigned numLoops = *orderedOp.getDoacrossNumLoops();
1060  SmallVector<llvm::Value *> vecValues =
1061  moduleTranslation.lookupValues(orderedOp.getDoacrossDependVars());
1062 
1063  size_t indexVecValues = 0;
1064  while (indexVecValues < vecValues.size()) {
1065  SmallVector<llvm::Value *> storeValues;
1066  storeValues.reserve(numLoops);
1067  for (unsigned i = 0; i < numLoops; i++) {
1068  storeValues.push_back(vecValues[indexVecValues]);
1069  indexVecValues++;
1070  }
1071  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1072  findAllocaInsertPoint(builder, moduleTranslation);
1073  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1074  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
1075  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
1076  }
1077  return success();
1078 }
1079 
1080 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
1081 /// OpenMPIRBuilder.
1082 static LogicalResult
1083 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
1084  LLVM::ModuleTranslation &moduleTranslation) {
1085  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1086  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
1087 
1088  if (failed(checkImplementationStatus(opInst)))
1089  return failure();
1090 
1091  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1092  // OrderedOp has only one region associated with it.
1093  auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
1094  builder.restoreIP(codeGenIP);
1095  return convertOmpOpRegions(region, "omp.ordered.region", builder,
1096  moduleTranslation)
1097  .takeError();
1098  };
1099 
1100  // TODO: Perform finalization actions for variables. This has to be
1101  // called for variables which have destructors/finalizers.
1102  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1103 
1104  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1105  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1106  moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
1107  ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd());
1108 
1109  if (failed(handleError(afterIP, opInst)))
1110  return failure();
1111 
1112  builder.restoreIP(*afterIP);
1113  return success();
1114 }
1115 
namespace {
/// Contains the arguments for an LLVM store operation
struct DeferredStore {
  DeferredStore(llvm::Value *value, llvm::Value *address)
      : value(value), address(address) {}

  llvm::Value *value;   // value to be stored
  llvm::Value *address; // destination address of the store
};
} // namespace
1126 
/// Allocate space for privatized reduction variables.
/// `deferredStores` contains information to create store operations which needs
/// to be inserted after all allocas
template <typename T>
static LogicalResult
    // NOTE(review): the line naming this function and its leading
    // parameters, and the `reductionDecls` parameter line, are not visible
    // in this view (source lines are missing here).
    llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
    SmallVectorImpl<llvm::Value *> &privateReductionVariables,
    DenseMap<Value, llvm::Value *> &reductionVariableMap,
    SmallVectorImpl<DeferredStore> &deferredStores,
    llvm::ArrayRef<bool> isByRefs) {
  // Restore the caller's insertion point on exit; all allocas go before the
  // alloca block's terminator.
  llvm::IRBuilderBase::InsertPointGuard guard(builder);
  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());

  // delay creating stores until after all allocas
  deferredStores.reserve(loop.getNumReductionVars());

  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
    Region &allocRegion = reductionDecls[i].getAllocRegion();
    if (isByRefs[i]) {
      // By-ref without an alloc region is handled elsewhere (see the else
      // branch's TODO in initReductionVars).
      if (allocRegion.empty())
        continue;

      // NOTE(review): the declaration of `phis` is not visible in this view
      // (a source line is missing here).
      if (failed(inlineConvertOmpRegions(allocRegion, "omp.reduction.alloc",
                                         builder, moduleTranslation, &phis)))
        return loop.emitError(
            "failed to inline `alloc` region of `omp.declare_reduction`");

      assert(phis.size() == 1 && "expected one allocation to be yielded");
      builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());

      // Allocate reduction variable (which is a pointer to the real reduction
      // variable allocated in the inlined region)
      llvm::Value *var = builder.CreateAlloca(
          moduleTranslation.convertType(reductionDecls[i].getType()));

      // Normalize both pointers to the generic pointer type.
      llvm::Type *ptrTy = builder.getPtrTy();
      llvm::Value *castVar =
          builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
      llvm::Value *castPhi =
          builder.CreatePointerBitCastOrAddrSpaceCast(phis[0], ptrTy);

      // Store of the inlined allocation into `castVar` is deferred until all
      // allocas have been emitted.
      deferredStores.emplace_back(castPhi, castVar);

      privateReductionVariables[i] = castVar;
      moduleTranslation.mapValue(reductionArgs[i], castPhi);
      reductionVariableMap.try_emplace(loop.getReductionVars()[i], castPhi);
    } else {
      assert(allocRegion.empty() &&
             "allocaction is implicit for by-val reduction");
      llvm::Value *var = builder.CreateAlloca(
          moduleTranslation.convertType(reductionDecls[i].getType()));

      llvm::Type *ptrTy = builder.getPtrTy();
      llvm::Value *castVar =
          builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);

      moduleTranslation.mapValue(reductionArgs[i], castVar);
      privateReductionVariables[i] = castVar;
      reductionVariableMap.try_emplace(loop.getReductionVars()[i], castVar);
    }
  }

  return success();
}
1196 
/// Map input arguments to reduction initialization region
template <typename T>
static void
    // NOTE(review): the line naming this function and its leading
    // parameters is not visible in this view (source lines are missing
    // here).
    DenseMap<Value, llvm::Value *> &reductionVariableMap,
    unsigned i) {
  // map input argument to the initialization region
  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
  Region &initializerRegion = reduction.getInitializerRegion();
  Block &entry = initializerRegion.front();

  // The "mold" argument receives the translated original reduction variable.
  mlir::Value mlirSource = loop.getReductionVars()[i];
  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
  assert(llvmSource && "lookup reduction var");
  moduleTranslation.mapValue(reduction.getInitializerMoldArg(), llvmSource);

  // A second entry-block argument, when present, receives the allocation
  // recorded earlier for this reduction variable.
  if (entry.getNumArguments() > 1) {
    llvm::Value *allocation =
        reductionVariableMap.lookup(loop.getReductionVars()[i]);
    moduleTranslation.mapValue(reduction.getInitializerAllocArg(), allocation);
  }
}
1220 
1221 static void
1222 setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder,
1223  llvm::BasicBlock *block = nullptr) {
1224  if (block == nullptr)
1225  block = builder.GetInsertBlock();
1226 
1227  if (block->empty() || block->getTerminator() == nullptr)
1228  builder.SetInsertPoint(block);
1229  else
1230  builder.SetInsertPoint(block->getTerminator());
1231 }
1232 
/// Inline reductions' `init` regions. This functions assumes that the
/// `builder`'s insertion point is where the user wants the `init` regions to be
/// inlined; i.e. it does not try to find a proper insertion location for the
/// `init` regions. It also leaves the `builder's insertions point in a state
/// where the user can continue the code-gen directly afterwards.
template <typename OP>
static LogicalResult
    // NOTE(review): the line naming this function and its leading
    // parameters, and the `reductionDecls` parameter line, are not visible
    // in this view (source lines are missing here).
    llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    llvm::BasicBlock *latestAllocaBlock,
    SmallVectorImpl<llvm::Value *> &privateReductionVariables,
    DenseMap<Value, llvm::Value *> &reductionVariableMap,
    llvm::ArrayRef<bool> isByRef,
    SmallVectorImpl<DeferredStore> &deferredStores) {
  // Nothing to do without reduction clauses.
  if (op.getNumReductionVars() == 0)
    return success();

  // Inline all init regions into a dedicated block; allocas still go into
  // the latest alloca block.
  llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
  auto allocaIP = llvm::IRBuilderBase::InsertPoint(
      latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
  builder.restoreIP(allocaIP);
  SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars());

  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
    if (isByRef[i]) {
      // Allocation was already done from the decl's alloc region.
      if (!reductionDecls[i].getAllocRegion().empty())
        continue;

      // TODO: remove after all users of by-ref are updated to use the alloc
      // region: Allocate reduction variable (which is a pointer to the real
      // reduction variable allocated in the inlined region)
      byRefVars[i] = builder.CreateAlloca(
          moduleTranslation.convertType(reductionDecls[i].getType()));
    }
  }

  setInsertPointForPossiblyEmptyBlock(builder, initBlock);

  // store result of the alloc region to the allocated pointer to the real
  // reduction variable
  for (auto [data, addr] : deferredStores)
    builder.CreateStore(data, addr);

  // Before the loop, store the initial values of reductions into reduction
  // variables. Although this could be done after allocas, we don't want to mess
  // up with the alloca insertion point.
  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
    // NOTE(review): the declaration of `phis` is not visible in this view
    // (a source line is missing here).

    // map block argument to initializer region
    mapInitializationArgs(op, moduleTranslation, reductionDecls,
                          reductionVariableMap, i);

    if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
                                       "omp.reduction.neutral", builder,
                                       moduleTranslation, &phis)))
      return failure();

    assert(phis.size() == 1 && "expected one value to be yielded from the "
                               "reduction neutral element declaration region");

    // NOTE(review): one or two source lines are missing here in this view.

    if (isByRef[i]) {
      if (!reductionDecls[i].getAllocRegion().empty())
        // done in allocReductionVars
        continue;

      // TODO: this path can be removed once all users of by-ref are updated to
      // use an alloc region

      // Store the result of the inlined region to the allocated reduction var
      // ptr
      builder.CreateStore(phis[0], byRefVars[i]);

      privateReductionVariables[i] = byRefVars[i];
      moduleTranslation.mapValue(reductionArgs[i], phis[0]);
      reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
    } else {
      // for by-ref case the store is inside of the reduction region
      builder.CreateStore(phis[0], privateReductionVariables[i]);
      // the rest was handled in allocByValReductionVars
    }

    // forget the mapping for the initializer region because we might need a
    // different mapping if this reduction declaration is re-used for a
    // different variable
    moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
  }

  return success();
}
1327 
/// Collect reduction info
template <typename T>
    // NOTE(review): the line naming this function, the `reductionDecls`
    // parameter line, and the `reductionInfos` parameter line are not
    // visible in this view (source lines are missing here).
    T loop, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    SmallVectorImpl<OwningReductionGen> &owningReductionGens,
    SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens,
    const ArrayRef<llvm::Value *> privateReductionVariables,
  unsigned numReductions = loop.getNumReductionVars();

  // Build one owning (non-atomic, atomic) generator pair per reduction; the
  // atomic generator may be null when the decl has no atomic region.
  for (unsigned i = 0; i < numReductions; ++i) {
    owningReductionGens.push_back(
        makeReductionGen(reductionDecls[i], builder, moduleTranslation));
    owningAtomicReductionGens.push_back(
        makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
  }

  // Collect the reduction information.
  reductionInfos.reserve(numReductions);
  for (unsigned i = 0; i < numReductions; ++i) {
    llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
    if (owningAtomicReductionGens[i])
      atomicGen = owningAtomicReductionGens[i];
    llvm::Value *variable =
        moduleTranslation.lookupValue(loop.getReductionVars()[i]);
    reductionInfos.push_back(
        {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
         privateReductionVariables[i],
         /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
         owningReductionGens[i],
         /*ReductionGenClang=*/nullptr, atomicGen});
  }
}
1363 
/// handling of DeclareReductionOp's cleanup region
static LogicalResult
    // NOTE(review): the line naming this function and its first parameter
    // (the list of cleanup regions) is not visible in this view.
    llvm::ArrayRef<llvm::Value *> privateVariables,
    LLVM::ModuleTranslation &moduleTranslation,
    llvm::IRBuilderBase &builder, StringRef regionName,
    bool shouldLoadCleanupRegionArg = true) {
  for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
    // Declarations without a cleanup region need no work.
    if (cleanupRegion->empty())
      continue;

    // map the argument to the cleanup region
    Block &entry = cleanupRegion->front();

    // If the current block already ends in a terminator, emit the cleanup
    // code before it.
    llvm::Instruction *potentialTerminator =
        builder.GetInsertBlock()->empty() ? nullptr
                                          : &builder.GetInsertBlock()->back();
    if (potentialTerminator && potentialTerminator->isTerminator())
      builder.SetInsertPoint(potentialTerminator);
    // The region argument is either the loaded private value or the raw
    // pointer, depending on `shouldLoadCleanupRegionArg`.
    llvm::Value *privateVarValue =
        shouldLoadCleanupRegionArg
            ? builder.CreateLoad(
                  moduleTranslation.convertType(entry.getArgument(0).getType()),
                  privateVariables[i])
            : privateVariables[i];

    moduleTranslation.mapValue(entry.getArgument(0), privateVarValue);

    if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
                                       moduleTranslation)))
      return failure();

    // clear block argument mapping in case it needs to be re-created with a
    // different source for another use of the same reduction decl
    moduleTranslation.forgetMapping(*cleanupRegion);
  }
  return success();
}
1402 
// TODO: not used by ParallelOp
template <class OP>
static LogicalResult createReductionsAndCleanup(
    OP op, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
    // NOTE(review): a parameter line (the reduction declarations) is not
    // visible in this view (a source line is missing here).
    ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef,
    bool isNowait = false, bool isTeamsReduction = false) {
  // Process the reductions if required.
  if (op.getNumReductionVars() == 0)
    return success();

  SmallVector<OwningReductionGen> owningReductionGens;
  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
  // NOTE(review): the declaration of `reductionInfos` is not visible in this
  // view (a source line is missing here).

  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

  // Create the reduction generators. We need to own them here because
  // ReductionInfo only accepts references to the generators.
  collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
                       owningReductionGens, owningAtomicReductionGens,
                       privateReductionVariables, reductionInfos);

  // The call to createReductions below expects the block to have a
  // terminator. Create an unreachable instruction to serve as terminator
  // and remove it later.
  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
  builder.SetInsertPoint(tempTerminator);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
      ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
                                   isByRef, isNowait, isTeamsReduction);

  if (failed(handleError(contInsertPoint, *op)))
    return failure();

  if (!contInsertPoint->getBlock())
    return op->emitOpError() << "failed to convert reductions";

  // Reductions are followed by an implicit barrier.
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);

  if (failed(handleError(afterIP, *op)))
    return failure();

  tempTerminator->eraseFromParent();
  builder.restoreIP(*afterIP);

  // after the construct, deallocate private reduction variables
  SmallVector<Region *> reductionRegions;
  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
                  [](omp::DeclareReductionOp reductionDecl) {
                    return &reductionDecl.getCleanupRegion();
                  });
  return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
                                moduleTranslation, builder,
                                "omp.reduction.cleanup");
  // NOTE(review): the `return success();` below is unreachable dead code
  // (the function already returned on the line above); consider removing it.
  return success();
}
1463 
1464 static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
1465  if (!attr)
1466  return {};
1467  return *attr;
1468 }
1469 
// TODO: not used by omp.parallel
template <typename OP>
static LogicalResult allocAndInitializeReductionVars(
    OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
    // NOTE(review): a parameter line (the reduction declarations) is not
    // visible in this view (a source line is missing here).
    SmallVectorImpl<llvm::Value *> &privateReductionVariables,
    DenseMap<Value, llvm::Value *> &reductionVariableMap,
    llvm::ArrayRef<bool> isByRef) {
  // Nothing to do without reduction clauses.
  if (op.getNumReductionVars() == 0)
    return success();

  // Stores that must be emitted only after every alloca has been created.
  SmallVector<DeferredStore> deferredStores;

  // First allocate storage for each reduction variable ...
  if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
                                allocaIP, reductionDecls,
                                privateReductionVariables, reductionVariableMap,
                                deferredStores, isByRef)))
    return failure();

  // ... then inline each declaration's initializer region to produce the
  // initial value for the reduction.
  return initReductionVars(op, reductionArgs, builder, moduleTranslation,
                           allocaIP.getBlock(), reductionDecls,
                           privateReductionVariables, reductionVariableMap,
                           isByRef, deferredStores);
}
1496 
1497 /// Return the llvm::Value * corresponding to the `privateVar` that
1498 /// is being privatized. It isn't always as simple as looking up
1499 /// moduleTranslation with privateVar. For instance, in case of
1500 /// an allocatable, the descriptor for the allocatable is privatized.
1501 /// This descriptor is mapped using an MapInfoOp. So, this function
1502 /// will return a pointer to the llvm::Value corresponding to the
1503 /// block argument for the mapped descriptor.
1504 static llvm::Value *
1505 findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
1506  LLVM::ModuleTranslation &moduleTranslation,
1507  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1508  if (mappedPrivateVars == nullptr || !mappedPrivateVars->contains(privateVar))
1509  return moduleTranslation.lookupValue(privateVar);
1510 
1511  Value blockArg = (*mappedPrivateVars)[privateVar];
1512  Type privVarType = privateVar.getType();
1513  Type blockArgType = blockArg.getType();
1514  assert(isa<LLVM::LLVMPointerType>(blockArgType) &&
1515  "A block argument corresponding to a mapped var should have "
1516  "!llvm.ptr type");
1517 
1518  if (privVarType == blockArgType)
1519  return moduleTranslation.lookupValue(blockArg);
1520 
1521  // This typically happens when the privatized type is lowered from
1522  // boxchar<KIND> and gets lowered to !llvm.struct<(ptr, i64)>. That is the
1523  // struct/pair is passed by value. But, mapped values are passed only as
1524  // pointers, so before we privatize, we must load the pointer.
1525  if (!isa<LLVM::LLVMPointerType>(privVarType))
1526  return builder.CreateLoad(moduleTranslation.convertType(privVarType),
1527  moduleTranslation.lookupValue(blockArg));
1528 
1529  return moduleTranslation.lookupValue(privateVar);
1530 }
1531 
/// Initialize a single (first)private variable. You probably want to use
/// allocateAndInitPrivateVars instead of this.
/// This returns the private variable which has been initialized. This
/// variable should be mapped before constructing the body of the Op.
    // NOTE(review): the line with this function's return type and name is
    // not visible in this view (a source line is missing here).
    llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
    omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg,
    llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock,
    llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
  // No init region: the allocated private variable is used as-is.
  Region &initRegion = privDecl.getInitRegion();
  if (initRegion.empty())
    return llvmPrivateVar;

  // map initialization region block arguments
  llvm::Value *nonPrivateVar = findAssociatedValue(
      mlirPrivVar, builder, moduleTranslation, mappedPrivateVars);
  assert(nonPrivateVar);
  moduleTranslation.mapValue(privDecl.getInitMoldArg(), nonPrivateVar);
  moduleTranslation.mapValue(privDecl.getInitPrivateArg(), llvmPrivateVar);

  // in-place convert the private initialization region
  // NOTE(review): the declaration of `phis` is not visible in this view
  // (a source line is missing here).
  if (failed(inlineConvertOmpRegions(initRegion, "omp.private.init", builder,
                                     moduleTranslation, &phis)))
    return llvm::createStringError(
        "failed to inline `init` region of `omp.private`");

  assert(phis.size() == 1 && "expected one allocation to be yielded");

  // clear init region block argument mapping in case it needs to be
  // re-created with a different source for another use of the same
  // reduction decl
  moduleTranslation.forgetMapping(initRegion);

  // Prefer the value yielded from the init region to the allocated private
  // variable in case the region is operating on arguments by-value (e.g.
  // Fortran character boxes).
  return phis[0];
}
1571 
/// Inline the `init` region of every delayed privatizer, mapping each private
/// block argument to its initialized LLVM value.
static llvm::Error
initPrivateVars(llvm::IRBuilderBase &builder,
                LLVM::ModuleTranslation &moduleTranslation,
                PrivateVarsInfo &privateVarsInfo,
                llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
  // No private clauses: nothing to initialize.
  if (privateVarsInfo.blockArgs.empty())
    return llvm::Error::success();

  // All init regions are inlined into a dedicated block.
  llvm::BasicBlock *privInitBlock = splitBB(builder, true, "omp.private.init");
  setInsertPointForPossiblyEmptyBlock(builder, privInitBlock);

  // Walk the privatizers, their MLIR values, block arguments, and allocated
  // LLVM variables in lock-step.
  for (auto [idx, zip] : llvm::enumerate(llvm::zip_equal(
           privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
           privateVarsInfo.blockArgs, privateVarsInfo.llvmVars))) {
    auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVar] = zip;
    // NOTE(review): the line that declares `privVarOrErr` and begins the
    // call producing it is not visible in this view (a source line is
    // missing here).
        builder, moduleTranslation, privDecl, mlirPrivVar, blockArg,
        llvmPrivateVar, privInitBlock, mappedPrivateVars);

    if (!privVarOrErr)
      return privVarOrErr.takeError();

    // Record the (possibly replaced) private value for this block argument.
    llvmPrivateVar = privVarOrErr.get();
    moduleTranslation.mapValue(blockArg, llvmPrivateVar);

    // NOTE(review): a source line is missing here in this view.
  }

  return llvm::Error::success();
}
1602 
/// Allocate and initialize delayed private variables. Returns the basic block
/// which comes after all of these allocations. llvm::Value * for each of these
/// private variables are populated in llvmPrivateVars.
allocatePrivateVars(llvm::IRBuilderBase &builder,
                    LLVM::ModuleTranslation &moduleTranslation,
                    PrivateVarsInfo &privateVarsInfo,
                    const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
                    llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
  // Allocate private vars
  llvm::Instruction *allocaTerminator = allocaIP.getBlock()->getTerminator();
  // Split the alloca block so that everything after the allocas lands in a
  // fresh "omp.region.after_alloca" block.
  splitBB(llvm::OpenMPIRBuilder::InsertPointTy(allocaIP.getBlock(),
                                               allocaTerminator->getIterator()),
          true, allocaTerminator->getStableDebugLoc(),
          "omp.region.after_alloca");

  llvm::IRBuilderBase::InsertPointGuard guard(builder);
  // Update the allocaTerminator since the alloca block was split above.
  allocaTerminator = allocaIP.getBlock()->getTerminator();
  builder.SetInsertPoint(allocaTerminator);
  // The new terminator is an uncondition branch created by the splitBB above.
  assert(allocaTerminator->getNumSuccessors() == 1 &&
         "This is an unconditional branch created by splitBB");

  llvm::DataLayout dataLayout = builder.GetInsertBlock()->getDataLayout();
  llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);

  // When the target's alloca address space differs from the program address
  // space, each allocation must be cast back to the default AS (see below).
  unsigned int allocaAS =
      moduleTranslation.getLLVMModule()->getDataLayout().getAllocaAddrSpace();
  unsigned int defaultAS = moduleTranslation.getLLVMModule()
                               ->getDataLayout()
                               .getProgramAddressSpace();

  for (auto [privDecl, mlirPrivVar, blockArg] :
       llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
                       privateVarsInfo.blockArgs)) {
    llvm::Type *llvmAllocType =
        moduleTranslation.convertType(privDecl.getType());
    builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
    llvm::Value *llvmPrivateVar = builder.CreateAlloca(
        llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
    if (allocaAS != defaultAS)
      llvmPrivateVar = builder.CreateAddrSpaceCast(llvmPrivateVar,
                                                   builder.getPtrTy(defaultAS));

    privateVarsInfo.llvmVars.push_back(llvmPrivateVar);
  }

  return afterAllocas;
}
1653 
/// Inline each firstprivate privatizer's `copy` region (which implements
/// `lhs = rhs`) so the original variable's value is copied into its private
/// copy. Non-firstprivate decls are skipped. When \p insertBarrier is set, a
/// barrier is emitted after all copies.
static LogicalResult copyFirstPrivateVars(
    mlir::Operation *op, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    SmallVectorImpl<mlir::Value> &mlirPrivateVars,
    ArrayRef<llvm::Value *> llvmPrivateVars,
    SmallVectorImpl<omp::PrivateClauseOp> &privateDecls, bool insertBarrier,
    llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
  // Apply copy region for firstprivate.
  bool needsFirstprivate =
      llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
        return privOp.getDataSharingType() ==
               omp::DataSharingClauseType::FirstPrivate;
      });

  // Nothing is firstprivate: avoid splitting a block for no reason.
  if (!needsFirstprivate)
    return success();

  llvm::BasicBlock *copyBlock =
      splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);

  for (auto [decl, mlirVar, llvmVar] :
       llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
    if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
      continue;

    // copyRegion implements `lhs = rhs`
    Region &copyRegion = decl.getCopyRegion();

    // map copyRegion rhs arg
    llvm::Value *nonPrivateVar = findAssociatedValue(
        mlirVar, builder, moduleTranslation, mappedPrivateVars);
    assert(nonPrivateVar);
    moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);

    // map copyRegion lhs arg
    moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);

    // in-place convert copy region
    if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
                                       moduleTranslation)))
      return decl.emitError("failed to inline `copy` region of `omp.private`");


    // ignore unused value yielded from copy region

    // clear copy region block argument mapping in case it needs to be
    // re-created with different sources for reuse of the same reduction
    // decl
    moduleTranslation.forgetMapping(copyRegion);
  }

  if (insertBarrier) {
    llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
    llvm::OpenMPIRBuilder::InsertPointOrErrorTy res =
        ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
    if (failed(handleError(res, *op)))
      return failure();
  }

  return success();
}
1717 
1718 static LogicalResult
1719 cleanupPrivateVars(llvm::IRBuilderBase &builder,
1720  LLVM::ModuleTranslation &moduleTranslation, Location loc,
1721  SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
1722  SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
1723  // private variable deallocation
1724  SmallVector<Region *> privateCleanupRegions;
1725  llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
1726  [](omp::PrivateClauseOp privatizer) {
1727  return &privatizer.getDeallocRegion();
1728  });
1729 
1730  if (failed(inlineOmpRegionCleanup(
1731  privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1732  "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1733  return mlir::emitError(loc, "failed to inline `dealloc` region of an "
1734  "`omp.private` op in");
1735 
1736  return success();
1737 }
1738 
/// Returns true if the construct contains omp.cancel or omp.cancellation_point
  // omp.cancel and omp.cancellation_point must be "closely nested" so they will
  // be visible and not inside of function calls. This is enforced by the
  // verifier.
  // Interrupting the walk as soon as a cancel-related op is found both stops
  // the traversal early and records that one was present.
  return op
      ->walk([](Operation *child) {
        if (mlir::isa<omp::CancelOp, omp::CancellationPointOp>(child))
          return WalkResult::interrupt();
        return WalkResult::advance();
      })
      .wasInterrupted();
}
1752 
/// Converts an OpenMP sections construct into LLVM IR using OpenMPIRBuilder:
/// one body-generation callback is produced per omp.section region, then all
/// are handed to createSections(). Reduction variables are allocated and
/// initialized up front and finalized after the construct.
static LogicalResult
convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  using StorableBodyGenCallbackTy =
      llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

  auto sectionsOp = cast<omp::SectionsOp>(opInst);

  if (failed(checkImplementationStatus(opInst)))
    return failure();

  llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionByref());
  assert(isByRef.size() == sectionsOp.getNumReductionVars());

  SmallVector<omp::DeclareReductionOp> reductionDecls;
  collectReductionDecls(sectionsOp, reductionDecls);
  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  SmallVector<llvm::Value *> privateReductionVariables(
      sectionsOp.getNumReductionVars());
  DenseMap<Value, llvm::Value *> reductionVariableMap;

  MutableArrayRef<BlockArgument> reductionArgs =
      cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();

          sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
          reductionDecls, privateReductionVariables, reductionVariableMap,
          isByRef)))
    return failure();


  // Build one deferred body-generation callback per omp.section op.
  for (Operation &op : *sectionsOp.getRegion().begin()) {
    auto sectionOp = dyn_cast<omp::SectionOp>(op);
    if (!sectionOp) // omp.terminator
      continue;

    Region &region = sectionOp.getRegion();
    auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation](
                         InsertPointTy allocaIP, InsertPointTy codeGenIP) {
      builder.restoreIP(codeGenIP);

      // map the omp.section reduction block argument to the omp.sections block
      // arguments
      // TODO: this assumes that the only block arguments are reduction
      // variables
      assert(region.getNumArguments() ==
             sectionsOp.getRegion().getNumArguments());
      for (auto [sectionsArg, sectionArg] : llvm::zip_equal(
               sectionsOp.getRegion().getArguments(), region.getArguments())) {
        llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg);
        assert(llvmVal);
        moduleTranslation.mapValue(sectionArg, llvmVal);
      }

      return convertOmpOpRegions(region, "omp.section.region", builder,
                                 moduleTranslation)
          .takeError();
    };
    sectionCBs.push_back(sectionCB);
  }

  // No sections within omp.sections operation - skip generation. This situation
  // is only possible if there is only a terminator operation inside the
  // sections operation
  if (sectionCBs.empty())
    return success();

  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));

  // TODO: Perform appropriate actions according to the data-sharing
  // attribute (shared, private, firstprivate, ...) of variables.
  // Currently defaults to shared.
  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
                    llvm::Value &vPtr, llvm::Value *&replacementValue)
      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
    replacementValue = &vPtr;
    return codeGenIP;
  };

  // TODO: Perform finalization actions for variables. This has to be
  // called for variables which have destructors/finalizers.
  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };

  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  bool isCancellable = constructIsCancellable(sectionsOp);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createSections(
          ompLoc, allocaIP, sectionCBs, privCB, finiCB, isCancellable,
          sectionsOp.getNowait());

  if (failed(handleError(afterIP, opInst)))
    return failure();

  builder.restoreIP(*afterIP);

  // Process the reductions if required.
      sectionsOp, builder, moduleTranslation, allocaIP, reductionDecls,
      privateReductionVariables, isByRef, sectionsOp.getNowait());
}
1858 
/// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  if (failed(checkImplementationStatus(*singleOp)))
    return failure();

  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
    builder.restoreIP(codegenIP);
    return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region",
                               builder, moduleTranslation)
        .takeError();
  };
  // No finalization work is needed for single.
  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };

  // Handle copyprivate: collect the LLVM value and the user-supplied copy
  // function for each copyprivate variable so createSingle() can broadcast
  // the executing thread's values to the other threads.
  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateSyms();
  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
    llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
    auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
        singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
    llvmCPFuncs.push_back(
        moduleTranslation.lookupFunction(llvmFuncOp.getName()));
  }

  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createSingle(
          ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars,
          llvmCPFuncs);

  if (failed(handleError(afterIP, *singleOp)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
1901 
/// Returns true when every (non-debug) use of the teams reduction block
/// arguments lives inside one single distribute op, i.e. the reduction will
/// be handled by the distribute construct rather than by teams. As a side
/// effect, erases debug uses of those arguments when returning true so they
/// are not left unmapped during translation.
static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp) {
  auto iface =
      llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(teamsOp.getOperation());
  // Check that all uses of the reduction block arg has the same distribute op
  // parent.
  Operation *distOp = nullptr;
  for (auto ra : iface.getReductionBlockArgs())
    for (auto &use : ra.getUses()) {
      auto *useOp = use.getOwner();
      // Ignore debug uses.
      if (mlir::isa<LLVM::DbgDeclareOp, LLVM::DbgValueOp>(useOp)) {
        debugUses.push_back(useOp);
        continue;
      }

      auto currentDistOp = useOp->getParentOfType<omp::DistributeOp>();
      // Use is not inside a distribute op - return false
      if (!currentDistOp)
        return false;
      // Multiple distribute operations - return false
      Operation *currentOp = currentDistOp.getOperation();
      if (distOp && (distOp != currentOp))
        return false;

      distOp = currentOp;
    }

  // If we are going to use distribute reduction then remove any debug uses of
  // the reduction parameters in teamsOp. Otherwise they will be left without
  // any mapped value in moduleTranslation and will eventually error out.
  for (auto use : debugUses)
    use->erase();
  return true;
}
1937 
// Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder
static LogicalResult
convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
                LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  if (failed(checkImplementationStatus(*op)))
    return failure();

  DenseMap<Value, llvm::Value *> reductionVariableMap;
  unsigned numReductionVars = op.getNumReductionVars();
  SmallVector<omp::DeclareReductionOp> reductionDecls;
  SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
  llvm::ArrayRef<bool> isByRef;
  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  // Only do teams reduction if there is no distribute op that captures the
  // reduction instead.
  bool doTeamsReduction = !teamsReductionContainedInDistribute(op);
  if (doTeamsReduction) {
    isByRef = getIsByRef(op.getReductionByref());

    assert(isByRef.size() == op.getNumReductionVars());

    MutableArrayRef<BlockArgument> reductionArgs =
        llvm::cast<omp::BlockArgOpenMPOpInterface>(*op).getReductionBlockArgs();

    collectReductionDecls(op, reductionDecls);

        op, reductionArgs, builder, moduleTranslation, allocaIP,
        reductionDecls, privateReductionVariables, reductionVariableMap,
        isByRef)))
      return failure();
  }

  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
        moduleTranslation, allocaIP);
    builder.restoreIP(codegenIP);
    return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
                               moduleTranslation)
        .takeError();
  };

  // Translate the optional clause operands to their LLVM values (null when
  // the clause is absent).
  llvm::Value *numTeamsLower = nullptr;
  if (Value numTeamsLowerVar = op.getNumTeamsLower())
    numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);

  llvm::Value *numTeamsUpper = nullptr;
  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
    numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);

  llvm::Value *threadLimit = nullptr;
  if (Value threadLimitVar = op.getThreadLimit())
    threadLimit = moduleTranslation.lookupValue(threadLimitVar);

  llvm::Value *ifExpr = nullptr;
  if (Value ifVar = op.getIfExpr())
    ifExpr = moduleTranslation.lookupValue(ifVar);

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createTeams(
          ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr);

  if (failed(handleError(afterIP, *op)))
    return failure();

  builder.restoreIP(*afterIP);
  if (doTeamsReduction) {
    // Process the reductions if required.
        op, builder, moduleTranslation, allocaIP, reductionDecls,
        privateReductionVariables, isByRef,
        /*isNoWait*/ false, /*isTeamsReduction*/ true);
  }
  return success();
}
2017 
/// Translate the operands of a `depend` clause into OpenMPIRBuilder
/// DependData entries. \p dependKinds holds one dependence-kind attribute per
/// variable in \p dependVars; the resulting entries are appended to the
/// runtime dependence list. Does nothing when there are no depend variables.
static void
buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
                LLVM::ModuleTranslation &moduleTranslation,
  if (dependVars.empty())
    return;
  for (auto dep : llvm::zip(dependVars, dependKinds->getValue())) {
    llvm::omp::RTLDependenceKindTy type;
    switch (
        cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
    case mlir::omp::ClauseTaskDepend::taskdependin:
      type = llvm::omp::RTLDependenceKindTy::DepIn;
      break;
    // The OpenMP runtime requires that the codegen for 'depend' clause for
    // 'out' dependency kind must be the same as codegen for 'depend' clause
    // with 'inout' dependency.
    case mlir::omp::ClauseTaskDepend::taskdependout:
    case mlir::omp::ClauseTaskDepend::taskdependinout:
      type = llvm::omp::RTLDependenceKindTy::DepInOut;
      break;
    case mlir::omp::ClauseTaskDepend::taskdependmutexinoutset:
      type = llvm::omp::RTLDependenceKindTy::DepMutexInOutSet;
      break;
    case mlir::omp::ClauseTaskDepend::taskdependinoutset:
      type = llvm::omp::RTLDependenceKindTy::DepInOutSet;
      break;
    };
    llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
    llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
    dds.emplace_back(dd);
  }
}
2050 
/// Shared implementation of a callback which adds a terminator for the new
/// block created for the branch taken when an openmp construct is cancelled.
/// The terminator is saved in \p cancelTerminators. This callback is invoked
/// only if there is cancellation inside of the taskgroup body.
/// The terminator will need to be fixed to branch to the correct block to
/// cleanup the construct.
static void
                         llvm::IRBuilderBase &llvmBuilder,
                         llvm::OpenMPIRBuilder &ompBuilder, mlir::Operation *op,
                         llvm::omp::Directive cancelDirective) {
  auto finiCB = [&](llvm::OpenMPIRBuilder::InsertPointTy ip) -> llvm::Error {
    llvm::IRBuilderBase::InsertPointGuard guard(llvmBuilder);

    // ip is currently in the block branched to if cancellation occurred.
    // We need to create a branch to terminate that block.
    llvmBuilder.restoreIP(ip);

    // We must still clean up the construct after cancelling it, so we need to
    // branch to the block that finalizes the taskgroup.
    // That block has not been created yet so use this block as a dummy for now
    // and fix this after creating the operation.
    cancelTerminators.push_back(llvmBuilder.CreateBr(ip.getBlock()));
    return llvm::Error::success();
  };
  // We have to add the cleanup to the OpenMPIRBuilder before the body gets
  // created in case the body contains omp.cancel (which will then expect to be
  // able to find this cleanup callback).
  ompBuilder.pushFinalizationCB(
      {finiCB, cancelDirective, constructIsCancellable(op)});
}
2082 
/// If we cancelled the construct, we should branch to the finalization block of
/// that construct. OMPIRBuilder structures the CFG such that the cleanup block
/// is immediately before the continuation block. Now this finalization has
/// been created we can fix the branch.
static void
                        llvm::OpenMPIRBuilder &ompBuilder,
                        const llvm::OpenMPIRBuilder::InsertPointTy &afterIP) {
  ompBuilder.popFinalizationCB();
  // The cleanup block is the single predecessor of the continuation block.
  llvm::BasicBlock *constructFini = afterIP.getBlock()->getSinglePredecessor();
  for (llvm::BranchInst *cancelBranch : cancelTerminators) {
    assert(cancelBranch->getNumSuccessors() == 1 &&
           "cancel branch should have one target");
    // Retarget the placeholder branch created by the finalization callback.
    cancelBranch->setSuccessor(0, constructFini);
  }
}
2099 
namespace {
/// TaskContextStructManager takes care of creating and freeing a structure
/// containing information needed by the task body to execute.
class TaskContextStructManager {
public:
  TaskContextStructManager(llvm::IRBuilderBase &builder,
                           LLVM::ModuleTranslation &moduleTranslation,
      : builder{builder}, moduleTranslation{moduleTranslation},
        privateDecls{privateDecls} {}

  /// Creates a heap allocated struct containing space for each private
  /// variable. Invariant: privateVarTypes, privateDecls, and the elements of
  /// the structure should all have the same order (although privateDecls which
  /// do not read from the mold argument are skipped).
  void generateTaskContextStruct();

  /// Create GEPs to access each member of the structure representing a private
  /// variable, adding them to llvmPrivateVars. Null values are added where
  /// private decls were skipped so that the ordering continues to match the
  /// private decls.
  void createGEPsToPrivateVars();

  /// De-allocate the task context structure.
  void freeStructPtr();

  /// GEPs for the private variables inside the context structure; null
  /// entries correspond to privatizers handled inside the task instead.
  MutableArrayRef<llvm::Value *> getLLVMPrivateVarGEPs() {
    return llvmPrivateVarGEPs;
  }

  /// Pointer to the heap-allocated context structure; null if
  /// generateTaskContextStruct() has not run or had nothing to allocate.
  llvm::Value *getStructPtr() { return structPtr; }

private:
  llvm::IRBuilderBase &builder;
  LLVM::ModuleTranslation &moduleTranslation;

  /// The type of each member of the structure, in order.
  SmallVector<llvm::Type *> privateVarTypes;

  /// LLVM values for each private variable, or null if that private variable is
  /// not included in the task context structure
  SmallVector<llvm::Value *> llvmPrivateVarGEPs;

  /// A pointer to the structure containing context for this task.
  llvm::Value *structPtr = nullptr;
  /// The type of the structure
  llvm::Type *structTy = nullptr;
};
} // namespace
2150 
2151 void TaskContextStructManager::generateTaskContextStruct() {
2152  if (privateDecls.empty())
2153  return;
2154  privateVarTypes.reserve(privateDecls.size());
2155 
2156  for (omp::PrivateClauseOp &privOp : privateDecls) {
2157  // Skip private variables which can safely be allocated and initialised
2158  // inside of the task
2159  if (!privOp.readsFromMold())
2160  continue;
2161  Type mlirType = privOp.getType();
2162  privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
2163  }
2164 
2165  structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(),
2166  privateVarTypes);
2167 
2168  llvm::DataLayout dataLayout =
2169  builder.GetInsertBlock()->getModule()->getDataLayout();
2170  llvm::Type *intPtrTy = builder.getIntPtrTy(dataLayout);
2171  llvm::Constant *allocSize = llvm::ConstantExpr::getSizeOf(structTy);
2172 
2173  // Heap allocate the structure
2174  structPtr = builder.CreateMalloc(intPtrTy, structTy, allocSize,
2175  /*ArraySize=*/nullptr, /*MallocF=*/nullptr,
2176  "omp.task.context_ptr");
2177 }
2178 
2179 void TaskContextStructManager::createGEPsToPrivateVars() {
2180  if (!structPtr) {
2181  assert(privateVarTypes.empty());
2182  return;
2183  }
2184 
2185  // Create GEPs for each struct member
2186  llvmPrivateVarGEPs.clear();
2187  llvmPrivateVarGEPs.reserve(privateDecls.size());
2188  llvm::Value *zero = builder.getInt32(0);
2189  unsigned i = 0;
2190  for (auto privDecl : privateDecls) {
2191  if (!privDecl.readsFromMold()) {
2192  // Handle this inside of the task so we don't pass unnessecary vars in
2193  llvmPrivateVarGEPs.push_back(nullptr);
2194  continue;
2195  }
2196  llvm::Value *iVal = builder.getInt32(i);
2197  llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
2198  llvmPrivateVarGEPs.push_back(gep);
2199  i += 1;
2200  }
2201 }
2202 
2203 void TaskContextStructManager::freeStructPtr() {
2204  if (!structPtr)
2205  return;
2206 
2207  llvm::IRBuilderBase::InsertPointGuard guard{builder};
2208  // Ensure we don't put the call to free() after the terminator
2209  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
2210  builder.CreateFree(structPtr);
2211 }
2212 
2213 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
2214 static LogicalResult
2215 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
2216  LLVM::ModuleTranslation &moduleTranslation) {
2217  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2218  if (failed(checkImplementationStatus(*taskOp)))
2219  return failure();
2220 
2221  PrivateVarsInfo privateVarsInfo(taskOp);
2222  TaskContextStructManager taskStructMgr{builder, moduleTranslation,
2223  privateVarsInfo.privatizers};
2224 
2225  // Allocate and copy private variables before creating the task. This avoids
2226  // accessing invalid memory if (after this scope ends) the private variables
2227  // are initialized from host variables or if the variables are copied into
2228  // from host variables (firstprivate). The insertion point is just before
2229  // where the code for creating and scheduling the task will go. That puts this
2230  // code outside of the outlined task region, which is what we want because
2231  // this way the initialization and copy regions are executed immediately while
2232  // the host variable data are still live.
2233 
2234  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2235  findAllocaInsertPoint(builder, moduleTranslation);
2236 
2237  // Not using splitBB() because that requires the current block to have a
2238  // terminator.
2239  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
2240  llvm::BasicBlock *taskStartBlock = llvm::BasicBlock::Create(
2241  builder.getContext(), "omp.task.start",
2242  /*Parent=*/builder.GetInsertBlock()->getParent());
2243  llvm::Instruction *branchToTaskStartBlock = builder.CreateBr(taskStartBlock);
2244  builder.SetInsertPoint(branchToTaskStartBlock);
2245 
2246  // Now do this again to make the initialization and copy blocks
2247  llvm::BasicBlock *copyBlock =
2248  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
2249  llvm::BasicBlock *initBlock =
2250  splitBB(builder, /*CreateBranch=*/true, "omp.private.init");
2251 
2252  // Now the control flow graph should look like
2253  // starter_block:
2254  // <---- where we started when convertOmpTaskOp was called
2255  // br %omp.private.init
2256  // omp.private.init:
2257  // br %omp.private.copy
2258  // omp.private.copy:
2259  // br %omp.task.start
2260  // omp.task.start:
2261  // <---- where we want the insertion point to be when we call createTask()
2262 
2263  // Save the alloca insertion point on ModuleTranslation stack for use in
2264  // nested regions.
2266  moduleTranslation, allocaIP);
2267 
2268  // Allocate and initialize private variables
2269  builder.SetInsertPoint(initBlock->getTerminator());
2270 
2271  // Create task variable structure
2272  taskStructMgr.generateTaskContextStruct();
2273  // GEPs so that we can initialize the variables. Don't use these GEPs inside
2274  // of the body otherwise it will be the GEP not the struct which is fowarded
2275  // to the outlined function. GEPs forwarded in this way are passed in a
2276  // stack-allocated (by OpenMPIRBuilder) structure which is not safe for tasks
2277  // which may not be executed until after the current stack frame goes out of
2278  // scope.
2279  taskStructMgr.createGEPsToPrivateVars();
2280 
2281  for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
2282  llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
2283  privateVarsInfo.blockArgs,
2284  taskStructMgr.getLLVMPrivateVarGEPs())) {
2285  // To be handled inside the task.
2286  if (!privDecl.readsFromMold())
2287  continue;
2288  assert(llvmPrivateVarAlloc &&
2289  "reads from mold so shouldn't have been skipped");
2290 
2291  llvm::Expected<llvm::Value *> privateVarOrErr =
2292  initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2293  blockArg, llvmPrivateVarAlloc, initBlock);
2294  if (!privateVarOrErr)
2295  return handleError(privateVarOrErr, *taskOp.getOperation());
2296 
2298 
2299  // TODO: this is a bit of a hack for Fortran character boxes.
2300  // Character boxes are passed by value into the init region and then the
2301  // initialized character box is yielded by value. Here we need to store the
2302  // yielded value into the private allocation, and load the private
2303  // allocation to match the type expected by region block arguments.
2304  if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
2305  !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
2306  builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);
2307  // Load it so we have the value pointed to by the GEP
2308  llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(),
2309  llvmPrivateVarAlloc);
2310  }
2311  assert(llvmPrivateVarAlloc->getType() ==
2312  moduleTranslation.convertType(blockArg.getType()));
2313 
2314  // Mapping blockArg -> llvmPrivateVarAlloc is done inside the body callback
2315  // so that OpenMPIRBuilder doesn't try to pass each GEP address through a
2316  // stack allocated structure.
2317  }
2318 
2319  // firstprivate copy region
2320  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
2321  if (failed(copyFirstPrivateVars(
2322  taskOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
2323  taskStructMgr.getLLVMPrivateVarGEPs(), privateVarsInfo.privatizers,
2324  taskOp.getPrivateNeedsBarrier())))
2325  return llvm::failure();
2326 
2327  // Set up for call to createTask()
2328  builder.SetInsertPoint(taskStartBlock);
2329 
2330  auto bodyCB = [&](InsertPointTy allocaIP,
2331  InsertPointTy codegenIP) -> llvm::Error {
2332  // Save the alloca insertion point on ModuleTranslation stack for use in
2333  // nested regions.
2335  moduleTranslation, allocaIP);
2336 
2337  // translate the body of the task:
2338  builder.restoreIP(codegenIP);
2339 
2340  llvm::BasicBlock *privInitBlock = nullptr;
2341  privateVarsInfo.llvmVars.resize(privateVarsInfo.blockArgs.size());
2342  for (auto [i, zip] : llvm::enumerate(llvm::zip_equal(
2343  privateVarsInfo.blockArgs, privateVarsInfo.privatizers,
2344  privateVarsInfo.mlirVars))) {
2345  auto [blockArg, privDecl, mlirPrivVar] = zip;
2346  // This is handled before the task executes
2347  if (privDecl.readsFromMold())
2348  continue;
2349 
2350  llvm::IRBuilderBase::InsertPointGuard guard(builder);
2351  llvm::Type *llvmAllocType =
2352  moduleTranslation.convertType(privDecl.getType());
2353  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
2354  llvm::Value *llvmPrivateVar = builder.CreateAlloca(
2355  llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
2356 
2357  llvm::Expected<llvm::Value *> privateVarOrError =
2358  initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2359  blockArg, llvmPrivateVar, privInitBlock);
2360  if (!privateVarOrError)
2361  return privateVarOrError.takeError();
2362  moduleTranslation.mapValue(blockArg, privateVarOrError.get());
2363  privateVarsInfo.llvmVars[i] = privateVarOrError.get();
2364  }
2365 
2366  taskStructMgr.createGEPsToPrivateVars();
2367  for (auto [i, llvmPrivVar] :
2368  llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) {
2369  if (!llvmPrivVar) {
2370  assert(privateVarsInfo.llvmVars[i] &&
2371  "This is added in the loop above");
2372  continue;
2373  }
2374  privateVarsInfo.llvmVars[i] = llvmPrivVar;
2375  }
2376 
2377  // Find and map the addresses of each variable within the task context
2378  // structure
2379  for (auto [blockArg, llvmPrivateVar, privateDecl] :
2380  llvm::zip_equal(privateVarsInfo.blockArgs, privateVarsInfo.llvmVars,
2381  privateVarsInfo.privatizers)) {
2382  // This was handled above.
2383  if (!privateDecl.readsFromMold())
2384  continue;
2385  // Fix broken pass-by-value case for Fortran character boxes
2386  if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
2387  llvmPrivateVar = builder.CreateLoad(
2388  moduleTranslation.convertType(blockArg.getType()), llvmPrivateVar);
2389  }
2390  assert(llvmPrivateVar->getType() ==
2391  moduleTranslation.convertType(blockArg.getType()));
2392  moduleTranslation.mapValue(blockArg, llvmPrivateVar);
2393  }
2394 
2395  auto continuationBlockOrError = convertOmpOpRegions(
2396  taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
2397  if (failed(handleError(continuationBlockOrError, *taskOp)))
2398  return llvm::make_error<PreviouslyReportedError>();
2399 
2400  builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
2401 
2402  if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(),
2403  privateVarsInfo.llvmVars,
2404  privateVarsInfo.privatizers)))
2405  return llvm::make_error<PreviouslyReportedError>();
2406 
2407  // Free heap allocated task context structure at the end of the task.
2408  taskStructMgr.freeStructPtr();
2409 
2410  return llvm::Error::success();
2411  };
2412 
2413  llvm::OpenMPIRBuilder &ompBuilder = *moduleTranslation.getOpenMPBuilder();
2414  SmallVector<llvm::BranchInst *> cancelTerminators;
2415  // The directive to match here is OMPD_taskgroup because it is the taskgroup
2416  // which is canceled. This is handled here because it is the task's cleanup
2417  // block which should be branched to.
2418  pushCancelFinalizationCB(cancelTerminators, builder, ompBuilder, taskOp,
2419  llvm::omp::Directive::OMPD_taskgroup);
2420 
2422  buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
2423  moduleTranslation, dds);
2424 
2425  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2426  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2427  moduleTranslation.getOpenMPBuilder()->createTask(
2428  ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
2429  moduleTranslation.lookupValue(taskOp.getFinal()),
2430  moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
2431  taskOp.getMergeable(),
2432  moduleTranslation.lookupValue(taskOp.getEventHandle()),
2433  moduleTranslation.lookupValue(taskOp.getPriority()));
2434 
2435  if (failed(handleError(afterIP, *taskOp)))
2436  return failure();
2437 
2438  // Set the correct branch target for task cancellation
2439  popCancelFinalizationCB(cancelTerminators, ompBuilder, afterIP.get());
2440 
2441  builder.restoreIP(*afterIP);
2442  return success();
2443 }
2444 
2445 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
2446 static LogicalResult
2447 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
2448  LLVM::ModuleTranslation &moduleTranslation) {
2449  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2450  if (failed(checkImplementationStatus(*tgOp)))
2451  return failure();
2452 
2453  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
2454  builder.restoreIP(codegenIP);
2455  return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region",
2456  builder, moduleTranslation)
2457  .takeError();
2458  };
2459 
2460  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
2461  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2462  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2463  moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP,
2464  bodyCB);
2465 
2466  if (failed(handleError(afterIP, *tgOp)))
2467  return failure();
2468 
2469  builder.restoreIP(*afterIP);
2470  return success();
2471 }
2472 
2473 static LogicalResult
2474 convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder,
2475  LLVM::ModuleTranslation &moduleTranslation) {
2476  if (failed(checkImplementationStatus(*twOp)))
2477  return failure();
2478 
2479  moduleTranslation.getOpenMPBuilder()->createTaskwait(builder.saveIP());
2480  return success();
2481 }
2482 
/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
///
/// Lowering proceeds in phases: privatize variables, allocate and initialize
/// reduction storage, translate the wrapped loop-nest region, apply the
/// worksharing-loop transformation, then emit linear-clause finalization,
/// reduction combination, and privatization cleanup.
static LogicalResult
convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  auto wsloopOp = cast<omp::WsloopOp>(opInst);
  // Refuse clause combinations that are not implemented yet.
  if (failed(checkImplementationStatus(opInst)))
    return failure();

  auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
  // One by-ref flag per reduction variable.
  llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref());
  assert(isByRef.size() == wsloopOp.getNumReductionVars());

  // Static is the default.
  auto schedule =
      wsloopOp.getScheduleKind().value_or(omp::ClauseScheduleKind::Static);

  // Find the loop configuration.
  // The chunk expression is coerced to the induction variable's type, which
  // is taken from the first loop step.
  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]);
  llvm::Type *ivType = step->getType();
  llvm::Value *chunk = nullptr;
  if (wsloopOp.getScheduleChunk()) {
    llvm::Value *chunkVar =
        moduleTranslation.lookupValue(wsloopOp.getScheduleChunk());
    chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
  }

  PrivateVarsInfo privateVarsInfo(wsloopOp);

  SmallVector<omp::DeclareReductionOp> reductionDecls;
  collectReductionDecls(wsloopOp, reductionDecls);
  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  SmallVector<llvm::Value *> privateReductionVariables(
      wsloopOp.getNumReductionVars());

  // NOTE(review): the declaration of `afterAllocas` (the call that allocates
  // the private variables) appears truncated in this view of the file.
      builder, moduleTranslation, privateVarsInfo, allocaIP);
  if (handleError(afterAllocas, opInst).failed())
    return failure();

  DenseMap<Value, llvm::Value *> reductionVariableMap;

  MutableArrayRef<BlockArgument> reductionArgs =
      cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();

  SmallVector<DeferredStore> deferredStores;

  // Allocate storage for the reduction variables; some stores into that
  // storage are deferred and collected in `deferredStores`.
  if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
                                moduleTranslation, allocaIP, reductionDecls,
                                privateReductionVariables, reductionVariableMap,
                                deferredStores, isByRef)))
    return failure();

  // Run the `init` regions of the privatizers.
  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
                  opInst)
          .failed())
    return failure();

  // Copy `firstprivate` values from the original variables.
  if (failed(copyFirstPrivateVars(
          wsloopOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
          privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
          wsloopOp.getPrivateNeedsBarrier())))
    return failure();

  assert(afterAllocas.get()->getSinglePredecessor());
  if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
                               moduleTranslation,
                               afterAllocas.get()->getSinglePredecessor(),
                               reductionDecls, privateReductionVariables,
                               reductionVariableMap, isByRef, deferredStores)))
    return failure();

  // TODO: Handle doacross loops when the ordered clause has a parameter.
  bool isOrdered = wsloopOp.getOrdered().has_value();
  std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
  bool isSimd = wsloopOp.getScheduleSimd();
  bool loopNeedsBarrier = !wsloopOp.getNowait();

  // The only legal way for the direct parent to be omp.distribute is that this
  // represents 'distribute parallel do'. Otherwise, this is a regular
  // worksharing loop.
  llvm::omp::WorksharingLoopType workshareLoopType =
      llvm::isa_and_present<omp::DistributeOp>(opInst.getParentOp())
          ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop
          : llvm::omp::WorksharingLoopType::ForStaticLoop;

  SmallVector<llvm::BranchInst *> cancelTerminators;
  // Register a finalization callback so cancellation branches target this
  // construct; the branch targets are fixed up after the worksharing loop has
  // been built (see popCancelFinalizationCB below).
  pushCancelFinalizationCB(cancelTerminators, builder, *ompBuilder, wsloopOp,
                           llvm::omp::Directive::OMPD_for);

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  // Initialize linear variables and linear step
  LinearClauseProcessor linearClauseProcessor;
  if (wsloopOp.getLinearVars().size()) {
    for (mlir::Value linearVar : wsloopOp.getLinearVars())
      linearClauseProcessor.createLinearVar(builder, moduleTranslation,
                                            linearVar);
    for (mlir::Value linearStep : wsloopOp.getLinearStepVars())
      linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);
  }

  // NOTE(review): the declaration of `regionBlock` (the conversion of the
  // wsloop region) appears truncated in this view of the file.
      wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation);

  if (failed(handleError(regionBlock, opInst)))
    return failure();

  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);

  // Emit Initialization and Update IR for linear variables
  if (wsloopOp.getLinearVars().size()) {
    llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
        linearClauseProcessor.initLinearVar(builder, moduleTranslation,
                                            loopInfo->getPreheader());
    if (failed(handleError(afterBarrierIP, *loopOp)))
      return failure();
    builder.restoreIP(*afterBarrierIP);
    linearClauseProcessor.updateLinearVar(builder, loopInfo->getBody(),
                                          loopInfo->getIndVar());
    linearClauseProcessor.outlineLinearFinalizationBB(builder,
                                                      loopInfo->getExit());
  }

  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
  // Apply the worksharing transformation to the canonical loop, wiring in the
  // schedule kind/modifiers, chunking, ordered, and barrier requirements.
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
      ompBuilder->applyWorkshareLoop(
          ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
          convertToScheduleKind(schedule), chunk, isSimd,
          scheduleMod == omp::ScheduleModifier::monotonic,
          scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
          workshareLoopType);

  if (failed(handleError(wsloopIP, opInst)))
    return failure();

  // Emit finalization and in-place rewrites for linear vars.
  if (wsloopOp.getLinearVars().size()) {
    llvm::OpenMPIRBuilder::InsertPointTy oldIP = builder.saveIP();
    assert(loopInfo->getLastIter() &&
           "`lastiter` in CanonicalLoopInfo is nullptr");
    llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
        linearClauseProcessor.finalizeLinearVar(builder, moduleTranslation,
                                                loopInfo->getLastIter());
    if (failed(handleError(afterBarrierIP, *loopOp)))
      return failure();
    for (size_t index = 0; index < wsloopOp.getLinearVars().size(); index++)
      linearClauseProcessor.rewriteInPlace(builder, "omp.loop_nest.region",
                                           index);
    builder.restoreIP(oldIP);
  }

  // Set the correct branch target for task cancellation
  popCancelFinalizationCB(cancelTerminators, *ompBuilder, wsloopIP.get());

  // Process the reductions if required.
  if (failed(createReductionsAndCleanup(
          wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
          privateReductionVariables, isByRef, wsloopOp.getNowait(),
          /*isTeamsReduction=*/false)))
    return failure();

  return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
                            privateVarsInfo.llvmVars,
                            privateVarsInfo.privatizers);
}
2651 
/// Converts the OpenMP parallel operation to LLVM IR.
///
/// Privatization, reduction setup, region translation, and reduction
/// combination all happen inside `bodyGenCB`, which OpenMPIRBuilder invokes
/// while outlining the parallel body; `finiCB` emits reduction cleanup and
/// privatization teardown.
static LogicalResult
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  // One by-ref flag per reduction variable.
  ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionByref());
  assert(isByRef.size() == opInst.getNumReductionVars());
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

  // Refuse clause combinations that are not implemented yet.
  if (failed(checkImplementationStatus(*opInst)))
    return failure();

  PrivateVarsInfo privateVarsInfo(opInst);

  // Collect reduction declarations
  SmallVector<omp::DeclareReductionOp> reductionDecls;
  collectReductionDecls(opInst, reductionDecls);
  SmallVector<llvm::Value *> privateReductionVariables(
      opInst.getNumReductionVars());
  SmallVector<DeferredStore> deferredStores;

  auto bodyGenCB = [&](InsertPointTy allocaIP,
                       InsertPointTy codeGenIP) -> llvm::Error {
    // NOTE(review): the declaration of `afterAllocas` (the call allocating
    // the private variables) appears truncated in this view of the file.
        builder, moduleTranslation, privateVarsInfo, allocaIP);
    if (handleError(afterAllocas, *opInst).failed())
      return llvm::make_error<PreviouslyReportedError>();

    // Allocate reduction vars
    DenseMap<Value, llvm::Value *> reductionVariableMap;

    MutableArrayRef<BlockArgument> reductionArgs =
        cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs();

    // Re-anchor the alloca insertion point at the block's terminator so new
    // allocas land before the branch out of the entry block.
    allocaIP =
        InsertPointTy(allocaIP.getBlock(),
                      allocaIP.getBlock()->getTerminator()->getIterator());

    if (failed(allocReductionVars(
            opInst, reductionArgs, builder, moduleTranslation, allocaIP,
            reductionDecls, privateReductionVariables, reductionVariableMap,
            deferredStores, isByRef)))
      return llvm::make_error<PreviouslyReportedError>();

    assert(afterAllocas.get()->getSinglePredecessor());
    builder.restoreIP(codeGenIP);

    // Run the `init` regions of the privatizers.
    if (handleError(
            initPrivateVars(builder, moduleTranslation, privateVarsInfo),
            *opInst)
            .failed())
      return llvm::make_error<PreviouslyReportedError>();

    // Copy `firstprivate` values from the original variables.
    if (failed(copyFirstPrivateVars(
            opInst, builder, moduleTranslation, privateVarsInfo.mlirVars,
            privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
            opInst.getPrivateNeedsBarrier())))
      return llvm::make_error<PreviouslyReportedError>();

    if (failed(
            initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
                              afterAllocas.get()->getSinglePredecessor(),
                              reductionDecls, privateReductionVariables,
                              reductionVariableMap, isByRef, deferredStores)))
      return llvm::make_error<PreviouslyReportedError>();

    // Save the alloca insertion point on ModuleTranslation stack for use in
    // nested regions.
    // NOTE(review): the stack-frame declaration line appears truncated in
    // this view of the file.
        moduleTranslation, allocaIP);

    // ParallelOp has only one region associated with it.
    // NOTE(review): the declaration of `regionBlock` appears truncated in
    // this view of the file.
        opInst.getRegion(), "omp.par.region", builder, moduleTranslation);
    if (!regionBlock)
      return regionBlock.takeError();

    // Process the reductions if required.
    if (opInst.getNumReductionVars() > 0) {
      // Collect reduction info
      SmallVector<OwningReductionGen> owningReductionGens;
      SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
      // NOTE(review): the declaration of `reductionInfos` appears truncated
      // in this view of the file.
      collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
                           owningReductionGens, owningAtomicReductionGens,
                           privateReductionVariables, reductionInfos);

      // Move to region cont block
      builder.SetInsertPoint((*regionBlock)->getTerminator());

      // Generate reductions from info
      // A temporary `unreachable` anchors the insertion point; it is erased
      // once createReductions has produced the continuation point.
      llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
      builder.SetInsertPoint(tempTerminator);

      llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
          ompBuilder->createReductions(
              builder.saveIP(), allocaIP, reductionInfos, isByRef,
              /*IsNoWait=*/false, /*IsTeamsReduction=*/false);
      if (!contInsertPoint)
        return contInsertPoint.takeError();

      if (!contInsertPoint->getBlock())
        return llvm::make_error<PreviouslyReportedError>();

      tempTerminator->eraseFromParent();
      builder.restoreIP(*contInsertPoint);
    }

    return llvm::Error::success();
  };

  auto privCB = [](InsertPointTy allocaIP, InsertPointTy codeGenIP,
                   llvm::Value &, llvm::Value &val, llvm::Value *&replVal) {
    // tell OpenMPIRBuilder not to do anything. We handled Privatisation in
    // bodyGenCB.
    replVal = &val;
    return codeGenIP;
  };

  // TODO: Perform finalization actions for variables. This has to be
  // called for variables which have destructors/finalizers.
  auto finiCB = [&](InsertPointTy codeGenIP) -> llvm::Error {
    InsertPointTy oldIP = builder.saveIP();
    builder.restoreIP(codeGenIP);

    // if the reduction has a cleanup region, inline it here to finalize the
    // reduction variables
    SmallVector<Region *> reductionCleanupRegions;
    llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions),
                    [](omp::DeclareReductionOp reductionDecl) {
                      return &reductionDecl.getCleanupRegion();
                    });
    if (failed(inlineOmpRegionCleanup(
            reductionCleanupRegions, privateReductionVariables,
            moduleTranslation, builder, "omp.reduction.cleanup")))
      return llvm::createStringError(
          "failed to inline `cleanup` region of `omp.declare_reduction`");

    if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(),
                                  privateVarsInfo.llvmVars,
                                  privateVarsInfo.privatizers)))
      return llvm::make_error<PreviouslyReportedError>();

    builder.restoreIP(oldIP);
    return llvm::Error::success();
  };

  // Translate the optional if / num_threads / proc_bind clause operands.
  llvm::Value *ifCond = nullptr;
  if (auto ifVar = opInst.getIfExpr())
    ifCond = moduleTranslation.lookupValue(ifVar);
  llvm::Value *numThreads = nullptr;
  if (auto numThreadsVar = opInst.getNumThreads())
    numThreads = moduleTranslation.lookupValue(numThreadsVar);
  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
  if (auto bind = opInst.getProcBindKind())
    pbKind = getProcBindKind(*bind);
  bool isCancellable = constructIsCancellable(opInst);

  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
                                 ifCond, numThreads, pbKind, isCancellable);

  if (failed(handleError(afterIP, *opInst)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
2824 
2825 /// Convert Order attribute to llvm::omp::OrderKind.
2826 static llvm::omp::OrderKind
2827 convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
2828  if (!o)
2829  return llvm::omp::OrderKind::OMP_ORDER_unknown;
2830  switch (*o) {
2831  case omp::ClauseOrderKind::Concurrent:
2832  return llvm::omp::OrderKind::OMP_ORDER_concurrent;
2833  }
2834  llvm_unreachable("Unknown ClauseOrderKind kind");
2835 }
2836 
/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
               LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  auto simdOp = cast<omp::SimdOp>(opInst);

  // TODO: Replace this with proper composite translation support.
  // Currently, simd information on composite constructs is ignored, so e.g.
  // 'do/for simd' will be treated the same as a standalone 'do/for'. This is
  // allowed by the spec, since it's equivalent to using a SIMD length of 1.
  if (simdOp.isComposite()) {
    if (failed(convertIgnoredWrapper(simdOp, moduleTranslation)))
      return failure();

    return inlineConvertOmpRegions(simdOp.getRegion(), "omp.simd.region",
                                   builder, moduleTranslation);
  }

  // Refuse clause combinations that are not implemented yet.
  if (failed(checkImplementationStatus(opInst)))
    return failure();

  PrivateVarsInfo privateVarsInfo(simdOp);

  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  // NOTE(review): the declaration of `afterAllocas` (the call allocating the
  // private variables) appears truncated in this view of the file.
      builder, moduleTranslation, privateVarsInfo, allocaIP);
  if (handleError(afterAllocas, opInst).failed())
    return failure();

  // Run the `init` regions of the privatizers.
  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
                  opInst)
          .failed())
    return failure();

  // Translate optional simdlen/safelen clauses into i64 constants.
  llvm::ConstantInt *simdlen = nullptr;
  if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
    simdlen = builder.getInt64(simdlenVar.value());

  llvm::ConstantInt *safelen = nullptr;
  if (std::optional<uint64_t> safelenVar = simdOp.getSafelen())
    safelen = builder.getInt64(safelenVar.value());

  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
  llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder());

  // For each aligned variable, load its pointer value in the block that
  // precedes the loop and record the requested alignment constant.
  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
  std::optional<ArrayAttr> alignmentValues = simdOp.getAlignments();
  mlir::OperandRange operands = simdOp.getAlignedVars();
  for (size_t i = 0; i < operands.size(); ++i) {
    llvm::Value *alignment = nullptr;
    llvm::Value *llvmVal = moduleTranslation.lookupValue(operands[i]);
    llvm::Type *ty = llvmVal->getType();

    auto intAttr = cast<IntegerAttr>((*alignmentValues)[i]);
    alignment = builder.getInt64(intAttr.getInt());
    assert(ty->isPointerTy() && "Invalid type for aligned variable");
    assert(alignment && "Invalid alignment value");
    // Emit the load in `sourceBlock`, then restore the current position.
    auto curInsert = builder.saveIP();
    builder.SetInsertPoint(sourceBlock);
    llvmVal = builder.CreateLoad(ty, llvmVal);
    builder.restoreIP(curInsert);
    alignedVars[llvmVal] = alignment;
  }

  // NOTE(review): the declaration of `regionBlock` appears truncated in this
  // view of the file.
      simdOp.getRegion(), "omp.simd.region", builder, moduleTranslation);

  if (failed(handleError(regionBlock, opInst)))
    return failure();

  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
  // Apply the simd transformation to the canonical loop just translated.
  ompBuilder->applySimd(loopInfo, alignedVars,
                        simdOp.getIfExpr()
                            ? moduleTranslation.lookupValue(simdOp.getIfExpr())
                            : nullptr,
                        order, simdlen, safelen);

  return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
                            privateVarsInfo.llvmVars,
                            privateVarsInfo.privatizers);
}
2922 
/// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  auto loopOp = cast<omp::LoopNestOp>(opInst);

  // Set up the source location value for OpenMP runtime.
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  // Generator of the canonical loop body.
  // NOTE(review): the declarations of `loopInfos` and `bodyInsertPoints`
  // appear truncated in this view of the file.
  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip,
                     llvm::Value *iv) -> llvm::Error {
    // Make sure further conversions know about the induction variable.
    moduleTranslation.mapValue(
        loopOp.getRegion().front().getArgument(loopInfos.size()), iv);

    // Capture the body insertion point for use in nested loops. BodyIP of the
    // CanonicalLoopInfo always points to the beginning of the entry block of
    // the body.
    bodyInsertPoints.push_back(ip);

    // Only the innermost loop of the nest materializes the region body.
    if (loopInfos.size() != loopOp.getNumLoops() - 1)
      return llvm::Error::success();

    // Convert the body of the loop.
    builder.restoreIP(ip);
    // NOTE(review): the declaration of `regionBlock` appears truncated in
    // this view of the file.
        loopOp.getRegion(), "omp.loop_nest.region", builder, moduleTranslation);
    if (!regionBlock)
      return regionBlock.takeError();

    builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
    return llvm::Error::success();
  };

  // Delegate actual loop construction to the OpenMP IRBuilder.
  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
  // loop, i.e. it has a positive step, uses signed integer semantics.
  // Reconsider this code when the nested loop operation clearly supports more
  // cases.
  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
    llvm::Value *lowerBound =
        moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]);
    llvm::Value *upperBound =
        moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]);
    llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]);

    // Make sure loop trip count are emitted in the preheader of the outermost
    // loop at the latest so that they are all available for the new collapsed
    // loop will be created below.
    llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
    llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
    if (i != 0) {
      // Inner loops are created at the body of the enclosing loop, with trip
      // counts computed in the outermost preheader.
      loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
                                                       ompLoc.DL);
      computeIP = loopInfos.front()->getPreheaderIP();
    }

    // NOTE(review): the declaration of `loopResult` appears truncated in
    // this view of the file.
        ompBuilder->createCanonicalLoop(
            loc, bodyGen, lowerBound, upperBound, step,
            /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);

    if (failed(handleError(loopResult, *loopOp)))
      return failure();

    loopInfos.push_back(*loopResult);
  }

  // Collapse loops. Store the insertion point because LoopInfos may get
  // invalidated.
  llvm::OpenMPIRBuilder::InsertPointTy afterIP =
      loopInfos.front()->getAfterIP();

  // Update the stack frame created for this loop to point to the resulting loop
  // after applying transformations.
  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
      [&](OpenMPLoopInfoStackFrame &frame) {
        frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
        return WalkResult::interrupt();
      });

  // Continue building IR after the loop. Note that the LoopInfo returned by
  // `collapseLoops` points inside the outermost loop and is intended for
  // potential further loop transformations. Use the insertion point stored
  // before collapsing loops instead.
  builder.restoreIP(afterIP);
  return success();
}
3015 
3016 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
3017 static llvm::AtomicOrdering
3018 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
3019  if (!ao)
3020  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
3021 
3022  switch (*ao) {
3023  case omp::ClauseMemoryOrderKind::Seq_cst:
3024  return llvm::AtomicOrdering::SequentiallyConsistent;
3025  case omp::ClauseMemoryOrderKind::Acq_rel:
3026  return llvm::AtomicOrdering::AcquireRelease;
3027  case omp::ClauseMemoryOrderKind::Acquire:
3028  return llvm::AtomicOrdering::Acquire;
3029  case omp::ClauseMemoryOrderKind::Release:
3030  return llvm::AtomicOrdering::Release;
3031  case omp::ClauseMemoryOrderKind::Relaxed:
3032  return llvm::AtomicOrdering::Monotonic;
3033  }
3034  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
3035 }
3036 
3037 /// Convert omp.atomic.read operation to LLVM IR.
3038 static LogicalResult
3039 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
3040  LLVM::ModuleTranslation &moduleTranslation) {
3041  auto readOp = cast<omp::AtomicReadOp>(opInst);
3042  if (failed(checkImplementationStatus(opInst)))
3043  return failure();
3044 
3045  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3046  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3047  findAllocaInsertPoint(builder, moduleTranslation);
3048 
3049  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3050 
3051  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrder());
3052  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
3053  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
3054 
3055  llvm::Type *elementType =
3056  moduleTranslation.convertType(readOp.getElementType());
3057 
3058  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
3059  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
3060  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO, allocaIP));
3061  return success();
3062 }
3063 
3064 /// Converts an omp.atomic.write operation to LLVM IR.
3065 static LogicalResult
3066 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
3067  LLVM::ModuleTranslation &moduleTranslation) {
3068  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
3069  if (failed(checkImplementationStatus(opInst)))
3070  return failure();
3071 
3072  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3073  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3074  findAllocaInsertPoint(builder, moduleTranslation);
3075 
3076  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3077  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder());
3078  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
3079  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
3080  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
3081  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
3082  /*isVolatile=*/false};
3083  builder.restoreIP(
3084  ompBuilder->createAtomicWrite(ompLoc, x, expr, ao, allocaIP));
3085  return success();
3086 }
3087 
/// Converts an LLVM dialect binary operation to the corresponding enum value
/// for `atomicrmw` supported binary operation.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
  // NOTE(review): the `return llvm::TypeSwitch...` header of this case chain
  // appears truncated in this view of the file.
      .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
      .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
      .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
      .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
      .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
      .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
      .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
      .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
      .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
      // Any op not listed above yields BAD_BINOP.
      .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
}
3103 
/// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
///
/// If the update region contains exactly one recognizable binary operation
/// that uses the region argument, an `atomicrmw` may be emitted; otherwise
/// the OpenMPIRBuilder falls back to a compare-exchange loop driven by the
/// callback that re-lowers the update region.
static LogicalResult
convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
                       llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  if (failed(checkImplementationStatus(*opInst)))
    return failure();

  // Convert values and types.
  auto &innerOpList = opInst.getRegion().front().getOperations();
  bool isXBinopExpr{false};
  llvm::AtomicRMWInst::BinOp binop;
  mlir::Value mlirExpr;
  llvm::Value *llvmExpr = nullptr;
  llvm::Value *llvmX = nullptr;
  llvm::Type *llvmXElementType = nullptr;
  if (innerOpList.size() == 2) {
    // The two operations here are the update and the terminator.
    // Since we can identify the update operation, there is a possibility
    // that we can generate the atomicrmw instruction.
    mlir::Operation &innerOp = *opInst.getRegion().front().begin();
    if (!llvm::is_contained(innerOp.getOperands(),
                            opInst.getRegion().getArgument(0))) {
      return opInst.emitError("no atomic update operation with region argument"
                              " as operand found inside atomic.update region");
    }
    binop = convertBinOpToAtomic(innerOp);
    // isXBinopExpr records whether the atomic variable `x` is the LHS of the
    // binary op (`x binop expr`) as opposed to the RHS (`expr binop x`); the
    // non-`x` operand is the expression to apply.
    isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
    mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  } else {
    // Since the update region includes more than one operation
    // we will resort to generating a cmpxchg loop.
    binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
  }
  llvmX = moduleTranslation.lookupValue(opInst.getX());
  llvmXElementType = moduleTranslation.convertType(
      opInst.getRegion().getArgument(0).getType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(opInst.getMemoryOrder());

  // Generate update code. The callback maps the region argument to the
  // loaded atomic value, lowers the region body, and returns the value
  // yielded by the region's omp.yield terminator.
  auto updateFn =
      [&opInst, &moduleTranslation](
          llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    Block &bb = *opInst.getRegion().begin();
    moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
                                     atomicOrdering, binop, updateFn,
                                     isXBinopExpr);

  if (failed(handleError(afterIP, *opInst)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
3182 
/// Converts an OpenMP atomic capture operation using OpenMPIRBuilder.
///
/// The capture region wraps an atomic read together with either an atomic
/// update or an atomic write; their relative order decides whether the read
/// observes the value before (postfix) or after (prefix) the modification.
static LogicalResult
convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
                        llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  if (failed(checkImplementationStatus(*atomicCaptureOp)))
    return failure();

  mlir::Value mlirExpr;
  bool isXBinopExpr = false, isPostfixUpdate = false;
  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;

  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();

  assert((atomicUpdateOp || atomicWriteOp) &&
         "internal op must be an atomic.update or atomic.write op");

  if (atomicWriteOp) {
    // A write inside a capture always reads the old value first.
    isPostfixUpdate = true;
    mlirExpr = atomicWriteOp.getExpr();
  } else {
    // Postfix iff the update is the second op, i.e. the read happens first.
    isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
                      atomicCaptureOp.getAtomicUpdateOp().getOperation();
    auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
    // Find the binary update operation that uses the region argument
    // and get the expression to update
    if (innerOpList.size() == 2) {
      mlir::Operation &innerOp = *atomicUpdateOp.getRegion().front().begin();
      if (!llvm::is_contained(innerOp.getOperands(),
                              atomicUpdateOp.getRegion().getArgument(0))) {
        return atomicUpdateOp.emitError(
            "no atomic update operation with region argument"
            " as operand found inside atomic.update region");
      }
      binop = convertBinOpToAtomic(innerOp);
      // Whether `x` is the LHS of the binary op; the other operand is the
      // update expression.
      isXBinopExpr =
          innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
      mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    } else {
      // More than one op in the region: fall back to a cmpxchg loop.
      binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
    }
  }

  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  llvm::Value *llvmX =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
  llvm::Value *llvmV =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
  llvm::Type *llvmXElementType = moduleTranslation.convertType(
      atomicCaptureOp.getAtomicReadOp().getElementType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(atomicCaptureOp.getMemoryOrder());

  // Callback that produces the new value: either the written expression, or
  // the value yielded by re-lowering the update region with the region
  // argument bound to the loaded atomic value.
  auto updateFn =
      [&](llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    if (atomicWriteOp)
      return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
    Block &bb = *atomicUpdateOp.getRegion().begin();
    moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
                               atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicCapture(
          ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
          binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr);

  if (failed(handleError(afterIP, *atomicCaptureOp)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
3277 
3278 static llvm::omp::Directive convertCancellationConstructType(
3279  omp::ClauseCancellationConstructType directive) {
3280  switch (directive) {
3281  case omp::ClauseCancellationConstructType::Loop:
3282  return llvm::omp::Directive::OMPD_for;
3283  case omp::ClauseCancellationConstructType::Parallel:
3284  return llvm::omp::Directive::OMPD_parallel;
3285  case omp::ClauseCancellationConstructType::Sections:
3286  return llvm::omp::Directive::OMPD_sections;
3287  case omp::ClauseCancellationConstructType::Taskgroup:
3288  return llvm::omp::Directive::OMPD_taskgroup;
3289  }
3290 }
3291 
3292 static LogicalResult
3293 convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder,
3294  LLVM::ModuleTranslation &moduleTranslation) {
3295  if (failed(checkImplementationStatus(*op.getOperation())))
3296  return failure();
3297 
3298  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3299  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3300 
3301  llvm::Value *ifCond = nullptr;
3302  if (Value ifVar = op.getIfExpr())
3303  ifCond = moduleTranslation.lookupValue(ifVar);
3304 
3305  llvm::omp::Directive cancelledDirective =
3306  convertCancellationConstructType(op.getCancelDirective());
3307 
3308  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3309  ompBuilder->createCancel(ompLoc, ifCond, cancelledDirective);
3310 
3311  if (failed(handleError(afterIP, *op.getOperation())))
3312  return failure();
3313 
3314  builder.restoreIP(afterIP.get());
3315 
3316  return success();
3317 }
3318 
3319 static LogicalResult
3320 convertOmpCancellationPoint(omp::CancellationPointOp op,
3321  llvm::IRBuilderBase &builder,
3322  LLVM::ModuleTranslation &moduleTranslation) {
3323  if (failed(checkImplementationStatus(*op.getOperation())))
3324  return failure();
3325 
3326  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3327  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3328 
3329  llvm::omp::Directive cancelledDirective =
3330  convertCancellationConstructType(op.getCancelDirective());
3331 
3332  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3333  ompBuilder->createCancellationPoint(ompLoc, cancelledDirective);
3334 
3335  if (failed(handleError(afterIP, *op.getOperation())))
3336  return failure();
3337 
3338  builder.restoreIP(afterIP.get());
3339 
3340  return success();
3341 }
3342 
3343 /// Converts an OpenMP Threadprivate operation into LLVM IR using
3344 /// OpenMPIRBuilder.
3345 static LogicalResult
3346 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
3347  LLVM::ModuleTranslation &moduleTranslation) {
3348  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3349  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3350  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
3351 
3352  if (failed(checkImplementationStatus(opInst)))
3353  return failure();
3354 
3355  Value symAddr = threadprivateOp.getSymAddr();
3356  auto *symOp = symAddr.getDefiningOp();
3357 
3358  if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
3359  symOp = asCast.getOperand().getDefiningOp();
3360 
3361  if (!isa<LLVM::AddressOfOp>(symOp))
3362  return opInst.emitError("Addressing symbol not found");
3363  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
3364 
3365  LLVM::GlobalOp global =
3366  addressOfOp.getGlobal(moduleTranslation.symbolTable());
3367  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
3368 
3369  if (!ompBuilder->Config.isTargetDevice()) {
3370  llvm::Type *type = globalValue->getValueType();
3371  llvm::TypeSize typeSize =
3372  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
3373  type);
3374  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
3375  llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
3376  ompLoc, globalValue, size, global.getSymName() + ".cache");
3377  moduleTranslation.mapValue(opInst.getResult(0), callInst);
3378  } else {
3379  moduleTranslation.mapValue(opInst.getResult(0), globalValue);
3380  }
3381 
3382  return success();
3383 }
3384 
3385 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
3386 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
3387  switch (deviceClause) {
3388  case mlir::omp::DeclareTargetDeviceType::host:
3389  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
3390  break;
3391  case mlir::omp::DeclareTargetDeviceType::nohost:
3392  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
3393  break;
3394  case mlir::omp::DeclareTargetDeviceType::any:
3395  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
3396  break;
3397  }
3398  llvm_unreachable("unhandled device clause");
3399 }
3400 
3401 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
3403  mlir::omp::DeclareTargetCaptureClause captureClause) {
3404  switch (captureClause) {
3405  case mlir::omp::DeclareTargetCaptureClause::to:
3406  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
3407  case mlir::omp::DeclareTargetCaptureClause::link:
3408  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
3409  case mlir::omp::DeclareTargetCaptureClause::enter:
3410  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
3411  }
3412  llvm_unreachable("unhandled capture clause");
3413 }
3414 
/// Builds the suffix appended to a declare-target global's name when
/// generating its reference pointer. Private-visibility globals additionally
/// get a file-ID component so the suffix is unique per translation unit.
static llvm::SmallString<64>
getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
                             llvm::OpenMPIRBuilder &ompBuilder) {
  llvm::SmallString<64> suffix;
  llvm::raw_svector_ostream os(suffix);
  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
    // NOTE(review): findInstanceOf may return a null FileLineColLoc if the
    // global's location carries no file/line info; the callback below would
    // then dereference it — confirm all private declare-target globals are
    // guaranteed a FileLineColLoc.
    auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
    auto fileInfoCallBack = [&loc]() {
      return std::pair<std::string, uint64_t>(
          llvm::StringRef(loc.getFilename()), loc.getLine());
    };

    // Encode the file ID in hex, mirroring the unique-entry naming scheme.
    os << llvm::format(
        "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
  }
  os << "_decl_tgt_ref_ptr";

  return suffix;
}
3434 
3435 static bool isDeclareTargetLink(mlir::Value value) {
3436  if (auto addressOfOp =
3437  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
3438  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
3439  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
3440  if (auto declareTargetGlobal =
3441  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
3442  if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3443  mlir::omp::DeclareTargetCaptureClause::link)
3444  return true;
3445  }
3446  return false;
3447 }
3448 
3449 // Returns the reference pointer generated by the lowering of the declare target
3450 // operation in cases where the link clause is used or the to clause is used in
3451 // USM mode.
3452 static llvm::Value *
3454  LLVM::ModuleTranslation &moduleTranslation) {
3455  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3456 
3457  // An easier way to do this may just be to keep track of any pointer
3458  // references and their mapping to their respective operation
3459  if (auto addressOfOp =
3460  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
3461  if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
3462  addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
3463  addressOfOp.getGlobalName()))) {
3464 
3465  if (auto declareTargetGlobal =
3466  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
3467  gOp.getOperation())) {
3468 
3469  // In this case, we must utilise the reference pointer generated by the
3470  // declare target operation, similar to Clang
3471  if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
3472  mlir::omp::DeclareTargetCaptureClause::link) ||
3473  (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3474  mlir::omp::DeclareTargetCaptureClause::to &&
3475  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
3476  llvm::SmallString<64> suffix =
3477  getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
3478 
3479  if (gOp.getSymName().contains(suffix))
3480  return moduleTranslation.getLLVMModule()->getNamedValue(
3481  gOp.getSymName());
3482 
3483  return moduleTranslation.getLLVMModule()->getNamedValue(
3484  (gOp.getSymName().str() + suffix.str()).str());
3485  }
3486  }
3487  }
3488  }
3489 
3490  return nullptr;
3491 }
3492 
3493 namespace {
3494 // Append customMappers information to existing MapInfosTy
3495 struct MapInfosTy : llvm::OpenMPIRBuilder::MapInfosTy {
3497 
3498  /// Append arrays in \a CurInfo.
3499  void append(MapInfosTy &curInfo) {
3500  Mappers.append(curInfo.Mappers.begin(), curInfo.Mappers.end());
3501  llvm::OpenMPIRBuilder::MapInfosTy::append(curInfo);
3502  }
3503 };
// A small helper structure to contain data gathered
// for map lowering, coalescing it into one area and
// avoiding extra computations such as searches in the
// llvm module for lowered mapped variables or checking
// if something is declare target (and retrieving the
// value) more than necessary.
3510 struct MapInfoData : MapInfosTy {
3511  llvm::SmallVector<bool, 4> IsDeclareTarget;
3512  llvm::SmallVector<bool, 4> IsAMember;
3513  // Identify if mapping was added by mapClause or use_device clauses.
3514  llvm::SmallVector<bool, 4> IsAMapping;
3517  // Stripped off array/pointer to get the underlying
3518  // element type
3520 
3521  /// Append arrays in \a CurInfo.
3522  void append(MapInfoData &CurInfo) {
3523  IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
3524  CurInfo.IsDeclareTarget.end());
3525  MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
3526  OriginalValue.append(CurInfo.OriginalValue.begin(),
3527  CurInfo.OriginalValue.end());
3528  BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
3529  MapInfosTy::append(CurInfo);
3530  }
3531 };
3532 } // namespace
3533 
3534 uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
3535  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
3536  arrTy.getElementType()))
3537  return getArrayElementSizeInBits(nestedArrTy, dl);
3538  return dl.getTypeSizeInBits(arrTy.getElementType());
3539 }
3540 
// This function calculates the size to be offloaded for a specified type,
// given its associated map clause (which can contain bounds information which
// affects the total size), this size is calculated based on the underlying
// element type e.g. given a 1-D array of ints, we will calculate the size from
// the integer type * number of elements in the array. This size can be used in
// other calculations but is ultimately used as an argument to the OpenMP
// runtimes kernel argument structure which is generated through the
// combinedInfo data structures.
// This function is somewhat equivalent to Clang's getExprTypeSize inside of
// CGOpenMPRuntime.cpp.
llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
                            Operation *clauseOp, llvm::Value *basePointer,
                            llvm::Type *baseType, llvm::IRBuilderBase &builder,
                            LLVM::ModuleTranslation &moduleTranslation) {
  if (auto memberClause =
          mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
    // This calculates the size to transfer based on bounds and the underlying
    // element type, provided bounds have been specified (Fortran
    // pointers/allocatables/target and arrays that have sections specified
    // fall into this as well).
    if (!memberClause.getBounds().empty()) {
      llvm::Value *elementCount = builder.getInt64(1);
      for (auto bounds : memberClause.getBounds()) {
        if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
                bounds.getDefiningOp())) {
          // The element count accumulated across dimensions from the
          // map.info's bounds is: elemCount * (UB - LB + 1); later we
          // multiply by the underlying element type's byte size to get the
          // full size to be offloaded based on the bounds.
          elementCount = builder.CreateMul(
              elementCount,
              builder.CreateAdd(
                  builder.CreateSub(
                      moduleTranslation.lookupValue(boundOp.getUpperBound()),
                      moduleTranslation.lookupValue(boundOp.getLowerBound())),
                  builder.getInt64(1)));
        }
      }

      // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize
      // gives the size in inconsistent byte or bit format.
      uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
      if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
        underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);

      // The size in bytes x number of elements, the sizeInBytes stored is
      // the underlying type's size, e.g. if ptr<i32>, it'll be the i32's
      // size, so we do some on the fly runtime math to get the size in
      // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
      // some adjustment for members with more complex types.
      return builder.CreateMul(elementCount,
                               builder.getInt64(underlyingTypeSzInBits / 8));
    }
  }

  // No usable bounds: fall back to the static size of the whole type.
  return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
}
3598 
3600  MapInfoData &mapData, SmallVectorImpl<Value> &mapVars,
3601  LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl,
3602  llvm::IRBuilderBase &builder, ArrayRef<Value> useDevPtrOperands = {},
3603  ArrayRef<Value> useDevAddrOperands = {},
3604  ArrayRef<Value> hasDevAddrOperands = {}) {
3605  auto checkIsAMember = [](const auto &mapVars, auto mapOp) {
3606  // Check if this is a member mapping and correctly assign that it is, if
3607  // it is a member of a larger object.
3608  // TODO: Need better handling of members, and distinguishing of members
3609  // that are implicitly allocated on device vs explicitly passed in as
3610  // arguments.
3611  // TODO: May require some further additions to support nested record
3612  // types, i.e. member maps that can have member maps.
3613  for (Value mapValue : mapVars) {
3614  auto map = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3615  for (auto member : map.getMembers())
3616  if (member == mapOp)
3617  return true;
3618  }
3619  return false;
3620  };
3621 
3622  // Process MapOperands
3623  for (Value mapValue : mapVars) {
3624  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3625  Value offloadPtr =
3626  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3627  mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
3628  mapData.Pointers.push_back(mapData.OriginalValue.back());
3629 
3630  if (llvm::Value *refPtr =
3631  getRefPtrIfDeclareTarget(offloadPtr,
3632  moduleTranslation)) { // declare target
3633  mapData.IsDeclareTarget.push_back(true);
3634  mapData.BasePointers.push_back(refPtr);
3635  } else { // regular mapped variable
3636  mapData.IsDeclareTarget.push_back(false);
3637  mapData.BasePointers.push_back(mapData.OriginalValue.back());
3638  }
3639 
3640  mapData.BaseType.push_back(
3641  moduleTranslation.convertType(mapOp.getVarType()));
3642  mapData.Sizes.push_back(
3643  getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
3644  mapData.BaseType.back(), builder, moduleTranslation));
3645  mapData.MapClause.push_back(mapOp.getOperation());
3646  mapData.Types.push_back(
3647  llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType()));
3648  mapData.Names.push_back(LLVM::createMappingInformation(
3649  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3650  mapData.DevicePointers.push_back(llvm::OpenMPIRBuilder::DeviceInfoTy::None);
3651  if (mapOp.getMapperId())
3652  mapData.Mappers.push_back(
3653  SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>(
3654  mapOp, mapOp.getMapperIdAttr()));
3655  else
3656  mapData.Mappers.push_back(nullptr);
3657  mapData.IsAMapping.push_back(true);
3658  mapData.IsAMember.push_back(checkIsAMember(mapVars, mapOp));
3659  }
3660 
3661  auto findMapInfo = [&mapData](llvm::Value *val,
3662  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
3663  unsigned index = 0;
3664  bool found = false;
3665  for (llvm::Value *basePtr : mapData.OriginalValue) {
3666  if (basePtr == val && mapData.IsAMapping[index]) {
3667  found = true;
3668  mapData.Types[index] |=
3669  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
3670  mapData.DevicePointers[index] = devInfoTy;
3671  }
3672  index++;
3673  }
3674  return found;
3675  };
3676 
3677  // Process useDevPtr(Addr)Operands
3678  auto addDevInfos = [&](const llvm::ArrayRef<Value> &useDevOperands,
3679  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
3680  for (Value mapValue : useDevOperands) {
3681  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3682  Value offloadPtr =
3683  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3684  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
3685 
3686  // Check if map info is already present for this entry.
3687  if (!findMapInfo(origValue, devInfoTy)) {
3688  mapData.OriginalValue.push_back(origValue);
3689  mapData.Pointers.push_back(mapData.OriginalValue.back());
3690  mapData.IsDeclareTarget.push_back(false);
3691  mapData.BasePointers.push_back(mapData.OriginalValue.back());
3692  mapData.BaseType.push_back(
3693  moduleTranslation.convertType(mapOp.getVarType()));
3694  mapData.Sizes.push_back(builder.getInt64(0));
3695  mapData.MapClause.push_back(mapOp.getOperation());
3696  mapData.Types.push_back(
3697  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
3698  mapData.Names.push_back(LLVM::createMappingInformation(
3699  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3700  mapData.DevicePointers.push_back(devInfoTy);
3701  mapData.Mappers.push_back(nullptr);
3702  mapData.IsAMapping.push_back(false);
3703  mapData.IsAMember.push_back(checkIsAMember(useDevOperands, mapOp));
3704  }
3705  }
3706  };
3707 
3708  addDevInfos(useDevAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
3709  addDevInfos(useDevPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
3710 
3711  for (Value mapValue : hasDevAddrOperands) {
3712  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3713  Value offloadPtr =
3714  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3715  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
3716  auto mapType =
3717  static_cast<llvm::omp::OpenMPOffloadMappingFlags>(mapOp.getMapType());
3718  auto mapTypeAlways = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
3719 
3720  mapData.OriginalValue.push_back(origValue);
3721  mapData.BasePointers.push_back(origValue);
3722  mapData.Pointers.push_back(origValue);
3723  mapData.IsDeclareTarget.push_back(false);
3724  mapData.BaseType.push_back(
3725  moduleTranslation.convertType(mapOp.getVarType()));
3726  mapData.Sizes.push_back(
3727  builder.getInt64(dl.getTypeSize(mapOp.getVarType())));
3728  mapData.MapClause.push_back(mapOp.getOperation());
3729  if (llvm::to_underlying(mapType & mapTypeAlways)) {
3730  // Descriptors are mapped with the ALWAYS flag, since they can get
3731  // rematerialized, so the address of the decriptor for a given object
3732  // may change from one place to another.
3733  mapData.Types.push_back(mapType);
3734  // Technically it's possible for a non-descriptor mapping to have
3735  // both has-device-addr and ALWAYS, so lookup the mapper in case it
3736  // exists.
3737  if (mapOp.getMapperId()) {
3738  mapData.Mappers.push_back(
3739  SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>(
3740  mapOp, mapOp.getMapperIdAttr()));
3741  } else {
3742  mapData.Mappers.push_back(nullptr);
3743  }
3744  } else {
3745  mapData.Types.push_back(
3746  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
3747  mapData.Mappers.push_back(nullptr);
3748  }
3749  mapData.Names.push_back(LLVM::createMappingInformation(
3750  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3751  mapData.DevicePointers.push_back(
3752  llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
3753  mapData.IsAMapping.push_back(false);
3754  mapData.IsAMember.push_back(checkIsAMember(hasDevAddrOperands, mapOp));
3755  }
3756 }
3757 
3758 static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
3759  auto *res = llvm::find(mapData.MapClause, memberOp);
3760  assert(res != mapData.MapClause.end() &&
3761  "MapInfoOp for member not found in MapData, cannot return index");
3762  return std::distance(mapData.MapClause.begin(), res);
3763 }
3764 
3765 static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
3766  bool first) {
3767  ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
3768  // Only 1 member has been mapped, we can return it.
3769  if (indexAttr.size() == 1)
3770  return cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp());
3771 
3772  llvm::SmallVector<size_t> indices(indexAttr.size());
3773  std::iota(indices.begin(), indices.end(), 0);
3774 
3775  llvm::sort(indices.begin(), indices.end(),
3776  [&](const size_t a, const size_t b) {
3777  auto memberIndicesA = cast<ArrayAttr>(indexAttr[a]);
3778  auto memberIndicesB = cast<ArrayAttr>(indexAttr[b]);
3779  for (const auto it : llvm::zip(memberIndicesA, memberIndicesB)) {
3780  int64_t aIndex = cast<IntegerAttr>(std::get<0>(it)).getInt();
3781  int64_t bIndex = cast<IntegerAttr>(std::get<1>(it)).getInt();
3782 
3783  if (aIndex == bIndex)
3784  continue;
3785 
3786  if (aIndex < bIndex)
3787  return first;
3788 
3789  if (aIndex > bIndex)
3790  return !first;
3791  }
3792 
3793  // Iterated the up until the end of the smallest member and
3794  // they were found to be equal up to that point, so select
3795  // the member with the lowest index count, so the "parent"
3796  return memberIndicesA.size() < memberIndicesB.size();
3797  });
3798 
3799  return llvm::cast<omp::MapInfoOp>(
3800  mapInfo.getMembers()[indices.front()].getDefiningOp());
3801 }
3802 
3803 /// This function calculates the array/pointer offset for map data provided
3804 /// with bounds operations, e.g. when provided something like the following:
3805 ///
3806 /// Fortran
3807 /// map(tofrom: array(2:5, 3:2))
3808 /// or
3809 /// C++
3810 /// map(tofrom: array[1:4][2:3])
3811 /// We must calculate the initial pointer offset to pass across, this function
3812 /// performs this using bounds.
3813 ///
3814 /// NOTE: which while specified in row-major order it currently needs to be
3815 /// flipped for Fortran's column order array allocation and access (as
3816 /// opposed to C++'s row-major, hence the backwards processing where order is
3817 /// important). This is likely important to keep in mind for the future when
3818 /// we incorporate a C++ frontend, both frontends will need to agree on the
3819 /// ordering of generated bounds operations (one may have to flip them) to
3820 /// make the below lowering frontend agnostic. The offload size
/// calculation may also have to be adjusted for C++.
3822 std::vector<llvm::Value *>
3824  llvm::IRBuilderBase &builder, bool isArrayTy,
3825  OperandRange bounds) {
3826  std::vector<llvm::Value *> idx;
3827  // There's no bounds to calculate an offset from, we can safely
3828  // ignore and return no indices.
3829  if (bounds.empty())
3830  return idx;
3831 
3832  // If we have an array type, then we have its type so can treat it as a
3833  // normal GEP instruction where the bounds operations are simply indexes
3834  // into the array. We currently do reverse order of the bounds, which
3835  // I believe leans more towards Fortran's column-major in memory.
3836  if (isArrayTy) {
3837  idx.push_back(builder.getInt64(0));
3838  for (int i = bounds.size() - 1; i >= 0; --i) {
3839  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3840  bounds[i].getDefiningOp())) {
3841  idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
3842  }
3843  }
3844  } else {
3845  // If we do not have an array type, but we have bounds, then we're dealing
3846  // with a pointer that's being treated like an array and we have the
3847  // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
3848  // address (pointer pointing to the actual data) so we must caclulate the
3849  // offset using a single index which the following two loops attempts to
3850  // compute.
3851 
3852  // Calculates the size offset we need to make per row e.g. first row or
3853  // column only needs to be offset by one, but the next would have to be
3854  // the previous row/column offset multiplied by the extent of current row.
3855  //
3856  // For example ([1][10][100]):
3857  //
3858  // - First row/column we move by 1 for each index increment
3859  // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
3860  // current) for 10 for each index increment
3861  // - Third row/column we would move by 10 (second row/column) *
3862  // (extent/size of current) 100 for 1000 for each index increment
3863  std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
3864  for (size_t i = 1; i < bounds.size(); ++i) {
3865  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3866  bounds[i].getDefiningOp())) {
3867  dimensionIndexSizeOffset.push_back(builder.CreateMul(
3868  moduleTranslation.lookupValue(boundOp.getExtent()),
3869  dimensionIndexSizeOffset[i - 1]));
3870  }
3871  }
3872 
3873  // Now that we have calculated how much we move by per index, we must
3874  // multiply each lower bound offset in indexes by the size offset we
3875  // have calculated in the previous and accumulate the results to get
3876  // our final resulting offset.
3877  for (int i = bounds.size() - 1; i >= 0; --i) {
3878  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3879  bounds[i].getDefiningOp())) {
3880  if (idx.empty())
3881  idx.emplace_back(builder.CreateMul(
3882  moduleTranslation.lookupValue(boundOp.getLowerBound()),
3883  dimensionIndexSizeOffset[i]));
3884  else
3885  idx.back() = builder.CreateAdd(
3886  idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
3887  boundOp.getLowerBound()),
3888  dimensionIndexSizeOffset[i]));
3889  }
3890  }
3891  }
3892 
3893  return idx;
3894 }
3895 
3896 // This creates two insertions into the MapInfosTy data structure for the
3897 // "parent" of a set of members, (usually a container e.g.
3898 // class/structure/derived type) when subsequent members have also been
3899 // explicitly mapped on the same map clause. Certain types, such as Fortran
3900 // descriptors are mapped like this as well, however, the members are
3901 // implicit as far as a user is concerned, but we must explicitly map them
3902 // internally.
3903 //
3904 // This function also returns the memberOfFlag for this particular parent,
3905 // which is utilised in subsequent member mappings (by modifying there map type
3906 // with it) to indicate that a member is part of this parent and should be
3907 // treated by the runtime as such. Important to achieve the correct mapping.
3908 //
3909 // This function borrows a lot from Clang's emitCombinedEntry function
3910 // inside of CGOpenMPRuntime.cpp
3911 static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
3912  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3913  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
3914  MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams) {
3915  assert(!ompBuilder.Config.isTargetDevice() &&
3916  "function only supported for host device codegen");
3917 
3918  // Map the first segment of our structure
3919  combinedInfo.Types.emplace_back(
3920  isTargetParams
3921  ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
3922  : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE);
3923  combinedInfo.DevicePointers.emplace_back(
3924  mapData.DevicePointers[mapDataIndex]);
3925  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIndex]);
3926  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
3927  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
3928  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
3929 
3930  // Calculate size of the parent object being mapped based on the
3931  // addresses at runtime, highAddr - lowAddr = size. This of course
3932  // doesn't factor in allocated data like pointers, hence the further
3933  // processing of members specified by users, or in the case of
3934  // Fortran pointers and allocatables, the mapping of the pointed to
3935  // data by the descriptor (which itself, is a structure containing
3936  // runtime information on the dynamically allocated data).
3937  auto parentClause =
3938  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3939 
3940  llvm::Value *lowAddr, *highAddr;
3941  if (!parentClause.getPartialMap()) {
3942  lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
3943  builder.getPtrTy());
3944  highAddr = builder.CreatePointerCast(
3945  builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
3946  mapData.Pointers[mapDataIndex], 1),
3947  builder.getPtrTy());
3948  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
3949  } else {
3950  auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3951  int firstMemberIdx = getMapDataMemberIdx(
3952  mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
3953  lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
3954  builder.getPtrTy());
3955  int lastMemberIdx = getMapDataMemberIdx(
3956  mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
3957  highAddr = builder.CreatePointerCast(
3958  builder.CreateGEP(mapData.BaseType[lastMemberIdx],
3959  mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
3960  builder.getPtrTy());
3961  combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
3962  }
3963 
3964  llvm::Value *size = builder.CreateIntCast(
3965  builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
3966  builder.getInt64Ty(),
3967  /*isSigned=*/false);
3968  combinedInfo.Sizes.push_back(size);
3969 
3970  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
3971  ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
3972 
3973  // This creates the initial MEMBER_OF mapping that consists of
3974  // the parent/top level container (same as above effectively, except
3975  // with a fixed initial compile time size and separate maptype which
3976  // indicates the true mape type (tofrom etc.). This parent mapping is
3977  // only relevant if the structure in its totality is being mapped,
3978  // otherwise the above suffices.
3979  if (!parentClause.getPartialMap()) {
3980  // TODO: This will need to be expanded to include the whole host of logic
3981  // for the map flags that Clang currently supports (e.g. it should do some
3982  // further case specific flag modifications). For the moment, it handles
3983  // what we support as expected.
3984  llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
3985  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3986  combinedInfo.Types.emplace_back(mapFlag);
3987  combinedInfo.DevicePointers.emplace_back(
3989  combinedInfo.Mappers.emplace_back(nullptr);
3990  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
3991  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
3992  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
3993  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
3994  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
3995  }
3996  return memberOfFlag;
3997 }
3998 
3999 // The intent is to verify if the mapped data being passed is a
4000 // pointer -> pointee that requires special handling in certain cases,
4001 // e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
4002 //
4003 // There may be a better way to verify this, but unfortunately with
4004 // opaque pointers we lose the ability to easily check if something is
4005 // a pointer whilst maintaining access to the underlying type.
4006 static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
4007  // If we have a varPtrPtr field assigned then the underlying type is a pointer
4008  if (mapOp.getVarPtrPtr())
4009  return true;
4010 
4011  // If the map data is declare target with a link clause, then it's represented
4012  // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
4013  // no relation to pointers.
4014  if (isDeclareTargetLink(mapOp.getVarPtr()))
4015  return true;
4016 
4017  return false;
4018 }
4019 
4020 // This function is intended to add explicit mappings of members
4022  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
4023  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
4024  MapInfoData &mapData, uint64_t mapDataIndex,
4025  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
4026  assert(!ompBuilder.Config.isTargetDevice() &&
4027  "function only supported for host device codegen");
4028 
4029  auto parentClause =
4030  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
4031 
4032  for (auto mappedMembers : parentClause.getMembers()) {
4033  auto memberClause =
4034  llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp());
4035  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
4036 
4037  assert(memberDataIdx >= 0 && "could not find mapped member of structure");
4038 
4039  // If we're currently mapping a pointer to a block of data, we must
4040  // initially map the pointer, and then attatch/bind the data with a
4041  // subsequent map to the pointer. This segment of code generates the
4042  // pointer mapping, which can in certain cases be optimised out as Clang
4043  // currently does in its lowering. However, for the moment we do not do so,
4044  // in part as we currently have substantially less information on the data
4045  // being mapped at this stage.
4046  if (checkIfPointerMap(memberClause)) {
4047  auto mapFlag =
4048  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
4049  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4050  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
4051  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
4052  combinedInfo.Types.emplace_back(mapFlag);
4053  combinedInfo.DevicePointers.emplace_back(
4055  combinedInfo.Mappers.emplace_back(nullptr);
4056  combinedInfo.Names.emplace_back(
4057  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
4058  combinedInfo.BasePointers.emplace_back(
4059  mapData.BasePointers[mapDataIndex]);
4060  combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
4061  combinedInfo.Sizes.emplace_back(builder.getInt64(
4062  moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
4063  }
4064 
4065  // Same MemberOfFlag to indicate its link with parent and other members
4066  // of.
4067  auto mapFlag =
4068  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
4069  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4070  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
4071  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
4072  if (checkIfPointerMap(memberClause))
4073  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
4074 
4075  combinedInfo.Types.emplace_back(mapFlag);
4076  combinedInfo.DevicePointers.emplace_back(
4077  mapData.DevicePointers[memberDataIdx]);
4078  combinedInfo.Mappers.emplace_back(mapData.Mappers[memberDataIdx]);
4079  combinedInfo.Names.emplace_back(
4080  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
4081  uint64_t basePointerIndex =
4082  checkIfPointerMap(memberClause) ? memberDataIdx : mapDataIndex;
4083  combinedInfo.BasePointers.emplace_back(
4084  mapData.BasePointers[basePointerIndex]);
4085  combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
4086 
4087  llvm::Value *size = mapData.Sizes[memberDataIdx];
4088  if (checkIfPointerMap(memberClause)) {
4089  size = builder.CreateSelect(
4090  builder.CreateIsNull(mapData.Pointers[memberDataIdx]),
4091  builder.getInt64(0), size);
4092  }
4093 
4094  combinedInfo.Sizes.emplace_back(size);
4095  }
4096 }
4097 
4098 static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
4099  MapInfosTy &combinedInfo, bool isTargetParams,
4100  int mapDataParentIdx = -1) {
4101  // Declare Target Mappings are excluded from being marked as
4102  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
4103  // marked with OMP_MAP_PTR_AND_OBJ instead.
4104  auto mapFlag = mapData.Types[mapDataIdx];
4105  auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
4106 
4107  bool isPtrTy = checkIfPointerMap(mapInfoOp);
4108  if (isPtrTy)
4109  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
4110 
4111  if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx])
4112  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4113 
4114  if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy &&
4115  !isPtrTy)
4116  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
4117 
4118  // if we're provided a mapDataParentIdx, then the data being mapped is
4119  // part of a larger object (in a parent <-> member mapping) and in this
4120  // case our BasePointer should be the parent.
4121  if (mapDataParentIdx >= 0)
4122  combinedInfo.BasePointers.emplace_back(
4123  mapData.BasePointers[mapDataParentIdx]);
4124  else
4125  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
4126 
4127  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
4128  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
4129  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]);
4130  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
4131  combinedInfo.Types.emplace_back(mapFlag);
4132  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
4133 }
4134 
4135 static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation,
4136  llvm::IRBuilderBase &builder,
4137  llvm::OpenMPIRBuilder &ompBuilder,
4138  DataLayout &dl, MapInfosTy &combinedInfo,
4139  MapInfoData &mapData, uint64_t mapDataIndex,
4140  bool isTargetParams) {
4141  assert(!ompBuilder.Config.isTargetDevice() &&
4142  "function only supported for host device codegen");
4143 
4144  auto parentClause =
4145  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
4146 
4147  // If we have a partial map (no parent referenced in the map clauses of the
4148  // directive, only members) and only a single member, we do not need to bind
4149  // the map of the member to the parent, we can pass the member separately.
4150  if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) {
4151  auto memberClause = llvm::cast<omp::MapInfoOp>(
4152  parentClause.getMembers()[0].getDefiningOp());
4153  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
4154  // Note: Clang treats arrays with explicit bounds that fall into this
4155  // category as a parent with map case, however, it seems this isn't a
4156  // requirement, and processing them as an individual map is fine. So,
4157  // we will handle them as individual maps for the moment, as it's
4158  // difficult for us to check this as we always require bounds to be
4159  // specified currently and it's also marginally more optimal (single
4160  // map rather than two). The difference may come from the fact that
4161  // Clang maps array without bounds as pointers (which we do not
4162  // currently do), whereas we treat them as arrays in all cases
4163  // currently.
4164  processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams,
4165  mapDataIndex);
4166  return;
4167  }
4168 
4169  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
4170  mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
4171  combinedInfo, mapData, mapDataIndex, isTargetParams);
4172  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
4173  combinedInfo, mapData, mapDataIndex,
4174  memberOfParentFlag);
4175 }
4176 
// This is a variation on Clang's GenerateOpenMPCapturedVars, which
// generates different operation (e.g. load/store) combinations for
// arguments to the kernel, based on map capture kinds which are then
// utilised in the combinedInfo in place of the original Map value.
//
// Mutates mapData.Pointers (and for ByCopy also mapData.BasePointers) in
// place; it deliberately does not touch the ModuleTranslation value mapping.
static void
createAlteredByCaptureMap(MapInfoData &mapData,
                          LLVM::ModuleTranslation &moduleTranslation,
                          llvm::IRBuilderBase &builder) {
  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
         "function only supported for host device codegen");
  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
    // if it's declare target, skip it, it's handled separately.
    if (!mapData.IsDeclareTarget[i]) {
      auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
      omp::VariableCaptureKind captureKind = mapOp.getMapCaptureType();
      bool isPtrTy = checkIfPointerMap(mapOp);

      // Currently handles array sectioning lowerbound case, but more
      // logic may be required in the future. Clang invokes EmitLValue,
      // which has specialised logic for special Clang types such as user
      // defines, so it is possible we will have to extend this for
      // structures or other complex types. As the general idea is that this
      // function mimics some of the logic from Clang that we require for
      // kernel argument passing from host -> device.
      switch (captureKind) {
      case omp::VariableCaptureKind::ByRef: {
        llvm::Value *newV = mapData.Pointers[i];
        // GEP indices derived from the map's bounds operations (empty when
        // the clause carries no bounds).
        std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
            moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
            mapOp.getBounds());
        // Pointer-like maps must be dereferenced first so the offset is
        // applied to the pointed-to data, not the pointer itself.
        if (isPtrTy)
          newV = builder.CreateLoad(builder.getPtrTy(), newV);

        if (!offsetIdx.empty())
          newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
                                           "array_offset");
        mapData.Pointers[i] = newV;
      } break;
      case omp::VariableCaptureKind::ByCopy: {
        llvm::Type *type = mapData.BaseType[i];
        llvm::Value *newV;
        // Load the value if we currently only hold its address.
        if (mapData.Pointers[i]->getType()->isPointerTy())
          newV = builder.CreateLoad(type, mapData.Pointers[i]);
        else
          newV = mapData.Pointers[i];

        // Non-pointer by-copy values are spilled to a function-entry alloca
        // so the kernel argument machinery has an address to work with. The
        // insertion point is saved/restored around the alloca emission.
        if (!isPtrTy) {
          auto curInsert = builder.saveIP();
          builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
          auto *memTempAlloc =
              builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
          builder.restoreIP(curInsert);

          builder.CreateStore(newV, memTempAlloc);
          newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
        }

        mapData.Pointers[i] = newV;
        mapData.BasePointers[i] = newV;
      } break;
      case omp::VariableCaptureKind::This:
      case omp::VariableCaptureKind::VLAType:
        // Not yet supported; surface a diagnostic rather than miscompiling.
        mapData.MapClause[i]->emitOpError("Unhandled capture kind");
        break;
      }
    }
  }
}
4245 
4246 // Generate all map related information and fill the combinedInfo.
4247 static void genMapInfos(llvm::IRBuilderBase &builder,
4248  LLVM::ModuleTranslation &moduleTranslation,
4249  DataLayout &dl, MapInfosTy &combinedInfo,
4250  MapInfoData &mapData, bool isTargetParams = false) {
4251  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4252  "function only supported for host device codegen");
4253 
4254  // We wish to modify some of the methods in which arguments are
4255  // passed based on their capture type by the target region, this can
4256  // involve generating new loads and stores, which changes the
4257  // MLIR value to LLVM value mapping, however, we only wish to do this
4258  // locally for the current function/target and also avoid altering
4259  // ModuleTranslation, so we remap the base pointer or pointer stored
4260  // in the map infos corresponding MapInfoData, which is later accessed
4261  // by genMapInfos and createTarget to help generate the kernel and
4262  // kernel arg structure. It primarily becomes relevant in cases like
4263  // bycopy, or byref range'd arrays. In the default case, we simply
4264  // pass thee pointer byref as both basePointer and pointer.
4265  createAlteredByCaptureMap(mapData, moduleTranslation, builder);
4266 
4267  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4268 
4269  // We operate under the assumption that all vectors that are
4270  // required in MapInfoData are of equal lengths (either filled with
4271  // default constructed data or appropiate information) so we can
4272  // utilise the size from any component of MapInfoData, if we can't
4273  // something is missing from the initial MapInfoData construction.
4274  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
4275  // NOTE/TODO: We currently do not support arbitrary depth record
4276  // type mapping.
4277  if (mapData.IsAMember[i])
4278  continue;
4279 
4280  auto mapInfoOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[i]);
4281  if (!mapInfoOp.getMembers().empty()) {
4282  processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
4283  combinedInfo, mapData, i, isTargetParams);
4284  continue;
4285  }
4286 
4287  processIndividualMap(mapData, i, combinedInfo, isTargetParams);
4288  }
4289 }
4290 
4292 emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder,
4293  LLVM::ModuleTranslation &moduleTranslation,
4294  llvm::StringRef mapperFuncName);
4295 
4297 getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder,
4298  LLVM::ModuleTranslation &moduleTranslation) {
4299  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4300  "function only supported for host device codegen");
4301  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
4302  std::string mapperFuncName =
4303  moduleTranslation.getOpenMPBuilder()->createPlatformSpecificName(
4304  {"omp_mapper", declMapperOp.getSymName()});
4305 
4306  if (auto *lookupFunc = moduleTranslation.lookupFunction(mapperFuncName))
4307  return lookupFunc;
4308 
4309  return emitUserDefinedMapper(declMapperOp, builder, moduleTranslation,
4310  mapperFuncName);
4311 }
4312 
4314 emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder,
4315  LLVM::ModuleTranslation &moduleTranslation,
4316  llvm::StringRef mapperFuncName) {
4317  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4318  "function only supported for host device codegen");
4319  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
4320  auto declMapperInfoOp = declMapperOp.getDeclareMapperInfo();
4321  DataLayout dl = DataLayout(declMapperOp->getParentOfType<ModuleOp>());
4322  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4323  llvm::Type *varType = moduleTranslation.convertType(declMapperOp.getType());
4324  SmallVector<Value> mapVars = declMapperInfoOp.getMapVars();
4325 
4326  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4327 
4328  // Fill up the arrays with all the mapped variables.
4329  MapInfosTy combinedInfo;
4330  auto genMapInfoCB =
4331  [&](InsertPointTy codeGenIP, llvm::Value *ptrPHI,
4332  llvm::Value *unused2) -> llvm::OpenMPIRBuilder::MapInfosOrErrorTy {
4333  builder.restoreIP(codeGenIP);
4334  moduleTranslation.mapValue(declMapperOp.getSymVal(), ptrPHI);
4335  moduleTranslation.mapBlock(&declMapperOp.getRegion().front(),
4336  builder.GetInsertBlock());
4337  if (failed(moduleTranslation.convertBlock(declMapperOp.getRegion().front(),
4338  /*ignoreArguments=*/true,
4339  builder)))
4340  return llvm::make_error<PreviouslyReportedError>();
4341  MapInfoData mapData;
4342  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
4343  builder);
4344  genMapInfos(builder, moduleTranslation, dl, combinedInfo, mapData);
4345 
4346  // Drop the mapping that is no longer necessary so that the same region can
4347  // be processed multiple times.
4348  moduleTranslation.forgetMapping(declMapperOp.getRegion());
4349  return combinedInfo;
4350  };
4351 
4352  auto customMapperCB = [&](unsigned i) -> llvm::Expected<llvm::Function *> {
4353  if (!combinedInfo.Mappers[i])
4354  return nullptr;
4355  return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
4356  moduleTranslation);
4357  };
4358 
4359  llvm::Expected<llvm::Function *> newFn = ompBuilder->emitUserDefinedMapper(
4360  genMapInfoCB, varType, mapperFuncName, customMapperCB);
4361  if (!newFn)
4362  return newFn.takeError();
4363  moduleTranslation.mapFunction(mapperFuncName, *newFn);
4364  return *newFn;
4365 }
4366 
4367 static LogicalResult
4368 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
4369  LLVM::ModuleTranslation &moduleTranslation) {
4370  llvm::Value *ifCond = nullptr;
4371  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
4372  SmallVector<Value> mapVars;
4373  SmallVector<Value> useDevicePtrVars;
4374  SmallVector<Value> useDeviceAddrVars;
4375  llvm::omp::RuntimeFunction RTLFn;
4376  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
4377 
4378  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4379  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
4380  /*SeparateBeginEndCalls=*/true);
4381 
4382  LogicalResult result =
4384  .Case([&](omp::TargetDataOp dataOp) {
4385  if (failed(checkImplementationStatus(*dataOp)))
4386  return failure();
4387 
4388  if (auto ifVar = dataOp.getIfExpr())
4389  ifCond = moduleTranslation.lookupValue(ifVar);
4390 
4391  if (auto devId = dataOp.getDevice())
4392  if (auto constOp =
4393  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4394  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4395  deviceID = intAttr.getInt();
4396 
4397  mapVars = dataOp.getMapVars();
4398  useDevicePtrVars = dataOp.getUseDevicePtrVars();
4399  useDeviceAddrVars = dataOp.getUseDeviceAddrVars();
4400  return success();
4401  })
4402  .Case([&](omp::TargetEnterDataOp enterDataOp) -> LogicalResult {
4403  if (failed(checkImplementationStatus(*enterDataOp)))
4404  return failure();
4405 
4406  if (auto ifVar = enterDataOp.getIfExpr())
4407  ifCond = moduleTranslation.lookupValue(ifVar);
4408 
4409  if (auto devId = enterDataOp.getDevice())
4410  if (auto constOp =
4411  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4412  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4413  deviceID = intAttr.getInt();
4414  RTLFn =
4415  enterDataOp.getNowait()
4416  ? llvm::omp::OMPRTL___tgt_target_data_begin_nowait_mapper
4417  : llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
4418  mapVars = enterDataOp.getMapVars();
4419  info.HasNoWait = enterDataOp.getNowait();
4420  return success();
4421  })
4422  .Case([&](omp::TargetExitDataOp exitDataOp) -> LogicalResult {
4423  if (failed(checkImplementationStatus(*exitDataOp)))
4424  return failure();
4425 
4426  if (auto ifVar = exitDataOp.getIfExpr())
4427  ifCond = moduleTranslation.lookupValue(ifVar);
4428 
4429  if (auto devId = exitDataOp.getDevice())
4430  if (auto constOp =
4431  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4432  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4433  deviceID = intAttr.getInt();
4434 
4435  RTLFn = exitDataOp.getNowait()
4436  ? llvm::omp::OMPRTL___tgt_target_data_end_nowait_mapper
4437  : llvm::omp::OMPRTL___tgt_target_data_end_mapper;
4438  mapVars = exitDataOp.getMapVars();
4439  info.HasNoWait = exitDataOp.getNowait();
4440  return success();
4441  })
4442  .Case([&](omp::TargetUpdateOp updateDataOp) -> LogicalResult {
4443  if (failed(checkImplementationStatus(*updateDataOp)))
4444  return failure();
4445 
4446  if (auto ifVar = updateDataOp.getIfExpr())
4447  ifCond = moduleTranslation.lookupValue(ifVar);
4448 
4449  if (auto devId = updateDataOp.getDevice())
4450  if (auto constOp =
4451  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4452  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4453  deviceID = intAttr.getInt();
4454 
4455  RTLFn =
4456  updateDataOp.getNowait()
4457  ? llvm::omp::OMPRTL___tgt_target_data_update_nowait_mapper
4458  : llvm::omp::OMPRTL___tgt_target_data_update_mapper;
4459  mapVars = updateDataOp.getMapVars();
4460  info.HasNoWait = updateDataOp.getNowait();
4461  return success();
4462  })
4463  .Default([&](Operation *op) {
4464  llvm_unreachable("unexpected operation");
4465  return failure();
4466  });
4467 
4468  if (failed(result))
4469  return failure();
4470 
4471  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4472  MapInfoData mapData;
4473  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, DL,
4474  builder, useDevicePtrVars, useDeviceAddrVars);
4475 
4476  // Fill up the arrays with all the mapped variables.
4477  MapInfosTy combinedInfo;
4478  auto genMapInfoCB = [&](InsertPointTy codeGenIP) -> MapInfosTy & {
4479  builder.restoreIP(codeGenIP);
4480  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
4481  return combinedInfo;
4482  };
4483 
4484  // Define a lambda to apply mappings between use_device_addr and
4485  // use_device_ptr base pointers, and their associated block arguments.
4486  auto mapUseDevice =
4487  [&moduleTranslation](
4488  llvm::OpenMPIRBuilder::DeviceInfoTy type,
4490  llvm::SmallVectorImpl<Value> &useDeviceVars, MapInfoData &mapInfoData,
4491  llvm::function_ref<llvm::Value *(llvm::Value *)> mapper = nullptr) {
4492  for (auto [arg, useDevVar] :
4493  llvm::zip_equal(blockArgs, useDeviceVars)) {
4494 
4495  auto getMapBasePtr = [](omp::MapInfoOp mapInfoOp) {
4496  return mapInfoOp.getVarPtrPtr() ? mapInfoOp.getVarPtrPtr()
4497  : mapInfoOp.getVarPtr();
4498  };
4499 
4500  auto useDevMap = cast<omp::MapInfoOp>(useDevVar.getDefiningOp());
4501  for (auto [mapClause, devicePointer, basePointer] : llvm::zip_equal(
4502  mapInfoData.MapClause, mapInfoData.DevicePointers,
4503  mapInfoData.BasePointers)) {
4504  auto mapOp = cast<omp::MapInfoOp>(mapClause);
4505  if (getMapBasePtr(mapOp) != getMapBasePtr(useDevMap) ||
4506  devicePointer != type)
4507  continue;
4508 
4509  if (llvm::Value *devPtrInfoMap =
4510  mapper ? mapper(basePointer) : basePointer) {
4511  moduleTranslation.mapValue(arg, devPtrInfoMap);
4512  break;
4513  }
4514  }
4515  }
4516  };
4517 
4518  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
4519  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType)
4520  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
4521  builder.restoreIP(codeGenIP);
4522  assert(isa<omp::TargetDataOp>(op) &&
4523  "BodyGen requested for non TargetDataOp");
4524  auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(op);
4525  Region &region = cast<omp::TargetDataOp>(op).getRegion();
4526  switch (bodyGenType) {
4527  case BodyGenTy::Priv:
4528  // Check if any device ptr/addr info is available
4529  if (!info.DevicePtrInfoMap.empty()) {
4530  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4531  blockArgIface.getUseDeviceAddrBlockArgs(),
4532  useDeviceAddrVars, mapData,
4533  [&](llvm::Value *basePointer) -> llvm::Value * {
4534  if (!info.DevicePtrInfoMap[basePointer].second)
4535  return nullptr;
4536  return builder.CreateLoad(
4537  builder.getPtrTy(),
4538  info.DevicePtrInfoMap[basePointer].second);
4539  });
4540  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4541  blockArgIface.getUseDevicePtrBlockArgs(), useDevicePtrVars,
4542  mapData, [&](llvm::Value *basePointer) {
4543  return info.DevicePtrInfoMap[basePointer].second;
4544  });
4545 
4546  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
4547  moduleTranslation)))
4548  return llvm::make_error<PreviouslyReportedError>();
4549  }
4550  break;
4551  case BodyGenTy::DupNoPriv:
4552  // We must always restoreIP regardless of doing anything the caller
4553  // does not restore it, leading to incorrect (no) branch generation.
4554  builder.restoreIP(codeGenIP);
4555  break;
4556  case BodyGenTy::NoPriv:
4557  // If device info is available then region has already been generated
4558  if (info.DevicePtrInfoMap.empty()) {
4559  // For device pass, if use_device_ptr(addr) mappings were present,
4560  // we need to link them here before codegen.
4561  if (ompBuilder->Config.IsTargetDevice.value_or(false)) {
4562  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4563  blockArgIface.getUseDeviceAddrBlockArgs(),
4564  useDeviceAddrVars, mapData);
4565  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4566  blockArgIface.getUseDevicePtrBlockArgs(),
4567  useDevicePtrVars, mapData);
4568  }
4569 
4570  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
4571  moduleTranslation)))
4572  return llvm::make_error<PreviouslyReportedError>();
4573  }
4574  break;
4575  }
4576  return builder.saveIP();
4577  };
4578 
4579  auto customMapperCB =
4580  [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
4581  if (!combinedInfo.Mappers[i])
4582  return nullptr;
4583  info.HasMapper = true;
4584  return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
4585  moduleTranslation);
4586  };
4587 
4588  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4589  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4590  findAllocaInsertPoint(builder, moduleTranslation);
4591  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() {
4592  if (isa<omp::TargetDataOp>(op))
4593  return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
4594  builder.getInt64(deviceID), ifCond,
4595  info, genMapInfoCB, customMapperCB,
4596  /*MapperFunc=*/nullptr, bodyGenCB,
4597  /*DeviceAddrCB=*/nullptr);
4598  return ompBuilder->createTargetData(
4599  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
4600  info, genMapInfoCB, customMapperCB, &RTLFn);
4601  }();
4602 
4603  if (failed(handleError(afterIP, *op)))
4604  return failure();
4605 
4606  builder.restoreIP(*afterIP);
4607  return success();
4608 }
4609 
/// Translates an `omp.distribute` operation to LLVM IR via the
/// OpenMPIRBuilder. If the enclosing `omp.teams` reduction is contained
/// entirely inside this distribute, the teams reduction is also driven from
/// here: variables are allocated/initialized before body codegen and the
/// reduction is processed after the distribute construct is emitted.
4610 static LogicalResult
4611 convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
4612  LLVM::ModuleTranslation &moduleTranslation) {
4613  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4614  auto distributeOp = cast<omp::DistributeOp>(opInst);
4615  if (failed(checkImplementationStatus(opInst)))
4616  return failure();
4617 
4618  /// Process teams op reduction in distribute if the reduction is contained in
4619  /// the distribute op.
4620  omp::TeamsOp teamsOp = opInst.getParentOfType<omp::TeamsOp>();
4621  bool doDistributeReduction =
4622  teamsOp ? teamsReductionContainedInDistribute(teamsOp) : false;
4623 
4624  DenseMap<Value, llvm::Value *> reductionVariableMap;
4625  unsigned numReductionVars = teamsOp ? teamsOp.getNumReductionVars() : 0;
4626  SmallVector<omp::DeclareReductionOp> reductionDecls;
4627  SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
4628  llvm::ArrayRef<bool> isByRef;
4629 
4630  if (doDistributeReduction) {
4631  isByRef = getIsByRef(teamsOp.getReductionByref());
4632  assert(isByRef.size() == teamsOp.getNumReductionVars());
4633 
4634  collectReductionDecls(teamsOp, reductionDecls);
4635  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4636  findAllocaInsertPoint(builder, moduleTranslation);
4637 
4638  MutableArrayRef<BlockArgument> reductionArgs =
4639  llvm::cast<omp::BlockArgOpenMPOpInterface>(*teamsOp)
4640  .getReductionBlockArgs();
4641 
// NOTE(review): the call heading this argument list (allocating and
// initializing the teams reduction variables) is not visible in this render
// of the file — confirm against the upstream source.
4643  teamsOp, reductionArgs, builder, moduleTranslation, allocaIP,
4644  reductionDecls, privateReductionVariables, reductionVariableMap,
4645  isByRef)))
4646  return failure();
4647  }
4648 
4649  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4650  auto bodyGenCB = [&](InsertPointTy allocaIP,
4651  InsertPointTy codeGenIP) -> llvm::Error {
4652  // Save the alloca insertion point on ModuleTranslation stack for use in
4653  // nested regions.
// NOTE(review): the declaration heading this call (the stack-frame object
// that saves the alloca insertion point, per the comment above) is not
// visible in this render — confirm against the upstream source.
4655  moduleTranslation, allocaIP);
4656 
4657  // DistributeOp has only one region associated with it.
4658  builder.restoreIP(codeGenIP);
4659  PrivateVarsInfo privVarsInfo(distributeOp);
4660 
4661  llvm::Expected<llvm::BasicBlock *> afterAllocas =
4662  allocatePrivateVars(builder, moduleTranslation, privVarsInfo, allocaIP);
4663  if (handleError(afterAllocas, opInst).failed())
4664  return llvm::make_error<PreviouslyReportedError>();
4665 
4666  if (handleError(initPrivateVars(builder, moduleTranslation, privVarsInfo),
4667  opInst)
4668  .failed())
4669  return llvm::make_error<PreviouslyReportedError>();
4670 
4671  if (failed(copyFirstPrivateVars(
4672  distributeOp, builder, moduleTranslation, privVarsInfo.mlirVars,
4673  privVarsInfo.llvmVars, privVarsInfo.privatizers,
4674  distributeOp.getPrivateNeedsBarrier())))
4675  return llvm::make_error<PreviouslyReportedError>();
4676 
4677  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4678  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
// NOTE(review): the declaration receiving this call's result is not visible
// in this render; the uses below (`!regionBlock`, `takeError()`,
// `*regionBlock`) imply an llvm::Expected<llvm::BasicBlock *> named
// `regionBlock` — confirm upstream.
4680  convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
4681  builder, moduleTranslation);
4682  if (!regionBlock)
4683  return regionBlock.takeError();
4684  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
4685 
4686  // Skip applying a workshare loop below when translating 'distribute
4687  // parallel do' (it's been already handled by this point while translating
4688  // the nested omp.wsloop).
4689  if (!isa_and_present<omp::WsloopOp>(distributeOp.getNestedWrapper())) {
4690  // TODO: Add support for clauses which are valid for DISTRIBUTE
4691  // constructs. Static schedule is the default.
4692  auto schedule = omp::ClauseScheduleKind::Static;
4693  bool isOrdered = false;
4694  std::optional<omp::ScheduleModifier> scheduleMod;
4695  bool isSimd = false;
4696  llvm::omp::WorksharingLoopType workshareLoopType =
4697  llvm::omp::WorksharingLoopType::DistributeStaticLoop;
4698  bool loopNeedsBarrier = false;
4699  llvm::Value *chunk = nullptr;
4700 
4701  llvm::CanonicalLoopInfo *loopInfo =
4702  findCurrentLoopInfo(moduleTranslation);
4703  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
4704  ompBuilder->applyWorkshareLoop(
4705  ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
4706  convertToScheduleKind(schedule), chunk, isSimd,
4707  scheduleMod == omp::ScheduleModifier::monotonic,
4708  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
4709  workshareLoopType);
4710 
4711  if (!wsloopIP)
4712  return wsloopIP.takeError();
4713  }
4714 
4715  if (failed(cleanupPrivateVars(builder, moduleTranslation,
4716  distributeOp.getLoc(), privVarsInfo.llvmVars,
4717  privVarsInfo.privatizers)))
4718  return llvm::make_error<PreviouslyReportedError>();
4719 
4720  return llvm::Error::success();
4721  };
4722 
4723  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4724  findAllocaInsertPoint(builder, moduleTranslation);
4725  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4726  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
4727  ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
4728 
4729  if (failed(handleError(afterIP, opInst)))
4730  return failure();
4731 
4732  builder.restoreIP(*afterIP);
4733 
4734  if (doDistributeReduction) {
4735  // Process the reductions if required.
// NOTE(review): the reduction-processing call heading this argument list is
// not visible in this render — confirm against the upstream source.
4737  teamsOp, builder, moduleTranslation, allocaIP, reductionDecls,
4738  privateReductionVariables, isByRef,
4739  /*isNoWait*/ false, /*isTeamsReduction*/ true);
4740  }
4741  return success();
4742 }
4743 
4744 /// Lowers the FlagsAttr which is applied to the module on the device
4745 /// pass when offloading, this attribute contains OpenMP RTL globals that can
4746 /// be passed as flags to the frontend, otherwise they are set to default
4747 LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
4748  LLVM::ModuleTranslation &moduleTranslation) {
4749  if (!cast<mlir::ModuleOp>(op))
4750  return failure();
4751 
4752  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4753 
4754  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
4755  attribute.getOpenmpDeviceVersion());
4756 
4757  if (attribute.getNoGpuLib())
4758  return success();
4759 
4760  ompBuilder->createGlobalFlag(
4761  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
4762  "__omp_rtl_debug_kind");
4763  ompBuilder->createGlobalFlag(
4764  attribute
4765  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
4766  ,
4767  "__omp_rtl_assume_teams_oversubscription");
4768  ompBuilder->createGlobalFlag(
4769  attribute
4770  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
4771  ,
4772  "__omp_rtl_assume_threads_oversubscription");
4773  ompBuilder->createGlobalFlag(
4774  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
4775  "__omp_rtl_assume_no_thread_state");
4776  ompBuilder->createGlobalFlag(
4777  attribute
4778  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
4779  ,
4780  "__omp_rtl_assume_no_nested_parallelism");
4781  return success();
4782 }
4783 
4784 static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
4785  omp::TargetOp targetOp,
4786  llvm::StringRef parentName = "") {
4787  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
4788 
4789  assert(fileLoc && "No file found from location");
4790  StringRef fileName = fileLoc.getFilename().getValue();
4791 
4792  llvm::sys::fs::UniqueID id;
4793  uint64_t line = fileLoc.getLine();
4794  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
4795  size_t fileHash = llvm::hash_value(fileName.str());
4796  size_t deviceId = 0xdeadf17e;
4797  targetInfo =
4798  llvm::TargetRegionEntryInfo(parentName, deviceId, fileHash, line);
4799  } else {
4800  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
4801  id.getFile(), line);
4802  }
4803 }
4804 
/// On the device pass, rewrites every in-kernel use of a declare-target
/// mapped variable (mapData.OriginalValue[i]) into a fresh load of its
/// reference pointer (mapData.BasePointers[i]); see the per-iteration
/// comments below for the host/device rationale.
4805 static void
4806 handleDeclareTargetMapVar(MapInfoData &mapData,
4807  LLVM::ModuleTranslation &moduleTranslation,
4808  llvm::IRBuilderBase &builder, llvm::Function *func) {
4809  assert(moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4810  "function only supported for target device codegen");
4811  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
4812  // In the case of declare target mapped variables, the basePointer is
4813  // the reference pointer generated by the convertDeclareTargetAttr
4814  // method. Whereas the kernelValue is the original variable, so for
4815  // the device we must replace all uses of this original global variable
4816  // (stored in kernelValue) with the reference pointer (stored in
4817  // basePointer for declare target mapped variables), as for device the
4818  // data is mapped into this reference pointer and should be loaded
4819  // from it, the original variable is discarded. On host both exist and
4820  // metadata is generated (elsewhere in the convertDeclareTargetAttr)
4821  // function to link the two variables in the runtime and then both the
4822  // reference pointer and the pointer are assigned in the kernel argument
4823  // structure for the host.
4824  if (mapData.IsDeclareTarget[i]) {
4825  // If the original map value is a constant, then we have to make sure all
4826  // of it's uses within the current kernel/function that we are going to
4827  // rewrite are converted to instructions, as we will be altering the old
4828  // use (OriginalValue) from a constant to an instruction, which will be
4829  // illegal and ICE the compiler if the user is a constant expression of
4830  // some kind e.g. a constant GEP.
4831  if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
4832  convertUsersOfConstantsToInstructions(constant, func, false);
4833 
4834  // The users iterator will get invalidated if we modify an element,
4835  // so we populate this vector of uses to alter each user on an
4836  // individual basis to emit its own load (rather than one load for
4837  // all).
// NOTE(review): the declaration of `userVec` (the container populated
// below) is not visible in this render — confirm against the upstream
// source.
4839  for (llvm::User *user : mapData.OriginalValue[i]->users())
4840  userVec.push_back(user);
4841 
4842  for (llvm::User *user : userVec) {
4843  if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
4844  if (insn->getFunction() == func) {
// Emit one dedicated load per use, placed immediately before that use.
4845  auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
4846  mapData.BasePointers[i]);
4847  load->moveBefore(insn->getIterator());
4848  user->replaceUsesOfWith(mapData.OriginalValue[i], load);
4849  }
4850  }
4851  }
4852  }
4853  }
4854 }
4855 
4856 // The createDeviceArgumentAccessor function generates
4857 // instructions for retrieving (accessing) kernel
4858 // arguments inside of the device kernel for use by
4859 // the kernel. This enables different semantics such as
4860 // the creation of temporary copies of data allowing
4861 // semantics like read-only/no host write back kernel
4862 // arguments.
4863 //
4864 // This currently implements a very light version of Clang's
4865 // EmitParmDecl's handling of direct argument handling as well
4866 // as a portion of the argument access generation based on
4867 // capture types found at the end of emitOutlinedFunctionPrologue
4868 // in Clang. The indirect path handling of EmitParmDecl's may be
4869 // required for future work, but a direct 1-to-1 copy doesn't seem
4870 // possible as the logic is rather scattered throughout Clang's
4871 // lowering and perhaps we wish to deviate slightly.
4872 //
4873 // \param mapData - A container containing vectors of information
4874 // corresponding to the input argument, which should have a
4875 // corresponding entry in the MapInfoData containers
4876 // OriginalValue's.
4877 // \param arg - This is the generated kernel function argument that
4878 // corresponds to the passed in input argument. We generated different
4879 // accesses of this Argument, based on capture type and other Input
4880 // related information.
4881 // \param input - This is the host side value that will be passed to
4882 // the kernel i.e. the kernel input, we rewrite all uses of this within
4883 // the kernel (as we generate the kernel body based on the target's region
4884 // which maintains references to the original input) to the retVal argument
4885 // upon exit of this function inside of the OMPIRBuilder. This interlinks
4886 // the kernel argument to future uses of it in the function providing
4887 // appropriate "glue" instructions in between.
4888 // \param retVal - This is the value that all uses of input inside of the
4889 // kernel will be re-written to, the goal of this function is to generate
4890 // an appropriate location for the kernel argument to be accessed from,
4891 // e.g. ByRef will result in a temporary allocation location and then
4892 // a store of the kernel argument into this allocated memory which
4893 // will then be loaded from, ByCopy will use the allocated memory
4894 // directly.
4895 static llvm::IRBuilderBase::InsertPoint
// NOTE(review): the line carrying this function's name and leading
// parameters (createDeviceArgumentAccessor(MapInfoData &mapData,
// llvm::Argument &arg, — per the descriptive comment above) is not visible
// in this render — confirm against the upstream source.
4897  llvm::Value *input, llvm::Value *&retVal,
4898  llvm::IRBuilderBase &builder,
4899  llvm::OpenMPIRBuilder &ompBuilder,
4900  LLVM::ModuleTranslation &moduleTranslation,
4901  llvm::IRBuilderBase::InsertPoint allocaIP,
4902  llvm::IRBuilderBase::InsertPoint codeGenIP) {
4903  assert(ompBuilder.Config.isTargetDevice() &&
4904  "function only supported for target device codegen");
// Allocas for the argument copy go to the function's alloca block.
4905  builder.restoreIP(allocaIP);
4906 
// Default to ByRef when no matching map clause is found below.
4907  omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef;
4908  LLVM::TypeToLLVMIRTranslator typeToLLVMIRTranslator(
4909  ompBuilder.M.getContext());
4910  unsigned alignmentValue = 0;
4911  // Find the associated MapInfoData entry for the current input
4912  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
4913  if (mapData.OriginalValue[i] == input) {
4914  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
4915  capture = mapOp.getMapCaptureType();
4916  // Get information of alignment of mapped object
4917  alignmentValue = typeToLLVMIRTranslator.getPreferredAlignment(
4918  mapOp.getVarType(), ompBuilder.M.getDataLayout());
4919  break;
4920  }
4921 
4922  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
4923  unsigned int defaultAS =
4924  ompBuilder.M.getDataLayout().getProgramAddressSpace();
4925 
4926  // Create the alloca for the argument the current point.
4927  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
4928 
// Cast pointer allocas to the program address space when it differs from the
// alloca address space (relevant on some GPU targets).
4929  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
4930  v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS));
4931 
4932  builder.CreateStore(&arg, v);
4933 
// Switch back to the body insertion point for the access code itself.
4934  builder.restoreIP(codeGenIP);
4935 
4936  switch (capture) {
4937  case omp::VariableCaptureKind::ByCopy: {
// ByCopy: the stack copy itself is the access location.
4938  retVal = v;
4939  break;
4940  }
4941  case omp::VariableCaptureKind::ByRef: {
// ByRef: load the stored argument back out of the stack slot.
4942  llvm::LoadInst *loadInst = builder.CreateAlignedLoad(
4943  v->getType(), v,
4944  ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
4945  // CreateAlignedLoad function creates similar LLVM IR:
4946  // %res = load ptr, ptr %input, align 8
4947  // This LLVM IR does not contain information about alignment
4948  // of the loaded value. We need to add !align metadata to unblock
4949  // optimizer. The existence of the !align metadata on the instruction
4950  // tells the optimizer that the value loaded is known to be aligned to
4951  // a boundary specified by the integer value in the metadata node.
4952  // Example:
4953  // %res = load ptr, ptr %input, align 8, !align !align_md_node
4954  // ^ ^
4955  // | |
4956  // alignment of %input address |
4957  // |
4958  // alignment of %res object
4959  if (v->getType()->isPointerTy() && alignmentValue) {
4960  llvm::MDBuilder MDB(builder.getContext());
4961  loadInst->setMetadata(
4962  llvm::LLVMContext::MD_align,
4963  llvm::MDNode::get(builder.getContext(),
4964  MDB.createConstant(llvm::ConstantInt::get(
4965  llvm::Type::getInt64Ty(builder.getContext()),
4966  alignmentValue))));
4967  }
4968  retVal = loadInst;
4969 
4970  break;
4971  }
4972  case omp::VariableCaptureKind::This:
4973  case omp::VariableCaptureKind::VLAType:
4974  // TODO: Consider returning error to use standard reporting for
4975  // unimplemented features.
4976  assert(false && "Currently unsupported capture kind");
4977  break;
4978  }
4979 
4980  return builder.saveIP();
4981 }
4982 
4983 /// Follow uses of `host_eval`-defined block arguments of the given `omp.target`
4984 /// operation and populate output variables with their corresponding host value
4985 /// (i.e. operand evaluated outside of the target region), based on their uses
4986 /// inside of the target region.
4987 ///
4988 /// Loop bounds and steps are only optionally populated, if output vectors are
4989 /// provided.
4990 static void
4991 extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
4992  Value &numTeamsLower, Value &numTeamsUpper,
4993  Value &threadLimit,
4994  llvm::SmallVectorImpl<Value> *lowerBounds = nullptr,
4995  llvm::SmallVectorImpl<Value> *upperBounds = nullptr,
4996  llvm::SmallVectorImpl<Value> *steps = nullptr) {
4997  auto blockArgIface = llvm::cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
// Walk each (host-evaluated operand, region block argument) pair and
// classify the block argument by how it is consumed inside the region.
4998  for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(),
4999  blockArgIface.getHostEvalBlockArgs())) {
5000  Value hostEvalVar = std::get<0>(item), blockArg = std::get<1>(item);
5001 
5002  for (Operation *user : blockArg.getUsers()) {
// NOTE(review): the statement opening this .Case chain (a type-switch
// dispatching on `user`) is not visible in this render — confirm against
// the upstream source.
5004  .Case([&](omp::TeamsOp teamsOp) {
5005  if (teamsOp.getNumTeamsLower() == blockArg)
5006  numTeamsLower = hostEvalVar;
5007  else if (teamsOp.getNumTeamsUpper() == blockArg)
5008  numTeamsUpper = hostEvalVar;
5009  else if (teamsOp.getThreadLimit() == blockArg)
5010  threadLimit = hostEvalVar;
5011  else
5012  llvm_unreachable("unsupported host_eval use");
5013  })
5014  .Case([&](omp::ParallelOp parallelOp) {
5015  if (parallelOp.getNumThreads() == blockArg)
5016  numThreads = hostEvalVar;
5017  else
5018  llvm_unreachable("unsupported host_eval use");
5019  })
5020  .Case([&](omp::LoopNestOp loopOp) {
// Records, for every position where the block argument appears in the
// given bound list, the host value into the optional output vector;
// returns whether at least one position matched.
5021  auto processBounds =
5022  [&](OperandRange opBounds,
5023  llvm::SmallVectorImpl<Value> *outBounds) -> bool {
5024  bool found = false;
5025  for (auto [i, lb] : llvm::enumerate(opBounds)) {
5026  if (lb == blockArg) {
5027  found = true;
5028  if (outBounds)
5029  (*outBounds)[i] = hostEvalVar;
5030  }
5031  }
5032  return found;
5033  };
5034  bool found =
5035  processBounds(loopOp.getLoopLowerBounds(), lowerBounds);
5036  found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) ||
5037  found;
5038  found = processBounds(loopOp.getLoopSteps(), steps) || found;
5039  (void)found;
5040  assert(found && "unsupported host_eval use");
5041  })
5042  .Default([](Operation *) {
5043  llvm_unreachable("unsupported host_eval use");
5044  });
5045  }
5046  }
5047 }
5048 
5049 /// If \p op is of the given type parameter, return it casted to that type.
5050 /// Otherwise, if its immediate parent operation (or some other higher-level
5051 /// parent, if \p immediateParent is false) is of that type, return that parent
5052 /// casted to the given type.
5053 ///
5054 /// If \p op is \c null or neither it or its parent(s) are of the specified
5055 /// type, return a \c null operation.
5056 template <typename OpTy>
5057 static OpTy castOrGetParentOfType(Operation *op, bool immediateParent = false) {
5058  if (!op)
5059  return OpTy();
5060 
5061  if (OpTy casted = dyn_cast<OpTy>(op))
5062  return casted;
5063 
5064  if (immediateParent)
5065  return dyn_cast_if_present<OpTy>(op->getParentOp());
5066 
5067  return op->getParentOfType<OpTy>();
5068 }
5069 
5070 /// If the given \p value is defined by an \c llvm.mlir.constant operation and
5071 /// it is of an integer type, return its value.
5072 static std::optional<int64_t> extractConstInteger(Value value) {
5073  if (!value)
5074  return std::nullopt;
5075 
5076  if (auto constOp =
5077  dyn_cast_if_present<LLVM::ConstantOp>(value.getDefiningOp()))
5078  if (auto constAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
5079  return constAttr.getInt();
5080 
5081  return std::nullopt;
5082 }
5083 
5084 static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl) {
5085  uint64_t sizeInBits = dl.getTypeSizeInBits(type);
5086  uint64_t sizeInBytes = sizeInBits / 8;
5087  return sizeInBytes;
5088 }
5089 
/// Computes the byte size (per the module data layout) of a literal,
/// non-packed struct whose fields are the element types of \p op's reduction
/// declarations; returns 0 when the op has no reduction variables.
5090 template <typename OpTy>
5091 static uint64_t getReductionDataSize(OpTy &op) {
5092  if (op.getNumReductionVars() > 0) {
// NOTE(review): the declaration of `reductions` (the out-parameter filled by
// the call below) is not visible in this render — confirm upstream.
5094  collectReductionDecls(op, reductions);
5095 
// NOTE(review): the declaration of `members` (the element-type container
// reserved and filled below) is not visible in this render — confirm
// upstream.
5097  members.reserve(reductions.size());
5098  for (omp::DeclareReductionOp &red : reductions)
5099  members.push_back(red.getType());
5100  Operation *opp = op.getOperation();
5101  auto structType = mlir::LLVM::LLVMStructType::getLiteral(
5102  opp->getContext(), members, /*isPacked=*/false);
5103  DataLayout dl = DataLayout(opp->getParentOfType<ModuleOp>());
5104  return getTypeByteSize(structType, dl);
5105  }
5106  return 0;
5107 }
5108 
5109 /// Populate default `MinTeams`, `MaxTeams` and `MaxThreads` to their default
5110 /// values as stated by the corresponding clauses, if constant.
5111 ///
5112 /// These default values must be set before the creation of the outlined LLVM
5113 /// function for the target region, so that they can be used to initialize the
5114 /// corresponding global `ConfigurationEnvironmentTy` structure.
5115 static void
5116 initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp,
5117  llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs,
5118  bool isTargetDevice, bool isGPU) {
5119  // TODO: Handle constant 'if' clauses.
5120 
// Sentinel protocol used for the team/thread counts below: -1 = clause
// absent, 0 = clause present but not a compile-time constant, > 0 = known
// constant value.
5121  Value numThreads, numTeamsLower, numTeamsUpper, threadLimit;
5122  if (!isTargetDevice) {
// Host pass: these clause values arrive through host_eval block arguments.
5123  extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
5124  threadLimit);
5125  } else {
5126  // In the target device, values for these clauses are not passed as
5127  // host_eval, but instead evaluated prior to entry to the region. This
5128  // ensures values are mapped and available inside of the target region.
5129  if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
5130  numTeamsLower = teamsOp.getNumTeamsLower();
5131  numTeamsUpper = teamsOp.getNumTeamsUpper();
5132  threadLimit = teamsOp.getThreadLimit();
5133  }
5134 
5135  if (auto parallelOp = castOrGetParentOfType<omp::ParallelOp>(capturedOp))
5136  numThreads = parallelOp.getNumThreads();
5137  }
5138 
5139  // Handle clauses impacting the number of teams.
5140 
5141  int32_t minTeamsVal = 1, maxTeamsVal = -1;
5142  if (castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
5143  // TODO: Use `hostNumTeamsLower` to initialize `minTeamsVal`. For now, match
5144  // clang and set min and max to the same value.
5145  if (numTeamsUpper) {
5146  if (auto val = extractConstInteger(numTeamsUpper))
5147  minTeamsVal = maxTeamsVal = *val;
5148  } else {
// num_teams clause absent on the teams construct: set but unknown.
5149  minTeamsVal = maxTeamsVal = 0;
5150  }
5151  } else if (castOrGetParentOfType<omp::ParallelOp>(capturedOp,
5152  /*immediateParent=*/true) ||
5153  castOrGetParentOfType<omp::SimdOp>(capturedOp,
5154  /*immediateParent=*/true)) {
// Directly nested parallel/simd without teams: exactly one team.
5155  minTeamsVal = maxTeamsVal = 1;
5156  } else {
5157  minTeamsVal = maxTeamsVal = -1;
5158  }
5159 
5160  // Handle clauses impacting the number of threads.
5161 
// Shared helper: leaves `result` untouched when the clause is absent;
// stores the constant when known; otherwise clamps the sentinel to 0
// ("set but unknown").
5162  auto setMaxValueFromClause = [](Value clauseValue, int32_t &result) {
5163  if (!clauseValue)
5164  return;
5165 
5166  if (auto val = extractConstInteger(clauseValue))
5167  result = *val;
5168 
5169  // Found an applicable clause, so it's not undefined. Mark as unknown
5170  // because it's not constant.
5171  if (result < 0)
5172  result = 0;
5173  };
5174 
5175  // Extract 'thread_limit' clause from 'target' and 'teams' directives.
5176  int32_t targetThreadLimitVal = -1, teamsThreadLimitVal = -1;
5177  setMaxValueFromClause(targetOp.getThreadLimit(), targetThreadLimitVal);
5178  setMaxValueFromClause(threadLimit, teamsThreadLimitVal);
5179 
5180  // Extract 'max_threads' clause from 'parallel' or set to 1 if it's SIMD.
5181  int32_t maxThreadsVal = -1;
5182  if (castOrGetParentOfType<omp::ParallelOp>(capturedOp))
5183  setMaxValueFromClause(numThreads, maxThreadsVal);
5184  else if (castOrGetParentOfType<omp::SimdOp>(capturedOp,
5185  /*immediateParent=*/true))
5186  maxThreadsVal = 1;
5187 
5188  // For max values, < 0 means unset, == 0 means set but unknown. Select the
5189  // minimum value between 'max_threads' and 'thread_limit' clauses that were
5190  // set.
5191  int32_t combinedMaxThreadsVal = targetThreadLimitVal;
5192  if (combinedMaxThreadsVal < 0 ||
5193  (teamsThreadLimitVal >= 0 && teamsThreadLimitVal < combinedMaxThreadsVal))
5194  combinedMaxThreadsVal = teamsThreadLimitVal;
5195 
5196  if (combinedMaxThreadsVal < 0 ||
5197  (maxThreadsVal >= 0 && maxThreadsVal < combinedMaxThreadsVal))
5198  combinedMaxThreadsVal = maxThreadsVal;
5199 
// Reduction scratch sizing is only relevant for GPU teams reductions.
5200  int32_t reductionDataSize = 0;
5201  if (isGPU && capturedOp) {
5202  if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp))
5203  reductionDataSize = getReductionDataSize(teamsOp);
5204  }
5205 
5206  // Update kernel bounds structure for the `OpenMPIRBuilder` to use.
5207  omp::TargetRegionFlags kernelFlags = targetOp.getKernelExecFlags(capturedOp);
5208  assert(
5209  omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic |
5210  omp::TargetRegionFlags::spmd) &&
5211  "invalid kernel flags");
// generic-only -> GENERIC, generic+spmd -> GENERIC_SPMD, spmd-only -> SPMD.
5212  attrs.ExecFlags =
5213  omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic)
5214  ? omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::spmd)
5215  ? llvm::omp::OMP_TGT_EXEC_MODE_GENERIC_SPMD
5216  : llvm::omp::OMP_TGT_EXEC_MODE_GENERIC
5217  : llvm::omp::OMP_TGT_EXEC_MODE_SPMD;
5218  attrs.MinTeams = minTeamsVal;
5219  attrs.MaxTeams.front() = maxTeamsVal;
5220  attrs.MinThreads = 1;
5221  attrs.MaxThreads.front() = combinedMaxThreadsVal;
5222  attrs.ReductionDataSize = reductionDataSize;
5223  // TODO: Allow modified buffer length similar to
5224  // fopenmp-cuda-teams-reduction-recs-num flag in clang.
5225  if (attrs.ReductionDataSize != 0)
5226  attrs.ReductionBufferLength = 1024;
5227 }
5228 
5229 /// Gather LLVM runtime values for all clauses evaluated in the host that are
5230 /// passed to the kernel invocation.
5231 ///
5232 /// This function must be called only when compiling for the host. Also, it will
5233 /// only provide correct results if it's called after the body of \c targetOp
5234 /// has been fully generated.
5235 static void
5236 initTargetRuntimeAttrs(llvm::IRBuilderBase &builder,
5237  LLVM::ModuleTranslation &moduleTranslation,
5238  omp::TargetOp targetOp, Operation *capturedOp,
5239  llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs) {
5240  omp::LoopNestOp loopOp = castOrGetParentOfType<omp::LoopNestOp>(capturedOp);
5241  unsigned numLoops = loopOp ? loopOp.getNumLoops() : 0;
5242 
// Gather the host-side operands that back the host_eval block arguments,
// including per-loop bounds/steps sized to the captured loop nest.
5243  Value numThreads, numTeamsLower, numTeamsUpper, teamsThreadLimit;
5244  llvm::SmallVector<Value> lowerBounds(numLoops), upperBounds(numLoops),
5245  steps(numLoops);
5246  extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
5247  teamsThreadLimit, &lowerBounds, &upperBounds, &steps);
5248 
5249  // TODO: Handle constant 'if' clauses.
// Each attribute is only filled in when its clause value is present; the
// lookups resolve the MLIR values to the LLVM IR already generated for them.
5250  if (Value targetThreadLimit = targetOp.getThreadLimit())
5251  attrs.TargetThreadLimit.front() =
5252  moduleTranslation.lookupValue(targetThreadLimit);
5253 
5254  if (numTeamsLower)
5255  attrs.MinTeams = moduleTranslation.lookupValue(numTeamsLower);
5256 
5257  if (numTeamsUpper)
5258  attrs.MaxTeams.front() = moduleTranslation.lookupValue(numTeamsUpper);
5259 
5260  if (teamsThreadLimit)
5261  attrs.TeamsThreadLimit.front() =
5262  moduleTranslation.lookupValue(teamsThreadLimit);
5263 
5264  if (numThreads)
5265  attrs.MaxThreads = moduleTranslation.lookupValue(numThreads);
5266 
5267  if (omp::bitEnumContainsAny(targetOp.getKernelExecFlags(capturedOp),
5268  omp::TargetRegionFlags::trip_count)) {
5269  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5270  attrs.LoopTripCount = nullptr;
5271 
5272  // To calculate the trip count, we multiply together the trip counts of
5273  // every collapsed canonical loop. We don't need to create the loop nests
5274  // here, since we're only interested in the trip count.
5275  for (auto [loopLower, loopUpper, loopStep] :
5276  llvm::zip_equal(lowerBounds, upperBounds, steps)) {
5277  llvm::Value *lowerBound = moduleTranslation.lookupValue(loopLower);
5278  llvm::Value *upperBound = moduleTranslation.lookupValue(loopUpper);
5279  llvm::Value *step = moduleTranslation.lookupValue(loopStep);
5280 
5281  llvm::OpenMPIRBuilder::LocationDescription loc(builder);
5282  llvm::Value *tripCount = ompBuilder->calculateCanonicalLoopTripCount(
5283  loc, lowerBound, upperBound, step, /*IsSigned=*/true,
5284  loopOp.getLoopInclusive());
5285 
// First loop seeds the accumulator; subsequent loops multiply into it.
5286  if (!attrs.LoopTripCount) {
5287  attrs.LoopTripCount = tripCount;
5288  continue;
5289  }
5290 
5291  // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
5292  attrs.LoopTripCount = builder.CreateMul(attrs.LoopTripCount, tripCount,
5293  {}, /*HasNUW=*/true);
5294  }
5295  }
5296 }
5297 
// Translates an `omp.target` operation: outlines the target region into a
// separate LLVM function (via OpenMPIRBuilder::createTarget) and emits either
// the host-side kernel launch or the device-side kernel body, depending on
// whether this is a host or device compilation.
5298 static LogicalResult
5299 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
5300  LLVM::ModuleTranslation &moduleTranslation) {
5301  auto targetOp = cast<omp::TargetOp>(opInst);
// Bail out early on clauses this translation does not support yet.
5302  if (failed(checkImplementationStatus(opInst)))
5303  return failure();
5304 
5305  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5306  bool isTargetDevice = ompBuilder->Config.isTargetDevice();
5307  bool isGPU = ompBuilder->Config.isGPU();
5308 
5309  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
5310  auto argIface = cast<omp::BlockArgOpenMPOpInterface>(opInst);
5311  auto &targetRegion = targetOp.getRegion();
5312  // Holds the private vars that have been mapped along with the block argument
5313  // that corresponds to the MapInfoOp corresponding to the private var in
5314  // question. So, for instance:
5315  //
5316  // %10 = omp.map.info var_ptr(%6#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, ..)
5317  // omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1)
5318  //
5319  // Then, %10 has been created so that the descriptor can be used by the
5320  // privatizer @box.privatizer on the device side. Here we'd record {%6#0,
5321  // %arg0} in the mappedPrivateVars map.
5322  llvm::DenseMap<Value, Value> mappedPrivateVars;
5323  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
5324  SmallVector<Value> mapVars = targetOp.getMapVars();
5325  SmallVector<Value> hdaVars = targetOp.getHasDeviceAddrVars();
5326  ArrayRef<BlockArgument> mapBlockArgs = argIface.getMapBlockArgs();
5327  ArrayRef<BlockArgument> hdaBlockArgs = argIface.getHasDeviceAddrBlockArgs();
// Filled in by bodyCB once the outlined function exists; used at the end for
// declare-target remapping on the device path.
5328  llvm::Function *llvmOutlinedFn = nullptr;
5329 
5330  // TODO: It can also be false if a compile-time constant `false` IF clause is
5331  // specified.
5332  bool isOffloadEntry =
5333  isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
5334 
5335  // For some private variables, the MapsForPrivatizedVariablesPass
5336  // creates MapInfoOp instances. Go through the private variables and
5337  // the mapped variables so that during code generation we are able
5338  // to quickly look up the corresponding map variable, if any for each
5339  // private variable.
5340  if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) {
5341  OperandRange privateVars = targetOp.getPrivateVars();
5342  std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
5343  std::optional<DenseI64ArrayAttr> privateMapIndices =
5344  targetOp.getPrivateMapsAttr();
5345 
5346  for (auto [privVarIdx, privVarSymPair] :
5347  llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) {
5348  auto privVar = std::get<0>(privVarSymPair);
5349  auto privSym = std::get<1>(privVarSymPair);
5350 
5351  SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
5352  omp::PrivateClauseOp privatizer =
5353  findPrivatizer(targetOp, privatizerName);
5354 
// Only privatizers that need a map clause participate in this lookup table.
5355  if (!privatizer.needsMap())
5356  continue;
5357 
5358  mlir::Value mappedValue =
5359  targetOp.getMappedValueForPrivateVar(privVarIdx);
5360  assert(mappedValue && "Expected to find mapped value for a privatized "
5361  "variable that needs mapping");
5362 
5363  // The MapInfoOp defining the map var isn't really needed later.
5364  // So, we don't store it in any datastructure. Instead, we just
5365  // do some sanity checks on it right now.
5366  auto mapInfoOp = mappedValue.getDefiningOp<omp::MapInfoOp>();
5367  [[maybe_unused]] Type varType = mapInfoOp.getVarType();
5368 
5369  // Check #1: Check that the type of the private variable matches
5370  // the type of the variable being mapped.
5371  if (!isa<LLVM::LLVMPointerType>(privVar.getType()))
5372  assert(
5373  varType == privVar.getType() &&
5374  "Type of private var doesn't match the type of the mapped value");
5375 
5376  // Ok, only 1 sanity check for now.
5377  // Record the block argument corresponding to this mapvar.
5378  mappedPrivateVars.insert(
5379  {privVar,
5380  targetRegion.getArgument(argIface.getMapBlockArgsStart() +
5381  (*privateMapIndices)[privVarIdx])});
5382  }
5383  }
5384 
5385  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
// Callback invoked by createTarget to generate the body of the outlined
// target function. Maps block arguments, performs privatization, inlines the
// target region, and runs private-var cleanup at the exit block.
5386  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
5387  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
5388  llvm::IRBuilderBase::InsertPointGuard guard(builder);
5389  builder.SetCurrentDebugLocation(llvm::DebugLoc());
5390  // Forward target-cpu and target-features function attributes from the
5391  // original function to the new outlined function.
5392  llvm::Function *llvmParentFn =
5393  moduleTranslation.lookupFunction(parentFn.getName());
5394  llvmOutlinedFn = codeGenIP.getBlock()->getParent();
5395  assert(llvmParentFn && llvmOutlinedFn &&
5396  "Both parent and outlined functions must exist at this point");
5397 
5398  if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
5399  attr.isStringAttribute())
5400  llvmOutlinedFn->addFnAttr(attr);
5401 
5402  if (auto attr = llvmParentFn->getFnAttribute("target-features");
5403  attr.isStringAttribute())
5404  llvmOutlinedFn->addFnAttr(attr);
5405 
// Map both regular map-clause and has_device_addr block arguments to the
// LLVM values of the underlying variables.
5406  for (auto [arg, mapOp] : llvm::zip_equal(mapBlockArgs, mapVars)) {
5407  auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
5408  llvm::Value *mapOpValue =
5409  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
5410  moduleTranslation.mapValue(arg, mapOpValue);
5411  }
5412  for (auto [arg, mapOp] : llvm::zip_equal(hdaBlockArgs, hdaVars)) {
5413  auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
5414  llvm::Value *mapOpValue =
5415  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
5416  moduleTranslation.mapValue(arg, mapOpValue);
5417  }
5418 
5419  // Do privatization after moduleTranslation has already recorded
5420  // mapped values.
5421  PrivateVarsInfo privateVarsInfo(targetOp);
5422 
5423  llvm::Expected<llvm::BasicBlock *> afterAllocas =
5424  allocatePrivateVars(builder, moduleTranslation, privateVarsInfo,
5425  allocaIP, &mappedPrivateVars);
5426 
5427  if (failed(handleError(afterAllocas, *targetOp)))
5428  return llvm::make_error<PreviouslyReportedError>();
5429 
5430  builder.restoreIP(codeGenIP);
5431  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo,
5432  &mappedPrivateVars),
5433  *targetOp)
5434  .failed())
5435  return llvm::make_error<PreviouslyReportedError>();
5436 
5437  if (failed(copyFirstPrivateVars(
5438  targetOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
5439  privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
5440  targetOp.getPrivateNeedsBarrier(), &mappedPrivateVars)))
5441  return llvm::make_error<PreviouslyReportedError>();
5442 
// Collect the `dealloc` regions of all privatizers so they can be inlined
// at the region's exit block below.
5443  SmallVector<Region *> privateCleanupRegions;
5444  llvm::transform(privateVarsInfo.privatizers,
5445  std::back_inserter(privateCleanupRegions),
5446  [](omp::PrivateClauseOp privatizer) {
5447  return &privatizer.getDeallocRegion();
5448  });
5449 
// NOTE(review): a line was lost in extraction here (rendered line 5450) —
// presumably `llvm::Expected<llvm::BasicBlock *> exitBlock =
// convertOmpOpRegions(`, which the continuation below belongs to. Verify
// against upstream.
5451  targetRegion, "omp.target", builder, moduleTranslation);
5452 
5453  if (!exitBlock)
5454  return exitBlock.takeError();
5455 
5456  builder.SetInsertPoint(*exitBlock);
5457  if (!privateCleanupRegions.empty()) {
5458  if (failed(inlineOmpRegionCleanup(
5459  privateCleanupRegions, privateVarsInfo.llvmVars,
5460  moduleTranslation, builder, "omp.targetop.private.cleanup",
5461  /*shouldLoadCleanupRegionArg=*/false))) {
5462  return llvm::createStringError(
5463  "failed to inline `dealloc` region of `omp.private` "
5464  "op in the target region");
5465  }
5466  return builder.saveIP();
5467  }
5468 
5469  return InsertPointTy(exitBlock.get(), exitBlock.get()->end());
5470  };
5471 
5472  StringRef parentName = parentFn.getName();
5473 
5474  llvm::TargetRegionEntryInfo entryInfo;
5475 
5476  getTargetEntryUniqueInfo(entryInfo, targetOp, parentName);
5477 
5478  MapInfoData mapData;
5479  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
5480  builder, /*useDevPtrOperands=*/{},
5481  /*useDevAddrOperands=*/{}, hdaVars);
5482 
5483  MapInfosTy combinedInfos;
// Callback that (re)generates the combined map infos at the given insertion
// point; createTarget calls this to build the offload argument arrays.
5484  auto genMapInfoCB =
5485  [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) -> MapInfosTy & {
5486  builder.restoreIP(codeGenIP);
5487  genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, true);
5488  return combinedInfos;
5489  };
5490 
// Callback used by createTarget to turn each kernel argument into the value
// seen inside the outlined function.
5491  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
5492  llvm::Value *&retVal, InsertPointTy allocaIP,
5493  InsertPointTy codeGenIP)
5494  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
5495  llvm::IRBuilderBase::InsertPointGuard guard(builder);
5496  builder.SetCurrentDebugLocation(llvm::DebugLoc());
5497  // We just return the unaltered argument for the host function
5498  // for now, some alterations may be required in the future to
5499  // keep host fallback functions working identically to the device
5500  // version (e.g. pass ByCopy values should be treated as such on
5501  // host and device, currently not always the case)
5502  if (!isTargetDevice) {
5503  retVal = cast<llvm::Value>(&arg);
5504  return codeGenIP;
5505  }
5506 
5507  return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
5508  *ompBuilder, moduleTranslation,
5509  allocaIP, codeGenIP);
5510  };
5511 
5512  llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs runtimeAttrs;
5513  llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs defaultAttrs;
5514  Operation *targetCapturedOp = targetOp.getInnermostCapturedOmpOp();
5515  initTargetDefaultAttrs(targetOp, targetCapturedOp, defaultAttrs,
5516  isTargetDevice, isGPU);
5517 
5518  // Collect host-evaluated values needed to properly launch the kernel from the
5519  // host.
5520  if (!isTargetDevice)
5521  initTargetRuntimeAttrs(builder, moduleTranslation, targetOp,
5522  targetCapturedOp, runtimeAttrs);
5523 
5524  // Pass host-evaluated values as parameters to the kernel / host fallback,
5525  // except if they are constants. In any case, map the MLIR block argument to
5526  // the corresponding LLVM values.
// NOTE(review): a line was lost in extraction here (rendered line 5527) —
// presumably `SmallVector<llvm::Value *> kernelInput;`, which the pushes
// below populate. Verify against upstream.
5528  SmallVector<Value> hostEvalVars = targetOp.getHostEvalVars();
5529  ArrayRef<BlockArgument> hostEvalBlockArgs = argIface.getHostEvalBlockArgs();
5530  for (auto [arg, var] : llvm::zip_equal(hostEvalBlockArgs, hostEvalVars)) {
5531  llvm::Value *value = moduleTranslation.lookupValue(var);
5532  moduleTranslation.mapValue(arg, value);
5533 
5534  if (!llvm::isa<llvm::Constant>(value))
5535  kernelInput.push_back(value);
5536  }
5537 
5538  for (size_t i = 0, e = mapData.OriginalValue.size(); i != e; ++i) {
5539  // declare target arguments are not passed to kernels as arguments
5540  // TODO: We currently do not handle cases where a member is explicitly
5541  // passed in as an argument, this will likely need to be handled in
5542  // the near future, rather than using IsAMember, it may be better to
5543  // test if the relevant BlockArg is used within the target region and
5544  // then use that as a basis for exclusion in the kernel inputs.
5545  if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
5546  kernelInput.push_back(mapData.OriginalValue[i]);
5547  }
5548 
// NOTE(review): a line was lost in extraction here (rendered line 5549) —
// presumably `SmallVector<llvm::OpenMPIRBuilder::DependData> dds;`, the
// vector filled by buildDependData below. Verify against upstream.
5550  buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
5551  moduleTranslation, dds);
5552 
5553  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
5554  findAllocaInsertPoint(builder, moduleTranslation);
5555  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
5556 
5557  llvm::OpenMPIRBuilder::TargetDataInfo info(
5558  /*RequiresDevicePointerInfo=*/false,
5559  /*SeparateBeginEndCalls=*/true);
5560 
// Callback returning the user-defined mapper function for map entry `i`, or
// nullptr when the entry has no custom mapper.
5561  auto customMapperCB =
5562  [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
5563  if (!combinedInfos.Mappers[i])
5564  return nullptr;
5565  info.HasMapper = true;
5566  return getOrCreateUserDefinedMapperFunc(combinedInfos.Mappers[i], builder,
5567  moduleTranslation);
5568  };
5569 
5570  llvm::Value *ifCond = nullptr;
5571  if (Value targetIfCond = targetOp.getIfExpr())
5572  ifCond = moduleTranslation.lookupValue(targetIfCond);
5573 
5574  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
5575  moduleTranslation.getOpenMPBuilder()->createTarget(
5576  ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), info, entryInfo,
5577  defaultAttrs, runtimeAttrs, ifCond, kernelInput, genMapInfoCB, bodyCB,
5578  argAccessorCB, customMapperCB, dds, targetOp.getNowait());
5579 
5580  if (failed(handleError(afterIP, opInst)))
5581  return failure();
5582 
5583  builder.restoreIP(*afterIP);
5584 
5585  // Remap access operations to declare target reference pointers for the
5586  // device, essentially generating extra loadop's as necessary
5587  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
5588  handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
5589  llvmOutlinedFn);
5590 
5591  return success();
5592 }
5593 
// Lowers the `omp.declare_target` attribute attached to `op`: for functions it
// removes host-only wrapper functions from the device module; for globals it
// registers them with the OpenMP offloading runtime entry tables.
5594 static LogicalResult
5595 convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
5596  LLVM::ModuleTranslation &moduleTranslation) {
5597  // Amend omp.declare_target by deleting the IR of the outlined functions
5598  // created for target regions. They cannot be filtered out from MLIR earlier
5599  // because the omp.target operation inside must be translated to LLVM, but
5600  // the wrapper functions themselves must not remain at the end of the
5601  // process. We know that functions where omp.declare_target does not match
5602  // omp.is_target_device at this stage can only be wrapper functions because
5603  // those that aren't are removed earlier as an MLIR transformation pass.
5604  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
5605  if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
5606  op->getParentOfType<ModuleOp>().getOperation())) {
// Only the device compilation erases host-only functions; the host build
// keeps everything.
5607  if (!offloadMod.getIsTargetDevice())
5608  return success();
5609 
5610  omp::DeclareTargetDeviceType declareType =
5611  attribute.getDeviceType().getValue();
5612 
5613  if (declareType == omp::DeclareTargetDeviceType::host) {
5614  llvm::Function *llvmFunc =
5615  moduleTranslation.lookupFunction(funcOp.getName());
// Drop uses first so eraseFromParent doesn't trip on dangling references.
5616  llvmFunc->dropAllReferences();
5617  llvmFunc->eraseFromParent();
5618  }
5619  }
5620  return success();
5621  }
5622 
5623  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
5624  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
5625  if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
5626  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5627  bool isDeclaration = gOp.isDeclaration();
5628  bool isExternallyVisible =
5629  gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
// May be null when the op's location has no file/line info; the callback
// below then reports an empty filename and line 0.
5630  auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
5631  llvm::StringRef mangledName = gOp.getSymName();
5632  auto captureClause =
5633  convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
5634  auto deviceClause =
5635  convertToDeviceClauseKind(attribute.getDeviceType().getValue());
5636  // unused for MLIR at the moment, required in Clang for book
5637  // keeping
5638  std::vector<llvm::GlobalVariable *> generatedRefs;
5639 
5640  std::vector<llvm::Triple> targetTriple;
5641  auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>(
5642  op->getParentOfType<mlir::ModuleOp>()->getAttr(
5643  LLVM::LLVMDialect::getTargetTripleAttrName()));
5644  if (targetTripleAttr)
5645  targetTriple.emplace_back(targetTripleAttr.data())
;
5646 
// Supplies the (filename, line) pair used to build a unique offload entry
// name for this global.
5647  auto fileInfoCallBack = [&loc]() {
5648  std::string filename = "";
5649  std::uint64_t lineNo = 0;
5650 
5651  if (loc) {
5652  filename = loc.getFilename().str();
5653  lineNo = loc.getLine();
5654  }
5655 
5656  return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
5657  lineNo);
5658  };
5659 
5660  ompBuilder->registerTargetGlobalVariable(
5661  captureClause, deviceClause, isDeclaration, isExternallyVisible,
5662  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
5663  generatedRefs, /*OpenMPSimd*/ false, targetTriple,
5664  /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
5665  gVal->getType(), gVal);
5666 
// On the device, non-`to` captures (or unified shared memory) additionally
// need a declare-target reference pointer.
5667  if (ompBuilder->Config.isTargetDevice() &&
5668  (attribute.getCaptureClause().getValue() !=
5669  mlir::omp::DeclareTargetCaptureClause::to ||
5670  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
5671  ompBuilder->getAddrOfDeclareTargetVar(
5672  captureClause, deviceClause, isDeclaration, isExternallyVisible,
5673  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
5674  generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
5675  /*GlobalInitializer*/ nullptr,
5676  /*VariableLinkage*/ nullptr);
5677  }
5678  }
5679  }
5680 
5681  return success();
5682 }
5683 
5684 // Returns true if the operation is inside a TargetOp or
5685 // is part of a declare target function.
5686 static bool isTargetDeviceOp(Operation *op) {
5687  // Assumes no reverse offloading
5688  if (op->getParentOfType<omp::TargetOp>())
5689  return true;
5690 
5691  // Certain operations return results, and whether utilised in host or
5692  // target there is a chance an LLVM Dialect operation depends on it
5693  // by taking it in as an operand, so we must always lower these in
5694  // some manner or result in an ICE (whether they end up in a no-op
5695  // or otherwise).
5696  if (mlir::isa<omp::ThreadprivateOp>(op))
5697  return true;
5698 
5699  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
5700  if (auto declareTargetIface =
5701  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
5702  parentFn.getOperation()))
5703  if (declareTargetIface.isDeclareTarget() &&
5704  declareTargetIface.getDeclareTargetDeviceType() !=
5705  mlir::omp::DeclareTargetDeviceType::host)
5706  return true;
5707 
5708  return false;
5709 }
5710 
5711 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
5712 /// OpenMP runtime calls).
5713 static LogicalResult
5714 convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
5715  LLVM::ModuleTranslation &moduleTranslation) {
5716  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5717 
5718  // For each loop, introduce one stack frame to hold loop information. Ensure
5719  // this is only done for the outermost loop wrapper to prevent introducing
5720  // multiple stack frames for a single loop. Initially set to null, the loop
5721  // information structure is initialized during translation of the nested
5722  // omp.loop_nest operation, making it available to translation of all loop
5723  // wrappers after their body has been successfully translated.
5724  bool isOutermostLoopWrapper =
5725  isa_and_present<omp::LoopWrapperInterface>(op) &&
5726  !dyn_cast_if_present<omp::LoopWrapperInterface>(op->getParentOp());
5727 
5728  if (isOutermostLoopWrapper)
5729  moduleTranslation.stackPush<OpenMPLoopInfoStackFrame>();
5730 
// Dispatch to the per-op translation routine based on the concrete op type.
5731  auto result =
// NOTE(review): a line was lost in extraction here (rendered line 5732) —
// presumably `llvm::TypeSwitch<Operation *, LogicalResult>(op)`, which the
// `.Case(...)` chain below is invoked on. Verify against upstream.
5733  .Case([&](omp::BarrierOp op) -> LogicalResult {
5734  if (failed(checkImplementationStatus(*op)))
5735  return failure();
5736 
5737  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
5738  ompBuilder->createBarrier(builder.saveIP(),
5739  llvm::omp::OMPD_barrier);
5740  LogicalResult res = handleError(afterIP, *op);
5741  if (res.succeeded()) {
5742  // If the barrier generated a cancellation check, the insertion
5743  // point might now need to be changed to a new continuation block
5744  builder.restoreIP(*afterIP);
5745  }
5746  return res;
5747  })
5748  .Case([&](omp::TaskyieldOp op) {
5749  if (failed(checkImplementationStatus(*op)))
5750  return failure();
5751 
5752  ompBuilder->createTaskyield(builder.saveIP());
5753  return success();
5754  })
5755  .Case([&](omp::FlushOp op) {
5756  if (failed(checkImplementationStatus(*op)))
5757  return failure();
5758 
5759  // No support in Openmp runtime function (__kmpc_flush) to accept
5760  // the argument list.
5761  // OpenMP standard states the following:
5762  // "An implementation may implement a flush with a list by ignoring
5763  // the list, and treating it the same as a flush without a list."
5764  //
5765  // The argument list is discarded so that, flush with a list is
5766  // treated same as a flush without a list.
5767  ompBuilder->createFlush(builder.saveIP());
5768  return success();
5769  })
5770  .Case([&](omp::ParallelOp op) {
5771  return convertOmpParallel(op, builder, moduleTranslation);
5772  })
5773  .Case([&](omp::MaskedOp) {
5774  return convertOmpMasked(*op, builder, moduleTranslation);
5775  })
5776  .Case([&](omp::MasterOp) {
5777  return convertOmpMaster(*op, builder, moduleTranslation);
5778  })
5779  .Case([&](omp::CriticalOp) {
5780  return convertOmpCritical(*op, builder, moduleTranslation);
5781  })
5782  .Case([&](omp::OrderedRegionOp) {
5783  return convertOmpOrderedRegion(*op, builder, moduleTranslation);
5784  })
5785  .Case([&](omp::OrderedOp) {
5786  return convertOmpOrdered(*op, builder, moduleTranslation);
5787  })
5788  .Case([&](omp::WsloopOp) {
5789  return convertOmpWsloop(*op, builder, moduleTranslation);
5790  })
5791  .Case([&](omp::SimdOp) {
5792  return convertOmpSimd(*op, builder, moduleTranslation);
5793  })
5794  .Case([&](omp::AtomicReadOp) {
5795  return convertOmpAtomicRead(*op, builder, moduleTranslation);
5796  })
5797  .Case([&](omp::AtomicWriteOp) {
5798  return convertOmpAtomicWrite(*op, builder, moduleTranslation);
5799  })
5800  .Case([&](omp::AtomicUpdateOp op) {
5801  return convertOmpAtomicUpdate(op, builder, moduleTranslation);
5802  })
5803  .Case([&](omp::AtomicCaptureOp op) {
5804  return convertOmpAtomicCapture(op, builder, moduleTranslation);
5805  })
5806  .Case([&](omp::CancelOp op) {
5807  return convertOmpCancel(op, builder, moduleTranslation);
5808  })
5809  .Case([&](omp::CancellationPointOp op) {
5810  return convertOmpCancellationPoint(op, builder, moduleTranslation);
5811  })
5812  .Case([&](omp::SectionsOp) {
5813  return convertOmpSections(*op, builder, moduleTranslation);
5814  })
5815  .Case([&](omp::SingleOp op) {
5816  return convertOmpSingle(op, builder, moduleTranslation);
5817  })
5818  .Case([&](omp::TeamsOp op) {
5819  return convertOmpTeams(op, builder, moduleTranslation);
5820  })
5821  .Case([&](omp::TaskOp op) {
5822  return convertOmpTaskOp(op, builder, moduleTranslation);
5823  })
5824  .Case([&](omp::TaskgroupOp op) {
5825  return convertOmpTaskgroupOp(op, builder, moduleTranslation);
5826  })
5827  .Case([&](omp::TaskwaitOp op) {
5828  return convertOmpTaskwaitOp(op, builder, moduleTranslation);
5829  })
5830  .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareMapperOp,
5831  omp::DeclareMapperInfoOp, omp::DeclareReductionOp,
5832  omp::CriticalDeclareOp>([](auto op) {
5833  // `yield` and `terminator` can be just omitted. The block structure
5834  // was created in the region that handles their parent operation.
5835  // `declare_reduction` will be used by reductions and is not
5836  // converted directly, skip it.
5837  // `declare_mapper` and `declare_mapper.info` are handled whenever
5838  // they are referred to through a `map` clause.
5839  // `critical.declare` is only used to declare names of critical
5840  // sections which will be used by `critical` ops and hence can be
5841  // ignored for lowering. The OpenMP IRBuilder will create unique
5842  // name for critical section names.
5843  return success();
5844  })
5845  .Case([&](omp::ThreadprivateOp) {
5846  return convertOmpThreadprivate(*op, builder, moduleTranslation);
5847  })
5848  .Case<omp::TargetDataOp, omp::TargetEnterDataOp,
5849  omp::TargetExitDataOp, omp::TargetUpdateOp>([&](auto op) {
5850  return convertOmpTargetData(op, builder, moduleTranslation);
5851  })
5852  .Case([&](omp::TargetOp) {
5853  return convertOmpTarget(*op, builder, moduleTranslation);
5854  })
5855  .Case([&](omp::DistributeOp) {
5856  return convertOmpDistribute(*op, builder, moduleTranslation);
5857  })
5858  .Case([&](omp::LoopNestOp) {
5859  return convertOmpLoopNest(*op, builder, moduleTranslation);
5860  })
5861  .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
5862  [&](auto op) {
5863  // No-op, should be handled by relevant owning operations e.g.
5864  // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
5865  // etc. and then discarded
5866  return success();
5867  })
5868  .Default([&](Operation *inst) {
5869  return inst->emitError()
5870  << "not yet implemented: " << inst->getName();
5871  });
5872 
// Pop the loop-info frame pushed above, even when translation failed.
5873  if (isOutermostLoopWrapper)
5874  moduleTranslation.stackPop();
5875 
5876  return result;
5877 }
5878 
// Translates an operation selected for device compilation; currently a thin
// wrapper that defers to the common host/target translation routine.
5879 static LogicalResult
5880 convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
5881  LLVM::ModuleTranslation &moduleTranslation) {
5882  return convertHostOrTargetOperation(op, builder, moduleTranslation);
5883 }
5884 
// During a device compilation, walks `op` looking for nested `omp.target` /
// `omp.target_data` ops and translates only those; any other OpenMP ops on the
// way are translated as "fake" scopes so values their regions define are
// available to the nested target-related ops.
5885 static LogicalResult
5886 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
5887  LLVM::ModuleTranslation &moduleTranslation) {
5888  if (isa<omp::TargetOp>(op))
5889  return convertOmpTarget(*op, builder, moduleTranslation);
5890  if (isa<omp::TargetDataOp>(op))
5891  return convertOmpTargetData(op, builder, moduleTranslation);
5892  bool interrupted =
5893  op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
5894  if (isa<omp::TargetOp>(oper)) {
5895  if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
5896  return WalkResult::interrupt();
// Skip the subtree: convertOmpTarget already handled the nested region.
5897  return WalkResult::skip();
5898  }
5899  if (isa<omp::TargetDataOp>(oper)) {
5900  if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
5901  return WalkResult::interrupt();
5902  return WalkResult::skip();
5903  }
5904 
5905  // Non-target ops might nest target-related ops, therefore, we
5906  // translate them as non-OpenMP scopes. Translating them is needed by
5907  // nested target-related ops since they might need LLVM values defined
5908  // in their parent non-target ops.
5909  if (isa<omp::OpenMPDialect>(oper->getDialect()) &&
5910  oper->getParentOfType<LLVM::LLVMFuncOp>() &&
5911  !oper->getRegions().empty()) {
5912  if (auto blockArgsIface =
5913  dyn_cast<omp::BlockArgOpenMPOpInterface>(oper))
5914  forwardArgs(moduleTranslation, blockArgsIface);
5915  else {
5916  // Here we map entry block arguments of
5917  // non-BlockArgOpenMPOpInterface ops if they can be encountered
5918  // inside of a function and they define any of these arguments.
5919  if (isa<mlir::omp::AtomicUpdateOp>(oper))
5920  for (auto [operand, arg] :
5921  llvm::zip_equal(oper->getOperands(),
5922  oper->getRegion(0).getArguments())) {
5923  moduleTranslation.mapValue(
5924  arg, builder.CreateLoad(
5925  moduleTranslation.convertType(arg.getType()),
5926  moduleTranslation.lookupValue(operand)));
5927  }
5928  }
5929 
5930  if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) {
5931  assert(builder.GetInsertBlock() &&
5932  "No insert block is set for the builder");
5933  for (auto iv : loopNest.getIVs()) {
5934  // Map iv to an undefined value just to keep the IR validity.
5935  moduleTranslation.mapValue(
// NOTE(review): a line was lost in extraction here (rendered line 5936) —
// presumably `iv, llvm::PoisonValue::get(` (the first argument and the
// placeholder value of this mapValue call). Verify against upstream.
5937  moduleTranslation.convertType(iv.getType())));
5938  }
5939  }
5940 
5941  for (Region &region : oper->getRegions()) {
5942  // Regions are fake in the sense that they are not a truthful
5943  // translation of the OpenMP construct being converted (e.g. no
5944  // OpenMP runtime calls will be generated). We just need this to
5945  // prepare the kernel invocation args.
// NOTE(review): a line was lost in extraction here (rendered line 5946) —
// presumably `SmallVector<llvm::PHINode *> phis;`, passed by pointer to
// convertOmpOpRegions below. Verify against upstream.
5947  auto result = convertOmpOpRegions(
5948  region, oper->getName().getStringRef().str() + ".fake.region",
5949  builder, moduleTranslation, &phis);
5950  if (failed(handleError(result, *oper)))
5951  return WalkResult::interrupt();
5952 
5953  builder.SetInsertPoint(result.get(), result.get()->end());
5954  }
5955 
5956  return WalkResult::skip();
5957  }
5958 
5959  return WalkResult::advance();
5960  }).wasInterrupted();
5961  return failure(interrupted);
5962 }
5963 
5964 namespace {
5965 
5966 /// Implementation of the dialect interface that converts operations belonging
5967 /// to the OpenMP dialect to LLVM IR.
5968 class OpenMPDialectLLVMIRTranslationInterface
// NOTE(review): a line was lost in extraction here (rendered line 5969) —
// presumably the base clause `: public LLVMTranslationDialectInterface {`.
// Verify against upstream.
5970 public:
// NOTE(review): a line was lost in extraction here (rendered line 5971) —
// presumably the inherited-constructor declaration
// `using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;`.
5972 
5973  /// Translates the given operation to LLVM IR using the provided IR builder
5974  /// and saving the state in `moduleTranslation`.
5975  LogicalResult
5976  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
5977  LLVM::ModuleTranslation &moduleTranslation) const final;
5978 
5979  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
5980  /// runtime calls, or operation amendments
5981  LogicalResult
// NOTE(review): a line was lost in extraction here (rendered line 5982) —
// presumably `amendOperation(Operation *op, ArrayRef<llvm::Instruction *>
// instructions,` (the start of the declaration continued below). Verify
// against upstream.
5983  NamedAttribute attribute,
5984  LLVM::ModuleTranslation &moduleTranslation) const final;
5985 };
5986 
5987 } // namespace
5988 
// Handles module-level `omp.*` discardable attributes: each recognized
// attribute name maps to a handler that updates the OpenMPIRBuilder
// configuration (or lowers the attribute); unknown omp attributes fall
// through as success.
5989 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
5990  Operation *op, ArrayRef<llvm::Instruction *> instructions,
5991  NamedAttribute attribute,
5992  LLVM::ModuleTranslation &moduleTranslation) const {
// The StringSwitch selects a handler by attribute name, and the selected
// handler is immediately invoked on the attribute value; its LogicalResult
// is returned directly.
5993  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
5994  attribute.getName())
5995  .Case("omp.is_target_device",
5996  [&](Attribute attr) {
5997  if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
5998  llvm::OpenMPIRBuilderConfig &config =
5999  moduleTranslation.getOpenMPBuilder()->Config;
6000  config.setIsTargetDevice(deviceAttr.getValue());
6001  return success();
6002  }
6003  return failure();
6004  })
6005  .Case("omp.is_gpu",
6006  [&](Attribute attr) {
6007  if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
6008  llvm::OpenMPIRBuilderConfig &config =
6009  moduleTranslation.getOpenMPBuilder()->Config;
6010  config.setIsGPU(gpuAttr.getValue());
6011  return success();
6012  }
6013  return failure();
6014  })
6015  .Case("omp.host_ir_filepath",
6016  [&](Attribute attr) {
6017  if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
6018  llvm::OpenMPIRBuilder *ompBuilder =
6019  moduleTranslation.getOpenMPBuilder();
6020  ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
6021  return success();
6022  }
6023  return failure();
6024  })
6025  .Case("omp.flags",
6026  [&](Attribute attr) {
6027  if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
6028  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
6029  return failure();
6030  })
6031  .Case("omp.version",
6032  [&](Attribute attr) {
6033  if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
6034  llvm::OpenMPIRBuilder *ompBuilder =
6035  moduleTranslation.getOpenMPBuilder();
6036  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
6037  versionAttr.getVersion());
6038  return success();
6039  }
6040  return failure();
6041  })
6042  .Case("omp.declare_target",
6043  [&](Attribute attr) {
6044  if (auto declareTargetAttr =
6045  dyn_cast<omp::DeclareTargetAttr>(attr))
6046  return convertDeclareTargetAttr(op, declareTargetAttr,
6047  moduleTranslation);
6048  return failure();
6049  })
6050  .Case("omp.requires",
6051  [&](Attribute attr) {
6052  if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
6053  using Requires = omp::ClauseRequires;
6054  Requires flags = requiresAttr.getValue();
6055  llvm::OpenMPIRBuilderConfig &config =
6056  moduleTranslation.getOpenMPBuilder()->Config;
6057  config.setHasRequiresReverseOffload(
6058  bitEnumContainsAll(flags, Requires::reverse_offload));
6059  config.setHasRequiresUnifiedAddress(
6060  bitEnumContainsAll(flags, Requires::unified_address));
6061  config.setHasRequiresUnifiedSharedMemory(
6062  bitEnumContainsAll(flags, Requires::unified_shared_memory));
6063  config.setHasRequiresDynamicAllocators(
6064  bitEnumContainsAll(flags, Requires::dynamic_allocators));
6065  return success();
6066  }
6067  return failure();
6068  })
6069  .Case("omp.target_triples",
6070  [&](Attribute attr) {
6071  if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
6072  llvm::OpenMPIRBuilderConfig &config =
6073  moduleTranslation.getOpenMPBuilder()->Config;
6074  config.TargetTriples.clear();
6075  config.TargetTriples.reserve(triplesAttr.size());
6076  for (Attribute tripleAttr : triplesAttr) {
6077  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
6078  config.TargetTriples.emplace_back(tripleStrAttr.getValue());
6079  else
6080  return failure();
6081  }
6082  return success();
6083  }
6084  return failure();
6085  })
6086  .Default([](Attribute) {
6087  // Fall through for omp attributes that do not require lowering.
6088  return success();
6089  })(attribute.getValue());
6090 
// NOTE(review): this final `return failure();` is unreachable — the
// StringSwitch expression above is itself returned (the statement starts
// with `return` at rendered line 5993 and every handler, including
// Default, yields a LogicalResult). Candidate for removal upstream.
6091  return failure();
6092 }
6093 
6094 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
6095 /// (including OpenMP runtime calls).
6096 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
6097  Operation *op, llvm::IRBuilderBase &builder,
6098  LLVM::ModuleTranslation &moduleTranslation) const {
6099 
6100  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
6101  if (ompBuilder->Config.isTargetDevice()) {
6102  if (isTargetDeviceOp(op)) {
6103  return convertTargetDeviceOp(op, builder, moduleTranslation);
6104  } else {
6105  return convertTargetOpsInNest(op, builder, moduleTranslation);
6106  }
6107  }
6108  return convertHostOrTargetOperation(op, builder, moduleTranslation);
6109 }
6110 
// NOTE(review): the enclosing function's signature line (rendered line 6111)
// was lost in extraction — presumably
// `void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {`.
// Verify against upstream. The body registers the OpenMP dialect and attaches
// the LLVM IR translation interface to it.
6112  registry.insert<omp::OpenMPDialect>();
6113  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
6114  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
6115  });
6116 }
6117 
// NOTE(review): two lines were lost in extraction around this overload —
// presumably the signature (rendered line 6118)
// `void mlir::registerOpenMPDialectTranslation(MLIRContext &context) {` and
// the call (rendered line 6120) `registerOpenMPDialectTranslation(registry);`.
// Verify against upstream. The overload builds a local registry and appends
// it to the given context.
6119  DialectRegistry registry;
6121  context.appendDialectRegistry(registry);
6122 }
union mlir::linalg::@1204::ArityGroupAndKind::Kind kind
@ None
static llvm::Value * getRefPtrIfDeclareTarget(mlir::Value value, LLVM::ModuleTranslation &moduleTranslation)
static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::Function *func)
static LogicalResult convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static llvm::omp::OrderKind convertOrderKind(std::optional< omp::ClauseOrderKind > o)
Convert Order attribute to llvm::omp::OrderKind.
static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp)
static llvm::Expected< llvm::Function * > emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::StringRef mapperFuncName)
static LogicalResult convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::Expected< llvm::Value * > initPrivateVar(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg, llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Initialize a single (first)private variable.
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertToCaptureClauseKind(mlir::omp::DeclareTargetCaptureClause captureClause)
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, bool first)
static OpTy castOrGetParentOfType(Operation *op, bool immediateParent=false)
If op is of the given type parameter, return it casted to that type.
static LogicalResult convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered_region' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an omp.atomic.write operation to LLVM IR.
static OwningAtomicReductionGen makeAtomicReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible atomic reduction generator for the given reduction declaration.
static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static llvm::Value * findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Return the llvm::Value * corresponding to the privateVar that is being privatized.
static LogicalResult convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
static void popCancelFinalizationCB(const ArrayRef< llvm::BranchInst * > cancelTerminators, llvm::OpenMPIRBuilder &ompBuilder, const llvm::OpenMPIRBuilder::InsertPointTy &afterIP)
If we cancelled the construct, we should branch to the finalization block of that construct.
static LogicalResult cleanupPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, Location loc, SmallVectorImpl< llvm::Value * > &llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls)
static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static llvm::Expected< llvm::BasicBlock * > allocatePrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Allocate and initialize delayed private variables.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op)
Converts an LLVM dialect binary operation to the corresponding enum value for atomicrmw supported bin...
static llvm::AtomicOrdering convertAtomicOrdering(std::optional< omp::ClauseMemoryOrderKind > ao)
Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static void setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder, llvm::BasicBlock *block=nullptr)
static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult allocAndInitializeReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef)
static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, MapInfosTy &combinedInfo, bool isTargetParams, int mapDataParentIdx=-1)
static LogicalResult convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
static bool isTargetDeviceOp(Operation *op)
static LogicalResult inlineOmpRegionCleanup(llvm::SmallVectorImpl< Region * > &cleanupRegions, llvm::ArrayRef< llvm::Value * > privateVariables, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, StringRef regionName, bool shouldLoadCleanupRegionArg=true)
handling of DeclareReductionOp's cleanup region
static void pushCancelFinalizationCB(SmallVectorImpl< llvm::BranchInst * > &cancelTerminators, llvm::IRBuilderBase &llvmBuilder, llvm::OpenMPIRBuilder &ompBuilder, mlir::Operation *op, llvm::omp::Directive cancelDirective)
Shared implementation of a callback which adds a terminator for the new block created for the branch t...
static void mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, DenseMap< Value, llvm::Value * > &reductionVariableMap, unsigned i)
Map input arguments to reduction initialization region.
static LogicalResult initReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::BasicBlock *latestAllocaBlock, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef, SmallVectorImpl< DeferredStore > &deferredStores)
Inline reductions' init regions.
static LogicalResult convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp)
static llvm::Error initPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static llvm::SmallString< 64 > getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, llvm::OpenMPIRBuilder &ompBuilder)
static OwningReductionGen makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible reduction generator for the given reduction declaration.
static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts the OpenMP parallel operation to LLVM IR.
static llvm::OpenMPIRBuilder::InsertPointTy findAllocaInsertPoint(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Find the insertion point for allocas given the current insertion point for normal operations in the b...
static LogicalResult inlineConvertOmpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::Value * > *continuationBlockArgs=nullptr)
Translates the blocks contained in the given region and appends them at the current insertion poin...
static LogicalResult convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP Threadprivate operation into LLVM IR using OpenMPIRBuilder.
static omp::PrivateClauseOp findPrivatizer(Operation *from, SymbolRefAttr symbolName)
Looks up from the operation from and returns the PrivateClauseOp with name symbolName.
static LogicalResult convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, bool isTargetParams=false)
LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
Lowers the FlagsAttr which is applied to the module on the device pass when offloading,...
static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl)
static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, omp::TargetOp targetOp, llvm::StringRef parentName="")
static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, Value &numTeamsLower, Value &numTeamsUpper, Value &threadLimit, llvm::SmallVectorImpl< Value > *lowerBounds=nullptr, llvm::SmallVectorImpl< Value > *upperBounds=nullptr, llvm::SmallVectorImpl< Value > *steps=nullptr)
Follow uses of host_eval-defined block arguments of the given omp.target operation and populate outpu...
uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl)
std::vector< llvm::Value * > calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, bool isArrayTy, OperandRange bounds)
This function calculates the array/pointer offset for map data provided with bounds operations,...
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static llvm::Expected< llvm::Function * > getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation, omp::BlockArgOpenMPOpInterface blockArgIface)
Maps block arguments from blockArgIface (which are MLIR values) to the corresponding LLVM values of t...
static LogicalResult allocReductionVars(T loop, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, SmallVectorImpl< DeferredStore > &deferredStores, llvm::ArrayRef< bool > isByRefs)
Allocate space for privatized reduction variables.
static ArrayRef< bool > getIsByRef(std::optional< ArrayRef< bool >> attr)
static bool constructIsCancellable(Operation *op)
Returns true if the construct contains omp.cancel or omp.cancellation_point.
static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP)
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind)
Convert ProcBindKind from MLIR-generated enum to LLVM enum.
static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static void collectReductionDecls(T op, SmallVectorImpl< omp::DeclareReductionOp > &reductions)
Populates reductions with reduction declarations used in the given op.
static LogicalResult handleError(llvm::Error error, Operation &op)
static LogicalResult copyFirstPrivateVars(mlir::Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< mlir::Value > &mlirPrivateVars, ArrayRef< llvm::Value * > llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls, bool insertBarrier, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static void processMapMembersWithParent(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag)
static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause)
static LogicalResult checkImplementationStatus(Operation &op)
Check whether translation to LLVM IR for the given operation is currently supported.
static LogicalResult createReductionsAndCleanup(OP op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, ArrayRef< llvm::Value * > privateReductionVariables, ArrayRef< bool > isByRef, bool isNowait=false, bool isTeamsReduction=false)
static LogicalResult convertOmpCancellationPoint(omp::CancellationPointOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static uint64_t getReductionDataSize(OpTy &op)
static llvm::CanonicalLoopInfo * findCurrentLoopInfo(LLVM::ModuleTranslation &moduleTranslation)
Find the loop information structure for the loop nest being translated.
static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert omp.atomic.read operation to LLVM IR.
static llvm::omp::Directive convertCancellationConstructType(omp::ClauseCancellationConstructType directive)
static void initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs, bool isTargetDevice, bool isGPU)
Populate default MinTeams, MaxTeams and MaxThreads to their default values as stated by the correspon...
static std::optional< int64_t > extractConstInteger(Value value)
If the given value is defined by an llvm.mlir.constant operation and it is of an integer type,...
static LogicalResult convertIgnoredWrapper(omp::LoopWrapperInterface opInst, LLVM::ModuleTranslation &moduleTranslation)
Helper function to map block arguments defined by ignored loop wrappers to LLVM values and prevent an...
static void initTargetRuntimeAttrs(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs)
Gather LLVM runtime values for all clauses evaluated in the host that are passed to the kernel invoca...
static void collectReductionInfo(T loop, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< OwningReductionGen > &owningReductionGens, SmallVectorImpl< OwningAtomicReductionGen > &owningAtomicReductionGens, const ArrayRef< llvm::Value * > privateReductionVariables, SmallVectorImpl< llvm::OpenMPIRBuilder::ReductionInfo > &reductionInfos)
Collect reduction info.
static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Given an OpenMP MLIR operation, create the corresponding LLVM IR (including OpenMP runtime calls).
static bool checkIfPointerMap(omp::MapInfoOp mapOp)
static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void buildDependData(std::optional< ArrayAttr > dependKinds, OperandRange dependVars, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::OpenMPIRBuilder::DependData > &dds)
static llvm::Expected< llvm::BasicBlock * > convertOmpOpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::PHINode * > *continuationBlockPHIs=nullptr)
Converts the given region that appears within an OpenMP dialect operation to LLVM IR,...
static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
llvm::Value * getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::Value *basePointer, llvm::Type *baseType, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void collectMapDataFromMapOperands(MapInfoData &mapData, SmallVectorImpl< Value > &mapVars, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::IRBuilderBase &builder, ArrayRef< Value > useDevPtrOperands={}, ArrayRef< Value > useDevAddrOperands={}, ArrayRef< Value > hasDevAddrOperands={})
static bool isDeclareTargetLink(mlir::Value value)
#define MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CLASS_NAME)
Definition: TypeID.h:331
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class represents an argument of a Block.
Definition: Value.h:309
Block represents an ordered list of Operations.
Definition: Block.h:33
BlockArgument getArgument(unsigned i)
Definition: Block.h:129
unsigned getNumArguments()
Definition: Block.h:128
Operation & back()
Definition: Block.h:152
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:246
Operation & front()
Definition: Block.h:153
iterator begin()
Definition: Block.h:143
The main mechanism for performing data layout queries.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
An instance of this location represents a tuple of file, line number, and column number.
Definition: Location.h:174
Base class for dialect interfaces providing translation to LLVM IR.
virtual LogicalResult amendOperation(Operation *op, ArrayRef< llvm::Instruction * > instructions, NamedAttribute attribute, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to act on an operation that has dialect attributes from the derive...
virtual LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to provide translation of the operations to LLVM IR.
Concrete CRTP base class for ModuleTranslation stack frames.
Implementation class for module translation.
llvm::Value * lookupValue(Value value) const
Finds an LLVM IR value corresponding to the given MLIR value.
WalkResult stackWalk(llvm::function_ref< WalkResult(T &)> callback)
Calls callback for every ModuleTranslation stack frame of type T starting from the top of the stack.
void stackPush(Args &&...args)
Creates a stack frame of type T on ModuleTranslation stack.
LogicalResult convertBlock(Block &bb, bool ignoreArguments, llvm::IRBuilderBase &builder)
Translates the contents of the given block to LLVM IR using this translator.
SmallVector< llvm::Value * > lookupValues(ValueRange values)
Looks up a list of remapped values.
void mapFunction(StringRef name, llvm::Function *func)
Stores the mapping between a function name and its LLVM IR representation.
llvm::BasicBlock * lookupBlock(Block *block) const
Finds an LLVM IR basic block that corresponds to the given MLIR block.
SymbolTableCollection & symbolTable()
llvm::Type * convertType(Type type)
Converts the type from MLIR LLVM dialect to LLVM.
llvm::OpenMPIRBuilder * getOpenMPBuilder()
Returns the OpenMP IR builder associated with the LLVM IR module being constructed.
llvm::LLVMContext & getLLVMContext() const
Returns the LLVM context in which the IR is being constructed.
llvm::GlobalValue * lookupGlobal(Operation *op)
Finds an LLVM IR global value that corresponds to the given MLIR operation defining a global value.
llvm::Module * getLLVMModule()
Returns the LLVM module in which the IR is being constructed.
llvm::Function * lookupFunction(StringRef name) const
Finds an LLVM IR function by its name.
void mapBlock(Block *mlir, llvm::BasicBlock *llvm)
Stores the mapping between an MLIR block and LLVM IR basic block.
void stackPop()
Pops the last element from the ModuleTranslation stack.
void forgetMapping(Region &region)
Removes the mapping for blocks contained in the region and values defined in these blocks.
void mapValue(Value mlir, llvm::Value *llvm)
Stores the mapping between an MLIR value and its LLVM IR counterpart.
Utility class to translate MLIR LLVM dialect types to LLVM IR.
Definition: TypeToLLVM.h:39
unsigned getPreferredAlignment(Type type, const llvm::DataLayout &layout)
Returns the preferred alignment for the type given the data layout.
Definition: TypeToLLVM.cpp:183
T findInstanceOf()
Return an instance of the given location type if one is nested under the current location.
Definition: Location.h:45
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:164
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:55
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:179
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:43
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:350
Dialect * getDialect()
Return the dialect this operation is associated with, or nullptr if the associated dialect is not loa...
Definition: Operation.h:220
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:280
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:797
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:346
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:686
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:873
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:539
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
BlockArgListType getArguments()
Definition: Region.h:81
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition: Region.h:200
bool empty()
Definition: Region.h:60
unsigned getNumArguments()
Definition: Region.h:123
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
Block & front()
Definition: Region.h:65
@ Private
The symbol is private and may only be referenced by SymbolRefAttrs local to the operations within the...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:33
static WalkResult advance()
Definition: Visitors.h:51
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: Visitors.h:55
static WalkResult interrupt()
Definition: Visitors.h:50
The OpAsmOpInterface, see OpAsmInterface.td for more details.
Definition: CallGraph.h:229
void connectPHINodes(Region &region, const ModuleTranslation &state)
For all blocks in the region that were converted to LLVM IR using the given ModuleTranslation,...
llvm::Constant * createMappingInformation(Location loc, llvm::OpenMPIRBuilder &builder)
Create a constant string representing the mapping information extracted from the MLIR location inform...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Runtime
Potential runtimes for AMD GPU kernels.
Definition: Runtimes.h:15
llvm::hash_code hash_value(const StructType::MemberDecorationInfo &memberDecorationInfo)
llvm::PointerUnion< NamedAttribute *, NamedProperty *, NamedTypeConstraint * > Argument
Definition: Argument.h:64
Include the generated interface declarations.
SetVector< Block * > getBlocksSortedByDominance(Region &region)
Gets a list of blocks that is sorted according to dominance.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:305
const FrozenRewritePatternSet GreedyRewriteConfig config
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
void registerOpenMPDialectTranslation(DialectRegistry &registry)
Register the OpenMP dialect and the translation from it to the LLVM IR in the given registry;.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
A util to collect info needed to convert delayed privatizers from MLIR to LLVM.
SmallVector< mlir::Value > mlirVars
SmallVector< omp::PrivateClauseOp > privatizers
MutableArrayRef< BlockArgument > blockArgs
SmallVector< llvm::Value * > llvmVars
RAII object calling stackPush/stackPop on construction/destruction.