MLIR  21.0.0git
OpenMPToLLVMIRTranslation.cpp
Go to the documentation of this file.
1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
19 #include "mlir/IR/IRMapping.h"
20 #include "mlir/IR/Operation.h"
21 #include "mlir/Support/LLVM.h"
25 
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/TypeSwitch.h"
30 #include "llvm/Frontend/OpenMP/OMPConstants.h"
31 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DebugInfoMetadata.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/MDBuilder.h"
37 #include "llvm/IR/ReplaceConstant.h"
38 #include "llvm/Support/FileSystem.h"
39 #include "llvm/TargetParser/Triple.h"
40 #include "llvm/Transforms/Utils/ModuleUtils.h"
41 
42 #include <any>
43 #include <cstdint>
44 #include <iterator>
45 #include <numeric>
46 #include <optional>
47 #include <utility>
48 
49 using namespace mlir;
50 
51 namespace {
52 static llvm::omp::ScheduleKind
53 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
54  if (!schedKind.has_value())
55  return llvm::omp::OMP_SCHEDULE_Default;
56  switch (schedKind.value()) {
57  case omp::ClauseScheduleKind::Static:
58  return llvm::omp::OMP_SCHEDULE_Static;
59  case omp::ClauseScheduleKind::Dynamic:
60  return llvm::omp::OMP_SCHEDULE_Dynamic;
61  case omp::ClauseScheduleKind::Guided:
62  return llvm::omp::OMP_SCHEDULE_Guided;
63  case omp::ClauseScheduleKind::Auto:
64  return llvm::omp::OMP_SCHEDULE_Auto;
66  return llvm::omp::OMP_SCHEDULE_Runtime;
67  }
68  llvm_unreachable("unhandled schedule clause argument");
69 }
70 
71 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
72 /// insertion points for allocas.
73 class OpenMPAllocaStackFrame
74  : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
75 public:
76  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)
77 
78  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
79  : allocaInsertPoint(allocaIP) {}
80  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
81 };
82 
83 /// Stack frame to hold a \see llvm::CanonicalLoopInfo representing the
84 /// collapsed canonical loop information corresponding to an \c omp.loop_nest
85 /// operation.
86 class OpenMPLoopInfoStackFrame
87  : public LLVM::ModuleTranslation::StackFrameBase<OpenMPLoopInfoStackFrame> {
88 public:
89  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPLoopInfoStackFrame)
90  llvm::CanonicalLoopInfo *loopInfo = nullptr;
91 };
92 
93 /// Custom error class to signal translation errors that don't need reporting,
94 /// since encountering them will have already triggered relevant error messages.
95 ///
96 /// Its purpose is to serve as the glue between MLIR failures represented as
97 /// \see LogicalResult instances and \see llvm::Error instances used to
98 /// propagate errors through the \see llvm::OpenMPIRBuilder. Generally, when an
99 /// error of the first type is raised, a message is emitted directly (the \see
100 /// LogicalResult itself does not hold any information). If we need to forward
101 /// this error condition as an \see llvm::Error while avoiding triggering some
102 /// redundant error reporting later on, we need a custom \see llvm::ErrorInfo
103 /// class to just signal this situation has happened.
104 ///
105 /// For example, this class should be used to trigger errors from within
106 /// callbacks passed to the \see OpenMPIRBuilder when they were triggered by the
107 /// translation of their own regions. This unclutters the error log from
108 /// redundant messages.
109 class PreviouslyReportedError
110  : public llvm::ErrorInfo<PreviouslyReportedError> {
111 public:
112  void log(raw_ostream &) const override {
113  // Do not log anything.
114  }
115 
116  std::error_code convertToErrorCode() const override {
117  llvm_unreachable(
118  "PreviouslyReportedError doesn't support ECError conversion");
119  }
120 
121  // Used by ErrorInfo::classID.
122  static char ID;
123 };
124 
126 
127 /*
128  * Custom class for processing linear clause for omp.wsloop
129  * and omp.simd. Linear clause translation requires setup,
130  * initialization, update, and finalization at varying
131  * basic blocks in the IR. This class helps maintain
132  * internal state to allow consistent translation in
133  * each of these stages.
134  */
135 
136 class LinearClauseProcessor {
137 
138 private:
139  SmallVector<llvm::Value *> linearPreconditionVars;
140  SmallVector<llvm::Value *> linearLoopBodyTemps;
141  SmallVector<llvm::AllocaInst *> linearOrigVars;
142  SmallVector<llvm::Value *> linearOrigVal;
143  SmallVector<llvm::Value *> linearSteps;
144  llvm::BasicBlock *linearFinalizationBB;
145  llvm::BasicBlock *linearExitBB;
146  llvm::BasicBlock *linearLastIterExitBB;
147 
148 public:
149  // Allocate space for linear variabes
150  void createLinearVar(llvm::IRBuilderBase &builder,
151  LLVM::ModuleTranslation &moduleTranslation,
152  mlir::Value &linearVar) {
153  if (llvm::AllocaInst *linearVarAlloca = dyn_cast<llvm::AllocaInst>(
154  moduleTranslation.lookupValue(linearVar))) {
155  linearPreconditionVars.push_back(builder.CreateAlloca(
156  linearVarAlloca->getAllocatedType(), nullptr, ".linear_var"));
157  llvm::Value *linearLoopBodyTemp = builder.CreateAlloca(
158  linearVarAlloca->getAllocatedType(), nullptr, ".linear_result");
159  linearOrigVal.push_back(moduleTranslation.lookupValue(linearVar));
160  linearLoopBodyTemps.push_back(linearLoopBodyTemp);
161  linearOrigVars.push_back(linearVarAlloca);
162  }
163  }
164 
165  // Initialize linear step
166  inline void initLinearStep(LLVM::ModuleTranslation &moduleTranslation,
167  mlir::Value &linearStep) {
168  linearSteps.push_back(moduleTranslation.lookupValue(linearStep));
169  }
170 
171  // Emit IR for initialization of linear variables
172  llvm::OpenMPIRBuilder::InsertPointOrErrorTy
173  initLinearVar(llvm::IRBuilderBase &builder,
174  LLVM::ModuleTranslation &moduleTranslation,
175  llvm::BasicBlock *loopPreHeader) {
176  builder.SetInsertPoint(loopPreHeader->getTerminator());
177  for (size_t index = 0; index < linearOrigVars.size(); index++) {
178  llvm::LoadInst *linearVarLoad = builder.CreateLoad(
179  linearOrigVars[index]->getAllocatedType(), linearOrigVars[index]);
180  builder.CreateStore(linearVarLoad, linearPreconditionVars[index]);
181  }
182  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
183  moduleTranslation.getOpenMPBuilder()->createBarrier(
184  builder.saveIP(), llvm::omp::OMPD_barrier);
185  return afterBarrierIP;
186  }
187 
188  // Emit IR for updating Linear variables
189  void updateLinearVar(llvm::IRBuilderBase &builder, llvm::BasicBlock *loopBody,
190  llvm::Value *loopInductionVar) {
191  builder.SetInsertPoint(loopBody->getTerminator());
192  for (size_t index = 0; index < linearPreconditionVars.size(); index++) {
193  // Emit increments for linear vars
194  llvm::LoadInst *linearVarStart =
195  builder.CreateLoad(linearOrigVars[index]->getAllocatedType(),
196 
197  linearPreconditionVars[index]);
198  auto mulInst = builder.CreateMul(loopInductionVar, linearSteps[index]);
199  auto addInst = builder.CreateAdd(linearVarStart, mulInst);
200  builder.CreateStore(addInst, linearLoopBodyTemps[index]);
201  }
202  }
203 
204  // Linear variable finalization is conditional on the last logical iteration.
205  // Create BB splits to manage the same.
206  void outlineLinearFinalizationBB(llvm::IRBuilderBase &builder,
207  llvm::BasicBlock *loopExit) {
208  linearFinalizationBB = loopExit->splitBasicBlock(
209  loopExit->getTerminator(), "omp_loop.linear_finalization");
210  linearExitBB = linearFinalizationBB->splitBasicBlock(
211  linearFinalizationBB->getTerminator(), "omp_loop.linear_exit");
212  linearLastIterExitBB = linearFinalizationBB->splitBasicBlock(
213  linearFinalizationBB->getTerminator(), "omp_loop.linear_lastiter_exit");
214  }
215 
216  // Finalize the linear vars
217  llvm::OpenMPIRBuilder::InsertPointOrErrorTy
218  finalizeLinearVar(llvm::IRBuilderBase &builder,
219  LLVM::ModuleTranslation &moduleTranslation,
220  llvm::Value *lastIter) {
221  // Emit condition to check whether last logical iteration is being executed
222  builder.SetInsertPoint(linearFinalizationBB->getTerminator());
223  llvm::Value *loopLastIterLoad = builder.CreateLoad(
224  llvm::Type::getInt32Ty(builder.getContext()), lastIter);
225  llvm::Value *isLast =
226  builder.CreateCmp(llvm::CmpInst::ICMP_NE, loopLastIterLoad,
228  llvm::Type::getInt32Ty(builder.getContext()), 0));
229  // Store the linear variable values to original variables.
230  builder.SetInsertPoint(linearLastIterExitBB->getTerminator());
231  for (size_t index = 0; index < linearOrigVars.size(); index++) {
232  llvm::LoadInst *linearVarTemp =
233  builder.CreateLoad(linearOrigVars[index]->getAllocatedType(),
234  linearLoopBodyTemps[index]);
235  builder.CreateStore(linearVarTemp, linearOrigVars[index]);
236  }
237 
238  // Create conditional branch such that the linear variable
239  // values are stored to original variables only at the
240  // last logical iteration
241  builder.SetInsertPoint(linearFinalizationBB->getTerminator());
242  builder.CreateCondBr(isLast, linearLastIterExitBB, linearExitBB);
243  linearFinalizationBB->getTerminator()->eraseFromParent();
244  // Emit barrier
245  builder.SetInsertPoint(linearExitBB->getTerminator());
246  return moduleTranslation.getOpenMPBuilder()->createBarrier(
247  builder.saveIP(), llvm::omp::OMPD_barrier);
248  }
249 
250  // Rewrite all uses of the original variable in `BBName`
251  // with the linear variable in-place
252  void rewriteInPlace(llvm::IRBuilderBase &builder, std::string BBName,
253  size_t varIndex) {
255  for (llvm::User *user : linearOrigVal[varIndex]->users())
256  users.push_back(user);
257  for (auto *user : users) {
258  if (auto *userInst = dyn_cast<llvm::Instruction>(user)) {
259  if (userInst->getParent()->getName().str() == BBName)
260  user->replaceUsesOfWith(linearOrigVal[varIndex],
261  linearLoopBodyTemps[varIndex]);
262  }
263  }
264  }
265 };
266 
267 } // namespace
268 
269 /// Looks up from the operation from and returns the PrivateClauseOp with
270 /// name symbolName
271 static omp::PrivateClauseOp findPrivatizer(Operation *from,
272  SymbolRefAttr symbolName) {
273  omp::PrivateClauseOp privatizer =
274  SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
275  symbolName);
276  assert(privatizer && "privatizer not found in the symbol table");
277  return privatizer;
278 }
279 
280 /// Check whether translation to LLVM IR for the given operation is currently
281 /// supported. If not, descriptive diagnostics will be emitted to let users know
282 /// this is a not-yet-implemented feature.
283 ///
284 /// \returns success if no unimplemented features are needed to translate the
285 /// given operation.
286 static LogicalResult checkImplementationStatus(Operation &op) {
287  auto todo = [&op](StringRef clauseName) {
288  return op.emitError() << "not yet implemented: Unhandled clause "
289  << clauseName << " in " << op.getName()
290  << " operation";
291  };
292 
293  auto checkAllocate = [&todo](auto op, LogicalResult &result) {
294  if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
295  result = todo("allocate");
296  };
297  auto checkBare = [&todo](auto op, LogicalResult &result) {
298  if (op.getBare())
299  result = todo("ompx_bare");
300  };
301  auto checkCancelDirective = [&todo](auto op, LogicalResult &result) {
302  omp::ClauseCancellationConstructType cancelledDirective =
303  op.getCancelDirective();
304  // Cancelling a taskloop is not yet supported because we don't yet have LLVM
305  // IR conversion for taskloop
306  if (cancelledDirective == omp::ClauseCancellationConstructType::Taskgroup) {
307  Operation *parent = op->getParentOp();
308  while (parent) {
309  if (parent->getDialect() == op->getDialect())
310  break;
311  parent = parent->getParentOp();
312  }
313  if (isa_and_nonnull<omp::TaskloopOp>(parent))
314  result = todo("cancel directive inside of taskloop");
315  }
316  };
317  auto checkDepend = [&todo](auto op, LogicalResult &result) {
318  if (!op.getDependVars().empty() || op.getDependKinds())
319  result = todo("depend");
320  };
321  auto checkDevice = [&todo](auto op, LogicalResult &result) {
322  if (op.getDevice())
323  result = todo("device");
324  };
325  auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
326  if (op.getDistScheduleChunkSize())
327  result = todo("dist_schedule with chunk_size");
328  };
329  auto checkHint = [](auto op, LogicalResult &) {
330  if (op.getHint())
331  op.emitWarning("hint clause discarded");
332  };
333  auto checkInReduction = [&todo](auto op, LogicalResult &result) {
334  if (!op.getInReductionVars().empty() || op.getInReductionByref() ||
335  op.getInReductionSyms())
336  result = todo("in_reduction");
337  };
338  auto checkIsDevicePtr = [&todo](auto op, LogicalResult &result) {
339  if (!op.getIsDevicePtrVars().empty())
340  result = todo("is_device_ptr");
341  };
342  auto checkLinear = [&todo](auto op, LogicalResult &result) {
343  if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
344  result = todo("linear");
345  };
346  auto checkNowait = [&todo](auto op, LogicalResult &result) {
347  if (op.getNowait())
348  result = todo("nowait");
349  };
350  auto checkOrder = [&todo](auto op, LogicalResult &result) {
351  if (op.getOrder() || op.getOrderMod())
352  result = todo("order");
353  };
354  auto checkParLevelSimd = [&todo](auto op, LogicalResult &result) {
355  if (op.getParLevelSimd())
356  result = todo("parallelization-level");
357  };
358  auto checkPriority = [&todo](auto op, LogicalResult &result) {
359  if (op.getPriority())
360  result = todo("priority");
361  };
362  auto checkPrivate = [&todo](auto op, LogicalResult &result) {
363  if constexpr (std::is_same_v<std::decay_t<decltype(op)>, omp::TargetOp>) {
364  // Privatization is supported only for included target tasks.
365  if (!op.getPrivateVars().empty() && op.getNowait())
366  result = todo("privatization for deferred target tasks");
367  } else {
368  if (!op.getPrivateVars().empty() || op.getPrivateSyms())
369  result = todo("privatization");
370  }
371  };
372  auto checkReduction = [&todo](auto op, LogicalResult &result) {
373  if (isa<omp::TeamsOp>(op) || isa<omp::SimdOp>(op))
374  if (!op.getReductionVars().empty() || op.getReductionByref() ||
375  op.getReductionSyms())
376  result = todo("reduction");
377  if (op.getReductionMod() &&
378  op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
379  result = todo("reduction with modifier");
380  };
381  auto checkTaskReduction = [&todo](auto op, LogicalResult &result) {
382  if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() ||
383  op.getTaskReductionSyms())
384  result = todo("task_reduction");
385  };
386  auto checkUntied = [&todo](auto op, LogicalResult &result) {
387  if (op.getUntied())
388  result = todo("untied");
389  };
390 
391  LogicalResult result = success();
393  .Case([&](omp::CancelOp op) { checkCancelDirective(op, result); })
394  .Case([&](omp::CancellationPointOp op) {
395  checkCancelDirective(op, result);
396  })
397  .Case([&](omp::DistributeOp op) {
398  checkAllocate(op, result);
399  checkDistSchedule(op, result);
400  checkOrder(op, result);
401  })
402  .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
403  .Case([&](omp::SectionsOp op) {
404  checkAllocate(op, result);
405  checkPrivate(op, result);
406  checkReduction(op, result);
407  })
408  .Case([&](omp::SingleOp op) {
409  checkAllocate(op, result);
410  checkPrivate(op, result);
411  })
412  .Case([&](omp::TeamsOp op) {
413  checkAllocate(op, result);
414  checkPrivate(op, result);
415  })
416  .Case([&](omp::TaskOp op) {
417  checkAllocate(op, result);
418  checkInReduction(op, result);
419  })
420  .Case([&](omp::TaskgroupOp op) {
421  checkAllocate(op, result);
422  checkTaskReduction(op, result);
423  })
424  .Case([&](omp::TaskwaitOp op) {
425  checkDepend(op, result);
426  checkNowait(op, result);
427  })
428  .Case([&](omp::TaskloopOp op) {
429  // TODO: Add other clauses check
430  checkUntied(op, result);
431  checkPriority(op, result);
432  })
433  .Case([&](omp::WsloopOp op) {
434  checkAllocate(op, result);
435  checkLinear(op, result);
436  checkOrder(op, result);
437  checkReduction(op, result);
438  })
439  .Case([&](omp::ParallelOp op) {
440  checkAllocate(op, result);
441  checkReduction(op, result);
442  })
443  .Case([&](omp::SimdOp op) {
444  checkLinear(op, result);
445  checkReduction(op, result);
446  })
447  .Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp,
448  omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); })
449  .Case<omp::TargetEnterDataOp, omp::TargetExitDataOp, omp::TargetUpdateOp>(
450  [&](auto op) { checkDepend(op, result); })
451  .Case([&](omp::TargetOp op) {
452  checkAllocate(op, result);
453  checkBare(op, result);
454  checkDevice(op, result);
455  checkInReduction(op, result);
456  checkIsDevicePtr(op, result);
457  checkPrivate(op, result);
458  })
459  .Default([](Operation &) {
460  // Assume all clauses for an operation can be translated unless they are
461  // checked above.
462  });
463  return result;
464 }
465 
466 static LogicalResult handleError(llvm::Error error, Operation &op) {
467  LogicalResult result = success();
468  if (error) {
469  llvm::handleAllErrors(
470  std::move(error),
471  [&](const PreviouslyReportedError &) { result = failure(); },
472  [&](const llvm::ErrorInfoBase &err) {
473  result = op.emitError(err.message());
474  });
475  }
476  return result;
477 }
478 
479 template <typename T>
480 static LogicalResult handleError(llvm::Expected<T> &result, Operation &op) {
481  if (!result)
482  return handleError(result.takeError(), op);
483 
484  return success();
485 }
486 
487 /// Find the insertion point for allocas given the current insertion point for
488 /// normal operations in the builder.
489 static llvm::OpenMPIRBuilder::InsertPointTy
490 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
491  LLVM::ModuleTranslation &moduleTranslation) {
492  // If there is an alloca insertion point on stack, i.e. we are in a nested
493  // operation and a specific point was provided by some surrounding operation,
494  // use it.
495  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
496  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
497  [&](OpenMPAllocaStackFrame &frame) {
498  allocaInsertPoint = frame.allocaInsertPoint;
499  return WalkResult::interrupt();
500  });
501  if (walkResult.wasInterrupted())
502  return allocaInsertPoint;
503 
504  // Otherwise, insert to the entry block of the surrounding function.
505  // If the current IRBuilder InsertPoint is the function's entry, it cannot
506  // also be used for alloca insertion which would result in insertion order
507  // confusion. Create a new BasicBlock for the Builder and use the entry block
508  // for the allocs.
509  // TODO: Create a dedicated alloca BasicBlock at function creation such that
510  // we do not need to move the current InertPoint here.
511  if (builder.GetInsertBlock() ==
512  &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
513  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
514  "Assuming end of basic block");
515  llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
516  builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
517  builder.GetInsertBlock()->getNextNode());
518  builder.CreateBr(entryBB);
519  builder.SetInsertPoint(entryBB);
520  }
521 
522  llvm::BasicBlock &funcEntryBlock =
523  builder.GetInsertBlock()->getParent()->getEntryBlock();
524  return llvm::OpenMPIRBuilder::InsertPointTy(
525  &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
526 }
527 
528 /// Find the loop information structure for the loop nest being translated. It
529 /// will return a `null` value unless called from the translation function for
530 /// a loop wrapper operation after successfully translating its body.
531 static llvm::CanonicalLoopInfo *
533  llvm::CanonicalLoopInfo *loopInfo = nullptr;
534  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
535  [&](OpenMPLoopInfoStackFrame &frame) {
536  loopInfo = frame.loopInfo;
537  return WalkResult::interrupt();
538  });
539  return loopInfo;
540 }
541 
542 /// Converts the given region that appears within an OpenMP dialect operation to
543 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
544 /// region, and a branch from any block with an successor-less OpenMP terminator
545 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
546 /// of the continuation block if provided.
548  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
549  LLVM::ModuleTranslation &moduleTranslation,
550  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
551  bool isLoopWrapper = isa<omp::LoopWrapperInterface>(region.getParentOp());
552 
553  llvm::BasicBlock *continuationBlock =
554  splitBB(builder, true, "omp.region.cont");
555  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
556 
557  llvm::LLVMContext &llvmContext = builder.getContext();
558  for (Block &bb : region) {
559  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
560  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
561  builder.GetInsertBlock()->getNextNode());
562  moduleTranslation.mapBlock(&bb, llvmBB);
563  }
564 
565  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
566 
567  // Terminators (namely YieldOp) may be forwarding values to the region that
568  // need to be available in the continuation block. Collect the types of these
569  // operands in preparation of creating PHI nodes. This is skipped for loop
570  // wrapper operations, for which we know in advance they have no terminators.
571  SmallVector<llvm::Type *> continuationBlockPHITypes;
572  unsigned numYields = 0;
573 
574  if (!isLoopWrapper) {
575  bool operandsProcessed = false;
576  for (Block &bb : region.getBlocks()) {
577  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
578  if (!operandsProcessed) {
579  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
580  continuationBlockPHITypes.push_back(
581  moduleTranslation.convertType(yield->getOperand(i).getType()));
582  }
583  operandsProcessed = true;
584  } else {
585  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
586  "mismatching number of values yielded from the region");
587  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
588  llvm::Type *operandType =
589  moduleTranslation.convertType(yield->getOperand(i).getType());
590  (void)operandType;
591  assert(continuationBlockPHITypes[i] == operandType &&
592  "values of mismatching types yielded from the region");
593  }
594  }
595  numYields++;
596  }
597  }
598  }
599 
600  // Insert PHI nodes in the continuation block for any values forwarded by the
601  // terminators in this region.
602  if (!continuationBlockPHITypes.empty())
603  assert(
604  continuationBlockPHIs &&
605  "expected continuation block PHIs if converted regions yield values");
606  if (continuationBlockPHIs) {
607  llvm::IRBuilderBase::InsertPointGuard guard(builder);
608  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
609  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
610  for (llvm::Type *ty : continuationBlockPHITypes)
611  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
612  }
613 
614  // Convert blocks one by one in topological order to ensure
615  // defs are converted before uses.
617  for (Block *bb : blocks) {
618  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
619  // Retarget the branch of the entry block to the entry block of the
620  // converted region (regions are single-entry).
621  if (bb->isEntryBlock()) {
622  assert(sourceTerminator->getNumSuccessors() == 1 &&
623  "provided entry block has multiple successors");
624  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
625  "ContinuationBlock is not the successor of the entry block");
626  sourceTerminator->setSuccessor(0, llvmBB);
627  }
628 
629  llvm::IRBuilderBase::InsertPointGuard guard(builder);
630  if (failed(
631  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder)))
632  return llvm::make_error<PreviouslyReportedError>();
633 
634  // Create a direct branch here for loop wrappers to prevent their lack of a
635  // terminator from causing a crash below.
636  if (isLoopWrapper) {
637  builder.CreateBr(continuationBlock);
638  continue;
639  }
640 
641  // Special handling for `omp.yield` and `omp.terminator` (we may have more
642  // than one): they return the control to the parent OpenMP dialect operation
643  // so replace them with the branch to the continuation block. We handle this
644  // here to avoid relying inter-function communication through the
645  // ModuleTranslation class to set up the correct insertion point. This is
646  // also consistent with MLIR's idiom of handling special region terminators
647  // in the same code that handles the region-owning operation.
648  Operation *terminator = bb->getTerminator();
649  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
650  builder.CreateBr(continuationBlock);
651 
652  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
653  (*continuationBlockPHIs)[i]->addIncoming(
654  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
655  }
656  }
657  // After all blocks have been traversed and values mapped, connect the PHI
658  // nodes to the results of preceding blocks.
659  LLVM::detail::connectPHINodes(region, moduleTranslation);
660 
661  // Remove the blocks and values defined in this region from the mapping since
662  // they are not visible outside of this region. This allows the same region to
663  // be converted several times, that is cloned, without clashes, and slightly
664  // speeds up the lookups.
665  moduleTranslation.forgetMapping(region);
666 
667  return continuationBlock;
668 }
669 
670 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
671 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
672  switch (kind) {
673  case omp::ClauseProcBindKind::Close:
674  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
675  case omp::ClauseProcBindKind::Master:
676  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
677  case omp::ClauseProcBindKind::Primary:
678  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
679  case omp::ClauseProcBindKind::Spread:
680  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
681  }
682  llvm_unreachable("Unknown ClauseProcBindKind kind");
683 }
684 
685 /// Maps block arguments from \p blockArgIface (which are MLIR values) to the
686 /// corresponding LLVM values of \p the interface's operands. This is useful
687 /// when an OpenMP region with entry block arguments is converted to LLVM. In
688 /// this case the block arguments are (part of) of the OpenMP region's entry
689 /// arguments and the operands are (part of) of the operands to the OpenMP op
690 /// containing the region.
691 static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation,
692  omp::BlockArgOpenMPOpInterface blockArgIface) {
694  blockArgIface.getBlockArgsPairs(blockArgsPairs);
695  for (auto [var, arg] : blockArgsPairs)
696  moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
697 }
698 
699 /// Helper function to map block arguments defined by ignored loop wrappers to
700 /// LLVM values and prevent any uses of those from triggering null pointer
701 /// dereferences.
702 ///
703 /// This must be called after block arguments of parent wrappers have already
704 /// been mapped to LLVM IR values.
705 static LogicalResult
706 convertIgnoredWrapper(omp::LoopWrapperInterface opInst,
707  LLVM::ModuleTranslation &moduleTranslation) {
708  // Map block arguments directly to the LLVM value associated to the
709  // corresponding operand. This is semantically equivalent to this wrapper not
710  // being present.
712  .Case([&](omp::SimdOp op) {
713  forwardArgs(moduleTranslation,
714  cast<omp::BlockArgOpenMPOpInterface>(*op));
715  op.emitWarning() << "simd information on composite construct discarded";
716  return success();
717  })
718  .Default([&](Operation *op) {
719  return op->emitError() << "cannot ignore wrapper";
720  });
721 }
722 
723 /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
724 static LogicalResult
725 convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
726  LLVM::ModuleTranslation &moduleTranslation) {
727  auto maskedOp = cast<omp::MaskedOp>(opInst);
728  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
729 
730  if (failed(checkImplementationStatus(opInst)))
731  return failure();
732 
733  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
734  // MaskedOp has only one region associated with it.
735  auto &region = maskedOp.getRegion();
736  builder.restoreIP(codeGenIP);
737  return convertOmpOpRegions(region, "omp.masked.region", builder,
738  moduleTranslation)
739  .takeError();
740  };
741 
742  // TODO: Perform finalization actions for variables. This has to be
743  // called for variables which have destructors/finalizers.
744  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
745 
746  llvm::Value *filterVal = nullptr;
747  if (auto filterVar = maskedOp.getFilteredThreadId()) {
748  filterVal = moduleTranslation.lookupValue(filterVar);
749  } else {
750  llvm::LLVMContext &llvmContext = builder.getContext();
751  filterVal =
752  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), /*V=*/0);
753  }
754  assert(filterVal != nullptr);
755  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
756  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
757  moduleTranslation.getOpenMPBuilder()->createMasked(ompLoc, bodyGenCB,
758  finiCB, filterVal);
759 
760  if (failed(handleError(afterIP, opInst)))
761  return failure();
762 
763  builder.restoreIP(*afterIP);
764  return success();
765 }
766 
767 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
768 static LogicalResult
769 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
770  LLVM::ModuleTranslation &moduleTranslation) {
771  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
772  auto masterOp = cast<omp::MasterOp>(opInst);
773 
774  if (failed(checkImplementationStatus(opInst)))
775  return failure();
776 
777  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
778  // MasterOp has only one region associated with it.
779  auto &region = masterOp.getRegion();
780  builder.restoreIP(codeGenIP);
781  return convertOmpOpRegions(region, "omp.master.region", builder,
782  moduleTranslation)
783  .takeError();
784  };
785 
786  // TODO: Perform finalization actions for variables. This has to be
787  // called for variables which have destructors/finalizers.
788  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
789 
790  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
791  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
792  moduleTranslation.getOpenMPBuilder()->createMaster(ompLoc, bodyGenCB,
793  finiCB);
794 
795  if (failed(handleError(afterIP, opInst)))
796  return failure();
797 
798  builder.restoreIP(*afterIP);
799  return success();
800 }
801 
802 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
803 static LogicalResult
804 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
805  LLVM::ModuleTranslation &moduleTranslation) {
806  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
807  auto criticalOp = cast<omp::CriticalOp>(opInst);
808 
809  if (failed(checkImplementationStatus(opInst)))
810  return failure();
811 
812  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
813  // CriticalOp has only one region associated with it.
814  auto &region = cast<omp::CriticalOp>(opInst).getRegion();
815  builder.restoreIP(codeGenIP);
816  return convertOmpOpRegions(region, "omp.critical.region", builder,
817  moduleTranslation)
818  .takeError();
819  };
820 
821  // TODO: Perform finalization actions for variables. This has to be
822  // called for variables which have destructors/finalizers.
823  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
824 
825  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
826  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
827  llvm::Constant *hint = nullptr;
828 
829  // If it has a name, it probably has a hint too.
830  if (criticalOp.getNameAttr()) {
831  // The verifiers in OpenMP Dialect guarentee that all the pointers are
832  // non-null
833  auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
834  auto criticalDeclareOp =
835  SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
836  symbolRef);
837  hint =
838  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
839  static_cast<int>(criticalDeclareOp.getHint()));
840  }
841  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
842  moduleTranslation.getOpenMPBuilder()->createCritical(
843  ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint);
844 
845  if (failed(handleError(afterIP, opInst)))
846  return failure();
847 
848  builder.restoreIP(*afterIP);
849  return success();
850 }
851 
852 /// A util to collect info needed to convert delayed privatizers from MLIR to
853 /// LLVM.
855  template <typename OP>
857  : blockArgs(
858  cast<omp::BlockArgOpenMPOpInterface>(*op).getPrivateBlockArgs()) {
859  mlirVars.reserve(blockArgs.size());
860  llvmVars.reserve(blockArgs.size());
861  collectPrivatizationDecls<OP>(op);
862 
863  for (mlir::Value privateVar : op.getPrivateVars())
864  mlirVars.push_back(privateVar);
865  }
866 
871 
872 private:
873  /// Populates `privatizations` with privatization declarations used for the
874  /// given op.
875  template <class OP>
876  void collectPrivatizationDecls(OP op) {
877  std::optional<ArrayAttr> attr = op.getPrivateSyms();
878  if (!attr)
879  return;
880 
881  privatizers.reserve(privatizers.size() + attr->size());
882  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
883  privatizers.push_back(findPrivatizer(op, symbolRef));
884  }
885  }
886 };
887 
888 /// Populates `reductions` with reduction declarations used in the given op.
889 template <typename T>
890 static void
893  std::optional<ArrayAttr> attr = op.getReductionSyms();
894  if (!attr)
895  return;
896 
897  reductions.reserve(reductions.size() + op.getNumReductionVars());
898  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
899  reductions.push_back(
900  SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
901  op, symbolRef));
902  }
903 }
904 
905 /// Translates the blocks contained in the given region and appends them to at
906 /// the current insertion point of `builder`. The operations of the entry block
907 /// are appended to the current insertion block. If set, `continuationBlockArgs`
908 /// is populated with translated values that correspond to the values
909 /// omp.yield'ed from the region.
910 static LogicalResult inlineConvertOmpRegions(
911  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
912  LLVM::ModuleTranslation &moduleTranslation,
913  SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
914  if (region.empty())
915  return success();
916 
917  // Special case for single-block regions that don't create additional blocks:
918  // insert operations without creating additional blocks.
919  if (llvm::hasSingleElement(region)) {
920  llvm::Instruction *potentialTerminator =
921  builder.GetInsertBlock()->empty() ? nullptr
922  : &builder.GetInsertBlock()->back();
923 
924  if (potentialTerminator && potentialTerminator->isTerminator())
925  potentialTerminator->removeFromParent();
926  moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
927 
928  if (failed(moduleTranslation.convertBlock(
929  region.front(), /*ignoreArguments=*/true, builder)))
930  return failure();
931 
932  // The continuation arguments are simply the translated terminator operands.
933  if (continuationBlockArgs)
934  llvm::append_range(
935  *continuationBlockArgs,
936  moduleTranslation.lookupValues(region.front().back().getOperands()));
937 
938  // Drop the mapping that is no longer necessary so that the same region can
939  // be processed multiple times.
940  moduleTranslation.forgetMapping(region);
941 
942  if (potentialTerminator && potentialTerminator->isTerminator()) {
943  llvm::BasicBlock *block = builder.GetInsertBlock();
944  if (block->empty()) {
945  // this can happen for really simple reduction init regions e.g.
946  // %0 = llvm.mlir.constant(0 : i32) : i32
947  // omp.yield(%0 : i32)
948  // because the llvm.mlir.constant (MLIR op) isn't converted into any
949  // llvm op
950  potentialTerminator->insertInto(block, block->begin());
951  } else {
952  potentialTerminator->insertAfter(&block->back());
953  }
954  }
955 
956  return success();
957  }
958 
960  llvm::Expected<llvm::BasicBlock *> continuationBlock =
961  convertOmpOpRegions(region, blockName, builder, moduleTranslation, &phis);
962 
963  if (failed(handleError(continuationBlock, *region.getParentOp())))
964  return failure();
965 
966  if (continuationBlockArgs)
967  llvm::append_range(*continuationBlockArgs, phis);
968  builder.SetInsertPoint(*continuationBlock,
969  (*continuationBlock)->getFirstInsertionPt());
970  return success();
971 }
972 
namespace {
/// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
/// store lambdas with capture. std::function owns a copy of its callable, so
/// the captured state remains alive for as long as the generator is stored.
using OwningReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
        llvm::Value *&)>;
/// Atomic variant: takes the element type and the lhs/rhs values instead of
/// producing a combined result by reference.
using OwningAtomicReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
        llvm::Value *)>;
} // namespace
985 
986 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
987 /// reduction declaration. The generator uses `builder` but ignores its
988 /// insertion point.
989 static OwningReductionGen
990 makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
991  LLVM::ModuleTranslation &moduleTranslation) {
992  // The lambda is mutable because we need access to non-const methods of decl
993  // (which aren't actually mutating it), and we must capture decl by-value to
994  // avoid the dangling reference after the parent function returns.
995  OwningReductionGen gen =
996  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
997  llvm::Value *lhs, llvm::Value *rhs,
998  llvm::Value *&result) mutable
999  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
1000  moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs);
1001  moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs);
1002  builder.restoreIP(insertPoint);
1004  if (failed(inlineConvertOmpRegions(decl.getReductionRegion(),
1005  "omp.reduction.nonatomic.body", builder,
1006  moduleTranslation, &phis)))
1007  return llvm::createStringError(
1008  "failed to inline `combiner` region of `omp.declare_reduction`");
1009  result = llvm::getSingleElement(phis);
1010  return builder.saveIP();
1011  };
1012  return gen;
1013 }
1014 
1015 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
1016 /// given reduction declaration. The generator uses `builder` but ignores its
1017 /// insertion point. Returns null if there is no atomic region available in the
1018 /// reduction declaration.
1019 static OwningAtomicReductionGen
1020 makeAtomicReductionGen(omp::DeclareReductionOp decl,
1021  llvm::IRBuilderBase &builder,
1022  LLVM::ModuleTranslation &moduleTranslation) {
1023  if (decl.getAtomicReductionRegion().empty())
1024  return OwningAtomicReductionGen();
1025 
1026  // The lambda is mutable because we need access to non-const methods of decl
1027  // (which aren't actually mutating it), and we must capture decl by-value to
1028  // avoid the dangling reference after the parent function returns.
1029  OwningAtomicReductionGen atomicGen =
1030  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
1031  llvm::Value *lhs, llvm::Value *rhs) mutable
1032  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
1033  moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs);
1034  moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs);
1035  builder.restoreIP(insertPoint);
1037  if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(),
1038  "omp.reduction.atomic.body", builder,
1039  moduleTranslation, &phis)))
1040  return llvm::createStringError(
1041  "failed to inline `atomic` region of `omp.declare_reduction`");
1042  assert(phis.empty());
1043  return builder.saveIP();
1044  };
1045  return atomicGen;
1046 }
1047 
1048 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
1049 static LogicalResult
1050 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
1051  LLVM::ModuleTranslation &moduleTranslation) {
1052  auto orderedOp = cast<omp::OrderedOp>(opInst);
1053 
1054  if (failed(checkImplementationStatus(opInst)))
1055  return failure();
1056 
1057  omp::ClauseDepend dependType = *orderedOp.getDoacrossDependType();
1058  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
1059  unsigned numLoops = *orderedOp.getDoacrossNumLoops();
1060  SmallVector<llvm::Value *> vecValues =
1061  moduleTranslation.lookupValues(orderedOp.getDoacrossDependVars());
1062 
1063  size_t indexVecValues = 0;
1064  while (indexVecValues < vecValues.size()) {
1065  SmallVector<llvm::Value *> storeValues;
1066  storeValues.reserve(numLoops);
1067  for (unsigned i = 0; i < numLoops; i++) {
1068  storeValues.push_back(vecValues[indexVecValues]);
1069  indexVecValues++;
1070  }
1071  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1072  findAllocaInsertPoint(builder, moduleTranslation);
1073  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1074  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
1075  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
1076  }
1077  return success();
1078 }
1079 
1080 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
1081 /// OpenMPIRBuilder.
1082 static LogicalResult
1083 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
1084  LLVM::ModuleTranslation &moduleTranslation) {
1085  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1086  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
1087 
1088  if (failed(checkImplementationStatus(opInst)))
1089  return failure();
1090 
1091  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1092  // OrderedOp has only one region associated with it.
1093  auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
1094  builder.restoreIP(codeGenIP);
1095  return convertOmpOpRegions(region, "omp.ordered.region", builder,
1096  moduleTranslation)
1097  .takeError();
1098  };
1099 
1100  // TODO: Perform finalization actions for variables. This has to be
1101  // called for variables which have destructors/finalizers.
1102  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1103 
1104  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1105  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1106  moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
1107  ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd());
1108 
1109  if (failed(handleError(afterIP, opInst)))
1110  return failure();
1111 
1112  builder.restoreIP(*afterIP);
1113  return success();
1114 }
1115 
1116 namespace {
/// Contains the arguments for an LLVM store operation
/// whose emission is deferred until after all allocas are created (see
/// allocReductionVars).
struct DeferredStore {
  DeferredStore(llvm::Value *value, llvm::Value *address)
      : value(value), address(address) {}

  llvm::Value *value;   // value to be stored
  llvm::Value *address; // destination address of the store
};
1125 } // namespace
1126 
1127 /// Allocate space for privatized reduction variables.
1128 /// `deferredStores` contains information to create store operations which needs
1129 /// to be inserted after all allocas
1130 template <typename T>
1131 static LogicalResult
1133  llvm::IRBuilderBase &builder,
1134  LLVM::ModuleTranslation &moduleTranslation,
1135  const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1137  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
1138  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1139  SmallVectorImpl<DeferredStore> &deferredStores,
1140  llvm::ArrayRef<bool> isByRefs) {
1141  llvm::IRBuilderBase::InsertPointGuard guard(builder);
1142  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1143 
1144  // delay creating stores until after all allocas
1145  deferredStores.reserve(loop.getNumReductionVars());
1146 
1147  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
1148  Region &allocRegion = reductionDecls[i].getAllocRegion();
1149  if (isByRefs[i]) {
1150  if (allocRegion.empty())
1151  continue;
1152 
1154  if (failed(inlineConvertOmpRegions(allocRegion, "omp.reduction.alloc",
1155  builder, moduleTranslation, &phis)))
1156  return loop.emitError(
1157  "failed to inline `alloc` region of `omp.declare_reduction`");
1158 
1159  assert(phis.size() == 1 && "expected one allocation to be yielded");
1160  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1161 
1162  // Allocate reduction variable (which is a pointer to the real reduction
1163  // variable allocated in the inlined region)
1164  llvm::Value *var = builder.CreateAlloca(
1165  moduleTranslation.convertType(reductionDecls[i].getType()));
1166 
1167  llvm::Type *ptrTy = builder.getPtrTy();
1168  llvm::Value *castVar =
1169  builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
1170  llvm::Value *castPhi =
1171  builder.CreatePointerBitCastOrAddrSpaceCast(phis[0], ptrTy);
1172 
1173  deferredStores.emplace_back(castPhi, castVar);
1174 
1175  privateReductionVariables[i] = castVar;
1176  moduleTranslation.mapValue(reductionArgs[i], castPhi);
1177  reductionVariableMap.try_emplace(loop.getReductionVars()[i], castPhi);
1178  } else {
1179  assert(allocRegion.empty() &&
1180  "allocaction is implicit for by-val reduction");
1181  llvm::Value *var = builder.CreateAlloca(
1182  moduleTranslation.convertType(reductionDecls[i].getType()));
1183 
1184  llvm::Type *ptrTy = builder.getPtrTy();
1185  llvm::Value *castVar =
1186  builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
1187 
1188  moduleTranslation.mapValue(reductionArgs[i], castVar);
1189  privateReductionVariables[i] = castVar;
1190  reductionVariableMap.try_emplace(loop.getReductionVars()[i], castVar);
1191  }
1192  }
1193 
1194  return success();
1195 }
1196 
1197 /// Map input arguments to reduction initialization region
1198 template <typename T>
1199 static void
1202  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1203  unsigned i) {
1204  // map input argument to the initialization region
1205  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
1206  Region &initializerRegion = reduction.getInitializerRegion();
1207  Block &entry = initializerRegion.front();
1208 
1209  mlir::Value mlirSource = loop.getReductionVars()[i];
1210  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
1211  assert(llvmSource && "lookup reduction var");
1212  moduleTranslation.mapValue(reduction.getInitializerMoldArg(), llvmSource);
1213 
1214  if (entry.getNumArguments() > 1) {
1215  llvm::Value *allocation =
1216  reductionVariableMap.lookup(loop.getReductionVars()[i]);
1217  moduleTranslation.mapValue(reduction.getInitializerAllocArg(), allocation);
1218  }
1219 }
1220 
1221 static void
1222 setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder,
1223  llvm::BasicBlock *block = nullptr) {
1224  if (block == nullptr)
1225  block = builder.GetInsertBlock();
1226 
1227  if (block->empty() || block->getTerminator() == nullptr)
1228  builder.SetInsertPoint(block);
1229  else
1230  builder.SetInsertPoint(block->getTerminator());
1231 }
1232 
1233 /// Inline reductions' `init` regions. This functions assumes that the
1234 /// `builder`'s insertion point is where the user wants the `init` regions to be
1235 /// inlined; i.e. it does not try to find a proper insertion location for the
1236 /// `init` regions. It also leaves the `builder's insertions point in a state
1237 /// where the user can continue the code-gen directly afterwards.
1238 template <typename OP>
1239 static LogicalResult
1241  llvm::IRBuilderBase &builder,
1242  LLVM::ModuleTranslation &moduleTranslation,
1243  llvm::BasicBlock *latestAllocaBlock,
1245  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
1246  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1247  llvm::ArrayRef<bool> isByRef,
1248  SmallVectorImpl<DeferredStore> &deferredStores) {
1249  if (op.getNumReductionVars() == 0)
1250  return success();
1251 
1252  llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
1253  auto allocaIP = llvm::IRBuilderBase::InsertPoint(
1254  latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
1255  builder.restoreIP(allocaIP);
1256  SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars());
1257 
1258  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
1259  if (isByRef[i]) {
1260  if (!reductionDecls[i].getAllocRegion().empty())
1261  continue;
1262 
1263  // TODO: remove after all users of by-ref are updated to use the alloc
1264  // region: Allocate reduction variable (which is a pointer to the real
1265  // reduciton variable allocated in the inlined region)
1266  byRefVars[i] = builder.CreateAlloca(
1267  moduleTranslation.convertType(reductionDecls[i].getType()));
1268  }
1269  }
1270 
1271  setInsertPointForPossiblyEmptyBlock(builder, initBlock);
1272 
1273  // store result of the alloc region to the allocated pointer to the real
1274  // reduction variable
1275  for (auto [data, addr] : deferredStores)
1276  builder.CreateStore(data, addr);
1277 
1278  // Before the loop, store the initial values of reductions into reduction
1279  // variables. Although this could be done after allocas, we don't want to mess
1280  // up with the alloca insertion point.
1281  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
1283 
1284  // map block argument to initializer region
1285  mapInitializationArgs(op, moduleTranslation, reductionDecls,
1286  reductionVariableMap, i);
1287 
1288  if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
1289  "omp.reduction.neutral", builder,
1290  moduleTranslation, &phis)))
1291  return failure();
1292 
1293  assert(phis.size() == 1 && "expected one value to be yielded from the "
1294  "reduction neutral element declaration region");
1295 
1297 
1298  if (isByRef[i]) {
1299  if (!reductionDecls[i].getAllocRegion().empty())
1300  // done in allocReductionVars
1301  continue;
1302 
1303  // TODO: this path can be removed once all users of by-ref are updated to
1304  // use an alloc region
1305 
1306  // Store the result of the inlined region to the allocated reduction var
1307  // ptr
1308  builder.CreateStore(phis[0], byRefVars[i]);
1309 
1310  privateReductionVariables[i] = byRefVars[i];
1311  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
1312  reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
1313  } else {
1314  // for by-ref case the store is inside of the reduction region
1315  builder.CreateStore(phis[0], privateReductionVariables[i]);
1316  // the rest was handled in allocByValReductionVars
1317  }
1318 
1319  // forget the mapping for the initializer region because we might need a
1320  // different mapping if this reduction declaration is re-used for a
1321  // different variable
1322  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
1323  }
1324 
1325  return success();
1326 }
1327 
1328 /// Collect reduction info
1329 template <typename T>
1331  T loop, llvm::IRBuilderBase &builder,
1332  LLVM::ModuleTranslation &moduleTranslation,
1334  SmallVectorImpl<OwningReductionGen> &owningReductionGens,
1335  SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens,
1336  const ArrayRef<llvm::Value *> privateReductionVariables,
1338  unsigned numReductions = loop.getNumReductionVars();
1339 
1340  for (unsigned i = 0; i < numReductions; ++i) {
1341  owningReductionGens.push_back(
1342  makeReductionGen(reductionDecls[i], builder, moduleTranslation));
1343  owningAtomicReductionGens.push_back(
1344  makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
1345  }
1346 
1347  // Collect the reduction information.
1348  reductionInfos.reserve(numReductions);
1349  for (unsigned i = 0; i < numReductions; ++i) {
1350  llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
1351  if (owningAtomicReductionGens[i])
1352  atomicGen = owningAtomicReductionGens[i];
1353  llvm::Value *variable =
1354  moduleTranslation.lookupValue(loop.getReductionVars()[i]);
1355  reductionInfos.push_back(
1356  {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
1357  privateReductionVariables[i],
1358  /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
1359  owningReductionGens[i],
1360  /*ReductionGenClang=*/nullptr, atomicGen});
1361  }
1362 }
1363 
1364 /// handling of DeclareReductionOp's cleanup region
1365 static LogicalResult
1367  llvm::ArrayRef<llvm::Value *> privateVariables,
1368  LLVM::ModuleTranslation &moduleTranslation,
1369  llvm::IRBuilderBase &builder, StringRef regionName,
1370  bool shouldLoadCleanupRegionArg = true) {
1371  for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
1372  if (cleanupRegion->empty())
1373  continue;
1374 
1375  // map the argument to the cleanup region
1376  Block &entry = cleanupRegion->front();
1377 
1378  llvm::Instruction *potentialTerminator =
1379  builder.GetInsertBlock()->empty() ? nullptr
1380  : &builder.GetInsertBlock()->back();
1381  if (potentialTerminator && potentialTerminator->isTerminator())
1382  builder.SetInsertPoint(potentialTerminator);
1383  llvm::Value *privateVarValue =
1384  shouldLoadCleanupRegionArg
1385  ? builder.CreateLoad(
1386  moduleTranslation.convertType(entry.getArgument(0).getType()),
1387  privateVariables[i])
1388  : privateVariables[i];
1389 
1390  moduleTranslation.mapValue(entry.getArgument(0), privateVarValue);
1391 
1392  if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
1393  moduleTranslation)))
1394  return failure();
1395 
1396  // clear block argument mapping in case it needs to be re-created with a
1397  // different source for another use of the same reduction decl
1398  moduleTranslation.forgetMapping(*cleanupRegion);
1399  }
1400  return success();
1401 }
1402 
1403 // TODO: not used by ParallelOp
1404 template <class OP>
1405 static LogicalResult createReductionsAndCleanup(
1406  OP op, llvm::IRBuilderBase &builder,
1407  LLVM::ModuleTranslation &moduleTranslation,
1408  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1410  ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef,
1411  bool isNowait = false, bool isTeamsReduction = false) {
1412  // Process the reductions if required.
1413  if (op.getNumReductionVars() == 0)
1414  return success();
1415 
1416  SmallVector<OwningReductionGen> owningReductionGens;
1417  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1419 
1420  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1421 
1422  // Create the reduction generators. We need to own them here because
1423  // ReductionInfo only accepts references to the generators.
1424  collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
1425  owningReductionGens, owningAtomicReductionGens,
1426  privateReductionVariables, reductionInfos);
1427 
1428  // The call to createReductions below expects the block to have a
1429  // terminator. Create an unreachable instruction to serve as terminator
1430  // and remove it later.
1431  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1432  builder.SetInsertPoint(tempTerminator);
1433  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
1434  ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
1435  isByRef, isNowait, isTeamsReduction);
1436 
1437  if (failed(handleError(contInsertPoint, *op)))
1438  return failure();
1439 
1440  if (!contInsertPoint->getBlock())
1441  return op->emitOpError() << "failed to convert reductions";
1442 
1443  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1444  ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);
1445 
1446  if (failed(handleError(afterIP, *op)))
1447  return failure();
1448 
1449  tempTerminator->eraseFromParent();
1450  builder.restoreIP(*afterIP);
1451 
1452  // after the construct, deallocate private reduction variables
1453  SmallVector<Region *> reductionRegions;
1454  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
1455  [](omp::DeclareReductionOp reductionDecl) {
1456  return &reductionDecl.getCleanupRegion();
1457  });
1458  return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
1459  moduleTranslation, builder,
1460  "omp.reduction.cleanup");
1461  return success();
1462 }
1463 
1464 static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
1465  if (!attr)
1466  return {};
1467  return *attr;
1468 }
1469 
1470 // TODO: not used by omp.parallel
1471 template <typename OP>
1472 static LogicalResult allocAndInitializeReductionVars(
1473  OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
1474  LLVM::ModuleTranslation &moduleTranslation,
1475  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1477  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
1478  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1479  llvm::ArrayRef<bool> isByRef) {
1480  if (op.getNumReductionVars() == 0)
1481  return success();
1482 
1483  SmallVector<DeferredStore> deferredStores;
1484 
1485  if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
1486  allocaIP, reductionDecls,
1487  privateReductionVariables, reductionVariableMap,
1488  deferredStores, isByRef)))
1489  return failure();
1490 
1491  return initReductionVars(op, reductionArgs, builder, moduleTranslation,
1492  allocaIP.getBlock(), reductionDecls,
1493  privateReductionVariables, reductionVariableMap,
1494  isByRef, deferredStores);
1495 }
1496 
1497 /// Return the llvm::Value * corresponding to the `privateVar` that
1498 /// is being privatized. It isn't always as simple as looking up
1499 /// moduleTranslation with privateVar. For instance, in case of
1500 /// an allocatable, the descriptor for the allocatable is privatized.
1501 /// This descriptor is mapped using an MapInfoOp. So, this function
1502 /// will return a pointer to the llvm::Value corresponding to the
1503 /// block argument for the mapped descriptor.
1504 static llvm::Value *
1505 findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
1506  LLVM::ModuleTranslation &moduleTranslation,
1507  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1508  if (mappedPrivateVars == nullptr || !mappedPrivateVars->contains(privateVar))
1509  return moduleTranslation.lookupValue(privateVar);
1510 
1511  Value blockArg = (*mappedPrivateVars)[privateVar];
1512  Type privVarType = privateVar.getType();
1513  Type blockArgType = blockArg.getType();
1514  assert(isa<LLVM::LLVMPointerType>(blockArgType) &&
1515  "A block argument corresponding to a mapped var should have "
1516  "!llvm.ptr type");
1517 
1518  if (privVarType == blockArgType)
1519  return moduleTranslation.lookupValue(blockArg);
1520 
1521  // This typically happens when the privatized type is lowered from
1522  // boxchar<KIND> and gets lowered to !llvm.struct<(ptr, i64)>. That is the
1523  // struct/pair is passed by value. But, mapped values are passed only as
1524  // pointers, so before we privatize, we must load the pointer.
1525  if (!isa<LLVM::LLVMPointerType>(privVarType))
1526  return builder.CreateLoad(moduleTranslation.convertType(privVarType),
1527  moduleTranslation.lookupValue(blockArg));
1528 
1529  return moduleTranslation.lookupValue(privateVar);
1530 }
1531 
1532 /// Initialize a single (first)private variable. You probably want to use
1533 /// allocateAndInitPrivateVars instead of this.
1534 /// This returns the private variable which has been initialized. This
1535 /// variable should be mapped before constructing the body of the Op.
1537  llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
1538  omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg,
1539  llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock,
1540  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1541  Region &initRegion = privDecl.getInitRegion();
1542  if (initRegion.empty())
1543  return llvmPrivateVar;
1544 
1545  // map initialization region block arguments
1546  llvm::Value *nonPrivateVar = findAssociatedValue(
1547  mlirPrivVar, builder, moduleTranslation, mappedPrivateVars);
1548  assert(nonPrivateVar);
1549  moduleTranslation.mapValue(privDecl.getInitMoldArg(), nonPrivateVar);
1550  moduleTranslation.mapValue(privDecl.getInitPrivateArg(), llvmPrivateVar);
1551 
1552  // in-place convert the private initialization region
1554  if (failed(inlineConvertOmpRegions(initRegion, "omp.private.init", builder,
1555  moduleTranslation, &phis)))
1556  return llvm::createStringError(
1557  "failed to inline `init` region of `omp.private`");
1558 
1559  assert(phis.size() == 1 && "expected one allocation to be yielded");
1560 
1561  // clear init region block argument mapping in case it needs to be
1562  // re-created with a different source for another use of the same
1563  // reduction decl
1564  moduleTranslation.forgetMapping(initRegion);
1565 
1566  // Prefer the value yielded from the init region to the allocated private
1567  // variable in case the region is operating on arguments by-value (e.g.
1568  // Fortran character boxes).
1569  return phis[0];
1570 }
1571 
1572 static llvm::Error
1573 initPrivateVars(llvm::IRBuilderBase &builder,
1574  LLVM::ModuleTranslation &moduleTranslation,
1575  PrivateVarsInfo &privateVarsInfo,
1576  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1577  if (privateVarsInfo.blockArgs.empty())
1578  return llvm::Error::success();
1579 
1580  llvm::BasicBlock *privInitBlock = splitBB(builder, true, "omp.private.init");
1581  setInsertPointForPossiblyEmptyBlock(builder, privInitBlock);
1582 
1583  for (auto [idx, zip] : llvm::enumerate(llvm::zip_equal(
1584  privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
1585  privateVarsInfo.blockArgs, privateVarsInfo.llvmVars))) {
1586  auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVar] = zip;
1588  builder, moduleTranslation, privDecl, mlirPrivVar, blockArg,
1589  llvmPrivateVar, privInitBlock, mappedPrivateVars);
1590 
1591  if (!privVarOrErr)
1592  return privVarOrErr.takeError();
1593 
1594  llvmPrivateVar = privVarOrErr.get();
1595  moduleTranslation.mapValue(blockArg, llvmPrivateVar);
1596 
1598  }
1599 
1600  return llvm::Error::success();
1601 }
1602 
1603 /// Allocate and initialize delayed private variables. Returns the basic block
1604 /// which comes after all of these allocations. llvm::Value * for each of these
1605 /// private variables are populated in llvmPrivateVars.
1607 allocatePrivateVars(llvm::IRBuilderBase &builder,
1608  LLVM::ModuleTranslation &moduleTranslation,
1609  PrivateVarsInfo &privateVarsInfo,
1610  const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1611  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1612  // Allocate private vars
1613  llvm::Instruction *allocaTerminator = allocaIP.getBlock()->getTerminator();
1614  splitBB(llvm::OpenMPIRBuilder::InsertPointTy(allocaIP.getBlock(),
1615  allocaTerminator->getIterator()),
1616  true, allocaTerminator->getStableDebugLoc(),
1617  "omp.region.after_alloca");
1618 
1619  llvm::IRBuilderBase::InsertPointGuard guard(builder);
1620  // Update the allocaTerminator since the alloca block was split above.
1621  allocaTerminator = allocaIP.getBlock()->getTerminator();
1622  builder.SetInsertPoint(allocaTerminator);
1623  // The new terminator is an uncondition branch created by the splitBB above.
1624  assert(allocaTerminator->getNumSuccessors() == 1 &&
1625  "This is an unconditional branch created by splitBB");
1626 
1627  llvm::DataLayout dataLayout = builder.GetInsertBlock()->getDataLayout();
1628  llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);
1629 
1630  unsigned int allocaAS =
1631  moduleTranslation.getLLVMModule()->getDataLayout().getAllocaAddrSpace();
1632  unsigned int defaultAS = moduleTranslation.getLLVMModule()
1633  ->getDataLayout()
1634  .getProgramAddressSpace();
1635 
1636  for (auto [privDecl, mlirPrivVar, blockArg] :
1637  llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
1638  privateVarsInfo.blockArgs)) {
1639  llvm::Type *llvmAllocType =
1640  moduleTranslation.convertType(privDecl.getType());
1641  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1642  llvm::Value *llvmPrivateVar = builder.CreateAlloca(
1643  llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
1644  if (allocaAS != defaultAS)
1645  llvmPrivateVar = builder.CreateAddrSpaceCast(llvmPrivateVar,
1646  builder.getPtrTy(defaultAS));
1647 
1648  privateVarsInfo.llvmVars.push_back(llvmPrivateVar);
1649  }
1650 
1651  return afterAllocas;
1652 }
1653 
/// Inline the `copy` region of each firstprivate privatizer so that the value
/// of the original (host) variable is copied into the corresponding private
/// allocation. Optionally emits a barrier afterwards when \p insertBarrier is
/// set. Returns failure if any `copy` region could not be inlined.
static LogicalResult copyFirstPrivateVars(
    mlir::Operation *op, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    SmallVectorImpl<mlir::Value> &mlirPrivateVars,
    ArrayRef<llvm::Value *> llvmPrivateVars,
    SmallVectorImpl<omp::PrivateClauseOp> &privateDecls, bool insertBarrier,
    llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
  // Apply copy region for firstprivate.
  bool needsFirstprivate =
      llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
        return privOp.getDataSharingType() ==
               omp::DataSharingClauseType::FirstPrivate;
      });

  // Fast path: nothing is firstprivate, so no copy block is created.
  if (!needsFirstprivate)
    return success();

  llvm::BasicBlock *copyBlock =
      splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);

  for (auto [decl, mlirVar, llvmVar] :
       llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
    if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
      continue;

    // copyRegion implements `lhs = rhs`
    Region &copyRegion = decl.getCopyRegion();

    // map copyRegion rhs arg
    llvm::Value *nonPrivateVar = findAssociatedValue(
        mlirVar, builder, moduleTranslation, mappedPrivateVars);
    assert(nonPrivateVar);
    moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);

    // map copyRegion lhs arg
    moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);

    // in-place convert copy region
    if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
                                       moduleTranslation)))
      return decl.emitError("failed to inline `copy` region of `omp.private`");

    // ignore unused value yielded from copy region

    // clear copy region block argument mapping in case it needs to be
    // re-created with different sources for reuse of the same reduction
    // decl
    moduleTranslation.forgetMapping(copyRegion);
  }

  // A barrier is requested by callers when other threads must not start
  // executing before all firstprivate copies have completed.
  if (insertBarrier) {
    llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
    llvm::OpenMPIRBuilder::InsertPointOrErrorTy res =
        ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
    if (failed(handleError(res, *op)))
      return failure();
  }

  return success();
}
1717 
1718 static LogicalResult
1719 cleanupPrivateVars(llvm::IRBuilderBase &builder,
1720  LLVM::ModuleTranslation &moduleTranslation, Location loc,
1721  SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
1722  SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
1723  // private variable deallocation
1724  SmallVector<Region *> privateCleanupRegions;
1725  llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
1726  [](omp::PrivateClauseOp privatizer) {
1727  return &privatizer.getDeallocRegion();
1728  });
1729 
1730  if (failed(inlineOmpRegionCleanup(
1731  privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1732  "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1733  return mlir::emitError(loc, "failed to inline `dealloc` region of an "
1734  "`omp.private` op in");
1735 
1736  return success();
1737 }
1738 
1739 /// Returns true if the construct contains omp.cancel or omp.cancellation_point
1741  // omp.cancel and omp.cancellation_point must be "closely nested" so they will
1742  // be visible and not inside of function calls. This is enforced by the
1743  // verifier.
1744  return op
1745  ->walk([](Operation *child) {
1746  if (mlir::isa<omp::CancelOp, omp::CancellationPointOp>(child))
1747  return WalkResult::interrupt();
1748  return WalkResult::advance();
1749  })
1750  .wasInterrupted();
1751 }
1752 
1753 static LogicalResult
1754 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
1755  LLVM::ModuleTranslation &moduleTranslation) {
1756  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1757  using StorableBodyGenCallbackTy =
1758  llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
1759 
1760  auto sectionsOp = cast<omp::SectionsOp>(opInst);
1761 
1762  if (failed(checkImplementationStatus(opInst)))
1763  return failure();
1764 
1765  llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionByref());
1766  assert(isByRef.size() == sectionsOp.getNumReductionVars());
1767 
1768  SmallVector<omp::DeclareReductionOp> reductionDecls;
1769  collectReductionDecls(sectionsOp, reductionDecls);
1770  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1771  findAllocaInsertPoint(builder, moduleTranslation);
1772 
1773  SmallVector<llvm::Value *> privateReductionVariables(
1774  sectionsOp.getNumReductionVars());
1775  DenseMap<Value, llvm::Value *> reductionVariableMap;
1776 
1777  MutableArrayRef<BlockArgument> reductionArgs =
1778  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
1779 
1781  sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
1782  reductionDecls, privateReductionVariables, reductionVariableMap,
1783  isByRef)))
1784  return failure();
1785 
1787 
1788  for (Operation &op : *sectionsOp.getRegion().begin()) {
1789  auto sectionOp = dyn_cast<omp::SectionOp>(op);
1790  if (!sectionOp) // omp.terminator
1791  continue;
1792 
1793  Region &region = sectionOp.getRegion();
1794  auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation](
1795  InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1796  builder.restoreIP(codeGenIP);
1797 
1798  // map the omp.section reduction block argument to the omp.sections block
1799  // arguments
1800  // TODO: this assumes that the only block arguments are reduction
1801  // variables
1802  assert(region.getNumArguments() ==
1803  sectionsOp.getRegion().getNumArguments());
1804  for (auto [sectionsArg, sectionArg] : llvm::zip_equal(
1805  sectionsOp.getRegion().getArguments(), region.getArguments())) {
1806  llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg);
1807  assert(llvmVal);
1808  moduleTranslation.mapValue(sectionArg, llvmVal);
1809  }
1810 
1811  return convertOmpOpRegions(region, "omp.section.region", builder,
1812  moduleTranslation)
1813  .takeError();
1814  };
1815  sectionCBs.push_back(sectionCB);
1816  }
1817 
1818  // No sections within omp.sections operation - skip generation. This situation
1819  // is only possible if there is only a terminator operation inside the
1820  // sections operation
1821  if (sectionCBs.empty())
1822  return success();
1823 
1824  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
1825 
1826  // TODO: Perform appropriate actions according to the data-sharing
1827  // attribute (shared, private, firstprivate, ...) of variables.
1828  // Currently defaults to shared.
1829  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
1830  llvm::Value &vPtr, llvm::Value *&replacementValue)
1831  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
1832  replacementValue = &vPtr;
1833  return codeGenIP;
1834  };
1835 
1836  // TODO: Perform finalization actions for variables. This has to be
1837  // called for variables which have destructors/finalizers.
1838  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1839 
1840  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1841  bool isCancellable = constructIsCancellable(sectionsOp);
1842  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1843  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1844  moduleTranslation.getOpenMPBuilder()->createSections(
1845  ompLoc, allocaIP, sectionCBs, privCB, finiCB, isCancellable,
1846  sectionsOp.getNowait());
1847 
1848  if (failed(handleError(afterIP, opInst)))
1849  return failure();
1850 
1851  builder.restoreIP(*afterIP);
1852 
1853  // Process the reductions if required.
1855  sectionsOp, builder, moduleTranslation, allocaIP, reductionDecls,
1856  privateReductionVariables, isByRef, sectionsOp.getNowait());
1857 }
1858 
1859 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
1860 static LogicalResult
1861 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
1862  LLVM::ModuleTranslation &moduleTranslation) {
1863  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1864  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1865 
1866  if (failed(checkImplementationStatus(*singleOp)))
1867  return failure();
1868 
1869  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1870  builder.restoreIP(codegenIP);
1871  return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region",
1872  builder, moduleTranslation)
1873  .takeError();
1874  };
1875  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1876 
1877  // Handle copyprivate
1878  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
1879  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateSyms();
1882  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
1883  llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
1884  auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
1885  singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
1886  llvmCPFuncs.push_back(
1887  moduleTranslation.lookupFunction(llvmFuncOp.getName()));
1888  }
1889 
1890  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1891  moduleTranslation.getOpenMPBuilder()->createSingle(
1892  ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars,
1893  llvmCPFuncs);
1894 
1895  if (failed(handleError(afterIP, *singleOp)))
1896  return failure();
1897 
1898  builder.restoreIP(*afterIP);
1899  return success();
1900 }
1901 
1902 static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp) {
1903  auto iface =
1904  llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(teamsOp.getOperation());
1905  // Check that all uses of the reduction block arg has the same distribute op
1906  // parent.
1908  Operation *distOp = nullptr;
1909  for (auto ra : iface.getReductionBlockArgs())
1910  for (auto &use : ra.getUses()) {
1911  auto *useOp = use.getOwner();
1912  // Ignore debug uses.
1913  if (mlir::isa<LLVM::DbgDeclareOp, LLVM::DbgValueOp>(useOp)) {
1914  debugUses.push_back(useOp);
1915  continue;
1916  }
1917 
1918  auto currentDistOp = useOp->getParentOfType<omp::DistributeOp>();
1919  // Use is not inside a distribute op - return false
1920  if (!currentDistOp)
1921  return false;
1922  // Multiple distribute operations - return false
1923  Operation *currentOp = currentDistOp.getOperation();
1924  if (distOp && (distOp != currentOp))
1925  return false;
1926 
1927  distOp = currentOp;
1928  }
1929 
1930  // If we are going to use distribute reduction then remove any debug uses of
1931  // the reduction parameters in teamsOp. Otherwise they will be left without
1932  // any mapped value in moduleTranslation and will eventually error out.
1933  for (auto use : debugUses)
1934  use->erase();
1935  return true;
1936 }
1937 
1938 // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder
1939 static LogicalResult
1940 convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
1941  LLVM::ModuleTranslation &moduleTranslation) {
1942  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1943  if (failed(checkImplementationStatus(*op)))
1944  return failure();
1945 
1946  DenseMap<Value, llvm::Value *> reductionVariableMap;
1947  unsigned numReductionVars = op.getNumReductionVars();
1948  SmallVector<omp::DeclareReductionOp> reductionDecls;
1949  SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
1950  llvm::ArrayRef<bool> isByRef;
1951  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1952  findAllocaInsertPoint(builder, moduleTranslation);
1953 
1954  // Only do teams reduction if there is no distribute op that captures the
1955  // reduction instead.
1956  bool doTeamsReduction = !teamsReductionContainedInDistribute(op);
1957  if (doTeamsReduction) {
1958  isByRef = getIsByRef(op.getReductionByref());
1959 
1960  assert(isByRef.size() == op.getNumReductionVars());
1961 
1962  MutableArrayRef<BlockArgument> reductionArgs =
1963  llvm::cast<omp::BlockArgOpenMPOpInterface>(*op).getReductionBlockArgs();
1964 
1965  collectReductionDecls(op, reductionDecls);
1966 
1968  op, reductionArgs, builder, moduleTranslation, allocaIP,
1969  reductionDecls, privateReductionVariables, reductionVariableMap,
1970  isByRef)))
1971  return failure();
1972  }
1973 
1974  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1976  moduleTranslation, allocaIP);
1977  builder.restoreIP(codegenIP);
1978  return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
1979  moduleTranslation)
1980  .takeError();
1981  };
1982 
1983  llvm::Value *numTeamsLower = nullptr;
1984  if (Value numTeamsLowerVar = op.getNumTeamsLower())
1985  numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);
1986 
1987  llvm::Value *numTeamsUpper = nullptr;
1988  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
1989  numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);
1990 
1991  llvm::Value *threadLimit = nullptr;
1992  if (Value threadLimitVar = op.getThreadLimit())
1993  threadLimit = moduleTranslation.lookupValue(threadLimitVar);
1994 
1995  llvm::Value *ifExpr = nullptr;
1996  if (Value ifVar = op.getIfExpr())
1997  ifExpr = moduleTranslation.lookupValue(ifVar);
1998 
1999  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2000  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2001  moduleTranslation.getOpenMPBuilder()->createTeams(
2002  ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr);
2003 
2004  if (failed(handleError(afterIP, *op)))
2005  return failure();
2006 
2007  builder.restoreIP(*afterIP);
2008  if (doTeamsReduction) {
2009  // Process the reductions if required.
2011  op, builder, moduleTranslation, allocaIP, reductionDecls,
2012  privateReductionVariables, isByRef,
2013  /*isNoWait*/ false, /*isTeamsReduction*/ true);
2014  }
2015  return success();
2016 }
2017 
2018 static void
2019 buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
2020  LLVM::ModuleTranslation &moduleTranslation,
2022  if (dependVars.empty())
2023  return;
2024  for (auto dep : llvm::zip(dependVars, dependKinds->getValue())) {
2025  llvm::omp::RTLDependenceKindTy type;
2026  switch (
2027  cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
2028  case mlir::omp::ClauseTaskDepend::taskdependin:
2029  type = llvm::omp::RTLDependenceKindTy::DepIn;
2030  break;
2031  // The OpenMP runtime requires that the codegen for 'depend' clause for
2032  // 'out' dependency kind must be the same as codegen for 'depend' clause
2033  // with 'inout' dependency.
2034  case mlir::omp::ClauseTaskDepend::taskdependout:
2035  case mlir::omp::ClauseTaskDepend::taskdependinout:
2036  type = llvm::omp::RTLDependenceKindTy::DepInOut;
2037  break;
2038  case mlir::omp::ClauseTaskDepend::taskdependmutexinoutset:
2039  type = llvm::omp::RTLDependenceKindTy::DepMutexInOutSet;
2040  break;
2041  case mlir::omp::ClauseTaskDepend::taskdependinoutset:
2042  type = llvm::omp::RTLDependenceKindTy::DepInOutSet;
2043  break;
2044  };
2045  llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
2046  llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
2047  dds.emplace_back(dd);
2048  }
2049 }
2050 
2051 /// Shared implementation of a callback which adds a termiator for the new block
2052 /// created for the branch taken when an openmp construct is cancelled. The
2053 /// terminator is saved in \p cancelTerminators. This callback is invoked only
2054 /// if there is cancellation inside of the taskgroup body.
2055 /// The terminator will need to be fixed to branch to the correct block to
2056 /// cleanup the construct.
2057 static void
2059  llvm::IRBuilderBase &llvmBuilder,
2060  llvm::OpenMPIRBuilder &ompBuilder, mlir::Operation *op,
2061  llvm::omp::Directive cancelDirective) {
2062  auto finiCB = [&](llvm::OpenMPIRBuilder::InsertPointTy ip) -> llvm::Error {
2063  llvm::IRBuilderBase::InsertPointGuard guard(llvmBuilder);
2064 
2065  // ip is currently in the block branched to if cancellation occured.
2066  // We need to create a branch to terminate that block.
2067  llvmBuilder.restoreIP(ip);
2068 
2069  // We must still clean up the construct after cancelling it, so we need to
2070  // branch to the block that finalizes the taskgroup.
2071  // That block has not been created yet so use this block as a dummy for now
2072  // and fix this after creating the operation.
2073  cancelTerminators.push_back(llvmBuilder.CreateBr(ip.getBlock()));
2074  return llvm::Error::success();
2075  };
2076  // We have to add the cleanup to the OpenMPIRBuilder before the body gets
2077  // created in case the body contains omp.cancel (which will then expect to be
2078  // able to find this cleanup callback).
2079  ompBuilder.pushFinalizationCB(
2080  {finiCB, cancelDirective, constructIsCancellable(op)});
2081 }
2082 
2083 /// If we cancelled the construct, we should branch to the finalization block of
2084 /// that construct. OMPIRBuilder structures the CFG such that the cleanup block
2085 /// is immediately before the continuation block. Now this finalization has
2086 /// been created we can fix the branch.
2087 static void
2089  llvm::OpenMPIRBuilder &ompBuilder,
2090  const llvm::OpenMPIRBuilder::InsertPointTy &afterIP) {
2091  ompBuilder.popFinalizationCB();
2092  llvm::BasicBlock *constructFini = afterIP.getBlock()->getSinglePredecessor();
2093  for (llvm::BranchInst *cancelBranch : cancelTerminators) {
2094  assert(cancelBranch->getNumSuccessors() == 1 &&
2095  "cancel branch should have one target");
2096  cancelBranch->setSuccessor(0, constructFini);
2097  }
2098 }
2099 
2100 namespace {
2101 /// TaskContextStructManager takes care of creating and freeing a structure
2102 /// containing information needed by the task body to execute.
2103 class TaskContextStructManager {
2104 public:
2105  TaskContextStructManager(llvm::IRBuilderBase &builder,
2106  LLVM::ModuleTranslation &moduleTranslation,
2108  : builder{builder}, moduleTranslation{moduleTranslation},
2109  privateDecls{privateDecls} {}
2110 
2111  /// Creates a heap allocated struct containing space for each private
2112  /// variable. Invariant: privateVarTypes, privateDecls, and the elements of
2113  /// the structure should all have the same order (although privateDecls which
2114  /// do not read from the mold argument are skipped).
2115  void generateTaskContextStruct();
2116 
2117  /// Create GEPs to access each member of the structure representing a private
2118  /// variable, adding them to llvmPrivateVars. Null values are added where
2119  /// private decls were skipped so that the ordering continues to match the
2120  /// private decls.
2121  void createGEPsToPrivateVars();
2122 
2123  /// De-allocate the task context structure.
2124  void freeStructPtr();
2125 
2126  MutableArrayRef<llvm::Value *> getLLVMPrivateVarGEPs() {
2127  return llvmPrivateVarGEPs;
2128  }
2129 
2130  llvm::Value *getStructPtr() { return structPtr; }
2131 
2132 private:
2133  llvm::IRBuilderBase &builder;
2134  LLVM::ModuleTranslation &moduleTranslation;
2136 
2137  /// The type of each member of the structure, in order.
2138  SmallVector<llvm::Type *> privateVarTypes;
2139 
2140  /// LLVM values for each private variable, or null if that private variable is
2141  /// not included in the task context structure
2142  SmallVector<llvm::Value *> llvmPrivateVarGEPs;
2143 
2144  /// A pointer to the structure containing context for this task.
2145  llvm::Value *structPtr = nullptr;
2146  /// The type of the structure
2147  llvm::Type *structTy = nullptr;
2148 };
2149 } // namespace
2150 
2151 void TaskContextStructManager::generateTaskContextStruct() {
2152  if (privateDecls.empty())
2153  return;
2154  privateVarTypes.reserve(privateDecls.size());
2155 
2156  for (omp::PrivateClauseOp &privOp : privateDecls) {
2157  // Skip private variables which can safely be allocated and initialised
2158  // inside of the task
2159  if (!privOp.readsFromMold())
2160  continue;
2161  Type mlirType = privOp.getType();
2162  privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
2163  }
2164 
2165  structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(),
2166  privateVarTypes);
2167 
2168  llvm::DataLayout dataLayout =
2169  builder.GetInsertBlock()->getModule()->getDataLayout();
2170  llvm::Type *intPtrTy = builder.getIntPtrTy(dataLayout);
2171  llvm::Constant *allocSize = llvm::ConstantExpr::getSizeOf(structTy);
2172 
2173  // Heap allocate the structure
2174  structPtr = builder.CreateMalloc(intPtrTy, structTy, allocSize,
2175  /*ArraySize=*/nullptr, /*MallocF=*/nullptr,
2176  "omp.task.context_ptr");
2177 }
2178 
2179 void TaskContextStructManager::createGEPsToPrivateVars() {
2180  if (!structPtr) {
2181  assert(privateVarTypes.empty());
2182  return;
2183  }
2184 
2185  // Create GEPs for each struct member
2186  llvmPrivateVarGEPs.clear();
2187  llvmPrivateVarGEPs.reserve(privateDecls.size());
2188  llvm::Value *zero = builder.getInt32(0);
2189  unsigned i = 0;
2190  for (auto privDecl : privateDecls) {
2191  if (!privDecl.readsFromMold()) {
2192  // Handle this inside of the task so we don't pass unnessecary vars in
2193  llvmPrivateVarGEPs.push_back(nullptr);
2194  continue;
2195  }
2196  llvm::Value *iVal = builder.getInt32(i);
2197  llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
2198  llvmPrivateVarGEPs.push_back(gep);
2199  i += 1;
2200  }
2201 }
2202 
2203 void TaskContextStructManager::freeStructPtr() {
2204  if (!structPtr)
2205  return;
2206 
2207  llvm::IRBuilderBase::InsertPointGuard guard{builder};
2208  // Ensure we don't put the call to free() after the terminator
2209  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
2210  builder.CreateFree(structPtr);
2211 }
2212 
2213 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
2214 static LogicalResult
2215 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
2216  LLVM::ModuleTranslation &moduleTranslation) {
2217  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2218  if (failed(checkImplementationStatus(*taskOp)))
2219  return failure();
2220 
2221  PrivateVarsInfo privateVarsInfo(taskOp);
2222  TaskContextStructManager taskStructMgr{builder, moduleTranslation,
2223  privateVarsInfo.privatizers};
2224 
2225  // Allocate and copy private variables before creating the task. This avoids
2226  // accessing invalid memory if (after this scope ends) the private variables
2227  // are initialized from host variables or if the variables are copied into
2228  // from host variables (firstprivate). The insertion point is just before
2229  // where the code for creating and scheduling the task will go. That puts this
2230  // code outside of the outlined task region, which is what we want because
2231  // this way the initialization and copy regions are executed immediately while
2232  // the host variable data are still live.
2233 
2234  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2235  findAllocaInsertPoint(builder, moduleTranslation);
2236 
2237  // Not using splitBB() because that requires the current block to have a
2238  // terminator.
2239  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
2240  llvm::BasicBlock *taskStartBlock = llvm::BasicBlock::Create(
2241  builder.getContext(), "omp.task.start",
2242  /*Parent=*/builder.GetInsertBlock()->getParent());
2243  llvm::Instruction *branchToTaskStartBlock = builder.CreateBr(taskStartBlock);
2244  builder.SetInsertPoint(branchToTaskStartBlock);
2245 
2246  // Now do this again to make the initialization and copy blocks
2247  llvm::BasicBlock *copyBlock =
2248  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
2249  llvm::BasicBlock *initBlock =
2250  splitBB(builder, /*CreateBranch=*/true, "omp.private.init");
2251 
2252  // Now the control flow graph should look like
2253  // starter_block:
2254  // <---- where we started when convertOmpTaskOp was called
2255  // br %omp.private.init
2256  // omp.private.init:
2257  // br %omp.private.copy
2258  // omp.private.copy:
2259  // br %omp.task.start
2260  // omp.task.start:
2261  // <---- where we want the insertion point to be when we call createTask()
2262 
2263  // Save the alloca insertion point on ModuleTranslation stack for use in
2264  // nested regions.
2266  moduleTranslation, allocaIP);
2267 
2268  // Allocate and initialize private variables
2269  builder.SetInsertPoint(initBlock->getTerminator());
2270 
2271  // Create task variable structure
2272  taskStructMgr.generateTaskContextStruct();
2273  // GEPs so that we can initialize the variables. Don't use these GEPs inside
2274  // of the body otherwise it will be the GEP not the struct which is fowarded
2275  // to the outlined function. GEPs forwarded in this way are passed in a
2276  // stack-allocated (by OpenMPIRBuilder) structure which is not safe for tasks
2277  // which may not be executed until after the current stack frame goes out of
2278  // scope.
2279  taskStructMgr.createGEPsToPrivateVars();
2280 
2281  for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
2282  llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
2283  privateVarsInfo.blockArgs,
2284  taskStructMgr.getLLVMPrivateVarGEPs())) {
2285  // To be handled inside the task.
2286  if (!privDecl.readsFromMold())
2287  continue;
2288  assert(llvmPrivateVarAlloc &&
2289  "reads from mold so shouldn't have been skipped");
2290 
2291  llvm::Expected<llvm::Value *> privateVarOrErr =
2292  initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2293  blockArg, llvmPrivateVarAlloc, initBlock);
2294  if (!privateVarOrErr)
2295  return handleError(privateVarOrErr, *taskOp.getOperation());
2296 
2298 
2299  // TODO: this is a bit of a hack for Fortran character boxes.
2300  // Character boxes are passed by value into the init region and then the
2301  // initialized character box is yielded by value. Here we need to store the
2302  // yielded value into the private allocation, and load the private
2303  // allocation to match the type expected by region block arguments.
2304  if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
2305  !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
2306  builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);
2307  // Load it so we have the value pointed to by the GEP
2308  llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(),
2309  llvmPrivateVarAlloc);
2310  }
2311  assert(llvmPrivateVarAlloc->getType() ==
2312  moduleTranslation.convertType(blockArg.getType()));
2313 
2314  // Mapping blockArg -> llvmPrivateVarAlloc is done inside the body callback
2315  // so that OpenMPIRBuilder doesn't try to pass each GEP address through a
2316  // stack allocated structure.
2317  }
2318 
2319  // firstprivate copy region
2320  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
2321  if (failed(copyFirstPrivateVars(
2322  taskOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
2323  taskStructMgr.getLLVMPrivateVarGEPs(), privateVarsInfo.privatizers,
2324  taskOp.getPrivateNeedsBarrier())))
2325  return llvm::failure();
2326 
2327  // Set up for call to createTask()
2328  builder.SetInsertPoint(taskStartBlock);
2329 
2330  auto bodyCB = [&](InsertPointTy allocaIP,
2331  InsertPointTy codegenIP) -> llvm::Error {
2332  // Save the alloca insertion point on ModuleTranslation stack for use in
2333  // nested regions.
2335  moduleTranslation, allocaIP);
2336 
2337  // translate the body of the task:
2338  builder.restoreIP(codegenIP);
2339 
2340  llvm::BasicBlock *privInitBlock = nullptr;
2341  privateVarsInfo.llvmVars.resize(privateVarsInfo.blockArgs.size());
2342  for (auto [i, zip] : llvm::enumerate(llvm::zip_equal(
2343  privateVarsInfo.blockArgs, privateVarsInfo.privatizers,
2344  privateVarsInfo.mlirVars))) {
2345  auto [blockArg, privDecl, mlirPrivVar] = zip;
2346  // This is handled before the task executes
2347  if (privDecl.readsFromMold())
2348  continue;
2349 
2350  llvm::IRBuilderBase::InsertPointGuard guard(builder);
2351  llvm::Type *llvmAllocType =
2352  moduleTranslation.convertType(privDecl.getType());
2353  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
2354  llvm::Value *llvmPrivateVar = builder.CreateAlloca(
2355  llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
2356 
2357  llvm::Expected<llvm::Value *> privateVarOrError =
2358  initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2359  blockArg, llvmPrivateVar, privInitBlock);
2360  if (!privateVarOrError)
2361  return privateVarOrError.takeError();
2362  moduleTranslation.mapValue(blockArg, privateVarOrError.get());
2363  privateVarsInfo.llvmVars[i] = privateVarOrError.get();
2364  }
2365 
2366  taskStructMgr.createGEPsToPrivateVars();
2367  for (auto [i, llvmPrivVar] :
2368  llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) {
2369  if (!llvmPrivVar) {
2370  assert(privateVarsInfo.llvmVars[i] &&
2371  "This is added in the loop above");
2372  continue;
2373  }
2374  privateVarsInfo.llvmVars[i] = llvmPrivVar;
2375  }
2376 
2377  // Find and map the addresses of each variable within the task context
2378  // structure
2379  for (auto [blockArg, llvmPrivateVar, privateDecl] :
2380  llvm::zip_equal(privateVarsInfo.blockArgs, privateVarsInfo.llvmVars,
2381  privateVarsInfo.privatizers)) {
2382  // This was handled above.
2383  if (!privateDecl.readsFromMold())
2384  continue;
2385  // Fix broken pass-by-value case for Fortran character boxes
2386  if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
2387  llvmPrivateVar = builder.CreateLoad(
2388  moduleTranslation.convertType(blockArg.getType()), llvmPrivateVar);
2389  }
2390  assert(llvmPrivateVar->getType() ==
2391  moduleTranslation.convertType(blockArg.getType()));
2392  moduleTranslation.mapValue(blockArg, llvmPrivateVar);
2393  }
2394 
2395  auto continuationBlockOrError = convertOmpOpRegions(
2396  taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
2397  if (failed(handleError(continuationBlockOrError, *taskOp)))
2398  return llvm::make_error<PreviouslyReportedError>();
2399 
2400  builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
2401 
2402  if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(),
2403  privateVarsInfo.llvmVars,
2404  privateVarsInfo.privatizers)))
2405  return llvm::make_error<PreviouslyReportedError>();
2406 
2407  // Free heap allocated task context structure at the end of the task.
2408  taskStructMgr.freeStructPtr();
2409 
2410  return llvm::Error::success();
2411  };
2412 
2413  llvm::OpenMPIRBuilder &ompBuilder = *moduleTranslation.getOpenMPBuilder();
2414  SmallVector<llvm::BranchInst *> cancelTerminators;
2415  // The directive to match here is OMPD_taskgroup because it is the taskgroup
2416  // which is canceled. This is handled here because it is the task's cleanup
2417  // block which should be branched to.
2418  pushCancelFinalizationCB(cancelTerminators, builder, ompBuilder, taskOp,
2419  llvm::omp::Directive::OMPD_taskgroup);
2420 
2422  buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
2423  moduleTranslation, dds);
2424 
2425  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2426  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2427  moduleTranslation.getOpenMPBuilder()->createTask(
2428  ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
2429  moduleTranslation.lookupValue(taskOp.getFinal()),
2430  moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
2431  taskOp.getMergeable(),
2432  moduleTranslation.lookupValue(taskOp.getEventHandle()),
2433  moduleTranslation.lookupValue(taskOp.getPriority()));
2434 
2435  if (failed(handleError(afterIP, *taskOp)))
2436  return failure();
2437 
2438  // Set the correct branch target for task cancellation
2439  popCancelFinalizationCB(cancelTerminators, ompBuilder, afterIP.get());
2440 
2441  builder.restoreIP(*afterIP);
2442  return success();
2443 }
2444 
2445 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
2446 static LogicalResult
2447 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
2448  LLVM::ModuleTranslation &moduleTranslation) {
2449  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2450  if (failed(checkImplementationStatus(*tgOp)))
2451  return failure();
2452 
2453  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
2454  builder.restoreIP(codegenIP);
2455  return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region",
2456  builder, moduleTranslation)
2457  .takeError();
2458  };
2459 
2460  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
2461  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2462  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2463  moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP,
2464  bodyCB);
2465 
2466  if (failed(handleError(afterIP, *tgOp)))
2467  return failure();
2468 
2469  builder.restoreIP(*afterIP);
2470  return success();
2471 }
2472 
2473 static LogicalResult
2474 convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder,
2475  LLVM::ModuleTranslation &moduleTranslation) {
2476  if (failed(checkImplementationStatus(*twOp)))
2477  return failure();
2478 
2479  moduleTranslation.getOpenMPBuilder()->createTaskwait(builder.saveIP());
2480  return success();
2481 }
2482 
/// Converts an OpenMP workshare loop (`omp.wsloop` wrapping an
/// `omp.loop_nest`) into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  auto wsloopOp = cast<omp::WsloopOp>(opInst);
  // Report clauses that this translation does not support yet.
  if (failed(checkImplementationStatus(opInst)))
    return failure();

  auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
  // One by-ref flag per reduction variable.
  llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref());
  assert(isByRef.size() == wsloopOp.getNumReductionVars());

  // Static is the default.
  auto schedule =
      wsloopOp.getScheduleKind().value_or(omp::ClauseScheduleKind::Static);

  // Find the loop configuration.
  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]);
  llvm::Type *ivType = step->getType();
  llvm::Value *chunk = nullptr;
  if (wsloopOp.getScheduleChunk()) {
    llvm::Value *chunkVar =
        moduleTranslation.lookupValue(wsloopOp.getScheduleChunk());
    // The chunk size is converted to the induction variable's type.
    chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
  }

  PrivateVarsInfo privateVarsInfo(wsloopOp);

  SmallVector<omp::DeclareReductionOp> reductionDecls;
  collectReductionDecls(wsloopOp, reductionDecls);
  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  SmallVector<llvm::Value *> privateReductionVariables(
      wsloopOp.getNumReductionVars());

  // NOTE(review): the declaration of `afterAllocas` (the result of allocating
  // the private variables, used below) appears to be missing from this
  // extract -- confirm against the original source file.
      builder, moduleTranslation, privateVarsInfo, allocaIP);
  if (handleError(afterAllocas, opInst).failed())
    return failure();

  DenseMap<Value, llvm::Value *> reductionVariableMap;

  MutableArrayRef<BlockArgument> reductionArgs =
      cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();

  SmallVector<DeferredStore> deferredStores;

  // Allocate storage for the reduction variables.
  if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
                                moduleTranslation, allocaIP, reductionDecls,
                                privateReductionVariables, reductionVariableMap,
                                deferredStores, isByRef)))
    return failure();

  // Run the `init` regions of the privatizers.
  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
                  opInst)
          .failed())
    return failure();

  // Copy mold values into firstprivate copies.
  if (failed(copyFirstPrivateVars(
          wsloopOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
          privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
          wsloopOp.getPrivateNeedsBarrier())))
    return failure();

  assert(afterAllocas.get()->getSinglePredecessor());
  if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
                               moduleTranslation,
                               afterAllocas.get()->getSinglePredecessor(),
                               reductionDecls, privateReductionVariables,
                               reductionVariableMap, isByRef, deferredStores)))
    return failure();

  // TODO: Handle doacross loops when the ordered clause has a parameter.
  bool isOrdered = wsloopOp.getOrdered().has_value();
  std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
  bool isSimd = wsloopOp.getScheduleSimd();
  bool loopNeedsBarrier = !wsloopOp.getNowait();

  // The only legal way for the direct parent to be omp.distribute is that this
  // represents 'distribute parallel do'. Otherwise, this is a regular
  // worksharing loop.
  llvm::omp::WorksharingLoopType workshareLoopType =
      llvm::isa_and_present<omp::DistributeOp>(opInst.getParentOp())
          ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop
          : llvm::omp::WorksharingLoopType::ForStaticLoop;

  // Register cleanup targets for `omp.cancel` of this worksharing loop.
  SmallVector<llvm::BranchInst *> cancelTerminators;
  pushCancelFinalizationCB(cancelTerminators, builder, *ompBuilder, wsloopOp,
                           llvm::omp::Directive::OMPD_for);

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  // Initialize linear variables and linear step
  LinearClauseProcessor linearClauseProcessor;
  if (wsloopOp.getLinearVars().size()) {
    for (mlir::Value linearVar : wsloopOp.getLinearVars())
      linearClauseProcessor.createLinearVar(builder, moduleTranslation,
                                            linearVar);
    for (mlir::Value linearStep : wsloopOp.getLinearStepVars())
      linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);
  }

  // NOTE(review): the declaration of `regionBlock` (result of converting the
  // wsloop region) appears to be missing from this extract -- confirm against
  // the original source file.
      wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation);

  if (failed(handleError(regionBlock, opInst)))
    return failure();

  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);

  // Emit Initialization and Update IR for linear variables
  if (wsloopOp.getLinearVars().size()) {
    llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
        linearClauseProcessor.initLinearVar(builder, moduleTranslation,
                                            loopInfo->getPreheader());
    if (failed(handleError(afterBarrierIP, *loopOp)))
      return failure();
    builder.restoreIP(*afterBarrierIP);
    linearClauseProcessor.updateLinearVar(builder, loopInfo->getBody(),
                                          loopInfo->getIndVar());
    linearClauseProcessor.outlineLinearFinalizationBB(builder,
                                                      loopInfo->getExit());
  }

  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
      ompBuilder->applyWorkshareLoop(
          ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
          convertToScheduleKind(schedule), chunk, isSimd,
          scheduleMod == omp::ScheduleModifier::monotonic,
          scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
          workshareLoopType);

  if (failed(handleError(wsloopIP, opInst)))
    return failure();

  // Emit finalization and in-place rewrites for linear vars.
  if (wsloopOp.getLinearVars().size()) {
    llvm::OpenMPIRBuilder::InsertPointTy oldIP = builder.saveIP();
    assert(loopInfo->getLastIter() &&
           "`lastiter` in CanonicalLoopInfo is nullptr");
    llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
        linearClauseProcessor.finalizeLinearVar(builder, moduleTranslation,
                                                loopInfo->getLastIter());
    if (failed(handleError(afterBarrierIP, *loopOp)))
      return failure();
    for (size_t index = 0; index < wsloopOp.getLinearVars().size(); index++)
      linearClauseProcessor.rewriteInPlace(builder, "omp.loop_nest.region",
                                           index);
    builder.restoreIP(oldIP);
  }

  // Set the correct branch target for task cancellation
  popCancelFinalizationCB(cancelTerminators, *ompBuilder, wsloopIP.get());

  // Process the reductions if required.
  if (failed(createReductionsAndCleanup(
          wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
          privateReductionVariables, isByRef, wsloopOp.getNowait(),
          /*isTeamsReduction=*/false)))
    return failure();

  // Run the `dealloc` regions of the privatizers and clean up private copies.
  return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
                            privateVarsInfo.llvmVars,
                            privateVarsInfo.privatizers);
}
2651 
/// Converts the OpenMP parallel operation to LLVM IR.
static LogicalResult
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  // One by-ref flag per reduction variable.
  ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionByref());
  assert(isByRef.size() == opInst.getNumReductionVars());
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

  // Report clauses that this translation does not support yet.
  if (failed(checkImplementationStatus(*opInst)))
    return failure();

  PrivateVarsInfo privateVarsInfo(opInst);

  // Collect reduction declarations
  SmallVector<omp::DeclareReductionOp> reductionDecls;
  collectReductionDecls(opInst, reductionDecls);
  SmallVector<llvm::Value *> privateReductionVariables(
      opInst.getNumReductionVars());
  SmallVector<DeferredStore> deferredStores;

  // Body callback: allocates and initializes private/reduction variables,
  // then lowers the parallel region itself.
  auto bodyGenCB = [&](InsertPointTy allocaIP,
                       InsertPointTy codeGenIP) -> llvm::Error {
    // NOTE(review): the declaration of `afterAllocas` (the result of
    // allocating the private variables) appears to be missing from this
    // extract -- confirm against the original source file.
        builder, moduleTranslation, privateVarsInfo, allocaIP);
    if (handleError(afterAllocas, *opInst).failed())
      return llvm::make_error<PreviouslyReportedError>();

    // Allocate reduction vars
    DenseMap<Value, llvm::Value *> reductionVariableMap;

    MutableArrayRef<BlockArgument> reductionArgs =
        cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs();

    // Re-anchor the alloca insertion point just before the block terminator.
    allocaIP =
        InsertPointTy(allocaIP.getBlock(),
                      allocaIP.getBlock()->getTerminator()->getIterator());

    if (failed(allocReductionVars(
            opInst, reductionArgs, builder, moduleTranslation, allocaIP,
            reductionDecls, privateReductionVariables, reductionVariableMap,
            deferredStores, isByRef)))
      return llvm::make_error<PreviouslyReportedError>();

    assert(afterAllocas.get()->getSinglePredecessor());
    builder.restoreIP(codeGenIP);

    // Run the `init` regions of the privatizers.
    if (handleError(
            initPrivateVars(builder, moduleTranslation, privateVarsInfo),
            *opInst)
            .failed())
      return llvm::make_error<PreviouslyReportedError>();

    // Copy mold values into firstprivate copies.
    if (failed(copyFirstPrivateVars(
            opInst, builder, moduleTranslation, privateVarsInfo.mlirVars,
            privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
            opInst.getPrivateNeedsBarrier())))
      return llvm::make_error<PreviouslyReportedError>();

    if (failed(
            initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
                              afterAllocas.get()->getSinglePredecessor(),
                              reductionDecls, privateReductionVariables,
                              reductionVariableMap, isByRef, deferredStores)))
      return llvm::make_error<PreviouslyReportedError>();

    // Save the alloca insertion point on ModuleTranslation stack for use in
    // nested regions.
    // NOTE(review): the stack-frame construction line(s) appear to be missing
    // from this extract -- confirm against the original source file.
        moduleTranslation, allocaIP);

    // ParallelOp has only one region associated with it.
    // NOTE(review): the declaration of `regionBlock` appears to be missing
    // from this extract.
        opInst.getRegion(), "omp.par.region", builder, moduleTranslation);
    if (!regionBlock)
      return regionBlock.takeError();

    // Process the reductions if required.
    if (opInst.getNumReductionVars() > 0) {
      // Collect reduction info
      SmallVector<OwningReductionGen> owningReductionGens;
      SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
      // NOTE(review): the declaration of `reductionInfos` appears to be
      // missing from this extract.
      collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
                           owningReductionGens, owningAtomicReductionGens,
                           privateReductionVariables, reductionInfos);

      // Move to region cont block
      builder.SetInsertPoint((*regionBlock)->getTerminator());

      // Generate reductions from info
      llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
      builder.SetInsertPoint(tempTerminator);

      llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
          ompBuilder->createReductions(
              builder.saveIP(), allocaIP, reductionInfos, isByRef,
              /*IsNoWait=*/false, /*IsTeamsReduction=*/false);
      if (!contInsertPoint)
        return contInsertPoint.takeError();

      if (!contInsertPoint->getBlock())
        return llvm::make_error<PreviouslyReportedError>();

      // The placeholder terminator is no longer needed once the reductions
      // have provided a continuation point.
      tempTerminator->eraseFromParent();
      builder.restoreIP(*contInsertPoint);
    }

    return llvm::Error::success();
  };

  auto privCB = [](InsertPointTy allocaIP, InsertPointTy codeGenIP,
                   llvm::Value &, llvm::Value &val, llvm::Value *&replVal) {
    // tell OpenMPIRBuilder not to do anything. We handled Privatisation in
    // bodyGenCB.
    replVal = &val;
    return codeGenIP;
  };

  // TODO: Perform finalization actions for variables. This has to be
  // called for variables which have destructors/finalizers.
  auto finiCB = [&](InsertPointTy codeGenIP) -> llvm::Error {
    InsertPointTy oldIP = builder.saveIP();
    builder.restoreIP(codeGenIP);

    // if the reduction has a cleanup region, inline it here to finalize the
    // reduction variables
    SmallVector<Region *> reductionCleanupRegions;
    llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions),
                    [](omp::DeclareReductionOp reductionDecl) {
                      return &reductionDecl.getCleanupRegion();
                    });
    if (failed(inlineOmpRegionCleanup(
            reductionCleanupRegions, privateReductionVariables,
            moduleTranslation, builder, "omp.reduction.cleanup")))
      return llvm::createStringError(
          "failed to inline `cleanup` region of `omp.declare_reduction`");

    if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(),
                                  privateVarsInfo.llvmVars,
                                  privateVarsInfo.privatizers)))
      return llvm::make_error<PreviouslyReportedError>();

    builder.restoreIP(oldIP);
    return llvm::Error::success();
  };

  // Translate the `if`, `num_threads` and `proc_bind` clauses, when present.
  llvm::Value *ifCond = nullptr;
  if (auto ifVar = opInst.getIfExpr())
    ifCond = moduleTranslation.lookupValue(ifVar);
  llvm::Value *numThreads = nullptr;
  if (auto numThreadsVar = opInst.getNumThreads())
    numThreads = moduleTranslation.lookupValue(numThreadsVar);
  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
  if (auto bind = opInst.getProcBindKind())
    pbKind = getProcBindKind(*bind);
  bool isCancellable = constructIsCancellable(opInst);

  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
                                 ifCond, numThreads, pbKind, isCancellable);

  if (failed(handleError(afterIP, *opInst)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
2824 
2825 /// Convert Order attribute to llvm::omp::OrderKind.
2826 static llvm::omp::OrderKind
2827 convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
2828  if (!o)
2829  return llvm::omp::OrderKind::OMP_ORDER_unknown;
2830  switch (*o) {
2831  case omp::ClauseOrderKind::Concurrent:
2832  return llvm::omp::OrderKind::OMP_ORDER_concurrent;
2833  }
2834  llvm_unreachable("Unknown ClauseOrderKind kind");
2835 }
2836 
/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
               LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  auto simdOp = cast<omp::SimdOp>(opInst);

  // TODO: Replace this with proper composite translation support.
  // Currently, simd information on composite constructs is ignored, so e.g.
  // 'do/for simd' will be treated the same as a standalone 'do/for'. This is
  // allowed by the spec, since it's equivalent to using a SIMD length of 1.
  if (simdOp.isComposite()) {
    if (failed(convertIgnoredWrapper(simdOp, moduleTranslation)))
      return failure();

    return inlineConvertOmpRegions(simdOp.getRegion(), "omp.simd.region",
                                   builder, moduleTranslation);
  }

  // Report clauses that this translation does not support yet.
  if (failed(checkImplementationStatus(opInst)))
    return failure();

  PrivateVarsInfo privateVarsInfo(simdOp);

  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  // NOTE(review): the declaration of `afterAllocas` (result of allocating the
  // private variables) appears to be missing from this extract -- confirm
  // against the original source file.
      builder, moduleTranslation, privateVarsInfo, allocaIP);
  if (handleError(afterAllocas, opInst).failed())
    return failure();

  // Run the `init` regions of the privatizers.
  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
                  opInst)
          .failed())
    return failure();

  // Translate the optional `simdlen` and `safelen` clauses to i64 constants.
  llvm::ConstantInt *simdlen = nullptr;
  if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
    simdlen = builder.getInt64(simdlenVar.value());

  llvm::ConstantInt *safelen = nullptr;
  if (std::optional<uint64_t> safelenVar = simdOp.getSafelen())
    safelen = builder.getInt64(safelenVar.value());

  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
  llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder());

  // Collect aligned variables and their alignments; each aligned pointer is
  // loaded in the block that precedes the loop.
  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
  std::optional<ArrayAttr> alignmentValues = simdOp.getAlignments();
  mlir::OperandRange operands = simdOp.getAlignedVars();
  for (size_t i = 0; i < operands.size(); ++i) {
    llvm::Value *alignment = nullptr;
    llvm::Value *llvmVal = moduleTranslation.lookupValue(operands[i]);
    llvm::Type *ty = llvmVal->getType();

    auto intAttr = cast<IntegerAttr>((*alignmentValues)[i]);
    alignment = builder.getInt64(intAttr.getInt());
    assert(ty->isPointerTy() && "Invalid type for aligned variable");
    assert(alignment && "Invalid alignment value");
    // Emit the load in `sourceBlock`, then return to the current position.
    auto curInsert = builder.saveIP();
    builder.SetInsertPoint(sourceBlock);
    llvmVal = builder.CreateLoad(ty, llvmVal);
    builder.restoreIP(curInsert);
    alignedVars[llvmVal] = alignment;
  }

  // NOTE(review): the declaration of `regionBlock` (result of converting the
  // simd region) appears to be missing from this extract.
      simdOp.getRegion(), "omp.simd.region", builder, moduleTranslation);

  if (failed(handleError(regionBlock, opInst)))
    return failure();

  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
  ompBuilder->applySimd(loopInfo, alignedVars,
                        simdOp.getIfExpr()
                            ? moduleTranslation.lookupValue(simdOp.getIfExpr())
                            : nullptr,
                        order, simdlen, safelen);

  // Run the `dealloc` regions of the privatizers and clean up private copies.
  return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
                            privateVarsInfo.llvmVars,
                            privateVarsInfo.privatizers);
}
2922 
/// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  auto loopOp = cast<omp::LoopNestOp>(opInst);

  // Set up the source location value for OpenMP runtime.
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  // Generator of the canonical loop body.
  // NOTE(review): the declarations of `loopInfos` and `bodyInsertPoints`
  // (captured by the callback below) appear to be missing from this
  // extract -- confirm against the original source file.
  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip,
                     llvm::Value *iv) -> llvm::Error {
    // Make sure further conversions know about the induction variable.
    moduleTranslation.mapValue(
        loopOp.getRegion().front().getArgument(loopInfos.size()), iv);

    // Capture the body insertion point for use in nested loops. BodyIP of the
    // CanonicalLoopInfo always points to the beginning of the entry block of
    // the body.
    bodyInsertPoints.push_back(ip);

    // Only the innermost loop of the nest converts the region body.
    if (loopInfos.size() != loopOp.getNumLoops() - 1)
      return llvm::Error::success();

    // Convert the body of the loop.
    builder.restoreIP(ip);
    // NOTE(review): the declaration of `regionBlock` appears to be missing
    // from this extract.
        loopOp.getRegion(), "omp.loop_nest.region", builder, moduleTranslation);
    if (!regionBlock)
      return regionBlock.takeError();

    builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
    return llvm::Error::success();
  };

  // Delegate actual loop construction to the OpenMP IRBuilder.
  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
  // loop, i.e. it has a positive step, uses signed integer semantics.
  // Reconsider this code when the nested loop operation clearly supports more
  // cases.
  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
    llvm::Value *lowerBound =
        moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]);
    llvm::Value *upperBound =
        moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]);
    llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]);

    // Make sure loop trip count are emitted in the preheader of the outermost
    // loop at the latest so that they are all available for the new collapsed
    // loop will be created below.
    llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
    llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
    if (i != 0) {
      loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
                                                       ompLoc.DL);
      computeIP = loopInfos.front()->getPreheaderIP();
    }

    // NOTE(review): the declaration of `loopResult` appears to be missing
    // from this extract.
        ompBuilder->createCanonicalLoop(
            loc, bodyGen, lowerBound, upperBound, step,
            /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);

    if (failed(handleError(loopResult, *loopOp)))
      return failure();

    loopInfos.push_back(*loopResult);
  }

  // Collapse loops. Store the insertion point because LoopInfos may get
  // invalidated.
  llvm::OpenMPIRBuilder::InsertPointTy afterIP =
      loopInfos.front()->getAfterIP();

  // Update the stack frame created for this loop to point to the resulting loop
  // after applying transformations.
  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
      [&](OpenMPLoopInfoStackFrame &frame) {
        frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
        return WalkResult::interrupt();
      });

  // Continue building IR after the loop. Note that the LoopInfo returned by
  // `collapseLoops` points inside the outermost loop and is intended for
  // potential further loop transformations. Use the insertion point stored
  // before collapsing loops instead.
  builder.restoreIP(afterIP);
  return success();
}
3015 
3016 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
3017 static llvm::AtomicOrdering
3018 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
3019  if (!ao)
3020  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
3021 
3022  switch (*ao) {
3023  case omp::ClauseMemoryOrderKind::Seq_cst:
3024  return llvm::AtomicOrdering::SequentiallyConsistent;
3025  case omp::ClauseMemoryOrderKind::Acq_rel:
3026  return llvm::AtomicOrdering::AcquireRelease;
3027  case omp::ClauseMemoryOrderKind::Acquire:
3028  return llvm::AtomicOrdering::Acquire;
3029  case omp::ClauseMemoryOrderKind::Release:
3030  return llvm::AtomicOrdering::Release;
3031  case omp::ClauseMemoryOrderKind::Relaxed:
3032  return llvm::AtomicOrdering::Monotonic;
3033  }
3034  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
3035 }
3036 
3037 /// Convert omp.atomic.read operation to LLVM IR.
3038 static LogicalResult
3039 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
3040  LLVM::ModuleTranslation &moduleTranslation) {
3041  auto readOp = cast<omp::AtomicReadOp>(opInst);
3042  if (failed(checkImplementationStatus(opInst)))
3043  return failure();
3044 
3045  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3046  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3047  findAllocaInsertPoint(builder, moduleTranslation);
3048 
3049  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3050 
3051  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrder());
3052  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
3053  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
3054 
3055  llvm::Type *elementType =
3056  moduleTranslation.convertType(readOp.getElementType());
3057 
3058  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
3059  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
3060  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO, allocaIP));
3061  return success();
3062 }
3063 
3064 /// Converts an omp.atomic.write operation to LLVM IR.
3065 static LogicalResult
3066 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
3067  LLVM::ModuleTranslation &moduleTranslation) {
3068  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
3069  if (failed(checkImplementationStatus(opInst)))
3070  return failure();
3071 
3072  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3073  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3074  findAllocaInsertPoint(builder, moduleTranslation);
3075 
3076  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3077  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder());
3078  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
3079  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
3080  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
3081  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
3082  /*isVolatile=*/false};
3083  builder.restoreIP(
3084  ompBuilder->createAtomicWrite(ompLoc, x, expr, ao, allocaIP));
3085  return success();
3086 }
3087 
/// Converts an LLVM dialect binary operation to the corresponding enum value
/// for `atomicrmw` supported binary operation.
/// Returns BAD_BINOP for operations without a direct `atomicrmw` equivalent;
/// the atomic-update lowering falls back to a cmpxchg loop in that case.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
  // NOTE(review): the line constructing the llvm::TypeSwitch over `&op`
  // (returned by this function) appears to be missing from this extract --
  // confirm against the original source file.
      .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
      .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
      .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
      .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
      .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
      .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
      .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
      .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
      .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
      .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
}
3103 
/// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
///
/// If the update region contains a single recognizable binary operation on
/// the region argument, the information needed to emit an `atomicrmw` is
/// extracted; otherwise the OpenMPIRBuilder falls back to a cmpxchg loop
/// driven by the `updateFn` callback below.
static LogicalResult
convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
                       llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  // Reject clauses this translation does not support yet.
  if (failed(checkImplementationStatus(*opInst)))
    return failure();

  // Convert values and types.
  auto &innerOpList = opInst.getRegion().front().getOperations();
  bool isXBinopExpr{false};
  llvm::AtomicRMWInst::BinOp binop;
  mlir::Value mlirExpr;
  llvm::Value *llvmExpr = nullptr;
  llvm::Value *llvmX = nullptr;
  llvm::Type *llvmXElementType = nullptr;
  if (innerOpList.size() == 2) {
    // The two operations here are the update and the terminator.
    // Since we can identify the update operation, there is a possibility
    // that we can generate the atomicrmw instruction.
    mlir::Operation &innerOp = *opInst.getRegion().front().begin();
    if (!llvm::is_contained(innerOp.getOperands(),
                            opInst.getRegion().getArgument(0))) {
      return opInst.emitError("no atomic update operation with region argument"
                              " as operand found inside atomic.update region");
    }
    binop = convertBinOpToAtomic(innerOp);
    // Record whether the atomic variable `x` is the LHS (`x binop expr`) or
    // the RHS (`expr binop x`) of the update expression.
    isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
    mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  } else {
    // Since the update region includes more than one operation
    // we will resort to generating a cmpxchg loop.
    binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
  }
  llvmX = moduleTranslation.lookupValue(opInst.getX());
  llvmXElementType = moduleTranslation.convertType(
      opInst.getRegion().getArgument(0).getType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(opInst.getMemoryOrder());

  // Generate update code. The callback translates the update region's body
  // with `atomicx` standing in for the current value of `x`, and returns the
  // yielded updated value.
  auto updateFn =
      [&opInst, &moduleTranslation](
          llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    Block &bb = *opInst.getRegion().begin();
    moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
                                     atomicOrdering, binop, updateFn,
                                     isXBinopExpr);

  if (failed(handleError(afterIP, *opInst)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
3182 
/// Converts an OpenMP atomic capture construct (an atomic read paired with
/// either an atomic update or an atomic write) using OpenMPIRBuilder.
static LogicalResult
convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
                        llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  // Reject clauses this translation does not support yet.
  if (failed(checkImplementationStatus(*atomicCaptureOp)))
    return failure();

  mlir::Value mlirExpr;
  bool isXBinopExpr = false, isPostfixUpdate = false;
  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;

  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();

  assert((atomicUpdateOp || atomicWriteOp) &&
         "internal op must be an atomic.update or atomic.write op");

  if (atomicWriteOp) {
    // `v = x; x = expr` — the capture happens before the write.
    isPostfixUpdate = true;
    mlirExpr = atomicWriteOp.getExpr();
  } else {
    // Postfix means the read is performed first (capture the old value),
    // i.e. the update op is the second op inside the capture region.
    isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
                      atomicCaptureOp.getAtomicUpdateOp().getOperation();
    auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
    // Find the binary update operation that uses the region argument
    // and get the expression to update
    if (innerOpList.size() == 2) {
      mlir::Operation &innerOp = *atomicUpdateOp.getRegion().front().begin();
      if (!llvm::is_contained(innerOp.getOperands(),
                              atomicUpdateOp.getRegion().getArgument(0))) {
        return atomicUpdateOp.emitError(
            "no atomic update operation with region argument"
            " as operand found inside atomic.update region");
      }
      binop = convertBinOpToAtomic(innerOp);
      // Record whether `x` is the LHS (`x binop expr`) or the RHS
      // (`expr binop x`) of the update expression.
      isXBinopExpr =
          innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
      mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    } else {
      // More than one op in the update region: fall back to a cmpxchg loop.
      binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
    }
  }

  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  llvm::Value *llvmX =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
  llvm::Value *llvmV =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
  llvm::Type *llvmXElementType = moduleTranslation.convertType(
      atomicCaptureOp.getAtomicReadOp().getElementType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(atomicCaptureOp.getMemoryOrder());

  // Callback that produces the updated value: for an atomic write the new
  // value is simply the written expression; for an update, translate the
  // update region's body with `atomicx` standing in for the current `x`.
  auto updateFn =
      [&](llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    if (atomicWriteOp)
      return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
    Block &bb = *atomicUpdateOp.getRegion().begin();
    moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
                               atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicCapture(
          ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
          binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr);

  if (failed(handleError(afterIP, *atomicCaptureOp)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
3277 
3278 static llvm::omp::Directive convertCancellationConstructType(
3279  omp::ClauseCancellationConstructType directive) {
3280  switch (directive) {
3281  case omp::ClauseCancellationConstructType::Loop:
3282  return llvm::omp::Directive::OMPD_for;
3283  case omp::ClauseCancellationConstructType::Parallel:
3284  return llvm::omp::Directive::OMPD_parallel;
3285  case omp::ClauseCancellationConstructType::Sections:
3286  return llvm::omp::Directive::OMPD_sections;
3287  case omp::ClauseCancellationConstructType::Taskgroup:
3288  return llvm::omp::Directive::OMPD_taskgroup;
3289  }
3290 }
3291 
3292 static LogicalResult
3293 convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder,
3294  LLVM::ModuleTranslation &moduleTranslation) {
3295  if (failed(checkImplementationStatus(*op.getOperation())))
3296  return failure();
3297 
3298  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3299  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3300 
3301  llvm::Value *ifCond = nullptr;
3302  if (Value ifVar = op.getIfExpr())
3303  ifCond = moduleTranslation.lookupValue(ifVar);
3304 
3305  llvm::omp::Directive cancelledDirective =
3306  convertCancellationConstructType(op.getCancelDirective());
3307 
3308  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3309  ompBuilder->createCancel(ompLoc, ifCond, cancelledDirective);
3310 
3311  if (failed(handleError(afterIP, *op.getOperation())))
3312  return failure();
3313 
3314  builder.restoreIP(afterIP.get());
3315 
3316  return success();
3317 }
3318 
3319 static LogicalResult
3320 convertOmpCancellationPoint(omp::CancellationPointOp op,
3321  llvm::IRBuilderBase &builder,
3322  LLVM::ModuleTranslation &moduleTranslation) {
3323  if (failed(checkImplementationStatus(*op.getOperation())))
3324  return failure();
3325 
3326  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3327  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3328 
3329  llvm::omp::Directive cancelledDirective =
3330  convertCancellationConstructType(op.getCancelDirective());
3331 
3332  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3333  ompBuilder->createCancellationPoint(ompLoc, cancelledDirective);
3334 
3335  if (failed(handleError(afterIP, *op.getOperation())))
3336  return failure();
3337 
3338  builder.restoreIP(afterIP.get());
3339 
3340  return success();
3341 }
3342 
3343 /// Converts an OpenMP Threadprivate operation into LLVM IR using
3344 /// OpenMPIRBuilder.
3345 static LogicalResult
3346 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
3347  LLVM::ModuleTranslation &moduleTranslation) {
3348  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3349  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3350  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
3351 
3352  if (failed(checkImplementationStatus(opInst)))
3353  return failure();
3354 
3355  Value symAddr = threadprivateOp.getSymAddr();
3356  auto *symOp = symAddr.getDefiningOp();
3357 
3358  if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
3359  symOp = asCast.getOperand().getDefiningOp();
3360 
3361  if (!isa<LLVM::AddressOfOp>(symOp))
3362  return opInst.emitError("Addressing symbol not found");
3363  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
3364 
3365  LLVM::GlobalOp global =
3366  addressOfOp.getGlobal(moduleTranslation.symbolTable());
3367  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
3368 
3369  if (!ompBuilder->Config.isTargetDevice()) {
3370  llvm::Type *type = globalValue->getValueType();
3371  llvm::TypeSize typeSize =
3372  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
3373  type);
3374  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
3375  llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
3376  ompLoc, globalValue, size, global.getSymName() + ".cache");
3377  moduleTranslation.mapValue(opInst.getResult(0), callInst);
3378  } else {
3379  moduleTranslation.mapValue(opInst.getResult(0), globalValue);
3380  }
3381 
3382  return success();
3383 }
3384 
3385 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
3386 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
3387  switch (deviceClause) {
3388  case mlir::omp::DeclareTargetDeviceType::host:
3389  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
3390  break;
3391  case mlir::omp::DeclareTargetDeviceType::nohost:
3392  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
3393  break;
3394  case mlir::omp::DeclareTargetDeviceType::any:
3395  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
3396  break;
3397  }
3398  llvm_unreachable("unhandled device clause");
3399 }
3400 
3401 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
3403  mlir::omp::DeclareTargetCaptureClause captureClause) {
3404  switch (captureClause) {
3405  case mlir::omp::DeclareTargetCaptureClause::to:
3406  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
3407  case mlir::omp::DeclareTargetCaptureClause::link:
3408  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
3409  case mlir::omp::DeclareTargetCaptureClause::enter:
3410  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
3411  }
3412  llvm_unreachable("unhandled capture clause");
3413 }
3414 
/// Builds the suffix appended to a declare-target global's symbol name to
/// form the name of its reference pointer: "_decl_tgt_ref_ptr", prefixed for
/// private globals with a per-file unique hex component ("_<fileID>") so
/// identically named private symbols in different files do not collide.
static llvm::SmallString<64>
getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
                             llvm::OpenMPIRBuilder &ompBuilder) {
  llvm::SmallString<64> suffix;
  llvm::raw_svector_ostream os(suffix);
  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
    // NOTE(review): assumes the global's location contains a FileLineColLoc;
    // `loc` would be null otherwise and the callback below would crash —
    // confirm callers guarantee a file location here.
    auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
    auto fileInfoCallBack = [&loc]() {
      return std::pair<std::string, uint64_t>(
          llvm::StringRef(loc.getFilename()), loc.getLine());
    };

    os << llvm::format(
        "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
  }
  os << "_decl_tgt_ref_ptr";

  return suffix;
}
3434 
3435 static bool isDeclareTargetLink(mlir::Value value) {
3436  if (auto addressOfOp =
3437  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
3438  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
3439  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
3440  if (auto declareTargetGlobal =
3441  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
3442  if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3443  mlir::omp::DeclareTargetCaptureClause::link)
3444  return true;
3445  }
3446  return false;
3447 }
3448 
3449 // Returns the reference pointer generated by the lowering of the declare target
3450 // operation in cases where the link clause is used or the to clause is used in
3451 // USM mode.
3452 static llvm::Value *
3454  LLVM::ModuleTranslation &moduleTranslation) {
3455  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3456 
3457  // An easier way to do this may just be to keep track of any pointer
3458  // references and their mapping to their respective operation
3459  if (auto addressOfOp =
3460  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
3461  if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
3462  addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
3463  addressOfOp.getGlobalName()))) {
3464 
3465  if (auto declareTargetGlobal =
3466  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
3467  gOp.getOperation())) {
3468 
3469  // In this case, we must utilise the reference pointer generated by the
3470  // declare target operation, similar to Clang
3471  if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
3472  mlir::omp::DeclareTargetCaptureClause::link) ||
3473  (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3474  mlir::omp::DeclareTargetCaptureClause::to &&
3475  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
3476  llvm::SmallString<64> suffix =
3477  getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
3478 
3479  if (gOp.getSymName().contains(suffix))
3480  return moduleTranslation.getLLVMModule()->getNamedValue(
3481  gOp.getSymName());
3482 
3483  return moduleTranslation.getLLVMModule()->getNamedValue(
3484  (gOp.getSymName().str() + suffix.str()).str());
3485  }
3486  }
3487  }
3488  }
3489 
3490  return nullptr;
3491 }
3492 
3493 namespace {
3494 // Append customMappers information to existing MapInfosTy
3495 struct MapInfosTy : llvm::OpenMPIRBuilder::MapInfosTy {
3497 
3498  /// Append arrays in \a CurInfo.
3499  void append(MapInfosTy &curInfo) {
3500  Mappers.append(curInfo.Mappers.begin(), curInfo.Mappers.end());
3501  llvm::OpenMPIRBuilder::MapInfosTy::append(curInfo);
3502  }
3503 };
3504 // A small helper structure to contain data gathered
3505 // for map lowering and coalese it into one area and
3506 // avoiding extra computations such as searches in the
3507 // llvm module for lowered mapped variables or checking
3508 // if something is declare target (and retrieving the
3509 // value) more than neccessary.
3510 struct MapInfoData : MapInfosTy {
3511  llvm::SmallVector<bool, 4> IsDeclareTarget;
3512  llvm::SmallVector<bool, 4> IsAMember;
3513  // Identify if mapping was added by mapClause or use_device clauses.
3514  llvm::SmallVector<bool, 4> IsAMapping;
3517  // Stripped off array/pointer to get the underlying
3518  // element type
3520 
3521  /// Append arrays in \a CurInfo.
3522  void append(MapInfoData &CurInfo) {
3523  IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
3524  CurInfo.IsDeclareTarget.end());
3525  MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
3526  OriginalValue.append(CurInfo.OriginalValue.begin(),
3527  CurInfo.OriginalValue.end());
3528  BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
3529  MapInfosTy::append(CurInfo);
3530  }
3531 };
3532 } // namespace
3533 
3534 uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
3535  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
3536  arrTy.getElementType()))
3537  return getArrayElementSizeInBits(nestedArrTy, dl);
3538  return dl.getTypeSizeInBits(arrTy.getElementType());
3539 }
3540 
// This function calculates the size to be offloaded for a specified type,
// given its associated map clause (which can contain bounds information which
// affects the total size), this size is calculated based on the underlying
// element type e.g. given a 1-D array of ints, we will calculate the size
// from the integer type * number of elements in the array. This size can be
// used in other calculations but is ultimately used as an argument to the
// OpenMP runtimes kernel argument structure which is generated through the
// combinedInfo data structures.
// This function is somewhat equivalent to Clang's getExprTypeSize inside of
// CGOpenMPRuntime.cpp.
llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
                            Operation *clauseOp, llvm::Value *basePointer,
                            llvm::Type *baseType, llvm::IRBuilderBase &builder,
                            LLVM::ModuleTranslation &moduleTranslation) {
  if (auto memberClause =
          mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
    // This calculates the size to transfer based on bounds and the underlying
    // element type, provided bounds have been specified (Fortran
    // pointers/allocatables/target and arrays that have sections specified
    // fall into this as well).
    if (!memberClause.getBounds().empty()) {
      llvm::Value *elementCount = builder.getInt64(1);
      for (auto bounds : memberClause.getBounds()) {
        if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
                bounds.getDefiningOp())) {
          // The below calculation for the size to be mapped calculated from
          // the map.info's bounds is: (elemCount * [UB - LB] + 1), later we
          // multiply by the underlying element types byte size to get the
          // full size to be offloaded based on the bounds
          elementCount = builder.CreateMul(
              elementCount,
              builder.CreateAdd(
                  builder.CreateSub(
                      moduleTranslation.lookupValue(boundOp.getUpperBound()),
                      moduleTranslation.lookupValue(boundOp.getLowerBound())),
                  builder.getInt64(1)));
        }
      }

      // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize
      // gives the size in inconsistent byte or bit format.
      uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
      // For arrays, the mapped extent is expressed in elements, so size by
      // the innermost element type rather than the whole array type.
      if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
        underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);

      // The size in bytes x number of elements, the sizeInBytes stored is
      // the underlying types size, e.g. if ptr<i32>, it'll be the i32's
      // size, so we do some on the fly runtime math to get the size in
      // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
      // some adjustment for members with more complex types.
      return builder.CreateMul(elementCount,
                               builder.getInt64(underlyingTypeSzInBits / 8));
    }
  }

  // No bounds: transfer the whole object, sized directly from the type.
  return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
}
3598 
3600  MapInfoData &mapData, SmallVectorImpl<Value> &mapVars,
3601  LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl,
3602  llvm::IRBuilderBase &builder, ArrayRef<Value> useDevPtrOperands = {},
3603  ArrayRef<Value> useDevAddrOperands = {},
3604  ArrayRef<Value> hasDevAddrOperands = {}) {
3605  auto checkIsAMember = [](const auto &mapVars, auto mapOp) {
3606  // Check if this is a member mapping and correctly assign that it is, if
3607  // it is a member of a larger object.
3608  // TODO: Need better handling of members, and distinguishing of members
3609  // that are implicitly allocated on device vs explicitly passed in as
3610  // arguments.
3611  // TODO: May require some further additions to support nested record
3612  // types, i.e. member maps that can have member maps.
3613  for (Value mapValue : mapVars) {
3614  auto map = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3615  for (auto member : map.getMembers())
3616  if (member == mapOp)
3617  return true;
3618  }
3619  return false;
3620  };
3621 
3622  // Process MapOperands
3623  for (Value mapValue : mapVars) {
3624  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3625  Value offloadPtr =
3626  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3627  mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
3628  mapData.Pointers.push_back(mapData.OriginalValue.back());
3629 
3630  if (llvm::Value *refPtr =
3631  getRefPtrIfDeclareTarget(offloadPtr,
3632  moduleTranslation)) { // declare target
3633  mapData.IsDeclareTarget.push_back(true);
3634  mapData.BasePointers.push_back(refPtr);
3635  } else { // regular mapped variable
3636  mapData.IsDeclareTarget.push_back(false);
3637  mapData.BasePointers.push_back(mapData.OriginalValue.back());
3638  }
3639 
3640  mapData.BaseType.push_back(
3641  moduleTranslation.convertType(mapOp.getVarType()));
3642  mapData.Sizes.push_back(
3643  getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
3644  mapData.BaseType.back(), builder, moduleTranslation));
3645  mapData.MapClause.push_back(mapOp.getOperation());
3646  mapData.Types.push_back(
3647  llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType()));
3648  mapData.Names.push_back(LLVM::createMappingInformation(
3649  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3650  mapData.DevicePointers.push_back(llvm::OpenMPIRBuilder::DeviceInfoTy::None);
3651  if (mapOp.getMapperId())
3652  mapData.Mappers.push_back(
3653  SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>(
3654  mapOp, mapOp.getMapperIdAttr()));
3655  else
3656  mapData.Mappers.push_back(nullptr);
3657  mapData.IsAMapping.push_back(true);
3658  mapData.IsAMember.push_back(checkIsAMember(mapVars, mapOp));
3659  }
3660 
3661  auto findMapInfo = [&mapData](llvm::Value *val,
3662  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
3663  unsigned index = 0;
3664  bool found = false;
3665  for (llvm::Value *basePtr : mapData.OriginalValue) {
3666  if (basePtr == val && mapData.IsAMapping[index]) {
3667  found = true;
3668  mapData.Types[index] |=
3669  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
3670  mapData.DevicePointers[index] = devInfoTy;
3671  }
3672  index++;
3673  }
3674  return found;
3675  };
3676 
3677  // Process useDevPtr(Addr)Operands
3678  auto addDevInfos = [&](const llvm::ArrayRef<Value> &useDevOperands,
3679  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
3680  for (Value mapValue : useDevOperands) {
3681  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3682  Value offloadPtr =
3683  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3684  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
3685 
3686  // Check if map info is already present for this entry.
3687  if (!findMapInfo(origValue, devInfoTy)) {
3688  mapData.OriginalValue.push_back(origValue);
3689  mapData.Pointers.push_back(mapData.OriginalValue.back());
3690  mapData.IsDeclareTarget.push_back(false);
3691  mapData.BasePointers.push_back(mapData.OriginalValue.back());
3692  mapData.BaseType.push_back(
3693  moduleTranslation.convertType(mapOp.getVarType()));
3694  mapData.Sizes.push_back(builder.getInt64(0));
3695  mapData.MapClause.push_back(mapOp.getOperation());
3696  mapData.Types.push_back(
3697  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
3698  mapData.Names.push_back(LLVM::createMappingInformation(
3699  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3700  mapData.DevicePointers.push_back(devInfoTy);
3701  mapData.Mappers.push_back(nullptr);
3702  mapData.IsAMapping.push_back(false);
3703  mapData.IsAMember.push_back(checkIsAMember(useDevOperands, mapOp));
3704  }
3705  }
3706  };
3707 
3708  addDevInfos(useDevAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
3709  addDevInfos(useDevPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
3710 
3711  for (Value mapValue : hasDevAddrOperands) {
3712  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3713  Value offloadPtr =
3714  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3715  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
3716  auto mapType =
3717  static_cast<llvm::omp::OpenMPOffloadMappingFlags>(mapOp.getMapType());
3718  auto mapTypeAlways = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
3719 
3720  mapData.OriginalValue.push_back(origValue);
3721  mapData.BasePointers.push_back(origValue);
3722  mapData.Pointers.push_back(origValue);
3723  mapData.IsDeclareTarget.push_back(false);
3724  mapData.BaseType.push_back(
3725  moduleTranslation.convertType(mapOp.getVarType()));
3726  mapData.Sizes.push_back(
3727  builder.getInt64(dl.getTypeSize(mapOp.getVarType())));
3728  mapData.MapClause.push_back(mapOp.getOperation());
3729  if (llvm::to_underlying(mapType & mapTypeAlways)) {
3730  // Descriptors are mapped with the ALWAYS flag, since they can get
3731  // rematerialized, so the address of the decriptor for a given object
3732  // may change from one place to another.
3733  mapData.Types.push_back(mapType);
3734  // Technically it's possible for a non-descriptor mapping to have
3735  // both has-device-addr and ALWAYS, so lookup the mapper in case it
3736  // exists.
3737  if (mapOp.getMapperId()) {
3738  mapData.Mappers.push_back(
3739  SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>(
3740  mapOp, mapOp.getMapperIdAttr()));
3741  } else {
3742  mapData.Mappers.push_back(nullptr);
3743  }
3744  } else {
3745  mapData.Types.push_back(
3746  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
3747  mapData.Mappers.push_back(nullptr);
3748  }
3749  mapData.Names.push_back(LLVM::createMappingInformation(
3750  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3751  mapData.DevicePointers.push_back(
3752  llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
3753  mapData.IsAMapping.push_back(false);
3754  mapData.IsAMember.push_back(checkIsAMember(hasDevAddrOperands, mapOp));
3755  }
3756 }
3757 
3758 static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
3759  auto *res = llvm::find(mapData.MapClause, memberOp);
3760  assert(res != mapData.MapClause.end() &&
3761  "MapInfoOp for member not found in MapData, cannot return index");
3762  return std::distance(mapData.MapClause.begin(), res);
3763 }
3764 
3765 static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
3766  bool first) {
3767  ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
3768  // Only 1 member has been mapped, we can return it.
3769  if (indexAttr.size() == 1)
3770  return cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp());
3771 
3772  llvm::SmallVector<size_t> indices(indexAttr.size());
3773  std::iota(indices.begin(), indices.end(), 0);
3774 
3775  llvm::sort(indices.begin(), indices.end(),
3776  [&](const size_t a, const size_t b) {
3777  auto memberIndicesA = cast<ArrayAttr>(indexAttr[a]);
3778  auto memberIndicesB = cast<ArrayAttr>(indexAttr[b]);
3779  for (const auto it : llvm::zip(memberIndicesA, memberIndicesB)) {
3780  int64_t aIndex = cast<IntegerAttr>(std::get<0>(it)).getInt();
3781  int64_t bIndex = cast<IntegerAttr>(std::get<1>(it)).getInt();
3782 
3783  if (aIndex == bIndex)
3784  continue;
3785 
3786  if (aIndex < bIndex)
3787  return first;
3788 
3789  if (aIndex > bIndex)
3790  return !first;
3791  }
3792 
3793  // Iterated the up until the end of the smallest member and
3794  // they were found to be equal up to that point, so select
3795  // the member with the lowest index count, so the "parent"
3796  return memberIndicesA.size() < memberIndicesB.size();
3797  });
3798 
3799  return llvm::cast<omp::MapInfoOp>(
3800  mapInfo.getMembers()[indices.front()].getDefiningOp());
3801 }
3802 
3803 /// This function calculates the array/pointer offset for map data provided
3804 /// with bounds operations, e.g. when provided something like the following:
3805 ///
3806 /// Fortran
3807 /// map(tofrom: array(2:5, 3:2))
3808 /// or
3809 /// C++
3810 /// map(tofrom: array[1:4][2:3])
3811 /// We must calculate the initial pointer offset to pass across, this function
3812 /// performs this using bounds.
3813 ///
3814 /// NOTE: which while specified in row-major order it currently needs to be
3815 /// flipped for Fortran's column order array allocation and access (as
3816 /// opposed to C++'s row-major, hence the backwards processing where order is
3817 /// important). This is likely important to keep in mind for the future when
3818 /// we incorporate a C++ frontend, both frontends will need to agree on the
3819 /// ordering of generated bounds operations (one may have to flip them) to
3820 /// make the below lowering frontend agnostic. The offload size
3821 /// calcualtion may also have to be adjusted for C++.
3822 std::vector<llvm::Value *>
3824  llvm::IRBuilderBase &builder, bool isArrayTy,
3825  OperandRange bounds) {
3826  std::vector<llvm::Value *> idx;
3827  // There's no bounds to calculate an offset from, we can safely
3828  // ignore and return no indices.
3829  if (bounds.empty())
3830  return idx;
3831 
3832  // If we have an array type, then we have its type so can treat it as a
3833  // normal GEP instruction where the bounds operations are simply indexes
3834  // into the array. We currently do reverse order of the bounds, which
3835  // I believe leans more towards Fortran's column-major in memory.
3836  if (isArrayTy) {
3837  idx.push_back(builder.getInt64(0));
3838  for (int i = bounds.size() - 1; i >= 0; --i) {
3839  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3840  bounds[i].getDefiningOp())) {
3841  idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
3842  }
3843  }
3844  } else {
3845  // If we do not have an array type, but we have bounds, then we're dealing
3846  // with a pointer that's being treated like an array and we have the
3847  // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
3848  // address (pointer pointing to the actual data) so we must caclulate the
3849  // offset using a single index which the following two loops attempts to
3850  // compute.
3851 
3852  // Calculates the size offset we need to make per row e.g. first row or
3853  // column only needs to be offset by one, but the next would have to be
3854  // the previous row/column offset multiplied by the extent of current row.
3855  //
3856  // For example ([1][10][100]):
3857  //
3858  // - First row/column we move by 1 for each index increment
3859  // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
3860  // current) for 10 for each index increment
3861  // - Third row/column we would move by 10 (second row/column) *
3862  // (extent/size of current) 100 for 1000 for each index increment
3863  std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
3864  for (size_t i = 1; i < bounds.size(); ++i) {
3865  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3866  bounds[i].getDefiningOp())) {
3867  dimensionIndexSizeOffset.push_back(builder.CreateMul(
3868  moduleTranslation.lookupValue(boundOp.getExtent()),
3869  dimensionIndexSizeOffset[i - 1]));
3870  }
3871  }
3872 
3873  // Now that we have calculated how much we move by per index, we must
3874  // multiply each lower bound offset in indexes by the size offset we
3875  // have calculated in the previous and accumulate the results to get
3876  // our final resulting offset.
3877  for (int i = bounds.size() - 1; i >= 0; --i) {
3878  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3879  bounds[i].getDefiningOp())) {
3880  if (idx.empty())
3881  idx.emplace_back(builder.CreateMul(
3882  moduleTranslation.lookupValue(boundOp.getLowerBound()),
3883  dimensionIndexSizeOffset[i]));
3884  else
3885  idx.back() = builder.CreateAdd(
3886  idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
3887  boundOp.getLowerBound()),
3888  dimensionIndexSizeOffset[i]));
3889  }
3890  }
3891  }
3892 
3893  return idx;
3894 }
3895 
3896 // This creates two insertions into the MapInfosTy data structure for the
3897 // "parent" of a set of members, (usually a container e.g.
3898 // class/structure/derived type) when subsequent members have also been
3899 // explicitly mapped on the same map clause. Certain types, such as Fortran
3900 // descriptors are mapped like this as well, however, the members are
3901 // implicit as far as a user is concerned, but we must explicitly map them
3902 // internally.
3903 //
3904 // This function also returns the memberOfFlag for this particular parent,
3905 // which is utilised in subsequent member mappings (by modifying there map type
3906 // with it) to indicate that a member is part of this parent and should be
3907 // treated by the runtime as such. Important to achieve the correct mapping.
3908 //
3909 // This function borrows a lot from Clang's emitCombinedEntry function
3910 // inside of CGOpenMPRuntime.cpp
3911 static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
3912  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3913  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
3914  MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams) {
3915  assert(!ompBuilder.Config.isTargetDevice() &&
3916  "function only supported for host device codegen");
3917 
3918  // Map the first segment of our structure
3919  combinedInfo.Types.emplace_back(
3920  isTargetParams
3921  ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
3922  : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE);
3923  combinedInfo.DevicePointers.emplace_back(
3924  mapData.DevicePointers[mapDataIndex]);
3925  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIndex]);
3926  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
3927  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
3928  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
3929 
3930  // Calculate size of the parent object being mapped based on the
3931  // addresses at runtime, highAddr - lowAddr = size. This of course
3932  // doesn't factor in allocated data like pointers, hence the further
3933  // processing of members specified by users, or in the case of
3934  // Fortran pointers and allocatables, the mapping of the pointed to
3935  // data by the descriptor (which itself, is a structure containing
3936  // runtime information on the dynamically allocated data).
3937  auto parentClause =
3938  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3939 
3940  llvm::Value *lowAddr, *highAddr;
3941  if (!parentClause.getPartialMap()) {
3942  lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
3943  builder.getPtrTy());
3944  highAddr = builder.CreatePointerCast(
3945  builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
3946  mapData.Pointers[mapDataIndex], 1),
3947  builder.getPtrTy());
3948  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
3949  } else {
3950  auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3951  int firstMemberIdx = getMapDataMemberIdx(
3952  mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
3953  lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
3954  builder.getPtrTy());
3955  int lastMemberIdx = getMapDataMemberIdx(
3956  mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
3957  highAddr = builder.CreatePointerCast(
3958  builder.CreateGEP(mapData.BaseType[lastMemberIdx],
3959  mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
3960  builder.getPtrTy());
3961  combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
3962  }
3963 
3964  llvm::Value *size = builder.CreateIntCast(
3965  builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
3966  builder.getInt64Ty(),
3967  /*isSigned=*/false);
3968  combinedInfo.Sizes.push_back(size);
3969 
3970  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
3971  ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
3972 
3973  // This creates the initial MEMBER_OF mapping that consists of
3974  // the parent/top level container (same as above effectively, except
3975  // with a fixed initial compile time size and separate maptype which
3976  // indicates the true mape type (tofrom etc.). This parent mapping is
3977  // only relevant if the structure in its totality is being mapped,
3978  // otherwise the above suffices.
3979  if (!parentClause.getPartialMap()) {
3980  // TODO: This will need to be expanded to include the whole host of logic
3981  // for the map flags that Clang currently supports (e.g. it should do some
3982  // further case specific flag modifications). For the moment, it handles
3983  // what we support as expected.
3984  llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
3985  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3986  combinedInfo.Types.emplace_back(mapFlag);
3987  combinedInfo.DevicePointers.emplace_back(
3989  combinedInfo.Mappers.emplace_back(nullptr);
3990  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
3991  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
3992  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
3993  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
3994  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
3995  }
3996  return memberOfFlag;
3997 }
3998 
3999 // The intent is to verify if the mapped data being passed is a
4000 // pointer -> pointee that requires special handling in certain cases,
4001 // e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
4002 //
4003 // There may be a better way to verify this, but unfortunately with
4004 // opaque pointers we lose the ability to easily check if something is
4005 // a pointer whilst maintaining access to the underlying type.
4006 static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
4007  // If we have a varPtrPtr field assigned then the underlying type is a pointer
4008  if (mapOp.getVarPtrPtr())
4009  return true;
4010 
4011  // If the map data is declare target with a link clause, then it's represented
4012  // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
4013  // no relation to pointers.
4014  if (isDeclareTargetLink(mapOp.getVarPtr()))
4015  return true;
4016 
4017  return false;
4018 }
4019 
4020 // This function is intended to add explicit mappings of members
4022  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
4023  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
4024  MapInfoData &mapData, uint64_t mapDataIndex,
4025  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
4026  assert(!ompBuilder.Config.isTargetDevice() &&
4027  "function only supported for host device codegen");
4028 
4029  auto parentClause =
4030  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
4031 
4032  for (auto mappedMembers : parentClause.getMembers()) {
4033  auto memberClause =
4034  llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp());
4035  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
4036 
4037  assert(memberDataIdx >= 0 && "could not find mapped member of structure");
4038 
4039  // If we're currently mapping a pointer to a block of data, we must
4040  // initially map the pointer, and then attatch/bind the data with a
4041  // subsequent map to the pointer. This segment of code generates the
4042  // pointer mapping, which can in certain cases be optimised out as Clang
4043  // currently does in its lowering. However, for the moment we do not do so,
4044  // in part as we currently have substantially less information on the data
4045  // being mapped at this stage.
4046  if (checkIfPointerMap(memberClause)) {
4047  auto mapFlag =
4048  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
4049  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4050  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
4051  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
4052  combinedInfo.Types.emplace_back(mapFlag);
4053  combinedInfo.DevicePointers.emplace_back(
4055  combinedInfo.Mappers.emplace_back(nullptr);
4056  combinedInfo.Names.emplace_back(
4057  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
4058  combinedInfo.BasePointers.emplace_back(
4059  mapData.BasePointers[mapDataIndex]);
4060  combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
4061  combinedInfo.Sizes.emplace_back(builder.getInt64(
4062  moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
4063  }
4064 
4065  // Same MemberOfFlag to indicate its link with parent and other members
4066  // of.
4067  auto mapFlag =
4068  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
4069  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4070  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
4071  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
4072  if (checkIfPointerMap(memberClause))
4073  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
4074 
4075  combinedInfo.Types.emplace_back(mapFlag);
4076  combinedInfo.DevicePointers.emplace_back(
4077  mapData.DevicePointers[memberDataIdx]);
4078  combinedInfo.Mappers.emplace_back(mapData.Mappers[memberDataIdx]);
4079  combinedInfo.Names.emplace_back(
4080  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
4081  uint64_t basePointerIndex =
4082  checkIfPointerMap(memberClause) ? memberDataIdx : mapDataIndex;
4083  combinedInfo.BasePointers.emplace_back(
4084  mapData.BasePointers[basePointerIndex]);
4085  combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
4086 
4087  llvm::Value *size = mapData.Sizes[memberDataIdx];
4088  if (checkIfPointerMap(memberClause)) {
4089  size = builder.CreateSelect(
4090  builder.CreateIsNull(mapData.Pointers[memberDataIdx]),
4091  builder.getInt64(0), size);
4092  }
4093 
4094  combinedInfo.Sizes.emplace_back(size);
4095  }
4096 }
4097 
4098 static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
4099  MapInfosTy &combinedInfo, bool isTargetParams,
4100  int mapDataParentIdx = -1) {
4101  // Declare Target Mappings are excluded from being marked as
4102  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
4103  // marked with OMP_MAP_PTR_AND_OBJ instead.
4104  auto mapFlag = mapData.Types[mapDataIdx];
4105  auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
4106 
4107  bool isPtrTy = checkIfPointerMap(mapInfoOp);
4108  if (isPtrTy)
4109  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
4110 
4111  if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx])
4112  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
4113 
4114  if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy &&
4115  !isPtrTy)
4116  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
4117 
4118  // if we're provided a mapDataParentIdx, then the data being mapped is
4119  // part of a larger object (in a parent <-> member mapping) and in this
4120  // case our BasePointer should be the parent.
4121  if (mapDataParentIdx >= 0)
4122  combinedInfo.BasePointers.emplace_back(
4123  mapData.BasePointers[mapDataParentIdx]);
4124  else
4125  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
4126 
4127  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
4128  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
4129  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]);
4130  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
4131  combinedInfo.Types.emplace_back(mapFlag);
4132  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
4133 }
4134 
4135 static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation,
4136  llvm::IRBuilderBase &builder,
4137  llvm::OpenMPIRBuilder &ompBuilder,
4138  DataLayout &dl, MapInfosTy &combinedInfo,
4139  MapInfoData &mapData, uint64_t mapDataIndex,
4140  bool isTargetParams) {
4141  assert(!ompBuilder.Config.isTargetDevice() &&
4142  "function only supported for host device codegen");
4143 
4144  auto parentClause =
4145  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
4146 
4147  // If we have a partial map (no parent referenced in the map clauses of the
4148  // directive, only members) and only a single member, we do not need to bind
4149  // the map of the member to the parent, we can pass the member separately.
4150  if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) {
4151  auto memberClause = llvm::cast<omp::MapInfoOp>(
4152  parentClause.getMembers()[0].getDefiningOp());
4153  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
4154  // Note: Clang treats arrays with explicit bounds that fall into this
4155  // category as a parent with map case, however, it seems this isn't a
4156  // requirement, and processing them as an individual map is fine. So,
4157  // we will handle them as individual maps for the moment, as it's
4158  // difficult for us to check this as we always require bounds to be
4159  // specified currently and it's also marginally more optimal (single
4160  // map rather than two). The difference may come from the fact that
4161  // Clang maps array without bounds as pointers (which we do not
4162  // currently do), whereas we treat them as arrays in all cases
4163  // currently.
4164  processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams,
4165  mapDataIndex);
4166  return;
4167  }
4168 
4169  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
4170  mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
4171  combinedInfo, mapData, mapDataIndex, isTargetParams);
4172  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
4173  combinedInfo, mapData, mapDataIndex,
4174  memberOfParentFlag);
4175 }
4176 
4177 // This is a variation on Clang's GenerateOpenMPCapturedVars, which
4178 // generates different operation (e.g. load/store) combinations for
4179 // arguments to the kernel, based on map capture kinds which are then
4180 // utilised in the combinedInfo in place of the original Map value.
4181 static void
4182 createAlteredByCaptureMap(MapInfoData &mapData,
4183  LLVM::ModuleTranslation &moduleTranslation,
4184  llvm::IRBuilderBase &builder) {
4185  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4186  "function only supported for host device codegen");
4187  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
4188  // if it's declare target, skip it, it's handled separately.
4189  if (!mapData.IsDeclareTarget[i]) {
4190  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
4191  omp::VariableCaptureKind captureKind = mapOp.getMapCaptureType();
4192  bool isPtrTy = checkIfPointerMap(mapOp);
4193 
4194  // Currently handles array sectioning lowerbound case, but more
4195  // logic may be required in the future. Clang invokes EmitLValue,
4196  // which has specialised logic for special Clang types such as user
4197  // defines, so it is possible we will have to extend this for
4198  // structures or other complex types. As the general idea is that this
4199  // function mimics some of the logic from Clang that we require for
4200  // kernel argument passing from host -> device.
4201  switch (captureKind) {
4202  case omp::VariableCaptureKind::ByRef: {
4203  llvm::Value *newV = mapData.Pointers[i];
4204  std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
4205  moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
4206  mapOp.getBounds());
4207  if (isPtrTy)
4208  newV = builder.CreateLoad(builder.getPtrTy(), newV);
4209 
4210  if (!offsetIdx.empty())
4211  newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
4212  "array_offset");
4213  mapData.Pointers[i] = newV;
4214  } break;
4215  case omp::VariableCaptureKind::ByCopy: {
4216  llvm::Type *type = mapData.BaseType[i];
4217  llvm::Value *newV;
4218  if (mapData.Pointers[i]->getType()->isPointerTy())
4219  newV = builder.CreateLoad(type, mapData.Pointers[i]);
4220  else
4221  newV = mapData.Pointers[i];
4222 
4223  if (!isPtrTy) {
4224  auto curInsert = builder.saveIP();
4225  builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
4226  auto *memTempAlloc =
4227  builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
4228  builder.restoreIP(curInsert);
4229 
4230  builder.CreateStore(newV, memTempAlloc);
4231  newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
4232  }
4233 
4234  mapData.Pointers[i] = newV;
4235  mapData.BasePointers[i] = newV;
4236  } break;
4237  case omp::VariableCaptureKind::This:
4238  case omp::VariableCaptureKind::VLAType:
4239  mapData.MapClause[i]->emitOpError("Unhandled capture kind");
4240  break;
4241  }
4242  }
4243  }
4244 }
4245 
4246 // Generate all map related information and fill the combinedInfo.
4247 static void genMapInfos(llvm::IRBuilderBase &builder,
4248  LLVM::ModuleTranslation &moduleTranslation,
4249  DataLayout &dl, MapInfosTy &combinedInfo,
4250  MapInfoData &mapData, bool isTargetParams = false) {
4251  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4252  "function only supported for host device codegen");
4253 
4254  // We wish to modify some of the methods in which arguments are
4255  // passed based on their capture type by the target region, this can
4256  // involve generating new loads and stores, which changes the
4257  // MLIR value to LLVM value mapping, however, we only wish to do this
4258  // locally for the current function/target and also avoid altering
4259  // ModuleTranslation, so we remap the base pointer or pointer stored
4260  // in the map infos corresponding MapInfoData, which is later accessed
4261  // by genMapInfos and createTarget to help generate the kernel and
4262  // kernel arg structure. It primarily becomes relevant in cases like
4263  // bycopy, or byref range'd arrays. In the default case, we simply
4264  // pass thee pointer byref as both basePointer and pointer.
4265  createAlteredByCaptureMap(mapData, moduleTranslation, builder);
4266 
4267  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4268 
4269  // We operate under the assumption that all vectors that are
4270  // required in MapInfoData are of equal lengths (either filled with
4271  // default constructed data or appropiate information) so we can
4272  // utilise the size from any component of MapInfoData, if we can't
4273  // something is missing from the initial MapInfoData construction.
4274  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
4275  // NOTE/TODO: We currently do not support arbitrary depth record
4276  // type mapping.
4277  if (mapData.IsAMember[i])
4278  continue;
4279 
4280  auto mapInfoOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[i]);
4281  if (!mapInfoOp.getMembers().empty()) {
4282  processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
4283  combinedInfo, mapData, i, isTargetParams);
4284  continue;
4285  }
4286 
4287  processIndividualMap(mapData, i, combinedInfo, isTargetParams);
4288  }
4289 }
4290 
4292 emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder,
4293  LLVM::ModuleTranslation &moduleTranslation,
4294  llvm::StringRef mapperFuncName);
4295 
4297 getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder,
4298  LLVM::ModuleTranslation &moduleTranslation) {
4299  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4300  "function only supported for host device codegen");
4301  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
4302  std::string mapperFuncName =
4303  moduleTranslation.getOpenMPBuilder()->createPlatformSpecificName(
4304  {"omp_mapper", declMapperOp.getSymName()});
4305 
4306  if (auto *lookupFunc = moduleTranslation.lookupFunction(mapperFuncName))
4307  return lookupFunc;
4308 
4309  return emitUserDefinedMapper(declMapperOp, builder, moduleTranslation,
4310  mapperFuncName);
4311 }
4312 
4314 emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder,
4315  LLVM::ModuleTranslation &moduleTranslation,
4316  llvm::StringRef mapperFuncName) {
4317  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4318  "function only supported for host device codegen");
4319  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
4320  auto declMapperInfoOp = declMapperOp.getDeclareMapperInfo();
4321  DataLayout dl = DataLayout(declMapperOp->getParentOfType<ModuleOp>());
4322  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4323  llvm::Type *varType = moduleTranslation.convertType(declMapperOp.getType());
4324  SmallVector<Value> mapVars = declMapperInfoOp.getMapVars();
4325 
4326  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4327 
4328  // Fill up the arrays with all the mapped variables.
4329  MapInfosTy combinedInfo;
4330  auto genMapInfoCB =
4331  [&](InsertPointTy codeGenIP, llvm::Value *ptrPHI,
4332  llvm::Value *unused2) -> llvm::OpenMPIRBuilder::MapInfosOrErrorTy {
4333  builder.restoreIP(codeGenIP);
4334  moduleTranslation.mapValue(declMapperOp.getSymVal(), ptrPHI);
4335  moduleTranslation.mapBlock(&declMapperOp.getRegion().front(),
4336  builder.GetInsertBlock());
4337  if (failed(moduleTranslation.convertBlock(declMapperOp.getRegion().front(),
4338  /*ignoreArguments=*/true,
4339  builder)))
4340  return llvm::make_error<PreviouslyReportedError>();
4341  MapInfoData mapData;
4342  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
4343  builder);
4344  genMapInfos(builder, moduleTranslation, dl, combinedInfo, mapData);
4345 
4346  // Drop the mapping that is no longer necessary so that the same region can
4347  // be processed multiple times.
4348  moduleTranslation.forgetMapping(declMapperOp.getRegion());
4349  return combinedInfo;
4350  };
4351 
4352  auto customMapperCB = [&](unsigned i) -> llvm::Expected<llvm::Function *> {
4353  if (!combinedInfo.Mappers[i])
4354  return nullptr;
4355  return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
4356  moduleTranslation);
4357  };
4358 
4359  llvm::Expected<llvm::Function *> newFn = ompBuilder->emitUserDefinedMapper(
4360  genMapInfoCB, varType, mapperFuncName, customMapperCB);
4361  if (!newFn)
4362  return newFn.takeError();
4363  moduleTranslation.mapFunction(mapperFuncName, *newFn);
4364  return *newFn;
4365 }
4366 
4367 static LogicalResult
4368 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
4369  LLVM::ModuleTranslation &moduleTranslation) {
4370  llvm::Value *ifCond = nullptr;
4371  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
4372  SmallVector<Value> mapVars;
4373  SmallVector<Value> useDevicePtrVars;
4374  SmallVector<Value> useDeviceAddrVars;
4375  llvm::omp::RuntimeFunction RTLFn;
4376  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
4377 
4378  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4379  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
4380  /*SeparateBeginEndCalls=*/true);
4381  bool isTargetDevice = ompBuilder->Config.isTargetDevice();
4382  bool isOffloadEntry =
4383  isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
4384 
4385  LogicalResult result =
4387  .Case([&](omp::TargetDataOp dataOp) {
4388  if (failed(checkImplementationStatus(*dataOp)))
4389  return failure();
4390 
4391  if (auto ifVar = dataOp.getIfExpr())
4392  ifCond = moduleTranslation.lookupValue(ifVar);
4393 
4394  if (auto devId = dataOp.getDevice())
4395  if (auto constOp =
4396  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4397  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4398  deviceID = intAttr.getInt();
4399 
4400  mapVars = dataOp.getMapVars();
4401  useDevicePtrVars = dataOp.getUseDevicePtrVars();
4402  useDeviceAddrVars = dataOp.getUseDeviceAddrVars();
4403  return success();
4404  })
4405  .Case([&](omp::TargetEnterDataOp enterDataOp) -> LogicalResult {
4406  if (failed(checkImplementationStatus(*enterDataOp)))
4407  return failure();
4408 
4409  if (auto ifVar = enterDataOp.getIfExpr())
4410  ifCond = moduleTranslation.lookupValue(ifVar);
4411 
4412  if (auto devId = enterDataOp.getDevice())
4413  if (auto constOp =
4414  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4415  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4416  deviceID = intAttr.getInt();
4417  RTLFn =
4418  enterDataOp.getNowait()
4419  ? llvm::omp::OMPRTL___tgt_target_data_begin_nowait_mapper
4420  : llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
4421  mapVars = enterDataOp.getMapVars();
4422  info.HasNoWait = enterDataOp.getNowait();
4423  return success();
4424  })
4425  .Case([&](omp::TargetExitDataOp exitDataOp) -> LogicalResult {
4426  if (failed(checkImplementationStatus(*exitDataOp)))
4427  return failure();
4428 
4429  if (auto ifVar = exitDataOp.getIfExpr())
4430  ifCond = moduleTranslation.lookupValue(ifVar);
4431 
4432  if (auto devId = exitDataOp.getDevice())
4433  if (auto constOp =
4434  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4435  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4436  deviceID = intAttr.getInt();
4437 
4438  RTLFn = exitDataOp.getNowait()
4439  ? llvm::omp::OMPRTL___tgt_target_data_end_nowait_mapper
4440  : llvm::omp::OMPRTL___tgt_target_data_end_mapper;
4441  mapVars = exitDataOp.getMapVars();
4442  info.HasNoWait = exitDataOp.getNowait();
4443  return success();
4444  })
4445  .Case([&](omp::TargetUpdateOp updateDataOp) -> LogicalResult {
4446  if (failed(checkImplementationStatus(*updateDataOp)))
4447  return failure();
4448 
4449  if (auto ifVar = updateDataOp.getIfExpr())
4450  ifCond = moduleTranslation.lookupValue(ifVar);
4451 
4452  if (auto devId = updateDataOp.getDevice())
4453  if (auto constOp =
4454  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4455  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4456  deviceID = intAttr.getInt();
4457 
4458  RTLFn =
4459  updateDataOp.getNowait()
4460  ? llvm::omp::OMPRTL___tgt_target_data_update_nowait_mapper
4461  : llvm::omp::OMPRTL___tgt_target_data_update_mapper;
4462  mapVars = updateDataOp.getMapVars();
4463  info.HasNoWait = updateDataOp.getNowait();
4464  return success();
4465  })
4466  .Default([&](Operation *op) {
4467  llvm_unreachable("unexpected operation");
4468  return failure();
4469  });
4470 
4471  if (failed(result))
4472  return failure();
4473  // Pretend we have IF(false) if we're not doing offload.
4474  if (!isOffloadEntry)
4475  ifCond = builder.getFalse();
4476 
4477  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4478  MapInfoData mapData;
4479  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, DL,
4480  builder, useDevicePtrVars, useDeviceAddrVars);
4481 
4482  // Fill up the arrays with all the mapped variables.
4483  MapInfosTy combinedInfo;
4484  auto genMapInfoCB = [&](InsertPointTy codeGenIP) -> MapInfosTy & {
4485  builder.restoreIP(codeGenIP);
4486  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
4487  return combinedInfo;
4488  };
4489 
4490  // Define a lambda to apply mappings between use_device_addr and
4491  // use_device_ptr base pointers, and their associated block arguments.
4492  auto mapUseDevice =
4493  [&moduleTranslation](
4494  llvm::OpenMPIRBuilder::DeviceInfoTy type,
4496  llvm::SmallVectorImpl<Value> &useDeviceVars, MapInfoData &mapInfoData,
4497  llvm::function_ref<llvm::Value *(llvm::Value *)> mapper = nullptr) {
4498  for (auto [arg, useDevVar] :
4499  llvm::zip_equal(blockArgs, useDeviceVars)) {
4500 
4501  auto getMapBasePtr = [](omp::MapInfoOp mapInfoOp) {
4502  return mapInfoOp.getVarPtrPtr() ? mapInfoOp.getVarPtrPtr()
4503  : mapInfoOp.getVarPtr();
4504  };
4505 
4506  auto useDevMap = cast<omp::MapInfoOp>(useDevVar.getDefiningOp());
4507  for (auto [mapClause, devicePointer, basePointer] : llvm::zip_equal(
4508  mapInfoData.MapClause, mapInfoData.DevicePointers,
4509  mapInfoData.BasePointers)) {
4510  auto mapOp = cast<omp::MapInfoOp>(mapClause);
4511  if (getMapBasePtr(mapOp) != getMapBasePtr(useDevMap) ||
4512  devicePointer != type)
4513  continue;
4514 
4515  if (llvm::Value *devPtrInfoMap =
4516  mapper ? mapper(basePointer) : basePointer) {
4517  moduleTranslation.mapValue(arg, devPtrInfoMap);
4518  break;
4519  }
4520  }
4521  }
4522  };
4523 
4524  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
4525  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType)
4526  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
4527  builder.restoreIP(codeGenIP);
4528  assert(isa<omp::TargetDataOp>(op) &&
4529  "BodyGen requested for non TargetDataOp");
4530  auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(op);
4531  Region &region = cast<omp::TargetDataOp>(op).getRegion();
4532  switch (bodyGenType) {
4533  case BodyGenTy::Priv:
4534  // Check if any device ptr/addr info is available
4535  if (!info.DevicePtrInfoMap.empty()) {
4536  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4537  blockArgIface.getUseDeviceAddrBlockArgs(),
4538  useDeviceAddrVars, mapData,
4539  [&](llvm::Value *basePointer) -> llvm::Value * {
4540  if (!info.DevicePtrInfoMap[basePointer].second)
4541  return nullptr;
4542  return builder.CreateLoad(
4543  builder.getPtrTy(),
4544  info.DevicePtrInfoMap[basePointer].second);
4545  });
4546  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4547  blockArgIface.getUseDevicePtrBlockArgs(), useDevicePtrVars,
4548  mapData, [&](llvm::Value *basePointer) {
4549  return info.DevicePtrInfoMap[basePointer].second;
4550  });
4551 
4552  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
4553  moduleTranslation)))
4554  return llvm::make_error<PreviouslyReportedError>();
4555  }
4556  break;
4557  case BodyGenTy::DupNoPriv:
4558  // We must always restoreIP regardless of doing anything the caller
4559  // does not restore it, leading to incorrect (no) branch generation.
4560  builder.restoreIP(codeGenIP);
4561  break;
4562  case BodyGenTy::NoPriv:
4563  // If device info is available then region has already been generated
4564  if (info.DevicePtrInfoMap.empty()) {
4565  // For device pass, if use_device_ptr(addr) mappings were present,
4566  // we need to link them here before codegen.
4567  if (ompBuilder->Config.IsTargetDevice.value_or(false)) {
4568  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4569  blockArgIface.getUseDeviceAddrBlockArgs(),
4570  useDeviceAddrVars, mapData);
4571  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4572  blockArgIface.getUseDevicePtrBlockArgs(),
4573  useDevicePtrVars, mapData);
4574  }
4575 
4576  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
4577  moduleTranslation)))
4578  return llvm::make_error<PreviouslyReportedError>();
4579  }
4580  break;
4581  }
4582  return builder.saveIP();
4583  };
4584 
4585  auto customMapperCB =
4586  [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
4587  if (!combinedInfo.Mappers[i])
4588  return nullptr;
4589  info.HasMapper = true;
4590  return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
4591  moduleTranslation);
4592  };
4593 
4594  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4595  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4596  findAllocaInsertPoint(builder, moduleTranslation);
4597  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() {
4598  if (isa<omp::TargetDataOp>(op))
4599  return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
4600  builder.getInt64(deviceID), ifCond,
4601  info, genMapInfoCB, customMapperCB,
4602  /*MapperFunc=*/nullptr, bodyGenCB,
4603  /*DeviceAddrCB=*/nullptr);
4604  return ompBuilder->createTargetData(
4605  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
4606  info, genMapInfoCB, customMapperCB, &RTLFn);
4607  }();
4608 
4609  if (failed(handleError(afterIP, *op)))
4610  return failure();
4611 
4612  builder.restoreIP(*afterIP);
4613  return success();
4614 }
4615 
4616 static LogicalResult
4617 convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
4618  LLVM::ModuleTranslation &moduleTranslation) {
4619  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4620  auto distributeOp = cast<omp::DistributeOp>(opInst);
4621  if (failed(checkImplementationStatus(opInst)))
4622  return failure();
4623 
4624  /// Process teams op reduction in distribute if the reduction is contained in
4625  /// the distribute op.
4626  omp::TeamsOp teamsOp = opInst.getParentOfType<omp::TeamsOp>();
4627  bool doDistributeReduction =
4628  teamsOp ? teamsReductionContainedInDistribute(teamsOp) : false;
4629 
4630  DenseMap<Value, llvm::Value *> reductionVariableMap;
4631  unsigned numReductionVars = teamsOp ? teamsOp.getNumReductionVars() : 0;
4632  SmallVector<omp::DeclareReductionOp> reductionDecls;
4633  SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
4634  llvm::ArrayRef<bool> isByRef;
4635 
4636  if (doDistributeReduction) {
4637  isByRef = getIsByRef(teamsOp.getReductionByref());
4638  assert(isByRef.size() == teamsOp.getNumReductionVars());
4639 
4640  collectReductionDecls(teamsOp, reductionDecls);
4641  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4642  findAllocaInsertPoint(builder, moduleTranslation);
4643 
4644  MutableArrayRef<BlockArgument> reductionArgs =
4645  llvm::cast<omp::BlockArgOpenMPOpInterface>(*teamsOp)
4646  .getReductionBlockArgs();
4647 
4649  teamsOp, reductionArgs, builder, moduleTranslation, allocaIP,
4650  reductionDecls, privateReductionVariables, reductionVariableMap,
4651  isByRef)))
4652  return failure();
4653  }
4654 
4655  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4656  auto bodyGenCB = [&](InsertPointTy allocaIP,
4657  InsertPointTy codeGenIP) -> llvm::Error {
4658  // Save the alloca insertion point on ModuleTranslation stack for use in
4659  // nested regions.
4661  moduleTranslation, allocaIP);
4662 
4663  // DistributeOp has only one region associated with it.
4664  builder.restoreIP(codeGenIP);
4665  PrivateVarsInfo privVarsInfo(distributeOp);
4666 
4667  llvm::Expected<llvm::BasicBlock *> afterAllocas =
4668  allocatePrivateVars(builder, moduleTranslation, privVarsInfo, allocaIP);
4669  if (handleError(afterAllocas, opInst).failed())
4670  return llvm::make_error<PreviouslyReportedError>();
4671 
4672  if (handleError(initPrivateVars(builder, moduleTranslation, privVarsInfo),
4673  opInst)
4674  .failed())
4675  return llvm::make_error<PreviouslyReportedError>();
4676 
4677  if (failed(copyFirstPrivateVars(
4678  distributeOp, builder, moduleTranslation, privVarsInfo.mlirVars,
4679  privVarsInfo.llvmVars, privVarsInfo.privatizers,
4680  distributeOp.getPrivateNeedsBarrier())))
4681  return llvm::make_error<PreviouslyReportedError>();
4682 
4683  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4684  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4686  convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
4687  builder, moduleTranslation);
4688  if (!regionBlock)
4689  return regionBlock.takeError();
4690  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
4691 
4692  // Skip applying a workshare loop below when translating 'distribute
4693  // parallel do' (it's been already handled by this point while translating
4694  // the nested omp.wsloop).
4695  if (!isa_and_present<omp::WsloopOp>(distributeOp.getNestedWrapper())) {
4696  // TODO: Add support for clauses which are valid for DISTRIBUTE
4697  // constructs. Static schedule is the default.
4698  auto schedule = omp::ClauseScheduleKind::Static;
4699  bool isOrdered = false;
4700  std::optional<omp::ScheduleModifier> scheduleMod;
4701  bool isSimd = false;
4702  llvm::omp::WorksharingLoopType workshareLoopType =
4703  llvm::omp::WorksharingLoopType::DistributeStaticLoop;
4704  bool loopNeedsBarrier = false;
4705  llvm::Value *chunk = nullptr;
4706 
4707  llvm::CanonicalLoopInfo *loopInfo =
4708  findCurrentLoopInfo(moduleTranslation);
4709  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
4710  ompBuilder->applyWorkshareLoop(
4711  ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
4712  convertToScheduleKind(schedule), chunk, isSimd,
4713  scheduleMod == omp::ScheduleModifier::monotonic,
4714  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
4715  workshareLoopType);
4716 
4717  if (!wsloopIP)
4718  return wsloopIP.takeError();
4719  }
4720 
4721  if (failed(cleanupPrivateVars(builder, moduleTranslation,
4722  distributeOp.getLoc(), privVarsInfo.llvmVars,
4723  privVarsInfo.privatizers)))
4724  return llvm::make_error<PreviouslyReportedError>();
4725 
4726  return llvm::Error::success();
4727  };
4728 
4729  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4730  findAllocaInsertPoint(builder, moduleTranslation);
4731  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4732  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
4733  ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
4734 
4735  if (failed(handleError(afterIP, opInst)))
4736  return failure();
4737 
4738  builder.restoreIP(*afterIP);
4739 
4740  if (doDistributeReduction) {
4741  // Process the reductions if required.
4743  teamsOp, builder, moduleTranslation, allocaIP, reductionDecls,
4744  privateReductionVariables, isByRef,
4745  /*isNoWait*/ false, /*isTeamsReduction*/ true);
4746  }
4747  return success();
4748 }
4749 
4750 /// Lowers the FlagsAttr which is applied to the module on the device
4751 /// pass when offloading, this attribute contains OpenMP RTL globals that can
4752 /// be passed as flags to the frontend, otherwise they are set to default
4753 LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
4754  LLVM::ModuleTranslation &moduleTranslation) {
4755  if (!cast<mlir::ModuleOp>(op))
4756  return failure();
4757 
4758  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4759 
4760  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
4761  attribute.getOpenmpDeviceVersion());
4762 
4763  if (attribute.getNoGpuLib())
4764  return success();
4765 
4766  ompBuilder->createGlobalFlag(
4767  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
4768  "__omp_rtl_debug_kind");
4769  ompBuilder->createGlobalFlag(
4770  attribute
4771  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
4772  ,
4773  "__omp_rtl_assume_teams_oversubscription");
4774  ompBuilder->createGlobalFlag(
4775  attribute
4776  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
4777  ,
4778  "__omp_rtl_assume_threads_oversubscription");
4779  ompBuilder->createGlobalFlag(
4780  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
4781  "__omp_rtl_assume_no_thread_state");
4782  ompBuilder->createGlobalFlag(
4783  attribute
4784  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
4785  ,
4786  "__omp_rtl_assume_no_nested_parallelism");
4787  return success();
4788 }
4789 
4790 static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
4791  omp::TargetOp targetOp,
4792  llvm::StringRef parentName = "") {
4793  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
4794 
4795  assert(fileLoc && "No file found from location");
4796  StringRef fileName = fileLoc.getFilename().getValue();
4797 
4798  llvm::sys::fs::UniqueID id;
4799  uint64_t line = fileLoc.getLine();
4800  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
4801  size_t fileHash = llvm::hash_value(fileName.str());
4802  size_t deviceId = 0xdeadf17e;
4803  targetInfo =
4804  llvm::TargetRegionEntryInfo(parentName, deviceId, fileHash, line);
4805  } else {
4806  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
4807  id.getFile(), line);
4808  }
4809 }
4810 
4811 static void
4812 handleDeclareTargetMapVar(MapInfoData &mapData,
4813  LLVM::ModuleTranslation &moduleTranslation,
4814  llvm::IRBuilderBase &builder, llvm::Function *func) {
4815  assert(moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4816  "function only supported for target device codegen");
4817  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
4818  // In the case of declare target mapped variables, the basePointer is
4819  // the reference pointer generated by the convertDeclareTargetAttr
4820  // method. Whereas the kernelValue is the original variable, so for
4821  // the device we must replace all uses of this original global variable
4822  // (stored in kernelValue) with the reference pointer (stored in
4823  // basePointer for declare target mapped variables), as for device the
4824  // data is mapped into this reference pointer and should be loaded
4825  // from it, the original variable is discarded. On host both exist and
4826  // metadata is generated (elsewhere in the convertDeclareTargetAttr)
4827  // function to link the two variables in the runtime and then both the
4828  // reference pointer and the pointer are assigned in the kernel argument
4829  // structure for the host.
4830  if (mapData.IsDeclareTarget[i]) {
4831  // If the original map value is a constant, then we have to make sure all
4832  // of it's uses within the current kernel/function that we are going to
4833  // rewrite are converted to instructions, as we will be altering the old
4834  // use (OriginalValue) from a constant to an instruction, which will be
4835  // illegal and ICE the compiler if the user is a constant expression of
4836  // some kind e.g. a constant GEP.
4837  if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
4838  convertUsersOfConstantsToInstructions(constant, func, false);
4839 
4840  // The users iterator will get invalidated if we modify an element,
4841  // so we populate this vector of uses to alter each user on an
4842  // individual basis to emit its own load (rather than one load for
4843  // all).
4845  for (llvm::User *user : mapData.OriginalValue[i]->users())
4846  userVec.push_back(user);
4847 
4848  for (llvm::User *user : userVec) {
4849  if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
4850  if (insn->getFunction() == func) {
4851  auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
4852  mapData.BasePointers[i]);
4853  load->moveBefore(insn->getIterator());
4854  user->replaceUsesOfWith(mapData.OriginalValue[i], load);
4855  }
4856  }
4857  }
4858  }
4859  }
4860 }
4861 
4862 // The createDeviceArgumentAccessor function generates
4863 // instructions for retrieving (acessing) kernel
4864 // arguments inside of the device kernel for use by
4865 // the kernel. This enables different semantics such as
4866 // the creation of temporary copies of data allowing
4867 // semantics like read-only/no host write back kernel
4868 // arguments.
4869 //
4870 // This currently implements a very light version of Clang's
4871 // EmitParmDecl's handling of direct argument handling as well
4872 // as a portion of the argument access generation based on
4873 // capture types found at the end of emitOutlinedFunctionPrologue
4874 // in Clang. The indirect path handling of EmitParmDecl's may be
4875 // required for future work, but a direct 1-to-1 copy doesn't seem
4876 // possible as the logic is rather scattered throughout Clang's
4877 // lowering and perhaps we wish to deviate slightly.
4878 //
4879 // \param mapData - A container containing vectors of information
4880 // corresponding to the input argument, which should have a
4881 // corresponding entry in the MapInfoData containers
4882 // OrigialValue's.
4883 // \param arg - This is the generated kernel function argument that
4884 // corresponds to the passed in input argument. We generated different
4885 // accesses of this Argument, based on capture type and other Input
4886 // related information.
4887 // \param input - This is the host side value that will be passed to
4888 // the kernel i.e. the kernel input, we rewrite all uses of this within
4889 // the kernel (as we generate the kernel body based on the target's region
4890 // which maintians references to the original input) to the retVal argument
4891 // apon exit of this function inside of the OMPIRBuilder. This interlinks
4892 // the kernel argument to future uses of it in the function providing
4893 // appropriate "glue" instructions inbetween.
4894 // \param retVal - This is the value that all uses of input inside of the
4895 // kernel will be re-written to, the goal of this function is to generate
4896 // an appropriate location for the kernel argument to be accessed from,
4897 // e.g. ByRef will result in a temporary allocation location and then
4898 // a store of the kernel argument into this allocated memory which
4899 // will then be loaded from, ByCopy will use the allocated memory
4900 // directly.
4901 static llvm::IRBuilderBase::InsertPoint
4903  llvm::Value *input, llvm::Value *&retVal,
4904  llvm::IRBuilderBase &builder,
4905  llvm::OpenMPIRBuilder &ompBuilder,
4906  LLVM::ModuleTranslation &moduleTranslation,
4907  llvm::IRBuilderBase::InsertPoint allocaIP,
4908  llvm::IRBuilderBase::InsertPoint codeGenIP) {
4909  assert(ompBuilder.Config.isTargetDevice() &&
4910  "function only supported for target device codegen");
4911  builder.restoreIP(allocaIP);
4912 
4913  omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef;
4914  LLVM::TypeToLLVMIRTranslator typeToLLVMIRTranslator(
4915  ompBuilder.M.getContext());
4916  unsigned alignmentValue = 0;
4917  // Find the associated MapInfoData entry for the current input
4918  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
4919  if (mapData.OriginalValue[i] == input) {
4920  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
4921  capture = mapOp.getMapCaptureType();
4922  // Get information of alignment of mapped object
4923  alignmentValue = typeToLLVMIRTranslator.getPreferredAlignment(
4924  mapOp.getVarType(), ompBuilder.M.getDataLayout());
4925  break;
4926  }
4927 
4928  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
4929  unsigned int defaultAS =
4930  ompBuilder.M.getDataLayout().getProgramAddressSpace();
4931 
4932  // Create the alloca for the argument the current point.
4933  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
4934 
4935  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
4936  v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS));
4937 
4938  builder.CreateStore(&arg, v);
4939 
4940  builder.restoreIP(codeGenIP);
4941 
4942  switch (capture) {
4943  case omp::VariableCaptureKind::ByCopy: {
4944  retVal = v;
4945  break;
4946  }
4947  case omp::VariableCaptureKind::ByRef: {
4948  llvm::LoadInst *loadInst = builder.CreateAlignedLoad(
4949  v->getType(), v,
4950  ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
4951  // CreateAlignedLoad function creates similar LLVM IR:
4952  // %res = load ptr, ptr %input, align 8
4953  // This LLVM IR does not contain information about alignment
4954  // of the loaded value. We need to add !align metadata to unblock
4955  // optimizer. The existence of the !align metadata on the instruction
4956  // tells the optimizer that the value loaded is known to be aligned to
4957  // a boundary specified by the integer value in the metadata node.
4958  // Example:
4959  // %res = load ptr, ptr %input, align 8, !align !align_md_node
4960  // ^ ^
4961  // | |
4962  // alignment of %input address |
4963  // |
4964  // alignment of %res object
4965  if (v->getType()->isPointerTy() && alignmentValue) {
4966  llvm::MDBuilder MDB(builder.getContext());
4967  loadInst->setMetadata(
4968  llvm::LLVMContext::MD_align,
4969  llvm::MDNode::get(builder.getContext(),
4970  MDB.createConstant(llvm::ConstantInt::get(
4971  llvm::Type::getInt64Ty(builder.getContext()),
4972  alignmentValue))));
4973  }
4974  retVal = loadInst;
4975 
4976  break;
4977  }
4978  case omp::VariableCaptureKind::This:
4979  case omp::VariableCaptureKind::VLAType:
4980  // TODO: Consider returning error to use standard reporting for
4981  // unimplemented features.
4982  assert(false && "Currently unsupported capture kind");
4983  break;
4984  }
4985 
4986  return builder.saveIP();
4987 }
4988 
4989 /// Follow uses of `host_eval`-defined block arguments of the given `omp.target`
4990 /// operation and populate output variables with their corresponding host value
4991 /// (i.e. operand evaluated outside of the target region), based on their uses
4992 /// inside of the target region.
4993 ///
4994 /// Loop bounds and steps are only optionally populated, if output vectors are
4995 /// provided.
4996 static void
4997 extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
4998  Value &numTeamsLower, Value &numTeamsUpper,
4999  Value &threadLimit,
5000  llvm::SmallVectorImpl<Value> *lowerBounds = nullptr,
5001  llvm::SmallVectorImpl<Value> *upperBounds = nullptr,
5002  llvm::SmallVectorImpl<Value> *steps = nullptr) {
5003  auto blockArgIface = llvm::cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
5004  for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(),
5005  blockArgIface.getHostEvalBlockArgs())) {
5006  Value hostEvalVar = std::get<0>(item), blockArg = std::get<1>(item);
5007 
5008  for (Operation *user : blockArg.getUsers()) {
5010  .Case([&](omp::TeamsOp teamsOp) {
5011  if (teamsOp.getNumTeamsLower() == blockArg)
5012  numTeamsLower = hostEvalVar;
5013  else if (teamsOp.getNumTeamsUpper() == blockArg)
5014  numTeamsUpper = hostEvalVar;
5015  else if (teamsOp.getThreadLimit() == blockArg)
5016  threadLimit = hostEvalVar;
5017  else
5018  llvm_unreachable("unsupported host_eval use");
5019  })
5020  .Case([&](omp::ParallelOp parallelOp) {
5021  if (parallelOp.getNumThreads() == blockArg)
5022  numThreads = hostEvalVar;
5023  else
5024  llvm_unreachable("unsupported host_eval use");
5025  })
5026  .Case([&](omp::LoopNestOp loopOp) {
5027  auto processBounds =
5028  [&](OperandRange opBounds,
5029  llvm::SmallVectorImpl<Value> *outBounds) -> bool {
5030  bool found = false;
5031  for (auto [i, lb] : llvm::enumerate(opBounds)) {
5032  if (lb == blockArg) {
5033  found = true;
5034  if (outBounds)
5035  (*outBounds)[i] = hostEvalVar;
5036  }
5037  }
5038  return found;
5039  };
5040  bool found =
5041  processBounds(loopOp.getLoopLowerBounds(), lowerBounds);
5042  found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) ||
5043  found;
5044  found = processBounds(loopOp.getLoopSteps(), steps) || found;
5045  (void)found;
5046  assert(found && "unsupported host_eval use");
5047  })
5048  .Default([](Operation *) {
5049  llvm_unreachable("unsupported host_eval use");
5050  });
5051  }
5052  }
5053 }
5054 
5055 /// If \p op is of the given type parameter, return it casted to that type.
5056 /// Otherwise, if its immediate parent operation (or some other higher-level
5057 /// parent, if \p immediateParent is false) is of that type, return that parent
5058 /// casted to the given type.
5059 ///
5060 /// If \p op is \c null or neither it or its parent(s) are of the specified
5061 /// type, return a \c null operation.
5062 template <typename OpTy>
5063 static OpTy castOrGetParentOfType(Operation *op, bool immediateParent = false) {
5064  if (!op)
5065  return OpTy();
5066 
5067  if (OpTy casted = dyn_cast<OpTy>(op))
5068  return casted;
5069 
5070  if (immediateParent)
5071  return dyn_cast_if_present<OpTy>(op->getParentOp());
5072 
5073  return op->getParentOfType<OpTy>();
5074 }
5075 
5076 /// If the given \p value is defined by an \c llvm.mlir.constant operation and
5077 /// it is of an integer type, return its value.
5078 static std::optional<int64_t> extractConstInteger(Value value) {
5079  if (!value)
5080  return std::nullopt;
5081 
5082  if (auto constOp =
5083  dyn_cast_if_present<LLVM::ConstantOp>(value.getDefiningOp()))
5084  if (auto constAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
5085  return constAttr.getInt();
5086 
5087  return std::nullopt;
5088 }
5089 
5090 static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl) {
5091  uint64_t sizeInBits = dl.getTypeSizeInBits(type);
5092  uint64_t sizeInBytes = sizeInBits / 8;
5093  return sizeInBytes;
5094 }
5095 
5096 template <typename OpTy>
5097 static uint64_t getReductionDataSize(OpTy &op) {
5098  if (op.getNumReductionVars() > 0) {
5100  collectReductionDecls(op, reductions);
5101 
5103  members.reserve(reductions.size());
5104  for (omp::DeclareReductionOp &red : reductions)
5105  members.push_back(red.getType());
5106  Operation *opp = op.getOperation();
5107  auto structType = mlir::LLVM::LLVMStructType::getLiteral(
5108  opp->getContext(), members, /*isPacked=*/false);
5109  DataLayout dl = DataLayout(opp->getParentOfType<ModuleOp>());
5110  return getTypeByteSize(structType, dl);
5111  }
5112  return 0;
5113 }
5114 
/// Populate default `MinTeams`, `MaxTeams` and `MaxThreads` to their default
/// values as stated by the corresponding clauses, if constant.
///
/// These default values must be set before the creation of the outlined LLVM
/// function for the target region, so that they can be used to initialize the
/// corresponding global `ConfigurationEnvironmentTy` structure.
static void
initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp,
                       llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs,
                       bool isTargetDevice, bool isGPU) {
  // TODO: Handle constant 'if' clauses.

  Value numThreads, numTeamsLower, numTeamsUpper, threadLimit;
  if (!isTargetDevice) {
    // Host pass: clause values were hoisted out as host_eval operands, so
    // recover them by inspecting block-argument uses inside the region.
    extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
                           threadLimit);
  } else {
    // In the target device, values for these clauses are not passed as
    // host_eval, but instead evaluated prior to entry to the region. This
    // ensures values are mapped and available inside of the target region.
    if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
      numTeamsLower = teamsOp.getNumTeamsLower();
      numTeamsUpper = teamsOp.getNumTeamsUpper();
      threadLimit = teamsOp.getThreadLimit();
    }

    if (auto parallelOp = castOrGetParentOfType<omp::ParallelOp>(capturedOp))
      numThreads = parallelOp.getNumThreads();
  }

  // Handle clauses impacting the number of teams.

  int32_t minTeamsVal = 1, maxTeamsVal = -1;
  if (castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
    // TODO: Use `hostNumTeamsLower` to initialize `minTeamsVal`. For now, match
    // clang and set min and max to the same value.
    if (numTeamsUpper) {
      if (auto val = extractConstInteger(numTeamsUpper))
        minTeamsVal = maxTeamsVal = *val;
    } else {
      // num_teams clause present but not constant: set but unknown (0).
      minTeamsVal = maxTeamsVal = 0;
    }
  } else if (castOrGetParentOfType<omp::ParallelOp>(capturedOp,
                                                    /*immediateParent=*/true) ||
             castOrGetParentOfType<omp::SimdOp>(capturedOp,
                                                /*immediateParent=*/true)) {
    // A parallel/simd construct directly inside target implies a single team.
    minTeamsVal = maxTeamsVal = 1;
  } else {
    // No applicable clause: leave as unset (-1).
    minTeamsVal = maxTeamsVal = -1;
  }

  // Handle clauses impacting the number of threads.

  // Helper: fold a clause value into `result` using the convention
  // -1 = unset, 0 = set but unknown, >0 = known constant.
  auto setMaxValueFromClause = [](Value clauseValue, int32_t &result) {
    if (!clauseValue)
      return;

    if (auto val = extractConstInteger(clauseValue))
      result = *val;

    // Found an applicable clause, so it's not undefined. Mark as unknown
    // because it's not constant.
    if (result < 0)
      result = 0;
  };

  // Extract 'thread_limit' clause from 'target' and 'teams' directives.
  int32_t targetThreadLimitVal = -1, teamsThreadLimitVal = -1;
  setMaxValueFromClause(targetOp.getThreadLimit(), targetThreadLimitVal);
  setMaxValueFromClause(threadLimit, teamsThreadLimitVal);

  // Extract 'max_threads' clause from 'parallel' or set to 1 if it's SIMD.
  int32_t maxThreadsVal = -1;
  if (castOrGetParentOfType<omp::ParallelOp>(capturedOp))
    setMaxValueFromClause(numThreads, maxThreadsVal);
  else if (castOrGetParentOfType<omp::SimdOp>(capturedOp,
                                              /*immediateParent=*/true))
    maxThreadsVal = 1;

  // For max values, < 0 means unset, == 0 means set but unknown. Select the
  // minimum value between 'max_threads' and 'thread_limit' clauses that were
  // set.
  int32_t combinedMaxThreadsVal = targetThreadLimitVal;
  if (combinedMaxThreadsVal < 0 ||
      (teamsThreadLimitVal >= 0 && teamsThreadLimitVal < combinedMaxThreadsVal))
    combinedMaxThreadsVal = teamsThreadLimitVal;

  if (combinedMaxThreadsVal < 0 ||
      (maxThreadsVal >= 0 && maxThreadsVal < combinedMaxThreadsVal))
    combinedMaxThreadsVal = maxThreadsVal;

  // Teams reductions only need buffer space on GPU targets.
  int32_t reductionDataSize = 0;
  if (isGPU && capturedOp) {
    if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp))
      reductionDataSize = getReductionDataSize(teamsOp);
  }

  // Update kernel bounds structure for the `OpenMPIRBuilder` to use.
  omp::TargetRegionFlags kernelFlags = targetOp.getKernelExecFlags(capturedOp);
  assert(
      omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic |
                                               omp::TargetRegionFlags::spmd) &&
      "invalid kernel flags");
  // generic+spmd -> GENERIC_SPMD, generic alone -> GENERIC, spmd alone -> SPMD.
  attrs.ExecFlags =
      omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic)
          ? omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::spmd)
                ? llvm::omp::OMP_TGT_EXEC_MODE_GENERIC_SPMD
                : llvm::omp::OMP_TGT_EXEC_MODE_GENERIC
          : llvm::omp::OMP_TGT_EXEC_MODE_SPMD;
  attrs.MinTeams = minTeamsVal;
  attrs.MaxTeams.front() = maxTeamsVal;
  attrs.MinThreads = 1;
  attrs.MaxThreads.front() = combinedMaxThreadsVal;
  attrs.ReductionDataSize = reductionDataSize;
  // TODO: Allow modified buffer length similar to
  // fopenmp-cuda-teams-reduction-recs-num flag in clang.
  if (attrs.ReductionDataSize != 0)
    attrs.ReductionBufferLength = 1024;
}
5234 
/// Gather LLVM runtime values for all clauses evaluated in the host that are
/// passed to the kernel invocation.
///
/// This function must be called only when compiling for the host. Also, it will
/// only provide correct results if it's called after the body of \c targetOp
/// has been fully generated.
static void
initTargetRuntimeAttrs(llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation,
                       omp::TargetOp targetOp, Operation *capturedOp,
                       llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs) {
  omp::LoopNestOp loopOp = castOrGetParentOfType<omp::LoopNestOp>(capturedOp);
  unsigned numLoops = loopOp ? loopOp.getNumLoops() : 0;

  // Recover the host-side SSA values for the clauses that were hoisted out of
  // the target region as host_eval operands.
  Value numThreads, numTeamsLower, numTeamsUpper, teamsThreadLimit;
  llvm::SmallVector<Value> lowerBounds(numLoops), upperBounds(numLoops),
      steps(numLoops);
  extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
                         teamsThreadLimit, &lowerBounds, &upperBounds, &steps);

  // TODO: Handle constant 'if' clauses.
  if (Value targetThreadLimit = targetOp.getThreadLimit())
    attrs.TargetThreadLimit.front() =
        moduleTranslation.lookupValue(targetThreadLimit);

  if (numTeamsLower)
    attrs.MinTeams = moduleTranslation.lookupValue(numTeamsLower);

  if (numTeamsUpper)
    attrs.MaxTeams.front() = moduleTranslation.lookupValue(numTeamsUpper);

  if (teamsThreadLimit)
    attrs.TeamsThreadLimit.front() =
        moduleTranslation.lookupValue(teamsThreadLimit);

  if (numThreads)
    attrs.MaxThreads = moduleTranslation.lookupValue(numThreads);

  // Only compute a trip count when the kernel flags request it.
  if (omp::bitEnumContainsAny(targetOp.getKernelExecFlags(capturedOp),
                              omp::TargetRegionFlags::trip_count)) {
    llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
    attrs.LoopTripCount = nullptr;

    // To calculate the trip count, we multiply together the trip counts of
    // every collapsed canonical loop. We don't need to create the loop nests
    // here, since we're only interested in the trip count.
    for (auto [loopLower, loopUpper, loopStep] :
         llvm::zip_equal(lowerBounds, upperBounds, steps)) {
      llvm::Value *lowerBound = moduleTranslation.lookupValue(loopLower);
      llvm::Value *upperBound = moduleTranslation.lookupValue(loopUpper);
      llvm::Value *step = moduleTranslation.lookupValue(loopStep);

      llvm::OpenMPIRBuilder::LocationDescription loc(builder);
      llvm::Value *tripCount = ompBuilder->calculateCanonicalLoopTripCount(
          loc, lowerBound, upperBound, step, /*IsSigned=*/true,
          loopOp.getLoopInclusive());

      // First loop: the trip count is just this loop's count.
      if (!attrs.LoopTripCount) {
        attrs.LoopTripCount = tripCount;
        continue;
      }

      // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
      attrs.LoopTripCount = builder.CreateMul(attrs.LoopTripCount, tripCount,
                                              {}, /*HasNUW=*/true);
    }
  }
}
5303 
/// Lower an `omp.target` operation via OpenMPIRBuilder::createTarget, which
/// produces the outlined kernel (or host fallback) plus the runtime launch
/// code. Host-evaluated values and non-constant mapped values become kernel
/// arguments; privatized variables that need a map entry are resolved through
/// `mappedPrivateVars`.
5304 static LogicalResult
5305 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
5306  LLVM::ModuleTranslation &moduleTranslation) {
5307  auto targetOp = cast<omp::TargetOp>(opInst);
 // Bail out early on clauses this translation does not support yet.
5308  if (failed(checkImplementationStatus(opInst)))
5309  return failure();
5310 
5311  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5312  bool isTargetDevice = ompBuilder->Config.isTargetDevice();
5313  bool isGPU = ompBuilder->Config.isGPU();
5314 
5315  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
5316  auto argIface = cast<omp::BlockArgOpenMPOpInterface>(opInst);
5317  auto &targetRegion = targetOp.getRegion();
5318  // Holds the private vars that have been mapped along with the block argument
5319  // that corresponds to the MapInfoOp corresponding to the private var in
5320  // question. So, for instance:
5321  //
5322  // %10 = omp.map.info var_ptr(%6#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, ..)
5323  // omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1)
5324  //
5325  // Then, %10 has been created so that the descriptor can be used by the
5326  // privatizer @box.privatizer on the device side. Here we'd record {%6#0,
5327  // %arg0} in the mappedPrivateVars map.
5328  llvm::DenseMap<Value, Value> mappedPrivateVars;
5329  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
5330  SmallVector<Value> mapVars = targetOp.getMapVars();
5331  SmallVector<Value> hdaVars = targetOp.getHasDeviceAddrVars();
5332  ArrayRef<BlockArgument> mapBlockArgs = argIface.getMapBlockArgs();
5333  ArrayRef<BlockArgument> hdaBlockArgs = argIface.getHasDeviceAddrBlockArgs();
 // Set inside bodyCB once the outlined function exists; consumed at the end
 // for declare-target fixups on the device path.
5334  llvm::Function *llvmOutlinedFn = nullptr;
5335 
5336  // TODO: It can also be false if a compile-time constant `false` IF clause is
5337  // specified.
5338  bool isOffloadEntry =
5339  isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
5340 
5341  // For some private variables, the MapsForPrivatizedVariablesPass
5342  // creates MapInfoOp instances. Go through the private variables and
5343  // the mapped variables so that during codegeneration we are able
5344  // to quickly look up the corresponding map variable, if any for each
5345  // private variable.
5346  if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) {
5347  OperandRange privateVars = targetOp.getPrivateVars();
5348  std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
5349  std::optional<DenseI64ArrayAttr> privateMapIndices =
5350  targetOp.getPrivateMapsAttr();
5351 
5352  for (auto [privVarIdx, privVarSymPair] :
5353  llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) {
5354  auto privVar = std::get<0>(privVarSymPair);
5355  auto privSym = std::get<1>(privVarSymPair);
5356 
5357  SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
5358  omp::PrivateClauseOp privatizer =
5359  findPrivatizer(targetOp, privatizerName);
5360 
 // Only privatizers that require a map entry participate here.
5361  if (!privatizer.needsMap())
5362  continue;
5363 
5364  mlir::Value mappedValue =
5365  targetOp.getMappedValueForPrivateVar(privVarIdx);
5366  assert(mappedValue && "Expected to find mapped value for a privatized "
5367  "variable that needs mapping");
5368 
5369  // The MapInfoOp defining the map var isn't really needed later.
5370  // So, we don't store it in any datastructure. Instead, we just
5371  // do some sanity checks on it right now.
5372  auto mapInfoOp = mappedValue.getDefiningOp<omp::MapInfoOp>();
5373  [[maybe_unused]] Type varType = mapInfoOp.getVarType();
5374 
5375  // Check #1: Check that the type of the private variable matches
5376  // the type of the variable being mapped.
5377  if (!isa<LLVM::LLVMPointerType>(privVar.getType()))
5378  assert(
5379  varType == privVar.getType() &&
5380  "Type of private var doesn't match the type of the mapped value");
5381 
5382  // Ok, only 1 sanity check for now.
5383  // Record the block argument corresponding to this mapvar.
5384  mappedPrivateVars.insert(
5385  {privVar,
5386  targetRegion.getArgument(argIface.getMapBlockArgsStart() +
5387  (*privateMapIndices)[privVarIdx])});
5388  }
5389  }
5390 
5391  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
 // Callback that generates the body of the outlined target function:
 // forwards target attrs, binds map/has_device_addr block args, performs
 // privatization, then inlines the target region.
5392  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
5393  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
5394  llvm::IRBuilderBase::InsertPointGuard guard(builder);
5395  builder.SetCurrentDebugLocation(llvm::DebugLoc());
5396  // Forward target-cpu and target-features function attributes from the
5397  // original function to the new outlined function.
5398  llvm::Function *llvmParentFn =
5399  moduleTranslation.lookupFunction(parentFn.getName());
5400  llvmOutlinedFn = codeGenIP.getBlock()->getParent();
5401  assert(llvmParentFn && llvmOutlinedFn &&
5402  "Both parent and outlined functions must exist at this point");
5403 
5404  if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
5405  attr.isStringAttribute())
5406  llvmOutlinedFn->addFnAttr(attr);
5407 
5408  if (auto attr = llvmParentFn->getFnAttribute("target-features");
5409  attr.isStringAttribute())
5410  llvmOutlinedFn->addFnAttr(attr);
5411 
 // Bind each map/has_device_addr entry block argument to the LLVM value of
 // the variable it maps.
5412  for (auto [arg, mapOp] : llvm::zip_equal(mapBlockArgs, mapVars)) {
5413  auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
5414  llvm::Value *mapOpValue =
5415  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
5416  moduleTranslation.mapValue(arg, mapOpValue);
5417  }
5418  for (auto [arg, mapOp] : llvm::zip_equal(hdaBlockArgs, hdaVars)) {
5419  auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
5420  llvm::Value *mapOpValue =
5421  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
5422  moduleTranslation.mapValue(arg, mapOpValue);
5423  }
5424 
5425  // Do privatization after moduleTranslation has already recorded
5426  // mapped values.
5427  PrivateVarsInfo privateVarsInfo(targetOp);
5428 
5429  llvm::Expected<llvm::BasicBlock *> afterAllocas =
5430  allocatePrivateVars(builder, moduleTranslation, privateVarsInfo,
5431  allocaIP, &mappedPrivateVars);
5432 
5433  if (failed(handleError(afterAllocas, *targetOp)))
5434  return llvm::make_error<PreviouslyReportedError>();
5435 
5436  builder.restoreIP(codeGenIP);
5437  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo,
5438  &mappedPrivateVars),
5439  *targetOp)
5440  .failed())
5441  return llvm::make_error<PreviouslyReportedError>();
5442 
5443  if (failed(copyFirstPrivateVars(
5444  targetOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
5445  privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
5446  targetOp.getPrivateNeedsBarrier(), &mappedPrivateVars)))
5447  return llvm::make_error<PreviouslyReportedError>();
5448 
 // Collect the `dealloc` regions of all privatizers so cleanup can be
 // inlined at the region's exit block below.
5449  SmallVector<Region *> privateCleanupRegions;
5450  llvm::transform(privateVarsInfo.privatizers,
5451  std::back_inserter(privateCleanupRegions),
5452  [](omp::PrivateClauseOp privatizer) {
5453  return &privatizer.getDeallocRegion();
5454  });
5455 
5457  targetRegion, "omp.target", builder, moduleTranslation);
5458 
5459  if (!exitBlock)
5460  return exitBlock.takeError();
5461 
5462  builder.SetInsertPoint(*exitBlock);
5463  if (!privateCleanupRegions.empty()) {
5464  if (failed(inlineOmpRegionCleanup(
5465  privateCleanupRegions, privateVarsInfo.llvmVars,
5466  moduleTranslation, builder, "omp.targetop.private.cleanup",
5467  /*shouldLoadCleanupRegionArg=*/false))) {
5468  return llvm::createStringError(
5469  "failed to inline `dealloc` region of `omp.private` "
5470  "op in the target region");
5471  }
5472  return builder.saveIP();
5473  }
5474 
5475  return InsertPointTy(exitBlock.get(), exitBlock.get()->end());
5476  };
5477 
5478  StringRef parentName = parentFn.getName();
5479 
5480  llvm::TargetRegionEntryInfo entryInfo;
5481 
5482  getTargetEntryUniqueInfo(entryInfo, targetOp, parentName);
5483 
5484  MapInfoData mapData;
5485  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
5486  builder, /*useDevPtrOperands=*/{},
5487  /*useDevAddrOperands=*/{}, hdaVars);
5488 
5489  MapInfosTy combinedInfos;
 // Callback producing the combined map information at the given insertion
 // point; the result is cached in `combinedInfos`.
5490  auto genMapInfoCB =
5491  [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) -> MapInfosTy & {
5492  builder.restoreIP(codeGenIP);
5493  genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, true);
5494  return combinedInfos;
5495  };
5496 
5497  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
5498  llvm::Value *&retVal, InsertPointTy allocaIP,
5499  InsertPointTy codeGenIP)
5500  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
5501  llvm::IRBuilderBase::InsertPointGuard guard(builder);
5502  builder.SetCurrentDebugLocation(llvm::DebugLoc());
5503  // We just return the unaltered argument for the host function
5504  // for now, some alterations may be required in the future to
5505  // keep host fallback functions working identically to the device
5506  // version (e.g. pass ByCopy values should be treated as such on
5507  // host and device, currently not always the case)
5508  if (!isTargetDevice) {
5509  retVal = cast<llvm::Value>(&arg);
5510  return codeGenIP;
5511  }
5512 
5513  return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
5514  *ompBuilder, moduleTranslation,
5515  allocaIP, codeGenIP);
5516  };
5517 
5518  llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs runtimeAttrs;
5519  llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs defaultAttrs;
5520  Operation *targetCapturedOp = targetOp.getInnermostCapturedOmpOp();
5521  initTargetDefaultAttrs(targetOp, targetCapturedOp, defaultAttrs,
5522  isTargetDevice, isGPU);
5523 
5524  // Collect host-evaluated values needed to properly launch the kernel from the
5525  // host.
5526  if (!isTargetDevice)
5527  initTargetRuntimeAttrs(builder, moduleTranslation, targetOp,
5528  targetCapturedOp, runtimeAttrs);
5529 
5530  // Pass host-evaluated values as parameters to the kernel / host fallback,
5531  // except if they are constants. In any case, map the MLIR block argument to
5532  // the corresponding LLVM values.
5534  SmallVector<Value> hostEvalVars = targetOp.getHostEvalVars();
5535  ArrayRef<BlockArgument> hostEvalBlockArgs = argIface.getHostEvalBlockArgs();
5536  for (auto [arg, var] : llvm::zip_equal(hostEvalBlockArgs, hostEvalVars)) {
5537  llvm::Value *value = moduleTranslation.lookupValue(var);
5538  moduleTranslation.mapValue(arg, value);
5539 
5540  if (!llvm::isa<llvm::Constant>(value))
5541  kernelInput.push_back(value);
5542  }
5543 
5544  for (size_t i = 0, e = mapData.OriginalValue.size(); i != e; ++i) {
5545  // declare target arguments are not passed to kernels as arguments
5546  // TODO: We currently do not handle cases where a member is explicitly
5547  // passed in as an argument, this will likley need to be handled in
5548  // the near future, rather than using IsAMember, it may be better to
5549  // test if the relevant BlockArg is used within the target region and
5550  // then use that as a basis for exclusion in the kernel inputs.
5551  if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
5552  kernelInput.push_back(mapData.OriginalValue[i]);
5553  }
5554 
5556  buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
5557  moduleTranslation, dds);
5558 
5559  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
5560  findAllocaInsertPoint(builder, moduleTranslation);
5561  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
5562 
5563  llvm::OpenMPIRBuilder::TargetDataInfo info(
5564  /*RequiresDevicePointerInfo=*/false,
5565  /*SeparateBeginEndCalls=*/true);
5566 
 // Lazily materialize user-defined mapper functions; also records whether
 // any mapper exists so the runtime call can be selected accordingly.
5567  auto customMapperCB =
5568  [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
5569  if (!combinedInfos.Mappers[i])
5570  return nullptr;
5571  info.HasMapper = true;
5572  return getOrCreateUserDefinedMapperFunc(combinedInfos.Mappers[i], builder,
5573  moduleTranslation);
5574  };
5575 
5576  llvm::Value *ifCond = nullptr;
5577  if (Value targetIfCond = targetOp.getIfExpr())
5578  ifCond = moduleTranslation.lookupValue(targetIfCond);
5579 
 // All the pieces are assembled here: the OpenMPIRBuilder emits the kernel
 // (device) or the launch + host fallback (host).
5580  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
5581  moduleTranslation.getOpenMPBuilder()->createTarget(
5582  ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), info, entryInfo,
5583  defaultAttrs, runtimeAttrs, ifCond, kernelInput, genMapInfoCB, bodyCB,
5584  argAccessorCB, customMapperCB, dds, targetOp.getNowait());
5585 
5586  if (failed(handleError(afterIP, opInst)))
5587  return failure();
5588 
5589  builder.restoreIP(*afterIP);
5590 
5591  // Remap access operations to declare target reference pointers for the
5592  // device, essentially generating extra loadop's as necessary
5593  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
5594  handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
5595  llvmOutlinedFn);
5596 
5597  return success();
5598 }
5599 
/// Process an `omp.declare_target` attribute attached to a function or a
/// global. For functions on the device pass, host-only wrappers are deleted
/// from the LLVM module; for globals, the variable is registered with the
/// OpenMPIRBuilder's offload bookkeeping.
5600 static LogicalResult
5601 convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
5602  LLVM::ModuleTranslation &moduleTranslation) {
5603  // Amend omp.declare_target by deleting the IR of the outlined functions
5604  // created for target regions. They cannot be filtered out from MLIR earlier
5605  // because the omp.target operation inside must be translated to LLVM, but
5606  // the wrapper functions themselves must not remain at the end of the
5607  // process. We know that functions where omp.declare_target does not match
5608  // omp.is_target_device at this stage can only be wrapper functions because
5609  // those that aren't are removed earlier as an MLIR transformation pass.
5610  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
5611  if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
5612  op->getParentOfType<ModuleOp>().getOperation())) {
 // Nothing to delete on the host pass.
5613  if (!offloadMod.getIsTargetDevice())
5614  return success();
5615 
5616  omp::DeclareTargetDeviceType declareType =
5617  attribute.getDeviceType().getValue();
5618 
5619  if (declareType == omp::DeclareTargetDeviceType::host) {
 // NOTE(review): lookupFunction's result is dereferenced without a null
 // check — assumes the function was already translated; confirm.
5620  llvm::Function *llvmFunc =
5621  moduleTranslation.lookupFunction(funcOp.getName());
5622  llvmFunc->dropAllReferences();
5623  llvmFunc->eraseFromParent();
5624  }
5625  }
5626  return success();
5627  }
5628 
5629  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
5630  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
5631  if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
5632  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5633  bool isDeclaration = gOp.isDeclaration();
5634  bool isExternallyVisible =
5635  gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
5636  auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
5637  llvm::StringRef mangledName = gOp.getSymName();
5638  auto captureClause =
5639  convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
5640  auto deviceClause =
5641  convertToDeviceClauseKind(attribute.getDeviceType().getValue());
5642  // unused for MLIR at the moment, required in Clang for book
5643  // keeping
5644  std::vector<llvm::GlobalVariable *> generatedRefs;
5645 
 // Single-element triple list taken from the module attribute, if set.
5646  std::vector<llvm::Triple> targetTriple;
5647  auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>(
5648  op->getParentOfType<mlir::ModuleOp>()->getAttr(
5649  LLVM::LLVMDialect::getTargetTripleAttrName()));
5650  if (targetTripleAttr)
5651  targetTriple.emplace_back(targetTripleAttr.data());
5652 
 // Provides (filename, line) for the unique target entry info; both fall
 // back to empty/zero when no FileLineColLoc is attached.
5653  auto fileInfoCallBack = [&loc]() {
5654  std::string filename = "";
5655  std::uint64_t lineNo = 0;
5656 
5657  if (loc) {
5658  filename = loc.getFilename().str();
5659  lineNo = loc.getLine();
5660  }
5661 
5662  return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
5663  lineNo);
5664  };
5665 
5666  ompBuilder->registerTargetGlobalVariable(
5667  captureClause, deviceClause, isDeclaration, isExternallyVisible,
5668  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
5669  generatedRefs, /*OpenMPSimd*/ false, targetTriple,
5670  /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
5671  gVal->getType(), gVal);
5672 
 // On the device, non-`to` captures (or unified shared memory) also need
 // the declare-target reference pointer to be materialized.
5673  if (ompBuilder->Config.isTargetDevice() &&
5674  (attribute.getCaptureClause().getValue() !=
5675  mlir::omp::DeclareTargetCaptureClause::to ||
5676  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
5677  ompBuilder->getAddrOfDeclareTargetVar(
5678  captureClause, deviceClause, isDeclaration, isExternallyVisible,
5679  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
5680  generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
5681  /*GlobalInitializer*/ nullptr,
5682  /*VariableLinkage*/ nullptr);
5683  }
5684  }
5685  }
5686 
5687  return success();
5688 }
5689 
5690 // Returns true if the operation is inside a TargetOp or
5691 // is part of a declare target function.
5692 static bool isTargetDeviceOp(Operation *op) {
5693  // Assumes no reverse offloading
5694  if (op->getParentOfType<omp::TargetOp>())
5695  return true;
5696 
5697  // Certain operations return results, and whether utilised in host or
5698  // target there is a chance an LLVM Dialect operation depends on it
5699  // by taking it in as an operand, so we must always lower these in
5700  // some manner or result in an ICE (whether they end up in a no-op
5701  // or otherwise).
5702  if (mlir::isa<omp::ThreadprivateOp>(op))
5703  return true;
5704 
5705  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
5706  if (auto declareTargetIface =
5707  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
5708  parentFn.getOperation()))
5709  if (declareTargetIface.isDeclareTarget() &&
5710  declareTargetIface.getDeclareTargetDeviceType() !=
5711  mlir::omp::DeclareTargetDeviceType::host)
5712  return true;
5713 
5714  return false;
5715 }
5716 
5717 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
5718 /// OpenMP runtime calls).
5719 static LogicalResult
5720 convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
5721  LLVM::ModuleTranslation &moduleTranslation) {
5722  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5723 
5724  // For each loop, introduce one stack frame to hold loop information. Ensure
5725  // this is only done for the outermost loop wrapper to prevent introducing
5726  // multiple stack frames for a single loop. Initially set to null, the loop
5727  // information structure is initialized during translation of the nested
5728  // omp.loop_nest operation, making it available to translation of all loop
5729  // wrappers after their body has been successfully translated.
5730  bool isOutermostLoopWrapper =
5731  isa_and_present<omp::LoopWrapperInterface>(op) &&
5732  !dyn_cast_if_present<omp::LoopWrapperInterface>(op->getParentOp());
5733 
5734  if (isOutermostLoopWrapper)
5735  moduleTranslation.stackPush<OpenMPLoopInfoStackFrame>();
5736 
 // Dispatch on the concrete op type; each .Case below forwards to the
 // dedicated convert* helper for that construct.
5737  auto result =
5739  .Case([&](omp::BarrierOp op) -> LogicalResult {
5740  if (failed(checkImplementationStatus(*op)))
5741  return failure();
5742 
5743  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
5744  ompBuilder->createBarrier(builder.saveIP(),
5745  llvm::omp::OMPD_barrier);
5746  LogicalResult res = handleError(afterIP, *op);
5747  if (res.succeeded()) {
5748  // If the barrier generated a cancellation check, the insertion
5749  // point might now need to be changed to a new continuation block
5750  builder.restoreIP(*afterIP);
5751  }
5752  return res;
5753  })
5754  .Case([&](omp::TaskyieldOp op) {
5755  if (failed(checkImplementationStatus(*op)))
5756  return failure();
5757 
5758  ompBuilder->createTaskyield(builder.saveIP());
5759  return success();
5760  })
5761  .Case([&](omp::FlushOp op) {
5762  if (failed(checkImplementationStatus(*op)))
5763  return failure();
5764 
5765  // No support in Openmp runtime function (__kmpc_flush) to accept
5766  // the argument list.
5767  // OpenMP standard states the following:
5768  // "An implementation may implement a flush with a list by ignoring
5769  // the list, and treating it the same as a flush without a list."
5770  //
5771  // The argument list is discarded so that, flush with a list is
5772  // treated same as a flush without a list.
5773  ompBuilder->createFlush(builder.saveIP());
5774  return success();
5775  })
5776  .Case([&](omp::ParallelOp op) {
5777  return convertOmpParallel(op, builder, moduleTranslation);
5778  })
5779  .Case([&](omp::MaskedOp) {
5780  return convertOmpMasked(*op, builder, moduleTranslation);
5781  })
5782  .Case([&](omp::MasterOp) {
5783  return convertOmpMaster(*op, builder, moduleTranslation);
5784  })
5785  .Case([&](omp::CriticalOp) {
5786  return convertOmpCritical(*op, builder, moduleTranslation);
5787  })
5788  .Case([&](omp::OrderedRegionOp) {
5789  return convertOmpOrderedRegion(*op, builder, moduleTranslation);
5790  })
5791  .Case([&](omp::OrderedOp) {
5792  return convertOmpOrdered(*op, builder, moduleTranslation);
5793  })
5794  .Case([&](omp::WsloopOp) {
5795  return convertOmpWsloop(*op, builder, moduleTranslation);
5796  })
5797  .Case([&](omp::SimdOp) {
5798  return convertOmpSimd(*op, builder, moduleTranslation);
5799  })
5800  .Case([&](omp::AtomicReadOp) {
5801  return convertOmpAtomicRead(*op, builder, moduleTranslation);
5802  })
5803  .Case([&](omp::AtomicWriteOp) {
5804  return convertOmpAtomicWrite(*op, builder, moduleTranslation);
5805  })
5806  .Case([&](omp::AtomicUpdateOp op) {
5807  return convertOmpAtomicUpdate(op, builder, moduleTranslation);
5808  })
5809  .Case([&](omp::AtomicCaptureOp op) {
5810  return convertOmpAtomicCapture(op, builder, moduleTranslation);
5811  })
5812  .Case([&](omp::CancelOp op) {
5813  return convertOmpCancel(op, builder, moduleTranslation);
5814  })
5815  .Case([&](omp::CancellationPointOp op) {
5816  return convertOmpCancellationPoint(op, builder, moduleTranslation);
5817  })
5818  .Case([&](omp::SectionsOp) {
5819  return convertOmpSections(*op, builder, moduleTranslation);
5820  })
5821  .Case([&](omp::SingleOp op) {
5822  return convertOmpSingle(op, builder, moduleTranslation);
5823  })
5824  .Case([&](omp::TeamsOp op) {
5825  return convertOmpTeams(op, builder, moduleTranslation);
5826  })
5827  .Case([&](omp::TaskOp op) {
5828  return convertOmpTaskOp(op, builder, moduleTranslation);
5829  })
5830  .Case([&](omp::TaskgroupOp op) {
5831  return convertOmpTaskgroupOp(op, builder, moduleTranslation);
5832  })
5833  .Case([&](omp::TaskwaitOp op) {
5834  return convertOmpTaskwaitOp(op, builder, moduleTranslation);
5835  })
5836  .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareMapperOp,
5837  omp::DeclareMapperInfoOp, omp::DeclareReductionOp,
5838  omp::CriticalDeclareOp>([](auto op) {
5839  // `yield` and `terminator` can be just omitted. The block structure
5840  // was created in the region that handles their parent operation.
5841  // `declare_reduction` will be used by reductions and is not
5842  // converted directly, skip it.
5843  // `declare_mapper` and `declare_mapper.info` are handled whenever
5844  // they are referred to through a `map` clause.
5845  // `critical.declare` is only used to declare names of critical
5846  // sections which will be used by `critical` ops and hence can be
5847  // ignored for lowering. The OpenMP IRBuilder will create unique
5848  // name for critical section names.
5849  return success();
5850  })
5851  .Case([&](omp::ThreadprivateOp) {
5852  return convertOmpThreadprivate(*op, builder, moduleTranslation);
5853  })
5854  .Case<omp::TargetDataOp, omp::TargetEnterDataOp,
5855  omp::TargetExitDataOp, omp::TargetUpdateOp>([&](auto op) {
5856  return convertOmpTargetData(op, builder, moduleTranslation);
5857  })
5858  .Case([&](omp::TargetOp) {
5859  return convertOmpTarget(*op, builder, moduleTranslation);
5860  })
5861  .Case([&](omp::DistributeOp) {
5862  return convertOmpDistribute(*op, builder, moduleTranslation);
5863  })
5864  .Case([&](omp::LoopNestOp) {
5865  return convertOmpLoopNest(*op, builder, moduleTranslation);
5866  })
5867  .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
5868  [&](auto op) {
5869  // No-op, should be handled by relevant owning operations e.g.
5870  // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
5871  // etc. and then discarded
5872  return success();
5873  })
5874  .Default([&](Operation *inst) {
5875  return inst->emitError()
5876  << "not yet implemented: " << inst->getName();
5877  });
5878 
 // Pop the loop-info frame pushed above regardless of success/failure.
5879  if (isOutermostLoopWrapper)
5880  moduleTranslation.stackPop();
5881 
5882  return result;
5883 }
5884 
/// Translate an operation selected for the device path. Currently a thin
/// wrapper: device ops take the same lowering path as host ops; the separate
/// entry point exists so the dispatcher can distinguish the two cases.
5885 static LogicalResult
5886 convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
5887  LLVM::ModuleTranslation &moduleTranslation) {
5888  return convertHostOrTargetOperation(op, builder, moduleTranslation);
5889 }
5890 
/// During the device pass, walk a non-device op looking for nested
/// target-related ops and translate those; everything else is translated only
/// as a "fake" scope so nested target ops can resolve values defined outside.
5891 static LogicalResult
5892 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
5893  LLVM::ModuleTranslation &moduleTranslation) {
 // Direct hits need no walk.
5894  if (isa<omp::TargetOp>(op))
5895  return convertOmpTarget(*op, builder, moduleTranslation);
5896  if (isa<omp::TargetDataOp>(op))
5897  return convertOmpTargetData(op, builder, moduleTranslation);
5898  bool interrupted =
5899  op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
5900  if (isa<omp::TargetOp>(oper)) {
5901  if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
5902  return WalkResult::interrupt();
 // Skip the subtree: the target op translated its own region.
5903  return WalkResult::skip();
5904  }
5905  if (isa<omp::TargetDataOp>(oper)) {
5906  if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
5907  return WalkResult::interrupt();
5908  return WalkResult::skip();
5909  }
5910 
5911  // Non-target ops might nest target-related ops, therefore, we
5912  // translate them as non-OpenMP scopes. Translating them is needed by
5913  // nested target-related ops since they might need LLVM values defined
5914  // in their parent non-target ops.
5915  if (isa<omp::OpenMPDialect>(oper->getDialect()) &&
5916  oper->getParentOfType<LLVM::LLVMFuncOp>() &&
5917  !oper->getRegions().empty()) {
5918  if (auto blockArgsIface =
5919  dyn_cast<omp::BlockArgOpenMPOpInterface>(oper))
5920  forwardArgs(moduleTranslation, blockArgsIface);
5921  else {
5922  // Here we map entry block arguments of
5923  // non-BlockArgOpenMPOpInterface ops if they can be encountered
5924  // inside of a function and they define any of these arguments.
5925  if (isa<mlir::omp::AtomicUpdateOp>(oper))
5926  for (auto [operand, arg] :
5927  llvm::zip_equal(oper->getOperands(),
5928  oper->getRegion(0).getArguments())) {
5929  moduleTranslation.mapValue(
5930  arg, builder.CreateLoad(
5931  moduleTranslation.convertType(arg.getType()),
5932  moduleTranslation.lookupValue(operand)));
5933  }
5934  }
5935 
5936  if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) {
5937  assert(builder.GetInsertBlock() &&
5938  "No insert block is set for the builder");
5939  for (auto iv : loopNest.getIVs()) {
5940  // Map iv to an undefined value just to keep the IR validity.
5941  moduleTranslation.mapValue(
5943  moduleTranslation.convertType(iv.getType())));
5944  }
5945  }
5946 
5947  for (Region &region : oper->getRegions()) {
5948  // Regions are fake in the sense that they are not a truthful
5949  // translation of the OpenMP construct being converted (e.g. no
5950  // OpenMP runtime calls will be generated). We just need this to
5951  // prepare the kernel invocation args.
5953  auto result = convertOmpOpRegions(
5954  region, oper->getName().getStringRef().str() + ".fake.region",
5955  builder, moduleTranslation, &phis);
5956  if (failed(handleError(result, *oper)))
5957  return WalkResult::interrupt();
5958 
5959  builder.SetInsertPoint(result.get(), result.get()->end());
5960  }
5961 
5962  return WalkResult::skip();
5963  }
5964 
5965  return WalkResult::advance();
5966  }).wasInterrupted();
 // Success iff the walk completed without any conversion failing.
5967  return failure(interrupted);
5968 }
5969 
5970 namespace {
5971 
5972 /// Implementation of the dialect interface that converts operations belonging
5973 /// to the OpenMP dialect to LLVM IR.
 /// Kept in an anonymous namespace so the type stays file-local; it is
 /// registered with the dialect via addInterfaces below.
5974 class OpenMPDialectLLVMIRTranslationInterface
5976 public:
5978 
5979  /// Translates the given operation to LLVM IR using the provided IR builder
5980  /// and saving the state in `moduleTranslation`.
5981  LogicalResult
5982  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
5983  LLVM::ModuleTranslation &moduleTranslation) const final;
5984 
5985  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
5986  /// runtime calls, or operation amendments
5987  LogicalResult
5989  NamedAttribute attribute,
5990  LLVM::ModuleTranslation &moduleTranslation) const final;
5991 };
5992 
5993 } // namespace
5994 
5995 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
5996  Operation *op, ArrayRef<llvm::Instruction *> instructions,
5997  NamedAttribute attribute,
5998  LLVM::ModuleTranslation &moduleTranslation) const {
5999  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
6000  attribute.getName())
6001  .Case("omp.is_target_device",
6002  [&](Attribute attr) {
6003  if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
6004  llvm::OpenMPIRBuilderConfig &config =
6005  moduleTranslation.getOpenMPBuilder()->Config;
6006  config.setIsTargetDevice(deviceAttr.getValue());
6007  return success();
6008  }
6009  return failure();
6010  })
6011  .Case("omp.is_gpu",
6012  [&](Attribute attr) {
6013  if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
6014  llvm::OpenMPIRBuilderConfig &config =
6015  moduleTranslation.getOpenMPBuilder()->Config;
6016  config.setIsGPU(gpuAttr.getValue());
6017  return success();
6018  }
6019  return failure();
6020  })
6021  .Case("omp.host_ir_filepath",
6022  [&](Attribute attr) {
6023  if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
6024  llvm::OpenMPIRBuilder *ompBuilder =
6025  moduleTranslation.getOpenMPBuilder();
6026  ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
6027  return success();
6028  }
6029  return failure();
6030  })
6031  .Case("omp.flags",
6032  [&](Attribute attr) {
6033  if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
6034  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
6035  return failure();
6036  })
6037  .Case("omp.version",
6038  [&](Attribute attr) {
6039  if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
6040  llvm::OpenMPIRBuilder *ompBuilder =
6041  moduleTranslation.getOpenMPBuilder();
6042  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
6043  versionAttr.getVersion());
6044  return success();
6045  }
6046  return failure();
6047  })
6048  .Case("omp.declare_target",
6049  [&](Attribute attr) {
6050  if (auto declareTargetAttr =
6051  dyn_cast<omp::DeclareTargetAttr>(attr))
6052  return convertDeclareTargetAttr(op, declareTargetAttr,
6053  moduleTranslation);
6054  return failure();
6055  })
6056  .Case("omp.requires",
6057  [&](Attribute attr) {
6058  if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
6059  using Requires = omp::ClauseRequires;
6060  Requires flags = requiresAttr.getValue();
6061  llvm::OpenMPIRBuilderConfig &config =
6062  moduleTranslation.getOpenMPBuilder()->Config;
6063  config.setHasRequiresReverseOffload(
6064  bitEnumContainsAll(flags, Requires::reverse_offload));
6065  config.setHasRequiresUnifiedAddress(
6066  bitEnumContainsAll(flags, Requires::unified_address));
6067  config.setHasRequiresUnifiedSharedMemory(
6068  bitEnumContainsAll(flags, Requires::unified_shared_memory));
6069  config.setHasRequiresDynamicAllocators(
6070  bitEnumContainsAll(flags, Requires::dynamic_allocators));
6071  return success();
6072  }
6073  return failure();
6074  })
6075  .Case("omp.target_triples",
6076  [&](Attribute attr) {
6077  if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
6078  llvm::OpenMPIRBuilderConfig &config =
6079  moduleTranslation.getOpenMPBuilder()->Config;
6080  config.TargetTriples.clear();
6081  config.TargetTriples.reserve(triplesAttr.size());
6082  for (Attribute tripleAttr : triplesAttr) {
6083  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
6084  config.TargetTriples.emplace_back(tripleStrAttr.getValue());
6085  else
6086  return failure();
6087  }
6088  return success();
6089  }
6090  return failure();
6091  })
6092  .Default([](Attribute) {
6093  // Fall through for omp attributes that do not require lowering.
6094  return success();
6095  })(attribute.getValue());
6096 
6097  return failure();
6098 }
6099 
6100 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
6101 /// (including OpenMP runtime calls).
6102 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
6103  Operation *op, llvm::IRBuilderBase &builder,
6104  LLVM::ModuleTranslation &moduleTranslation) const {
6105 
6106  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
6107  if (ompBuilder->Config.isTargetDevice()) {
6108  if (isTargetDeviceOp(op)) {
6109  return convertTargetDeviceOp(op, builder, moduleTranslation);
6110  } else {
6111  return convertTargetOpsInNest(op, builder, moduleTranslation);
6112  }
6113  }
6114  return convertHostOrTargetOperation(op, builder, moduleTranslation);
6115 }
6116 
6118  registry.insert<omp::OpenMPDialect>();
6119  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
6120  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
6121  });
6122 }
6123 
6125  DialectRegistry registry;
6127  context.appendDialectRegistry(registry);
6128 }
union mlir::linalg::@1205::ArityGroupAndKind::Kind kind
@ None
static llvm::Value * getRefPtrIfDeclareTarget(mlir::Value value, LLVM::ModuleTranslation &moduleTranslation)
static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::Function *func)
static LogicalResult convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static llvm::omp::OrderKind convertOrderKind(std::optional< omp::ClauseOrderKind > o)
Convert Order attribute to llvm::omp::OrderKind.
static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp)
static llvm::Expected< llvm::Function * > emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::StringRef mapperFuncName)
static LogicalResult convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::Expected< llvm::Value * > initPrivateVar(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg, llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Initialize a single (first)private variable.
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertToCaptureClauseKind(mlir::omp::DeclareTargetCaptureClause captureClause)
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, bool first)
static OpTy castOrGetParentOfType(Operation *op, bool immediateParent=false)
If op is of the given type parameter, return it casted to that type.
static LogicalResult convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered_region' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an omp.atomic.write operation to LLVM IR.
static OwningAtomicReductionGen makeAtomicReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible atomic reduction generator for the given reduction declaration.
static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static llvm::Value * findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Return the llvm::Value * corresponding to the privateVar that is being privatized.
static LogicalResult convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
static void popCancelFinalizationCB(const ArrayRef< llvm::BranchInst * > cancelTerminators, llvm::OpenMPIRBuilder &ompBuilder, const llvm::OpenMPIRBuilder::InsertPointTy &afterIP)
If we cancelled the construct, we should branch to the finalization block of that construct.
static LogicalResult cleanupPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, Location loc, SmallVectorImpl< llvm::Value * > &llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls)
static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static llvm::Expected< llvm::BasicBlock * > allocatePrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Allocate and initialize delayed private variables.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op)
Converts an LLVM dialect binary operation to the corresponding enum value for atomicrmw supported bin...
static llvm::AtomicOrdering convertAtomicOrdering(std::optional< omp::ClauseMemoryOrderKind > ao)
Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static void setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder, llvm::BasicBlock *block=nullptr)
static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult allocAndInitializeReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef)
static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, MapInfosTy &combinedInfo, bool isTargetParams, int mapDataParentIdx=-1)
static LogicalResult convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
static bool isTargetDeviceOp(Operation *op)
static LogicalResult inlineOmpRegionCleanup(llvm::SmallVectorImpl< Region * > &cleanupRegions, llvm::ArrayRef< llvm::Value * > privateVariables, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, StringRef regionName, bool shouldLoadCleanupRegionArg=true)
handling of DeclareReductionOp's cleanup region
static void pushCancelFinalizationCB(SmallVectorImpl< llvm::BranchInst * > &cancelTerminators, llvm::IRBuilderBase &llvmBuilder, llvm::OpenMPIRBuilder &ompBuilder, mlir::Operation *op, llvm::omp::Directive cancelDirective)
Shared implementation of a callback which adds a terminator for the new block created for the branch t...
static void mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, DenseMap< Value, llvm::Value * > &reductionVariableMap, unsigned i)
Map input arguments to reduction initialization region.
static LogicalResult initReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::BasicBlock *latestAllocaBlock, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef, SmallVectorImpl< DeferredStore > &deferredStores)
Inline reductions' init regions.
static LogicalResult convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp)
static llvm::Error initPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static llvm::SmallString< 64 > getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, llvm::OpenMPIRBuilder &ompBuilder)
static OwningReductionGen makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible reduction generator for the given reduction declaration.
static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts the OpenMP parallel operation to LLVM IR.
static llvm::OpenMPIRBuilder::InsertPointTy findAllocaInsertPoint(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Find the insertion point for allocas given the current insertion point for normal operations in the b...
static LogicalResult inlineConvertOmpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::Value * > *continuationBlockArgs=nullptr)
Translates the blocks contained in the given region and appends them at the current insertion poin...
static LogicalResult convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP Threadprivate operation into LLVM IR using OpenMPIRBuilder.
static omp::PrivateClauseOp findPrivatizer(Operation *from, SymbolRefAttr symbolName)
Looks up from the operation from and returns the PrivateClauseOp with name symbolName.
static LogicalResult convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, bool isTargetParams=false)
LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
Lowers the FlagsAttr which is applied to the module on the device pass when offloading,...
static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl)
static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, omp::TargetOp targetOp, llvm::StringRef parentName="")
static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, Value &numTeamsLower, Value &numTeamsUpper, Value &threadLimit, llvm::SmallVectorImpl< Value > *lowerBounds=nullptr, llvm::SmallVectorImpl< Value > *upperBounds=nullptr, llvm::SmallVectorImpl< Value > *steps=nullptr)
Follow uses of host_eval-defined block arguments of the given omp.target operation and populate outpu...
uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl)
std::vector< llvm::Value * > calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, bool isArrayTy, OperandRange bounds)
This function calculates the array/pointer offset for map data provided with bounds operations,...
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static llvm::Expected< llvm::Function * > getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation, omp::BlockArgOpenMPOpInterface blockArgIface)
Maps block arguments from blockArgIface (which are MLIR values) to the corresponding LLVM values of t...
static LogicalResult allocReductionVars(T loop, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, SmallVectorImpl< DeferredStore > &deferredStores, llvm::ArrayRef< bool > isByRefs)
Allocate space for privatized reduction variables.
static ArrayRef< bool > getIsByRef(std::optional< ArrayRef< bool >> attr)
static bool constructIsCancellable(Operation *op)
Returns true if the construct contains omp.cancel or omp.cancellation_point.
static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP)
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind)
Convert ProcBindKind from MLIR-generated enum to LLVM enum.
static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static void collectReductionDecls(T op, SmallVectorImpl< omp::DeclareReductionOp > &reductions)
Populates reductions with reduction declarations used in the given op.
static LogicalResult handleError(llvm::Error error, Operation &op)
static LogicalResult copyFirstPrivateVars(mlir::Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< mlir::Value > &mlirPrivateVars, ArrayRef< llvm::Value * > llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls, bool insertBarrier, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static void processMapMembersWithParent(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag)
static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause)
static LogicalResult checkImplementationStatus(Operation &op)
Check whether translation to LLVM IR for the given operation is currently supported.
static LogicalResult createReductionsAndCleanup(OP op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, ArrayRef< llvm::Value * > privateReductionVariables, ArrayRef< bool > isByRef, bool isNowait=false, bool isTeamsReduction=false)
static LogicalResult convertOmpCancellationPoint(omp::CancellationPointOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static uint64_t getReductionDataSize(OpTy &op)
static llvm::CanonicalLoopInfo * findCurrentLoopInfo(LLVM::ModuleTranslation &moduleTranslation)
Find the loop information structure for the loop nest being translated.
static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert omp.atomic.read operation to LLVM IR.
static llvm::omp::Directive convertCancellationConstructType(omp::ClauseCancellationConstructType directive)
static void initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs, bool isTargetDevice, bool isGPU)
Populate default MinTeams, MaxTeams and MaxThreads to their default values as stated by the correspon...
static std::optional< int64_t > extractConstInteger(Value value)
If the given value is defined by an llvm.mlir.constant operation and it is of an integer type,...
static LogicalResult convertIgnoredWrapper(omp::LoopWrapperInterface opInst, LLVM::ModuleTranslation &moduleTranslation)
Helper function to map block arguments defined by ignored loop wrappers to LLVM values and prevent an...
static void initTargetRuntimeAttrs(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs)
Gather LLVM runtime values for all clauses evaluated in the host that are passed to the kernel invoca...
static void collectReductionInfo(T loop, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< OwningReductionGen > &owningReductionGens, SmallVectorImpl< OwningAtomicReductionGen > &owningAtomicReductionGens, const ArrayRef< llvm::Value * > privateReductionVariables, SmallVectorImpl< llvm::OpenMPIRBuilder::ReductionInfo > &reductionInfos)
Collect reduction info.
static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Given an OpenMP MLIR operation, create the corresponding LLVM IR (including OpenMP runtime calls).
static bool checkIfPointerMap(omp::MapInfoOp mapOp)
static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void buildDependData(std::optional< ArrayAttr > dependKinds, OperandRange dependVars, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::OpenMPIRBuilder::DependData > &dds)
static llvm::Expected< llvm::BasicBlock * > convertOmpOpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::PHINode * > *continuationBlockPHIs=nullptr)
Converts the given region that appears within an OpenMP dialect operation to LLVM IR,...
static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
llvm::Value * getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::Value *basePointer, llvm::Type *baseType, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void collectMapDataFromMapOperands(MapInfoData &mapData, SmallVectorImpl< Value > &mapVars, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::IRBuilderBase &builder, ArrayRef< Value > useDevPtrOperands={}, ArrayRef< Value > useDevAddrOperands={}, ArrayRef< Value > hasDevAddrOperands={})
static bool isDeclareTargetLink(mlir::Value value)
#define MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CLASS_NAME)
Definition: TypeID.h:331
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class represents an argument of a Block.
Definition: Value.h:309
Block represents an ordered list of Operations.
Definition: Block.h:33
BlockArgument getArgument(unsigned i)
Definition: Block.h:129
unsigned getNumArguments()
Definition: Block.h:128
Operation & back()
Definition: Block.h:152
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:246
Operation & front()
Definition: Block.h:153
iterator begin()
Definition: Block.h:143
The main mechanism for performing data layout queries.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
An instance of this location represents a tuple of file, line number, and column number.
Definition: Location.h:174
Base class for dialect interfaces providing translation to LLVM IR.
virtual LogicalResult amendOperation(Operation *op, ArrayRef< llvm::Instruction * > instructions, NamedAttribute attribute, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to act on an operation that has dialect attributes from the derive...
virtual LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to provide translation of the operations to LLVM IR.
Concrete CRTP base class for ModuleTranslation stack frames.
Implementation class for module translation.
llvm::Value * lookupValue(Value value) const
Finds an LLVM IR value corresponding to the given MLIR value.
WalkResult stackWalk(llvm::function_ref< WalkResult(T &)> callback)
Calls callback for every ModuleTranslation stack frame of type T starting from the top of the stack.
void stackPush(Args &&...args)
Creates a stack frame of type T on ModuleTranslation stack.
LogicalResult convertBlock(Block &bb, bool ignoreArguments, llvm::IRBuilderBase &builder)
Translates the contents of the given block to LLVM IR using this translator.
SmallVector< llvm::Value * > lookupValues(ValueRange values)
Looks up a list of remapped values.
void mapFunction(StringRef name, llvm::Function *func)
Stores the mapping between a function name and its LLVM IR representation.
llvm::BasicBlock * lookupBlock(Block *block) const
Finds an LLVM IR basic block that corresponds to the given MLIR block.
SymbolTableCollection & symbolTable()
llvm::Type * convertType(Type type)
Converts the type from MLIR LLVM dialect to LLVM.
llvm::OpenMPIRBuilder * getOpenMPBuilder()
Returns the OpenMP IR builder associated with the LLVM IR module being constructed.
llvm::LLVMContext & getLLVMContext() const
Returns the LLVM context in which the IR is being constructed.
llvm::GlobalValue * lookupGlobal(Operation *op)
Finds an LLVM IR global value that corresponds to the given MLIR operation defining a global value.
llvm::Module * getLLVMModule()
Returns the LLVM module in which the IR is being constructed.
llvm::Function * lookupFunction(StringRef name) const
Finds an LLVM IR function by its name.
void mapBlock(Block *mlir, llvm::BasicBlock *llvm)
Stores the mapping between an MLIR block and LLVM IR basic block.
void stackPop()
Pops the last element from the ModuleTranslation stack.
void forgetMapping(Region &region)
Removes the mapping for blocks contained in the region and values defined in these blocks.
void mapValue(Value mlir, llvm::Value *llvm)
Stores the mapping between an MLIR value and its LLVM IR counterpart.
Utility class to translate MLIR LLVM dialect types to LLVM IR.
Definition: TypeToLLVM.h:39
unsigned getPreferredAlignment(Type type, const llvm::DataLayout &layout)
Returns the preferred alignment for the type given the data layout.
Definition: TypeToLLVM.cpp:183
T findInstanceOf()
Return an instance of the given location type if one is nested under the current location.
Definition: Location.h:45
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:164
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:55
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:179
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:43
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:350
Dialect * getDialect()
Return the dialect this operation is associated with, or nullptr if the associated dialect is not loa...
Definition: Operation.h:220
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:280
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:797
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:346
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:686
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:873
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:539
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
BlockArgListType getArguments()
Definition: Region.h:81
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition: Region.h:200
bool empty()
Definition: Region.h:60
unsigned getNumArguments()
Definition: Region.h:123
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
Block & front()
Definition: Region.h:65
@ Private
The symbol is private and may only be referenced by SymbolRefAttrs local to the operations within the...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:33
static WalkResult advance()
Definition: Visitors.h:51
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: Visitors.h:55
static WalkResult interrupt()
Definition: Visitors.h:50
The OpAsmOpInterface, see OpAsmInterface.td for more details.
Definition: CallGraph.h:229
void connectPHINodes(Region &region, const ModuleTranslation &state)
For all blocks in the region that were converted to LLVM IR using the given ModuleTranslation,...
llvm::Constant * createMappingInformation(Location loc, llvm::OpenMPIRBuilder &builder)
Create a constant string representing the mapping information extracted from the MLIR location inform...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Runtime
Potential runtimes for AMD GPU kernels.
Definition: Runtimes.h:15
llvm::hash_code hash_value(const StructType::MemberDecorationInfo &memberDecorationInfo)
llvm::PointerUnion< NamedAttribute *, NamedProperty *, NamedTypeConstraint * > Argument
Definition: Argument.h:64
Include the generated interface declarations.
SetVector< Block * > getBlocksSortedByDominance(Region &region)
Gets a list of blocks that is sorted according to dominance.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:305
const FrozenRewritePatternSet GreedyRewriteConfig config
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
void registerOpenMPDialectTranslation(DialectRegistry &registry)
Register the OpenMP dialect and the translation from it to the LLVM IR in the given registry;.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
A util to collect info needed to convert delayed privatizers from MLIR to LLVM.
SmallVector< mlir::Value > mlirVars
SmallVector< omp::PrivateClauseOp > privatizers
MutableArrayRef< BlockArgument > blockArgs
SmallVector< llvm::Value * > llvmVars
RAII object calling stackPush/stackPop on construction/destruction.