MLIR  21.0.0git
OpenMPToLLVMIRTranslation.cpp
Go to the documentation of this file.
1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
19 #include "mlir/IR/IRMapping.h"
20 #include "mlir/IR/Operation.h"
21 #include "mlir/Support/LLVM.h"
25 
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/TypeSwitch.h"
30 #include "llvm/Frontend/OpenMP/OMPConstants.h"
31 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DebugInfoMetadata.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/MDBuilder.h"
37 #include "llvm/IR/ReplaceConstant.h"
38 #include "llvm/Support/FileSystem.h"
39 #include "llvm/TargetParser/Triple.h"
40 #include "llvm/Transforms/Utils/ModuleUtils.h"
41 
42 #include <any>
43 #include <cstdint>
44 #include <iterator>
45 #include <numeric>
46 #include <optional>
47 #include <utility>
48 
49 using namespace mlir;
50 
51 namespace {
52 static llvm::omp::ScheduleKind
53 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
54  if (!schedKind.has_value())
55  return llvm::omp::OMP_SCHEDULE_Default;
56  switch (schedKind.value()) {
57  case omp::ClauseScheduleKind::Static:
58  return llvm::omp::OMP_SCHEDULE_Static;
59  case omp::ClauseScheduleKind::Dynamic:
60  return llvm::omp::OMP_SCHEDULE_Dynamic;
61  case omp::ClauseScheduleKind::Guided:
62  return llvm::omp::OMP_SCHEDULE_Guided;
63  case omp::ClauseScheduleKind::Auto:
64  return llvm::omp::OMP_SCHEDULE_Auto;
66  return llvm::omp::OMP_SCHEDULE_Runtime;
67  }
68  llvm_unreachable("unhandled schedule clause argument");
69 }
70 
/// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
/// insertion points for allocas.
///
/// Frames of this kind are pushed by surrounding OpenMP constructs so that
/// nested translation code (see \c findAllocaInsertPoint) can place allocas at
/// the point chosen by the enclosing construct rather than in the function
/// entry block.
class OpenMPAllocaStackFrame
    : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)

  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
      : allocaInsertPoint(allocaIP) {}
  // Insertion point at which allocas for the current construct must be created.
  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
};
82 
/// Stack frame to hold a \see llvm::CanonicalLoopInfo representing the
/// collapsed canonical loop information corresponding to an \c omp.loop_nest
/// operation.
///
/// The pointer starts out null and is populated while translating the loop
/// nest; loop wrapper translation code retrieves it afterwards.
class OpenMPLoopInfoStackFrame
    : public LLVM::ModuleTranslation::StackFrameBase<OpenMPLoopInfoStackFrame> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPLoopInfoStackFrame)
  llvm::CanonicalLoopInfo *loopInfo = nullptr;
};
92 
93 /// Custom error class to signal translation errors that don't need reporting,
94 /// since encountering them will have already triggered relevant error messages.
95 ///
96 /// Its purpose is to serve as the glue between MLIR failures represented as
97 /// \see LogicalResult instances and \see llvm::Error instances used to
98 /// propagate errors through the \see llvm::OpenMPIRBuilder. Generally, when an
99 /// error of the first type is raised, a message is emitted directly (the \see
100 /// LogicalResult itself does not hold any information). If we need to forward
101 /// this error condition as an \see llvm::Error while avoiding triggering some
102 /// redundant error reporting later on, we need a custom \see llvm::ErrorInfo
103 /// class to just signal this situation has happened.
104 ///
105 /// For example, this class should be used to trigger errors from within
106 /// callbacks passed to the \see OpenMPIRBuilder when they were triggered by the
107 /// translation of their own regions. This unclutters the error log from
108 /// redundant messages.
109 class PreviouslyReportedError
110  : public llvm::ErrorInfo<PreviouslyReportedError> {
111 public:
112  void log(raw_ostream &) const override {
113  // Do not log anything.
114  }
115 
116  std::error_code convertToErrorCode() const override {
117  llvm_unreachable(
118  "PreviouslyReportedError doesn't support ECError conversion");
119  }
120 
121  // Used by ErrorInfo::classID.
122  static char ID;
123 };
124 
126 
127 } // namespace
128 
129 /// Looks up from the operation from and returns the PrivateClauseOp with
130 /// name symbolName
131 static omp::PrivateClauseOp findPrivatizer(Operation *from,
132  SymbolRefAttr symbolName) {
133  omp::PrivateClauseOp privatizer =
134  SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
135  symbolName);
136  assert(privatizer && "privatizer not found in the symbol table");
137  return privatizer;
138 }
139 
140 /// Check whether translation to LLVM IR for the given operation is currently
141 /// supported. If not, descriptive diagnostics will be emitted to let users know
142 /// this is a not-yet-implemented feature.
143 ///
144 /// \returns success if no unimplemented features are needed to translate the
145 /// given operation.
146 static LogicalResult checkImplementationStatus(Operation &op) {
147  auto todo = [&op](StringRef clauseName) {
148  return op.emitError() << "not yet implemented: Unhandled clause "
149  << clauseName << " in " << op.getName()
150  << " operation";
151  };
152 
153  auto checkAllocate = [&todo](auto op, LogicalResult &result) {
154  if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
155  result = todo("allocate");
156  };
157  auto checkBare = [&todo](auto op, LogicalResult &result) {
158  if (op.getBare())
159  result = todo("ompx_bare");
160  };
161  auto checkCancelDirective = [&todo](auto op, LogicalResult &result) {
162  omp::ClauseCancellationConstructType cancelledDirective =
163  op.getCancelDirective();
164  // Cancelling a taskloop is not yet supported because we don't yet have LLVM
165  // IR conversion for taskloop
166  if (cancelledDirective == omp::ClauseCancellationConstructType::Taskgroup) {
167  Operation *parent = op->getParentOp();
168  while (parent) {
169  if (parent->getDialect() == op->getDialect())
170  break;
171  parent = parent->getParentOp();
172  }
173  if (isa_and_nonnull<omp::TaskloopOp>(parent))
174  result = todo("cancel directive inside of taskloop");
175  }
176  };
177  auto checkDepend = [&todo](auto op, LogicalResult &result) {
178  if (!op.getDependVars().empty() || op.getDependKinds())
179  result = todo("depend");
180  };
181  auto checkDevice = [&todo](auto op, LogicalResult &result) {
182  if (op.getDevice())
183  result = todo("device");
184  };
185  auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
186  if (op.getDistScheduleChunkSize())
187  result = todo("dist_schedule with chunk_size");
188  };
189  auto checkHint = [](auto op, LogicalResult &) {
190  if (op.getHint())
191  op.emitWarning("hint clause discarded");
192  };
193  auto checkInReduction = [&todo](auto op, LogicalResult &result) {
194  if (!op.getInReductionVars().empty() || op.getInReductionByref() ||
195  op.getInReductionSyms())
196  result = todo("in_reduction");
197  };
198  auto checkIsDevicePtr = [&todo](auto op, LogicalResult &result) {
199  if (!op.getIsDevicePtrVars().empty())
200  result = todo("is_device_ptr");
201  };
202  auto checkLinear = [&todo](auto op, LogicalResult &result) {
203  if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
204  result = todo("linear");
205  };
206  auto checkNowait = [&todo](auto op, LogicalResult &result) {
207  if (op.getNowait())
208  result = todo("nowait");
209  };
210  auto checkOrder = [&todo](auto op, LogicalResult &result) {
211  if (op.getOrder() || op.getOrderMod())
212  result = todo("order");
213  };
214  auto checkParLevelSimd = [&todo](auto op, LogicalResult &result) {
215  if (op.getParLevelSimd())
216  result = todo("parallelization-level");
217  };
218  auto checkPriority = [&todo](auto op, LogicalResult &result) {
219  if (op.getPriority())
220  result = todo("priority");
221  };
222  auto checkPrivate = [&todo](auto op, LogicalResult &result) {
223  if constexpr (std::is_same_v<std::decay_t<decltype(op)>, omp::TargetOp>) {
224  // Privatization is supported only for included target tasks.
225  if (!op.getPrivateVars().empty() && op.getNowait())
226  result = todo("privatization for deferred target tasks");
227  } else {
228  if (!op.getPrivateVars().empty() || op.getPrivateSyms())
229  result = todo("privatization");
230  }
231  };
232  auto checkReduction = [&todo](auto op, LogicalResult &result) {
233  if (isa<omp::TeamsOp>(op) || isa<omp::SimdOp>(op))
234  if (!op.getReductionVars().empty() || op.getReductionByref() ||
235  op.getReductionSyms())
236  result = todo("reduction");
237  if (op.getReductionMod() &&
238  op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
239  result = todo("reduction with modifier");
240  };
241  auto checkTaskReduction = [&todo](auto op, LogicalResult &result) {
242  if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() ||
243  op.getTaskReductionSyms())
244  result = todo("task_reduction");
245  };
246  auto checkUntied = [&todo](auto op, LogicalResult &result) {
247  if (op.getUntied())
248  result = todo("untied");
249  };
250 
251  LogicalResult result = success();
253  .Case([&](omp::CancelOp op) { checkCancelDirective(op, result); })
254  .Case([&](omp::CancellationPointOp op) {
255  checkCancelDirective(op, result);
256  })
257  .Case([&](omp::DistributeOp op) {
258  checkAllocate(op, result);
259  checkDistSchedule(op, result);
260  checkOrder(op, result);
261  })
262  .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
263  .Case([&](omp::SectionsOp op) {
264  checkAllocate(op, result);
265  checkPrivate(op, result);
266  checkReduction(op, result);
267  })
268  .Case([&](omp::SingleOp op) {
269  checkAllocate(op, result);
270  checkPrivate(op, result);
271  })
272  .Case([&](omp::TeamsOp op) {
273  checkAllocate(op, result);
274  checkPrivate(op, result);
275  })
276  .Case([&](omp::TaskOp op) {
277  checkAllocate(op, result);
278  checkInReduction(op, result);
279  })
280  .Case([&](omp::TaskgroupOp op) {
281  checkAllocate(op, result);
282  checkTaskReduction(op, result);
283  })
284  .Case([&](omp::TaskwaitOp op) {
285  checkDepend(op, result);
286  checkNowait(op, result);
287  })
288  .Case([&](omp::TaskloopOp op) {
289  // TODO: Add other clauses check
290  checkUntied(op, result);
291  checkPriority(op, result);
292  })
293  .Case([&](omp::WsloopOp op) {
294  checkAllocate(op, result);
295  checkLinear(op, result);
296  checkOrder(op, result);
297  checkReduction(op, result);
298  })
299  .Case([&](omp::ParallelOp op) {
300  checkAllocate(op, result);
301  checkReduction(op, result);
302  })
303  .Case([&](omp::SimdOp op) {
304  checkLinear(op, result);
305  checkReduction(op, result);
306  })
307  .Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp,
308  omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); })
309  .Case<omp::TargetEnterDataOp, omp::TargetExitDataOp, omp::TargetUpdateOp>(
310  [&](auto op) { checkDepend(op, result); })
311  .Case([&](omp::TargetOp op) {
312  checkAllocate(op, result);
313  checkBare(op, result);
314  checkDevice(op, result);
315  checkInReduction(op, result);
316  checkIsDevicePtr(op, result);
317  checkPrivate(op, result);
318  })
319  .Default([](Operation &) {
320  // Assume all clauses for an operation can be translated unless they are
321  // checked above.
322  });
323  return result;
324 }
325 
326 static LogicalResult handleError(llvm::Error error, Operation &op) {
327  LogicalResult result = success();
328  if (error) {
329  llvm::handleAllErrors(
330  std::move(error),
331  [&](const PreviouslyReportedError &) { result = failure(); },
332  [&](const llvm::ErrorInfoBase &err) {
333  result = op.emitError(err.message());
334  });
335  }
336  return result;
337 }
338 
339 template <typename T>
340 static LogicalResult handleError(llvm::Expected<T> &result, Operation &op) {
341  if (!result)
342  return handleError(result.takeError(), op);
343 
344  return success();
345 }
346 
/// Find the insertion point for allocas given the current insertion point for
/// normal operations in the builder.
static llvm::OpenMPIRBuilder::InsertPointTy
findAllocaInsertPoint(llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation) {
  // If there is an alloca insertion point on stack, i.e. we are in a nested
  // operation and a specific point was provided by some surrounding operation,
  // use it.
  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
      [&](OpenMPAllocaStackFrame &frame) {
        // The innermost frame wins: take its insertion point and stop walking.
        allocaInsertPoint = frame.allocaInsertPoint;
        return WalkResult::interrupt();
      });
  if (walkResult.wasInterrupted())
    return allocaInsertPoint;

  // Otherwise, insert to the entry block of the surrounding function.
  // If the current IRBuilder InsertPoint is the function's entry, it cannot
  // also be used for alloca insertion which would result in insertion order
  // confusion. Create a new BasicBlock for the Builder and use the entry block
  // for the allocs.
  // TODO: Create a dedicated alloca BasicBlock at function creation such that
  // we do not need to move the current InsertPoint here.
  if (builder.GetInsertBlock() ==
      &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
    assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
           "Assuming end of basic block");
    // Split off a fresh "entry" block for normal code and branch to it, so the
    // real function entry block is reserved for allocas.
    llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
        builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
        builder.GetInsertBlock()->getNextNode());
    builder.CreateBr(entryBB);
    builder.SetInsertPoint(entryBB);
  }

  llvm::BasicBlock &funcEntryBlock =
      builder.GetInsertBlock()->getParent()->getEntryBlock();
  return llvm::OpenMPIRBuilder::InsertPointTy(
      &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
}
387 
388 /// Find the loop information structure for the loop nest being translated. It
389 /// will return a `null` value unless called from the translation function for
390 /// a loop wrapper operation after successfully translating its body.
391 static llvm::CanonicalLoopInfo *
393  llvm::CanonicalLoopInfo *loopInfo = nullptr;
394  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
395  [&](OpenMPLoopInfoStackFrame &frame) {
396  loopInfo = frame.loopInfo;
397  return WalkResult::interrupt();
398  });
399  return loopInfo;
400 }
401 
402 /// Converts the given region that appears within an OpenMP dialect operation to
403 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
404 /// region, and a branch from any block with an successor-less OpenMP terminator
405 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
406 /// of the continuation block if provided.
408  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
409  LLVM::ModuleTranslation &moduleTranslation,
410  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
411  bool isLoopWrapper = isa<omp::LoopWrapperInterface>(region.getParentOp());
412 
413  llvm::BasicBlock *continuationBlock =
414  splitBB(builder, true, "omp.region.cont");
415  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
416 
417  llvm::LLVMContext &llvmContext = builder.getContext();
418  for (Block &bb : region) {
419  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
420  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
421  builder.GetInsertBlock()->getNextNode());
422  moduleTranslation.mapBlock(&bb, llvmBB);
423  }
424 
425  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
426 
427  // Terminators (namely YieldOp) may be forwarding values to the region that
428  // need to be available in the continuation block. Collect the types of these
429  // operands in preparation of creating PHI nodes. This is skipped for loop
430  // wrapper operations, for which we know in advance they have no terminators.
431  SmallVector<llvm::Type *> continuationBlockPHITypes;
432  unsigned numYields = 0;
433 
434  if (!isLoopWrapper) {
435  bool operandsProcessed = false;
436  for (Block &bb : region.getBlocks()) {
437  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
438  if (!operandsProcessed) {
439  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
440  continuationBlockPHITypes.push_back(
441  moduleTranslation.convertType(yield->getOperand(i).getType()));
442  }
443  operandsProcessed = true;
444  } else {
445  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
446  "mismatching number of values yielded from the region");
447  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
448  llvm::Type *operandType =
449  moduleTranslation.convertType(yield->getOperand(i).getType());
450  (void)operandType;
451  assert(continuationBlockPHITypes[i] == operandType &&
452  "values of mismatching types yielded from the region");
453  }
454  }
455  numYields++;
456  }
457  }
458  }
459 
460  // Insert PHI nodes in the continuation block for any values forwarded by the
461  // terminators in this region.
462  if (!continuationBlockPHITypes.empty())
463  assert(
464  continuationBlockPHIs &&
465  "expected continuation block PHIs if converted regions yield values");
466  if (continuationBlockPHIs) {
467  llvm::IRBuilderBase::InsertPointGuard guard(builder);
468  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
469  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
470  for (llvm::Type *ty : continuationBlockPHITypes)
471  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
472  }
473 
474  // Convert blocks one by one in topological order to ensure
475  // defs are converted before uses.
477  for (Block *bb : blocks) {
478  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
479  // Retarget the branch of the entry block to the entry block of the
480  // converted region (regions are single-entry).
481  if (bb->isEntryBlock()) {
482  assert(sourceTerminator->getNumSuccessors() == 1 &&
483  "provided entry block has multiple successors");
484  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
485  "ContinuationBlock is not the successor of the entry block");
486  sourceTerminator->setSuccessor(0, llvmBB);
487  }
488 
489  llvm::IRBuilderBase::InsertPointGuard guard(builder);
490  if (failed(
491  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder)))
492  return llvm::make_error<PreviouslyReportedError>();
493 
494  // Create a direct branch here for loop wrappers to prevent their lack of a
495  // terminator from causing a crash below.
496  if (isLoopWrapper) {
497  builder.CreateBr(continuationBlock);
498  continue;
499  }
500 
501  // Special handling for `omp.yield` and `omp.terminator` (we may have more
502  // than one): they return the control to the parent OpenMP dialect operation
503  // so replace them with the branch to the continuation block. We handle this
504  // here to avoid relying inter-function communication through the
505  // ModuleTranslation class to set up the correct insertion point. This is
506  // also consistent with MLIR's idiom of handling special region terminators
507  // in the same code that handles the region-owning operation.
508  Operation *terminator = bb->getTerminator();
509  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
510  builder.CreateBr(continuationBlock);
511 
512  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
513  (*continuationBlockPHIs)[i]->addIncoming(
514  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
515  }
516  }
517  // After all blocks have been traversed and values mapped, connect the PHI
518  // nodes to the results of preceding blocks.
519  LLVM::detail::connectPHINodes(region, moduleTranslation);
520 
521  // Remove the blocks and values defined in this region from the mapping since
522  // they are not visible outside of this region. This allows the same region to
523  // be converted several times, that is cloned, without clashes, and slightly
524  // speeds up the lookups.
525  moduleTranslation.forgetMapping(region);
526 
527  return continuationBlock;
528 }
529 
530 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
531 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
532  switch (kind) {
533  case omp::ClauseProcBindKind::Close:
534  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
535  case omp::ClauseProcBindKind::Master:
536  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
537  case omp::ClauseProcBindKind::Primary:
538  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
539  case omp::ClauseProcBindKind::Spread:
540  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
541  }
542  llvm_unreachable("Unknown ClauseProcBindKind kind");
543 }
544 
545 /// Maps block arguments from \p blockArgIface (which are MLIR values) to the
546 /// corresponding LLVM values of \p the interface's operands. This is useful
547 /// when an OpenMP region with entry block arguments is converted to LLVM. In
548 /// this case the block arguments are (part of) of the OpenMP region's entry
549 /// arguments and the operands are (part of) of the operands to the OpenMP op
550 /// containing the region.
551 static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation,
552  omp::BlockArgOpenMPOpInterface blockArgIface) {
554  blockArgIface.getBlockArgsPairs(blockArgsPairs);
555  for (auto [var, arg] : blockArgsPairs)
556  moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
557 }
558 
559 /// Helper function to map block arguments defined by ignored loop wrappers to
560 /// LLVM values and prevent any uses of those from triggering null pointer
561 /// dereferences.
562 ///
563 /// This must be called after block arguments of parent wrappers have already
564 /// been mapped to LLVM IR values.
565 static LogicalResult
566 convertIgnoredWrapper(omp::LoopWrapperInterface opInst,
567  LLVM::ModuleTranslation &moduleTranslation) {
568  // Map block arguments directly to the LLVM value associated to the
569  // corresponding operand. This is semantically equivalent to this wrapper not
570  // being present.
572  .Case([&](omp::SimdOp op) {
573  forwardArgs(moduleTranslation,
574  cast<omp::BlockArgOpenMPOpInterface>(*op));
575  op.emitWarning() << "simd information on composite construct discarded";
576  return success();
577  })
578  .Default([&](Operation *op) {
579  return op->emitError() << "cannot ignore wrapper";
580  });
581 }
582 
583 /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
584 static LogicalResult
585 convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
586  LLVM::ModuleTranslation &moduleTranslation) {
587  auto maskedOp = cast<omp::MaskedOp>(opInst);
588  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
589 
590  if (failed(checkImplementationStatus(opInst)))
591  return failure();
592 
593  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
594  // MaskedOp has only one region associated with it.
595  auto &region = maskedOp.getRegion();
596  builder.restoreIP(codeGenIP);
597  return convertOmpOpRegions(region, "omp.masked.region", builder,
598  moduleTranslation)
599  .takeError();
600  };
601 
602  // TODO: Perform finalization actions for variables. This has to be
603  // called for variables which have destructors/finalizers.
604  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
605 
606  llvm::Value *filterVal = nullptr;
607  if (auto filterVar = maskedOp.getFilteredThreadId()) {
608  filterVal = moduleTranslation.lookupValue(filterVar);
609  } else {
610  llvm::LLVMContext &llvmContext = builder.getContext();
611  filterVal =
612  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), /*V=*/0);
613  }
614  assert(filterVal != nullptr);
615  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
616  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
617  moduleTranslation.getOpenMPBuilder()->createMasked(ompLoc, bodyGenCB,
618  finiCB, filterVal);
619 
620  if (failed(handleError(afterIP, opInst)))
621  return failure();
622 
623  builder.restoreIP(*afterIP);
624  return success();
625 }
626 
627 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
628 static LogicalResult
629 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
630  LLVM::ModuleTranslation &moduleTranslation) {
631  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
632  auto masterOp = cast<omp::MasterOp>(opInst);
633 
634  if (failed(checkImplementationStatus(opInst)))
635  return failure();
636 
637  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
638  // MasterOp has only one region associated with it.
639  auto &region = masterOp.getRegion();
640  builder.restoreIP(codeGenIP);
641  return convertOmpOpRegions(region, "omp.master.region", builder,
642  moduleTranslation)
643  .takeError();
644  };
645 
646  // TODO: Perform finalization actions for variables. This has to be
647  // called for variables which have destructors/finalizers.
648  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
649 
650  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
651  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
652  moduleTranslation.getOpenMPBuilder()->createMaster(ompLoc, bodyGenCB,
653  finiCB);
654 
655  if (failed(handleError(afterIP, opInst)))
656  return failure();
657 
658  builder.restoreIP(*afterIP);
659  return success();
660 }
661 
/// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  auto criticalOp = cast<omp::CriticalOp>(opInst);

  if (failed(checkImplementationStatus(opInst)))
    return failure();

  // Body generator: translate the op's single region at the given point.
  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
    // CriticalOp has only one region associated with it.
    auto &region = cast<omp::CriticalOp>(opInst).getRegion();
    builder.restoreIP(codeGenIP);
    return convertOmpOpRegions(region, "omp.critical.region", builder,
                               moduleTranslation)
        .takeError();
  };

  // TODO: Perform finalization actions for variables. This has to be
  // called for variables which have destructors/finalizers.
  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
  llvm::Constant *hint = nullptr;

  // If it has a name, it probably has a hint too: look up the matching
  // omp.critical.declare to fetch the hint value.
  if (criticalOp.getNameAttr()) {
    // The verifiers in OpenMP Dialect guarantee that all the pointers are
    // non-null
    auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
    auto criticalDeclareOp =
        SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
                                                                     symbolRef);
    hint =
        llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
                               static_cast<int>(criticalDeclareOp.getHint()));
  }
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createCritical(
          ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint);

  if (failed(handleError(afterIP, opInst)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
711 
712 /// A util to collect info needed to convert delayed privatizers from MLIR to
713 /// LLVM.
715  template <typename OP>
717  : blockArgs(
718  cast<omp::BlockArgOpenMPOpInterface>(*op).getPrivateBlockArgs()) {
719  mlirVars.reserve(blockArgs.size());
720  llvmVars.reserve(blockArgs.size());
721  collectPrivatizationDecls<OP>(op);
722 
723  for (mlir::Value privateVar : op.getPrivateVars())
724  mlirVars.push_back(privateVar);
725  }
726 
731 
732 private:
733  /// Populates `privatizations` with privatization declarations used for the
734  /// given op.
735  template <class OP>
736  void collectPrivatizationDecls(OP op) {
737  std::optional<ArrayAttr> attr = op.getPrivateSyms();
738  if (!attr)
739  return;
740 
741  privatizers.reserve(privatizers.size() + attr->size());
742  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
743  privatizers.push_back(findPrivatizer(op, symbolRef));
744  }
745  }
746 };
747 
748 /// Populates `reductions` with reduction declarations used in the given op.
749 template <typename T>
750 static void
753  std::optional<ArrayAttr> attr = op.getReductionSyms();
754  if (!attr)
755  return;
756 
757  reductions.reserve(reductions.size() + op.getNumReductionVars());
758  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
759  reductions.push_back(
760  SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
761  op, symbolRef));
762  }
763 }
764 
765 /// Translates the blocks contained in the given region and appends them to at
766 /// the current insertion point of `builder`. The operations of the entry block
767 /// are appended to the current insertion block. If set, `continuationBlockArgs`
768 /// is populated with translated values that correspond to the values
769 /// omp.yield'ed from the region.
770 static LogicalResult inlineConvertOmpRegions(
771  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
772  LLVM::ModuleTranslation &moduleTranslation,
773  SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
  // An empty region translates to nothing at all.
774  if (region.empty())
775  return success();
776 
777  // Special case for single-block regions that don't create additional blocks:
778  // insert operations without creating additional blocks.
779  if (llvm::hasSingleElement(region)) {
  // Remember a terminator already present in the current insertion block so
  // the converted operations can be placed before it; it is detached here
  // and re-attached after conversion below.
780  llvm::Instruction *potentialTerminator =
781  builder.GetInsertBlock()->empty() ? nullptr
782  : &builder.GetInsertBlock()->back();
783 
784  if (potentialTerminator && potentialTerminator->isTerminator())
785  potentialTerminator->removeFromParent();
786  moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
787 
788  if (failed(moduleTranslation.convertBlock(
789  region.front(), /*ignoreArguments=*/true, builder)))
790  return failure();
791 
792  // The continuation arguments are simply the translated terminator operands.
793  if (continuationBlockArgs)
794  llvm::append_range(
795  *continuationBlockArgs,
796  moduleTranslation.lookupValues(region.front().back().getOperands()));
797 
798  // Drop the mapping that is no longer necessary so that the same region can
799  // be processed multiple times.
800  moduleTranslation.forgetMapping(region);
801 
  // Re-attach the terminator that was detached above.
802  if (potentialTerminator && potentialTerminator->isTerminator()) {
803  llvm::BasicBlock *block = builder.GetInsertBlock();
804  if (block->empty()) {
805  // this can happen for really simple reduction init regions e.g.
806  // %0 = llvm.mlir.constant(0 : i32) : i32
807  // omp.yield(%0 : i32)
808  // because the llvm.mlir.constant (MLIR op) isn't converted into any
809  // llvm op
810  potentialTerminator->insertInto(block, block->begin());
811  } else {
812  potentialTerminator->insertAfter(&block->back());
813  }
814  }
815 
816  return success();
817  }
818 
  // General case: multi-block regions are converted with explicit control
  // flow and a continuation block collecting the yielded values as PHIs.
  // NOTE(review): a declaration such as `SmallVector<llvm::Value *> phis;`
  // appears to have been dropped from this excerpt (it is passed by address
  // to convertOmpOpRegions below) — confirm against the full file.
820  llvm::Expected<llvm::BasicBlock *> continuationBlock =
821  convertOmpOpRegions(region, blockName, builder, moduleTranslation, &phis);
822 
823  if (failed(handleError(continuationBlock, *region.getParentOp())))
824  return failure();
825 
826  if (continuationBlockArgs)
827  llvm::append_range(*continuationBlockArgs, phis);
  // Leave the builder ready to continue code-gen inside the continuation.
828  builder.SetInsertPoint(*continuationBlock,
829  (*continuationBlock)->getFirstInsertionPt());
830  return success();
831 }
832 
833 namespace {
834 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
835 /// store lambdas with capture.
/// Arguments: (insertion point, LHS value, RHS value, combined-result
/// out-reference) — see makeReductionGen for the producing side.
836 using OwningReductionGen =
837  std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
838  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
839  llvm::Value *&)>;
/// Arguments: (insertion point, element type, LHS value, RHS value); the
/// atomic variant updates memory in place and yields no result value.
840 using OwningAtomicReductionGen =
841  std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
842  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
843  llvm::Value *)>;
844 } // namespace
845 
846 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
847 /// reduction declaration. The generator uses `builder` but ignores its
848 /// insertion point.
849 static OwningReductionGen
850 makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
851  LLVM::ModuleTranslation &moduleTranslation) {
852  // The lambda is mutable because we need access to non-const methods of decl
853  // (which aren't actually mutating it), and we must capture decl by-value to
854  // avoid the dangling reference after the parent function returns.
855  OwningReductionGen gen =
856  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
857  llvm::Value *lhs, llvm::Value *rhs,
858  llvm::Value *&result) mutable
859  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
860  moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs);
861  moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs);
862  builder.restoreIP(insertPoint);
864  if (failed(inlineConvertOmpRegions(decl.getReductionRegion(),
865  "omp.reduction.nonatomic.body", builder,
866  moduleTranslation, &phis)))
867  return llvm::createStringError(
868  "failed to inline `combiner` region of `omp.declare_reduction`");
869  result = llvm::getSingleElement(phis);
870  return builder.saveIP();
871  };
872  return gen;
873 }
874 
875 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
876 /// given reduction declaration. The generator uses `builder` but ignores its
877 /// insertion point. Returns null if there is no atomic region available in the
878 /// reduction declaration.
879 static OwningAtomicReductionGen
880 makeAtomicReductionGen(omp::DeclareReductionOp decl,
881  llvm::IRBuilderBase &builder,
882  LLVM::ModuleTranslation &moduleTranslation) {
883  if (decl.getAtomicReductionRegion().empty())
884  return OwningAtomicReductionGen();
885 
886  // The lambda is mutable because we need access to non-const methods of decl
887  // (which aren't actually mutating it), and we must capture decl by-value to
888  // avoid the dangling reference after the parent function returns.
889  OwningAtomicReductionGen atomicGen =
890  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
891  llvm::Value *lhs, llvm::Value *rhs) mutable
892  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
893  moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs);
894  moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs);
895  builder.restoreIP(insertPoint);
897  if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(),
898  "omp.reduction.atomic.body", builder,
899  moduleTranslation, &phis)))
900  return llvm::createStringError(
901  "failed to inline `atomic` region of `omp.declare_reduction`");
902  assert(phis.empty());
903  return builder.saveIP();
904  };
905  return atomicGen;
906 }
907 
908 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
909 static LogicalResult
910 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
911  LLVM::ModuleTranslation &moduleTranslation) {
912  auto orderedOp = cast<omp::OrderedOp>(opInst);
913 
914  if (failed(checkImplementationStatus(opInst)))
915  return failure();
916 
917  omp::ClauseDepend dependType = *orderedOp.getDoacrossDependType();
918  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
919  unsigned numLoops = *orderedOp.getDoacrossNumLoops();
920  SmallVector<llvm::Value *> vecValues =
921  moduleTranslation.lookupValues(orderedOp.getDoacrossDependVars());
922 
923  size_t indexVecValues = 0;
924  while (indexVecValues < vecValues.size()) {
925  SmallVector<llvm::Value *> storeValues;
926  storeValues.reserve(numLoops);
927  for (unsigned i = 0; i < numLoops; i++) {
928  storeValues.push_back(vecValues[indexVecValues]);
929  indexVecValues++;
930  }
931  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
932  findAllocaInsertPoint(builder, moduleTranslation);
933  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
934  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
935  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
936  }
937  return success();
938 }
939 
940 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
941 /// OpenMPIRBuilder.
942 static LogicalResult
943 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
944  LLVM::ModuleTranslation &moduleTranslation) {
945  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
946  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
947 
948  if (failed(checkImplementationStatus(opInst)))
949  return failure();
950 
951  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
952  // OrderedOp has only one region associated with it.
953  auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
954  builder.restoreIP(codeGenIP);
955  return convertOmpOpRegions(region, "omp.ordered.region", builder,
956  moduleTranslation)
957  .takeError();
958  };
959 
960  // TODO: Perform finalization actions for variables. This has to be
961  // called for variables which have destructors/finalizers.
962  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
963 
964  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
965  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
966  moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
967  ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd());
968 
969  if (failed(handleError(afterIP, opInst)))
970  return failure();
971 
972  builder.restoreIP(*afterIP);
973  return success();
974 }
975 
976 namespace {
977 /// Contains the arguments for an LLVM store operation
978 struct DeferredStore {
979  DeferredStore(llvm::Value *value, llvm::Value *address)
980  : value(value), address(address) {}
981 
982  llvm::Value *value;
983  llvm::Value *address;
984 };
985 } // namespace
986 
987 /// Allocate space for privatized reduction variables.
988 /// `deferredStores` contains information to create store operations which needs
989 /// to be inserted after all allocas
990 template <typename T>
991 static LogicalResult
  // NOTE(review): the function-name/leading-parameter line (likely
  // `allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs, ...)`)
  // and the `reductionDecls` parameter line appear to have been dropped from
  // this excerpt — confirm against the full file.
993  llvm::IRBuilderBase &builder,
994  LLVM::ModuleTranslation &moduleTranslation,
995  const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
997  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
998  DenseMap<Value, llvm::Value *> &reductionVariableMap,
999  SmallVectorImpl<DeferredStore> &deferredStores,
1000  llvm::ArrayRef<bool> isByRefs) {
  // Restore the caller's insertion point on exit; all allocas go before the
  // terminator of the alloca block.
1001  llvm::IRBuilderBase::InsertPointGuard guard(builder);
1002  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1003 
1004  // delay creating stores until after all allocas
1005  deferredStores.reserve(loop.getNumReductionVars());
1006 
1007  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
1008  Region &allocRegion = reductionDecls[i].getAllocRegion();
1009  if (isByRefs[i]) {
  // By-ref reduction without an alloc region: handled elsewhere.
1010  if (allocRegion.empty())
1011  continue;
1012 
  // NOTE(review): a declaration such as `SmallVector<llvm::Value *> phis;`
  // appears to have been dropped here (it receives the value yielded from
  // the inlined alloc region below).
1014  if (failed(inlineConvertOmpRegions(allocRegion, "omp.reduction.alloc",
1015  builder, moduleTranslation, &phis)))
1016  return loop.emitError(
1017  "failed to inline `alloc` region of `omp.declare_reduction`");
1018 
1019  assert(phis.size() == 1 && "expected one allocation to be yielded");
1020  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1021 
1022  // Allocate reduction variable (which is a pointer to the real reduction
1023  // variable allocated in the inlined region)
1024  llvm::Value *var = builder.CreateAlloca(
1025  moduleTranslation.convertType(reductionDecls[i].getType()));
1026 
  // Normalize both pointers to the default pointer type so the deferred
  // store and later uses are address-space agnostic.
1027  llvm::Type *ptrTy = builder.getPtrTy();
1028  llvm::Value *castVar =
1029  builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
1030  llvm::Value *castPhi =
1031  builder.CreatePointerBitCastOrAddrSpaceCast(phis[0], ptrTy);
1032 
  // Store of the inlined-region allocation into the pointer variable is
  // deferred until all allocas are emitted.
1033  deferredStores.emplace_back(castPhi, castVar);
1034 
1035  privateReductionVariables[i] = castVar;
1036  moduleTranslation.mapValue(reductionArgs[i], castPhi);
1037  reductionVariableMap.try_emplace(loop.getReductionVars()[i], castPhi);
1038  } else {
  // NOTE(review): typo in the assert message — "allocaction" should read
  // "allocation" (string left untouched here).
1039  assert(allocRegion.empty() &&
1040  "allocaction is implicit for by-val reduction");
1041  llvm::Value *var = builder.CreateAlloca(
1042  moduleTranslation.convertType(reductionDecls[i].getType()));
1043 
1044  llvm::Type *ptrTy = builder.getPtrTy();
1045  llvm::Value *castVar =
1046  builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
1047 
1048  moduleTranslation.mapValue(reductionArgs[i], castVar);
1049  privateReductionVariables[i] = castVar;
1050  reductionVariableMap.try_emplace(loop.getReductionVars()[i], castVar);
1051  }
1052  }
1053 
1054  return success();
1055 }
1056 
1057 /// Map input arguments to reduction initialization region
1058 template <typename T>
1059 static void
  // NOTE(review): the function-name/leading-parameter lines (likely
  // `mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation,
  // SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,`) appear to have
  // been dropped from this excerpt — confirm against the full file.
1062  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1063  unsigned i) {
1064  // map input argument to the initialization region
1065  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
1066  Region &initializerRegion = reduction.getInitializerRegion();
1067  Block &entry = initializerRegion.front();
1068 
  // The "mold" argument is bound to the LLVM value of the reduction variable.
1069  mlir::Value mlirSource = loop.getReductionVars()[i];
1070  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
1071  assert(llvmSource && "lookup reduction var");
1072  moduleTranslation.mapValue(reduction.getInitializerMoldArg(), llvmSource);
1073 
  // A second entry argument, when present, receives the pre-made allocation.
1074  if (entry.getNumArguments() > 1) {
1075  llvm::Value *allocation =
1076  reductionVariableMap.lookup(loop.getReductionVars()[i]);
1077  moduleTranslation.mapValue(reduction.getInitializerAllocArg(), allocation);
1078  }
1079 }
1080 
1081 static void
1082 setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder,
1083  llvm::BasicBlock *block = nullptr) {
1084  if (block == nullptr)
1085  block = builder.GetInsertBlock();
1086 
1087  if (block->empty() || block->getTerminator() == nullptr)
1088  builder.SetInsertPoint(block);
1089  else
1090  builder.SetInsertPoint(block->getTerminator());
1091 }
1092 
1093 /// Inline reductions' `init` regions. This functions assumes that the
1094 /// `builder`'s insertion point is where the user wants the `init` regions to be
1095 /// inlined; i.e. it does not try to find a proper insertion location for the
1096 /// `init` regions. It also leaves the `builder's insertions point in a state
1097 /// where the user can continue the code-gen directly afterwards.
1098 template <typename OP>
1099 static LogicalResult
  // NOTE(review): the function-name/leading-parameter line (likely
  // `initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs,`) and the
  // `reductionDecls` parameter line appear to have been dropped from this
  // excerpt — confirm against the full file.
1101  llvm::IRBuilderBase &builder,
1102  LLVM::ModuleTranslation &moduleTranslation,
1103  llvm::BasicBlock *latestAllocaBlock,
1105  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
1106  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1107  llvm::ArrayRef<bool> isByRef,
1108  SmallVectorImpl<DeferredStore> &deferredStores) {
1109  if (op.getNumReductionVars() == 0)
1110  return success();
1111 
  // Split off a dedicated block for the init code, then temporarily go back
  // to the alloca block to emit the remaining by-ref allocas.
1112  llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
1113  auto allocaIP = llvm::IRBuilderBase::InsertPoint(
1114  latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
1115  builder.restoreIP(allocaIP);
1116  SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars());
1117 
1118  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
1119  if (isByRef[i]) {
1120  if (!reductionDecls[i].getAllocRegion().empty())
1121  continue;
1122 
1123  // TODO: remove after all users of by-ref are updated to use the alloc
1124  // region: Allocate reduction variable (which is a pointer to the real
1125  // reduciton variable allocated in the inlined region)
1126  byRefVars[i] = builder.CreateAlloca(
1127  moduleTranslation.convertType(reductionDecls[i].getType()));
1128  }
1129  }
1130 
1131  setInsertPointForPossiblyEmptyBlock(builder, initBlock);
1132 
1133  // store result of the alloc region to the allocated pointer to the real
1134  // reduction variable
1135  for (auto [data, addr] : deferredStores)
1136  builder.CreateStore(data, addr);
1137 
1138  // Before the loop, store the initial values of reductions into reduction
1139  // variables. Although this could be done after allocas, we don't want to mess
1140  // up with the alloca insertion point.
1141  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
  // NOTE(review): a declaration such as `SmallVector<llvm::Value *> phis;`
  // appears to have been dropped here (it receives the value yielded from
  // the inlined initializer region below).
1143 
1144  // map block argument to initializer region
1145  mapInitializationArgs(op, moduleTranslation, reductionDecls,
1146  reductionVariableMap, i);
1147 
1148  if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
1149  "omp.reduction.neutral", builder,
1150  moduleTranslation, &phis)))
1151  return failure();
1152 
1153  assert(phis.size() == 1 && "expected one value to be yielded from the "
1154  "reduction neutral element declaration region");
1155 
  // NOTE(review): a statement appears to have been dropped here (possibly a
  // call to setInsertPointForPossiblyEmptyBlock(builder);) — confirm against
  // the full file.
1157 
1158  if (isByRef[i]) {
1159  if (!reductionDecls[i].getAllocRegion().empty())
1160  // done in allocReductionVars
1161  continue;
1162 
1163  // TODO: this path can be removed once all users of by-ref are updated to
1164  // use an alloc region
1165 
1166  // Store the result of the inlined region to the allocated reduction var
1167  // ptr
1168  builder.CreateStore(phis[0], byRefVars[i]);
1169 
1170  privateReductionVariables[i] = byRefVars[i];
1171  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
1172  reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
1173  } else {
1174  // for by-ref case the store is inside of the reduction region
1175  builder.CreateStore(phis[0], privateReductionVariables[i]);
1176  // the rest was handled in allocByValReductionVars
1177  }
1178 
1179  // forget the mapping for the initializer region because we might need a
1180  // different mapping if this reduction declaration is re-used for a
1181  // different variable
1182  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
1183  }
1184 
1185  return success();
1186 }
1187 
1188 /// Collect reduction info
1189 template <typename T>
  // NOTE(review): the `static void collectReductionInfo(` line as well as the
  // `reductionDecls` and `reductionInfos` parameter lines appear to have been
  // dropped from this excerpt — confirm against the full file.
1191  T loop, llvm::IRBuilderBase &builder,
1192  LLVM::ModuleTranslation &moduleTranslation,
1194  SmallVectorImpl<OwningReductionGen> &owningReductionGens,
1195  SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens,
1196  const ArrayRef<llvm::Value *> privateReductionVariables,
1198  unsigned numReductions = loop.getNumReductionVars();
1199 
  // Build one owning (and, when available, one owning atomic) generator per
  // reduction; the ReductionInfo entries below only hold references to them.
1200  for (unsigned i = 0; i < numReductions; ++i) {
1201  owningReductionGens.push_back(
1202  makeReductionGen(reductionDecls[i], builder, moduleTranslation));
1203  owningAtomicReductionGens.push_back(
1204  makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
1205  }
1206 
1207  // Collect the reduction information.
1208  reductionInfos.reserve(numReductions);
1209  for (unsigned i = 0; i < numReductions; ++i) {
  // Atomic generator stays null when the declaration has no atomic region.
1210  llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
1211  if (owningAtomicReductionGens[i])
1212  atomicGen = owningAtomicReductionGens[i];
1213  llvm::Value *variable =
1214  moduleTranslation.lookupValue(loop.getReductionVars()[i]);
1215  reductionInfos.push_back(
1216  {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
1217  privateReductionVariables[i],
1218  /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
1219  owningReductionGens[i],
1220  /*ReductionGenClang=*/nullptr, atomicGen});
1221  }
1222 }
1223 
1224 /// handling of DeclareReductionOp's cleanup region
1225 static LogicalResult
  // NOTE(review): the function-name/leading-parameter line (likely
  // `inlineOmpRegionCleanup(llvm::SmallVectorImpl<Region *> &cleanupRegions,`)
  // appears to have been dropped from this excerpt — confirm against the full
  // file.
1227  llvm::ArrayRef<llvm::Value *> privateVariables,
1228  LLVM::ModuleTranslation &moduleTranslation,
1229  llvm::IRBuilderBase &builder, StringRef regionName,
1230  bool shouldLoadCleanupRegionArg = true) {
1231  for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
  // Declarations without a cleanup region need no work.
1232  if (cleanupRegion->empty())
1233  continue;
1234 
1235  // map the argument to the cleanup region
1236  Block &entry = cleanupRegion->front();
1237 
  // If the current block already ends in a terminator, emit the cleanup code
  // before it.
1238  llvm::Instruction *potentialTerminator =
1239  builder.GetInsertBlock()->empty() ? nullptr
1240  : &builder.GetInsertBlock()->back();
1241  if (potentialTerminator && potentialTerminator->isTerminator())
1242  builder.SetInsertPoint(potentialTerminator);
  // The cleanup entry argument is either the loaded private value or the
  // raw private variable, depending on `shouldLoadCleanupRegionArg`.
1243  llvm::Value *privateVarValue =
1244  shouldLoadCleanupRegionArg
1245  ? builder.CreateLoad(
1246  moduleTranslation.convertType(entry.getArgument(0).getType()),
1247  privateVariables[i])
1248  : privateVariables[i];
1249 
1250  moduleTranslation.mapValue(entry.getArgument(0), privateVarValue);
1251 
1252  if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
1253  moduleTranslation)))
1254  return failure();
1255 
1256  // clear block argument mapping in case it needs to be re-created with a
1257  // different source for another use of the same reduction decl
1258  moduleTranslation.forgetMapping(*cleanupRegion);
1259  }
1260  return success();
1261 }
1262 
1263 // TODO: not used by ParallelOp
1264 template <class OP>
1265 static LogicalResult createReductionsAndCleanup(
1266  OP op, llvm::IRBuilderBase &builder,
1267  LLVM::ModuleTranslation &moduleTranslation,
1268  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
  // NOTE(review): the `reductionDecls` parameter line appears to have been
  // dropped from this excerpt — confirm against the full file.
1270  ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef,
1271  bool isNowait = false, bool isTeamsReduction = false) {
1272  // Process the reductions if required.
1273  if (op.getNumReductionVars() == 0)
1274  return success();
1275 
1276  SmallVector<OwningReductionGen> owningReductionGens;
1277  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
  // NOTE(review): a declaration of the `reductionInfos` vector appears to
  // have been dropped here — confirm against the full file.
1279 
1280  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1281 
1282  // Create the reduction generators. We need to own them here because
1283  // ReductionInfo only accepts references to the generators.
1284  collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
1285  owningReductionGens, owningAtomicReductionGens,
1286  privateReductionVariables, reductionInfos);
1287 
1288  // The call to createReductions below expects the block to have a
1289  // terminator. Create an unreachable instruction to serve as terminator
1290  // and remove it later.
1291  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1292  builder.SetInsertPoint(tempTerminator);
1293  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
1294  ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
1295  isByRef, isNowait, isTeamsReduction);
1296 
1297  if (failed(handleError(contInsertPoint, *op)))
1298  return failure();
1299 
1300  if (!contInsertPoint->getBlock())
1301  return op->emitOpError() << "failed to convert reductions";
1302 
1303  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1304  ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);
1305 
1306  if (failed(handleError(afterIP, *op)))
1307  return failure();
1308 
1309  tempTerminator->eraseFromParent();
1310  builder.restoreIP(*afterIP);
1311 
1312  // after the construct, deallocate private reduction variables
1313  SmallVector<Region *> reductionRegions;
1314  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
1315  [](omp::DeclareReductionOp reductionDecl) {
1316  return &reductionDecl.getCleanupRegion();
1317  });
1318  return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
1319  moduleTranslation, builder,
1320  "omp.reduction.cleanup");
  // NOTE(review): the `return success();` below is unreachable dead code —
  // the function already returns on the statement above. Candidate for
  // removal.
1321  return success();
1322 }
1323 
1324 static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
1325  if (!attr)
1326  return {};
1327  return *attr;
1328 }
1329 
1330 // TODO: not used by omp.parallel
1331 template <typename OP>
1332 static LogicalResult allocAndInitializeReductionVars(
1333  OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
1334  LLVM::ModuleTranslation &moduleTranslation,
1335  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
  // NOTE(review): the `reductionDecls` parameter line appears to have been
  // dropped from this excerpt — confirm against the full file.
1337  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
1338  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1339  llvm::ArrayRef<bool> isByRef) {
1340  if (op.getNumReductionVars() == 0)
1341  return success();
1342 
  // Stores produced during allocation are deferred and later emitted by
  // initReductionVars, after all allocas exist.
1343  SmallVector<DeferredStore> deferredStores;
1344 
1345  if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
1346  allocaIP, reductionDecls,
1347  privateReductionVariables, reductionVariableMap,
1348  deferredStores, isByRef)))
1349  return failure();
1350 
1351  return initReductionVars(op, reductionArgs, builder, moduleTranslation,
1352  allocaIP.getBlock(), reductionDecls,
1353  privateReductionVariables, reductionVariableMap,
1354  isByRef, deferredStores);
1355 }
1356 
1357 /// Return the llvm::Value * corresponding to the `privateVar` that
1358 /// is being privatized. It isn't always as simple as looking up
1359 /// moduleTranslation with privateVar. For instance, in case of
1360 /// an allocatable, the descriptor for the allocatable is privatized.
1361 /// This descriptor is mapped using an MapInfoOp. So, this function
1362 /// will return a pointer to the llvm::Value corresponding to the
1363 /// block argument for the mapped descriptor.
1364 static llvm::Value *
1365 findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
1366  LLVM::ModuleTranslation &moduleTranslation,
1367  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1368  if (mappedPrivateVars == nullptr || !mappedPrivateVars->contains(privateVar))
1369  return moduleTranslation.lookupValue(privateVar);
1370 
1371  Value blockArg = (*mappedPrivateVars)[privateVar];
1372  Type privVarType = privateVar.getType();
1373  Type blockArgType = blockArg.getType();
1374  assert(isa<LLVM::LLVMPointerType>(blockArgType) &&
1375  "A block argument corresponding to a mapped var should have "
1376  "!llvm.ptr type");
1377 
1378  if (privVarType == blockArgType)
1379  return moduleTranslation.lookupValue(blockArg);
1380 
1381  // This typically happens when the privatized type is lowered from
1382  // boxchar<KIND> and gets lowered to !llvm.struct<(ptr, i64)>. That is the
1383  // struct/pair is passed by value. But, mapped values are passed only as
1384  // pointers, so before we privatize, we must load the pointer.
1385  if (!isa<LLVM::LLVMPointerType>(privVarType))
1386  return builder.CreateLoad(moduleTranslation.convertType(privVarType),
1387  moduleTranslation.lookupValue(blockArg));
1388 
1389  return moduleTranslation.lookupValue(privateVar);
1390 }
1391 
1392 /// Initialize a single (first)private variable. You probably want to use
1393 /// allocateAndInitPrivateVars instead of this.
1394 /// This returns the private variable which has been initialized. This
1395 /// variable should be mapped before constructing the body of the Op.
  // NOTE(review): the return-type/name line (likely
  // `static llvm::Expected<llvm::Value *> initPrivateVar(`) appears to have
  // been dropped from this excerpt — confirm against the full file. The
  // function returns either the initialized private value or a string error.
1397  llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
1398  omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg,
1399  llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock,
1400  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
  // No init region: the already-allocated private variable is the result.
1401  Region &initRegion = privDecl.getInitRegion();
1402  if (initRegion.empty())
1403  return llvmPrivateVar;
1404 
1405  // map initialization region block arguments
1406  llvm::Value *nonPrivateVar = findAssociatedValue(
1407  mlirPrivVar, builder, moduleTranslation, mappedPrivateVars);
1408  assert(nonPrivateVar);
1409  moduleTranslation.mapValue(privDecl.getInitMoldArg(), nonPrivateVar);
1410  moduleTranslation.mapValue(privDecl.getInitPrivateArg(), llvmPrivateVar);
1411 
1412  // in-place convert the private initialization region
  // NOTE(review): a declaration such as `SmallVector<llvm::Value *> phis;`
  // appears to have been dropped here (it receives the value yielded from
  // the inlined init region below).
1414  if (failed(inlineConvertOmpRegions(initRegion, "omp.private.init", builder,
1415  moduleTranslation, &phis)))
1416  return llvm::createStringError(
1417  "failed to inline `init` region of `omp.private`");
1418 
1419  assert(phis.size() == 1 && "expected one allocation to be yielded");
1420 
1421  // clear init region block argument mapping in case it needs to be
1422  // re-created with a different source for another use of the same
1423  // reduction decl
1424  moduleTranslation.forgetMapping(initRegion);
1425 
1426  // Prefer the value yielded from the init region to the allocated private
1427  // variable in case the region is operating on arguments by-value (e.g.
1428  // Fortran character boxes).
1429  return phis[0];
1430 }
1431 
1432 static llvm::Error
1433 initPrivateVars(llvm::IRBuilderBase &builder,
1434  LLVM::ModuleTranslation &moduleTranslation,
1435  PrivateVarsInfo &privateVarsInfo,
1436  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
  // Nothing to privatize.
1437  if (privateVarsInfo.blockArgs.empty())
1438  return llvm::Error::success();
1439 
  // All init regions are inlined into a dedicated block split off here.
1440  llvm::BasicBlock *privInitBlock = splitBB(builder, true, "omp.private.init");
1441  setInsertPointForPossiblyEmptyBlock(builder, privInitBlock);
1442 
1443  for (auto [idx, zip] : llvm::enumerate(llvm::zip_equal(
1444  privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
1445  privateVarsInfo.blockArgs, privateVarsInfo.llvmVars))) {
1446  auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVar] = zip;
  // NOTE(review): the line invoking `initPrivateVar(` and assigning its
  // result (e.g. `llvm::Expected<llvm::Value *> privVarOrErr =`) appears to
  // have been dropped here — confirm against the full file.
1448  builder, moduleTranslation, privDecl, mlirPrivVar, blockArg,
1449  llvmPrivateVar, privInitBlock, mappedPrivateVars);
1450 
1451  if (!privVarOrErr)
1452  return privVarOrErr.takeError();
1453 
  // Make the (possibly replaced) private value visible to the region body.
1454  llvmPrivateVar = privVarOrErr.get();
1455  moduleTranslation.mapValue(blockArg, llvmPrivateVar);
1456 
  // NOTE(review): a statement appears to have been dropped here (possibly
  // `setInsertPointForPossiblyEmptyBlock(builder);`) — confirm against the
  // full file.
1458  }
1459 
1460  return llvm::Error::success();
1461 }
1462 
1463 /// Allocate and initialize delayed private variables. Returns the basic block
1464 /// which comes after all of these allocations. llvm::Value * for each of these
1465 /// private variables are populated in llvmPrivateVars.
  // NOTE(review): the return-type line (likely
  // `static llvm::Expected<llvm::BasicBlock *>`) appears to have been dropped
  // from this excerpt — confirm against the full file.
1467 allocatePrivateVars(llvm::IRBuilderBase &builder,
1468  LLVM::ModuleTranslation &moduleTranslation,
1469  PrivateVarsInfo &privateVarsInfo,
1470  const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1471  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1472  // Allocate private vars
1473  llvm::Instruction *allocaTerminator = allocaIP.getBlock()->getTerminator();
  // Split the alloca block so everything after the allocas lands in a fresh
  // "omp.region.after_alloca" block.
1474  splitBB(llvm::OpenMPIRBuilder::InsertPointTy(allocaIP.getBlock(),
1475  allocaTerminator->getIterator()),
1476  true, allocaTerminator->getStableDebugLoc(),
1477  "omp.region.after_alloca");
1478 
1479  llvm::IRBuilderBase::InsertPointGuard guard(builder);
1480  // Update the allocaTerminator since the alloca block was split above.
1481  allocaTerminator = allocaIP.getBlock()->getTerminator();
1482  builder.SetInsertPoint(allocaTerminator);
1483  // The new terminator is an uncondition branch created by the splitBB above.
1484  assert(allocaTerminator->getNumSuccessors() == 1 &&
1485  "This is an unconditional branch created by splitBB");
1486 
  // NOTE(review): `dataLayout` appears to be unused in this excerpt —
  // candidate for removal if the full function does not use it.
1487  llvm::DataLayout dataLayout = builder.GetInsertBlock()->getDataLayout();
1488  llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);
1489 
  // When the alloca address space differs from the program (default) address
  // space, each alloca is cast below so users see default-AS pointers.
1490  unsigned int allocaAS =
1491  moduleTranslation.getLLVMModule()->getDataLayout().getAllocaAddrSpace();
1492  unsigned int defaultAS = moduleTranslation.getLLVMModule()
1493  ->getDataLayout()
1494  .getProgramAddressSpace();
1495 
1496  for (auto [privDecl, mlirPrivVar, blockArg] :
1497  llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
1498  privateVarsInfo.blockArgs)) {
1499  llvm::Type *llvmAllocType =
1500  moduleTranslation.convertType(privDecl.getType());
1501  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1502  llvm::Value *llvmPrivateVar = builder.CreateAlloca(
1503  llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
1504  if (allocaAS != defaultAS)
1505  llvmPrivateVar = builder.CreateAddrSpaceCast(llvmPrivateVar,
1506  builder.getPtrTy(defaultAS));
1507 
1508  privateVarsInfo.llvmVars.push_back(llvmPrivateVar);
1509  }
1510 
1511  return afterAllocas;
1512 }
1513 
1514 static LogicalResult copyFirstPrivateVars(
1515  llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
1516  SmallVectorImpl<mlir::Value> &mlirPrivateVars,
1517  ArrayRef<llvm::Value *> llvmPrivateVars,
  // NOTE(review): the `privateDecls` parameter line appears to have been
  // dropped from this excerpt — confirm against the full file.
1519  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1520  // Apply copy region for firstprivate.
1521  bool needsFirstprivate =
1522  llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1523  return privOp.getDataSharingType() ==
1524  omp::DataSharingClauseType::FirstPrivate;
1525  });
1526 
  // Fast path: no firstprivate clause present at all.
1527  if (!needsFirstprivate)
1528  return success();
1529 
  // All copy regions are inlined into a dedicated block split off here.
1530  llvm::BasicBlock *copyBlock =
1531  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1532  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
1533 
1534  for (auto [decl, mlirVar, llvmVar] :
1535  llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1536  if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1537  continue;
1538 
1539  // copyRegion implements `lhs = rhs`
1540  Region &copyRegion = decl.getCopyRegion();
1541 
1542  // map copyRegion rhs arg
1543  llvm::Value *nonPrivateVar = findAssociatedValue(
1544  mlirVar, builder, moduleTranslation, mappedPrivateVars);
1545  assert(nonPrivateVar);
1546  moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1547 
1548  // map copyRegion lhs arg
1549  moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1550 
1551  // in-place convert copy region
1552  if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
1553  moduleTranslation)))
1554  return decl.emitError("failed to inline `copy` region of `omp.private`");
1555 
  // NOTE(review): a statement appears to have been dropped here (possibly
  // `setInsertPointForPossiblyEmptyBlock(builder);`) — confirm against the
  // full file.
1557 
1558  // ignore unused value yielded from copy region
1559 
1560  // clear copy region block argument mapping in case it needs to be
1561  // re-created with different sources for reuse of the same reduction
1562  // decl
1563  moduleTranslation.forgetMapping(copyRegion);
1564  }
1565 
1566  return success();
1567 }
1568 
1569 static LogicalResult
1570 cleanupPrivateVars(llvm::IRBuilderBase &builder,
1571  LLVM::ModuleTranslation &moduleTranslation, Location loc,
1572  SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
1573  SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
1574  // private variable deallocation
1575  SmallVector<Region *> privateCleanupRegions;
1576  llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
1577  [](omp::PrivateClauseOp privatizer) {
1578  return &privatizer.getDeallocRegion();
1579  });
1580 
1581  if (failed(inlineOmpRegionCleanup(
1582  privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1583  "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1584  return mlir::emitError(loc, "failed to inline `dealloc` region of an "
1585  "`omp.private` op in");
1586 
1587  return success();
1588 }
1589 
1590 /// Returns true if the construct contains omp.cancel or omp.cancellation_point
  // NOTE(review): the signature line (likely
  // `static bool constructIsCancellable(Operation *op) {`) appears to have
  // been dropped from this excerpt — confirm against the full file.
1592  // omp.cancel and omp.cancellation_point must be "closely nested" so they will
1593  // be visible and not inside of function calls. This is enforced by the
1594  // verifier.
  // Walk the construct; finding either cancellation op interrupts the walk,
  // and an interrupted walk means the construct is cancellable.
1595  return op
1596  ->walk([](Operation *child) {
1597  if (mlir::isa<omp::CancelOp, omp::CancellationPointOp>(child))
1598  return WalkResult::interrupt();
1599  return WalkResult::advance();
1600  })
1601  .wasInterrupted();
1602 }
1603 
1604 static LogicalResult
1605 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
1606  LLVM::ModuleTranslation &moduleTranslation) {
1607  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1608  using StorableBodyGenCallbackTy =
1609  llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
1610 
1611  auto sectionsOp = cast<omp::SectionsOp>(opInst);
1612 
1613  if (failed(checkImplementationStatus(opInst)))
1614  return failure();
1615 
1616  llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionByref());
1617  assert(isByRef.size() == sectionsOp.getNumReductionVars());
1618 
1619  SmallVector<omp::DeclareReductionOp> reductionDecls;
1620  collectReductionDecls(sectionsOp, reductionDecls);
1621  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1622  findAllocaInsertPoint(builder, moduleTranslation);
1623 
1624  SmallVector<llvm::Value *> privateReductionVariables(
1625  sectionsOp.getNumReductionVars());
1626  DenseMap<Value, llvm::Value *> reductionVariableMap;
1627 
1628  MutableArrayRef<BlockArgument> reductionArgs =
1629  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
1630 
1632  sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
1633  reductionDecls, privateReductionVariables, reductionVariableMap,
1634  isByRef)))
1635  return failure();
1636 
1638 
1639  for (Operation &op : *sectionsOp.getRegion().begin()) {
1640  auto sectionOp = dyn_cast<omp::SectionOp>(op);
1641  if (!sectionOp) // omp.terminator
1642  continue;
1643 
1644  Region &region = sectionOp.getRegion();
1645  auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation](
1646  InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1647  builder.restoreIP(codeGenIP);
1648 
1649  // map the omp.section reduction block argument to the omp.sections block
1650  // arguments
1651  // TODO: this assumes that the only block arguments are reduction
1652  // variables
1653  assert(region.getNumArguments() ==
1654  sectionsOp.getRegion().getNumArguments());
1655  for (auto [sectionsArg, sectionArg] : llvm::zip_equal(
1656  sectionsOp.getRegion().getArguments(), region.getArguments())) {
1657  llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg);
1658  assert(llvmVal);
1659  moduleTranslation.mapValue(sectionArg, llvmVal);
1660  }
1661 
1662  return convertOmpOpRegions(region, "omp.section.region", builder,
1663  moduleTranslation)
1664  .takeError();
1665  };
1666  sectionCBs.push_back(sectionCB);
1667  }
1668 
1669  // No sections within omp.sections operation - skip generation. This situation
1670  // is only possible if there is only a terminator operation inside the
1671  // sections operation
1672  if (sectionCBs.empty())
1673  return success();
1674 
1675  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
1676 
1677  // TODO: Perform appropriate actions according to the data-sharing
1678  // attribute (shared, private, firstprivate, ...) of variables.
1679  // Currently defaults to shared.
1680  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
1681  llvm::Value &vPtr, llvm::Value *&replacementValue)
1682  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
1683  replacementValue = &vPtr;
1684  return codeGenIP;
1685  };
1686 
1687  // TODO: Perform finalization actions for variables. This has to be
1688  // called for variables which have destructors/finalizers.
1689  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1690 
1691  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1692  bool isCancellable = constructIsCancellable(sectionsOp);
1693  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1694  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1695  moduleTranslation.getOpenMPBuilder()->createSections(
1696  ompLoc, allocaIP, sectionCBs, privCB, finiCB, isCancellable,
1697  sectionsOp.getNowait());
1698 
1699  if (failed(handleError(afterIP, opInst)))
1700  return failure();
1701 
1702  builder.restoreIP(*afterIP);
1703 
1704  // Process the reductions if required.
1706  sectionsOp, builder, moduleTranslation, allocaIP, reductionDecls,
1707  privateReductionVariables, isByRef, sectionsOp.getNowait());
1708 }
1709 
1710 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
1711 static LogicalResult
1712 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
1713  LLVM::ModuleTranslation &moduleTranslation) {
1714  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1715  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1716 
1717  if (failed(checkImplementationStatus(*singleOp)))
1718  return failure();
1719 
1720  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1721  builder.restoreIP(codegenIP);
1722  return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region",
1723  builder, moduleTranslation)
1724  .takeError();
1725  };
1726  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1727 
1728  // Handle copyprivate
1729  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
1730  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateSyms();
1733  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
1734  llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
1735  auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
1736  singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
1737  llvmCPFuncs.push_back(
1738  moduleTranslation.lookupFunction(llvmFuncOp.getName()));
1739  }
1740 
1741  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1742  moduleTranslation.getOpenMPBuilder()->createSingle(
1743  ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars,
1744  llvmCPFuncs);
1745 
1746  if (failed(handleError(afterIP, *singleOp)))
1747  return failure();
1748 
1749  builder.restoreIP(*afterIP);
1750  return success();
1751 }
1752 
1753 static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp) {
1754  auto iface =
1755  llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(teamsOp.getOperation());
1756  // Check that all uses of the reduction block arg has the same distribute op
1757  // parent.
1759  Operation *distOp = nullptr;
1760  for (auto ra : iface.getReductionBlockArgs())
1761  for (auto &use : ra.getUses()) {
1762  auto *useOp = use.getOwner();
1763  // Ignore debug uses.
1764  if (mlir::isa<LLVM::DbgDeclareOp, LLVM::DbgValueOp>(useOp)) {
1765  debugUses.push_back(useOp);
1766  continue;
1767  }
1768 
1769  auto currentDistOp = useOp->getParentOfType<omp::DistributeOp>();
1770  // Use is not inside a distribute op - return false
1771  if (!currentDistOp)
1772  return false;
1773  // Multiple distribute operations - return false
1774  Operation *currentOp = currentDistOp.getOperation();
1775  if (distOp && (distOp != currentOp))
1776  return false;
1777 
1778  distOp = currentOp;
1779  }
1780 
1781  // If we are going to use distribute reduction then remove any debug uses of
1782  // the reduction parameters in teamsOp. Otherwise they will be left without
1783  // any mapped value in moduleTranslation and will eventually error out.
1784  for (auto use : debugUses)
1785  use->erase();
1786  return true;
1787 }
1788 
1789 // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder
1790 static LogicalResult
1791 convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
1792  LLVM::ModuleTranslation &moduleTranslation) {
1793  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1794  if (failed(checkImplementationStatus(*op)))
1795  return failure();
1796 
1797  DenseMap<Value, llvm::Value *> reductionVariableMap;
1798  unsigned numReductionVars = op.getNumReductionVars();
1799  SmallVector<omp::DeclareReductionOp> reductionDecls;
1800  SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
1801  llvm::ArrayRef<bool> isByRef;
1802  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1803  findAllocaInsertPoint(builder, moduleTranslation);
1804 
1805  // Only do teams reduction if there is no distribute op that captures the
1806  // reduction instead.
1807  bool doTeamsReduction = !teamsReductionContainedInDistribute(op);
1808  if (doTeamsReduction) {
1809  isByRef = getIsByRef(op.getReductionByref());
1810 
1811  assert(isByRef.size() == op.getNumReductionVars());
1812 
1813  MutableArrayRef<BlockArgument> reductionArgs =
1814  llvm::cast<omp::BlockArgOpenMPOpInterface>(*op).getReductionBlockArgs();
1815 
1816  collectReductionDecls(op, reductionDecls);
1817 
1819  op, reductionArgs, builder, moduleTranslation, allocaIP,
1820  reductionDecls, privateReductionVariables, reductionVariableMap,
1821  isByRef)))
1822  return failure();
1823  }
1824 
1825  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1827  moduleTranslation, allocaIP);
1828  builder.restoreIP(codegenIP);
1829  return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
1830  moduleTranslation)
1831  .takeError();
1832  };
1833 
1834  llvm::Value *numTeamsLower = nullptr;
1835  if (Value numTeamsLowerVar = op.getNumTeamsLower())
1836  numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);
1837 
1838  llvm::Value *numTeamsUpper = nullptr;
1839  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
1840  numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);
1841 
1842  llvm::Value *threadLimit = nullptr;
1843  if (Value threadLimitVar = op.getThreadLimit())
1844  threadLimit = moduleTranslation.lookupValue(threadLimitVar);
1845 
1846  llvm::Value *ifExpr = nullptr;
1847  if (Value ifVar = op.getIfExpr())
1848  ifExpr = moduleTranslation.lookupValue(ifVar);
1849 
1850  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1851  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1852  moduleTranslation.getOpenMPBuilder()->createTeams(
1853  ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr);
1854 
1855  if (failed(handleError(afterIP, *op)))
1856  return failure();
1857 
1858  builder.restoreIP(*afterIP);
1859  if (doTeamsReduction) {
1860  // Process the reductions if required.
1862  op, builder, moduleTranslation, allocaIP, reductionDecls,
1863  privateReductionVariables, isByRef,
1864  /*isNoWait*/ false, /*isTeamsReduction*/ true);
1865  }
1866  return success();
1867 }
1868 
1869 static void
1870 buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
1871  LLVM::ModuleTranslation &moduleTranslation,
1873  if (dependVars.empty())
1874  return;
1875  for (auto dep : llvm::zip(dependVars, dependKinds->getValue())) {
1876  llvm::omp::RTLDependenceKindTy type;
1877  switch (
1878  cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
1879  case mlir::omp::ClauseTaskDepend::taskdependin:
1880  type = llvm::omp::RTLDependenceKindTy::DepIn;
1881  break;
1882  // The OpenMP runtime requires that the codegen for 'depend' clause for
1883  // 'out' dependency kind must be the same as codegen for 'depend' clause
1884  // with 'inout' dependency.
1885  case mlir::omp::ClauseTaskDepend::taskdependout:
1886  case mlir::omp::ClauseTaskDepend::taskdependinout:
1887  type = llvm::omp::RTLDependenceKindTy::DepInOut;
1888  break;
1889  case mlir::omp::ClauseTaskDepend::taskdependmutexinoutset:
1890  type = llvm::omp::RTLDependenceKindTy::DepMutexInOutSet;
1891  break;
1892  case mlir::omp::ClauseTaskDepend::taskdependinoutset:
1893  type = llvm::omp::RTLDependenceKindTy::DepInOutSet;
1894  break;
1895  };
1896  llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
1897  llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
1898  dds.emplace_back(dd);
1899  }
1900 }
1901 
1902 /// Shared implementation of a callback which adds a termiator for the new block
1903 /// created for the branch taken when an openmp construct is cancelled. The
1904 /// terminator is saved in \p cancelTerminators. This callback is invoked only
1905 /// if there is cancellation inside of the taskgroup body.
1906 /// The terminator will need to be fixed to branch to the correct block to
1907 /// cleanup the construct.
1908 static void
1910  llvm::IRBuilderBase &llvmBuilder,
1911  llvm::OpenMPIRBuilder &ompBuilder, mlir::Operation *op,
1912  llvm::omp::Directive cancelDirective) {
1913  auto finiCB = [&](llvm::OpenMPIRBuilder::InsertPointTy ip) -> llvm::Error {
1914  llvm::IRBuilderBase::InsertPointGuard guard(llvmBuilder);
1915 
1916  // ip is currently in the block branched to if cancellation occured.
1917  // We need to create a branch to terminate that block.
1918  llvmBuilder.restoreIP(ip);
1919 
1920  // We must still clean up the construct after cancelling it, so we need to
1921  // branch to the block that finalizes the taskgroup.
1922  // That block has not been created yet so use this block as a dummy for now
1923  // and fix this after creating the operation.
1924  cancelTerminators.push_back(llvmBuilder.CreateBr(ip.getBlock()));
1925  return llvm::Error::success();
1926  };
1927  // We have to add the cleanup to the OpenMPIRBuilder before the body gets
1928  // created in case the body contains omp.cancel (which will then expect to be
1929  // able to find this cleanup callback).
1930  ompBuilder.pushFinalizationCB(
1931  {finiCB, cancelDirective, constructIsCancellable(op)});
1932 }
1933 
1934 /// If we cancelled the construct, we should branch to the finalization block of
1935 /// that construct. OMPIRBuilder structures the CFG such that the cleanup block
1936 /// is immediately before the continuation block. Now this finalization has
1937 /// been created we can fix the branch.
1938 static void
1940  llvm::OpenMPIRBuilder &ompBuilder,
1941  const llvm::OpenMPIRBuilder::InsertPointTy &afterIP) {
1942  ompBuilder.popFinalizationCB();
1943  llvm::BasicBlock *constructFini = afterIP.getBlock()->getSinglePredecessor();
1944  for (llvm::BranchInst *cancelBranch : cancelTerminators) {
1945  assert(cancelBranch->getNumSuccessors() == 1 &&
1946  "cancel branch should have one target");
1947  cancelBranch->setSuccessor(0, constructFini);
1948  }
1949 }
1950 
1951 namespace {
1952 /// TaskContextStructManager takes care of creating and freeing a structure
1953 /// containing information needed by the task body to execute.
1954 class TaskContextStructManager {
1955 public:
1956  TaskContextStructManager(llvm::IRBuilderBase &builder,
1957  LLVM::ModuleTranslation &moduleTranslation,
1959  : builder{builder}, moduleTranslation{moduleTranslation},
1960  privateDecls{privateDecls} {}
1961 
1962  /// Creates a heap allocated struct containing space for each private
1963  /// variable. Invariant: privateVarTypes, privateDecls, and the elements of
1964  /// the structure should all have the same order (although privateDecls which
1965  /// do not read from the mold argument are skipped).
1966  void generateTaskContextStruct();
1967 
1968  /// Create GEPs to access each member of the structure representing a private
1969  /// variable, adding them to llvmPrivateVars. Null values are added where
1970  /// private decls were skipped so that the ordering continues to match the
1971  /// private decls.
1972  void createGEPsToPrivateVars();
1973 
1974  /// De-allocate the task context structure.
1975  void freeStructPtr();
1976 
1977  MutableArrayRef<llvm::Value *> getLLVMPrivateVarGEPs() {
1978  return llvmPrivateVarGEPs;
1979  }
1980 
1981  llvm::Value *getStructPtr() { return structPtr; }
1982 
1983 private:
1984  llvm::IRBuilderBase &builder;
1985  LLVM::ModuleTranslation &moduleTranslation;
1987 
1988  /// The type of each member of the structure, in order.
1989  SmallVector<llvm::Type *> privateVarTypes;
1990 
1991  /// LLVM values for each private variable, or null if that private variable is
1992  /// not included in the task context structure
1993  SmallVector<llvm::Value *> llvmPrivateVarGEPs;
1994 
1995  /// A pointer to the structure containing context for this task.
1996  llvm::Value *structPtr = nullptr;
1997  /// The type of the structure
1998  llvm::Type *structTy = nullptr;
1999 };
2000 } // namespace
2001 
2002 void TaskContextStructManager::generateTaskContextStruct() {
2003  if (privateDecls.empty())
2004  return;
2005  privateVarTypes.reserve(privateDecls.size());
2006 
2007  for (omp::PrivateClauseOp &privOp : privateDecls) {
2008  // Skip private variables which can safely be allocated and initialised
2009  // inside of the task
2010  if (!privOp.readsFromMold())
2011  continue;
2012  Type mlirType = privOp.getType();
2013  privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
2014  }
2015 
2016  structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(),
2017  privateVarTypes);
2018 
2019  llvm::DataLayout dataLayout =
2020  builder.GetInsertBlock()->getModule()->getDataLayout();
2021  llvm::Type *intPtrTy = builder.getIntPtrTy(dataLayout);
2022  llvm::Constant *allocSize = llvm::ConstantExpr::getSizeOf(structTy);
2023 
2024  // Heap allocate the structure
2025  structPtr = builder.CreateMalloc(intPtrTy, structTy, allocSize,
2026  /*ArraySize=*/nullptr, /*MallocF=*/nullptr,
2027  "omp.task.context_ptr");
2028 }
2029 
2030 void TaskContextStructManager::createGEPsToPrivateVars() {
2031  if (!structPtr) {
2032  assert(privateVarTypes.empty());
2033  return;
2034  }
2035 
2036  // Create GEPs for each struct member
2037  llvmPrivateVarGEPs.clear();
2038  llvmPrivateVarGEPs.reserve(privateDecls.size());
2039  llvm::Value *zero = builder.getInt32(0);
2040  unsigned i = 0;
2041  for (auto privDecl : privateDecls) {
2042  if (!privDecl.readsFromMold()) {
2043  // Handle this inside of the task so we don't pass unnessecary vars in
2044  llvmPrivateVarGEPs.push_back(nullptr);
2045  continue;
2046  }
2047  llvm::Value *iVal = builder.getInt32(i);
2048  llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
2049  llvmPrivateVarGEPs.push_back(gep);
2050  i += 1;
2051  }
2052 }
2053 
2054 void TaskContextStructManager::freeStructPtr() {
2055  if (!structPtr)
2056  return;
2057 
2058  llvm::IRBuilderBase::InsertPointGuard guard{builder};
2059  // Ensure we don't put the call to free() after the terminator
2060  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
2061  builder.CreateFree(structPtr);
2062 }
2063 
2064 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
2065 static LogicalResult
2066 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
2067  LLVM::ModuleTranslation &moduleTranslation) {
2068  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2069  if (failed(checkImplementationStatus(*taskOp)))
2070  return failure();
2071 
2072  PrivateVarsInfo privateVarsInfo(taskOp);
2073  TaskContextStructManager taskStructMgr{builder, moduleTranslation,
2074  privateVarsInfo.privatizers};
2075 
2076  // Allocate and copy private variables before creating the task. This avoids
2077  // accessing invalid memory if (after this scope ends) the private variables
2078  // are initialized from host variables or if the variables are copied into
2079  // from host variables (firstprivate). The insertion point is just before
2080  // where the code for creating and scheduling the task will go. That puts this
2081  // code outside of the outlined task region, which is what we want because
2082  // this way the initialization and copy regions are executed immediately while
2083  // the host variable data are still live.
2084 
2085  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2086  findAllocaInsertPoint(builder, moduleTranslation);
2087 
2088  // Not using splitBB() because that requires the current block to have a
2089  // terminator.
2090  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
2091  llvm::BasicBlock *taskStartBlock = llvm::BasicBlock::Create(
2092  builder.getContext(), "omp.task.start",
2093  /*Parent=*/builder.GetInsertBlock()->getParent());
2094  llvm::Instruction *branchToTaskStartBlock = builder.CreateBr(taskStartBlock);
2095  builder.SetInsertPoint(branchToTaskStartBlock);
2096 
2097  // Now do this again to make the initialization and copy blocks
2098  llvm::BasicBlock *copyBlock =
2099  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
2100  llvm::BasicBlock *initBlock =
2101  splitBB(builder, /*CreateBranch=*/true, "omp.private.init");
2102 
2103  // Now the control flow graph should look like
2104  // starter_block:
2105  // <---- where we started when convertOmpTaskOp was called
2106  // br %omp.private.init
2107  // omp.private.init:
2108  // br %omp.private.copy
2109  // omp.private.copy:
2110  // br %omp.task.start
2111  // omp.task.start:
2112  // <---- where we want the insertion point to be when we call createTask()
2113 
2114  // Save the alloca insertion point on ModuleTranslation stack for use in
2115  // nested regions.
2117  moduleTranslation, allocaIP);
2118 
2119  // Allocate and initialize private variables
2120  builder.SetInsertPoint(initBlock->getTerminator());
2121 
2122  // Create task variable structure
2123  taskStructMgr.generateTaskContextStruct();
2124  // GEPs so that we can initialize the variables. Don't use these GEPs inside
2125  // of the body otherwise it will be the GEP not the struct which is fowarded
2126  // to the outlined function. GEPs forwarded in this way are passed in a
2127  // stack-allocated (by OpenMPIRBuilder) structure which is not safe for tasks
2128  // which may not be executed until after the current stack frame goes out of
2129  // scope.
2130  taskStructMgr.createGEPsToPrivateVars();
2131 
2132  for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
2133  llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
2134  privateVarsInfo.blockArgs,
2135  taskStructMgr.getLLVMPrivateVarGEPs())) {
2136  // To be handled inside the task.
2137  if (!privDecl.readsFromMold())
2138  continue;
2139  assert(llvmPrivateVarAlloc &&
2140  "reads from mold so shouldn't have been skipped");
2141 
2142  llvm::Expected<llvm::Value *> privateVarOrErr =
2143  initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2144  blockArg, llvmPrivateVarAlloc, initBlock);
2145  if (!privateVarOrErr)
2146  return handleError(privateVarOrErr, *taskOp.getOperation());
2147 
2148  llvm::IRBuilderBase::InsertPointGuard guard(builder);
2149  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
2150 
2151  // TODO: this is a bit of a hack for Fortran character boxes.
2152  // Character boxes are passed by value into the init region and then the
2153  // initialized character box is yielded by value. Here we need to store the
2154  // yielded value into the private allocation, and load the private
2155  // allocation to match the type expected by region block arguments.
2156  if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
2157  !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
2158  builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);
2159  // Load it so we have the value pointed to by the GEP
2160  llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(),
2161  llvmPrivateVarAlloc);
2162  }
2163  assert(llvmPrivateVarAlloc->getType() ==
2164  moduleTranslation.convertType(blockArg.getType()));
2165 
2166  // Mapping blockArg -> llvmPrivateVarAlloc is done inside the body callback
2167  // so that OpenMPIRBuilder doesn't try to pass each GEP address through a
2168  // stack allocated structure.
2169  }
2170 
2171  // firstprivate copy region
2172  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
2173  if (failed(copyFirstPrivateVars(
2174  builder, moduleTranslation, privateVarsInfo.mlirVars,
2175  taskStructMgr.getLLVMPrivateVarGEPs(), privateVarsInfo.privatizers)))
2176  return llvm::failure();
2177 
2178  // Set up for call to createTask()
2179  builder.SetInsertPoint(taskStartBlock);
2180 
2181  auto bodyCB = [&](InsertPointTy allocaIP,
2182  InsertPointTy codegenIP) -> llvm::Error {
2183  // Save the alloca insertion point on ModuleTranslation stack for use in
2184  // nested regions.
2186  moduleTranslation, allocaIP);
2187 
2188  // translate the body of the task:
2189  builder.restoreIP(codegenIP);
2190 
2191  llvm::BasicBlock *privInitBlock = nullptr;
2192  privateVarsInfo.llvmVars.resize(privateVarsInfo.blockArgs.size());
2193  for (auto [i, zip] : llvm::enumerate(llvm::zip_equal(
2194  privateVarsInfo.blockArgs, privateVarsInfo.privatizers,
2195  privateVarsInfo.mlirVars))) {
2196  auto [blockArg, privDecl, mlirPrivVar] = zip;
2197  // This is handled before the task executes
2198  if (privDecl.readsFromMold())
2199  continue;
2200 
2201  llvm::IRBuilderBase::InsertPointGuard guard(builder);
2202  llvm::Type *llvmAllocType =
2203  moduleTranslation.convertType(privDecl.getType());
2204  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
2205  llvm::Value *llvmPrivateVar = builder.CreateAlloca(
2206  llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
2207 
2208  llvm::Expected<llvm::Value *> privateVarOrError =
2209  initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2210  blockArg, llvmPrivateVar, privInitBlock);
2211  if (!privateVarOrError)
2212  return privateVarOrError.takeError();
2213  moduleTranslation.mapValue(blockArg, privateVarOrError.get());
2214  privateVarsInfo.llvmVars[i] = privateVarOrError.get();
2215  }
2216 
2217  taskStructMgr.createGEPsToPrivateVars();
2218  for (auto [i, llvmPrivVar] :
2219  llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) {
2220  if (!llvmPrivVar) {
2221  assert(privateVarsInfo.llvmVars[i] &&
2222  "This is added in the loop above");
2223  continue;
2224  }
2225  privateVarsInfo.llvmVars[i] = llvmPrivVar;
2226  }
2227 
2228  // Find and map the addresses of each variable within the task context
2229  // structure
2230  for (auto [blockArg, llvmPrivateVar, privateDecl] :
2231  llvm::zip_equal(privateVarsInfo.blockArgs, privateVarsInfo.llvmVars,
2232  privateVarsInfo.privatizers)) {
2233  // This was handled above.
2234  if (!privateDecl.readsFromMold())
2235  continue;
2236  // Fix broken pass-by-value case for Fortran character boxes
2237  if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
2238  llvmPrivateVar = builder.CreateLoad(
2239  moduleTranslation.convertType(blockArg.getType()), llvmPrivateVar);
2240  }
2241  assert(llvmPrivateVar->getType() ==
2242  moduleTranslation.convertType(blockArg.getType()));
2243  moduleTranslation.mapValue(blockArg, llvmPrivateVar);
2244  }
2245 
2246  auto continuationBlockOrError = convertOmpOpRegions(
2247  taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
2248  if (failed(handleError(continuationBlockOrError, *taskOp)))
2249  return llvm::make_error<PreviouslyReportedError>();
2250 
2251  builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
2252 
2253  if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(),
2254  privateVarsInfo.llvmVars,
2255  privateVarsInfo.privatizers)))
2256  return llvm::make_error<PreviouslyReportedError>();
2257 
2258  // Free heap allocated task context structure at the end of the task.
2259  taskStructMgr.freeStructPtr();
2260 
2261  return llvm::Error::success();
2262  };
2263 
2264  llvm::OpenMPIRBuilder &ompBuilder = *moduleTranslation.getOpenMPBuilder();
2265  SmallVector<llvm::BranchInst *> cancelTerminators;
2266  // The directive to match here is OMPD_taskgroup because it is the taskgroup
2267  // which is canceled. This is handled here because it is the task's cleanup
2268  // block which should be branched to.
2269  pushCancelFinalizationCB(cancelTerminators, builder, ompBuilder, taskOp,
2270  llvm::omp::Directive::OMPD_taskgroup);
2271 
2273  buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
2274  moduleTranslation, dds);
2275 
2276  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2277  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2278  moduleTranslation.getOpenMPBuilder()->createTask(
2279  ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
2280  moduleTranslation.lookupValue(taskOp.getFinal()),
2281  moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
2282  taskOp.getMergeable(),
2283  moduleTranslation.lookupValue(taskOp.getEventHandle()),
2284  moduleTranslation.lookupValue(taskOp.getPriority()));
2285 
2286  if (failed(handleError(afterIP, *taskOp)))
2287  return failure();
2288 
2289  // Set the correct branch target for task cancellation
2290  popCancelFinalizationCB(cancelTerminators, ompBuilder, afterIP.get());
2291 
2292  builder.restoreIP(*afterIP);
2293  return success();
2294 }
2295 
2296 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
2297 static LogicalResult
2298 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
2299  LLVM::ModuleTranslation &moduleTranslation) {
2300  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2301  if (failed(checkImplementationStatus(*tgOp)))
2302  return failure();
2303 
2304  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
2305  builder.restoreIP(codegenIP);
2306  return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region",
2307  builder, moduleTranslation)
2308  .takeError();
2309  };
2310 
2311  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
2312  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2313  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2314  moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP,
2315  bodyCB);
2316 
2317  if (failed(handleError(afterIP, *tgOp)))
2318  return failure();
2319 
2320  builder.restoreIP(*afterIP);
2321  return success();
2322 }
2323 
2324 static LogicalResult
2325 convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder,
2326  LLVM::ModuleTranslation &moduleTranslation) {
2327  if (failed(checkImplementationStatus(*twOp)))
2328  return failure();
2329 
2330  moduleTranslation.getOpenMPBuilder()->createTaskwait(builder.saveIP());
2331  return success();
2332 }
2333 
/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  auto wsloopOp = cast<omp::WsloopOp>(opInst);
  // Fail early on any clause this translation does not support yet.
  if (failed(checkImplementationStatus(opInst)))
    return failure();

  // The loop nest wrapped by this worksharing construct.
  auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
  llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref());
  assert(isByRef.size() == wsloopOp.getNumReductionVars());

  // Static is the default.
  auto schedule =
      wsloopOp.getScheduleKind().value_or(omp::ClauseScheduleKind::Static);

  // Find the loop configuration.
  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]);
  llvm::Type *ivType = step->getType();
  llvm::Value *chunk = nullptr;
  if (wsloopOp.getScheduleChunk()) {
    llvm::Value *chunkVar =
        moduleTranslation.lookupValue(wsloopOp.getScheduleChunk());
    // The chunk size must match the induction variable's integer type.
    chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
  }

  PrivateVarsInfo privateVarsInfo(wsloopOp);

  SmallVector<omp::DeclareReductionOp> reductionDecls;
  collectReductionDecls(wsloopOp, reductionDecls);
  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  // One slot per reduction variable; filled in by allocReductionVars below.
  SmallVector<llvm::Value *> privateReductionVariables(
      wsloopOp.getNumReductionVars());

  // NOTE(review): the declaration line introducing `afterAllocas` appears to
  // be truncated in this listing; the next line is its trailing argument list.
      builder, moduleTranslation, privateVarsInfo, allocaIP);
  if (handleError(afterAllocas, opInst).failed())
    return failure();

  DenseMap<Value, llvm::Value *> reductionVariableMap;

  MutableArrayRef<BlockArgument> reductionArgs =
      cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();

  SmallVector<DeferredStore> deferredStores;

  if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
                                moduleTranslation, allocaIP, reductionDecls,
                                privateReductionVariables, reductionVariableMap,
                                deferredStores, isByRef)))
    return failure();

  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
                  opInst)
          .failed())
    return failure();

  if (failed(copyFirstPrivateVars(
          builder, moduleTranslation, privateVarsInfo.mlirVars,
          privateVarsInfo.llvmVars, privateVarsInfo.privatizers)))
    return failure();

  assert(afterAllocas.get()->getSinglePredecessor());
  if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
                               moduleTranslation,
                               afterAllocas.get()->getSinglePredecessor(),
                               reductionDecls, privateReductionVariables,
                               reductionVariableMap, isByRef, deferredStores)))
    return failure();

  // TODO: Handle doacross loops when the ordered clause has a parameter.
  bool isOrdered = wsloopOp.getOrdered().has_value();
  std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
  bool isSimd = wsloopOp.getScheduleSimd();
  bool loopNeedsBarrier = !wsloopOp.getNowait();

  // The only legal way for the direct parent to be omp.distribute is that this
  // represents 'distribute parallel do'. Otherwise, this is a regular
  // worksharing loop.
  llvm::omp::WorksharingLoopType workshareLoopType =
      llvm::isa_and_present<omp::DistributeOp>(opInst.getParentOp())
          ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop
          : llvm::omp::WorksharingLoopType::ForStaticLoop;

  // Register cancellation handling for this worksharing region.
  SmallVector<llvm::BranchInst *> cancelTerminators;
  pushCancelFinalizationCB(cancelTerminators, builder, *ompBuilder, wsloopOp,
                           llvm::omp::Directive::OMPD_for);

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  // NOTE(review): the declaration line introducing `regionBlock` appears to
  // be truncated in this listing; the next line is its trailing argument list.
      wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation);

  if (failed(handleError(regionBlock, opInst)))
    return failure();

  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);

  // Apply the worksharing-loop transformation to the canonical loop created
  // by the nested omp.loop_nest translation.
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
      ompBuilder->applyWorkshareLoop(
          ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
          convertToScheduleKind(schedule), chunk, isSimd,
          scheduleMod == omp::ScheduleModifier::monotonic,
          scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
          workshareLoopType);

  if (failed(handleError(wsloopIP, opInst)))
    return failure();

  // Set the correct branch target for task cancellation
  popCancelFinalizationCB(cancelTerminators, *ompBuilder, wsloopIP.get());

  // Process the reductions if required.
  if (failed(createReductionsAndCleanup(
          wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
          privateReductionVariables, isByRef, wsloopOp.getNowait(),
          /*isTeamsReduction=*/false)))
    return failure();

  return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
                            privateVarsInfo.llvmVars,
                            privateVarsInfo.privatizers);
}
2460 
/// Converts the OpenMP parallel operation to LLVM IR.
static LogicalResult
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionByref());
  assert(isByRef.size() == opInst.getNumReductionVars());
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

  // Fail early on any clause this translation does not support yet.
  if (failed(checkImplementationStatus(*opInst)))
    return failure();

  PrivateVarsInfo privateVarsInfo(opInst);

  // Collect reduction declarations
  SmallVector<omp::DeclareReductionOp> reductionDecls;
  collectReductionDecls(opInst, reductionDecls);
  SmallVector<llvm::Value *> privateReductionVariables(
      opInst.getNumReductionVars());
  SmallVector<DeferredStore> deferredStores;

  // Generates the body of the outlined parallel region: private/reduction
  // variable setup, translation of the region itself, and reduction combiners.
  auto bodyGenCB = [&](InsertPointTy allocaIP,
                       InsertPointTy codeGenIP) -> llvm::Error {
    // NOTE(review): the declaration line introducing `afterAllocas` appears
    // to be truncated in this listing; the next line is its argument list.
        builder, moduleTranslation, privateVarsInfo, allocaIP);
    if (handleError(afterAllocas, *opInst).failed())
      return llvm::make_error<PreviouslyReportedError>();

    // Allocate reduction vars
    DenseMap<Value, llvm::Value *> reductionVariableMap;

    MutableArrayRef<BlockArgument> reductionArgs =
        cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs();

    // Re-anchor the alloca insertion point at the block terminator so that
    // subsequent allocas are emitted before it.
    allocaIP =
        InsertPointTy(allocaIP.getBlock(),
                      allocaIP.getBlock()->getTerminator()->getIterator());

    if (failed(allocReductionVars(
            opInst, reductionArgs, builder, moduleTranslation, allocaIP,
            reductionDecls, privateReductionVariables, reductionVariableMap,
            deferredStores, isByRef)))
      return llvm::make_error<PreviouslyReportedError>();

    assert(afterAllocas.get()->getSinglePredecessor());
    builder.restoreIP(codeGenIP);

    if (handleError(
            initPrivateVars(builder, moduleTranslation, privateVarsInfo),
            *opInst)
            .failed())
      return llvm::make_error<PreviouslyReportedError>();

    if (failed(copyFirstPrivateVars(
            builder, moduleTranslation, privateVarsInfo.mlirVars,
            privateVarsInfo.llvmVars, privateVarsInfo.privatizers)))
      return llvm::make_error<PreviouslyReportedError>();

    if (failed(
            initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
                              afterAllocas.get()->getSinglePredecessor(),
                              reductionDecls, privateReductionVariables,
                              reductionVariableMap, isByRef, deferredStores)))
      return llvm::make_error<PreviouslyReportedError>();

    // Save the alloca insertion point on ModuleTranslation stack for use in
    // nested regions.
    // NOTE(review): the line declaring the stack-frame object appears to be
    // truncated in this listing; the next line is its argument list.
        moduleTranslation, allocaIP);

    // ParallelOp has only one region associated with it.
    // NOTE(review): the declaration line introducing `regionBlock` appears to
    // be truncated in this listing; the next line is its argument list.
        opInst.getRegion(), "omp.par.region", builder, moduleTranslation);
    if (!regionBlock)
      return regionBlock.takeError();

    // Process the reductions if required.
    if (opInst.getNumReductionVars() > 0) {
      // Collect reduction info
      SmallVector<OwningReductionGen> owningReductionGens;
      SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
      // NOTE(review): the line declaring `reductionInfos` appears to be
      // truncated in this listing.
      collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
                           owningReductionGens, owningAtomicReductionGens,
                           privateReductionVariables, reductionInfos);

      // Move to region cont block
      builder.SetInsertPoint((*regionBlock)->getTerminator());

      // Generate reductions from info
      llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
      builder.SetInsertPoint(tempTerminator);

      llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
          ompBuilder->createReductions(
              builder.saveIP(), allocaIP, reductionInfos, isByRef,
              /*IsNoWait=*/false, /*IsTeamsReduction=*/false);
      if (!contInsertPoint)
        return contInsertPoint.takeError();

      if (!contInsertPoint->getBlock())
        return llvm::make_error<PreviouslyReportedError>();

      // The placeholder terminator is no longer needed once the reduction
      // code has been emitted.
      tempTerminator->eraseFromParent();
      builder.restoreIP(*contInsertPoint);
    }

    return llvm::Error::success();
  };

  auto privCB = [](InsertPointTy allocaIP, InsertPointTy codeGenIP,
                   llvm::Value &, llvm::Value &val, llvm::Value *&replVal) {
    // tell OpenMPIRBuilder not to do anything. We handled Privatisation in
    // bodyGenCB.
    replVal = &val;
    return codeGenIP;
  };

  // TODO: Perform finalization actions for variables. This has to be
  // called for variables which have destructors/finalizers.
  auto finiCB = [&](InsertPointTy codeGenIP) -> llvm::Error {
    InsertPointTy oldIP = builder.saveIP();
    builder.restoreIP(codeGenIP);

    // if the reduction has a cleanup region, inline it here to finalize the
    // reduction variables
    SmallVector<Region *> reductionCleanupRegions;
    llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions),
                    [](omp::DeclareReductionOp reductionDecl) {
                      return &reductionDecl.getCleanupRegion();
                    });
    if (failed(inlineOmpRegionCleanup(
            reductionCleanupRegions, privateReductionVariables,
            moduleTranslation, builder, "omp.reduction.cleanup")))
      return llvm::createStringError(
          "failed to inline `cleanup` region of `omp.declare_reduction`");

    if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(),
                                  privateVarsInfo.llvmVars,
                                  privateVarsInfo.privatizers)))
      return llvm::make_error<PreviouslyReportedError>();

    builder.restoreIP(oldIP);
    return llvm::Error::success();
  };

  // Translate the optional `if`, `num_threads` and `proc_bind` clauses.
  llvm::Value *ifCond = nullptr;
  if (auto ifVar = opInst.getIfExpr())
    ifCond = moduleTranslation.lookupValue(ifVar);
  llvm::Value *numThreads = nullptr;
  if (auto numThreadsVar = opInst.getNumThreads())
    numThreads = moduleTranslation.lookupValue(numThreadsVar);
  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
  if (auto bind = opInst.getProcBindKind())
    pbKind = getProcBindKind(*bind);
  bool isCancellable = constructIsCancellable(opInst);

  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
                                 ifCond, numThreads, pbKind, isCancellable);

  if (failed(handleError(afterIP, *opInst)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
2632 
2633 /// Convert Order attribute to llvm::omp::OrderKind.
2634 static llvm::omp::OrderKind
2635 convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
2636  if (!o)
2637  return llvm::omp::OrderKind::OMP_ORDER_unknown;
2638  switch (*o) {
2639  case omp::ClauseOrderKind::Concurrent:
2640  return llvm::omp::OrderKind::OMP_ORDER_concurrent;
2641  }
2642  llvm_unreachable("Unknown ClauseOrderKind kind");
2643 }
2644 
/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
               LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  auto simdOp = cast<omp::SimdOp>(opInst);

  // TODO: Replace this with proper composite translation support.
  // Currently, simd information on composite constructs is ignored, so e.g.
  // 'do/for simd' will be treated the same as a standalone 'do/for'. This is
  // allowed by the spec, since it's equivalent to using a SIMD length of 1.
  if (simdOp.isComposite()) {
    if (failed(convertIgnoredWrapper(simdOp, moduleTranslation)))
      return failure();

    return inlineConvertOmpRegions(simdOp.getRegion(), "omp.simd.region",
                                   builder, moduleTranslation);
  }

  // Fail early on any clause this translation does not support yet.
  if (failed(checkImplementationStatus(opInst)))
    return failure();

  PrivateVarsInfo privateVarsInfo(simdOp);

  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  // NOTE(review): the declaration line introducing `afterAllocas` appears to
  // be truncated in this listing; the next line is its trailing argument list.
      builder, moduleTranslation, privateVarsInfo, allocaIP);
  if (handleError(afterAllocas, opInst).failed())
    return failure();

  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
                  opInst)
          .failed())
    return failure();

  // Translate the optional `simdlen` and `safelen` clauses into constants.
  llvm::ConstantInt *simdlen = nullptr;
  if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
    simdlen = builder.getInt64(simdlenVar.value());

  llvm::ConstantInt *safelen = nullptr;
  if (std::optional<uint64_t> safelenVar = simdOp.getSafelen())
    safelen = builder.getInt64(safelenVar.value());

  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
  llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder());

  // For each variable in the `aligned` clause, load its pointer in the block
  // preceding the loop and record the requested alignment constant.
  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
  std::optional<ArrayAttr> alignmentValues = simdOp.getAlignments();
  mlir::OperandRange operands = simdOp.getAlignedVars();
  for (size_t i = 0; i < operands.size(); ++i) {
    llvm::Value *alignment = nullptr;
    llvm::Value *llvmVal = moduleTranslation.lookupValue(operands[i]);
    llvm::Type *ty = llvmVal->getType();

    auto intAttr = cast<IntegerAttr>((*alignmentValues)[i]);
    alignment = builder.getInt64(intAttr.getInt());
    assert(ty->isPointerTy() && "Invalid type for aligned variable");
    assert(alignment && "Invalid alignment value");
    // Emit the load in `sourceBlock`, then restore the insertion point.
    auto curInsert = builder.saveIP();
    builder.SetInsertPoint(sourceBlock);
    llvmVal = builder.CreateLoad(ty, llvmVal);
    builder.restoreIP(curInsert);
    alignedVars[llvmVal] = alignment;
  }

  // NOTE(review): the declaration line introducing `regionBlock` appears to
  // be truncated in this listing; the next line is its trailing argument list.
      simdOp.getRegion(), "omp.simd.region", builder, moduleTranslation);

  if (failed(handleError(regionBlock, opInst)))
    return failure();

  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
  ompBuilder->applySimd(loopInfo, alignedVars,
                        simdOp.getIfExpr()
                            ? moduleTranslation.lookupValue(simdOp.getIfExpr())
                            : nullptr,
                        order, simdlen, safelen);

  return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
                            privateVarsInfo.llvmVars,
                            privateVarsInfo.privatizers);
}
2730 
/// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  auto loopOp = cast<omp::LoopNestOp>(opInst);

  // Set up the source location value for OpenMP runtime.
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  // Generator of the canonical loop body.
  // NOTE(review): the declarations of the containers used below (`loopInfos`
  // and `bodyInsertPoints`) appear to be truncated in this listing.
  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip,
                     llvm::Value *iv) -> llvm::Error {
    // Make sure further conversions know about the induction variable.
    moduleTranslation.mapValue(
        loopOp.getRegion().front().getArgument(loopInfos.size()), iv);

    // Capture the body insertion point for use in nested loops. BodyIP of the
    // CanonicalLoopInfo always points to the beginning of the entry block of
    // the body.
    bodyInsertPoints.push_back(ip);

    // Only the innermost loop of the nest emits the region body; outer loops
    // just record their insertion points above.
    if (loopInfos.size() != loopOp.getNumLoops() - 1)
      return llvm::Error::success();

    // Convert the body of the loop.
    builder.restoreIP(ip);
    // NOTE(review): the declaration line introducing `regionBlock` appears to
    // be truncated in this listing; the next line is its argument list.
        loopOp.getRegion(), "omp.loop_nest.region", builder, moduleTranslation);
    if (!regionBlock)
      return regionBlock.takeError();

    builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
    return llvm::Error::success();
  };

  // Delegate actual loop construction to the OpenMP IRBuilder.
  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
  // loop, i.e. it has a positive step, uses signed integer semantics.
  // Reconsider this code when the nested loop operation clearly supports more
  // cases.
  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
    llvm::Value *lowerBound =
        moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]);
    llvm::Value *upperBound =
        moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]);
    llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]);

    // Make sure loop trip count are emitted in the preheader of the outermost
    // loop at the latest so that they are all available for the new collapsed
    // loop will be created below.
    llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
    llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
    if (i != 0) {
      loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
                                                       ompLoc.DL);
      computeIP = loopInfos.front()->getPreheaderIP();
    }

    // NOTE(review): the declaration line introducing `loopResult` appears to
    // be truncated in this listing; the call below produces it.
        ompBuilder->createCanonicalLoop(
            loc, bodyGen, lowerBound, upperBound, step,
            /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);

    if (failed(handleError(loopResult, *loopOp)))
      return failure();

    loopInfos.push_back(*loopResult);
  }

  // Collapse loops. Store the insertion point because LoopInfos may get
  // invalidated.
  llvm::OpenMPIRBuilder::InsertPointTy afterIP =
      loopInfos.front()->getAfterIP();

  // Update the stack frame created for this loop to point to the resulting loop
  // after applying transformations.
  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
      [&](OpenMPLoopInfoStackFrame &frame) {
        frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
        return WalkResult::interrupt();
      });

  // Continue building IR after the loop. Note that the LoopInfo returned by
  // `collapseLoops` points inside the outermost loop and is intended for
  // potential further loop transformations. Use the insertion point stored
  // before collapsing loops instead.
  builder.restoreIP(afterIP);
  return success();
}
2823 
2824 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
2825 static llvm::AtomicOrdering
2826 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
2827  if (!ao)
2828  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
2829 
2830  switch (*ao) {
2831  case omp::ClauseMemoryOrderKind::Seq_cst:
2832  return llvm::AtomicOrdering::SequentiallyConsistent;
2833  case omp::ClauseMemoryOrderKind::Acq_rel:
2834  return llvm::AtomicOrdering::AcquireRelease;
2835  case omp::ClauseMemoryOrderKind::Acquire:
2836  return llvm::AtomicOrdering::Acquire;
2837  case omp::ClauseMemoryOrderKind::Release:
2838  return llvm::AtomicOrdering::Release;
2839  case omp::ClauseMemoryOrderKind::Relaxed:
2840  return llvm::AtomicOrdering::Monotonic;
2841  }
2842  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
2843 }
2844 
2845 /// Convert omp.atomic.read operation to LLVM IR.
2846 static LogicalResult
2847 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
2848  LLVM::ModuleTranslation &moduleTranslation) {
2849  auto readOp = cast<omp::AtomicReadOp>(opInst);
2850  if (failed(checkImplementationStatus(opInst)))
2851  return failure();
2852 
2853  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2854  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2855  findAllocaInsertPoint(builder, moduleTranslation);
2856 
2857  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2858 
2859  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrder());
2860  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
2861  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
2862 
2863  llvm::Type *elementType =
2864  moduleTranslation.convertType(readOp.getElementType());
2865 
2866  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
2867  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
2868  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO, allocaIP));
2869  return success();
2870 }
2871 
2872 /// Converts an omp.atomic.write operation to LLVM IR.
2873 static LogicalResult
2874 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
2875  LLVM::ModuleTranslation &moduleTranslation) {
2876  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
2877  if (failed(checkImplementationStatus(opInst)))
2878  return failure();
2879 
2880  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2881  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2882  findAllocaInsertPoint(builder, moduleTranslation);
2883 
2884  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2885  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder());
2886  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
2887  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
2888  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
2889  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
2890  /*isVolatile=*/false};
2891  builder.restoreIP(
2892  ompBuilder->createAtomicWrite(ompLoc, x, expr, ao, allocaIP));
2893  return success();
2894 }
2895 
/// Converts an LLVM dialect binary operation to the corresponding enum value
/// for `atomicrmw` supported binary operation.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
  // NOTE(review): the line opening the TypeSwitch expression appears to be
  // truncated in this listing; the chain below dispatches on the concrete
  // LLVM dialect op and falls back to BAD_BINOP for unsupported operations.
      .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
      .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
      .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
      .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
      .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
      .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
      .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
      .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
      .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
      .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
}
2911 
/// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static LogicalResult
convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
                       llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  if (failed(checkImplementationStatus(*opInst)))
    return failure();

  // Convert values and types.
  auto &innerOpList = opInst.getRegion().front().getOperations();
  bool isXBinopExpr{false};
  llvm::AtomicRMWInst::BinOp binop;
  mlir::Value mlirExpr;
  llvm::Value *llvmExpr = nullptr;
  llvm::Value *llvmX = nullptr;
  llvm::Type *llvmXElementType = nullptr;
  if (innerOpList.size() == 2) {
    // The two operations here are the update and the terminator.
    // Since we can identify the update operation, there is a possibility
    // that we can generate the atomicrmw instruction.
    mlir::Operation &innerOp = *opInst.getRegion().front().begin();
    if (!llvm::is_contained(innerOp.getOperands(),
                            opInst.getRegion().getArgument(0))) {
      return opInst.emitError("no atomic update operation with region argument"
                              " as operand found inside atomic.update region");
    }
    binop = convertBinOpToAtomic(innerOp);
    // `isXBinopExpr` is true when the update has the shape `x op expr`
    // (the region argument is the first operand), false for `expr op x`.
    isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
    mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  } else {
    // Since the update region includes more than one operation
    // we will resort to generating a cmpxchg loop.
    binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
  }
  llvmX = moduleTranslation.lookupValue(opInst.getX());
  llvmXElementType = moduleTranslation.convertType(
      opInst.getRegion().getArgument(0).getType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(opInst.getMemoryOrder());

  // Generate update code.
  // Callback used by the OpenMPIRBuilder to materialize the updated value:
  // it inlines the update region with the region argument mapped to the
  // current atomic value and returns the operand of the `omp.yield`.
  auto updateFn =
      [&opInst, &moduleTranslation](
          llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    Block &bb = *opInst.getRegion().begin();
    moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
                                     atomicOrdering, binop, updateFn,
                                     isXBinopExpr);

  if (failed(handleError(afterIP, *opInst)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
2990 
/// Converts an omp.atomic.capture operation (an atomic read paired with an
/// atomic update or write) to LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
                        llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  if (failed(checkImplementationStatus(*atomicCaptureOp)))
    return failure();

  mlir::Value mlirExpr;
  bool isXBinopExpr = false, isPostfixUpdate = false;
  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;

  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();

  assert((atomicUpdateOp || atomicWriteOp) &&
         "internal op must be an atomic.update or atomic.write op");

  if (atomicWriteOp) {
    // For a capture-with-write, the old value is read before the write.
    isPostfixUpdate = true;
    mlirExpr = atomicWriteOp.getExpr();
  } else {
    // The update is "postfix" when it is the second op in the capture region,
    // i.e. the read of the old value happens first.
    isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
                      atomicCaptureOp.getAtomicUpdateOp().getOperation();
    auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
    // Find the binary update operation that uses the region argument
    // and get the expression to update
    if (innerOpList.size() == 2) {
      mlir::Operation &innerOp = *atomicUpdateOp.getRegion().front().begin();
      if (!llvm::is_contained(innerOp.getOperands(),
                              atomicUpdateOp.getRegion().getArgument(0))) {
        return atomicUpdateOp.emitError(
            "no atomic update operation with region argument"
            " as operand found inside atomic.update region");
      }
      binop = convertBinOpToAtomic(innerOp);
      // True for `x op expr` (region argument first), false for `expr op x`.
      isXBinopExpr =
          innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
      mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    } else {
      // Multi-operation update region: fall back to a cmpxchg-style lowering.
      binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
    }
  }

  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  llvm::Value *llvmX =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
  llvm::Value *llvmV =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
  llvm::Type *llvmXElementType = moduleTranslation.convertType(
      atomicCaptureOp.getAtomicReadOp().getElementType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(atomicCaptureOp.getMemoryOrder());

  // Callback that materializes the new value: either the written expression
  // (capture-with-write) or the result of inlining the update region with the
  // region argument mapped to the current atomic value.
  auto updateFn =
      [&](llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    if (atomicWriteOp)
      return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
    Block &bb = *atomicUpdateOp.getRegion().begin();
    moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
                               atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicCapture(
          ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
          binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr);

  if (failed(handleError(afterIP, *atomicCaptureOp)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
3085 
3086 static llvm::omp::Directive convertCancellationConstructType(
3087  omp::ClauseCancellationConstructType directive) {
3088  switch (directive) {
3089  case omp::ClauseCancellationConstructType::Loop:
3090  return llvm::omp::Directive::OMPD_for;
3091  case omp::ClauseCancellationConstructType::Parallel:
3092  return llvm::omp::Directive::OMPD_parallel;
3093  case omp::ClauseCancellationConstructType::Sections:
3094  return llvm::omp::Directive::OMPD_sections;
3095  case omp::ClauseCancellationConstructType::Taskgroup:
3096  return llvm::omp::Directive::OMPD_taskgroup;
3097  }
3098 }
3099 
3100 static LogicalResult
3101 convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder,
3102  LLVM::ModuleTranslation &moduleTranslation) {
3103  if (failed(checkImplementationStatus(*op.getOperation())))
3104  return failure();
3105 
3106  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3107  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3108 
3109  llvm::Value *ifCond = nullptr;
3110  if (Value ifVar = op.getIfExpr())
3111  ifCond = moduleTranslation.lookupValue(ifVar);
3112 
3113  llvm::omp::Directive cancelledDirective =
3114  convertCancellationConstructType(op.getCancelDirective());
3115 
3116  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3117  ompBuilder->createCancel(ompLoc, ifCond, cancelledDirective);
3118 
3119  if (failed(handleError(afterIP, *op.getOperation())))
3120  return failure();
3121 
3122  builder.restoreIP(afterIP.get());
3123 
3124  return success();
3125 }
3126 
3127 static LogicalResult
3128 convertOmpCancellationPoint(omp::CancellationPointOp op,
3129  llvm::IRBuilderBase &builder,
3130  LLVM::ModuleTranslation &moduleTranslation) {
3131  if (failed(checkImplementationStatus(*op.getOperation())))
3132  return failure();
3133 
3134  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3135  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3136 
3137  llvm::omp::Directive cancelledDirective =
3138  convertCancellationConstructType(op.getCancelDirective());
3139 
3140  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3141  ompBuilder->createCancellationPoint(ompLoc, cancelledDirective);
3142 
3143  if (failed(handleError(afterIP, *op.getOperation())))
3144  return failure();
3145 
3146  builder.restoreIP(afterIP.get());
3147 
3148  return success();
3149 }
3150 
3151 /// Converts an OpenMP Threadprivate operation into LLVM IR using
3152 /// OpenMPIRBuilder.
3153 static LogicalResult
3154 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
3155  LLVM::ModuleTranslation &moduleTranslation) {
3156  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3157  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3158  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
3159 
3160  if (failed(checkImplementationStatus(opInst)))
3161  return failure();
3162 
3163  Value symAddr = threadprivateOp.getSymAddr();
3164  auto *symOp = symAddr.getDefiningOp();
3165 
3166  if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
3167  symOp = asCast.getOperand().getDefiningOp();
3168 
3169  if (!isa<LLVM::AddressOfOp>(symOp))
3170  return opInst.emitError("Addressing symbol not found");
3171  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
3172 
3173  LLVM::GlobalOp global =
3174  addressOfOp.getGlobal(moduleTranslation.symbolTable());
3175  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
3176 
3177  if (!ompBuilder->Config.isTargetDevice()) {
3178  llvm::Type *type = globalValue->getValueType();
3179  llvm::TypeSize typeSize =
3180  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
3181  type);
3182  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
3183  llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
3184  ompLoc, globalValue, size, global.getSymName() + ".cache");
3185  moduleTranslation.mapValue(opInst.getResult(0), callInst);
3186  } else {
3187  moduleTranslation.mapValue(opInst.getResult(0), globalValue);
3188  }
3189 
3190  return success();
3191 }
3192 
3193 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
3194 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
3195  switch (deviceClause) {
3196  case mlir::omp::DeclareTargetDeviceType::host:
3197  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
3198  break;
3199  case mlir::omp::DeclareTargetDeviceType::nohost:
3200  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
3201  break;
3202  case mlir::omp::DeclareTargetDeviceType::any:
3203  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
3204  break;
3205  }
3206  llvm_unreachable("unhandled device clause");
3207 }
3208 
3209 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
3211  mlir::omp::DeclareTargetCaptureClause captureClause) {
3212  switch (captureClause) {
3213  case mlir::omp::DeclareTargetCaptureClause::to:
3214  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
3215  case mlir::omp::DeclareTargetCaptureClause::link:
3216  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
3217  case mlir::omp::DeclareTargetCaptureClause::enter:
3218  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
3219  }
3220  llvm_unreachable("unhandled capture clause");
3221 }
3222 
3223 static llvm::SmallString<64>
3224 getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
3225  llvm::OpenMPIRBuilder &ompBuilder) {
3226  llvm::SmallString<64> suffix;
3227  llvm::raw_svector_ostream os(suffix);
3228  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
3229  auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
3230  auto fileInfoCallBack = [&loc]() {
3231  return std::pair<std::string, uint64_t>(
3232  llvm::StringRef(loc.getFilename()), loc.getLine());
3233  };
3234 
3235  os << llvm::format(
3236  "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
3237  }
3238  os << "_decl_tgt_ref_ptr";
3239 
3240  return suffix;
3241 }
3242 
3243 static bool isDeclareTargetLink(mlir::Value value) {
3244  if (auto addressOfOp =
3245  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
3246  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
3247  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
3248  if (auto declareTargetGlobal =
3249  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
3250  if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3251  mlir::omp::DeclareTargetCaptureClause::link)
3252  return true;
3253  }
3254  return false;
3255 }
3256 
3257 // Returns the reference pointer generated by the lowering of the declare target
3258 // operation in cases where the link clause is used or the to clause is used in
3259 // USM mode.
3260 static llvm::Value *
3262  LLVM::ModuleTranslation &moduleTranslation) {
3263  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3264 
3265  // An easier way to do this may just be to keep track of any pointer
3266  // references and their mapping to their respective operation
3267  if (auto addressOfOp =
3268  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
3269  if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
3270  addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
3271  addressOfOp.getGlobalName()))) {
3272 
3273  if (auto declareTargetGlobal =
3274  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
3275  gOp.getOperation())) {
3276 
3277  // In this case, we must utilise the reference pointer generated by the
3278  // declare target operation, similar to Clang
3279  if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
3280  mlir::omp::DeclareTargetCaptureClause::link) ||
3281  (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3282  mlir::omp::DeclareTargetCaptureClause::to &&
3283  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
3284  llvm::SmallString<64> suffix =
3285  getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
3286 
3287  if (gOp.getSymName().contains(suffix))
3288  return moduleTranslation.getLLVMModule()->getNamedValue(
3289  gOp.getSymName());
3290 
3291  return moduleTranslation.getLLVMModule()->getNamedValue(
3292  (gOp.getSymName().str() + suffix.str()).str());
3293  }
3294  }
3295  }
3296  }
3297 
3298  return nullptr;
3299 }
3300 
3301 namespace {
3302 // Append customMappers information to existing MapInfosTy
3303 struct MapInfosTy : llvm::OpenMPIRBuilder::MapInfosTy {
3305 
3306  /// Append arrays in \a CurInfo.
3307  void append(MapInfosTy &curInfo) {
3308  Mappers.append(curInfo.Mappers.begin(), curInfo.Mappers.end());
3309  llvm::OpenMPIRBuilder::MapInfosTy::append(curInfo);
3310  }
3311 };
3312 // A small helper structure to contain data gathered
3313 // for map lowering and coalese it into one area and
3314 // avoiding extra computations such as searches in the
3315 // llvm module for lowered mapped variables or checking
3316 // if something is declare target (and retrieving the
3317 // value) more than neccessary.
3318 struct MapInfoData : MapInfosTy {
3319  llvm::SmallVector<bool, 4> IsDeclareTarget;
3320  llvm::SmallVector<bool, 4> IsAMember;
3321  // Identify if mapping was added by mapClause or use_device clauses.
3322  llvm::SmallVector<bool, 4> IsAMapping;
3325  // Stripped off array/pointer to get the underlying
3326  // element type
3328 
3329  /// Append arrays in \a CurInfo.
3330  void append(MapInfoData &CurInfo) {
3331  IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
3332  CurInfo.IsDeclareTarget.end());
3333  MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
3334  OriginalValue.append(CurInfo.OriginalValue.begin(),
3335  CurInfo.OriginalValue.end());
3336  BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
3337  MapInfosTy::append(CurInfo);
3338  }
3339 };
3340 } // namespace
3341 
3342 uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
3343  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
3344  arrTy.getElementType()))
3345  return getArrayElementSizeInBits(nestedArrTy, dl);
3346  return dl.getTypeSizeInBits(arrTy.getElementType());
3347 }
3348 
3349 // This function calculates the size to be offloaded for a specified type, given
3350 // its associated map clause (which can contain bounds information which affects
3351 // the total size), this size is calculated based on the underlying element type
3352 // e.g. given a 1-D array of ints, we will calculate the size from the integer
3353 // type * number of elements in the array. This size can be used in other
3354 // calculations but is ultimately used as an argument to the OpenMP runtimes
3355 // kernel argument structure which is generated through the combinedInfo data
3356 // structures.
3357 // This function is somewhat equivalent to Clang's getExprTypeSize inside of
3358 // CGOpenMPRuntime.cpp.
llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
                            Operation *clauseOp, llvm::Value *basePointer,
                            llvm::Type *baseType, llvm::IRBuilderBase &builder,
                            LLVM::ModuleTranslation &moduleTranslation) {
  if (auto memberClause =
          mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
    // This calculates the size to transfer based on bounds and the underlying
    // element type, provided bounds have been specified (Fortran
    // pointers/allocatables/target and arrays that have sections specified fall
    // into this as well).
    if (!memberClause.getBounds().empty()) {
      // Start the element count at 1 and fold in each dimension's extent.
      llvm::Value *elementCount = builder.getInt64(1);
      for (auto bounds : memberClause.getBounds()) {
        if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
                bounds.getDefiningOp())) {
          // The below calculation for the size to be mapped calculated from the
          // map.info's bounds is: (elemCount * [UB - LB] + 1), later we
          // multiply by the underlying element types byte size to get the full
          // size to be offloaded based on the bounds
          elementCount = builder.CreateMul(
              elementCount,
              builder.CreateAdd(
                  builder.CreateSub(
                      moduleTranslation.lookupValue(boundOp.getUpperBound()),
                      moduleTranslation.lookupValue(boundOp.getLowerBound())),
                  builder.getInt64(1)));
        }
      }

      // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives
      // the size in inconsistent byte or bit format.
      uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
      if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
        underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);

      // The size in bytes x number of elements, the sizeInBytes stored is
      // the underlying type's size, e.g. if ptr<i32>, it'll be the i32's
      // size, so we do some on the fly runtime math to get the size in
      // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
      // some adjustment for members with more complex types.
      return builder.CreateMul(elementCount,
                               builder.getInt64(underlyingTypeSzInBits / 8));
    }
  }

  // No bounds information: fall back to the static size of the type itself.
  return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
}
3406 
3408  MapInfoData &mapData, SmallVectorImpl<Value> &mapVars,
3409  LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl,
3410  llvm::IRBuilderBase &builder, ArrayRef<Value> useDevPtrOperands = {},
3411  ArrayRef<Value> useDevAddrOperands = {},
3412  ArrayRef<Value> hasDevAddrOperands = {}) {
3413  auto checkIsAMember = [](const auto &mapVars, auto mapOp) {
3414  // Check if this is a member mapping and correctly assign that it is, if
3415  // it is a member of a larger object.
3416  // TODO: Need better handling of members, and distinguishing of members
3417  // that are implicitly allocated on device vs explicitly passed in as
3418  // arguments.
3419  // TODO: May require some further additions to support nested record
3420  // types, i.e. member maps that can have member maps.
3421  for (Value mapValue : mapVars) {
3422  auto map = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3423  for (auto member : map.getMembers())
3424  if (member == mapOp)
3425  return true;
3426  }
3427  return false;
3428  };
3429 
3430  // Process MapOperands
3431  for (Value mapValue : mapVars) {
3432  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3433  Value offloadPtr =
3434  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3435  mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
3436  mapData.Pointers.push_back(mapData.OriginalValue.back());
3437 
3438  if (llvm::Value *refPtr =
3439  getRefPtrIfDeclareTarget(offloadPtr,
3440  moduleTranslation)) { // declare target
3441  mapData.IsDeclareTarget.push_back(true);
3442  mapData.BasePointers.push_back(refPtr);
3443  } else { // regular mapped variable
3444  mapData.IsDeclareTarget.push_back(false);
3445  mapData.BasePointers.push_back(mapData.OriginalValue.back());
3446  }
3447 
3448  mapData.BaseType.push_back(
3449  moduleTranslation.convertType(mapOp.getVarType()));
3450  mapData.Sizes.push_back(
3451  getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
3452  mapData.BaseType.back(), builder, moduleTranslation));
3453  mapData.MapClause.push_back(mapOp.getOperation());
3454  mapData.Types.push_back(
3455  llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType()));
3456  mapData.Names.push_back(LLVM::createMappingInformation(
3457  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3458  mapData.DevicePointers.push_back(llvm::OpenMPIRBuilder::DeviceInfoTy::None);
3459  if (mapOp.getMapperId())
3460  mapData.Mappers.push_back(
3461  SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>(
3462  mapOp, mapOp.getMapperIdAttr()));
3463  else
3464  mapData.Mappers.push_back(nullptr);
3465  mapData.IsAMapping.push_back(true);
3466  mapData.IsAMember.push_back(checkIsAMember(mapVars, mapOp));
3467  }
3468 
3469  auto findMapInfo = [&mapData](llvm::Value *val,
3470  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
3471  unsigned index = 0;
3472  bool found = false;
3473  for (llvm::Value *basePtr : mapData.OriginalValue) {
3474  if (basePtr == val && mapData.IsAMapping[index]) {
3475  found = true;
3476  mapData.Types[index] |=
3477  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
3478  mapData.DevicePointers[index] = devInfoTy;
3479  }
3480  index++;
3481  }
3482  return found;
3483  };
3484 
3485  // Process useDevPtr(Addr)Operands
3486  auto addDevInfos = [&](const llvm::ArrayRef<Value> &useDevOperands,
3487  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
3488  for (Value mapValue : useDevOperands) {
3489  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3490  Value offloadPtr =
3491  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3492  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
3493 
3494  // Check if map info is already present for this entry.
3495  if (!findMapInfo(origValue, devInfoTy)) {
3496  mapData.OriginalValue.push_back(origValue);
3497  mapData.Pointers.push_back(mapData.OriginalValue.back());
3498  mapData.IsDeclareTarget.push_back(false);
3499  mapData.BasePointers.push_back(mapData.OriginalValue.back());
3500  mapData.BaseType.push_back(
3501  moduleTranslation.convertType(mapOp.getVarType()));
3502  mapData.Sizes.push_back(builder.getInt64(0));
3503  mapData.MapClause.push_back(mapOp.getOperation());
3504  mapData.Types.push_back(
3505  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
3506  mapData.Names.push_back(LLVM::createMappingInformation(
3507  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3508  mapData.DevicePointers.push_back(devInfoTy);
3509  mapData.Mappers.push_back(nullptr);
3510  mapData.IsAMapping.push_back(false);
3511  mapData.IsAMember.push_back(checkIsAMember(useDevOperands, mapOp));
3512  }
3513  }
3514  };
3515 
3516  addDevInfos(useDevAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
3517  addDevInfos(useDevPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
3518 
3519  for (Value mapValue : hasDevAddrOperands) {
3520  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3521  Value offloadPtr =
3522  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3523  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
3524  auto mapType =
3525  static_cast<llvm::omp::OpenMPOffloadMappingFlags>(mapOp.getMapType());
3526  auto mapTypeAlways = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
3527 
3528  mapData.OriginalValue.push_back(origValue);
3529  mapData.BasePointers.push_back(origValue);
3530  mapData.Pointers.push_back(origValue);
3531  mapData.IsDeclareTarget.push_back(false);
3532  mapData.BaseType.push_back(
3533  moduleTranslation.convertType(mapOp.getVarType()));
3534  mapData.Sizes.push_back(
3535  builder.getInt64(dl.getTypeSize(mapOp.getVarType())));
3536  mapData.MapClause.push_back(mapOp.getOperation());
3537  if (llvm::to_underlying(mapType & mapTypeAlways)) {
3538  // Descriptors are mapped with the ALWAYS flag, since they can get
3539  // rematerialized, so the address of the decriptor for a given object
3540  // may change from one place to another.
3541  mapData.Types.push_back(mapType);
3542  // Technically it's possible for a non-descriptor mapping to have
3543  // both has-device-addr and ALWAYS, so lookup the mapper in case it
3544  // exists.
3545  if (mapOp.getMapperId()) {
3546  mapData.Mappers.push_back(
3547  SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>(
3548  mapOp, mapOp.getMapperIdAttr()));
3549  } else {
3550  mapData.Mappers.push_back(nullptr);
3551  }
3552  } else {
3553  mapData.Types.push_back(
3554  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
3555  mapData.Mappers.push_back(nullptr);
3556  }
3557  mapData.Names.push_back(LLVM::createMappingInformation(
3558  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3559  mapData.DevicePointers.push_back(
3560  llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
3561  mapData.IsAMapping.push_back(false);
3562  mapData.IsAMember.push_back(checkIsAMember(hasDevAddrOperands, mapOp));
3563  }
3564 }
3565 
3566 static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
3567  auto *res = llvm::find(mapData.MapClause, memberOp);
3568  assert(res != mapData.MapClause.end() &&
3569  "MapInfoOp for member not found in MapData, cannot return index");
3570  return std::distance(mapData.MapClause.begin(), res);
3571 }
3572 
/// Returns the first (when \p first is true) or last mapped member of
/// \p mapInfo, ordering members lexicographically by the index paths stored
/// in the op's members-index attribute.
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
                                                    bool first) {
  ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
  // Only 1 member has been mapped, we can return it.
  if (indexAttr.size() == 1)
    return cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp());

  llvm::SmallVector<size_t> indices(indexAttr.size());
  std::iota(indices.begin(), indices.end(), 0);

  // Sort member positions by index path; flipping the comparison result via
  // `first` selects either the smallest (first) or largest (last) path once
  // the sort completes.
  llvm::sort(indices.begin(), indices.end(),
             [&](const size_t a, const size_t b) {
               auto memberIndicesA = cast<ArrayAttr>(indexAttr[a]);
               auto memberIndicesB = cast<ArrayAttr>(indexAttr[b]);
               for (const auto it : llvm::zip(memberIndicesA, memberIndicesB)) {
                 int64_t aIndex = cast<IntegerAttr>(std::get<0>(it)).getInt();
                 int64_t bIndex = cast<IntegerAttr>(std::get<1>(it)).getInt();

                 if (aIndex == bIndex)
                   continue;

                 if (aIndex < bIndex)
                   return first;

                 if (aIndex > bIndex)
                   return !first;
               }

               // Iterated up until the end of the smallest member and
               // they were found to be equal up to that point, so select
               // the member with the lowest index count, so the "parent"
               return memberIndicesA.size() < memberIndicesB.size();
             });

  return llvm::cast<omp::MapInfoOp>(
      mapInfo.getMembers()[indices.front()].getDefiningOp());
}
3610 
3611 /// This function calculates the array/pointer offset for map data provided
3612 /// with bounds operations, e.g. when provided something like the following:
3613 ///
3614 /// Fortran
3615 /// map(tofrom: array(2:5, 3:2))
3616 /// or
3617 /// C++
3618 /// map(tofrom: array[1:4][2:3])
3619 /// We must calculate the initial pointer offset to pass across, this function
3620 /// performs this using bounds.
3621 ///
3622 /// NOTE: which while specified in row-major order it currently needs to be
3623 /// flipped for Fortran's column order array allocation and access (as
3624 /// opposed to C++'s row-major, hence the backwards processing where order is
3625 /// important). This is likely important to keep in mind for the future when
3626 /// we incorporate a C++ frontend, both frontends will need to agree on the
3627 /// ordering of generated bounds operations (one may have to flip them) to
3628 /// make the below lowering frontend agnostic. The offload size
3629 /// calcualtion may also have to be adjusted for C++.
3630 std::vector<llvm::Value *>
3632  llvm::IRBuilderBase &builder, bool isArrayTy,
3633  OperandRange bounds) {
3634  std::vector<llvm::Value *> idx;
3635  // There's no bounds to calculate an offset from, we can safely
3636  // ignore and return no indices.
3637  if (bounds.empty())
3638  return idx;
3639 
3640  // If we have an array type, then we have its type so can treat it as a
3641  // normal GEP instruction where the bounds operations are simply indexes
3642  // into the array. We currently do reverse order of the bounds, which
3643  // I believe leans more towards Fortran's column-major in memory.
3644  if (isArrayTy) {
3645  idx.push_back(builder.getInt64(0));
3646  for (int i = bounds.size() - 1; i >= 0; --i) {
3647  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3648  bounds[i].getDefiningOp())) {
3649  idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
3650  }
3651  }
3652  } else {
3653  // If we do not have an array type, but we have bounds, then we're dealing
3654  // with a pointer that's being treated like an array and we have the
3655  // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
3656  // address (pointer pointing to the actual data) so we must caclulate the
3657  // offset using a single index which the following two loops attempts to
3658  // compute.
3659 
3660  // Calculates the size offset we need to make per row e.g. first row or
3661  // column only needs to be offset by one, but the next would have to be
3662  // the previous row/column offset multiplied by the extent of current row.
3663  //
3664  // For example ([1][10][100]):
3665  //
3666  // - First row/column we move by 1 for each index increment
3667  // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
3668  // current) for 10 for each index increment
3669  // - Third row/column we would move by 10 (second row/column) *
3670  // (extent/size of current) 100 for 1000 for each index increment
3671  std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
3672  for (size_t i = 1; i < bounds.size(); ++i) {
3673  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3674  bounds[i].getDefiningOp())) {
3675  dimensionIndexSizeOffset.push_back(builder.CreateMul(
3676  moduleTranslation.lookupValue(boundOp.getExtent()),
3677  dimensionIndexSizeOffset[i - 1]));
3678  }
3679  }
3680 
3681  // Now that we have calculated how much we move by per index, we must
3682  // multiply each lower bound offset in indexes by the size offset we
3683  // have calculated in the previous and accumulate the results to get
3684  // our final resulting offset.
3685  for (int i = bounds.size() - 1; i >= 0; --i) {
3686  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3687  bounds[i].getDefiningOp())) {
3688  if (idx.empty())
3689  idx.emplace_back(builder.CreateMul(
3690  moduleTranslation.lookupValue(boundOp.getLowerBound()),
3691  dimensionIndexSizeOffset[i]));
3692  else
3693  idx.back() = builder.CreateAdd(
3694  idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
3695  boundOp.getLowerBound()),
3696  dimensionIndexSizeOffset[i]));
3697  }
3698  }
3699  }
3700 
3701  return idx;
3702 }
3703 
3704 // This creates two insertions into the MapInfosTy data structure for the
3705 // "parent" of a set of members, (usually a container e.g.
3706 // class/structure/derived type) when subsequent members have also been
3707 // explicitly mapped on the same map clause. Certain types, such as Fortran
3708 // descriptors are mapped like this as well, however, the members are
3709 // implicit as far as a user is concerned, but we must explicitly map them
3710 // internally.
3711 //
3712 // This function also returns the memberOfFlag for this particular parent,
// which is utilised in subsequent member mappings (by modifying their map type
3714 // with it) to indicate that a member is part of this parent and should be
3715 // treated by the runtime as such. Important to achieve the correct mapping.
3716 //
3717 // This function borrows a lot from Clang's emitCombinedEntry function
3718 // inside of CGOpenMPRuntime.cpp
3719 static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
3720  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3721  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
3722  MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams) {
3723  assert(!ompBuilder.Config.isTargetDevice() &&
3724  "function only supported for host device codegen");
3725 
3726  // Map the first segment of our structure
3727  combinedInfo.Types.emplace_back(
3728  isTargetParams
3729  ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
3730  : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE);
3731  combinedInfo.DevicePointers.emplace_back(
3732  mapData.DevicePointers[mapDataIndex]);
3733  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIndex]);
3734  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
3735  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
3736  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
3737 
3738  // Calculate size of the parent object being mapped based on the
3739  // addresses at runtime, highAddr - lowAddr = size. This of course
3740  // doesn't factor in allocated data like pointers, hence the further
3741  // processing of members specified by users, or in the case of
3742  // Fortran pointers and allocatables, the mapping of the pointed to
3743  // data by the descriptor (which itself, is a structure containing
3744  // runtime information on the dynamically allocated data).
3745  auto parentClause =
3746  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3747 
3748  llvm::Value *lowAddr, *highAddr;
3749  if (!parentClause.getPartialMap()) {
3750  lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
3751  builder.getPtrTy());
3752  highAddr = builder.CreatePointerCast(
3753  builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
3754  mapData.Pointers[mapDataIndex], 1),
3755  builder.getPtrTy());
3756  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
3757  } else {
3758  auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3759  int firstMemberIdx = getMapDataMemberIdx(
3760  mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
3761  lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
3762  builder.getPtrTy());
3763  int lastMemberIdx = getMapDataMemberIdx(
3764  mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
3765  highAddr = builder.CreatePointerCast(
3766  builder.CreateGEP(mapData.BaseType[lastMemberIdx],
3767  mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
3768  builder.getPtrTy());
3769  combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
3770  }
3771 
3772  llvm::Value *size = builder.CreateIntCast(
3773  builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
3774  builder.getInt64Ty(),
3775  /*isSigned=*/false);
3776  combinedInfo.Sizes.push_back(size);
3777 
3778  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
3779  ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
3780 
3781  // This creates the initial MEMBER_OF mapping that consists of
3782  // the parent/top level container (same as above effectively, except
3783  // with a fixed initial compile time size and separate maptype which
3784  // indicates the true mape type (tofrom etc.). This parent mapping is
3785  // only relevant if the structure in its totality is being mapped,
3786  // otherwise the above suffices.
3787  if (!parentClause.getPartialMap()) {
3788  // TODO: This will need to be expanded to include the whole host of logic
3789  // for the map flags that Clang currently supports (e.g. it should do some
3790  // further case specific flag modifications). For the moment, it handles
3791  // what we support as expected.
3792  llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
3793  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3794  combinedInfo.Types.emplace_back(mapFlag);
3795  combinedInfo.DevicePointers.emplace_back(
3797  combinedInfo.Mappers.emplace_back(nullptr);
3798  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
3799  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
3800  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
3801  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
3802  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
3803  }
3804  return memberOfFlag;
3805 }
3806 
3807 // The intent is to verify if the mapped data being passed is a
3808 // pointer -> pointee that requires special handling in certain cases,
3809 // e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
3810 //
3811 // There may be a better way to verify this, but unfortunately with
3812 // opaque pointers we lose the ability to easily check if something is
3813 // a pointer whilst maintaining access to the underlying type.
3814 static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
3815  // If we have a varPtrPtr field assigned then the underlying type is a pointer
3816  if (mapOp.getVarPtrPtr())
3817  return true;
3818 
3819  // If the map data is declare target with a link clause, then it's represented
3820  // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
3821  // no relation to pointers.
3822  if (isDeclareTargetLink(mapOp.getVarPtr()))
3823  return true;
3824 
3825  return false;
3826 }
3827 
3828 // This function is intended to add explicit mappings of members
3830  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3831  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
3832  MapInfoData &mapData, uint64_t mapDataIndex,
3833  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
3834  assert(!ompBuilder.Config.isTargetDevice() &&
3835  "function only supported for host device codegen");
3836 
3837  auto parentClause =
3838  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3839 
3840  for (auto mappedMembers : parentClause.getMembers()) {
3841  auto memberClause =
3842  llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp());
3843  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
3844 
3845  assert(memberDataIdx >= 0 && "could not find mapped member of structure");
3846 
3847  // If we're currently mapping a pointer to a block of data, we must
3848  // initially map the pointer, and then attatch/bind the data with a
3849  // subsequent map to the pointer. This segment of code generates the
3850  // pointer mapping, which can in certain cases be optimised out as Clang
3851  // currently does in its lowering. However, for the moment we do not do so,
3852  // in part as we currently have substantially less information on the data
3853  // being mapped at this stage.
3854  if (checkIfPointerMap(memberClause)) {
3855  auto mapFlag =
3856  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
3857  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3858  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
3859  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3860  combinedInfo.Types.emplace_back(mapFlag);
3861  combinedInfo.DevicePointers.emplace_back(
3863  combinedInfo.Mappers.emplace_back(nullptr);
3864  combinedInfo.Names.emplace_back(
3865  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
3866  combinedInfo.BasePointers.emplace_back(
3867  mapData.BasePointers[mapDataIndex]);
3868  combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
3869  combinedInfo.Sizes.emplace_back(builder.getInt64(
3870  moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
3871  }
3872 
3873  // Same MemberOfFlag to indicate its link with parent and other members
3874  // of.
3875  auto mapFlag =
3876  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
3877  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3878  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
3879  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3880  if (checkIfPointerMap(memberClause))
3881  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
3882 
3883  combinedInfo.Types.emplace_back(mapFlag);
3884  combinedInfo.DevicePointers.emplace_back(
3885  mapData.DevicePointers[memberDataIdx]);
3886  combinedInfo.Mappers.emplace_back(mapData.Mappers[memberDataIdx]);
3887  combinedInfo.Names.emplace_back(
3888  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
3889  uint64_t basePointerIndex =
3890  checkIfPointerMap(memberClause) ? memberDataIdx : mapDataIndex;
3891  combinedInfo.BasePointers.emplace_back(
3892  mapData.BasePointers[basePointerIndex]);
3893  combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
3894 
3895  llvm::Value *size = mapData.Sizes[memberDataIdx];
3896  if (checkIfPointerMap(memberClause)) {
3897  size = builder.CreateSelect(
3898  builder.CreateIsNull(mapData.Pointers[memberDataIdx]),
3899  builder.getInt64(0), size);
3900  }
3901 
3902  combinedInfo.Sizes.emplace_back(size);
3903  }
3904 }
3905 
3906 static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
3907  MapInfosTy &combinedInfo, bool isTargetParams,
3908  int mapDataParentIdx = -1) {
3909  // Declare Target Mappings are excluded from being marked as
3910  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
3911  // marked with OMP_MAP_PTR_AND_OBJ instead.
3912  auto mapFlag = mapData.Types[mapDataIdx];
3913  auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
3914 
3915  bool isPtrTy = checkIfPointerMap(mapInfoOp);
3916  if (isPtrTy)
3917  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
3918 
3919  if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx])
3920  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3921 
3922  if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy &&
3923  !isPtrTy)
3924  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
3925 
3926  // if we're provided a mapDataParentIdx, then the data being mapped is
3927  // part of a larger object (in a parent <-> member mapping) and in this
3928  // case our BasePointer should be the parent.
3929  if (mapDataParentIdx >= 0)
3930  combinedInfo.BasePointers.emplace_back(
3931  mapData.BasePointers[mapDataParentIdx]);
3932  else
3933  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
3934 
3935  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
3936  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
3937  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]);
3938  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
3939  combinedInfo.Types.emplace_back(mapFlag);
3940  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
3941 }
3942 
3943 static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation,
3944  llvm::IRBuilderBase &builder,
3945  llvm::OpenMPIRBuilder &ompBuilder,
3946  DataLayout &dl, MapInfosTy &combinedInfo,
3947  MapInfoData &mapData, uint64_t mapDataIndex,
3948  bool isTargetParams) {
3949  assert(!ompBuilder.Config.isTargetDevice() &&
3950  "function only supported for host device codegen");
3951 
3952  auto parentClause =
3953  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3954 
3955  // If we have a partial map (no parent referenced in the map clauses of the
3956  // directive, only members) and only a single member, we do not need to bind
3957  // the map of the member to the parent, we can pass the member separately.
3958  if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) {
3959  auto memberClause = llvm::cast<omp::MapInfoOp>(
3960  parentClause.getMembers()[0].getDefiningOp());
3961  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
3962  // Note: Clang treats arrays with explicit bounds that fall into this
3963  // category as a parent with map case, however, it seems this isn't a
3964  // requirement, and processing them as an individual map is fine. So,
3965  // we will handle them as individual maps for the moment, as it's
3966  // difficult for us to check this as we always require bounds to be
3967  // specified currently and it's also marginally more optimal (single
3968  // map rather than two). The difference may come from the fact that
3969  // Clang maps array without bounds as pointers (which we do not
3970  // currently do), whereas we treat them as arrays in all cases
3971  // currently.
3972  processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams,
3973  mapDataIndex);
3974  return;
3975  }
3976 
3977  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
3978  mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
3979  combinedInfo, mapData, mapDataIndex, isTargetParams);
3980  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
3981  combinedInfo, mapData, mapDataIndex,
3982  memberOfParentFlag);
3983 }
3984 
3985 // This is a variation on Clang's GenerateOpenMPCapturedVars, which
3986 // generates different operation (e.g. load/store) combinations for
3987 // arguments to the kernel, based on map capture kinds which are then
3988 // utilised in the combinedInfo in place of the original Map value.
3989 static void
3990 createAlteredByCaptureMap(MapInfoData &mapData,
3991  LLVM::ModuleTranslation &moduleTranslation,
3992  llvm::IRBuilderBase &builder) {
3993  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
3994  "function only supported for host device codegen");
3995  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
3996  // if it's declare target, skip it, it's handled separately.
3997  if (!mapData.IsDeclareTarget[i]) {
3998  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
3999  omp::VariableCaptureKind captureKind = mapOp.getMapCaptureType();
4000  bool isPtrTy = checkIfPointerMap(mapOp);
4001 
4002  // Currently handles array sectioning lowerbound case, but more
4003  // logic may be required in the future. Clang invokes EmitLValue,
4004  // which has specialised logic for special Clang types such as user
4005  // defines, so it is possible we will have to extend this for
4006  // structures or other complex types. As the general idea is that this
4007  // function mimics some of the logic from Clang that we require for
4008  // kernel argument passing from host -> device.
4009  switch (captureKind) {
4010  case omp::VariableCaptureKind::ByRef: {
4011  llvm::Value *newV = mapData.Pointers[i];
4012  std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
4013  moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
4014  mapOp.getBounds());
4015  if (isPtrTy)
4016  newV = builder.CreateLoad(builder.getPtrTy(), newV);
4017 
4018  if (!offsetIdx.empty())
4019  newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
4020  "array_offset");
4021  mapData.Pointers[i] = newV;
4022  } break;
4023  case omp::VariableCaptureKind::ByCopy: {
4024  llvm::Type *type = mapData.BaseType[i];
4025  llvm::Value *newV;
4026  if (mapData.Pointers[i]->getType()->isPointerTy())
4027  newV = builder.CreateLoad(type, mapData.Pointers[i]);
4028  else
4029  newV = mapData.Pointers[i];
4030 
4031  if (!isPtrTy) {
4032  auto curInsert = builder.saveIP();
4033  builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
4034  auto *memTempAlloc =
4035  builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
4036  builder.restoreIP(curInsert);
4037 
4038  builder.CreateStore(newV, memTempAlloc);
4039  newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
4040  }
4041 
4042  mapData.Pointers[i] = newV;
4043  mapData.BasePointers[i] = newV;
4044  } break;
4045  case omp::VariableCaptureKind::This:
4046  case omp::VariableCaptureKind::VLAType:
4047  mapData.MapClause[i]->emitOpError("Unhandled capture kind");
4048  break;
4049  }
4050  }
4051  }
4052 }
4053 
4054 // Generate all map related information and fill the combinedInfo.
4055 static void genMapInfos(llvm::IRBuilderBase &builder,
4056  LLVM::ModuleTranslation &moduleTranslation,
4057  DataLayout &dl, MapInfosTy &combinedInfo,
4058  MapInfoData &mapData, bool isTargetParams = false) {
4059  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4060  "function only supported for host device codegen");
4061 
4062  // We wish to modify some of the methods in which arguments are
4063  // passed based on their capture type by the target region, this can
4064  // involve generating new loads and stores, which changes the
4065  // MLIR value to LLVM value mapping, however, we only wish to do this
4066  // locally for the current function/target and also avoid altering
4067  // ModuleTranslation, so we remap the base pointer or pointer stored
4068  // in the map infos corresponding MapInfoData, which is later accessed
4069  // by genMapInfos and createTarget to help generate the kernel and
4070  // kernel arg structure. It primarily becomes relevant in cases like
4071  // bycopy, or byref range'd arrays. In the default case, we simply
4072  // pass thee pointer byref as both basePointer and pointer.
4073  createAlteredByCaptureMap(mapData, moduleTranslation, builder);
4074 
4075  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4076 
4077  // We operate under the assumption that all vectors that are
4078  // required in MapInfoData are of equal lengths (either filled with
4079  // default constructed data or appropiate information) so we can
4080  // utilise the size from any component of MapInfoData, if we can't
4081  // something is missing from the initial MapInfoData construction.
4082  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
4083  // NOTE/TODO: We currently do not support arbitrary depth record
4084  // type mapping.
4085  if (mapData.IsAMember[i])
4086  continue;
4087 
4088  auto mapInfoOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[i]);
4089  if (!mapInfoOp.getMembers().empty()) {
4090  processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
4091  combinedInfo, mapData, i, isTargetParams);
4092  continue;
4093  }
4094 
4095  processIndividualMap(mapData, i, combinedInfo, isTargetParams);
4096  }
4097 }
4098 
4100 emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder,
4101  LLVM::ModuleTranslation &moduleTranslation,
4102  llvm::StringRef mapperFuncName);
4103 
4105 getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder,
4106  LLVM::ModuleTranslation &moduleTranslation) {
4107  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4108  "function only supported for host device codegen");
4109  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
4110  std::string mapperFuncName =
4111  moduleTranslation.getOpenMPBuilder()->createPlatformSpecificName(
4112  {"omp_mapper", declMapperOp.getSymName()});
4113 
4114  if (auto *lookupFunc = moduleTranslation.lookupFunction(mapperFuncName))
4115  return lookupFunc;
4116 
4117  return emitUserDefinedMapper(declMapperOp, builder, moduleTranslation,
4118  mapperFuncName);
4119 }
4120 
4122 emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder,
4123  LLVM::ModuleTranslation &moduleTranslation,
4124  llvm::StringRef mapperFuncName) {
4125  assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4126  "function only supported for host device codegen");
4127  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
4128  auto declMapperInfoOp = declMapperOp.getDeclareMapperInfo();
4129  DataLayout dl = DataLayout(declMapperOp->getParentOfType<ModuleOp>());
4130  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4131  llvm::Type *varType = moduleTranslation.convertType(declMapperOp.getType());
4132  SmallVector<Value> mapVars = declMapperInfoOp.getMapVars();
4133 
4134  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4135 
4136  // Fill up the arrays with all the mapped variables.
4137  MapInfosTy combinedInfo;
4138  auto genMapInfoCB =
4139  [&](InsertPointTy codeGenIP, llvm::Value *ptrPHI,
4140  llvm::Value *unused2) -> llvm::OpenMPIRBuilder::MapInfosOrErrorTy {
4141  builder.restoreIP(codeGenIP);
4142  moduleTranslation.mapValue(declMapperOp.getSymVal(), ptrPHI);
4143  moduleTranslation.mapBlock(&declMapperOp.getRegion().front(),
4144  builder.GetInsertBlock());
4145  if (failed(moduleTranslation.convertBlock(declMapperOp.getRegion().front(),
4146  /*ignoreArguments=*/true,
4147  builder)))
4148  return llvm::make_error<PreviouslyReportedError>();
4149  MapInfoData mapData;
4150  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
4151  builder);
4152  genMapInfos(builder, moduleTranslation, dl, combinedInfo, mapData);
4153 
4154  // Drop the mapping that is no longer necessary so that the same region can
4155  // be processed multiple times.
4156  moduleTranslation.forgetMapping(declMapperOp.getRegion());
4157  return combinedInfo;
4158  };
4159 
4160  auto customMapperCB = [&](unsigned i) -> llvm::Expected<llvm::Function *> {
4161  if (!combinedInfo.Mappers[i])
4162  return nullptr;
4163  return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
4164  moduleTranslation);
4165  };
4166 
4167  llvm::Expected<llvm::Function *> newFn = ompBuilder->emitUserDefinedMapper(
4168  genMapInfoCB, varType, mapperFuncName, customMapperCB);
4169  if (!newFn)
4170  return newFn.takeError();
4171  moduleTranslation.mapFunction(mapperFuncName, *newFn);
4172  return *newFn;
4173 }
4174 
4175 static LogicalResult
4176 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
4177  LLVM::ModuleTranslation &moduleTranslation) {
4178  llvm::Value *ifCond = nullptr;
4179  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
4180  SmallVector<Value> mapVars;
4181  SmallVector<Value> useDevicePtrVars;
4182  SmallVector<Value> useDeviceAddrVars;
4183  llvm::omp::RuntimeFunction RTLFn;
4184  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
4185 
4186  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4187  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
4188  /*SeparateBeginEndCalls=*/true);
4189 
4190  LogicalResult result =
4192  .Case([&](omp::TargetDataOp dataOp) {
4193  if (failed(checkImplementationStatus(*dataOp)))
4194  return failure();
4195 
4196  if (auto ifVar = dataOp.getIfExpr())
4197  ifCond = moduleTranslation.lookupValue(ifVar);
4198 
4199  if (auto devId = dataOp.getDevice())
4200  if (auto constOp =
4201  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4202  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4203  deviceID = intAttr.getInt();
4204 
4205  mapVars = dataOp.getMapVars();
4206  useDevicePtrVars = dataOp.getUseDevicePtrVars();
4207  useDeviceAddrVars = dataOp.getUseDeviceAddrVars();
4208  return success();
4209  })
4210  .Case([&](omp::TargetEnterDataOp enterDataOp) -> LogicalResult {
4211  if (failed(checkImplementationStatus(*enterDataOp)))
4212  return failure();
4213 
4214  if (auto ifVar = enterDataOp.getIfExpr())
4215  ifCond = moduleTranslation.lookupValue(ifVar);
4216 
4217  if (auto devId = enterDataOp.getDevice())
4218  if (auto constOp =
4219  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4220  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4221  deviceID = intAttr.getInt();
4222  RTLFn =
4223  enterDataOp.getNowait()
4224  ? llvm::omp::OMPRTL___tgt_target_data_begin_nowait_mapper
4225  : llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
4226  mapVars = enterDataOp.getMapVars();
4227  info.HasNoWait = enterDataOp.getNowait();
4228  return success();
4229  })
4230  .Case([&](omp::TargetExitDataOp exitDataOp) -> LogicalResult {
4231  if (failed(checkImplementationStatus(*exitDataOp)))
4232  return failure();
4233 
4234  if (auto ifVar = exitDataOp.getIfExpr())
4235  ifCond = moduleTranslation.lookupValue(ifVar);
4236 
4237  if (auto devId = exitDataOp.getDevice())
4238  if (auto constOp =
4239  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4240  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4241  deviceID = intAttr.getInt();
4242 
4243  RTLFn = exitDataOp.getNowait()
4244  ? llvm::omp::OMPRTL___tgt_target_data_end_nowait_mapper
4245  : llvm::omp::OMPRTL___tgt_target_data_end_mapper;
4246  mapVars = exitDataOp.getMapVars();
4247  info.HasNoWait = exitDataOp.getNowait();
4248  return success();
4249  })
4250  .Case([&](omp::TargetUpdateOp updateDataOp) -> LogicalResult {
4251  if (failed(checkImplementationStatus(*updateDataOp)))
4252  return failure();
4253 
4254  if (auto ifVar = updateDataOp.getIfExpr())
4255  ifCond = moduleTranslation.lookupValue(ifVar);
4256 
4257  if (auto devId = updateDataOp.getDevice())
4258  if (auto constOp =
4259  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4260  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4261  deviceID = intAttr.getInt();
4262 
4263  RTLFn =
4264  updateDataOp.getNowait()
4265  ? llvm::omp::OMPRTL___tgt_target_data_update_nowait_mapper
4266  : llvm::omp::OMPRTL___tgt_target_data_update_mapper;
4267  mapVars = updateDataOp.getMapVars();
4268  info.HasNoWait = updateDataOp.getNowait();
4269  return success();
4270  })
4271  .Default([&](Operation *op) {
4272  llvm_unreachable("unexpected operation");
4273  return failure();
4274  });
4275 
4276  if (failed(result))
4277  return failure();
4278 
4279  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4280  MapInfoData mapData;
4281  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, DL,
4282  builder, useDevicePtrVars, useDeviceAddrVars);
4283 
4284  // Fill up the arrays with all the mapped variables.
4285  MapInfosTy combinedInfo;
4286  auto genMapInfoCB = [&](InsertPointTy codeGenIP) -> MapInfosTy & {
4287  builder.restoreIP(codeGenIP);
4288  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
4289  return combinedInfo;
4290  };
4291 
4292  // Define a lambda to apply mappings between use_device_addr and
4293  // use_device_ptr base pointers, and their associated block arguments.
4294  auto mapUseDevice =
4295  [&moduleTranslation](
4296  llvm::OpenMPIRBuilder::DeviceInfoTy type,
4298  llvm::SmallVectorImpl<Value> &useDeviceVars, MapInfoData &mapInfoData,
4299  llvm::function_ref<llvm::Value *(llvm::Value *)> mapper = nullptr) {
4300  for (auto [arg, useDevVar] :
4301  llvm::zip_equal(blockArgs, useDeviceVars)) {
4302 
4303  auto getMapBasePtr = [](omp::MapInfoOp mapInfoOp) {
4304  return mapInfoOp.getVarPtrPtr() ? mapInfoOp.getVarPtrPtr()
4305  : mapInfoOp.getVarPtr();
4306  };
4307 
4308  auto useDevMap = cast<omp::MapInfoOp>(useDevVar.getDefiningOp());
4309  for (auto [mapClause, devicePointer, basePointer] : llvm::zip_equal(
4310  mapInfoData.MapClause, mapInfoData.DevicePointers,
4311  mapInfoData.BasePointers)) {
4312  auto mapOp = cast<omp::MapInfoOp>(mapClause);
4313  if (getMapBasePtr(mapOp) != getMapBasePtr(useDevMap) ||
4314  devicePointer != type)
4315  continue;
4316 
4317  if (llvm::Value *devPtrInfoMap =
4318  mapper ? mapper(basePointer) : basePointer) {
4319  moduleTranslation.mapValue(arg, devPtrInfoMap);
4320  break;
4321  }
4322  }
4323  }
4324  };
4325 
4326  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
4327  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType)
4328  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
4329  builder.restoreIP(codeGenIP);
4330  assert(isa<omp::TargetDataOp>(op) &&
4331  "BodyGen requested for non TargetDataOp");
4332  auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(op);
4333  Region &region = cast<omp::TargetDataOp>(op).getRegion();
4334  switch (bodyGenType) {
4335  case BodyGenTy::Priv:
4336  // Check if any device ptr/addr info is available
4337  if (!info.DevicePtrInfoMap.empty()) {
4338  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4339  blockArgIface.getUseDeviceAddrBlockArgs(),
4340  useDeviceAddrVars, mapData,
4341  [&](llvm::Value *basePointer) -> llvm::Value * {
4342  if (!info.DevicePtrInfoMap[basePointer].second)
4343  return nullptr;
4344  return builder.CreateLoad(
4345  builder.getPtrTy(),
4346  info.DevicePtrInfoMap[basePointer].second);
4347  });
4348  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4349  blockArgIface.getUseDevicePtrBlockArgs(), useDevicePtrVars,
4350  mapData, [&](llvm::Value *basePointer) {
4351  return info.DevicePtrInfoMap[basePointer].second;
4352  });
4353 
4354  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
4355  moduleTranslation)))
4356  return llvm::make_error<PreviouslyReportedError>();
4357  }
4358  break;
4359  case BodyGenTy::DupNoPriv:
4360  // We must always restoreIP regardless of doing anything the caller
4361  // does not restore it, leading to incorrect (no) branch generation.
4362  builder.restoreIP(codeGenIP);
4363  break;
4364  case BodyGenTy::NoPriv:
4365  // If device info is available then region has already been generated
4366  if (info.DevicePtrInfoMap.empty()) {
4367  // For device pass, if use_device_ptr(addr) mappings were present,
4368  // we need to link them here before codegen.
4369  if (ompBuilder->Config.IsTargetDevice.value_or(false)) {
4370  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4371  blockArgIface.getUseDeviceAddrBlockArgs(),
4372  useDeviceAddrVars, mapData);
4373  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4374  blockArgIface.getUseDevicePtrBlockArgs(),
4375  useDevicePtrVars, mapData);
4376  }
4377 
4378  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
4379  moduleTranslation)))
4380  return llvm::make_error<PreviouslyReportedError>();
4381  }
4382  break;
4383  }
4384  return builder.saveIP();
4385  };
4386 
4387  auto customMapperCB =
4388  [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
4389  if (!combinedInfo.Mappers[i])
4390  return nullptr;
4391  info.HasMapper = true;
4392  return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
4393  moduleTranslation);
4394  };
4395 
4396  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4397  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4398  findAllocaInsertPoint(builder, moduleTranslation);
4399  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() {
4400  if (isa<omp::TargetDataOp>(op))
4401  return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
4402  builder.getInt64(deviceID), ifCond,
4403  info, genMapInfoCB, customMapperCB,
4404  /*MapperFunc=*/nullptr, bodyGenCB,
4405  /*DeviceAddrCB=*/nullptr);
4406  return ompBuilder->createTargetData(
4407  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
4408  info, genMapInfoCB, customMapperCB, &RTLFn);
4409  }();
4410 
4411  if (failed(handleError(afterIP, *op)))
4412  return failure();
4413 
4414  builder.restoreIP(*afterIP);
4415  return success();
4416 }
4417 
4418 static LogicalResult
4419 convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
4420  LLVM::ModuleTranslation &moduleTranslation) {
4421  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4422  auto distributeOp = cast<omp::DistributeOp>(opInst);
4423  if (failed(checkImplementationStatus(opInst)))
4424  return failure();
4425 
4426  /// Process teams op reduction in distribute if the reduction is contained in
4427  /// the distribute op.
4428  omp::TeamsOp teamsOp = opInst.getParentOfType<omp::TeamsOp>();
4429  bool doDistributeReduction =
4430  teamsOp ? teamsReductionContainedInDistribute(teamsOp) : false;
4431 
4432  DenseMap<Value, llvm::Value *> reductionVariableMap;
4433  unsigned numReductionVars = teamsOp ? teamsOp.getNumReductionVars() : 0;
4434  SmallVector<omp::DeclareReductionOp> reductionDecls;
4435  SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
4436  llvm::ArrayRef<bool> isByRef;
4437 
4438  if (doDistributeReduction) {
4439  isByRef = getIsByRef(teamsOp.getReductionByref());
4440  assert(isByRef.size() == teamsOp.getNumReductionVars());
4441 
4442  collectReductionDecls(teamsOp, reductionDecls);
4443  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4444  findAllocaInsertPoint(builder, moduleTranslation);
4445 
4446  MutableArrayRef<BlockArgument> reductionArgs =
4447  llvm::cast<omp::BlockArgOpenMPOpInterface>(*teamsOp)
4448  .getReductionBlockArgs();
4449 
4451  teamsOp, reductionArgs, builder, moduleTranslation, allocaIP,
4452  reductionDecls, privateReductionVariables, reductionVariableMap,
4453  isByRef)))
4454  return failure();
4455  }
4456 
4457  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4458  auto bodyGenCB = [&](InsertPointTy allocaIP,
4459  InsertPointTy codeGenIP) -> llvm::Error {
4460  // Save the alloca insertion point on ModuleTranslation stack for use in
4461  // nested regions.
4463  moduleTranslation, allocaIP);
4464 
4465  // DistributeOp has only one region associated with it.
4466  builder.restoreIP(codeGenIP);
4467  PrivateVarsInfo privVarsInfo(distributeOp);
4468 
4469  llvm::Expected<llvm::BasicBlock *> afterAllocas =
4470  allocatePrivateVars(builder, moduleTranslation, privVarsInfo, allocaIP);
4471  if (handleError(afterAllocas, opInst).failed())
4472  return llvm::make_error<PreviouslyReportedError>();
4473 
4474  if (handleError(initPrivateVars(builder, moduleTranslation, privVarsInfo),
4475  opInst)
4476  .failed())
4477  return llvm::make_error<PreviouslyReportedError>();
4478 
4479  if (failed(copyFirstPrivateVars(
4480  builder, moduleTranslation, privVarsInfo.mlirVars,
4481  privVarsInfo.llvmVars, privVarsInfo.privatizers)))
4482  return llvm::make_error<PreviouslyReportedError>();
4483 
4484  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4485  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4487  convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
4488  builder, moduleTranslation);
4489  if (!regionBlock)
4490  return regionBlock.takeError();
4491  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
4492 
4493  // Skip applying a workshare loop below when translating 'distribute
4494  // parallel do' (it's been already handled by this point while translating
4495  // the nested omp.wsloop).
4496  if (!isa_and_present<omp::WsloopOp>(distributeOp.getNestedWrapper())) {
4497  // TODO: Add support for clauses which are valid for DISTRIBUTE
4498  // constructs. Static schedule is the default.
4499  auto schedule = omp::ClauseScheduleKind::Static;
4500  bool isOrdered = false;
4501  std::optional<omp::ScheduleModifier> scheduleMod;
4502  bool isSimd = false;
4503  llvm::omp::WorksharingLoopType workshareLoopType =
4504  llvm::omp::WorksharingLoopType::DistributeStaticLoop;
4505  bool loopNeedsBarrier = false;
4506  llvm::Value *chunk = nullptr;
4507 
4508  llvm::CanonicalLoopInfo *loopInfo =
4509  findCurrentLoopInfo(moduleTranslation);
4510  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
4511  ompBuilder->applyWorkshareLoop(
4512  ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
4513  convertToScheduleKind(schedule), chunk, isSimd,
4514  scheduleMod == omp::ScheduleModifier::monotonic,
4515  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
4516  workshareLoopType);
4517 
4518  if (!wsloopIP)
4519  return wsloopIP.takeError();
4520  }
4521 
4522  if (failed(cleanupPrivateVars(builder, moduleTranslation,
4523  distributeOp.getLoc(), privVarsInfo.llvmVars,
4524  privVarsInfo.privatizers)))
4525  return llvm::make_error<PreviouslyReportedError>();
4526 
4527  return llvm::Error::success();
4528  };
4529 
4530  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4531  findAllocaInsertPoint(builder, moduleTranslation);
4532  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4533  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
4534  ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
4535 
4536  if (failed(handleError(afterIP, opInst)))
4537  return failure();
4538 
4539  builder.restoreIP(*afterIP);
4540 
4541  if (doDistributeReduction) {
4542  // Process the reductions if required.
4544  teamsOp, builder, moduleTranslation, allocaIP, reductionDecls,
4545  privateReductionVariables, isByRef,
4546  /*isNoWait*/ false, /*isTeamsReduction*/ true);
4547  }
4548  return success();
4549 }
4550 
4551 /// Lowers the FlagsAttr which is applied to the module on the device
4552 /// pass when offloading, this attribute contains OpenMP RTL globals that can
4553 /// be passed as flags to the frontend, otherwise they are set to default
4554 LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
4555  LLVM::ModuleTranslation &moduleTranslation) {
4556  if (!cast<mlir::ModuleOp>(op))
4557  return failure();
4558 
4559  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4560 
4561  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
4562  attribute.getOpenmpDeviceVersion());
4563 
4564  if (attribute.getNoGpuLib())
4565  return success();
4566 
4567  ompBuilder->createGlobalFlag(
4568  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
4569  "__omp_rtl_debug_kind");
4570  ompBuilder->createGlobalFlag(
4571  attribute
4572  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
4573  ,
4574  "__omp_rtl_assume_teams_oversubscription");
4575  ompBuilder->createGlobalFlag(
4576  attribute
4577  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
4578  ,
4579  "__omp_rtl_assume_threads_oversubscription");
4580  ompBuilder->createGlobalFlag(
4581  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
4582  "__omp_rtl_assume_no_thread_state");
4583  ompBuilder->createGlobalFlag(
4584  attribute
4585  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
4586  ,
4587  "__omp_rtl_assume_no_nested_parallelism");
4588  return success();
4589 }
4590 
4591 static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
4592  omp::TargetOp targetOp,
4593  llvm::StringRef parentName = "") {
4594  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
4595 
4596  assert(fileLoc && "No file found from location");
4597  StringRef fileName = fileLoc.getFilename().getValue();
4598 
4599  llvm::sys::fs::UniqueID id;
4600  uint64_t line = fileLoc.getLine();
4601  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
4602  size_t fileHash = llvm::hash_value(fileName.str());
4603  size_t deviceId = 0xdeadf17e;
4604  targetInfo =
4605  llvm::TargetRegionEntryInfo(parentName, deviceId, fileHash, line);
4606  } else {
4607  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
4608  id.getFile(), line);
4609  }
4610 }
4611 
4612 static void
4613 handleDeclareTargetMapVar(MapInfoData &mapData,
4614  LLVM::ModuleTranslation &moduleTranslation,
4615  llvm::IRBuilderBase &builder, llvm::Function *func) {
4616  assert(moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
4617  "function only supported for target device codegen");
4618  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
4619  // In the case of declare target mapped variables, the basePointer is
4620  // the reference pointer generated by the convertDeclareTargetAttr
4621  // method. Whereas the kernelValue is the original variable, so for
4622  // the device we must replace all uses of this original global variable
4623  // (stored in kernelValue) with the reference pointer (stored in
4624  // basePointer for declare target mapped variables), as for device the
4625  // data is mapped into this reference pointer and should be loaded
4626  // from it, the original variable is discarded. On host both exist and
4627  // metadata is generated (elsewhere in the convertDeclareTargetAttr)
4628  // function to link the two variables in the runtime and then both the
4629  // reference pointer and the pointer are assigned in the kernel argument
4630  // structure for the host.
4631  if (mapData.IsDeclareTarget[i]) {
4632  // If the original map value is a constant, then we have to make sure all
4633  // of it's uses within the current kernel/function that we are going to
4634  // rewrite are converted to instructions, as we will be altering the old
4635  // use (OriginalValue) from a constant to an instruction, which will be
4636  // illegal and ICE the compiler if the user is a constant expression of
4637  // some kind e.g. a constant GEP.
4638  if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
4639  convertUsersOfConstantsToInstructions(constant, func, false);
4640 
4641  // The users iterator will get invalidated if we modify an element,
4642  // so we populate this vector of uses to alter each user on an
4643  // individual basis to emit its own load (rather than one load for
4644  // all).
4646  for (llvm::User *user : mapData.OriginalValue[i]->users())
4647  userVec.push_back(user);
4648 
4649  for (llvm::User *user : userVec) {
4650  if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
4651  if (insn->getFunction() == func) {
4652  auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
4653  mapData.BasePointers[i]);
4654  load->moveBefore(insn->getIterator());
4655  user->replaceUsesOfWith(mapData.OriginalValue[i], load);
4656  }
4657  }
4658  }
4659  }
4660  }
4661 }
4662 
4663 // The createDeviceArgumentAccessor function generates
4664 // instructions for retrieving (acessing) kernel
4665 // arguments inside of the device kernel for use by
4666 // the kernel. This enables different semantics such as
4667 // the creation of temporary copies of data allowing
4668 // semantics like read-only/no host write back kernel
4669 // arguments.
4670 //
4671 // This currently implements a very light version of Clang's
4672 // EmitParmDecl's handling of direct argument handling as well
4673 // as a portion of the argument access generation based on
4674 // capture types found at the end of emitOutlinedFunctionPrologue
4675 // in Clang. The indirect path handling of EmitParmDecl's may be
4676 // required for future work, but a direct 1-to-1 copy doesn't seem
4677 // possible as the logic is rather scattered throughout Clang's
4678 // lowering and perhaps we wish to deviate slightly.
4679 //
4680 // \param mapData - A container containing vectors of information
4681 // corresponding to the input argument, which should have a
4682 // corresponding entry in the MapInfoData containers
4683 // OrigialValue's.
4684 // \param arg - This is the generated kernel function argument that
4685 // corresponds to the passed in input argument. We generated different
4686 // accesses of this Argument, based on capture type and other Input
4687 // related information.
4688 // \param input - This is the host side value that will be passed to
4689 // the kernel i.e. the kernel input, we rewrite all uses of this within
4690 // the kernel (as we generate the kernel body based on the target's region
4691 // which maintians references to the original input) to the retVal argument
4692 // apon exit of this function inside of the OMPIRBuilder. This interlinks
4693 // the kernel argument to future uses of it in the function providing
4694 // appropriate "glue" instructions inbetween.
4695 // \param retVal - This is the value that all uses of input inside of the
4696 // kernel will be re-written to, the goal of this function is to generate
4697 // an appropriate location for the kernel argument to be accessed from,
4698 // e.g. ByRef will result in a temporary allocation location and then
4699 // a store of the kernel argument into this allocated memory which
4700 // will then be loaded from, ByCopy will use the allocated memory
4701 // directly.
4702 static llvm::IRBuilderBase::InsertPoint
4704  llvm::Value *input, llvm::Value *&retVal,
4705  llvm::IRBuilderBase &builder,
4706  llvm::OpenMPIRBuilder &ompBuilder,
4707  LLVM::ModuleTranslation &moduleTranslation,
4708  llvm::IRBuilderBase::InsertPoint allocaIP,
4709  llvm::IRBuilderBase::InsertPoint codeGenIP) {
4710  assert(ompBuilder.Config.isTargetDevice() &&
4711  "function only supported for target device codegen");
4712  builder.restoreIP(allocaIP);
4713 
4714  omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef;
4715  LLVM::TypeToLLVMIRTranslator typeToLLVMIRTranslator(
4716  ompBuilder.M.getContext());
4717  unsigned alignmentValue = 0;
4718  // Find the associated MapInfoData entry for the current input
4719  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
4720  if (mapData.OriginalValue[i] == input) {
4721  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
4722  capture = mapOp.getMapCaptureType();
4723  // Get information of alignment of mapped object
4724  alignmentValue = typeToLLVMIRTranslator.getPreferredAlignment(
4725  mapOp.getVarType(), ompBuilder.M.getDataLayout());
4726  break;
4727  }
4728 
4729  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
4730  unsigned int defaultAS =
4731  ompBuilder.M.getDataLayout().getProgramAddressSpace();
4732 
4733  // Create the alloca for the argument the current point.
4734  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
4735 
4736  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
4737  v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS));
4738 
4739  builder.CreateStore(&arg, v);
4740 
4741  builder.restoreIP(codeGenIP);
4742 
4743  switch (capture) {
4744  case omp::VariableCaptureKind::ByCopy: {
4745  retVal = v;
4746  break;
4747  }
4748  case omp::VariableCaptureKind::ByRef: {
4749  llvm::LoadInst *loadInst = builder.CreateAlignedLoad(
4750  v->getType(), v,
4751  ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
4752  // CreateAlignedLoad function creates similar LLVM IR:
4753  // %res = load ptr, ptr %input, align 8
4754  // This LLVM IR does not contain information about alignment
4755  // of the loaded value. We need to add !align metadata to unblock
4756  // optimizer. The existence of the !align metadata on the instruction
4757  // tells the optimizer that the value loaded is known to be aligned to
4758  // a boundary specified by the integer value in the metadata node.
4759  // Example:
4760  // %res = load ptr, ptr %input, align 8, !align !align_md_node
4761  // ^ ^
4762  // | |
4763  // alignment of %input address |
4764  // |
4765  // alignment of %res object
4766  if (v->getType()->isPointerTy() && alignmentValue) {
4767  llvm::MDBuilder MDB(builder.getContext());
4768  loadInst->setMetadata(
4769  llvm::LLVMContext::MD_align,
4770  llvm::MDNode::get(builder.getContext(),
4771  MDB.createConstant(llvm::ConstantInt::get(
4772  llvm::Type::getInt64Ty(builder.getContext()),
4773  alignmentValue))));
4774  }
4775  retVal = loadInst;
4776 
4777  break;
4778  }
4779  case omp::VariableCaptureKind::This:
4780  case omp::VariableCaptureKind::VLAType:
4781  // TODO: Consider returning error to use standard reporting for
4782  // unimplemented features.
4783  assert(false && "Currently unsupported capture kind");
4784  break;
4785  }
4786 
4787  return builder.saveIP();
4788 }
4789 
4790 /// Follow uses of `host_eval`-defined block arguments of the given `omp.target`
4791 /// operation and populate output variables with their corresponding host value
4792 /// (i.e. operand evaluated outside of the target region), based on their uses
4793 /// inside of the target region.
4794 ///
4795 /// Loop bounds and steps are only optionally populated, if output vectors are
4796 /// provided.
4797 static void
4798 extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
4799  Value &numTeamsLower, Value &numTeamsUpper,
4800  Value &threadLimit,
4801  llvm::SmallVectorImpl<Value> *lowerBounds = nullptr,
4802  llvm::SmallVectorImpl<Value> *upperBounds = nullptr,
4803  llvm::SmallVectorImpl<Value> *steps = nullptr) {
4804  auto blockArgIface = llvm::cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
4805  for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(),
4806  blockArgIface.getHostEvalBlockArgs())) {
4807  Value hostEvalVar = std::get<0>(item), blockArg = std::get<1>(item);
4808 
4809  for (Operation *user : blockArg.getUsers()) {
4811  .Case([&](omp::TeamsOp teamsOp) {
4812  if (teamsOp.getNumTeamsLower() == blockArg)
4813  numTeamsLower = hostEvalVar;
4814  else if (teamsOp.getNumTeamsUpper() == blockArg)
4815  numTeamsUpper = hostEvalVar;
4816  else if (teamsOp.getThreadLimit() == blockArg)
4817  threadLimit = hostEvalVar;
4818  else
4819  llvm_unreachable("unsupported host_eval use");
4820  })
4821  .Case([&](omp::ParallelOp parallelOp) {
4822  if (parallelOp.getNumThreads() == blockArg)
4823  numThreads = hostEvalVar;
4824  else
4825  llvm_unreachable("unsupported host_eval use");
4826  })
4827  .Case([&](omp::LoopNestOp loopOp) {
4828  auto processBounds =
4829  [&](OperandRange opBounds,
4830  llvm::SmallVectorImpl<Value> *outBounds) -> bool {
4831  bool found = false;
4832  for (auto [i, lb] : llvm::enumerate(opBounds)) {
4833  if (lb == blockArg) {
4834  found = true;
4835  if (outBounds)
4836  (*outBounds)[i] = hostEvalVar;
4837  }
4838  }
4839  return found;
4840  };
4841  bool found =
4842  processBounds(loopOp.getLoopLowerBounds(), lowerBounds);
4843  found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) ||
4844  found;
4845  found = processBounds(loopOp.getLoopSteps(), steps) || found;
4846  (void)found;
4847  assert(found && "unsupported host_eval use");
4848  })
4849  .Default([](Operation *) {
4850  llvm_unreachable("unsupported host_eval use");
4851  });
4852  }
4853  }
4854 }
4855 
4856 /// If \p op is of the given type parameter, return it casted to that type.
4857 /// Otherwise, if its immediate parent operation (or some other higher-level
4858 /// parent, if \p immediateParent is false) is of that type, return that parent
4859 /// casted to the given type.
4860 ///
4861 /// If \p op is \c null or neither it or its parent(s) are of the specified
4862 /// type, return a \c null operation.
4863 template <typename OpTy>
4864 static OpTy castOrGetParentOfType(Operation *op, bool immediateParent = false) {
4865  if (!op)
4866  return OpTy();
4867 
4868  if (OpTy casted = dyn_cast<OpTy>(op))
4869  return casted;
4870 
4871  if (immediateParent)
4872  return dyn_cast_if_present<OpTy>(op->getParentOp());
4873 
4874  return op->getParentOfType<OpTy>();
4875 }
4876 
4877 /// If the given \p value is defined by an \c llvm.mlir.constant operation and
4878 /// it is of an integer type, return its value.
4879 static std::optional<int64_t> extractConstInteger(Value value) {
4880  if (!value)
4881  return std::nullopt;
4882 
4883  if (auto constOp =
4884  dyn_cast_if_present<LLVM::ConstantOp>(value.getDefiningOp()))
4885  if (auto constAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4886  return constAttr.getInt();
4887 
4888  return std::nullopt;
4889 }
4890 
4891 static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl) {
4892  uint64_t sizeInBits = dl.getTypeSizeInBits(type);
4893  uint64_t sizeInBytes = sizeInBits / 8;
4894  return sizeInBytes;
4895 }
4896 
4897 template <typename OpTy>
4898 static uint64_t getReductionDataSize(OpTy &op) {
4899  if (op.getNumReductionVars() > 0) {
4901  collectReductionDecls(op, reductions);
4902 
4904  members.reserve(reductions.size());
4905  for (omp::DeclareReductionOp &red : reductions)
4906  members.push_back(red.getType());
4907  Operation *opp = op.getOperation();
4908  auto structType = mlir::LLVM::LLVMStructType::getLiteral(
4909  opp->getContext(), members, /*isPacked=*/false);
4910  DataLayout dl = DataLayout(opp->getParentOfType<ModuleOp>());
4911  return getTypeByteSize(structType, dl);
4912  }
4913  return 0;
4914 }
4915 
/// Populate default `MinTeams`, `MaxTeams` and `MaxThreads` to their default
/// values as stated by the corresponding clauses, if constant.
///
/// These default values must be set before the creation of the outlined LLVM
/// function for the target region, so that they can be used to initialize the
/// corresponding global `ConfigurationEnvironmentTy` structure.
static void
initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp,
                       llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs,
                       bool isTargetDevice, bool isGPU) {
  // TODO: Handle constant 'if' clauses.

  Value numThreads, numTeamsLower, numTeamsUpper, threadLimit;
  if (!isTargetDevice) {
    // On the host, clause values are forwarded into the region via host_eval
    // block arguments; recover the original host-side operands from them.
    extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
                           threadLimit);
  } else {
    // In the target device, values for these clauses are not passed as
    // host_eval, but instead evaluated prior to entry to the region. This
    // ensures values are mapped and available inside of the target region.
    if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
      numTeamsLower = teamsOp.getNumTeamsLower();
      numTeamsUpper = teamsOp.getNumTeamsUpper();
      threadLimit = teamsOp.getThreadLimit();
    }

    if (auto parallelOp = castOrGetParentOfType<omp::ParallelOp>(capturedOp))
      numThreads = parallelOp.getNumThreads();
  }

  // Handle clauses impacting the number of teams.
  // Sentinel convention throughout: -1 = clause absent, 0 = clause present
  // but its value is not a compile-time constant.

  int32_t minTeamsVal = 1, maxTeamsVal = -1;
  if (castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
    // TODO: Use `hostNumTeamsLower` to initialize `minTeamsVal`. For now, match
    // clang and set min and max to the same value.
    if (numTeamsUpper) {
      if (auto val = extractConstInteger(numTeamsUpper))
        minTeamsVal = maxTeamsVal = *val;
    } else {
      minTeamsVal = maxTeamsVal = 0;
    }
  } else if (castOrGetParentOfType<omp::ParallelOp>(capturedOp,
                                                    /*immediateParent=*/true) ||
             castOrGetParentOfType<omp::SimdOp>(capturedOp,
                                                /*immediateParent=*/true)) {
    // `parallel`/`simd` directly inside `target` (no `teams`): one team.
    minTeamsVal = maxTeamsVal = 1;
  } else {
    minTeamsVal = maxTeamsVal = -1;
  }

  // Handle clauses impacting the number of threads.

  // Overwrites `result` with the clause's constant value, or 0 ("set but
  // unknown") when the clause exists but isn't a constant. Leaves `result`
  // untouched when the clause is absent.
  auto setMaxValueFromClause = [](Value clauseValue, int32_t &result) {
    if (!clauseValue)
      return;

    if (auto val = extractConstInteger(clauseValue))
      result = *val;

    // Found an applicable clause, so it's not undefined. Mark as unknown
    // because it's not constant.
    if (result < 0)
      result = 0;
  };

  // Extract 'thread_limit' clause from 'target' and 'teams' directives.
  int32_t targetThreadLimitVal = -1, teamsThreadLimitVal = -1;
  setMaxValueFromClause(targetOp.getThreadLimit(), targetThreadLimitVal);
  setMaxValueFromClause(threadLimit, teamsThreadLimitVal);

  // Extract 'max_threads' clause from 'parallel' or set to 1 if it's SIMD.
  int32_t maxThreadsVal = -1;
  if (castOrGetParentOfType<omp::ParallelOp>(capturedOp))
    setMaxValueFromClause(numThreads, maxThreadsVal);
  else if (castOrGetParentOfType<omp::SimdOp>(capturedOp,
                                              /*immediateParent=*/true))
    maxThreadsVal = 1;

  // For max values, < 0 means unset, == 0 means set but unknown. Select the
  // minimum value between 'max_threads' and 'thread_limit' clauses that were
  // set.
  int32_t combinedMaxThreadsVal = targetThreadLimitVal;
  if (combinedMaxThreadsVal < 0 ||
      (teamsThreadLimitVal >= 0 && teamsThreadLimitVal < combinedMaxThreadsVal))
    combinedMaxThreadsVal = teamsThreadLimitVal;

  if (combinedMaxThreadsVal < 0 ||
      (maxThreadsVal >= 0 && maxThreadsVal < combinedMaxThreadsVal))
    combinedMaxThreadsVal = maxThreadsVal;

  // Team reductions only need buffer sizing on GPU targets.
  int32_t reductionDataSize = 0;
  if (isGPU && capturedOp) {
    if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp))
      reductionDataSize = getReductionDataSize(teamsOp);
  }

  // Update kernel bounds structure for the `OpenMPIRBuilder` to use.
  omp::TargetRegionFlags kernelFlags = targetOp.getKernelExecFlags(capturedOp);
  assert(
      omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic |
                                               omp::TargetRegionFlags::spmd) &&
      "invalid kernel flags");
  // generic+spmd -> GENERIC_SPMD, generic alone -> GENERIC, else SPMD.
  attrs.ExecFlags =
      omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic)
          ? omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::spmd)
                ? llvm::omp::OMP_TGT_EXEC_MODE_GENERIC_SPMD
                : llvm::omp::OMP_TGT_EXEC_MODE_GENERIC
          : llvm::omp::OMP_TGT_EXEC_MODE_SPMD;
  attrs.MinTeams = minTeamsVal;
  attrs.MaxTeams.front() = maxTeamsVal;
  attrs.MinThreads = 1;
  attrs.MaxThreads.front() = combinedMaxThreadsVal;
  attrs.ReductionDataSize = reductionDataSize;
  // TODO: Allow modified buffer length similar to
  // fopenmp-cuda-teams-reduction-recs-num flag in clang.
  if (attrs.ReductionDataSize != 0)
    attrs.ReductionBufferLength = 1024;
}
5035 
/// Gather LLVM runtime values for all clauses evaluated in the host that are
/// passed to the kernel invocation.
///
/// This function must be called only when compiling for the host. Also, it will
/// only provide correct results if it's called after the body of \c targetOp
/// has been fully generated.
static void
initTargetRuntimeAttrs(llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation,
                       omp::TargetOp targetOp, Operation *capturedOp,
                       llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs) {
  omp::LoopNestOp loopOp = castOrGetParentOfType<omp::LoopNestOp>(capturedOp);
  unsigned numLoops = loopOp ? loopOp.getNumLoops() : 0;

  // Recover host-side operands for each clause from the host_eval block
  // arguments; per-loop bounds/steps are gathered for the trip count below.
  Value numThreads, numTeamsLower, numTeamsUpper, teamsThreadLimit;
  llvm::SmallVector<Value> lowerBounds(numLoops), upperBounds(numLoops),
      steps(numLoops);
  extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
                         teamsThreadLimit, &lowerBounds, &upperBounds, &steps);

  // TODO: Handle constant 'if' clauses.
  if (Value targetThreadLimit = targetOp.getThreadLimit())
    attrs.TargetThreadLimit.front() =
        moduleTranslation.lookupValue(targetThreadLimit);

  if (numTeamsLower)
    attrs.MinTeams = moduleTranslation.lookupValue(numTeamsLower);

  if (numTeamsUpper)
    attrs.MaxTeams.front() = moduleTranslation.lookupValue(numTeamsUpper);

  if (teamsThreadLimit)
    attrs.TeamsThreadLimit.front() =
        moduleTranslation.lookupValue(teamsThreadLimit);

  if (numThreads)
    attrs.MaxThreads = moduleTranslation.lookupValue(numThreads);

  // Only kernels flagged as needing a trip count get one computed here.
  if (omp::bitEnumContainsAny(targetOp.getKernelExecFlags(capturedOp),
                              omp::TargetRegionFlags::trip_count)) {
    llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
    attrs.LoopTripCount = nullptr;

    // To calculate the trip count, we multiply together the trip counts of
    // every collapsed canonical loop. We don't need to create the loop nests
    // here, since we're only interested in the trip count.
    for (auto [loopLower, loopUpper, loopStep] :
         llvm::zip_equal(lowerBounds, upperBounds, steps)) {
      llvm::Value *lowerBound = moduleTranslation.lookupValue(loopLower);
      llvm::Value *upperBound = moduleTranslation.lookupValue(loopUpper);
      llvm::Value *step = moduleTranslation.lookupValue(loopStep);

      llvm::OpenMPIRBuilder::LocationDescription loc(builder);
      llvm::Value *tripCount = ompBuilder->calculateCanonicalLoopTripCount(
          loc, lowerBound, upperBound, step, /*IsSigned=*/true,
          loopOp.getLoopInclusive());

      // First loop: seed the accumulator instead of multiplying.
      if (!attrs.LoopTripCount) {
        attrs.LoopTripCount = tripCount;
        continue;
      }

      // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
      attrs.LoopTripCount = builder.CreateMul(attrs.LoopTripCount, tripCount,
                                              {}, /*HasNUW=*/true);
    }
  }
}
5104 
5105 static LogicalResult
5106 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
5107  LLVM::ModuleTranslation &moduleTranslation) {
5108  auto targetOp = cast<omp::TargetOp>(opInst);
5109  if (failed(checkImplementationStatus(opInst)))
5110  return failure();
5111 
5112  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5113  bool isTargetDevice = ompBuilder->Config.isTargetDevice();
5114  bool isGPU = ompBuilder->Config.isGPU();
5115 
5116  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
5117  auto argIface = cast<omp::BlockArgOpenMPOpInterface>(opInst);
5118  auto &targetRegion = targetOp.getRegion();
5119  // Holds the private vars that have been mapped along with the block argument
5120  // that corresponds to the MapInfoOp corresponding to the private var in
5121  // question. So, for instance:
5122  //
5123  // %10 = omp.map.info var_ptr(%6#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, ..)
5124  // omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1)
5125  //
5126  // Then, %10 has been created so that the descriptor can be used by the
5127  // privatizer @box.privatizer on the device side. Here we'd record {%6#0,
5128  // %arg0} in the mappedPrivateVars map.
5129  llvm::DenseMap<Value, Value> mappedPrivateVars;
5130  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
5131  SmallVector<Value> mapVars = targetOp.getMapVars();
5132  SmallVector<Value> hdaVars = targetOp.getHasDeviceAddrVars();
5133  ArrayRef<BlockArgument> mapBlockArgs = argIface.getMapBlockArgs();
5134  ArrayRef<BlockArgument> hdaBlockArgs = argIface.getHasDeviceAddrBlockArgs();
5135  llvm::Function *llvmOutlinedFn = nullptr;
5136 
5137  // TODO: It can also be false if a compile-time constant `false` IF clause is
5138  // specified.
5139  bool isOffloadEntry =
5140  isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
5141 
5142  // For some private variables, the MapsForPrivatizedVariablesPass
5143  // creates MapInfoOp instances. Go through the private variables and
5144  // the mapped variables so that during codegeneration we are able
5145  // to quickly look up the corresponding map variable, if any for each
5146  // private variable.
5147  if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) {
5148  OperandRange privateVars = targetOp.getPrivateVars();
5149  std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
5150  std::optional<DenseI64ArrayAttr> privateMapIndices =
5151  targetOp.getPrivateMapsAttr();
5152 
5153  for (auto [privVarIdx, privVarSymPair] :
5154  llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) {
5155  auto privVar = std::get<0>(privVarSymPair);
5156  auto privSym = std::get<1>(privVarSymPair);
5157 
5158  SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
5159  omp::PrivateClauseOp privatizer =
5160  findPrivatizer(targetOp, privatizerName);
5161 
5162  if (!privatizer.needsMap())
5163  continue;
5164 
5165  mlir::Value mappedValue =
5166  targetOp.getMappedValueForPrivateVar(privVarIdx);
5167  assert(mappedValue && "Expected to find mapped value for a privatized "
5168  "variable that needs mapping");
5169 
5170  // The MapInfoOp defining the map var isn't really needed later.
5171  // So, we don't store it in any datastructure. Instead, we just
5172  // do some sanity checks on it right now.
5173  auto mapInfoOp = mappedValue.getDefiningOp<omp::MapInfoOp>();
5174  [[maybe_unused]] Type varType = mapInfoOp.getVarType();
5175 
5176  // Check #1: Check that the type of the private variable matches
5177  // the type of the variable being mapped.
5178  if (!isa<LLVM::LLVMPointerType>(privVar.getType()))
5179  assert(
5180  varType == privVar.getType() &&
5181  "Type of private var doesn't match the type of the mapped value");
5182 
5183  // Ok, only 1 sanity check for now.
5184  // Record the block argument corresponding to this mapvar.
5185  mappedPrivateVars.insert(
5186  {privVar,
5187  targetRegion.getArgument(argIface.getMapBlockArgsStart() +
5188  (*privateMapIndices)[privVarIdx])});
5189  }
5190  }
5191 
5192  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5193  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
5194  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
5195  llvm::IRBuilderBase::InsertPointGuard guard(builder);
5196  builder.SetCurrentDebugLocation(llvm::DebugLoc());
5197  // Forward target-cpu and target-features function attributes from the
5198  // original function to the new outlined function.
5199  llvm::Function *llvmParentFn =
5200  moduleTranslation.lookupFunction(parentFn.getName());
5201  llvmOutlinedFn = codeGenIP.getBlock()->getParent();
5202  assert(llvmParentFn && llvmOutlinedFn &&
5203  "Both parent and outlined functions must exist at this point");
5204 
5205  if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
5206  attr.isStringAttribute())
5207  llvmOutlinedFn->addFnAttr(attr);
5208 
5209  if (auto attr = llvmParentFn->getFnAttribute("target-features");
5210  attr.isStringAttribute())
5211  llvmOutlinedFn->addFnAttr(attr);
5212 
5213  for (auto [arg, mapOp] : llvm::zip_equal(mapBlockArgs, mapVars)) {
5214  auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
5215  llvm::Value *mapOpValue =
5216  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
5217  moduleTranslation.mapValue(arg, mapOpValue);
5218  }
5219  for (auto [arg, mapOp] : llvm::zip_equal(hdaBlockArgs, hdaVars)) {
5220  auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
5221  llvm::Value *mapOpValue =
5222  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
5223  moduleTranslation.mapValue(arg, mapOpValue);
5224  }
5225 
5226  // Do privatization after moduleTranslation has already recorded
5227  // mapped values.
5228  PrivateVarsInfo privateVarsInfo(targetOp);
5229 
5230  llvm::Expected<llvm::BasicBlock *> afterAllocas =
5231  allocatePrivateVars(builder, moduleTranslation, privateVarsInfo,
5232  allocaIP, &mappedPrivateVars);
5233 
5234  if (failed(handleError(afterAllocas, *targetOp)))
5235  return llvm::make_error<PreviouslyReportedError>();
5236 
5237  builder.restoreIP(codeGenIP);
5238  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo,
5239  &mappedPrivateVars),
5240  *targetOp)
5241  .failed())
5242  return llvm::make_error<PreviouslyReportedError>();
5243 
5244  if (failed(copyFirstPrivateVars(
5245  builder, moduleTranslation, privateVarsInfo.mlirVars,
5246  privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
5247  &mappedPrivateVars)))
5248  return llvm::make_error<PreviouslyReportedError>();
5249 
5250  SmallVector<Region *> privateCleanupRegions;
5251  llvm::transform(privateVarsInfo.privatizers,
5252  std::back_inserter(privateCleanupRegions),
5253  [](omp::PrivateClauseOp privatizer) {
5254  return &privatizer.getDeallocRegion();
5255  });
5256 
5258  targetRegion, "omp.target", builder, moduleTranslation);
5259 
5260  if (!exitBlock)
5261  return exitBlock.takeError();
5262 
5263  builder.SetInsertPoint(*exitBlock);
5264  if (!privateCleanupRegions.empty()) {
5265  if (failed(inlineOmpRegionCleanup(
5266  privateCleanupRegions, privateVarsInfo.llvmVars,
5267  moduleTranslation, builder, "omp.targetop.private.cleanup",
5268  /*shouldLoadCleanupRegionArg=*/false))) {
5269  return llvm::createStringError(
5270  "failed to inline `dealloc` region of `omp.private` "
5271  "op in the target region");
5272  }
5273  return builder.saveIP();
5274  }
5275 
5276  return InsertPointTy(exitBlock.get(), exitBlock.get()->end());
5277  };
5278 
5279  StringRef parentName = parentFn.getName();
5280 
5281  llvm::TargetRegionEntryInfo entryInfo;
5282 
5283  getTargetEntryUniqueInfo(entryInfo, targetOp, parentName);
5284 
5285  MapInfoData mapData;
5286  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
5287  builder, /*useDevPtrOperands=*/{},
5288  /*useDevAddrOperands=*/{}, hdaVars);
5289 
5290  MapInfosTy combinedInfos;
5291  auto genMapInfoCB =
5292  [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) -> MapInfosTy & {
5293  builder.restoreIP(codeGenIP);
5294  genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, true);
5295  return combinedInfos;
5296  };
5297 
5298  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
5299  llvm::Value *&retVal, InsertPointTy allocaIP,
5300  InsertPointTy codeGenIP)
5301  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
5302  llvm::IRBuilderBase::InsertPointGuard guard(builder);
5303  builder.SetCurrentDebugLocation(llvm::DebugLoc());
5304  // We just return the unaltered argument for the host function
5305  // for now, some alterations may be required in the future to
5306  // keep host fallback functions working identically to the device
5307  // version (e.g. pass ByCopy values should be treated as such on
5308  // host and device, currently not always the case)
5309  if (!isTargetDevice) {
5310  retVal = cast<llvm::Value>(&arg);
5311  return codeGenIP;
5312  }
5313 
5314  return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
5315  *ompBuilder, moduleTranslation,
5316  allocaIP, codeGenIP);
5317  };
5318 
5319  llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs runtimeAttrs;
5320  llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs defaultAttrs;
5321  Operation *targetCapturedOp = targetOp.getInnermostCapturedOmpOp();
5322  initTargetDefaultAttrs(targetOp, targetCapturedOp, defaultAttrs,
5323  isTargetDevice, isGPU);
5324 
5325  // Collect host-evaluated values needed to properly launch the kernel from the
5326  // host.
5327  if (!isTargetDevice)
5328  initTargetRuntimeAttrs(builder, moduleTranslation, targetOp,
5329  targetCapturedOp, runtimeAttrs);
5330 
5331  // Pass host-evaluated values as parameters to the kernel / host fallback,
5332  // except if they are constants. In any case, map the MLIR block argument to
5333  // the corresponding LLVM values.
5335  SmallVector<Value> hostEvalVars = targetOp.getHostEvalVars();
5336  ArrayRef<BlockArgument> hostEvalBlockArgs = argIface.getHostEvalBlockArgs();
5337  for (auto [arg, var] : llvm::zip_equal(hostEvalBlockArgs, hostEvalVars)) {
5338  llvm::Value *value = moduleTranslation.lookupValue(var);
5339  moduleTranslation.mapValue(arg, value);
5340 
5341  if (!llvm::isa<llvm::Constant>(value))
5342  kernelInput.push_back(value);
5343  }
5344 
5345  for (size_t i = 0, e = mapData.OriginalValue.size(); i != e; ++i) {
5346  // declare target arguments are not passed to kernels as arguments
5347  // TODO: We currently do not handle cases where a member is explicitly
5348  // passed in as an argument, this will likley need to be handled in
5349  // the near future, rather than using IsAMember, it may be better to
5350  // test if the relevant BlockArg is used within the target region and
5351  // then use that as a basis for exclusion in the kernel inputs.
5352  if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
5353  kernelInput.push_back(mapData.OriginalValue[i]);
5354  }
5355 
5357  buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
5358  moduleTranslation, dds);
5359 
5360  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
5361  findAllocaInsertPoint(builder, moduleTranslation);
5362  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
5363 
5364  llvm::OpenMPIRBuilder::TargetDataInfo info(
5365  /*RequiresDevicePointerInfo=*/false,
5366  /*SeparateBeginEndCalls=*/true);
5367 
5368  auto customMapperCB =
5369  [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
5370  if (!combinedInfos.Mappers[i])
5371  return nullptr;
5372  info.HasMapper = true;
5373  return getOrCreateUserDefinedMapperFunc(combinedInfos.Mappers[i], builder,
5374  moduleTranslation);
5375  };
5376 
5377  llvm::Value *ifCond = nullptr;
5378  if (Value targetIfCond = targetOp.getIfExpr())
5379  ifCond = moduleTranslation.lookupValue(targetIfCond);
5380 
5381  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
5382  moduleTranslation.getOpenMPBuilder()->createTarget(
5383  ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), info, entryInfo,
5384  defaultAttrs, runtimeAttrs, ifCond, kernelInput, genMapInfoCB, bodyCB,
5385  argAccessorCB, customMapperCB, dds, targetOp.getNowait());
5386 
5387  if (failed(handleError(afterIP, opInst)))
5388  return failure();
5389 
5390  builder.restoreIP(*afterIP);
5391 
5392  // Remap access operations to declare target reference pointers for the
5393  // device, essentially generating extra loadop's as necessary
5394  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
5395  handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
5396  llvmOutlinedFn);
5397 
5398  return success();
5399 }
5400 
5401 static LogicalResult
5402 convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
5403  LLVM::ModuleTranslation &moduleTranslation) {
5404  // Amend omp.declare_target by deleting the IR of the outlined functions
5405  // created for target regions. They cannot be filtered out from MLIR earlier
5406  // because the omp.target operation inside must be translated to LLVM, but
5407  // the wrapper functions themselves must not remain at the end of the
5408  // process. We know that functions where omp.declare_target does not match
5409  // omp.is_target_device at this stage can only be wrapper functions because
5410  // those that aren't are removed earlier as an MLIR transformation pass.
5411  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
5412  if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
5413  op->getParentOfType<ModuleOp>().getOperation())) {
5414  if (!offloadMod.getIsTargetDevice())
5415  return success();
5416 
5417  omp::DeclareTargetDeviceType declareType =
5418  attribute.getDeviceType().getValue();
5419 
5420  if (declareType == omp::DeclareTargetDeviceType::host) {
5421  llvm::Function *llvmFunc =
5422  moduleTranslation.lookupFunction(funcOp.getName());
5423  llvmFunc->dropAllReferences();
5424  llvmFunc->eraseFromParent();
5425  }
5426  }
5427  return success();
5428  }
5429 
5430  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
5431  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
5432  if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
5433  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5434  bool isDeclaration = gOp.isDeclaration();
5435  bool isExternallyVisible =
5436  gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
5437  auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
5438  llvm::StringRef mangledName = gOp.getSymName();
5439  auto captureClause =
5440  convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
5441  auto deviceClause =
5442  convertToDeviceClauseKind(attribute.getDeviceType().getValue());
5443  // unused for MLIR at the moment, required in Clang for book
5444  // keeping
5445  std::vector<llvm::GlobalVariable *> generatedRefs;
5446 
5447  std::vector<llvm::Triple> targetTriple;
5448  auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>(
5449  op->getParentOfType<mlir::ModuleOp>()->getAttr(
5450  LLVM::LLVMDialect::getTargetTripleAttrName()));
5451  if (targetTripleAttr)
5452  targetTriple.emplace_back(targetTripleAttr.data());
5453 
5454  auto fileInfoCallBack = [&loc]() {
5455  std::string filename = "";
5456  std::uint64_t lineNo = 0;
5457 
5458  if (loc) {
5459  filename = loc.getFilename().str();
5460  lineNo = loc.getLine();
5461  }
5462 
5463  return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
5464  lineNo);
5465  };
5466 
5467  ompBuilder->registerTargetGlobalVariable(
5468  captureClause, deviceClause, isDeclaration, isExternallyVisible,
5469  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
5470  generatedRefs, /*OpenMPSimd*/ false, targetTriple,
5471  /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
5472  gVal->getType(), gVal);
5473 
5474  if (ompBuilder->Config.isTargetDevice() &&
5475  (attribute.getCaptureClause().getValue() !=
5476  mlir::omp::DeclareTargetCaptureClause::to ||
5477  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
5478  ompBuilder->getAddrOfDeclareTargetVar(
5479  captureClause, deviceClause, isDeclaration, isExternallyVisible,
5480  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
5481  generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
5482  /*GlobalInitializer*/ nullptr,
5483  /*VariableLinkage*/ nullptr);
5484  }
5485  }
5486  }
5487 
5488  return success();
5489 }
5490 
5491 // Returns true if the operation is inside a TargetOp or
5492 // is part of a declare target function.
5493 static bool isTargetDeviceOp(Operation *op) {
5494  // Assumes no reverse offloading
5495  if (op->getParentOfType<omp::TargetOp>())
5496  return true;
5497 
5498  // Certain operations return results, and whether utilised in host or
5499  // target there is a chance an LLVM Dialect operation depends on it
5500  // by taking it in as an operand, so we must always lower these in
5501  // some manner or result in an ICE (whether they end up in a no-op
5502  // or otherwise).
5503  if (mlir::isa<omp::ThreadprivateOp>(op))
5504  return true;
5505 
5506  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
5507  if (auto declareTargetIface =
5508  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
5509  parentFn.getOperation()))
5510  if (declareTargetIface.isDeclareTarget() &&
5511  declareTargetIface.getDeclareTargetDeviceType() !=
5512  mlir::omp::DeclareTargetDeviceType::host)
5513  return true;
5514 
5515  return false;
5516 }
5517 
5518 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
5519 /// OpenMP runtime calls).
5520 static LogicalResult
5521 convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
5522  LLVM::ModuleTranslation &moduleTranslation) {
5523  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5524 
5525  // For each loop, introduce one stack frame to hold loop information. Ensure
5526  // this is only done for the outermost loop wrapper to prevent introducing
5527  // multiple stack frames for a single loop. Initially set to null, the loop
5528  // information structure is initialized during translation of the nested
5529  // omp.loop_nest operation, making it available to translation of all loop
5530  // wrappers after their body has been successfully translated.
5531  bool isOutermostLoopWrapper =
5532  isa_and_present<omp::LoopWrapperInterface>(op) &&
5533  !dyn_cast_if_present<omp::LoopWrapperInterface>(op->getParentOp());
5534 
5535  if (isOutermostLoopWrapper)
5536  moduleTranslation.stackPush<OpenMPLoopInfoStackFrame>();
5537 
5538  auto result =
5540  .Case([&](omp::BarrierOp op) -> LogicalResult {
5541  if (failed(checkImplementationStatus(*op)))
5542  return failure();
5543 
5544  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
5545  ompBuilder->createBarrier(builder.saveIP(),
5546  llvm::omp::OMPD_barrier);
5547  return handleError(afterIP, *op);
5548  })
5549  .Case([&](omp::TaskyieldOp op) {
5550  if (failed(checkImplementationStatus(*op)))
5551  return failure();
5552 
5553  ompBuilder->createTaskyield(builder.saveIP());
5554  return success();
5555  })
5556  .Case([&](omp::FlushOp op) {
5557  if (failed(checkImplementationStatus(*op)))
5558  return failure();
5559 
5560  // No support in Openmp runtime function (__kmpc_flush) to accept
5561  // the argument list.
5562  // OpenMP standard states the following:
5563  // "An implementation may implement a flush with a list by ignoring
5564  // the list, and treating it the same as a flush without a list."
5565  //
5566  // The argument list is discarded so that, flush with a list is
5567  // treated same as a flush without a list.
5568  ompBuilder->createFlush(builder.saveIP());
5569  return success();
5570  })
5571  .Case([&](omp::ParallelOp op) {
5572  return convertOmpParallel(op, builder, moduleTranslation);
5573  })
5574  .Case([&](omp::MaskedOp) {
5575  return convertOmpMasked(*op, builder, moduleTranslation);
5576  })
5577  .Case([&](omp::MasterOp) {
5578  return convertOmpMaster(*op, builder, moduleTranslation);
5579  })
5580  .Case([&](omp::CriticalOp) {
5581  return convertOmpCritical(*op, builder, moduleTranslation);
5582  })
5583  .Case([&](omp::OrderedRegionOp) {
5584  return convertOmpOrderedRegion(*op, builder, moduleTranslation);
5585  })
5586  .Case([&](omp::OrderedOp) {
5587  return convertOmpOrdered(*op, builder, moduleTranslation);
5588  })
5589  .Case([&](omp::WsloopOp) {
5590  return convertOmpWsloop(*op, builder, moduleTranslation);
5591  })
5592  .Case([&](omp::SimdOp) {
5593  return convertOmpSimd(*op, builder, moduleTranslation);
5594  })
5595  .Case([&](omp::AtomicReadOp) {
5596  return convertOmpAtomicRead(*op, builder, moduleTranslation);
5597  })
5598  .Case([&](omp::AtomicWriteOp) {
5599  return convertOmpAtomicWrite(*op, builder, moduleTranslation);
5600  })
5601  .Case([&](omp::AtomicUpdateOp op) {
5602  return convertOmpAtomicUpdate(op, builder, moduleTranslation);
5603  })
5604  .Case([&](omp::AtomicCaptureOp op) {
5605  return convertOmpAtomicCapture(op, builder, moduleTranslation);
5606  })
5607  .Case([&](omp::CancelOp op) {
5608  return convertOmpCancel(op, builder, moduleTranslation);
5609  })
5610  .Case([&](omp::CancellationPointOp op) {
5611  return convertOmpCancellationPoint(op, builder, moduleTranslation);
5612  })
5613  .Case([&](omp::SectionsOp) {
5614  return convertOmpSections(*op, builder, moduleTranslation);
5615  })
5616  .Case([&](omp::SingleOp op) {
5617  return convertOmpSingle(op, builder, moduleTranslation);
5618  })
5619  .Case([&](omp::TeamsOp op) {
5620  return convertOmpTeams(op, builder, moduleTranslation);
5621  })
5622  .Case([&](omp::TaskOp op) {
5623  return convertOmpTaskOp(op, builder, moduleTranslation);
5624  })
5625  .Case([&](omp::TaskgroupOp op) {
5626  return convertOmpTaskgroupOp(op, builder, moduleTranslation);
5627  })
5628  .Case([&](omp::TaskwaitOp op) {
5629  return convertOmpTaskwaitOp(op, builder, moduleTranslation);
5630  })
5631  .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareMapperOp,
5632  omp::DeclareMapperInfoOp, omp::DeclareReductionOp,
5633  omp::CriticalDeclareOp>([](auto op) {
5634  // `yield` and `terminator` can be just omitted. The block structure
5635  // was created in the region that handles their parent operation.
5636  // `declare_reduction` will be used by reductions and is not
5637  // converted directly, skip it.
5638  // `declare_mapper` and `declare_mapper.info` are handled whenever
5639  // they are referred to through a `map` clause.
5640  // `critical.declare` is only used to declare names of critical
5641  // sections which will be used by `critical` ops and hence can be
5642  // ignored for lowering. The OpenMP IRBuilder will create unique
5643  // name for critical section names.
5644  return success();
5645  })
5646  .Case([&](omp::ThreadprivateOp) {
5647  return convertOmpThreadprivate(*op, builder, moduleTranslation);
5648  })
5649  .Case<omp::TargetDataOp, omp::TargetEnterDataOp,
5650  omp::TargetExitDataOp, omp::TargetUpdateOp>([&](auto op) {
5651  return convertOmpTargetData(op, builder, moduleTranslation);
5652  })
5653  .Case([&](omp::TargetOp) {
5654  return convertOmpTarget(*op, builder, moduleTranslation);
5655  })
5656  .Case([&](omp::DistributeOp) {
5657  return convertOmpDistribute(*op, builder, moduleTranslation);
5658  })
5659  .Case([&](omp::LoopNestOp) {
5660  return convertOmpLoopNest(*op, builder, moduleTranslation);
5661  })
5662  .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
5663  [&](auto op) {
5664  // No-op, should be handled by relevant owning operations e.g.
5665  // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
5666  // etc. and then discarded
5667  return success();
5668  })
5669  .Default([&](Operation *inst) {
5670  return inst->emitError()
5671  << "not yet implemented: " << inst->getName();
5672  });
5673 
5674  if (isOutermostLoopWrapper)
5675  moduleTranslation.stackPop();
5676 
5677  return result;
5678 }
5679 
/// Translate an OpenMP operation during a target-device compilation.
/// Currently a straight forwarder: device-side lowering follows the same
/// path as host lowering (see convertHostOrTargetOperation).
static LogicalResult
convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation) {
  return convertHostOrTargetOperation(op, builder, moduleTranslation);
}
5685 
5686 static LogicalResult
5687 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
5688  LLVM::ModuleTranslation &moduleTranslation) {
5689  if (isa<omp::TargetOp>(op))
5690  return convertOmpTarget(*op, builder, moduleTranslation);
5691  if (isa<omp::TargetDataOp>(op))
5692  return convertOmpTargetData(op, builder, moduleTranslation);
5693  bool interrupted =
5694  op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
5695  if (isa<omp::TargetOp>(oper)) {
5696  if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
5697  return WalkResult::interrupt();
5698  return WalkResult::skip();
5699  }
5700  if (isa<omp::TargetDataOp>(oper)) {
5701  if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
5702  return WalkResult::interrupt();
5703  return WalkResult::skip();
5704  }
5705 
5706  // Non-target ops might nest target-related ops, therefore, we
5707  // translate them as non-OpenMP scopes. Translating them is needed by
5708  // nested target-related ops since they might need LLVM values defined
5709  // in their parent non-target ops.
5710  if (isa<omp::OpenMPDialect>(oper->getDialect()) &&
5711  oper->getParentOfType<LLVM::LLVMFuncOp>() &&
5712  !oper->getRegions().empty()) {
5713  if (auto blockArgsIface =
5714  dyn_cast<omp::BlockArgOpenMPOpInterface>(oper))
5715  forwardArgs(moduleTranslation, blockArgsIface);
5716  else {
5717  // Here we map entry block arguments of
5718  // non-BlockArgOpenMPOpInterface ops if they can be encountered
5719  // inside of a function and they define any of these arguments.
5720  if (isa<mlir::omp::AtomicUpdateOp>(oper))
5721  for (auto [operand, arg] :
5722  llvm::zip_equal(oper->getOperands(),
5723  oper->getRegion(0).getArguments())) {
5724  moduleTranslation.mapValue(
5725  arg, builder.CreateLoad(
5726  moduleTranslation.convertType(arg.getType()),
5727  moduleTranslation.lookupValue(operand)));
5728  }
5729  }
5730 
5731  if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) {
5732  assert(builder.GetInsertBlock() &&
5733  "No insert block is set for the builder");
5734  for (auto iv : loopNest.getIVs()) {
5735  // Map iv to an undefined value just to keep the IR validity.
5736  moduleTranslation.mapValue(
5738  moduleTranslation.convertType(iv.getType())));
5739  }
5740  }
5741 
5742  for (Region &region : oper->getRegions()) {
5743  // Regions are fake in the sense that they are not a truthful
5744  // translation of the OpenMP construct being converted (e.g. no
5745  // OpenMP runtime calls will be generated). We just need this to
5746  // prepare the kernel invocation args.
5748  auto result = convertOmpOpRegions(
5749  region, oper->getName().getStringRef().str() + ".fake.region",
5750  builder, moduleTranslation, &phis);
5751  if (failed(handleError(result, *oper)))
5752  return WalkResult::interrupt();
5753 
5754  builder.SetInsertPoint(result.get(), result.get()->end());
5755  }
5756 
5757  return WalkResult::skip();
5758  }
5759 
5760  return WalkResult::advance();
5761  }).wasInterrupted();
5762  return failure(interrupted);
5763 }
5764 
5765 namespace {
5766 
5767 /// Implementation of the dialect interface that converts operations belonging
5768 /// to the OpenMP dialect to LLVM IR.
5769 class OpenMPDialectLLVMIRTranslationInterface
5771 public:
5773 
5774  /// Translates the given operation to LLVM IR using the provided IR builder
5775  /// and saving the state in `moduleTranslation`.
5776  LogicalResult
5777  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
5778  LLVM::ModuleTranslation &moduleTranslation) const final;
5779 
5780  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
5781  /// runtime calls, or operation amendments
5782  LogicalResult
5784  NamedAttribute attribute,
5785  LLVM::ModuleTranslation &moduleTranslation) const final;
5786 };
5787 
5788 } // namespace
5789 
5790 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
5791  Operation *op, ArrayRef<llvm::Instruction *> instructions,
5792  NamedAttribute attribute,
5793  LLVM::ModuleTranslation &moduleTranslation) const {
5794  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
5795  attribute.getName())
5796  .Case("omp.is_target_device",
5797  [&](Attribute attr) {
5798  if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
5799  llvm::OpenMPIRBuilderConfig &config =
5800  moduleTranslation.getOpenMPBuilder()->Config;
5801  config.setIsTargetDevice(deviceAttr.getValue());
5802  return success();
5803  }
5804  return failure();
5805  })
5806  .Case("omp.is_gpu",
5807  [&](Attribute attr) {
5808  if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
5809  llvm::OpenMPIRBuilderConfig &config =
5810  moduleTranslation.getOpenMPBuilder()->Config;
5811  config.setIsGPU(gpuAttr.getValue());
5812  return success();
5813  }
5814  return failure();
5815  })
5816  .Case("omp.host_ir_filepath",
5817  [&](Attribute attr) {
5818  if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
5819  llvm::OpenMPIRBuilder *ompBuilder =
5820  moduleTranslation.getOpenMPBuilder();
5821  ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
5822  return success();
5823  }
5824  return failure();
5825  })
5826  .Case("omp.flags",
5827  [&](Attribute attr) {
5828  if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
5829  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
5830  return failure();
5831  })
5832  .Case("omp.version",
5833  [&](Attribute attr) {
5834  if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
5835  llvm::OpenMPIRBuilder *ompBuilder =
5836  moduleTranslation.getOpenMPBuilder();
5837  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
5838  versionAttr.getVersion());
5839  return success();
5840  }
5841  return failure();
5842  })
5843  .Case("omp.declare_target",
5844  [&](Attribute attr) {
5845  if (auto declareTargetAttr =
5846  dyn_cast<omp::DeclareTargetAttr>(attr))
5847  return convertDeclareTargetAttr(op, declareTargetAttr,
5848  moduleTranslation);
5849  return failure();
5850  })
5851  .Case("omp.requires",
5852  [&](Attribute attr) {
5853  if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
5854  using Requires = omp::ClauseRequires;
5855  Requires flags = requiresAttr.getValue();
5856  llvm::OpenMPIRBuilderConfig &config =
5857  moduleTranslation.getOpenMPBuilder()->Config;
5858  config.setHasRequiresReverseOffload(
5859  bitEnumContainsAll(flags, Requires::reverse_offload));
5860  config.setHasRequiresUnifiedAddress(
5861  bitEnumContainsAll(flags, Requires::unified_address));
5862  config.setHasRequiresUnifiedSharedMemory(
5863  bitEnumContainsAll(flags, Requires::unified_shared_memory));
5864  config.setHasRequiresDynamicAllocators(
5865  bitEnumContainsAll(flags, Requires::dynamic_allocators));
5866  return success();
5867  }
5868  return failure();
5869  })
5870  .Case("omp.target_triples",
5871  [&](Attribute attr) {
5872  if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
5873  llvm::OpenMPIRBuilderConfig &config =
5874  moduleTranslation.getOpenMPBuilder()->Config;
5875  config.TargetTriples.clear();
5876  config.TargetTriples.reserve(triplesAttr.size());
5877  for (Attribute tripleAttr : triplesAttr) {
5878  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
5879  config.TargetTriples.emplace_back(tripleStrAttr.getValue());
5880  else
5881  return failure();
5882  }
5883  return success();
5884  }
5885  return failure();
5886  })
5887  .Default([](Attribute) {
5888  // Fall through for omp attributes that do not require lowering.
5889  return success();
5890  })(attribute.getValue());
5891 
5892  return failure();
5893 }
5894 
5895 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
5896 /// (including OpenMP runtime calls).
5897 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
5898  Operation *op, llvm::IRBuilderBase &builder,
5899  LLVM::ModuleTranslation &moduleTranslation) const {
5900 
5901  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5902  if (ompBuilder->Config.isTargetDevice()) {
5903  if (isTargetDeviceOp(op)) {
5904  return convertTargetDeviceOp(op, builder, moduleTranslation);
5905  } else {
5906  return convertTargetOpsInNest(op, builder, moduleTranslation);
5907  }
5908  }
5909  return convertHostOrTargetOperation(op, builder, moduleTranslation);
5910 }
5911 
5913  registry.insert<omp::OpenMPDialect>();
5914  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
5915  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
5916  });
5917 }
5918 
5920  DialectRegistry registry;
5922  context.appendDialectRegistry(registry);
5923 }
union mlir::linalg::@1194::ArityGroupAndKind::Kind kind
@ None
static llvm::Value * getRefPtrIfDeclareTarget(mlir::Value value, LLVM::ModuleTranslation &moduleTranslation)
static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::Function *func)
static LogicalResult convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static llvm::omp::OrderKind convertOrderKind(std::optional< omp::ClauseOrderKind > o)
Convert Order attribute to llvm::omp::OrderKind.
static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp)
static llvm::Expected< llvm::Function * > emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::StringRef mapperFuncName)
static LogicalResult convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::Expected< llvm::Value * > initPrivateVar(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg, llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Initialize a single (first)private variable.
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertToCaptureClauseKind(mlir::omp::DeclareTargetCaptureClause captureClause)
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, bool first)
static OpTy castOrGetParentOfType(Operation *op, bool immediateParent=false)
If op is of the given type parameter, return it casted to that type.
static LogicalResult copyFirstPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< mlir::Value > &mlirPrivateVars, ArrayRef< llvm::Value * > llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static LogicalResult convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered_region' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an omp.atomic.write operation to LLVM IR.
static OwningAtomicReductionGen makeAtomicReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible atomic reduction generator for the given reduction declaration.
static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static llvm::Value * findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Return the llvm::Value * corresponding to the privateVar that is being privatized.
static LogicalResult convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
static void popCancelFinalizationCB(const ArrayRef< llvm::BranchInst * > cancelTerminators, llvm::OpenMPIRBuilder &ompBuilder, const llvm::OpenMPIRBuilder::InsertPointTy &afterIP)
If we cancelled the construct, we should branch to the finalization block of that construct.
static LogicalResult cleanupPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, Location loc, SmallVectorImpl< llvm::Value * > &llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls)
static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static llvm::Expected< llvm::BasicBlock * > allocatePrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Allocate and initialize delayed private variables.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op)
Converts an LLVM dialect binary operation to the corresponding enum value for atomicrmw supported bin...
static llvm::AtomicOrdering convertAtomicOrdering(std::optional< omp::ClauseMemoryOrderKind > ao)
Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static void setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder, llvm::BasicBlock *block=nullptr)
static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult allocAndInitializeReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef)
static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, MapInfosTy &combinedInfo, bool isTargetParams, int mapDataParentIdx=-1)
static LogicalResult convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
static bool isTargetDeviceOp(Operation *op)
static LogicalResult inlineOmpRegionCleanup(llvm::SmallVectorImpl< Region * > &cleanupRegions, llvm::ArrayRef< llvm::Value * > privateVariables, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, StringRef regionName, bool shouldLoadCleanupRegionArg=true)
handling of DeclareReductionOp's cleanup region
static void pushCancelFinalizationCB(SmallVectorImpl< llvm::BranchInst * > &cancelTerminators, llvm::IRBuilderBase &llvmBuilder, llvm::OpenMPIRBuilder &ompBuilder, mlir::Operation *op, llvm::omp::Directive cancelDirective)
Shared implementation of a callback which adds a termiator for the new block created for the branch t...
static void mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, DenseMap< Value, llvm::Value * > &reductionVariableMap, unsigned i)
Map input arguments to reduction initialization region.
static LogicalResult initReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::BasicBlock *latestAllocaBlock, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef, SmallVectorImpl< DeferredStore > &deferredStores)
Inline reductions' init regions.
static LogicalResult convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp)
static llvm::Error initPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static llvm::SmallString< 64 > getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, llvm::OpenMPIRBuilder &ompBuilder)
static OwningReductionGen makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible reduction generator for the given reduction declaration.
static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts the OpenMP parallel operation to LLVM IR.
static llvm::OpenMPIRBuilder::InsertPointTy findAllocaInsertPoint(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Find the insertion point for allocas given the current insertion point for normal operations in the b...
static LogicalResult inlineConvertOmpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::Value * > *continuationBlockArgs=nullptr)
Translates the blocks contained in the given region and appends them at the current insertion poin...
static LogicalResult convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP Threadprivate operation into LLVM IR using OpenMPIRBuilder.
static omp::PrivateClauseOp findPrivatizer(Operation *from, SymbolRefAttr symbolName)
Looks up from the operation from and returns the PrivateClauseOp with name symbolName.
static LogicalResult convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, bool isTargetParams=false)
LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
Lowers the FlagsAttr which is applied to the module on the device pass when offloading,...
static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl)
static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, omp::TargetOp targetOp, llvm::StringRef parentName="")
static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, Value &numTeamsLower, Value &numTeamsUpper, Value &threadLimit, llvm::SmallVectorImpl< Value > *lowerBounds=nullptr, llvm::SmallVectorImpl< Value > *upperBounds=nullptr, llvm::SmallVectorImpl< Value > *steps=nullptr)
Follow uses of host_eval-defined block arguments of the given omp.target operation and populate outpu...
uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl)
std::vector< llvm::Value * > calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, bool isArrayTy, OperandRange bounds)
This function calculates the array/pointer offset for map data provided with bounds operations,...
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static llvm::Expected< llvm::Function * > getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation, omp::BlockArgOpenMPOpInterface blockArgIface)
Maps block arguments from blockArgIface (which are MLIR values) to the corresponding LLVM values of t...
static LogicalResult allocReductionVars(T loop, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, SmallVectorImpl< DeferredStore > &deferredStores, llvm::ArrayRef< bool > isByRefs)
Allocate space for privatized reduction variables.
static ArrayRef< bool > getIsByRef(std::optional< ArrayRef< bool >> attr)
static bool constructIsCancellable(Operation *op)
Returns true if the construct contains omp.cancel or omp.cancellation_point.
static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP)
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind)
Convert ProcBindKind from MLIR-generated enum to LLVM enum.
static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static void collectReductionDecls(T op, SmallVectorImpl< omp::DeclareReductionOp > &reductions)
Populates reductions with reduction declarations used in the given op.
static LogicalResult handleError(llvm::Error error, Operation &op)
static void processMapMembersWithParent(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag)
static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause)
static LogicalResult checkImplementationStatus(Operation &op)
Check whether translation to LLVM IR for the given operation is currently supported.
static LogicalResult createReductionsAndCleanup(OP op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, ArrayRef< llvm::Value * > privateReductionVariables, ArrayRef< bool > isByRef, bool isNowait=false, bool isTeamsReduction=false)
static LogicalResult convertOmpCancellationPoint(omp::CancellationPointOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static uint64_t getReductionDataSize(OpTy &op)
static llvm::CanonicalLoopInfo * findCurrentLoopInfo(LLVM::ModuleTranslation &moduleTranslation)
Find the loop information structure for the loop nest being translated.
static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert omp.atomic.read operation to LLVM IR.
static llvm::omp::Directive convertCancellationConstructType(omp::ClauseCancellationConstructType directive)
static void initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs, bool isTargetDevice, bool isGPU)
Populate default MinTeams, MaxTeams and MaxThreads to their default values as stated by the correspon...
static std::optional< int64_t > extractConstInteger(Value value)
If the given value is defined by an llvm.mlir.constant operation and it is of an integer type,...
static LogicalResult convertIgnoredWrapper(omp::LoopWrapperInterface opInst, LLVM::ModuleTranslation &moduleTranslation)
Helper function to map block arguments defined by ignored loop wrappers to LLVM values and prevent an...
static void initTargetRuntimeAttrs(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs)
Gather LLVM runtime values for all clauses evaluated in the host that are passed to the kernel invoca...
static void collectReductionInfo(T loop, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< OwningReductionGen > &owningReductionGens, SmallVectorImpl< OwningAtomicReductionGen > &owningAtomicReductionGens, const ArrayRef< llvm::Value * > privateReductionVariables, SmallVectorImpl< llvm::OpenMPIRBuilder::ReductionInfo > &reductionInfos)
Collect reduction info.
static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Given an OpenMP MLIR operation, create the corresponding LLVM IR (including OpenMP runtime calls).
static bool checkIfPointerMap(omp::MapInfoOp mapOp)
static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void buildDependData(std::optional< ArrayAttr > dependKinds, OperandRange dependVars, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::OpenMPIRBuilder::DependData > &dds)
static llvm::Expected< llvm::BasicBlock * > convertOmpOpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::PHINode * > *continuationBlockPHIs=nullptr)
Converts the given region that appears within an OpenMP dialect operation to LLVM IR,...
static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
llvm::Value * getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::Value *basePointer, llvm::Type *baseType, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void collectMapDataFromMapOperands(MapInfoData &mapData, SmallVectorImpl< Value > &mapVars, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::IRBuilderBase &builder, ArrayRef< Value > useDevPtrOperands={}, ArrayRef< Value > useDevAddrOperands={}, ArrayRef< Value > hasDevAddrOperands={})
static bool isDeclareTargetLink(mlir::Value value)
#define MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CLASS_NAME)
Definition: TypeID.h:331
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class represents an argument of a Block.
Definition: Value.h:295
Block represents an ordered list of Operations.
Definition: Block.h:33
BlockArgument getArgument(unsigned i)
Definition: Block.h:129
unsigned getNumArguments()
Definition: Block.h:128
Operation & back()
Definition: Block.h:152
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:246
Operation & front()
Definition: Block.h:153
iterator begin()
Definition: Block.h:143
The main mechanism for performing data layout queries.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
An instance of this location represents a tuple of file, line number, and column number.
Definition: Location.h:164
Base class for dialect interfaces providing translation to LLVM IR.
virtual LogicalResult amendOperation(Operation *op, ArrayRef< llvm::Instruction * > instructions, NamedAttribute attribute, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to act on an operation that has dialect attributes from the derive...
virtual LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to provide translation of the operations to LLVM IR.
Concrete CRTP base class for ModuleTranslation stack frames.
Implementation class for module translation.
llvm::Value * lookupValue(Value value) const
Finds an LLVM IR value corresponding to the given MLIR value.
WalkResult stackWalk(llvm::function_ref< WalkResult(T &)> callback)
Calls callback for every ModuleTranslation stack frame of type T starting from the top of the stack.
void stackPush(Args &&...args)
Creates a stack frame of type T on ModuleTranslation stack.
LogicalResult convertBlock(Block &bb, bool ignoreArguments, llvm::IRBuilderBase &builder)
Translates the contents of the given block to LLVM IR using this translator.
SmallVector< llvm::Value * > lookupValues(ValueRange values)
Looks up a list of remapped values.
void mapFunction(StringRef name, llvm::Function *func)
Stores the mapping between a function name and its LLVM IR representation.
llvm::BasicBlock * lookupBlock(Block *block) const
Finds an LLVM IR basic block that corresponds to the given MLIR block.
SymbolTableCollection & symbolTable()
llvm::Type * convertType(Type type)
Converts the type from MLIR LLVM dialect to LLVM.
llvm::OpenMPIRBuilder * getOpenMPBuilder()
Returns the OpenMP IR builder associated with the LLVM IR module being constructed.
llvm::LLVMContext & getLLVMContext() const
Returns the LLVM context in which the IR is being constructed.
llvm::GlobalValue * lookupGlobal(Operation *op)
Finds an LLVM IR global value that corresponds to the given MLIR operation defining a global value.
llvm::Module * getLLVMModule()
Returns the LLVM module in which the IR is being constructed.
llvm::Function * lookupFunction(StringRef name) const
Finds an LLVM IR function by its name.
void mapBlock(Block *mlir, llvm::BasicBlock *llvm)
Stores the mapping between an MLIR block and LLVM IR basic block.
void stackPop()
Pops the last element from the ModuleTranslation stack.
void forgetMapping(Region &region)
Removes the mapping for blocks contained in the region and values defined in these blocks.
void mapValue(Value mlir, llvm::Value *llvm)
Stores the mapping between an MLIR value and its LLVM IR counterpart.
Utility class to translate MLIR LLVM dialect types to LLVM IR.
Definition: TypeToLLVM.h:39
unsigned getPreferredAlignment(Type type, const llvm::DataLayout &layout)
Returns the preferred alignment for the type given the data layout.
Definition: TypeToLLVM.cpp:183
T findInstanceOf()
Return an instance of the given location type if one is nested under the current location.
Definition: Location.h:44
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:66
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:164
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:55
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:179
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:43
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:350
Dialect * getDialect()
Return the dialect this operation is associated with, or nullptr if the associated dialect is not loa...
Definition: Operation.h:220
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:280
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:797
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:346
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:686
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:873
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:539
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
BlockArgListType getArguments()
Definition: Region.h:81
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition: Region.h:200
bool empty()
Definition: Region.h:60
unsigned getNumArguments()
Definition: Region.h:123
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
Block & front()
Definition: Region.h:65
@ Private
The symbol is private and may only be referenced by SymbolRefAttrs local to the operations within the...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:33
static WalkResult advance()
Definition: Visitors.h:51
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: Visitors.h:55
static WalkResult interrupt()
Definition: Visitors.h:50
The OpAsmOpInterface, see OpAsmInterface.td for more details.
Definition: CallGraph.h:229
void connectPHINodes(Region &region, const ModuleTranslation &state)
For all blocks in the region that were converted to LLVM IR using the given ModuleTranslation,...
llvm::Constant * createMappingInformation(Location loc, llvm::OpenMPIRBuilder &builder)
Create a constant string representing the mapping information extracted from the MLIR location inform...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Runtime
Potential runtimes for AMD GPU kernels.
Definition: Runtimes.h:15
llvm::hash_code hash_value(const StructType::MemberDecorationInfo &memberDecorationInfo)
llvm::PointerUnion< NamedAttribute *, NamedProperty *, NamedTypeConstraint * > Argument
Definition: Argument.h:64
Include the generated interface declarations.
SetVector< Block * > getBlocksSortedByDominance(Region &region)
Gets a list of blocks that is sorted according to dominance.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:305
const FrozenRewritePatternSet GreedyRewriteConfig config
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
void registerOpenMPDialectTranslation(DialectRegistry &registry)
Register the OpenMP dialect and the translation from it to the LLVM IR in the given registry;.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
A util to collect info needed to convert delayed privatizers from MLIR to LLVM.
SmallVector< mlir::Value > mlirVars
SmallVector< omp::PrivateClauseOp > privatizers
MutableArrayRef< BlockArgument > blockArgs
SmallVector< llvm::Value * > llvmVars
RAII object calling stackPush/stackPop on construction/destruction.