// MLIR (LLVM 21.0.0git) — OpenMPToLLVMIRTranslation.cpp
// (file contents recovered from the generated documentation page)
1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
19 #include "mlir/IR/IRMapping.h"
20 #include "mlir/IR/Operation.h"
21 #include "mlir/Support/LLVM.h"
25 
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/TypeSwitch.h"
30 #include "llvm/Frontend/OpenMP/OMPConstants.h"
31 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DebugInfoMetadata.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/MDBuilder.h"
37 #include "llvm/IR/ReplaceConstant.h"
38 #include "llvm/Support/FileSystem.h"
39 #include "llvm/TargetParser/Triple.h"
40 #include "llvm/Transforms/Utils/ModuleUtils.h"
41 
42 #include <any>
43 #include <cstdint>
44 #include <iterator>
45 #include <numeric>
46 #include <optional>
47 #include <utility>
48 
49 using namespace mlir;
50 
51 namespace {
52 static llvm::omp::ScheduleKind
53 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
54  if (!schedKind.has_value())
55  return llvm::omp::OMP_SCHEDULE_Default;
56  switch (schedKind.value()) {
57  case omp::ClauseScheduleKind::Static:
58  return llvm::omp::OMP_SCHEDULE_Static;
59  case omp::ClauseScheduleKind::Dynamic:
60  return llvm::omp::OMP_SCHEDULE_Dynamic;
61  case omp::ClauseScheduleKind::Guided:
62  return llvm::omp::OMP_SCHEDULE_Guided;
63  case omp::ClauseScheduleKind::Auto:
64  return llvm::omp::OMP_SCHEDULE_Auto;
66  return llvm::omp::OMP_SCHEDULE_Runtime;
67  }
68  llvm_unreachable("unhandled schedule clause argument");
69 }
70 
71 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
72 /// insertion points for allocas.
class OpenMPAllocaStackFrame
    : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)

  /// Remembers the insertion point at which allocas for the enclosing OpenMP
  /// construct must be created while this frame is on the stack.
  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
      : allocaInsertPoint(allocaIP) {}
  // Alloca insertion point provided by the surrounding operation; consumed by
  // findAllocaInsertPoint().
  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
};
82 
83 /// Stack frame to hold a \see llvm::CanonicalLoopInfo representing the
84 /// collapsed canonical loop information corresponding to an \c omp.loop_nest
85 /// operation.
class OpenMPLoopInfoStackFrame
    : public LLVM::ModuleTranslation::StackFrameBase<OpenMPLoopInfoStackFrame> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPLoopInfoStackFrame)
  // Set by the translation of the nested omp.loop_nest; remains null until
  // that translation has completed successfully.
  llvm::CanonicalLoopInfo *loopInfo = nullptr;
};
92 
93 /// Custom error class to signal translation errors that don't need reporting,
94 /// since encountering them will have already triggered relevant error messages.
95 ///
96 /// Its purpose is to serve as the glue between MLIR failures represented as
97 /// \see LogicalResult instances and \see llvm::Error instances used to
98 /// propagate errors through the \see llvm::OpenMPIRBuilder. Generally, when an
99 /// error of the first type is raised, a message is emitted directly (the \see
100 /// LogicalResult itself does not hold any information). If we need to forward
101 /// this error condition as an \see llvm::Error while avoiding triggering some
102 /// redundant error reporting later on, we need a custom \see llvm::ErrorInfo
103 /// class to just signal this situation has happened.
104 ///
105 /// For example, this class should be used to trigger errors from within
106 /// callbacks passed to the \see OpenMPIRBuilder when they were triggered by the
107 /// translation of their own regions. This unclutters the error log from
108 /// redundant messages.
class PreviouslyReportedError
    : public llvm::ErrorInfo<PreviouslyReportedError> {
public:
  /// Intentionally prints nothing: the diagnostic was already emitted when the
  /// originating LogicalResult failure was raised.
  void log(raw_ostream &) const override {
    // Do not log anything.
  }

  /// This error type carries no error code; converting it is a programming
  /// error.
  std::error_code convertToErrorCode() const override {
    llvm_unreachable(
        "PreviouslyReportedError doesn't support ECError conversion");
  }

  // Used by ErrorInfo::classID.
  static char ID;
};
124 
126 
127 } // namespace
128 
129 /// Looks up from the operation from and returns the PrivateClauseOp with
130 /// name symbolName
131 static omp::PrivateClauseOp findPrivatizer(Operation *from,
132  SymbolRefAttr symbolName) {
133  omp::PrivateClauseOp privatizer =
134  SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
135  symbolName);
136  assert(privatizer && "privatizer not found in the symbol table");
137  return privatizer;
138 }
139 
140 /// Check whether translation to LLVM IR for the given operation is currently
141 /// supported. If not, descriptive diagnostics will be emitted to let users know
142 /// this is a not-yet-implemented feature.
143 ///
144 /// \returns success if no unimplemented features are needed to translate the
145 /// given operation.
146 static LogicalResult checkImplementationStatus(Operation &op) {
147  auto todo = [&op](StringRef clauseName) {
148  return op.emitError() << "not yet implemented: Unhandled clause "
149  << clauseName << " in " << op.getName()
150  << " operation";
151  };
152 
153  auto checkAllocate = [&todo](auto op, LogicalResult &result) {
154  if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
155  result = todo("allocate");
156  };
157  auto checkBare = [&todo](auto op, LogicalResult &result) {
158  if (op.getBare())
159  result = todo("ompx_bare");
160  };
161  auto checkCancelDirective = [&todo](auto op, LogicalResult &result) {
162  omp::ClauseCancellationConstructType cancelledDirective =
163  op.getCancelDirective();
164  // Cancelling a taskloop is not yet supported because we don't yet have LLVM
165  // IR conversion for taskloop
166  if (cancelledDirective == omp::ClauseCancellationConstructType::Taskgroup) {
167  Operation *parent = op->getParentOp();
168  while (parent) {
169  if (parent->getDialect() == op->getDialect())
170  break;
171  parent = parent->getParentOp();
172  }
173  if (isa_and_nonnull<omp::TaskloopOp>(parent))
174  result = todo("cancel directive inside of taskloop");
175  }
176  };
177  auto checkDepend = [&todo](auto op, LogicalResult &result) {
178  if (!op.getDependVars().empty() || op.getDependKinds())
179  result = todo("depend");
180  };
181  auto checkDevice = [&todo](auto op, LogicalResult &result) {
182  if (op.getDevice())
183  result = todo("device");
184  };
185  auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
186  if (op.getDistScheduleChunkSize())
187  result = todo("dist_schedule with chunk_size");
188  };
189  auto checkHint = [](auto op, LogicalResult &) {
190  if (op.getHint())
191  op.emitWarning("hint clause discarded");
192  };
193  auto checkInReduction = [&todo](auto op, LogicalResult &result) {
194  if (!op.getInReductionVars().empty() || op.getInReductionByref() ||
195  op.getInReductionSyms())
196  result = todo("in_reduction");
197  };
198  auto checkIsDevicePtr = [&todo](auto op, LogicalResult &result) {
199  if (!op.getIsDevicePtrVars().empty())
200  result = todo("is_device_ptr");
201  };
202  auto checkLinear = [&todo](auto op, LogicalResult &result) {
203  if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
204  result = todo("linear");
205  };
206  auto checkNowait = [&todo](auto op, LogicalResult &result) {
207  if (op.getNowait())
208  result = todo("nowait");
209  };
210  auto checkOrder = [&todo](auto op, LogicalResult &result) {
211  if (op.getOrder() || op.getOrderMod())
212  result = todo("order");
213  };
214  auto checkParLevelSimd = [&todo](auto op, LogicalResult &result) {
215  if (op.getParLevelSimd())
216  result = todo("parallelization-level");
217  };
218  auto checkPriority = [&todo](auto op, LogicalResult &result) {
219  if (op.getPriority())
220  result = todo("priority");
221  };
222  auto checkPrivate = [&todo](auto op, LogicalResult &result) {
223  if constexpr (std::is_same_v<std::decay_t<decltype(op)>, omp::TargetOp>) {
224  // Privatization is supported only for included target tasks.
225  if (!op.getPrivateVars().empty() && op.getNowait())
226  result = todo("privatization for deferred target tasks");
227  } else {
228  if (!op.getPrivateVars().empty() || op.getPrivateSyms())
229  result = todo("privatization");
230  }
231  };
232  auto checkReduction = [&todo](auto op, LogicalResult &result) {
233  if (isa<omp::TeamsOp>(op) || isa<omp::SimdOp>(op))
234  if (!op.getReductionVars().empty() || op.getReductionByref() ||
235  op.getReductionSyms())
236  result = todo("reduction");
237  if (op.getReductionMod() &&
238  op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
239  result = todo("reduction with modifier");
240  };
241  auto checkTaskReduction = [&todo](auto op, LogicalResult &result) {
242  if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() ||
243  op.getTaskReductionSyms())
244  result = todo("task_reduction");
245  };
246  auto checkUntied = [&todo](auto op, LogicalResult &result) {
247  if (op.getUntied())
248  result = todo("untied");
249  };
250 
251  LogicalResult result = success();
253  .Case([&](omp::CancelOp op) { checkCancelDirective(op, result); })
254  .Case([&](omp::CancellationPointOp op) {
255  checkCancelDirective(op, result);
256  })
257  .Case([&](omp::DistributeOp op) {
258  checkAllocate(op, result);
259  checkDistSchedule(op, result);
260  checkOrder(op, result);
261  })
262  .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
263  .Case([&](omp::SectionsOp op) {
264  checkAllocate(op, result);
265  checkPrivate(op, result);
266  checkReduction(op, result);
267  })
268  .Case([&](omp::SingleOp op) {
269  checkAllocate(op, result);
270  checkPrivate(op, result);
271  })
272  .Case([&](omp::TeamsOp op) {
273  checkAllocate(op, result);
274  checkPrivate(op, result);
275  })
276  .Case([&](omp::TaskOp op) {
277  checkAllocate(op, result);
278  checkInReduction(op, result);
279  })
280  .Case([&](omp::TaskgroupOp op) {
281  checkAllocate(op, result);
282  checkTaskReduction(op, result);
283  })
284  .Case([&](omp::TaskwaitOp op) {
285  checkDepend(op, result);
286  checkNowait(op, result);
287  })
288  .Case([&](omp::TaskloopOp op) {
289  // TODO: Add other clauses check
290  checkUntied(op, result);
291  checkPriority(op, result);
292  })
293  .Case([&](omp::WsloopOp op) {
294  checkAllocate(op, result);
295  checkLinear(op, result);
296  checkOrder(op, result);
297  checkReduction(op, result);
298  })
299  .Case([&](omp::ParallelOp op) {
300  checkAllocate(op, result);
301  checkReduction(op, result);
302  })
303  .Case([&](omp::SimdOp op) {
304  checkLinear(op, result);
305  checkReduction(op, result);
306  })
307  .Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp,
308  omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); })
309  .Case<omp::TargetEnterDataOp, omp::TargetExitDataOp, omp::TargetUpdateOp>(
310  [&](auto op) { checkDepend(op, result); })
311  .Case([&](omp::TargetOp op) {
312  checkAllocate(op, result);
313  checkBare(op, result);
314  checkDevice(op, result);
315  checkInReduction(op, result);
316  checkIsDevicePtr(op, result);
317  checkPrivate(op, result);
318  })
319  .Default([](Operation &) {
320  // Assume all clauses for an operation can be translated unless they are
321  // checked above.
322  });
323  return result;
324 }
325 
326 static LogicalResult handleError(llvm::Error error, Operation &op) {
327  LogicalResult result = success();
328  if (error) {
329  llvm::handleAllErrors(
330  std::move(error),
331  [&](const PreviouslyReportedError &) { result = failure(); },
332  [&](const llvm::ErrorInfoBase &err) {
333  result = op.emitError(err.message());
334  });
335  }
336  return result;
337 }
338 
339 template <typename T>
340 static LogicalResult handleError(llvm::Expected<T> &result, Operation &op) {
341  if (!result)
342  return handleError(result.takeError(), op);
343 
344  return success();
345 }
346 
347 /// Find the insertion point for allocas given the current insertion point for
348 /// normal operations in the builder.
349 static llvm::OpenMPIRBuilder::InsertPointTy
350 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
351  LLVM::ModuleTranslation &moduleTranslation) {
352  // If there is an alloca insertion point on stack, i.e. we are in a nested
353  // operation and a specific point was provided by some surrounding operation,
354  // use it.
355  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
356  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
357  [&](OpenMPAllocaStackFrame &frame) {
358  allocaInsertPoint = frame.allocaInsertPoint;
359  return WalkResult::interrupt();
360  });
361  if (walkResult.wasInterrupted())
362  return allocaInsertPoint;
363 
364  // Otherwise, insert to the entry block of the surrounding function.
365  // If the current IRBuilder InsertPoint is the function's entry, it cannot
366  // also be used for alloca insertion which would result in insertion order
367  // confusion. Create a new BasicBlock for the Builder and use the entry block
368  // for the allocs.
369  // TODO: Create a dedicated alloca BasicBlock at function creation such that
370  // we do not need to move the current InertPoint here.
371  if (builder.GetInsertBlock() ==
372  &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
373  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
374  "Assuming end of basic block");
375  llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
376  builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
377  builder.GetInsertBlock()->getNextNode());
378  builder.CreateBr(entryBB);
379  builder.SetInsertPoint(entryBB);
380  }
381 
382  llvm::BasicBlock &funcEntryBlock =
383  builder.GetInsertBlock()->getParent()->getEntryBlock();
384  return llvm::OpenMPIRBuilder::InsertPointTy(
385  &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
386 }
387 
388 /// Find the loop information structure for the loop nest being translated. It
389 /// will return a `null` value unless called from the translation function for
390 /// a loop wrapper operation after successfully translating its body.
391 static llvm::CanonicalLoopInfo *
393  llvm::CanonicalLoopInfo *loopInfo = nullptr;
394  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
395  [&](OpenMPLoopInfoStackFrame &frame) {
396  loopInfo = frame.loopInfo;
397  return WalkResult::interrupt();
398  });
399  return loopInfo;
400 }
401 
402 /// Converts the given region that appears within an OpenMP dialect operation to
403 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
404 /// region, and a branch from any block with an successor-less OpenMP terminator
405 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
406 /// of the continuation block if provided.
408  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
409  LLVM::ModuleTranslation &moduleTranslation,
410  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
411  bool isLoopWrapper = isa<omp::LoopWrapperInterface>(region.getParentOp());
412 
413  llvm::BasicBlock *continuationBlock =
414  splitBB(builder, true, "omp.region.cont");
415  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
416 
417  llvm::LLVMContext &llvmContext = builder.getContext();
418  for (Block &bb : region) {
419  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
420  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
421  builder.GetInsertBlock()->getNextNode());
422  moduleTranslation.mapBlock(&bb, llvmBB);
423  }
424 
425  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
426 
427  // Terminators (namely YieldOp) may be forwarding values to the region that
428  // need to be available in the continuation block. Collect the types of these
429  // operands in preparation of creating PHI nodes. This is skipped for loop
430  // wrapper operations, for which we know in advance they have no terminators.
431  SmallVector<llvm::Type *> continuationBlockPHITypes;
432  unsigned numYields = 0;
433 
434  if (!isLoopWrapper) {
435  bool operandsProcessed = false;
436  for (Block &bb : region.getBlocks()) {
437  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
438  if (!operandsProcessed) {
439  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
440  continuationBlockPHITypes.push_back(
441  moduleTranslation.convertType(yield->getOperand(i).getType()));
442  }
443  operandsProcessed = true;
444  } else {
445  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
446  "mismatching number of values yielded from the region");
447  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
448  llvm::Type *operandType =
449  moduleTranslation.convertType(yield->getOperand(i).getType());
450  (void)operandType;
451  assert(continuationBlockPHITypes[i] == operandType &&
452  "values of mismatching types yielded from the region");
453  }
454  }
455  numYields++;
456  }
457  }
458  }
459 
460  // Insert PHI nodes in the continuation block for any values forwarded by the
461  // terminators in this region.
462  if (!continuationBlockPHITypes.empty())
463  assert(
464  continuationBlockPHIs &&
465  "expected continuation block PHIs if converted regions yield values");
466  if (continuationBlockPHIs) {
467  llvm::IRBuilderBase::InsertPointGuard guard(builder);
468  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
469  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
470  for (llvm::Type *ty : continuationBlockPHITypes)
471  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
472  }
473 
474  // Convert blocks one by one in topological order to ensure
475  // defs are converted before uses.
477  for (Block *bb : blocks) {
478  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
479  // Retarget the branch of the entry block to the entry block of the
480  // converted region (regions are single-entry).
481  if (bb->isEntryBlock()) {
482  assert(sourceTerminator->getNumSuccessors() == 1 &&
483  "provided entry block has multiple successors");
484  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
485  "ContinuationBlock is not the successor of the entry block");
486  sourceTerminator->setSuccessor(0, llvmBB);
487  }
488 
489  llvm::IRBuilderBase::InsertPointGuard guard(builder);
490  if (failed(
491  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder)))
492  return llvm::make_error<PreviouslyReportedError>();
493 
494  // Create a direct branch here for loop wrappers to prevent their lack of a
495  // terminator from causing a crash below.
496  if (isLoopWrapper) {
497  builder.CreateBr(continuationBlock);
498  continue;
499  }
500 
501  // Special handling for `omp.yield` and `omp.terminator` (we may have more
502  // than one): they return the control to the parent OpenMP dialect operation
503  // so replace them with the branch to the continuation block. We handle this
504  // here to avoid relying inter-function communication through the
505  // ModuleTranslation class to set up the correct insertion point. This is
506  // also consistent with MLIR's idiom of handling special region terminators
507  // in the same code that handles the region-owning operation.
508  Operation *terminator = bb->getTerminator();
509  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
510  builder.CreateBr(continuationBlock);
511 
512  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
513  (*continuationBlockPHIs)[i]->addIncoming(
514  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
515  }
516  }
517  // After all blocks have been traversed and values mapped, connect the PHI
518  // nodes to the results of preceding blocks.
519  LLVM::detail::connectPHINodes(region, moduleTranslation);
520 
521  // Remove the blocks and values defined in this region from the mapping since
522  // they are not visible outside of this region. This allows the same region to
523  // be converted several times, that is cloned, without clashes, and slightly
524  // speeds up the lookups.
525  moduleTranslation.forgetMapping(region);
526 
527  return continuationBlock;
528 }
529 
530 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
531 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
532  switch (kind) {
533  case omp::ClauseProcBindKind::Close:
534  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
535  case omp::ClauseProcBindKind::Master:
536  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
537  case omp::ClauseProcBindKind::Primary:
538  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
539  case omp::ClauseProcBindKind::Spread:
540  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
541  }
542  llvm_unreachable("Unknown ClauseProcBindKind kind");
543 }
544 
545 /// Maps block arguments from \p blockArgIface (which are MLIR values) to the
546 /// corresponding LLVM values of \p the interface's operands. This is useful
547 /// when an OpenMP region with entry block arguments is converted to LLVM. In
548 /// this case the block arguments are (part of) of the OpenMP region's entry
549 /// arguments and the operands are (part of) of the operands to the OpenMP op
550 /// containing the region.
551 static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation,
552  omp::BlockArgOpenMPOpInterface blockArgIface) {
554  blockArgIface.getBlockArgsPairs(blockArgsPairs);
555  for (auto [var, arg] : blockArgsPairs)
556  moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
557 }
558 
559 /// Helper function to map block arguments defined by ignored loop wrappers to
560 /// LLVM values and prevent any uses of those from triggering null pointer
561 /// dereferences.
562 ///
563 /// This must be called after block arguments of parent wrappers have already
564 /// been mapped to LLVM IR values.
565 static LogicalResult
566 convertIgnoredWrapper(omp::LoopWrapperInterface opInst,
567  LLVM::ModuleTranslation &moduleTranslation) {
568  // Map block arguments directly to the LLVM value associated to the
569  // corresponding operand. This is semantically equivalent to this wrapper not
570  // being present.
572  .Case([&](omp::SimdOp op) {
573  forwardArgs(moduleTranslation,
574  cast<omp::BlockArgOpenMPOpInterface>(*op));
575  op.emitWarning() << "simd information on composite construct discarded";
576  return success();
577  })
578  .Default([&](Operation *op) {
579  return op->emitError() << "cannot ignore wrapper";
580  });
581 }
582 
583 /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
584 static LogicalResult
585 convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
586  LLVM::ModuleTranslation &moduleTranslation) {
587  auto maskedOp = cast<omp::MaskedOp>(opInst);
588  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
589 
590  if (failed(checkImplementationStatus(opInst)))
591  return failure();
592 
593  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
594  // MaskedOp has only one region associated with it.
595  auto &region = maskedOp.getRegion();
596  builder.restoreIP(codeGenIP);
597  return convertOmpOpRegions(region, "omp.masked.region", builder,
598  moduleTranslation)
599  .takeError();
600  };
601 
602  // TODO: Perform finalization actions for variables. This has to be
603  // called for variables which have destructors/finalizers.
604  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
605 
606  llvm::Value *filterVal = nullptr;
607  if (auto filterVar = maskedOp.getFilteredThreadId()) {
608  filterVal = moduleTranslation.lookupValue(filterVar);
609  } else {
610  llvm::LLVMContext &llvmContext = builder.getContext();
611  filterVal =
612  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), /*V=*/0);
613  }
614  assert(filterVal != nullptr);
615  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
616  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
617  moduleTranslation.getOpenMPBuilder()->createMasked(ompLoc, bodyGenCB,
618  finiCB, filterVal);
619 
620  if (failed(handleError(afterIP, opInst)))
621  return failure();
622 
623  builder.restoreIP(*afterIP);
624  return success();
625 }
626 
627 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
628 static LogicalResult
629 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
630  LLVM::ModuleTranslation &moduleTranslation) {
631  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
632  auto masterOp = cast<omp::MasterOp>(opInst);
633 
634  if (failed(checkImplementationStatus(opInst)))
635  return failure();
636 
637  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
638  // MasterOp has only one region associated with it.
639  auto &region = masterOp.getRegion();
640  builder.restoreIP(codeGenIP);
641  return convertOmpOpRegions(region, "omp.master.region", builder,
642  moduleTranslation)
643  .takeError();
644  };
645 
646  // TODO: Perform finalization actions for variables. This has to be
647  // called for variables which have destructors/finalizers.
648  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
649 
650  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
651  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
652  moduleTranslation.getOpenMPBuilder()->createMaster(ompLoc, bodyGenCB,
653  finiCB);
654 
655  if (failed(handleError(afterIP, opInst)))
656  return failure();
657 
658  builder.restoreIP(*afterIP);
659  return success();
660 }
661 
662 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
663 static LogicalResult
664 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
665  LLVM::ModuleTranslation &moduleTranslation) {
666  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
667  auto criticalOp = cast<omp::CriticalOp>(opInst);
668 
669  if (failed(checkImplementationStatus(opInst)))
670  return failure();
671 
672  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
673  // CriticalOp has only one region associated with it.
674  auto &region = cast<omp::CriticalOp>(opInst).getRegion();
675  builder.restoreIP(codeGenIP);
676  return convertOmpOpRegions(region, "omp.critical.region", builder,
677  moduleTranslation)
678  .takeError();
679  };
680 
681  // TODO: Perform finalization actions for variables. This has to be
682  // called for variables which have destructors/finalizers.
683  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
684 
685  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
686  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
687  llvm::Constant *hint = nullptr;
688 
689  // If it has a name, it probably has a hint too.
690  if (criticalOp.getNameAttr()) {
691  // The verifiers in OpenMP Dialect guarentee that all the pointers are
692  // non-null
693  auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
694  auto criticalDeclareOp =
695  SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
696  symbolRef);
697  hint =
698  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
699  static_cast<int>(criticalDeclareOp.getHint()));
700  }
701  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
702  moduleTranslation.getOpenMPBuilder()->createCritical(
703  ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint);
704 
705  if (failed(handleError(afterIP, opInst)))
706  return failure();
707 
708  builder.restoreIP(*afterIP);
709  return success();
710 }
711 
712 /// A util to collect info needed to convert delayed privatizers from MLIR to
713 /// LLVM.
715  template <typename OP>
717  : blockArgs(
718  cast<omp::BlockArgOpenMPOpInterface>(*op).getPrivateBlockArgs()) {
719  mlirVars.reserve(blockArgs.size());
720  llvmVars.reserve(blockArgs.size());
721  collectPrivatizationDecls<OP>(op);
722 
723  for (mlir::Value privateVar : op.getPrivateVars())
724  mlirVars.push_back(privateVar);
725  }
726 
731 
732 private:
733  /// Populates `privatizations` with privatization declarations used for the
734  /// given op.
735  template <class OP>
736  void collectPrivatizationDecls(OP op) {
737  std::optional<ArrayAttr> attr = op.getPrivateSyms();
738  if (!attr)
739  return;
740 
741  privatizers.reserve(privatizers.size() + attr->size());
742  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
743  privatizers.push_back(findPrivatizer(op, symbolRef));
744  }
745  }
746 };
747 
748 /// Populates `reductions` with reduction declarations used in the given op.
749 template <typename T>
750 static void
753  std::optional<ArrayAttr> attr = op.getReductionSyms();
754  if (!attr)
755  return;
756 
757  reductions.reserve(reductions.size() + op.getNumReductionVars());
758  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
759  reductions.push_back(
760  SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
761  op, symbolRef));
762  }
763 }
764 
765 /// Translates the blocks contained in the given region and appends them to at
766 /// the current insertion point of `builder`. The operations of the entry block
767 /// are appended to the current insertion block. If set, `continuationBlockArgs`
768 /// is populated with translated values that correspond to the values
769 /// omp.yield'ed from the region.
770 static LogicalResult inlineConvertOmpRegions(
771  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
772  LLVM::ModuleTranslation &moduleTranslation,
773  SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
774  if (region.empty())
775  return success();
776 
777  // Special case for single-block regions that don't create additional blocks:
778  // insert operations without creating additional blocks.
779  if (llvm::hasSingleElement(region)) {
780  llvm::Instruction *potentialTerminator =
781  builder.GetInsertBlock()->empty() ? nullptr
782  : &builder.GetInsertBlock()->back();
783 
784  if (potentialTerminator && potentialTerminator->isTerminator())
785  potentialTerminator->removeFromParent();
786  moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
787 
788  if (failed(moduleTranslation.convertBlock(
789  region.front(), /*ignoreArguments=*/true, builder)))
790  return failure();
791 
792  // The continuation arguments are simply the translated terminator operands.
793  if (continuationBlockArgs)
794  llvm::append_range(
795  *continuationBlockArgs,
796  moduleTranslation.lookupValues(region.front().back().getOperands()));
797 
798  // Drop the mapping that is no longer necessary so that the same region can
799  // be processed multiple times.
800  moduleTranslation.forgetMapping(region);
801 
802  if (potentialTerminator && potentialTerminator->isTerminator()) {
803  llvm::BasicBlock *block = builder.GetInsertBlock();
804  if (block->empty()) {
805  // this can happen for really simple reduction init regions e.g.
806  // %0 = llvm.mlir.constant(0 : i32) : i32
807  // omp.yield(%0 : i32)
808  // because the llvm.mlir.constant (MLIR op) isn't converted into any
809  // llvm op
810  potentialTerminator->insertInto(block, block->begin());
811  } else {
812  potentialTerminator->insertAfter(&block->back());
813  }
814  }
815 
816  return success();
817  }
818 
820  llvm::Expected<llvm::BasicBlock *> continuationBlock =
821  convertOmpOpRegions(region, blockName, builder, moduleTranslation, &phis);
822 
823  if (failed(handleError(continuationBlock, *region.getParentOp())))
824  return failure();
825 
826  if (continuationBlockArgs)
827  llvm::append_range(*continuationBlockArgs, phis);
828  builder.SetInsertPoint(*continuationBlock,
829  (*continuationBlock)->getFirstInsertionPt());
830  return success();
831 }
832 
namespace {
/// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
/// store lambdas with capture.
// Signature: (insertion point, LHS value, RHS value, out-parameter receiving
// the combined value) -> new insertion point or error.
using OwningReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
        llvm::Value *&)>;
// Signature: (insertion point, element type, LHS address, RHS address) ->
// new insertion point or error. Only produced when the reduction declaration
// provides an atomic region.
using OwningAtomicReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
        llvm::Value *)>;
} // namespace
845 
846 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
847 /// reduction declaration. The generator uses `builder` but ignores its
848 /// insertion point.
849 static OwningReductionGen
850 makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
851  LLVM::ModuleTranslation &moduleTranslation) {
852  // The lambda is mutable because we need access to non-const methods of decl
853  // (which aren't actually mutating it), and we must capture decl by-value to
854  // avoid the dangling reference after the parent function returns.
855  OwningReductionGen gen =
856  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
857  llvm::Value *lhs, llvm::Value *rhs,
858  llvm::Value *&result) mutable
859  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
860  moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs);
861  moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs);
862  builder.restoreIP(insertPoint);
864  if (failed(inlineConvertOmpRegions(decl.getReductionRegion(),
865  "omp.reduction.nonatomic.body", builder,
866  moduleTranslation, &phis)))
867  return llvm::createStringError(
868  "failed to inline `combiner` region of `omp.declare_reduction`");
869  result = llvm::getSingleElement(phis);
870  return builder.saveIP();
871  };
872  return gen;
873 }
874 
875 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
876 /// given reduction declaration. The generator uses `builder` but ignores its
877 /// insertion point. Returns null if there is no atomic region available in the
878 /// reduction declaration.
879 static OwningAtomicReductionGen
880 makeAtomicReductionGen(omp::DeclareReductionOp decl,
881  llvm::IRBuilderBase &builder,
882  LLVM::ModuleTranslation &moduleTranslation) {
883  if (decl.getAtomicReductionRegion().empty())
884  return OwningAtomicReductionGen();
885 
886  // The lambda is mutable because we need access to non-const methods of decl
887  // (which aren't actually mutating it), and we must capture decl by-value to
888  // avoid the dangling reference after the parent function returns.
889  OwningAtomicReductionGen atomicGen =
890  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
891  llvm::Value *lhs, llvm::Value *rhs) mutable
892  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
893  moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs);
894  moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs);
895  builder.restoreIP(insertPoint);
897  if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(),
898  "omp.reduction.atomic.body", builder,
899  moduleTranslation, &phis)))
900  return llvm::createStringError(
901  "failed to inline `atomic` region of `omp.declare_reduction`");
902  assert(phis.empty());
903  return builder.saveIP();
904  };
905  return atomicGen;
906 }
907 
908 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
909 static LogicalResult
910 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
911  LLVM::ModuleTranslation &moduleTranslation) {
912  auto orderedOp = cast<omp::OrderedOp>(opInst);
913 
914  if (failed(checkImplementationStatus(opInst)))
915  return failure();
916 
917  omp::ClauseDepend dependType = *orderedOp.getDoacrossDependType();
918  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
919  unsigned numLoops = *orderedOp.getDoacrossNumLoops();
920  SmallVector<llvm::Value *> vecValues =
921  moduleTranslation.lookupValues(orderedOp.getDoacrossDependVars());
922 
923  size_t indexVecValues = 0;
924  while (indexVecValues < vecValues.size()) {
925  SmallVector<llvm::Value *> storeValues;
926  storeValues.reserve(numLoops);
927  for (unsigned i = 0; i < numLoops; i++) {
928  storeValues.push_back(vecValues[indexVecValues]);
929  indexVecValues++;
930  }
931  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
932  findAllocaInsertPoint(builder, moduleTranslation);
933  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
934  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
935  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
936  }
937  return success();
938 }
939 
/// Converts an OpenMP 'ordered_region' operation into LLVM IR using
/// OpenMPIRBuilder.
static LogicalResult
convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);

  if (failed(checkImplementationStatus(opInst)))
    return failure();

  // Body callback: translate the op's region at the position the
  // OpenMPIRBuilder selects inside the generated 'ordered' construct.
  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
    // OrderedOp has only one region associated with it.
    auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
    builder.restoreIP(codeGenIP);
    return convertOmpOpRegions(region, "omp.ordered.region", builder,
                               moduleTranslation)
        .takeError();
  };

  // TODO: Perform finalization actions for variables. This has to be
  // called for variables which have destructors/finalizers.
  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  // `par_level_simd` selects the simd flavor of the construct; the builder
  // flag has the opposite sense (threads), hence the negation.
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
          ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd());

  if (failed(handleError(afterIP, opInst)))
    return failure();

  // Continue emitting code after the whole 'ordered' construct.
  builder.restoreIP(*afterIP);
  return success();
}
975 
namespace {
/// Contains the arguments for an LLVM store operation
struct DeferredStore {
  DeferredStore(llvm::Value *value, llvm::Value *address)
      : value(value), address(address) {}

  // Value to be stored.
  llvm::Value *value;
  // Destination address the value is stored to.
  llvm::Value *address;
};
} // namespace
986 
987 /// Allocate space for privatized reduction variables.
988 /// `deferredStores` contains information to create store operations which needs
989 /// to be inserted after all allocas
990 template <typename T>
991 static LogicalResult
993  llvm::IRBuilderBase &builder,
994  LLVM::ModuleTranslation &moduleTranslation,
995  const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
997  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
998  DenseMap<Value, llvm::Value *> &reductionVariableMap,
999  SmallVectorImpl<DeferredStore> &deferredStores,
1000  llvm::ArrayRef<bool> isByRefs) {
1001  llvm::IRBuilderBase::InsertPointGuard guard(builder);
1002  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1003 
1004  // delay creating stores until after all allocas
1005  deferredStores.reserve(loop.getNumReductionVars());
1006 
1007  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
1008  Region &allocRegion = reductionDecls[i].getAllocRegion();
1009  if (isByRefs[i]) {
1010  if (allocRegion.empty())
1011  continue;
1012 
1014  if (failed(inlineConvertOmpRegions(allocRegion, "omp.reduction.alloc",
1015  builder, moduleTranslation, &phis)))
1016  return loop.emitError(
1017  "failed to inline `alloc` region of `omp.declare_reduction`");
1018 
1019  assert(phis.size() == 1 && "expected one allocation to be yielded");
1020  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1021 
1022  // Allocate reduction variable (which is a pointer to the real reduction
1023  // variable allocated in the inlined region)
1024  llvm::Value *var = builder.CreateAlloca(
1025  moduleTranslation.convertType(reductionDecls[i].getType()));
1026 
1027  llvm::Type *ptrTy = builder.getPtrTy();
1028  llvm::Value *castVar =
1029  builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
1030  llvm::Value *castPhi =
1031  builder.CreatePointerBitCastOrAddrSpaceCast(phis[0], ptrTy);
1032 
1033  deferredStores.emplace_back(castPhi, castVar);
1034 
1035  privateReductionVariables[i] = castVar;
1036  moduleTranslation.mapValue(reductionArgs[i], castPhi);
1037  reductionVariableMap.try_emplace(loop.getReductionVars()[i], castPhi);
1038  } else {
1039  assert(allocRegion.empty() &&
1040  "allocaction is implicit for by-val reduction");
1041  llvm::Value *var = builder.CreateAlloca(
1042  moduleTranslation.convertType(reductionDecls[i].getType()));
1043 
1044  llvm::Type *ptrTy = builder.getPtrTy();
1045  llvm::Value *castVar =
1046  builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
1047 
1048  moduleTranslation.mapValue(reductionArgs[i], castVar);
1049  privateReductionVariables[i] = castVar;
1050  reductionVariableMap.try_emplace(loop.getReductionVars()[i], castVar);
1051  }
1052  }
1053 
1054  return success();
1055 }
1056 
1057 /// Map input arguments to reduction initialization region
1058 template <typename T>
1059 static void
1062  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1063  unsigned i) {
1064  // map input argument to the initialization region
1065  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
1066  Region &initializerRegion = reduction.getInitializerRegion();
1067  Block &entry = initializerRegion.front();
1068 
1069  mlir::Value mlirSource = loop.getReductionVars()[i];
1070  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
1071  assert(llvmSource && "lookup reduction var");
1072  moduleTranslation.mapValue(reduction.getInitializerMoldArg(), llvmSource);
1073 
1074  if (entry.getNumArguments() > 1) {
1075  llvm::Value *allocation =
1076  reductionVariableMap.lookup(loop.getReductionVars()[i]);
1077  moduleTranslation.mapValue(reduction.getInitializerAllocArg(), allocation);
1078  }
1079 }
1080 
1081 static void
1082 setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder,
1083  llvm::BasicBlock *block = nullptr) {
1084  if (block == nullptr)
1085  block = builder.GetInsertBlock();
1086 
1087  if (block->empty() || block->getTerminator() == nullptr)
1088  builder.SetInsertPoint(block);
1089  else
1090  builder.SetInsertPoint(block->getTerminator());
1091 }
1092 
1093 /// Inline reductions' `init` regions. This functions assumes that the
1094 /// `builder`'s insertion point is where the user wants the `init` regions to be
1095 /// inlined; i.e. it does not try to find a proper insertion location for the
1096 /// `init` regions. It also leaves the `builder's insertions point in a state
1097 /// where the user can continue the code-gen directly afterwards.
1098 template <typename OP>
1099 static LogicalResult
1101  llvm::IRBuilderBase &builder,
1102  LLVM::ModuleTranslation &moduleTranslation,
1103  llvm::BasicBlock *latestAllocaBlock,
1105  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
1106  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1107  llvm::ArrayRef<bool> isByRef,
1108  SmallVectorImpl<DeferredStore> &deferredStores) {
1109  if (op.getNumReductionVars() == 0)
1110  return success();
1111 
1112  llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
1113  auto allocaIP = llvm::IRBuilderBase::InsertPoint(
1114  latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
1115  builder.restoreIP(allocaIP);
1116  SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars());
1117 
1118  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
1119  if (isByRef[i]) {
1120  if (!reductionDecls[i].getAllocRegion().empty())
1121  continue;
1122 
1123  // TODO: remove after all users of by-ref are updated to use the alloc
1124  // region: Allocate reduction variable (which is a pointer to the real
1125  // reduciton variable allocated in the inlined region)
1126  byRefVars[i] = builder.CreateAlloca(
1127  moduleTranslation.convertType(reductionDecls[i].getType()));
1128  }
1129  }
1130 
1131  setInsertPointForPossiblyEmptyBlock(builder, initBlock);
1132 
1133  // store result of the alloc region to the allocated pointer to the real
1134  // reduction variable
1135  for (auto [data, addr] : deferredStores)
1136  builder.CreateStore(data, addr);
1137 
1138  // Before the loop, store the initial values of reductions into reduction
1139  // variables. Although this could be done after allocas, we don't want to mess
1140  // up with the alloca insertion point.
1141  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
1143 
1144  // map block argument to initializer region
1145  mapInitializationArgs(op, moduleTranslation, reductionDecls,
1146  reductionVariableMap, i);
1147 
1148  if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
1149  "omp.reduction.neutral", builder,
1150  moduleTranslation, &phis)))
1151  return failure();
1152 
1153  assert(phis.size() == 1 && "expected one value to be yielded from the "
1154  "reduction neutral element declaration region");
1155 
1157 
1158  if (isByRef[i]) {
1159  if (!reductionDecls[i].getAllocRegion().empty())
1160  // done in allocReductionVars
1161  continue;
1162 
1163  // TODO: this path can be removed once all users of by-ref are updated to
1164  // use an alloc region
1165 
1166  // Store the result of the inlined region to the allocated reduction var
1167  // ptr
1168  builder.CreateStore(phis[0], byRefVars[i]);
1169 
1170  privateReductionVariables[i] = byRefVars[i];
1171  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
1172  reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
1173  } else {
1174  // for by-ref case the store is inside of the reduction region
1175  builder.CreateStore(phis[0], privateReductionVariables[i]);
1176  // the rest was handled in allocByValReductionVars
1177  }
1178 
1179  // forget the mapping for the initializer region because we might need a
1180  // different mapping if this reduction declaration is re-used for a
1181  // different variable
1182  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
1183  }
1184 
1185  return success();
1186 }
1187 
1188 /// Collect reduction info
1189 template <typename T>
1191  T loop, llvm::IRBuilderBase &builder,
1192  LLVM::ModuleTranslation &moduleTranslation,
1194  SmallVectorImpl<OwningReductionGen> &owningReductionGens,
1195  SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens,
1196  const ArrayRef<llvm::Value *> privateReductionVariables,
1198  unsigned numReductions = loop.getNumReductionVars();
1199 
1200  for (unsigned i = 0; i < numReductions; ++i) {
1201  owningReductionGens.push_back(
1202  makeReductionGen(reductionDecls[i], builder, moduleTranslation));
1203  owningAtomicReductionGens.push_back(
1204  makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
1205  }
1206 
1207  // Collect the reduction information.
1208  reductionInfos.reserve(numReductions);
1209  for (unsigned i = 0; i < numReductions; ++i) {
1210  llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
1211  if (owningAtomicReductionGens[i])
1212  atomicGen = owningAtomicReductionGens[i];
1213  llvm::Value *variable =
1214  moduleTranslation.lookupValue(loop.getReductionVars()[i]);
1215  reductionInfos.push_back(
1216  {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
1217  privateReductionVariables[i],
1218  /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
1219  owningReductionGens[i],
1220  /*ReductionGenClang=*/nullptr, atomicGen});
1221  }
1222 }
1223 
1224 /// handling of DeclareReductionOp's cleanup region
1225 static LogicalResult
1227  llvm::ArrayRef<llvm::Value *> privateVariables,
1228  LLVM::ModuleTranslation &moduleTranslation,
1229  llvm::IRBuilderBase &builder, StringRef regionName,
1230  bool shouldLoadCleanupRegionArg = true) {
1231  for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
1232  if (cleanupRegion->empty())
1233  continue;
1234 
1235  // map the argument to the cleanup region
1236  Block &entry = cleanupRegion->front();
1237 
1238  llvm::Instruction *potentialTerminator =
1239  builder.GetInsertBlock()->empty() ? nullptr
1240  : &builder.GetInsertBlock()->back();
1241  if (potentialTerminator && potentialTerminator->isTerminator())
1242  builder.SetInsertPoint(potentialTerminator);
1243  llvm::Value *privateVarValue =
1244  shouldLoadCleanupRegionArg
1245  ? builder.CreateLoad(
1246  moduleTranslation.convertType(entry.getArgument(0).getType()),
1247  privateVariables[i])
1248  : privateVariables[i];
1249 
1250  moduleTranslation.mapValue(entry.getArgument(0), privateVarValue);
1251 
1252  if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
1253  moduleTranslation)))
1254  return failure();
1255 
1256  // clear block argument mapping in case it needs to be re-created with a
1257  // different source for another use of the same reduction decl
1258  moduleTranslation.forgetMapping(*cleanupRegion);
1259  }
1260  return success();
1261 }
1262 
1263 // TODO: not used by ParallelOp
1264 template <class OP>
1265 static LogicalResult createReductionsAndCleanup(
1266  OP op, llvm::IRBuilderBase &builder,
1267  LLVM::ModuleTranslation &moduleTranslation,
1268  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1270  ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef,
1271  bool isNowait = false, bool isTeamsReduction = false) {
1272  // Process the reductions if required.
1273  if (op.getNumReductionVars() == 0)
1274  return success();
1275 
1276  SmallVector<OwningReductionGen> owningReductionGens;
1277  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1279 
1280  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1281 
1282  // Create the reduction generators. We need to own them here because
1283  // ReductionInfo only accepts references to the generators.
1284  collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
1285  owningReductionGens, owningAtomicReductionGens,
1286  privateReductionVariables, reductionInfos);
1287 
1288  // The call to createReductions below expects the block to have a
1289  // terminator. Create an unreachable instruction to serve as terminator
1290  // and remove it later.
1291  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1292  builder.SetInsertPoint(tempTerminator);
1293  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
1294  ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
1295  isByRef, isNowait, isTeamsReduction);
1296 
1297  if (failed(handleError(contInsertPoint, *op)))
1298  return failure();
1299 
1300  if (!contInsertPoint->getBlock())
1301  return op->emitOpError() << "failed to convert reductions";
1302 
1303  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1304  ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);
1305 
1306  if (failed(handleError(afterIP, *op)))
1307  return failure();
1308 
1309  tempTerminator->eraseFromParent();
1310  builder.restoreIP(*afterIP);
1311 
1312  // after the construct, deallocate private reduction variables
1313  SmallVector<Region *> reductionRegions;
1314  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
1315  [](omp::DeclareReductionOp reductionDecl) {
1316  return &reductionDecl.getCleanupRegion();
1317  });
1318  return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
1319  moduleTranslation, builder,
1320  "omp.reduction.cleanup");
1321  return success();
1322 }
1323 
1324 static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
1325  if (!attr)
1326  return {};
1327  return *attr;
1328 }
1329 
1330 // TODO: not used by omp.parallel
1331 template <typename OP>
1332 static LogicalResult allocAndInitializeReductionVars(
1333  OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
1334  LLVM::ModuleTranslation &moduleTranslation,
1335  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1337  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
1338  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1339  llvm::ArrayRef<bool> isByRef) {
1340  if (op.getNumReductionVars() == 0)
1341  return success();
1342 
1343  SmallVector<DeferredStore> deferredStores;
1344 
1345  if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
1346  allocaIP, reductionDecls,
1347  privateReductionVariables, reductionVariableMap,
1348  deferredStores, isByRef)))
1349  return failure();
1350 
1351  return initReductionVars(op, reductionArgs, builder, moduleTranslation,
1352  allocaIP.getBlock(), reductionDecls,
1353  privateReductionVariables, reductionVariableMap,
1354  isByRef, deferredStores);
1355 }
1356 
1357 /// Return the llvm::Value * corresponding to the `privateVar` that
1358 /// is being privatized. It isn't always as simple as looking up
1359 /// moduleTranslation with privateVar. For instance, in case of
1360 /// an allocatable, the descriptor for the allocatable is privatized.
1361 /// This descriptor is mapped using an MapInfoOp. So, this function
1362 /// will return a pointer to the llvm::Value corresponding to the
1363 /// block argument for the mapped descriptor.
1364 static llvm::Value *
1365 findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
1366  LLVM::ModuleTranslation &moduleTranslation,
1367  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1368  if (mappedPrivateVars == nullptr || !mappedPrivateVars->contains(privateVar))
1369  return moduleTranslation.lookupValue(privateVar);
1370 
1371  Value blockArg = (*mappedPrivateVars)[privateVar];
1372  Type privVarType = privateVar.getType();
1373  Type blockArgType = blockArg.getType();
1374  assert(isa<LLVM::LLVMPointerType>(blockArgType) &&
1375  "A block argument corresponding to a mapped var should have "
1376  "!llvm.ptr type");
1377 
1378  if (privVarType == blockArgType)
1379  return moduleTranslation.lookupValue(blockArg);
1380 
1381  // This typically happens when the privatized type is lowered from
1382  // boxchar<KIND> and gets lowered to !llvm.struct<(ptr, i64)>. That is the
1383  // struct/pair is passed by value. But, mapped values are passed only as
1384  // pointers, so before we privatize, we must load the pointer.
1385  if (!isa<LLVM::LLVMPointerType>(privVarType))
1386  return builder.CreateLoad(moduleTranslation.convertType(privVarType),
1387  moduleTranslation.lookupValue(blockArg));
1388 
1389  return moduleTranslation.lookupValue(privateVar);
1390 }
1391 
1392 /// Initialize a single (first)private variable. You probably want to use
1393 /// allocateAndInitPrivateVars instead of this.
1394 /// This returns the private variable which has been initialized. This
1395 /// variable should be mapped before constructing the body of the Op.
1397  llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
1398  omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg,
1399  llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock,
1400  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1401  Region &initRegion = privDecl.getInitRegion();
1402  if (initRegion.empty())
1403  return llvmPrivateVar;
1404 
1405  // map initialization region block arguments
1406  llvm::Value *nonPrivateVar = findAssociatedValue(
1407  mlirPrivVar, builder, moduleTranslation, mappedPrivateVars);
1408  assert(nonPrivateVar);
1409  moduleTranslation.mapValue(privDecl.getInitMoldArg(), nonPrivateVar);
1410  moduleTranslation.mapValue(privDecl.getInitPrivateArg(), llvmPrivateVar);
1411 
1412  // in-place convert the private initialization region
1414  if (failed(inlineConvertOmpRegions(initRegion, "omp.private.init", builder,
1415  moduleTranslation, &phis)))
1416  return llvm::createStringError(
1417  "failed to inline `init` region of `omp.private`");
1418 
1419  assert(phis.size() == 1 && "expected one allocation to be yielded");
1420 
1421  // clear init region block argument mapping in case it needs to be
1422  // re-created with a different source for another use of the same
1423  // reduction decl
1424  moduleTranslation.forgetMapping(initRegion);
1425 
1426  // Prefer the value yielded from the init region to the allocated private
1427  // variable in case the region is operating on arguments by-value (e.g.
1428  // Fortran character boxes).
1429  return phis[0];
1430 }
1431 
1432 static llvm::Error
1433 initPrivateVars(llvm::IRBuilderBase &builder,
1434  LLVM::ModuleTranslation &moduleTranslation,
1435  PrivateVarsInfo &privateVarsInfo,
1436  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1437  if (privateVarsInfo.blockArgs.empty())
1438  return llvm::Error::success();
1439 
1440  llvm::BasicBlock *privInitBlock = splitBB(builder, true, "omp.private.init");
1441  setInsertPointForPossiblyEmptyBlock(builder, privInitBlock);
1442 
1443  for (auto [idx, zip] : llvm::enumerate(llvm::zip_equal(
1444  privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
1445  privateVarsInfo.blockArgs, privateVarsInfo.llvmVars))) {
1446  auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVar] = zip;
1448  builder, moduleTranslation, privDecl, mlirPrivVar, blockArg,
1449  llvmPrivateVar, privInitBlock, mappedPrivateVars);
1450 
1451  if (!privVarOrErr)
1452  return privVarOrErr.takeError();
1453 
1454  llvmPrivateVar = privVarOrErr.get();
1455  moduleTranslation.mapValue(blockArg, llvmPrivateVar);
1456 
1458  }
1459 
1460  return llvm::Error::success();
1461 }
1462 
1463 /// Allocate and initialize delayed private variables. Returns the basic block
1464 /// which comes after all of these allocations. llvm::Value * for each of these
1465 /// private variables are populated in llvmPrivateVars.
1467 allocatePrivateVars(llvm::IRBuilderBase &builder,
1468  LLVM::ModuleTranslation &moduleTranslation,
1469  PrivateVarsInfo &privateVarsInfo,
1470  const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1471  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1472  // Allocate private vars
1473  llvm::Instruction *allocaTerminator = allocaIP.getBlock()->getTerminator();
1474  splitBB(llvm::OpenMPIRBuilder::InsertPointTy(allocaIP.getBlock(),
1475  allocaTerminator->getIterator()),
1476  true, allocaTerminator->getStableDebugLoc(),
1477  "omp.region.after_alloca");
1478 
1479  llvm::IRBuilderBase::InsertPointGuard guard(builder);
1480  // Update the allocaTerminator since the alloca block was split above.
1481  allocaTerminator = allocaIP.getBlock()->getTerminator();
1482  builder.SetInsertPoint(allocaTerminator);
1483  // The new terminator is an uncondition branch created by the splitBB above.
1484  assert(allocaTerminator->getNumSuccessors() == 1 &&
1485  "This is an unconditional branch created by splitBB");
1486 
1487  llvm::DataLayout dataLayout = builder.GetInsertBlock()->getDataLayout();
1488  llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);
1489 
1490  unsigned int allocaAS =
1491  moduleTranslation.getLLVMModule()->getDataLayout().getAllocaAddrSpace();
1492  unsigned int defaultAS = moduleTranslation.getLLVMModule()
1493  ->getDataLayout()
1494  .getProgramAddressSpace();
1495 
1496  for (auto [privDecl, mlirPrivVar, blockArg] :
1497  llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
1498  privateVarsInfo.blockArgs)) {
1499  llvm::Type *llvmAllocType =
1500  moduleTranslation.convertType(privDecl.getType());
1501  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1502  llvm::Value *llvmPrivateVar = builder.CreateAlloca(
1503  llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
1504  if (allocaAS != defaultAS)
1505  llvmPrivateVar = builder.CreateAddrSpaceCast(llvmPrivateVar,
1506  builder.getPtrTy(defaultAS));
1507 
1508  privateVarsInfo.llvmVars.push_back(llvmPrivateVar);
1509  }
1510 
1511  return afterAllocas;
1512 }
1513 
1514 static LogicalResult copyFirstPrivateVars(
1515  llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
1516  SmallVectorImpl<mlir::Value> &mlirPrivateVars,
1517  ArrayRef<llvm::Value *> llvmPrivateVars,
1519  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1520  // Apply copy region for firstprivate.
1521  bool needsFirstprivate =
1522  llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1523  return privOp.getDataSharingType() ==
1524  omp::DataSharingClauseType::FirstPrivate;
1525  });
1526 
1527  if (!needsFirstprivate)
1528  return success();
1529 
1530  llvm::BasicBlock *copyBlock =
1531  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1532  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
1533 
1534  for (auto [decl, mlirVar, llvmVar] :
1535  llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1536  if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1537  continue;
1538 
1539  // copyRegion implements `lhs = rhs`
1540  Region &copyRegion = decl.getCopyRegion();
1541 
1542  // map copyRegion rhs arg
1543  llvm::Value *nonPrivateVar = findAssociatedValue(
1544  mlirVar, builder, moduleTranslation, mappedPrivateVars);
1545  assert(nonPrivateVar);
1546  moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1547 
1548  // map copyRegion lhs arg
1549  moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1550 
1551  // in-place convert copy region
1552  if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
1553  moduleTranslation)))
1554  return decl.emitError("failed to inline `copy` region of `omp.private`");
1555 
1557 
1558  // ignore unused value yielded from copy region
1559 
1560  // clear copy region block argument mapping in case it needs to be
1561  // re-created with different sources for reuse of the same reduction
1562  // decl
1563  moduleTranslation.forgetMapping(copyRegion);
1564  }
1565 
1566  return success();
1567 }
1568 
1569 static LogicalResult
1570 cleanupPrivateVars(llvm::IRBuilderBase &builder,
1571  LLVM::ModuleTranslation &moduleTranslation, Location loc,
1572  SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
1573  SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
1574  // private variable deallocation
1575  SmallVector<Region *> privateCleanupRegions;
1576  llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
1577  [](omp::PrivateClauseOp privatizer) {
1578  return &privatizer.getDeallocRegion();
1579  });
1580 
1581  if (failed(inlineOmpRegionCleanup(
1582  privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1583  "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1584  return mlir::emitError(loc, "failed to inline `dealloc` region of an "
1585  "`omp.private` op in");
1586 
1587  return success();
1588 }
1589 
1590 /// Returns true if the construct contains omp.cancel or omp.cancellation_point
1592  // omp.cancel and omp.cancellation_point must be "closely nested" so they will
1593  // be visible and not inside of function calls. This is enforced by the
1594  // verifier.
1595  return op
1596  ->walk([](Operation *child) {
1597  if (mlir::isa<omp::CancelOp, omp::CancellationPointOp>(child))
1598  return WalkResult::interrupt();
1599  return WalkResult::advance();
1600  })
1601  .wasInterrupted();
1602 }
1603 
1604 static LogicalResult
1605 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
1606  LLVM::ModuleTranslation &moduleTranslation) {
1607  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1608  using StorableBodyGenCallbackTy =
1609  llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
1610 
1611  auto sectionsOp = cast<omp::SectionsOp>(opInst);
1612 
1613  if (failed(checkImplementationStatus(opInst)))
1614  return failure();
1615 
1616  llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionByref());
1617  assert(isByRef.size() == sectionsOp.getNumReductionVars());
1618 
1619  SmallVector<omp::DeclareReductionOp> reductionDecls;
1620  collectReductionDecls(sectionsOp, reductionDecls);
1621  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1622  findAllocaInsertPoint(builder, moduleTranslation);
1623 
1624  SmallVector<llvm::Value *> privateReductionVariables(
1625  sectionsOp.getNumReductionVars());
1626  DenseMap<Value, llvm::Value *> reductionVariableMap;
1627 
1628  MutableArrayRef<BlockArgument> reductionArgs =
1629  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
1630 
1632  sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
1633  reductionDecls, privateReductionVariables, reductionVariableMap,
1634  isByRef)))
1635  return failure();
1636 
1638 
1639  for (Operation &op : *sectionsOp.getRegion().begin()) {
1640  auto sectionOp = dyn_cast<omp::SectionOp>(op);
1641  if (!sectionOp) // omp.terminator
1642  continue;
1643 
1644  Region &region = sectionOp.getRegion();
1645  auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation](
1646  InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1647  builder.restoreIP(codeGenIP);
1648 
1649  // map the omp.section reduction block argument to the omp.sections block
1650  // arguments
1651  // TODO: this assumes that the only block arguments are reduction
1652  // variables
1653  assert(region.getNumArguments() ==
1654  sectionsOp.getRegion().getNumArguments());
1655  for (auto [sectionsArg, sectionArg] : llvm::zip_equal(
1656  sectionsOp.getRegion().getArguments(), region.getArguments())) {
1657  llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg);
1658  assert(llvmVal);
1659  moduleTranslation.mapValue(sectionArg, llvmVal);
1660  }
1661 
1662  return convertOmpOpRegions(region, "omp.section.region", builder,
1663  moduleTranslation)
1664  .takeError();
1665  };
1666  sectionCBs.push_back(sectionCB);
1667  }
1668 
1669  // No sections within omp.sections operation - skip generation. This situation
1670  // is only possible if there is only a terminator operation inside the
1671  // sections operation
1672  if (sectionCBs.empty())
1673  return success();
1674 
1675  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
1676 
1677  // TODO: Perform appropriate actions according to the data-sharing
1678  // attribute (shared, private, firstprivate, ...) of variables.
1679  // Currently defaults to shared.
1680  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
1681  llvm::Value &vPtr, llvm::Value *&replacementValue)
1682  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
1683  replacementValue = &vPtr;
1684  return codeGenIP;
1685  };
1686 
1687  // TODO: Perform finalization actions for variables. This has to be
1688  // called for variables which have destructors/finalizers.
1689  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1690 
1691  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1692  bool isCancellable = constructIsCancellable(sectionsOp);
1693  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1694  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1695  moduleTranslation.getOpenMPBuilder()->createSections(
1696  ompLoc, allocaIP, sectionCBs, privCB, finiCB, isCancellable,
1697  sectionsOp.getNowait());
1698 
1699  if (failed(handleError(afterIP, opInst)))
1700  return failure();
1701 
1702  builder.restoreIP(*afterIP);
1703 
1704  // Process the reductions if required.
1706  sectionsOp, builder, moduleTranslation, allocaIP, reductionDecls,
1707  privateReductionVariables, isByRef, sectionsOp.getNowait());
1708 }
1709 
1710 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
1711 static LogicalResult
1712 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
1713  LLVM::ModuleTranslation &moduleTranslation) {
1714  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1715  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1716 
1717  if (failed(checkImplementationStatus(*singleOp)))
1718  return failure();
1719 
1720  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1721  builder.restoreIP(codegenIP);
1722  return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region",
1723  builder, moduleTranslation)
1724  .takeError();
1725  };
1726  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1727 
1728  // Handle copyprivate
1729  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
1730  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateSyms();
1733  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
1734  llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
1735  auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
1736  singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
1737  llvmCPFuncs.push_back(
1738  moduleTranslation.lookupFunction(llvmFuncOp.getName()));
1739  }
1740 
1741  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1742  moduleTranslation.getOpenMPBuilder()->createSingle(
1743  ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars,
1744  llvmCPFuncs);
1745 
1746  if (failed(handleError(afterIP, *singleOp)))
1747  return failure();
1748 
1749  builder.restoreIP(*afterIP);
1750  return success();
1751 }
1752 
1753 static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp) {
1754  auto iface =
1755  llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(teamsOp.getOperation());
1756  // Check that all uses of the reduction block arg has the same distribute op
1757  // parent.
1759  Operation *distOp = nullptr;
1760  for (auto ra : iface.getReductionBlockArgs())
1761  for (auto &use : ra.getUses()) {
1762  auto *useOp = use.getOwner();
1763  // Ignore debug uses.
1764  if (mlir::isa<LLVM::DbgDeclareOp, LLVM::DbgValueOp>(useOp)) {
1765  debugUses.push_back(useOp);
1766  continue;
1767  }
1768 
1769  auto currentDistOp = useOp->getParentOfType<omp::DistributeOp>();
1770  // Use is not inside a distribute op - return false
1771  if (!currentDistOp)
1772  return false;
1773  // Multiple distribute operations - return false
1774  Operation *currentOp = currentDistOp.getOperation();
1775  if (distOp && (distOp != currentOp))
1776  return false;
1777 
1778  distOp = currentOp;
1779  }
1780 
1781  // If we are going to use distribute reduction then remove any debug uses of
1782  // the reduction parameters in teamsOp. Otherwise they will be left without
1783  // any mapped value in moduleTranslation and will eventually error out.
1784  for (auto use : debugUses)
1785  use->erase();
1786  return true;
1787 }
1788 
1789 // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder
1790 static LogicalResult
1791 convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
1792  LLVM::ModuleTranslation &moduleTranslation) {
1793  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1794  if (failed(checkImplementationStatus(*op)))
1795  return failure();
1796 
1797  DenseMap<Value, llvm::Value *> reductionVariableMap;
1798  unsigned numReductionVars = op.getNumReductionVars();
1799  SmallVector<omp::DeclareReductionOp> reductionDecls;
1800  SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
1801  llvm::ArrayRef<bool> isByRef;
1802  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1803  findAllocaInsertPoint(builder, moduleTranslation);
1804 
1805  // Only do teams reduction if there is no distribute op that captures the
1806  // reduction instead.
1807  bool doTeamsReduction = !teamsReductionContainedInDistribute(op);
1808  if (doTeamsReduction) {
1809  isByRef = getIsByRef(op.getReductionByref());
1810 
1811  assert(isByRef.size() == op.getNumReductionVars());
1812 
1813  MutableArrayRef<BlockArgument> reductionArgs =
1814  llvm::cast<omp::BlockArgOpenMPOpInterface>(*op).getReductionBlockArgs();
1815 
1816  collectReductionDecls(op, reductionDecls);
1817 
1819  op, reductionArgs, builder, moduleTranslation, allocaIP,
1820  reductionDecls, privateReductionVariables, reductionVariableMap,
1821  isByRef)))
1822  return failure();
1823  }
1824 
1825  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1827  moduleTranslation, allocaIP);
1828  builder.restoreIP(codegenIP);
1829  return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
1830  moduleTranslation)
1831  .takeError();
1832  };
1833 
1834  llvm::Value *numTeamsLower = nullptr;
1835  if (Value numTeamsLowerVar = op.getNumTeamsLower())
1836  numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);
1837 
1838  llvm::Value *numTeamsUpper = nullptr;
1839  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
1840  numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);
1841 
1842  llvm::Value *threadLimit = nullptr;
1843  if (Value threadLimitVar = op.getThreadLimit())
1844  threadLimit = moduleTranslation.lookupValue(threadLimitVar);
1845 
1846  llvm::Value *ifExpr = nullptr;
1847  if (Value ifVar = op.getIfExpr())
1848  ifExpr = moduleTranslation.lookupValue(ifVar);
1849 
1850  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1851  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1852  moduleTranslation.getOpenMPBuilder()->createTeams(
1853  ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr);
1854 
1855  if (failed(handleError(afterIP, *op)))
1856  return failure();
1857 
1858  builder.restoreIP(*afterIP);
1859  if (doTeamsReduction) {
1860  // Process the reductions if required.
1862  op, builder, moduleTranslation, allocaIP, reductionDecls,
1863  privateReductionVariables, isByRef,
1864  /*isNoWait*/ false, /*isTeamsReduction*/ true);
1865  }
1866  return success();
1867 }
1868 
1869 static void
1870 buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
1871  LLVM::ModuleTranslation &moduleTranslation,
1873  if (dependVars.empty())
1874  return;
1875  for (auto dep : llvm::zip(dependVars, dependKinds->getValue())) {
1876  llvm::omp::RTLDependenceKindTy type;
1877  switch (
1878  cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
1879  case mlir::omp::ClauseTaskDepend::taskdependin:
1880  type = llvm::omp::RTLDependenceKindTy::DepIn;
1881  break;
1882  // The OpenMP runtime requires that the codegen for 'depend' clause for
1883  // 'out' dependency kind must be the same as codegen for 'depend' clause
1884  // with 'inout' dependency.
1885  case mlir::omp::ClauseTaskDepend::taskdependout:
1886  case mlir::omp::ClauseTaskDepend::taskdependinout:
1887  type = llvm::omp::RTLDependenceKindTy::DepInOut;
1888  break;
1889  case mlir::omp::ClauseTaskDepend::taskdependmutexinoutset:
1890  type = llvm::omp::RTLDependenceKindTy::DepMutexInOutSet;
1891  break;
1892  case mlir::omp::ClauseTaskDepend::taskdependinoutset:
1893  type = llvm::omp::RTLDependenceKindTy::DepInOutSet;
1894  break;
1895  };
1896  llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
1897  llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
1898  dds.emplace_back(dd);
1899  }
1900 }
1901 
1902 /// Shared implementation of a callback which adds a termiator for the new block
1903 /// created for the branch taken when an openmp construct is cancelled. The
1904 /// terminator is saved in \p cancelTerminators. This callback is invoked only
1905 /// if there is cancellation inside of the taskgroup body.
1906 /// The terminator will need to be fixed to branch to the correct block to
1907 /// cleanup the construct.
1908 static void
1910  llvm::IRBuilderBase &llvmBuilder,
1911  llvm::OpenMPIRBuilder &ompBuilder, mlir::Operation *op,
1912  llvm::omp::Directive cancelDirective) {
1913  auto finiCB = [&](llvm::OpenMPIRBuilder::InsertPointTy ip) -> llvm::Error {
1914  llvm::IRBuilderBase::InsertPointGuard guard(llvmBuilder);
1915 
1916  // ip is currently in the block branched to if cancellation occured.
1917  // We need to create a branch to terminate that block.
1918  llvmBuilder.restoreIP(ip);
1919 
1920  // We must still clean up the construct after cancelling it, so we need to
1921  // branch to the block that finalizes the taskgroup.
1922  // That block has not been created yet so use this block as a dummy for now
1923  // and fix this after creating the operation.
1924  cancelTerminators.push_back(llvmBuilder.CreateBr(ip.getBlock()));
1925  return llvm::Error::success();
1926  };
1927  // We have to add the cleanup to the OpenMPIRBuilder before the body gets
1928  // created in case the body contains omp.cancel (which will then expect to be
1929  // able to find this cleanup callback).
1930  ompBuilder.pushFinalizationCB(
1931  {finiCB, cancelDirective, constructIsCancellable(op)});
1932 }
1933 
1934 /// If we cancelled the construct, we should branch to the finalization block of
1935 /// that construct. OMPIRBuilder structures the CFG such that the cleanup block
1936 /// is immediately before the continuation block. Now this finalization has
1937 /// been created we can fix the branch.
1938 static void
1940  llvm::OpenMPIRBuilder &ompBuilder,
1941  const llvm::OpenMPIRBuilder::InsertPointTy &afterIP) {
1942  ompBuilder.popFinalizationCB();
1943  llvm::BasicBlock *constructFini = afterIP.getBlock()->getSinglePredecessor();
1944  for (llvm::BranchInst *cancelBranch : cancelTerminators) {
1945  assert(cancelBranch->getNumSuccessors() == 1 &&
1946  "cancel branch should have one target");
1947  cancelBranch->setSuccessor(0, constructFini);
1948  }
1949 }
1950 
1951 namespace {
1952 /// TaskContextStructManager takes care of creating and freeing a structure
1953 /// containing information needed by the task body to execute.
1954 class TaskContextStructManager {
1955 public:
1956  TaskContextStructManager(llvm::IRBuilderBase &builder,
1957  LLVM::ModuleTranslation &moduleTranslation,
1959  : builder{builder}, moduleTranslation{moduleTranslation},
1960  privateDecls{privateDecls} {}
1961 
1962  /// Creates a heap allocated struct containing space for each private
1963  /// variable. Invariant: privateVarTypes, privateDecls, and the elements of
1964  /// the structure should all have the same order (although privateDecls which
1965  /// do not read from the mold argument are skipped).
1966  void generateTaskContextStruct();
1967 
1968  /// Create GEPs to access each member of the structure representing a private
1969  /// variable, adding them to llvmPrivateVars. Null values are added where
1970  /// private decls were skipped so that the ordering continues to match the
1971  /// private decls.
1972  void createGEPsToPrivateVars();
1973 
1974  /// De-allocate the task context structure.
1975  void freeStructPtr();
1976 
1977  MutableArrayRef<llvm::Value *> getLLVMPrivateVarGEPs() {
1978  return llvmPrivateVarGEPs;
1979  }
1980 
1981  llvm::Value *getStructPtr() { return structPtr; }
1982 
1983 private:
1984  llvm::IRBuilderBase &builder;
1985  LLVM::ModuleTranslation &moduleTranslation;
1987 
1988  /// The type of each member of the structure, in order.
1989  SmallVector<llvm::Type *> privateVarTypes;
1990 
1991  /// LLVM values for each private variable, or null if that private variable is
1992  /// not included in the task context structure
1993  SmallVector<llvm::Value *> llvmPrivateVarGEPs;
1994 
1995  /// A pointer to the structure containing context for this task.
1996  llvm::Value *structPtr = nullptr;
1997  /// The type of the structure
1998  llvm::Type *structTy = nullptr;
1999 };
2000 } // namespace
2001 
2002 void TaskContextStructManager::generateTaskContextStruct() {
2003  if (privateDecls.empty())
2004  return;
2005  privateVarTypes.reserve(privateDecls.size());
2006 
2007  for (omp::PrivateClauseOp &privOp : privateDecls) {
2008  // Skip private variables which can safely be allocated and initialised
2009  // inside of the task
2010  if (!privOp.readsFromMold())
2011  continue;
2012  Type mlirType = privOp.getType();
2013  privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
2014  }
2015 
2016  structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(),
2017  privateVarTypes);
2018 
2019  llvm::DataLayout dataLayout =
2020  builder.GetInsertBlock()->getModule()->getDataLayout();
2021  llvm::Type *intPtrTy = builder.getIntPtrTy(dataLayout);
2022  llvm::Constant *allocSize = llvm::ConstantExpr::getSizeOf(structTy);
2023 
2024  // Heap allocate the structure
2025  structPtr = builder.CreateMalloc(intPtrTy, structTy, allocSize,
2026  /*ArraySize=*/nullptr, /*MallocF=*/nullptr,
2027  "omp.task.context_ptr");
2028 }
2029 
2030 void TaskContextStructManager::createGEPsToPrivateVars() {
2031  if (!structPtr) {
2032  assert(privateVarTypes.empty());
2033  return;
2034  }
2035 
2036  // Create GEPs for each struct member
2037  llvmPrivateVarGEPs.clear();
2038  llvmPrivateVarGEPs.reserve(privateDecls.size());
2039  llvm::Value *zero = builder.getInt32(0);
2040  unsigned i = 0;
2041  for (auto privDecl : privateDecls) {
2042  if (!privDecl.readsFromMold()) {
2043  // Handle this inside of the task so we don't pass unnessecary vars in
2044  llvmPrivateVarGEPs.push_back(nullptr);
2045  continue;
2046  }
2047  llvm::Value *iVal = builder.getInt32(i);
2048  llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
2049  llvmPrivateVarGEPs.push_back(gep);
2050  i += 1;
2051  }
2052 }
2053 
2054 void TaskContextStructManager::freeStructPtr() {
2055  if (!structPtr)
2056  return;
2057 
2058  llvm::IRBuilderBase::InsertPointGuard guard{builder};
2059  // Ensure we don't put the call to free() after the terminator
2060  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
2061  builder.CreateFree(structPtr);
2062 }
2063 
2064 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
2065 static LogicalResult
2066 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
2067  LLVM::ModuleTranslation &moduleTranslation) {
2068  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2069  if (failed(checkImplementationStatus(*taskOp)))
2070  return failure();
2071 
2072  PrivateVarsInfo privateVarsInfo(taskOp);
2073  TaskContextStructManager taskStructMgr{builder, moduleTranslation,
2074  privateVarsInfo.privatizers};
2075 
2076  // Allocate and copy private variables before creating the task. This avoids
2077  // accessing invalid memory if (after this scope ends) the private variables
2078  // are initialized from host variables or if the variables are copied into
2079  // from host variables (firstprivate). The insertion point is just before
2080  // where the code for creating and scheduling the task will go. That puts this
2081  // code outside of the outlined task region, which is what we want because
2082  // this way the initialization and copy regions are executed immediately while
2083  // the host variable data are still live.
2084 
2085  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2086  findAllocaInsertPoint(builder, moduleTranslation);
2087 
2088  // Not using splitBB() because that requires the current block to have a
2089  // terminator.
2090  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
2091  llvm::BasicBlock *taskStartBlock = llvm::BasicBlock::Create(
2092  builder.getContext(), "omp.task.start",
2093  /*Parent=*/builder.GetInsertBlock()->getParent());
2094  llvm::Instruction *branchToTaskStartBlock = builder.CreateBr(taskStartBlock);
2095  builder.SetInsertPoint(branchToTaskStartBlock);
2096 
2097  // Now do this again to make the initialization and copy blocks
2098  llvm::BasicBlock *copyBlock =
2099  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
2100  llvm::BasicBlock *initBlock =
2101  splitBB(builder, /*CreateBranch=*/true, "omp.private.init");
2102 
2103  // Now the control flow graph should look like
2104  // starter_block:
2105  // <---- where we started when convertOmpTaskOp was called
2106  // br %omp.private.init
2107  // omp.private.init:
2108  // br %omp.private.copy
2109  // omp.private.copy:
2110  // br %omp.task.start
2111  // omp.task.start:
2112  // <---- where we want the insertion point to be when we call createTask()
2113 
2114  // Save the alloca insertion point on ModuleTranslation stack for use in
2115  // nested regions.
2117  moduleTranslation, allocaIP);
2118 
2119  // Allocate and initialize private variables
2120  builder.SetInsertPoint(initBlock->getTerminator());
2121 
2122  // Create task variable structure
2123  taskStructMgr.generateTaskContextStruct();
2124  // GEPs so that we can initialize the variables. Don't use these GEPs inside
2125  // of the body otherwise it will be the GEP not the struct which is fowarded
2126  // to the outlined function. GEPs forwarded in this way are passed in a
2127  // stack-allocated (by OpenMPIRBuilder) structure which is not safe for tasks
2128  // which may not be executed until after the current stack frame goes out of
2129  // scope.
2130  taskStructMgr.createGEPsToPrivateVars();
2131 
2132  for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
2133  llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
2134  privateVarsInfo.blockArgs,
2135  taskStructMgr.getLLVMPrivateVarGEPs())) {
2136  // To be handled inside the task.
2137  if (!privDecl.readsFromMold())
2138  continue;
2139  assert(llvmPrivateVarAlloc &&
2140  "reads from mold so shouldn't have been skipped");
2141 
2142  llvm::Expected<llvm::Value *> privateVarOrErr =
2143  initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2144  blockArg, llvmPrivateVarAlloc, initBlock);
2145  if (!privateVarOrErr)
2146  return handleError(privateVarOrErr, *taskOp.getOperation());
2147 
2148  llvm::IRBuilderBase::InsertPointGuard guard(builder);
2149  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
2150 
2151  // TODO: this is a bit of a hack for Fortran character boxes.
2152  // Character boxes are passed by value into the init region and then the
2153  // initialized character box is yielded by value. Here we need to store the
2154  // yielded value into the private allocation, and load the private
2155  // allocation to match the type expected by region block arguments.
2156  if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
2157  !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
2158  builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);
2159  // Load it so we have the value pointed to by the GEP
2160  llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(),
2161  llvmPrivateVarAlloc);
2162  }
2163  assert(llvmPrivateVarAlloc->getType() ==
2164  moduleTranslation.convertType(blockArg.getType()));
2165 
2166  // Mapping blockArg -> llvmPrivateVarAlloc is done inside the body callback
2167  // so that OpenMPIRBuilder doesn't try to pass each GEP address through a
2168  // stack allocated structure.
2169  }
2170 
2171  // firstprivate copy region
2172  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
2173  if (failed(copyFirstPrivateVars(
2174  builder, moduleTranslation, privateVarsInfo.mlirVars,
2175  taskStructMgr.getLLVMPrivateVarGEPs(), privateVarsInfo.privatizers)))
2176  return llvm::failure();
2177 
2178  // Set up for call to createTask()
2179  builder.SetInsertPoint(taskStartBlock);
2180 
2181  auto bodyCB = [&](InsertPointTy allocaIP,
2182  InsertPointTy codegenIP) -> llvm::Error {
2183  // Save the alloca insertion point on ModuleTranslation stack for use in
2184  // nested regions.
2186  moduleTranslation, allocaIP);
2187 
2188  // translate the body of the task:
2189  builder.restoreIP(codegenIP);
2190 
2191  llvm::BasicBlock *privInitBlock = nullptr;
2192  privateVarsInfo.llvmVars.resize(privateVarsInfo.blockArgs.size());
2193  for (auto [i, zip] : llvm::enumerate(llvm::zip_equal(
2194  privateVarsInfo.blockArgs, privateVarsInfo.privatizers,
2195  privateVarsInfo.mlirVars))) {
2196  auto [blockArg, privDecl, mlirPrivVar] = zip;
2197  // This is handled before the task executes
2198  if (privDecl.readsFromMold())
2199  continue;
2200 
2201  llvm::IRBuilderBase::InsertPointGuard guard(builder);
2202  llvm::Type *llvmAllocType =
2203  moduleTranslation.convertType(privDecl.getType());
2204  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
2205  llvm::Value *llvmPrivateVar = builder.CreateAlloca(
2206  llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
2207 
2208  llvm::Expected<llvm::Value *> privateVarOrError =
2209  initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2210  blockArg, llvmPrivateVar, privInitBlock);
2211  if (!privateVarOrError)
2212  return privateVarOrError.takeError();
2213  moduleTranslation.mapValue(blockArg, privateVarOrError.get());
2214  privateVarsInfo.llvmVars[i] = privateVarOrError.get();
2215  }
2216 
2217  taskStructMgr.createGEPsToPrivateVars();
2218  for (auto [i, llvmPrivVar] :
2219  llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) {
2220  if (!llvmPrivVar) {
2221  assert(privateVarsInfo.llvmVars[i] &&
2222  "This is added in the loop above");
2223  continue;
2224  }
2225  privateVarsInfo.llvmVars[i] = llvmPrivVar;
2226  }
2227 
2228  // Find and map the addresses of each variable within the task context
2229  // structure
2230  for (auto [blockArg, llvmPrivateVar, privateDecl] :
2231  llvm::zip_equal(privateVarsInfo.blockArgs, privateVarsInfo.llvmVars,
2232  privateVarsInfo.privatizers)) {
2233  // This was handled above.
2234  if (!privateDecl.readsFromMold())
2235  continue;
2236  // Fix broken pass-by-value case for Fortran character boxes
2237  if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
2238  llvmPrivateVar = builder.CreateLoad(
2239  moduleTranslation.convertType(blockArg.getType()), llvmPrivateVar);
2240  }
2241  assert(llvmPrivateVar->getType() ==
2242  moduleTranslation.convertType(blockArg.getType()));
2243  moduleTranslation.mapValue(blockArg, llvmPrivateVar);
2244  }
2245 
2246  auto continuationBlockOrError = convertOmpOpRegions(
2247  taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
2248  if (failed(handleError(continuationBlockOrError, *taskOp)))
2249  return llvm::make_error<PreviouslyReportedError>();
2250 
2251  builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
2252 
2253  if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(),
2254  privateVarsInfo.llvmVars,
2255  privateVarsInfo.privatizers)))
2256  return llvm::make_error<PreviouslyReportedError>();
2257 
2258  // Free heap allocated task context structure at the end of the task.
2259  taskStructMgr.freeStructPtr();
2260 
2261  return llvm::Error::success();
2262  };
2263 
2264  llvm::OpenMPIRBuilder &ompBuilder = *moduleTranslation.getOpenMPBuilder();
2265  SmallVector<llvm::BranchInst *> cancelTerminators;
2266  // The directive to match here is OMPD_taskgroup because it is the taskgroup
2267  // which is canceled. This is handled here because it is the task's cleanup
2268  // block which should be branched to.
2269  pushCancelFinalizationCB(cancelTerminators, builder, ompBuilder, taskOp,
2270  llvm::omp::Directive::OMPD_taskgroup);
2271 
2273  buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
2274  moduleTranslation, dds);
2275 
2276  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2277  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2278  moduleTranslation.getOpenMPBuilder()->createTask(
2279  ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
2280  moduleTranslation.lookupValue(taskOp.getFinal()),
2281  moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
2282  taskOp.getMergeable(),
2283  moduleTranslation.lookupValue(taskOp.getEventHandle()),
2284  moduleTranslation.lookupValue(taskOp.getPriority()));
2285 
2286  if (failed(handleError(afterIP, *taskOp)))
2287  return failure();
2288 
2289  // Set the correct branch target for task cancellation
2290  popCancelFinalizationCB(cancelTerminators, ompBuilder, afterIP.get());
2291 
2292  builder.restoreIP(*afterIP);
2293  return success();
2294 }
2295 
2296 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
2297 static LogicalResult
2298 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
2299  LLVM::ModuleTranslation &moduleTranslation) {
2300  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2301  if (failed(checkImplementationStatus(*tgOp)))
2302  return failure();
2303 
2304  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
2305  builder.restoreIP(codegenIP);
2306  return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region",
2307  builder, moduleTranslation)
2308  .takeError();
2309  };
2310 
2311  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
2312  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2313  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2314  moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP,
2315  bodyCB);
2316 
2317  if (failed(handleError(afterIP, *tgOp)))
2318  return failure();
2319 
2320  builder.restoreIP(*afterIP);
2321  return success();
2322 }
2323 
2324 static LogicalResult
2325 convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder,
2326  LLVM::ModuleTranslation &moduleTranslation) {
2327  if (failed(checkImplementationStatus(*twOp)))
2328  return failure();
2329 
2330  moduleTranslation.getOpenMPBuilder()->createTaskwait(builder.saveIP());
2331  return success();
2332 }
2333 
2334 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
2335 static LogicalResult
2336 convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
2337  LLVM::ModuleTranslation &moduleTranslation) {
2338  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2339  auto wsloopOp = cast<omp::WsloopOp>(opInst);
2340  if (failed(checkImplementationStatus(opInst)))
2341  return failure();
2342 
2343  auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
2344  llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref());
2345  assert(isByRef.size() == wsloopOp.getNumReductionVars());
2346 
2347  // Static is the default.
2348  auto schedule =
2349  wsloopOp.getScheduleKind().value_or(omp::ClauseScheduleKind::Static);
2350 
2351  // Find the loop configuration.
2352  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]);
2353  llvm::Type *ivType = step->getType();
2354  llvm::Value *chunk = nullptr;
2355  if (wsloopOp.getScheduleChunk()) {
2356  llvm::Value *chunkVar =
2357  moduleTranslation.lookupValue(wsloopOp.getScheduleChunk());
2358  chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
2359  }
2360 
2361  PrivateVarsInfo privateVarsInfo(wsloopOp);
2362 
2363  SmallVector<omp::DeclareReductionOp> reductionDecls;
2364  collectReductionDecls(wsloopOp, reductionDecls);
2365  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2366  findAllocaInsertPoint(builder, moduleTranslation);
2367 
2368  SmallVector<llvm::Value *> privateReductionVariables(
2369  wsloopOp.getNumReductionVars());
2370 
2372  builder, moduleTranslation, privateVarsInfo, allocaIP);
2373  if (handleError(afterAllocas, opInst).failed())
2374  return failure();
2375 
2376  DenseMap<Value, llvm::Value *> reductionVariableMap;
2377 
2378  MutableArrayRef<BlockArgument> reductionArgs =
2379  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
2380 
2381  SmallVector<DeferredStore> deferredStores;
2382 
2383  if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
2384  moduleTranslation, allocaIP, reductionDecls,
2385  privateReductionVariables, reductionVariableMap,
2386  deferredStores, isByRef)))
2387  return failure();
2388 
2389  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
2390  opInst)
2391  .failed())
2392  return failure();
2393 
2394  if (failed(copyFirstPrivateVars(
2395  builder, moduleTranslation, privateVarsInfo.mlirVars,
2396  privateVarsInfo.llvmVars, privateVarsInfo.privatizers)))
2397  return failure();
2398 
2399  assert(afterAllocas.get()->getSinglePredecessor());
2400  if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
2401  moduleTranslation,
2402  afterAllocas.get()->getSinglePredecessor(),
2403  reductionDecls, privateReductionVariables,
2404  reductionVariableMap, isByRef, deferredStores)))
2405  return failure();
2406 
2407  // TODO: Handle doacross loops when the ordered clause has a parameter.
2408  bool isOrdered = wsloopOp.getOrdered().has_value();
2409  std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
2410  bool isSimd = wsloopOp.getScheduleSimd();
2411  bool loopNeedsBarrier = !wsloopOp.getNowait();
2412 
2413  // The only legal way for the direct parent to be omp.distribute is that this
2414  // represents 'distribute parallel do'. Otherwise, this is a regular
2415  // worksharing loop.
2416  llvm::omp::WorksharingLoopType workshareLoopType =
2417  llvm::isa_and_present<omp::DistributeOp>(opInst.getParentOp())
2418  ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop
2419  : llvm::omp::WorksharingLoopType::ForStaticLoop;
2420 
2421  SmallVector<llvm::BranchInst *> cancelTerminators;
2422  pushCancelFinalizationCB(cancelTerminators, builder, *ompBuilder, wsloopOp,
2423  llvm::omp::Directive::OMPD_for);
2424 
2425  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2427  wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation);
2428 
2429  if (failed(handleError(regionBlock, opInst)))
2430  return failure();
2431 
2432  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
2433  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
2434 
2435  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
2436  ompBuilder->applyWorkshareLoop(
2437  ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
2438  convertToScheduleKind(schedule), chunk, isSimd,
2439  scheduleMod == omp::ScheduleModifier::monotonic,
2440  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
2441  workshareLoopType);
2442 
2443  if (failed(handleError(wsloopIP, opInst)))
2444  return failure();
2445 
2446  // Set the correct branch target for task cancellation
2447  popCancelFinalizationCB(cancelTerminators, *ompBuilder, wsloopIP.get());
2448 
2449  // Process the reductions if required.
2450  if (failed(createReductionsAndCleanup(
2451  wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
2452  privateReductionVariables, isByRef, wsloopOp.getNowait(),
2453  /*isTeamsReduction=*/false)))
2454  return failure();
2455 
2456  return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
2457  privateVarsInfo.llvmVars,
2458  privateVarsInfo.privatizers);
2459 }
2460 
2461 /// Converts the OpenMP parallel operation to LLVM IR.
2462 static LogicalResult
2463 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
2464  LLVM::ModuleTranslation &moduleTranslation) {
2465  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2466  ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionByref());
2467  assert(isByRef.size() == opInst.getNumReductionVars());
2468  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2469 
2470  if (failed(checkImplementationStatus(*opInst)))
2471  return failure();
2472 
2473  PrivateVarsInfo privateVarsInfo(opInst);
2474 
2475  // Collect reduction declarations
2476  SmallVector<omp::DeclareReductionOp> reductionDecls;
2477  collectReductionDecls(opInst, reductionDecls);
2478  SmallVector<llvm::Value *> privateReductionVariables(
2479  opInst.getNumReductionVars());
2480  SmallVector<DeferredStore> deferredStores;
2481 
2482  auto bodyGenCB = [&](InsertPointTy allocaIP,
2483  InsertPointTy codeGenIP) -> llvm::Error {
2485  builder, moduleTranslation, privateVarsInfo, allocaIP);
2486  if (handleError(afterAllocas, *opInst).failed())
2487  return llvm::make_error<PreviouslyReportedError>();
2488 
2489  // Allocate reduction vars
2490  DenseMap<Value, llvm::Value *> reductionVariableMap;
2491 
2492  MutableArrayRef<BlockArgument> reductionArgs =
2493  cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs();
2494 
2495  allocaIP =
2496  InsertPointTy(allocaIP.getBlock(),
2497  allocaIP.getBlock()->getTerminator()->getIterator());
2498 
2499  if (failed(allocReductionVars(
2500  opInst, reductionArgs, builder, moduleTranslation, allocaIP,
2501  reductionDecls, privateReductionVariables, reductionVariableMap,
2502  deferredStores, isByRef)))
2503  return llvm::make_error<PreviouslyReportedError>();
2504 
2505  assert(afterAllocas.get()->getSinglePredecessor());
2506  builder.restoreIP(codeGenIP);
2507 
2508  if (handleError(
2509  initPrivateVars(builder, moduleTranslation, privateVarsInfo),
2510  *opInst)
2511  .failed())
2512  return llvm::make_error<PreviouslyReportedError>();
2513 
2514  if (failed(copyFirstPrivateVars(
2515  builder, moduleTranslation, privateVarsInfo.mlirVars,
2516  privateVarsInfo.llvmVars, privateVarsInfo.privatizers)))
2517  return llvm::make_error<PreviouslyReportedError>();
2518 
2519  if (failed(
2520  initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
2521  afterAllocas.get()->getSinglePredecessor(),
2522  reductionDecls, privateReductionVariables,
2523  reductionVariableMap, isByRef, deferredStores)))
2524  return llvm::make_error<PreviouslyReportedError>();
2525 
2526  // Save the alloca insertion point on ModuleTranslation stack for use in
2527  // nested regions.
2529  moduleTranslation, allocaIP);
2530 
2531  // ParallelOp has only one region associated with it.
2533  opInst.getRegion(), "omp.par.region", builder, moduleTranslation);
2534  if (!regionBlock)
2535  return regionBlock.takeError();
2536 
2537  // Process the reductions if required.
2538  if (opInst.getNumReductionVars() > 0) {
2539  // Collect reduction info
2540  SmallVector<OwningReductionGen> owningReductionGens;
2541  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
2543  collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
2544  owningReductionGens, owningAtomicReductionGens,
2545  privateReductionVariables, reductionInfos);
2546 
2547  // Move to region cont block
2548  builder.SetInsertPoint((*regionBlock)->getTerminator());
2549 
2550  // Generate reductions from info
2551  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
2552  builder.SetInsertPoint(tempTerminator);
2553 
2554  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
2555  ompBuilder->createReductions(
2556  builder.saveIP(), allocaIP, reductionInfos, isByRef,
2557  /*IsNoWait=*/false, /*IsTeamsReduction=*/false);
2558  if (!contInsertPoint)
2559  return contInsertPoint.takeError();
2560 
2561  if (!contInsertPoint->getBlock())
2562  return llvm::make_error<PreviouslyReportedError>();
2563 
2564  tempTerminator->eraseFromParent();
2565  builder.restoreIP(*contInsertPoint);
2566  }
2567 
2568  return llvm::Error::success();
2569  };
2570 
2571  auto privCB = [](InsertPointTy allocaIP, InsertPointTy codeGenIP,
2572  llvm::Value &, llvm::Value &val, llvm::Value *&replVal) {
2573  // tell OpenMPIRBuilder not to do anything. We handled Privatisation in
2574  // bodyGenCB.
2575  replVal = &val;
2576  return codeGenIP;
2577  };
2578 
2579  // TODO: Perform finalization actions for variables. This has to be
2580  // called for variables which have destructors/finalizers.
2581  auto finiCB = [&](InsertPointTy codeGenIP) -> llvm::Error {
2582  InsertPointTy oldIP = builder.saveIP();
2583  builder.restoreIP(codeGenIP);
2584 
2585  // if the reduction has a cleanup region, inline it here to finalize the
2586  // reduction variables
2587  SmallVector<Region *> reductionCleanupRegions;
2588  llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions),
2589  [](omp::DeclareReductionOp reductionDecl) {
2590  return &reductionDecl.getCleanupRegion();
2591  });
2592  if (failed(inlineOmpRegionCleanup(
2593  reductionCleanupRegions, privateReductionVariables,
2594  moduleTranslation, builder, "omp.reduction.cleanup")))
2595  return llvm::createStringError(
2596  "failed to inline `cleanup` region of `omp.declare_reduction`");
2597 
2598  if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(),
2599  privateVarsInfo.llvmVars,
2600  privateVarsInfo.privatizers)))
2601  return llvm::make_error<PreviouslyReportedError>();
2602 
2603  builder.restoreIP(oldIP);
2604  return llvm::Error::success();
2605  };
2606 
2607  llvm::Value *ifCond = nullptr;
2608  if (auto ifVar = opInst.getIfExpr())
2609  ifCond = moduleTranslation.lookupValue(ifVar);
2610  llvm::Value *numThreads = nullptr;
2611  if (auto numThreadsVar = opInst.getNumThreads())
2612  numThreads = moduleTranslation.lookupValue(numThreadsVar);
2613  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
2614  if (auto bind = opInst.getProcBindKind())
2615  pbKind = getProcBindKind(*bind);
2616  bool isCancellable = constructIsCancellable(opInst);
2617 
2618  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2619  findAllocaInsertPoint(builder, moduleTranslation);
2620  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2621 
2622  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2623  ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
2624  ifCond, numThreads, pbKind, isCancellable);
2625 
2626  if (failed(handleError(afterIP, *opInst)))
2627  return failure();
2628 
2629  builder.restoreIP(*afterIP);
2630  return success();
2631 }
2632 
2633 /// Convert Order attribute to llvm::omp::OrderKind.
2634 static llvm::omp::OrderKind
2635 convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
2636  if (!o)
2637  return llvm::omp::OrderKind::OMP_ORDER_unknown;
2638  switch (*o) {
2639  case omp::ClauseOrderKind::Concurrent:
2640  return llvm::omp::OrderKind::OMP_ORDER_concurrent;
2641  }
2642  llvm_unreachable("Unknown ClauseOrderKind kind");
2643 }
2644 
2645 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
2646 static LogicalResult
2647 convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
2648  LLVM::ModuleTranslation &moduleTranslation) {
2649  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2650  auto simdOp = cast<omp::SimdOp>(opInst);
2651 
2652  // TODO: Replace this with proper composite translation support.
2653  // Currently, simd information on composite constructs is ignored, so e.g.
2654  // 'do/for simd' will be treated the same as a standalone 'do/for'. This is
2655  // allowed by the spec, since it's equivalent to using a SIMD length of 1.
2656  if (simdOp.isComposite()) {
2657  if (failed(convertIgnoredWrapper(simdOp, moduleTranslation)))
2658  return failure();
2659 
2660  return inlineConvertOmpRegions(simdOp.getRegion(), "omp.simd.region",
2661  builder, moduleTranslation);
2662  }
2663 
2664  if (failed(checkImplementationStatus(opInst)))
2665  return failure();
2666 
2667  PrivateVarsInfo privateVarsInfo(simdOp);
2668 
2669  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2670  findAllocaInsertPoint(builder, moduleTranslation);
2671 
2673  builder, moduleTranslation, privateVarsInfo, allocaIP);
2674  if (handleError(afterAllocas, opInst).failed())
2675  return failure();
2676 
2677  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
2678  opInst)
2679  .failed())
2680  return failure();
2681 
2682  llvm::ConstantInt *simdlen = nullptr;
2683  if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
2684  simdlen = builder.getInt64(simdlenVar.value());
2685 
2686  llvm::ConstantInt *safelen = nullptr;
2687  if (std::optional<uint64_t> safelenVar = simdOp.getSafelen())
2688  safelen = builder.getInt64(safelenVar.value());
2689 
2690  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
2691  llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder());
2692 
2693  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
2694  std::optional<ArrayAttr> alignmentValues = simdOp.getAlignments();
2695  mlir::OperandRange operands = simdOp.getAlignedVars();
2696  for (size_t i = 0; i < operands.size(); ++i) {
2697  llvm::Value *alignment = nullptr;
2698  llvm::Value *llvmVal = moduleTranslation.lookupValue(operands[i]);
2699  llvm::Type *ty = llvmVal->getType();
2700 
2701  auto intAttr = cast<IntegerAttr>((*alignmentValues)[i]);
2702  alignment = builder.getInt64(intAttr.getInt());
2703  assert(ty->isPointerTy() && "Invalid type for aligned variable");
2704  assert(alignment && "Invalid alignment value");
2705  auto curInsert = builder.saveIP();
2706  builder.SetInsertPoint(sourceBlock);
2707  llvmVal = builder.CreateLoad(ty, llvmVal);
2708  builder.restoreIP(curInsert);
2709  alignedVars[llvmVal] = alignment;
2710  }
2711 
2713  simdOp.getRegion(), "omp.simd.region", builder, moduleTranslation);
2714 
2715  if (failed(handleError(regionBlock, opInst)))
2716  return failure();
2717 
2718  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
2719  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
2720  ompBuilder->applySimd(loopInfo, alignedVars,
2721  simdOp.getIfExpr()
2722  ? moduleTranslation.lookupValue(simdOp.getIfExpr())
2723  : nullptr,
2724  order, simdlen, safelen);
2725 
2726  return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
2727  privateVarsInfo.llvmVars,
2728  privateVarsInfo.privatizers);
2729 }
2730 
2731 /// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
2732 static LogicalResult
2733 convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
2734  LLVM::ModuleTranslation &moduleTranslation) {
2735  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2736  auto loopOp = cast<omp::LoopNestOp>(opInst);
2737 
2738  // Set up the source location value for OpenMP runtime.
2739  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2740 
2741  // Generator of the canonical loop body.
2744  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip,
2745  llvm::Value *iv) -> llvm::Error {
2746  // Make sure further conversions know about the induction variable.
2747  moduleTranslation.mapValue(
2748  loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
2749 
2750  // Capture the body insertion point for use in nested loops. BodyIP of the
2751  // CanonicalLoopInfo always points to the beginning of the entry block of
2752  // the body.
2753  bodyInsertPoints.push_back(ip);
2754 
2755  if (loopInfos.size() != loopOp.getNumLoops() - 1)
2756  return llvm::Error::success();
2757 
2758  // Convert the body of the loop.
2759  builder.restoreIP(ip);
2761  loopOp.getRegion(), "omp.loop_nest.region", builder, moduleTranslation);
2762  if (!regionBlock)
2763  return regionBlock.takeError();
2764 
2765  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
2766  return llvm::Error::success();
2767  };
2768 
2769  // Delegate actual loop construction to the OpenMP IRBuilder.
2770  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
2771  // loop, i.e. it has a positive step, uses signed integer semantics.
2772  // Reconsider this code when the nested loop operation clearly supports more
2773  // cases.
2774  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
2775  llvm::Value *lowerBound =
2776  moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]);
2777  llvm::Value *upperBound =
2778  moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]);
2779  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]);
2780 
2781  // Make sure loop trip count are emitted in the preheader of the outermost
2782  // loop at the latest so that they are all available for the new collapsed
2783  // loop will be created below.
2784  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
2785  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
2786  if (i != 0) {
2787  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
2788  ompLoc.DL);
2789  computeIP = loopInfos.front()->getPreheaderIP();
2790  }
2791 
2793  ompBuilder->createCanonicalLoop(
2794  loc, bodyGen, lowerBound, upperBound, step,
2795  /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);
2796 
2797  if (failed(handleError(loopResult, *loopOp)))
2798  return failure();
2799 
2800  loopInfos.push_back(*loopResult);
2801  }
2802 
2803  // Collapse loops. Store the insertion point because LoopInfos may get
2804  // invalidated.
2805  llvm::OpenMPIRBuilder::InsertPointTy afterIP =
2806  loopInfos.front()->getAfterIP();
2807 
2808  // Update the stack frame created for this loop to point to the resulting loop
2809  // after applying transformations.
2810  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
2811  [&](OpenMPLoopInfoStackFrame &frame) {
2812  frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
2813  return WalkResult::interrupt();
2814  });
2815 
2816  // Continue building IR after the loop. Note that the LoopInfo returned by
2817  // `collapseLoops` points inside the outermost loop and is intended for
2818  // potential further loop transformations. Use the insertion point stored
2819  // before collapsing loops instead.
2820  builder.restoreIP(afterIP);
2821  return success();
2822 }
2823 
2824 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
2825 static llvm::AtomicOrdering
2826 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
2827  if (!ao)
2828  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
2829 
2830  switch (*ao) {
2831  case omp::ClauseMemoryOrderKind::Seq_cst:
2832  return llvm::AtomicOrdering::SequentiallyConsistent;
2833  case omp::ClauseMemoryOrderKind::Acq_rel:
2834  return llvm::AtomicOrdering::AcquireRelease;
2835  case omp::ClauseMemoryOrderKind::Acquire:
2836  return llvm::AtomicOrdering::Acquire;
2837  case omp::ClauseMemoryOrderKind::Release:
2838  return llvm::AtomicOrdering::Release;
2839  case omp::ClauseMemoryOrderKind::Relaxed:
2840  return llvm::AtomicOrdering::Monotonic;
2841  }
2842  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
2843 }
2844 
2845 /// Convert omp.atomic.read operation to LLVM IR.
2846 static LogicalResult
2847 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
2848  LLVM::ModuleTranslation &moduleTranslation) {
2849  auto readOp = cast<omp::AtomicReadOp>(opInst);
2850  if (failed(checkImplementationStatus(opInst)))
2851  return failure();
2852 
2853  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2854  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2855  findAllocaInsertPoint(builder, moduleTranslation);
2856 
2857  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2858 
2859  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrder());
2860  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
2861  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
2862 
2863  llvm::Type *elementType =
2864  moduleTranslation.convertType(readOp.getElementType());
2865 
2866  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
2867  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
2868  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO, allocaIP));
2869  return success();
2870 }
2871 
2872 /// Converts an omp.atomic.write operation to LLVM IR.
2873 static LogicalResult
2874 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
2875  LLVM::ModuleTranslation &moduleTranslation) {
2876  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
2877  if (failed(checkImplementationStatus(opInst)))
2878  return failure();
2879 
2880  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2881  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2882  findAllocaInsertPoint(builder, moduleTranslation);
2883 
2884  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2885  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder());
2886  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
2887  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
2888  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
2889  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
2890  /*isVolatile=*/false};
2891  builder.restoreIP(
2892  ompBuilder->createAtomicWrite(ompLoc, x, expr, ao, allocaIP));
2893  return success();
2894 }
2895 
2896 /// Converts an LLVM dialect binary operation to the corresponding enum value
2897 /// for `atomicrmw` supported binary operation.
2898 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
2900  .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
2901  .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
2902  .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
2903  .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
2904  .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
2905  .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
2906  .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
2907  .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
2908  .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
2909  .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
2910 }
2911 
2912 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
2913 static LogicalResult
2914 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
2915  llvm::IRBuilderBase &builder,
2916  LLVM::ModuleTranslation &moduleTranslation) {
2917  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2918  if (failed(checkImplementationStatus(*opInst)))
2919  return failure();
2920 
2921  // Convert values and types.
2922  auto &innerOpList = opInst.getRegion().front().getOperations();
2923  bool isXBinopExpr{false};
2924  llvm::AtomicRMWInst::BinOp binop;
2925  mlir::Value mlirExpr;
2926  llvm::Value *llvmExpr = nullptr;
2927  llvm::Value *llvmX = nullptr;
2928  llvm::Type *llvmXElementType = nullptr;
2929  if (innerOpList.size() == 2) {
2930  // The two operations here are the update and the terminator.
2931  // Since we can identify the update operation, there is a possibility
2932  // that we can generate the atomicrmw instruction.
2933  mlir::Operation &innerOp = *opInst.getRegion().front().begin();
2934  if (!llvm::is_contained(innerOp.getOperands(),
2935  opInst.getRegion().getArgument(0))) {
2936  return opInst.emitError("no atomic update operation with region argument"
2937  " as operand found inside atomic.update region");
2938  }
2939  binop = convertBinOpToAtomic(innerOp);
2940  isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
2941  mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
2942  llvmExpr = moduleTranslation.lookupValue(mlirExpr);
2943  } else {
2944  // Since the update region includes more than one operation
2945  // we will resort to generating a cmpxchg loop.
2946  binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
2947  }
2948  llvmX = moduleTranslation.lookupValue(opInst.getX());
2949  llvmXElementType = moduleTranslation.convertType(
2950  opInst.getRegion().getArgument(0).getType());
2951  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
2952  /*isSigned=*/false,
2953  /*isVolatile=*/false};
2954 
2955  llvm::AtomicOrdering atomicOrdering =
2956  convertAtomicOrdering(opInst.getMemoryOrder());
2957 
2958  // Generate update code.
2959  auto updateFn =
2960  [&opInst, &moduleTranslation](
2961  llvm::Value *atomicx,
2962  llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
2963  Block &bb = *opInst.getRegion().begin();
2964  moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
2965  moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
2966  if (failed(moduleTranslation.convertBlock(bb, true, builder)))
2967  return llvm::make_error<PreviouslyReportedError>();
2968 
2969  omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
2970  assert(yieldop && yieldop.getResults().size() == 1 &&
2971  "terminator must be omp.yield op and it must have exactly one "
2972  "argument");
2973  return moduleTranslation.lookupValue(yieldop.getResults()[0]);
2974  };
2975 
2976  // Handle ambiguous alloca, if any.
2977  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
2978  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2979  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2980  ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
2981  atomicOrdering, binop, updateFn,
2982  isXBinopExpr);
2983 
2984  if (failed(handleError(afterIP, *opInst)))
2985  return failure();
2986 
2987  builder.restoreIP(*afterIP);
2988  return success();
2989 }
2990 
/// Converts an `omp.atomic.capture` operation to LLVM IR using
/// OpenMPIRBuilder. The capture region pairs an atomic read with either an
/// atomic update or an atomic write; the relative order of the two inner ops
/// determines whether the captured value is the old or the new one.
static LogicalResult
convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
                        llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  if (failed(checkImplementationStatus(*atomicCaptureOp)))
    return failure();

  mlir::Value mlirExpr;
  bool isXBinopExpr = false, isPostfixUpdate = false;
  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;

  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();

  assert((atomicUpdateOp || atomicWriteOp) &&
         "internal op must be an atomic.update or atomic.write op");

  if (atomicWriteOp) {
    // A write always overwrites x, so the read must capture the old value.
    isPostfixUpdate = true;
    mlirExpr = atomicWriteOp.getExpr();
  } else {
    // Postfix form: the update comes second, i.e. the read captures the value
    // of x before the update is applied.
    isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
                      atomicCaptureOp.getAtomicUpdateOp().getOperation();
    auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
    // Find the binary update operation that uses the region argument
    // and get the expression to update
    if (innerOpList.size() == 2) {
      // Exactly one update op plus the terminator: candidate for a plain
      // atomicrmw lowering.
      mlir::Operation &innerOp = *atomicUpdateOp.getRegion().front().begin();
      if (!llvm::is_contained(innerOp.getOperands(),
                              atomicUpdateOp.getRegion().getArgument(0))) {
        return atomicUpdateOp.emitError(
            "no atomic update operation with region argument"
            " as operand found inside atomic.update region");
      }
      binop = convertBinOpToAtomic(innerOp);
      isXBinopExpr =
          innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
      mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    } else {
      // Multi-op update region: fall back to a cmpxchg loop.
      binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
    }
  }

  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  llvm::Value *llvmX =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
  llvm::Value *llvmV =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
  llvm::Type *llvmXElementType = moduleTranslation.convertType(
      atomicCaptureOp.getAtomicReadOp().getElementType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(atomicCaptureOp.getMemoryOrder());

  // Callback producing the updated value of x: for a write the new value is
  // simply the written expression; for an update, inline the update region
  // and return its omp.yield operand.
  auto updateFn =
      [&](llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    if (atomicWriteOp)
      return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
    Block &bb = *atomicUpdateOp.getRegion().begin();
    moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
                               atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicCapture(
          ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
          binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr);

  if (failed(handleError(afterIP, *atomicCaptureOp)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
3085 
3086 static llvm::omp::Directive convertCancellationConstructType(
3087  omp::ClauseCancellationConstructType directive) {
3088  switch (directive) {
3089  case omp::ClauseCancellationConstructType::Loop:
3090  return llvm::omp::Directive::OMPD_for;
3091  case omp::ClauseCancellationConstructType::Parallel:
3092  return llvm::omp::Directive::OMPD_parallel;
3093  case omp::ClauseCancellationConstructType::Sections:
3094  return llvm::omp::Directive::OMPD_sections;
3095  case omp::ClauseCancellationConstructType::Taskgroup:
3096  return llvm::omp::Directive::OMPD_taskgroup;
3097  }
3098 }
3099 
3100 static LogicalResult
3101 convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder,
3102  LLVM::ModuleTranslation &moduleTranslation) {
3103  if (failed(checkImplementationStatus(*op.getOperation())))
3104  return failure();
3105 
3106  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3107  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3108 
3109  llvm::Value *ifCond = nullptr;
3110  if (Value ifVar = op.getIfExpr())
3111  ifCond = moduleTranslation.lookupValue(ifVar);
3112 
3113  llvm::omp::Directive cancelledDirective =
3114  convertCancellationConstructType(op.getCancelDirective());
3115 
3116  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3117  ompBuilder->createCancel(ompLoc, ifCond, cancelledDirective);
3118 
3119  if (failed(handleError(afterIP, *op.getOperation())))
3120  return failure();
3121 
3122  builder.restoreIP(afterIP.get());
3123 
3124  return success();
3125 }
3126 
3127 static LogicalResult
3128 convertOmpCancellationPoint(omp::CancellationPointOp op,
3129  llvm::IRBuilderBase &builder,
3130  LLVM::ModuleTranslation &moduleTranslation) {
3131  if (failed(checkImplementationStatus(*op.getOperation())))
3132  return failure();
3133 
3134  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3135  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3136 
3137  llvm::omp::Directive cancelledDirective =
3138  convertCancellationConstructType(op.getCancelDirective());
3139 
3140  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3141  ompBuilder->createCancellationPoint(ompLoc, cancelledDirective);
3142 
3143  if (failed(handleError(afterIP, *op.getOperation())))
3144  return failure();
3145 
3146  builder.restoreIP(afterIP.get());
3147 
3148  return success();
3149 }
3150 
3151 /// Converts an OpenMP Threadprivate operation into LLVM IR using
3152 /// OpenMPIRBuilder.
3153 static LogicalResult
3154 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
3155  LLVM::ModuleTranslation &moduleTranslation) {
3156  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3157  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3158  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
3159 
3160  if (failed(checkImplementationStatus(opInst)))
3161  return failure();
3162 
3163  Value symAddr = threadprivateOp.getSymAddr();
3164  auto *symOp = symAddr.getDefiningOp();
3165 
3166  if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
3167  symOp = asCast.getOperand().getDefiningOp();
3168 
3169  if (!isa<LLVM::AddressOfOp>(symOp))
3170  return opInst.emitError("Addressing symbol not found");
3171  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
3172 
3173  LLVM::GlobalOp global =
3174  addressOfOp.getGlobal(moduleTranslation.symbolTable());
3175  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
3176 
3177  if (!ompBuilder->Config.isTargetDevice()) {
3178  llvm::Type *type = globalValue->getValueType();
3179  llvm::TypeSize typeSize =
3180  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
3181  type);
3182  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
3183  llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
3184  ompLoc, globalValue, size, global.getSymName() + ".cache");
3185  moduleTranslation.mapValue(opInst.getResult(0), callInst);
3186  } else {
3187  moduleTranslation.mapValue(opInst.getResult(0), globalValue);
3188  }
3189 
3190  return success();
3191 }
3192 
3193 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
3194 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
3195  switch (deviceClause) {
3196  case mlir::omp::DeclareTargetDeviceType::host:
3197  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
3198  break;
3199  case mlir::omp::DeclareTargetDeviceType::nohost:
3200  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
3201  break;
3202  case mlir::omp::DeclareTargetDeviceType::any:
3203  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
3204  break;
3205  }
3206  llvm_unreachable("unhandled device clause");
3207 }
3208 
3209 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
3211  mlir::omp::DeclareTargetCaptureClause captureClause) {
3212  switch (captureClause) {
3213  case mlir::omp::DeclareTargetCaptureClause::to:
3214  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
3215  case mlir::omp::DeclareTargetCaptureClause::link:
3216  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
3217  case mlir::omp::DeclareTargetCaptureClause::enter:
3218  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
3219  }
3220  llvm_unreachable("unhandled capture clause");
3221 }
3222 
3223 static llvm::SmallString<64>
3224 getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
3225  llvm::OpenMPIRBuilder &ompBuilder) {
3226  llvm::SmallString<64> suffix;
3227  llvm::raw_svector_ostream os(suffix);
3228  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
3229  auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
3230  auto fileInfoCallBack = [&loc]() {
3231  return std::pair<std::string, uint64_t>(
3232  llvm::StringRef(loc.getFilename()), loc.getLine());
3233  };
3234 
3235  os << llvm::format(
3236  "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
3237  }
3238  os << "_decl_tgt_ref_ptr";
3239 
3240  return suffix;
3241 }
3242 
3243 static bool isDeclareTargetLink(mlir::Value value) {
3244  if (auto addressOfOp =
3245  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
3246  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
3247  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
3248  if (auto declareTargetGlobal =
3249  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
3250  if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3251  mlir::omp::DeclareTargetCaptureClause::link)
3252  return true;
3253  }
3254  return false;
3255 }
3256 
3257 // Returns the reference pointer generated by the lowering of the declare target
3258 // operation in cases where the link clause is used or the to clause is used in
3259 // USM mode.
3260 static llvm::Value *
3262  LLVM::ModuleTranslation &moduleTranslation) {
3263  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3264 
3265  // An easier way to do this may just be to keep track of any pointer
3266  // references and their mapping to their respective operation
3267  if (auto addressOfOp =
3268  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
3269  if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
3270  addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
3271  addressOfOp.getGlobalName()))) {
3272 
3273  if (auto declareTargetGlobal =
3274  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
3275  gOp.getOperation())) {
3276 
3277  // In this case, we must utilise the reference pointer generated by the
3278  // declare target operation, similar to Clang
3279  if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
3280  mlir::omp::DeclareTargetCaptureClause::link) ||
3281  (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3282  mlir::omp::DeclareTargetCaptureClause::to &&
3283  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
3284  llvm::SmallString<64> suffix =
3285  getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
3286 
3287  if (gOp.getSymName().contains(suffix))
3288  return moduleTranslation.getLLVMModule()->getNamedValue(
3289  gOp.getSymName());
3290 
3291  return moduleTranslation.getLLVMModule()->getNamedValue(
3292  (gOp.getSymName().str() + suffix.str()).str());
3293  }
3294  }
3295  }
3296  }
3297 
3298  return nullptr;
3299 }
3300 
namespace {
// Append customMappers information to existing MapInfosTy
struct MapInfosTy : llvm::OpenMPIRBuilder::MapInfosTy {
  // Custom user-defined mappers, parallel to the base-class arrays; entries
  // are omp::DeclareMapperOp operations or nullptr when no mapper applies.
  SmallVector<Operation *, 4> Mappers;

  /// Append arrays in \a CurInfo.
  void append(MapInfosTy &curInfo) {
    Mappers.append(curInfo.Mappers.begin(), curInfo.Mappers.end());
    llvm::OpenMPIRBuilder::MapInfosTy::append(curInfo);
  }
};
// A small helper structure to contain data gathered
// for map lowering and coalese it into one area and
// avoiding extra computations such as searches in the
// llvm module for lowered mapped variables or checking
// if something is declare target (and retrieving the
// value) more than neccessary.
struct MapInfoData : MapInfosTy {
  llvm::SmallVector<bool, 4> IsDeclareTarget;
  llvm::SmallVector<bool, 4> IsAMember;
  // Identify if mapping was added by mapClause or use_device clauses.
  llvm::SmallVector<bool, 4> IsAMapping;
  // The omp.map.info operation each entry originates from.
  llvm::SmallVector<mlir::Operation *, 4> MapClause;
  // The translated llvm::Value being mapped (var_ptr or var_ptr_ptr).
  llvm::SmallVector<llvm::Value *, 4> OriginalValue;
  // Stripped off array/pointer to get the underlying
  // element type
  llvm::SmallVector<llvm::Type *, 4> BaseType;

  /// Append arrays in \a CurInfo.
  void append(MapInfoData &CurInfo) {
    IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
                           CurInfo.IsDeclareTarget.end());
    MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
    OriginalValue.append(CurInfo.OriginalValue.begin(),
                         CurInfo.OriginalValue.end());
    BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
    MapInfosTy::append(CurInfo);
  }
};
} // namespace
3341 
3342 uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
3343  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
3344  arrTy.getElementType()))
3345  return getArrayElementSizeInBits(nestedArrTy, dl);
3346  return dl.getTypeSizeInBits(arrTy.getElementType());
3347 }
3348 
// This function calculates the size to be offloaded for a specified type, given
// its associated map clause (which can contain bounds information which affects
// the total size), this size is calculated based on the underlying element type
// e.g. given a 1-D array of ints, we will calculate the size from the integer
// type * number of elements in the array. This size can be used in other
// calculations but is ultimately used as an argument to the OpenMP runtimes
// kernel argument structure which is generated through the combinedInfo data
// structures.
// This function is somewhat equivalent to Clang's getExprTypeSize inside of
// CGOpenMPRuntime.cpp.
//
// NOTE(review): basePointer and baseType are currently unused by this
// computation; they appear to be kept for interface symmetry with callers.
llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
                            Operation *clauseOp, llvm::Value *basePointer,
                            llvm::Type *baseType, llvm::IRBuilderBase &builder,
                            LLVM::ModuleTranslation &moduleTranslation) {
  if (auto memberClause =
          mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
    // This calculates the size to transfer based on bounds and the underlying
    // element type, provided bounds have been specified (Fortran
    // pointers/allocatables/target and arrays that have sections specified fall
    // into this as well).
    if (!memberClause.getBounds().empty()) {
      llvm::Value *elementCount = builder.getInt64(1);
      for (auto bounds : memberClause.getBounds()) {
        if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
                bounds.getDefiningOp())) {
          // The below calculation for the size to be mapped calculated from the
          // map.info's bounds is: (elemCount * [UB - LB] + 1), later we
          // multiply by the underlying element types byte size to get the full
          // size to be offloaded based on the bounds
          elementCount = builder.CreateMul(
              elementCount,
              builder.CreateAdd(
                  builder.CreateSub(
                      moduleTranslation.lookupValue(boundOp.getUpperBound()),
                      moduleTranslation.lookupValue(boundOp.getLowerBound())),
                  builder.getInt64(1)));
        }
      }

      // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives
      // the size in inconsistent byte or bit format.
      uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
      if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
        underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);

      // The size in bytes x number of elements, the sizeInBytes stored is
      // the underyling types size, e.g. if ptr<i32>, it'll be the i32's
      // size, so we do some on the fly runtime math to get the size in
      // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
      // some adjustment for members with more complex types.
      return builder.CreateMul(elementCount,
                               builder.getInt64(underlyingTypeSzInBits / 8));
    }
  }

  // No bounds: the full (static) size of the type is offloaded.
  return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
}
3406 
// Gathers everything needed to lower the map-related operands of an operation
// into \p mapData: for each omp.map.info operand (from map, use_device_ptr,
// use_device_addr and has_device_addr clauses) it records the translated
// llvm::Value, base pointer, converted type, size in bytes, map-type flags,
// optional custom mapper and bookkeeping flags (declare-target, member-of,
// mapping-vs-use_device origin).
static void collectMapDataFromMapOperands(
    MapInfoData &mapData, SmallVectorImpl<Value> &mapVars,
    LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl,
    llvm::IRBuilderBase &builder, ArrayRef<Value> useDevPtrOperands = {},
    ArrayRef<Value> useDevAddrOperands = {},
    ArrayRef<Value> hasDevAddrOperands = {}) {
  auto checkIsAMember = [](const auto &mapVars, auto mapOp) {
    // Check if this is a member mapping and correctly assign that it is, if
    // it is a member of a larger object.
    // TODO: Need better handling of members, and distinguishing of members
    // that are implicitly allocated on device vs explicitly passed in as
    // arguments.
    // TODO: May require some further additions to support nested record
    // types, i.e. member maps that can have member maps.
    for (Value mapValue : mapVars) {
      auto map = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
      for (auto member : map.getMembers())
        if (member == mapOp)
          return true;
    }
    return false;
  };

  // Process MapOperands
  for (Value mapValue : mapVars) {
    auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
    // Prefer var_ptr_ptr (pointer-to-pointer mappings) when present.
    Value offloadPtr =
        mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
    mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
    mapData.Pointers.push_back(mapData.OriginalValue.back());

    if (llvm::Value *refPtr =
            getRefPtrIfDeclareTarget(offloadPtr,
                                     moduleTranslation)) { // declare target
      mapData.IsDeclareTarget.push_back(true);
      mapData.BasePointers.push_back(refPtr);
    } else { // regular mapped variable
      mapData.IsDeclareTarget.push_back(false);
      mapData.BasePointers.push_back(mapData.OriginalValue.back());
    }

    mapData.BaseType.push_back(
        moduleTranslation.convertType(mapOp.getVarType()));
    mapData.Sizes.push_back(
        getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
                       mapData.BaseType.back(), builder, moduleTranslation));
    mapData.MapClause.push_back(mapOp.getOperation());
    mapData.Types.push_back(
        llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType()));
    mapData.Names.push_back(LLVM::createMappingInformation(
        mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
    mapData.DevicePointers.push_back(llvm::OpenMPIRBuilder::DeviceInfoTy::None);
    // Resolve the custom mapper symbol, if one was specified on the clause.
    if (mapOp.getMapperId())
      mapData.Mappers.push_back(
          SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>(
              mapOp, mapOp.getMapperIdAttr()));
    else
      mapData.Mappers.push_back(nullptr);
    mapData.IsAMapping.push_back(true);
    mapData.IsAMember.push_back(checkIsAMember(mapVars, mapOp));
  }

  // Marks an already-collected map entry as returning a device pointer /
  // address instead of adding a duplicate entry; returns whether one was
  // found.
  auto findMapInfo = [&mapData](llvm::Value *val,
                                llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
    unsigned index = 0;
    bool found = false;
    for (llvm::Value *basePtr : mapData.OriginalValue) {
      if (basePtr == val && mapData.IsAMapping[index]) {
        found = true;
        mapData.Types[index] |=
            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
        mapData.DevicePointers[index] = devInfoTy;
      }
      index++;
    }
    return found;
  };

  // Process useDevPtr(Addr)Operands
  auto addDevInfos = [&](const llvm::ArrayRef<Value> &useDevOperands,
                         llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
    for (Value mapValue : useDevOperands) {
      auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
      Value offloadPtr =
          mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
      llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);

      // Check if map info is already present for this entry.
      if (!findMapInfo(origValue, devInfoTy)) {
        mapData.OriginalValue.push_back(origValue);
        mapData.Pointers.push_back(mapData.OriginalValue.back());
        mapData.IsDeclareTarget.push_back(false);
        mapData.BasePointers.push_back(mapData.OriginalValue.back());
        mapData.BaseType.push_back(
            moduleTranslation.convertType(mapOp.getVarType()));
        mapData.Sizes.push_back(builder.getInt64(0));
        mapData.MapClause.push_back(mapOp.getOperation());
        mapData.Types.push_back(
            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
        mapData.Names.push_back(LLVM::createMappingInformation(
            mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
        mapData.DevicePointers.push_back(devInfoTy);
        mapData.Mappers.push_back(nullptr);
        mapData.IsAMapping.push_back(false);
        mapData.IsAMember.push_back(checkIsAMember(useDevOperands, mapOp));
      }
    }
  };

  addDevInfos(useDevAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
  addDevInfos(useDevPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);

  // Process hasDevAddrOperands; these always get their own entries.
  for (Value mapValue : hasDevAddrOperands) {
    auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
    Value offloadPtr =
        mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
    llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
    auto mapType =
        static_cast<llvm::omp::OpenMPOffloadMappingFlags>(mapOp.getMapType());
    auto mapTypeAlways = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;

    mapData.OriginalValue.push_back(origValue);
    mapData.BasePointers.push_back(origValue);
    mapData.Pointers.push_back(origValue);
    mapData.IsDeclareTarget.push_back(false);
    mapData.BaseType.push_back(
        moduleTranslation.convertType(mapOp.getVarType()));
    mapData.Sizes.push_back(
        builder.getInt64(dl.getTypeSize(mapOp.getVarType())));
    mapData.MapClause.push_back(mapOp.getOperation());
    if (llvm::to_underlying(mapType & mapTypeAlways)) {
      // Descriptors are mapped with the ALWAYS flag, since they can get
      // rematerialized, so the address of the descriptor for a given object
      // may change from one place to another.
      mapData.Types.push_back(mapType);
      // Technically it's possible for a non-descriptor mapping to have
      // both has-device-addr and ALWAYS, so lookup the mapper in case it
      // exists.
      if (mapOp.getMapperId()) {
        mapData.Mappers.push_back(
            SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>(
                mapOp, mapOp.getMapperIdAttr()));
      } else {
        mapData.Mappers.push_back(nullptr);
      }
    } else {
      mapData.Types.push_back(
          llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
      mapData.Mappers.push_back(nullptr);
    }
    mapData.Names.push_back(LLVM::createMappingInformation(
        mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
    mapData.DevicePointers.push_back(
        llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
    mapData.IsAMapping.push_back(false);
    mapData.IsAMember.push_back(checkIsAMember(hasDevAddrOperands, mapOp));
  }
}
3565 
3566 static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
3567  auto *res = llvm::find(mapData.MapClause, memberOp);
3568  assert(res != mapData.MapClause.end() &&
3569  "MapInfoOp for member not found in MapData, cannot return index");
3570  return std::distance(mapData.MapClause.begin(), res);
3571 }
3572 
3573 static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
3574  bool first) {
3575  ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
3576  // Only 1 member has been mapped, we can return it.
3577  if (indexAttr.size() == 1)
3578  return cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp());
3579 
3580  llvm::SmallVector<size_t> indices(indexAttr.size());
3581  std::iota(indices.begin(), indices.end(), 0);
3582 
3583  llvm::sort(indices.begin(), indices.end(),
3584  [&](const size_t a, const size_t b) {
3585  auto memberIndicesA = cast<ArrayAttr>(indexAttr[a]);
3586  auto memberIndicesB = cast<ArrayAttr>(indexAttr[b]);
3587  for (const auto it : llvm::zip(memberIndicesA, memberIndicesB)) {
3588  int64_t aIndex = cast<IntegerAttr>(std::get<0>(it)).getInt();
3589  int64_t bIndex = cast<IntegerAttr>(std::get<1>(it)).getInt();
3590 
3591  if (aIndex == bIndex)
3592  continue;
3593 
3594  if (aIndex < bIndex)
3595  return first;
3596 
3597  if (aIndex > bIndex)
3598  return !first;
3599  }
3600 
3601  // Iterated the up until the end of the smallest member and
3602  // they were found to be equal up to that point, so select
3603  // the member with the lowest index count, so the "parent"
3604  return memberIndicesA.size() < memberIndicesB.size();
3605  });
3606 
3607  return llvm::cast<omp::MapInfoOp>(
3608  mapInfo.getMembers()[indices.front()].getDefiningOp());
3609 }
3610 
/// This function calculates the array/pointer offset for map data provided
/// with bounds operations, e.g. when provided something like the following:
///
/// Fortran
///     map(tofrom: array(2:5, 3:2))
/// or
/// C++
///     map(tofrom: array[1:4][2:3])
/// We must calculate the initial pointer offset to pass across, this function
/// performs this using bounds.
///
/// NOTE: which while specified in row-major order it currently needs to be
/// flipped for Fortran's column order array allocation and access (as
/// opposed to C++'s row-major, hence the backwards processing where order is
/// important). This is likely important to keep in mind for the future when
/// we incorporate a C++ frontend, both frontends will need to agree on the
/// ordering of generated bounds operations (one may have to flip them) to
/// make the below lowering frontend agnostic. The offload size
/// calculation may also have to be adjusted for C++.
std::vector<llvm::Value *>
calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
                      llvm::IRBuilderBase &builder, bool isArrayTy,
                      OperandRange bounds) {
  std::vector<llvm::Value *> idx;
  // There's no bounds to calculate an offset from, we can safely
  // ignore and return no indices.
  if (bounds.empty())
    return idx;

  // If we have an array type, then we have its type so can treat it as a
  // normal GEP instruction where the bounds operations are simply indexes
  // into the array. We currently do reverse order of the bounds, which
  // I believe leans more towards Fortran's column-major in memory.
  if (isArrayTy) {
    idx.push_back(builder.getInt64(0));
    for (int i = bounds.size() - 1; i >= 0; --i) {
      if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
              bounds[i].getDefiningOp())) {
        idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
      }
    }
  } else {
    // If we do not have an array type, but we have bounds, then we're dealing
    // with a pointer that's being treated like an array and we have the
    // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
    // address (pointer pointing to the actual data) so we must calculate the
    // offset using a single index which the following two loops attempts to
    // compute.

    // Calculates the size offset we need to make per row e.g. first row or
    // column only needs to be offset by one, but the next would have to be
    // the previous row/column offset multiplied by the extent of current row.
    //
    // For example ([1][10][100]):
    //
    // - First row/column we move by 1 for each index increment
    // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
    // current) for 10 for each index increment
    // - Third row/column we would move by 10 (second row/column) *
    // (extent/size of current) 100 for 1000 for each index increment
    std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
    for (size_t i = 1; i < bounds.size(); ++i) {
      if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
              bounds[i].getDefiningOp())) {
        dimensionIndexSizeOffset.push_back(builder.CreateMul(
            moduleTranslation.lookupValue(boundOp.getExtent()),
            dimensionIndexSizeOffset[i - 1]));
      }
    }

    // Now that we have calculated how much we move by per index, we must
    // multiply each lower bound offset in indexes by the size offset we
    // have calculated in the previous and accumulate the results to get
    // our final resulting offset.
    for (int i = bounds.size() - 1; i >= 0; --i) {
      if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
              bounds[i].getDefiningOp())) {
        if (idx.empty())
          idx.emplace_back(builder.CreateMul(
              moduleTranslation.lookupValue(boundOp.getLowerBound()),
              dimensionIndexSizeOffset[i]));
        else
          idx.back() = builder.CreateAdd(
              idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
                                                boundOp.getLowerBound()),
                                            dimensionIndexSizeOffset[i]));
      }
    }
  }

  return idx;
}
3703 
// This creates two insertions into the MapInfosTy data structure for the
// "parent" of a set of members, (usually a container e.g.
// class/structure/derived type) when subsequent members have also been
// explicitly mapped on the same map clause. Certain types, such as Fortran
// descriptors are mapped like this as well, however, the members are
// implicit as far as a user is concerned, but we must explicitly map them
// internally.
//
// This function also returns the memberOfFlag for this particular parent,
// which is utilised in subsequent member mappings (by modifying their map type
// with it) to indicate that a member is part of this parent and should be
// treated by the runtime as such. Important to achieve the correct mapping.
//
// This function borrows a lot from Clang's emitCombinedEntry function
// inside of CGOpenMPRuntime.cpp
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
    LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
    llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
    MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams) {
  // Map the first segment of our structure
  combinedInfo.Types.emplace_back(
      isTargetParams
          ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
          : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE);
  combinedInfo.DevicePointers.emplace_back(
      mapData.DevicePointers[mapDataIndex]);
  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIndex]);
  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
      mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);

  // Calculate size of the parent object being mapped based on the
  // addresses at runtime, highAddr - lowAddr = size. This of course
  // doesn't factor in allocated data like pointers, hence the further
  // processing of members specified by users, or in the case of
  // Fortran pointers and allocatables, the mapping of the pointed to
  // data by the descriptor (which itself, is a structure containing
  // runtime information on the dynamically allocated data).
  auto parentClause =
      llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);

  llvm::Value *lowAddr, *highAddr;
  if (!parentClause.getPartialMap()) {
    // Full map: the span is the parent object itself, [ptr, ptr + 1).
    lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
                                        builder.getPtrTy());
    highAddr = builder.CreatePointerCast(
        builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
                                   mapData.Pointers[mapDataIndex], 1),
        builder.getPtrTy());
    combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
  } else {
    // Partial map: the span covers only the mapped members, from the first
    // member's address to one past the last member.
    auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
    int firstMemberIdx = getMapDataMemberIdx(
        mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
    lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
                                        builder.getPtrTy());
    int lastMemberIdx = getMapDataMemberIdx(
        mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
    highAddr = builder.CreatePointerCast(
        builder.CreateGEP(mapData.BaseType[lastMemberIdx],
                          mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
        builder.getPtrTy());
    combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
  }

  llvm::Value *size = builder.CreateIntCast(
      builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
      builder.getInt64Ty(),
      /*isSigned=*/false);
  combinedInfo.Sizes.push_back(size);

  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
      ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);

  // This creates the initial MEMBER_OF mapping that consists of
  // the parent/top level container (same as above effectively, except
  // with a fixed initial compile time size and separate maptype which
  // indicates the true map type (tofrom etc.). This parent mapping is
  // only relevant if the structure in its totality is being mapped,
  // otherwise the above suffices.
  if (!parentClause.getPartialMap()) {
    // TODO: This will need to be expanded to include the whole host of logic
    // for the map flags that Clang currently supports (e.g. it should do some
    // further case specific flag modifications). For the moment, it handles
    // what we support as expected.
    llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
    ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
    combinedInfo.Types.emplace_back(mapFlag);
    combinedInfo.DevicePointers.emplace_back(
        llvm::OpenMPIRBuilder::DeviceInfoTy::None);
    combinedInfo.Mappers.emplace_back(nullptr);
    combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
        mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
    combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
    combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
    combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
  }
  return memberOfFlag;
}
3803 
3804 // The intent is to verify if the mapped data being passed is a
3805 // pointer -> pointee that requires special handling in certain cases,
3806 // e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
3807 //
3808 // There may be a better way to verify this, but unfortunately with
3809 // opaque pointers we lose the ability to easily check if something is
3810 // a pointer whilst maintaining access to the underlying type.
3811 static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
3812  // If we have a varPtrPtr field assigned then the underlying type is a pointer
3813  if (mapOp.getVarPtrPtr())
3814  return true;
3815 
3816  // If the map data is declare target with a link clause, then it's represented
3817  // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
3818  // no relation to pointers.
3819  if (isDeclareTargetLink(mapOp.getVarPtr()))
3820  return true;
3821 
3822  return false;
3823 }
3824 
3825 // This function is intended to add explicit mappings of members
3827  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3828  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
3829  MapInfoData &mapData, uint64_t mapDataIndex,
3830  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
3831 
3832  auto parentClause =
3833  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3834 
3835  for (auto mappedMembers : parentClause.getMembers()) {
3836  auto memberClause =
3837  llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp());
3838  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
3839 
3840  assert(memberDataIdx >= 0 && "could not find mapped member of structure");
3841 
3842  // If we're currently mapping a pointer to a block of data, we must
3843  // initially map the pointer, and then attatch/bind the data with a
3844  // subsequent map to the pointer. This segment of code generates the
3845  // pointer mapping, which can in certain cases be optimised out as Clang
3846  // currently does in its lowering. However, for the moment we do not do so,
3847  // in part as we currently have substantially less information on the data
3848  // being mapped at this stage.
3849  if (checkIfPointerMap(memberClause)) {
3850  auto mapFlag =
3851  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
3852  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3853  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
3854  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3855  combinedInfo.Types.emplace_back(mapFlag);
3856  combinedInfo.DevicePointers.emplace_back(
3858  combinedInfo.Mappers.emplace_back(nullptr);
3859  combinedInfo.Names.emplace_back(
3860  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
3861  combinedInfo.BasePointers.emplace_back(
3862  mapData.BasePointers[mapDataIndex]);
3863  combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
3864  combinedInfo.Sizes.emplace_back(builder.getInt64(
3865  moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
3866  }
3867 
3868  // Same MemberOfFlag to indicate its link with parent and other members
3869  // of.
3870  auto mapFlag =
3871  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
3872  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3873  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
3874  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3875  if (checkIfPointerMap(memberClause))
3876  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
3877 
3878  combinedInfo.Types.emplace_back(mapFlag);
3879  combinedInfo.DevicePointers.emplace_back(
3880  mapData.DevicePointers[memberDataIdx]);
3881  combinedInfo.Mappers.emplace_back(mapData.Mappers[memberDataIdx]);
3882  combinedInfo.Names.emplace_back(
3883  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
3884  uint64_t basePointerIndex =
3885  checkIfPointerMap(memberClause) ? memberDataIdx : mapDataIndex;
3886  combinedInfo.BasePointers.emplace_back(
3887  mapData.BasePointers[basePointerIndex]);
3888  combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
3889 
3890  llvm::Value *size = mapData.Sizes[memberDataIdx];
3891  if (checkIfPointerMap(memberClause)) {
3892  size = builder.CreateSelect(
3893  builder.CreateIsNull(mapData.Pointers[memberDataIdx]),
3894  builder.getInt64(0), size);
3895  }
3896 
3897  combinedInfo.Sizes.emplace_back(size);
3898  }
3899 }
3900 
3901 static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
3902  MapInfosTy &combinedInfo, bool isTargetParams,
3903  int mapDataParentIdx = -1) {
3904  // Declare Target Mappings are excluded from being marked as
3905  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
3906  // marked with OMP_MAP_PTR_AND_OBJ instead.
3907  auto mapFlag = mapData.Types[mapDataIdx];
3908  auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
3909 
3910  bool isPtrTy = checkIfPointerMap(mapInfoOp);
3911  if (isPtrTy)
3912  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
3913 
3914  if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx])
3915  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3916 
3917  if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy &&
3918  !isPtrTy)
3919  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
3920 
3921  // if we're provided a mapDataParentIdx, then the data being mapped is
3922  // part of a larger object (in a parent <-> member mapping) and in this
3923  // case our BasePointer should be the parent.
3924  if (mapDataParentIdx >= 0)
3925  combinedInfo.BasePointers.emplace_back(
3926  mapData.BasePointers[mapDataParentIdx]);
3927  else
3928  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
3929 
3930  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
3931  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
3932  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]);
3933  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
3934  combinedInfo.Types.emplace_back(mapFlag);
3935  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
3936 }
3937 
3938 static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation,
3939  llvm::IRBuilderBase &builder,
3940  llvm::OpenMPIRBuilder &ompBuilder,
3941  DataLayout &dl, MapInfosTy &combinedInfo,
3942  MapInfoData &mapData, uint64_t mapDataIndex,
3943  bool isTargetParams) {
3944  auto parentClause =
3945  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3946 
3947  // If we have a partial map (no parent referenced in the map clauses of the
3948  // directive, only members) and only a single member, we do not need to bind
3949  // the map of the member to the parent, we can pass the member separately.
3950  if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) {
3951  auto memberClause = llvm::cast<omp::MapInfoOp>(
3952  parentClause.getMembers()[0].getDefiningOp());
3953  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
3954  // Note: Clang treats arrays with explicit bounds that fall into this
3955  // category as a parent with map case, however, it seems this isn't a
3956  // requirement, and processing them as an individual map is fine. So,
3957  // we will handle them as individual maps for the moment, as it's
3958  // difficult for us to check this as we always require bounds to be
3959  // specified currently and it's also marginally more optimal (single
3960  // map rather than two). The difference may come from the fact that
3961  // Clang maps array without bounds as pointers (which we do not
3962  // currently do), whereas we treat them as arrays in all cases
3963  // currently.
3964  processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams,
3965  mapDataIndex);
3966  return;
3967  }
3968 
3969  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
3970  mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
3971  combinedInfo, mapData, mapDataIndex, isTargetParams);
3972  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
3973  combinedInfo, mapData, mapDataIndex,
3974  memberOfParentFlag);
3975 }
3976 
3977 // This is a variation on Clang's GenerateOpenMPCapturedVars, which
3978 // generates different operation (e.g. load/store) combinations for
3979 // arguments to the kernel, based on map capture kinds which are then
3980 // utilised in the combinedInfo in place of the original Map value.
3981 static void
3982 createAlteredByCaptureMap(MapInfoData &mapData,
3983  LLVM::ModuleTranslation &moduleTranslation,
3984  llvm::IRBuilderBase &builder) {
3985  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
3986  // if it's declare target, skip it, it's handled separately.
3987  if (!mapData.IsDeclareTarget[i]) {
3988  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
3989  omp::VariableCaptureKind captureKind = mapOp.getMapCaptureType();
3990  bool isPtrTy = checkIfPointerMap(mapOp);
3991 
3992  // Currently handles array sectioning lowerbound case, but more
3993  // logic may be required in the future. Clang invokes EmitLValue,
3994  // which has specialised logic for special Clang types such as user
3995  // defines, so it is possible we will have to extend this for
3996  // structures or other complex types. As the general idea is that this
3997  // function mimics some of the logic from Clang that we require for
3998  // kernel argument passing from host -> device.
3999  switch (captureKind) {
4000  case omp::VariableCaptureKind::ByRef: {
4001  llvm::Value *newV = mapData.Pointers[i];
4002  std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
4003  moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
4004  mapOp.getBounds());
4005  if (isPtrTy)
4006  newV = builder.CreateLoad(builder.getPtrTy(), newV);
4007 
4008  if (!offsetIdx.empty())
4009  newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
4010  "array_offset");
4011  mapData.Pointers[i] = newV;
4012  } break;
4013  case omp::VariableCaptureKind::ByCopy: {
4014  llvm::Type *type = mapData.BaseType[i];
4015  llvm::Value *newV;
4016  if (mapData.Pointers[i]->getType()->isPointerTy())
4017  newV = builder.CreateLoad(type, mapData.Pointers[i]);
4018  else
4019  newV = mapData.Pointers[i];
4020 
4021  if (!isPtrTy) {
4022  auto curInsert = builder.saveIP();
4023  builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
4024  auto *memTempAlloc =
4025  builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
4026  builder.restoreIP(curInsert);
4027 
4028  builder.CreateStore(newV, memTempAlloc);
4029  newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
4030  }
4031 
4032  mapData.Pointers[i] = newV;
4033  mapData.BasePointers[i] = newV;
4034  } break;
4035  case omp::VariableCaptureKind::This:
4036  case omp::VariableCaptureKind::VLAType:
4037  mapData.MapClause[i]->emitOpError("Unhandled capture kind");
4038  break;
4039  }
4040  }
4041  }
4042 }
4043 
4044 // Generate all map related information and fill the combinedInfo.
4045 static void genMapInfos(llvm::IRBuilderBase &builder,
4046  LLVM::ModuleTranslation &moduleTranslation,
4047  DataLayout &dl, MapInfosTy &combinedInfo,
4048  MapInfoData &mapData, bool isTargetParams = false) {
4049  // We wish to modify some of the methods in which arguments are
4050  // passed based on their capture type by the target region, this can
4051  // involve generating new loads and stores, which changes the
4052  // MLIR value to LLVM value mapping, however, we only wish to do this
4053  // locally for the current function/target and also avoid altering
4054  // ModuleTranslation, so we remap the base pointer or pointer stored
4055  // in the map infos corresponding MapInfoData, which is later accessed
4056  // by genMapInfos and createTarget to help generate the kernel and
4057  // kernel arg structure. It primarily becomes relevant in cases like
4058  // bycopy, or byref range'd arrays. In the default case, we simply
4059  // pass thee pointer byref as both basePointer and pointer.
4060  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
4061  createAlteredByCaptureMap(mapData, moduleTranslation, builder);
4062 
4063  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4064 
4065  // We operate under the assumption that all vectors that are
4066  // required in MapInfoData are of equal lengths (either filled with
4067  // default constructed data or appropiate information) so we can
4068  // utilise the size from any component of MapInfoData, if we can't
4069  // something is missing from the initial MapInfoData construction.
4070  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
4071  // NOTE/TODO: We currently do not support arbitrary depth record
4072  // type mapping.
4073  if (mapData.IsAMember[i])
4074  continue;
4075 
4076  auto mapInfoOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[i]);
4077  if (!mapInfoOp.getMembers().empty()) {
4078  processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
4079  combinedInfo, mapData, i, isTargetParams);
4080  continue;
4081  }
4082 
4083  processIndividualMap(mapData, i, combinedInfo, isTargetParams);
4084  }
4085 }
4086 
4088 emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder,
4089  LLVM::ModuleTranslation &moduleTranslation,
4090  llvm::StringRef mapperFuncName);
4091 
4093 getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder,
4094  LLVM::ModuleTranslation &moduleTranslation) {
4095  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
4096  std::string mapperFuncName =
4097  moduleTranslation.getOpenMPBuilder()->createPlatformSpecificName(
4098  {"omp_mapper", declMapperOp.getSymName()});
4099 
4100  if (auto *lookupFunc = moduleTranslation.lookupFunction(mapperFuncName))
4101  return lookupFunc;
4102 
4103  return emitUserDefinedMapper(declMapperOp, builder, moduleTranslation,
4104  mapperFuncName);
4105 }
4106 
4108 emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder,
4109  LLVM::ModuleTranslation &moduleTranslation,
4110  llvm::StringRef mapperFuncName) {
4111  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
4112  auto declMapperInfoOp = declMapperOp.getDeclareMapperInfo();
4113  DataLayout dl = DataLayout(declMapperOp->getParentOfType<ModuleOp>());
4114  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4115  llvm::Type *varType = moduleTranslation.convertType(declMapperOp.getType());
4116  SmallVector<Value> mapVars = declMapperInfoOp.getMapVars();
4117 
4118  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4119 
4120  // Fill up the arrays with all the mapped variables.
4121  MapInfosTy combinedInfo;
4122  auto genMapInfoCB =
4123  [&](InsertPointTy codeGenIP, llvm::Value *ptrPHI,
4124  llvm::Value *unused2) -> llvm::OpenMPIRBuilder::MapInfosOrErrorTy {
4125  builder.restoreIP(codeGenIP);
4126  moduleTranslation.mapValue(declMapperOp.getSymVal(), ptrPHI);
4127  moduleTranslation.mapBlock(&declMapperOp.getRegion().front(),
4128  builder.GetInsertBlock());
4129  if (failed(moduleTranslation.convertBlock(declMapperOp.getRegion().front(),
4130  /*ignoreArguments=*/true,
4131  builder)))
4132  return llvm::make_error<PreviouslyReportedError>();
4133  MapInfoData mapData;
4134  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
4135  builder);
4136  genMapInfos(builder, moduleTranslation, dl, combinedInfo, mapData);
4137 
4138  // Drop the mapping that is no longer necessary so that the same region can
4139  // be processed multiple times.
4140  moduleTranslation.forgetMapping(declMapperOp.getRegion());
4141  return combinedInfo;
4142  };
4143 
4144  auto customMapperCB = [&](unsigned i) -> llvm::Expected<llvm::Function *> {
4145  if (!combinedInfo.Mappers[i])
4146  return nullptr;
4147  return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
4148  moduleTranslation);
4149  };
4150 
4151  llvm::Expected<llvm::Function *> newFn = ompBuilder->emitUserDefinedMapper(
4152  genMapInfoCB, varType, mapperFuncName, customMapperCB);
4153  if (!newFn)
4154  return newFn.takeError();
4155  moduleTranslation.mapFunction(mapperFuncName, *newFn);
4156  return *newFn;
4157 }
4158 
4159 static LogicalResult
4160 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
4161  LLVM::ModuleTranslation &moduleTranslation) {
4162  llvm::Value *ifCond = nullptr;
4163  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
4164  SmallVector<Value> mapVars;
4165  SmallVector<Value> useDevicePtrVars;
4166  SmallVector<Value> useDeviceAddrVars;
4167  llvm::omp::RuntimeFunction RTLFn;
4168  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
4169 
4170  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4171  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
4172  /*SeparateBeginEndCalls=*/true);
4173 
4174  LogicalResult result =
4176  .Case([&](omp::TargetDataOp dataOp) {
4177  if (failed(checkImplementationStatus(*dataOp)))
4178  return failure();
4179 
4180  if (auto ifVar = dataOp.getIfExpr())
4181  ifCond = moduleTranslation.lookupValue(ifVar);
4182 
4183  if (auto devId = dataOp.getDevice())
4184  if (auto constOp =
4185  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4186  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4187  deviceID = intAttr.getInt();
4188 
4189  mapVars = dataOp.getMapVars();
4190  useDevicePtrVars = dataOp.getUseDevicePtrVars();
4191  useDeviceAddrVars = dataOp.getUseDeviceAddrVars();
4192  return success();
4193  })
4194  .Case([&](omp::TargetEnterDataOp enterDataOp) -> LogicalResult {
4195  if (failed(checkImplementationStatus(*enterDataOp)))
4196  return failure();
4197 
4198  if (auto ifVar = enterDataOp.getIfExpr())
4199  ifCond = moduleTranslation.lookupValue(ifVar);
4200 
4201  if (auto devId = enterDataOp.getDevice())
4202  if (auto constOp =
4203  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4204  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4205  deviceID = intAttr.getInt();
4206  RTLFn =
4207  enterDataOp.getNowait()
4208  ? llvm::omp::OMPRTL___tgt_target_data_begin_nowait_mapper
4209  : llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
4210  mapVars = enterDataOp.getMapVars();
4211  info.HasNoWait = enterDataOp.getNowait();
4212  return success();
4213  })
4214  .Case([&](omp::TargetExitDataOp exitDataOp) -> LogicalResult {
4215  if (failed(checkImplementationStatus(*exitDataOp)))
4216  return failure();
4217 
4218  if (auto ifVar = exitDataOp.getIfExpr())
4219  ifCond = moduleTranslation.lookupValue(ifVar);
4220 
4221  if (auto devId = exitDataOp.getDevice())
4222  if (auto constOp =
4223  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4224  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4225  deviceID = intAttr.getInt();
4226 
4227  RTLFn = exitDataOp.getNowait()
4228  ? llvm::omp::OMPRTL___tgt_target_data_end_nowait_mapper
4229  : llvm::omp::OMPRTL___tgt_target_data_end_mapper;
4230  mapVars = exitDataOp.getMapVars();
4231  info.HasNoWait = exitDataOp.getNowait();
4232  return success();
4233  })
4234  .Case([&](omp::TargetUpdateOp updateDataOp) -> LogicalResult {
4235  if (failed(checkImplementationStatus(*updateDataOp)))
4236  return failure();
4237 
4238  if (auto ifVar = updateDataOp.getIfExpr())
4239  ifCond = moduleTranslation.lookupValue(ifVar);
4240 
4241  if (auto devId = updateDataOp.getDevice())
4242  if (auto constOp =
4243  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
4244  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4245  deviceID = intAttr.getInt();
4246 
4247  RTLFn =
4248  updateDataOp.getNowait()
4249  ? llvm::omp::OMPRTL___tgt_target_data_update_nowait_mapper
4250  : llvm::omp::OMPRTL___tgt_target_data_update_mapper;
4251  mapVars = updateDataOp.getMapVars();
4252  info.HasNoWait = updateDataOp.getNowait();
4253  return success();
4254  })
4255  .Default([&](Operation *op) {
4256  llvm_unreachable("unexpected operation");
4257  return failure();
4258  });
4259 
4260  if (failed(result))
4261  return failure();
4262 
4263  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4264  MapInfoData mapData;
4265  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, DL,
4266  builder, useDevicePtrVars, useDeviceAddrVars);
4267 
4268  // Fill up the arrays with all the mapped variables.
4269  MapInfosTy combinedInfo;
4270  auto genMapInfoCB = [&](InsertPointTy codeGenIP) -> MapInfosTy & {
4271  builder.restoreIP(codeGenIP);
4272  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
4273  return combinedInfo;
4274  };
4275 
4276  // Define a lambda to apply mappings between use_device_addr and
4277  // use_device_ptr base pointers, and their associated block arguments.
4278  auto mapUseDevice =
4279  [&moduleTranslation](
4280  llvm::OpenMPIRBuilder::DeviceInfoTy type,
4282  llvm::SmallVectorImpl<Value> &useDeviceVars, MapInfoData &mapInfoData,
4283  llvm::function_ref<llvm::Value *(llvm::Value *)> mapper = nullptr) {
4284  for (auto [arg, useDevVar] :
4285  llvm::zip_equal(blockArgs, useDeviceVars)) {
4286 
4287  auto getMapBasePtr = [](omp::MapInfoOp mapInfoOp) {
4288  return mapInfoOp.getVarPtrPtr() ? mapInfoOp.getVarPtrPtr()
4289  : mapInfoOp.getVarPtr();
4290  };
4291 
4292  auto useDevMap = cast<omp::MapInfoOp>(useDevVar.getDefiningOp());
4293  for (auto [mapClause, devicePointer, basePointer] : llvm::zip_equal(
4294  mapInfoData.MapClause, mapInfoData.DevicePointers,
4295  mapInfoData.BasePointers)) {
4296  auto mapOp = cast<omp::MapInfoOp>(mapClause);
4297  if (getMapBasePtr(mapOp) != getMapBasePtr(useDevMap) ||
4298  devicePointer != type)
4299  continue;
4300 
4301  if (llvm::Value *devPtrInfoMap =
4302  mapper ? mapper(basePointer) : basePointer) {
4303  moduleTranslation.mapValue(arg, devPtrInfoMap);
4304  break;
4305  }
4306  }
4307  }
4308  };
4309 
4310  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
4311  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType)
4312  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
4313  builder.restoreIP(codeGenIP);
4314  assert(isa<omp::TargetDataOp>(op) &&
4315  "BodyGen requested for non TargetDataOp");
4316  auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(op);
4317  Region &region = cast<omp::TargetDataOp>(op).getRegion();
4318  switch (bodyGenType) {
4319  case BodyGenTy::Priv:
4320  // Check if any device ptr/addr info is available
4321  if (!info.DevicePtrInfoMap.empty()) {
4322  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4323  blockArgIface.getUseDeviceAddrBlockArgs(),
4324  useDeviceAddrVars, mapData,
4325  [&](llvm::Value *basePointer) -> llvm::Value * {
4326  if (!info.DevicePtrInfoMap[basePointer].second)
4327  return nullptr;
4328  return builder.CreateLoad(
4329  builder.getPtrTy(),
4330  info.DevicePtrInfoMap[basePointer].second);
4331  });
4332  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4333  blockArgIface.getUseDevicePtrBlockArgs(), useDevicePtrVars,
4334  mapData, [&](llvm::Value *basePointer) {
4335  return info.DevicePtrInfoMap[basePointer].second;
4336  });
4337 
4338  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
4339  moduleTranslation)))
4340  return llvm::make_error<PreviouslyReportedError>();
4341  }
4342  break;
4343  case BodyGenTy::DupNoPriv:
4344  // We must always restoreIP regardless of doing anything the caller
4345  // does not restore it, leading to incorrect (no) branch generation.
4346  builder.restoreIP(codeGenIP);
4347  break;
4348  case BodyGenTy::NoPriv:
4349  // If device info is available then region has already been generated
4350  if (info.DevicePtrInfoMap.empty()) {
4351  // For device pass, if use_device_ptr(addr) mappings were present,
4352  // we need to link them here before codegen.
4353  if (ompBuilder->Config.IsTargetDevice.value_or(false)) {
4354  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4355  blockArgIface.getUseDeviceAddrBlockArgs(),
4356  useDeviceAddrVars, mapData);
4357  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4358  blockArgIface.getUseDevicePtrBlockArgs(),
4359  useDevicePtrVars, mapData);
4360  }
4361 
4362  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
4363  moduleTranslation)))
4364  return llvm::make_error<PreviouslyReportedError>();
4365  }
4366  break;
4367  }
4368  return builder.saveIP();
4369  };
4370 
4371  auto customMapperCB =
4372  [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
4373  if (!combinedInfo.Mappers[i])
4374  return nullptr;
4375  info.HasMapper = true;
4376  return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
4377  moduleTranslation);
4378  };
4379 
4380  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4381  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4382  findAllocaInsertPoint(builder, moduleTranslation);
4383  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() {
4384  if (isa<omp::TargetDataOp>(op))
4385  return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
4386  builder.getInt64(deviceID), ifCond,
4387  info, genMapInfoCB, customMapperCB,
4388  /*MapperFunc=*/nullptr, bodyGenCB,
4389  /*DeviceAddrCB=*/nullptr);
4390  return ompBuilder->createTargetData(
4391  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
4392  info, genMapInfoCB, customMapperCB, &RTLFn);
4393  }();
4394 
4395  if (failed(handleError(afterIP, *op)))
4396  return failure();
4397 
4398  builder.restoreIP(*afterIP);
4399  return success();
4400 }
4401 
4402 static LogicalResult
4403 convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
4404  LLVM::ModuleTranslation &moduleTranslation) {
4405  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4406  auto distributeOp = cast<omp::DistributeOp>(opInst);
4407  if (failed(checkImplementationStatus(opInst)))
4408  return failure();
4409 
4410  /// Process teams op reduction in distribute if the reduction is contained in
4411  /// the distribute op.
4412  omp::TeamsOp teamsOp = opInst.getParentOfType<omp::TeamsOp>();
4413  bool doDistributeReduction =
4414  teamsOp ? teamsReductionContainedInDistribute(teamsOp) : false;
4415 
4416  DenseMap<Value, llvm::Value *> reductionVariableMap;
4417  unsigned numReductionVars = teamsOp ? teamsOp.getNumReductionVars() : 0;
4418  SmallVector<omp::DeclareReductionOp> reductionDecls;
4419  SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
4420  llvm::ArrayRef<bool> isByRef;
4421 
4422  if (doDistributeReduction) {
4423  isByRef = getIsByRef(teamsOp.getReductionByref());
4424  assert(isByRef.size() == teamsOp.getNumReductionVars());
4425 
4426  collectReductionDecls(teamsOp, reductionDecls);
4427  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4428  findAllocaInsertPoint(builder, moduleTranslation);
4429 
4430  MutableArrayRef<BlockArgument> reductionArgs =
4431  llvm::cast<omp::BlockArgOpenMPOpInterface>(*teamsOp)
4432  .getReductionBlockArgs();
4433 
4435  teamsOp, reductionArgs, builder, moduleTranslation, allocaIP,
4436  reductionDecls, privateReductionVariables, reductionVariableMap,
4437  isByRef)))
4438  return failure();
4439  }
4440 
4441  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4442  auto bodyGenCB = [&](InsertPointTy allocaIP,
4443  InsertPointTy codeGenIP) -> llvm::Error {
4444  // Save the alloca insertion point on ModuleTranslation stack for use in
4445  // nested regions.
4447  moduleTranslation, allocaIP);
4448 
4449  // DistributeOp has only one region associated with it.
4450  builder.restoreIP(codeGenIP);
4451  PrivateVarsInfo privVarsInfo(distributeOp);
4452 
4453  llvm::Expected<llvm::BasicBlock *> afterAllocas =
4454  allocatePrivateVars(builder, moduleTranslation, privVarsInfo, allocaIP);
4455  if (handleError(afterAllocas, opInst).failed())
4456  return llvm::make_error<PreviouslyReportedError>();
4457 
4458  if (handleError(initPrivateVars(builder, moduleTranslation, privVarsInfo),
4459  opInst)
4460  .failed())
4461  return llvm::make_error<PreviouslyReportedError>();
4462 
4463  if (failed(copyFirstPrivateVars(
4464  builder, moduleTranslation, privVarsInfo.mlirVars,
4465  privVarsInfo.llvmVars, privVarsInfo.privatizers)))
4466  return llvm::make_error<PreviouslyReportedError>();
4467 
4468  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4469  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4471  convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
4472  builder, moduleTranslation);
4473  if (!regionBlock)
4474  return regionBlock.takeError();
4475  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
4476 
4477  // Skip applying a workshare loop below when translating 'distribute
4478  // parallel do' (it's been already handled by this point while translating
4479  // the nested omp.wsloop).
4480  if (!isa_and_present<omp::WsloopOp>(distributeOp.getNestedWrapper())) {
4481  // TODO: Add support for clauses which are valid for DISTRIBUTE
4482  // constructs. Static schedule is the default.
4483  auto schedule = omp::ClauseScheduleKind::Static;
4484  bool isOrdered = false;
4485  std::optional<omp::ScheduleModifier> scheduleMod;
4486  bool isSimd = false;
4487  llvm::omp::WorksharingLoopType workshareLoopType =
4488  llvm::omp::WorksharingLoopType::DistributeStaticLoop;
4489  bool loopNeedsBarrier = false;
4490  llvm::Value *chunk = nullptr;
4491 
4492  llvm::CanonicalLoopInfo *loopInfo =
4493  findCurrentLoopInfo(moduleTranslation);
4494  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
4495  ompBuilder->applyWorkshareLoop(
4496  ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
4497  convertToScheduleKind(schedule), chunk, isSimd,
4498  scheduleMod == omp::ScheduleModifier::monotonic,
4499  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
4500  workshareLoopType);
4501 
4502  if (!wsloopIP)
4503  return wsloopIP.takeError();
4504  }
4505 
4506  if (failed(cleanupPrivateVars(builder, moduleTranslation,
4507  distributeOp.getLoc(), privVarsInfo.llvmVars,
4508  privVarsInfo.privatizers)))
4509  return llvm::make_error<PreviouslyReportedError>();
4510 
4511  return llvm::Error::success();
4512  };
4513 
4514  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4515  findAllocaInsertPoint(builder, moduleTranslation);
4516  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4517  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
4518  ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
4519 
4520  if (failed(handleError(afterIP, opInst)))
4521  return failure();
4522 
4523  builder.restoreIP(*afterIP);
4524 
4525  if (doDistributeReduction) {
4526  // Process the reductions if required.
4528  teamsOp, builder, moduleTranslation, allocaIP, reductionDecls,
4529  privateReductionVariables, isByRef,
4530  /*isNoWait*/ false, /*isTeamsReduction*/ true);
4531  }
4532  return success();
4533 }
4534 
4535 /// Lowers the FlagsAttr which is applied to the module on the device
4536 /// pass when offloading, this attribute contains OpenMP RTL globals that can
4537 /// be passed as flags to the frontend, otherwise they are set to default
4538 LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
4539  LLVM::ModuleTranslation &moduleTranslation) {
4540  if (!cast<mlir::ModuleOp>(op))
4541  return failure();
4542 
4543  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4544 
4545  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
4546  attribute.getOpenmpDeviceVersion());
4547 
4548  if (attribute.getNoGpuLib())
4549  return success();
4550 
4551  ompBuilder->createGlobalFlag(
4552  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
4553  "__omp_rtl_debug_kind");
4554  ompBuilder->createGlobalFlag(
4555  attribute
4556  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
4557  ,
4558  "__omp_rtl_assume_teams_oversubscription");
4559  ompBuilder->createGlobalFlag(
4560  attribute
4561  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
4562  ,
4563  "__omp_rtl_assume_threads_oversubscription");
4564  ompBuilder->createGlobalFlag(
4565  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
4566  "__omp_rtl_assume_no_thread_state");
4567  ompBuilder->createGlobalFlag(
4568  attribute
4569  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
4570  ,
4571  "__omp_rtl_assume_no_nested_parallelism");
4572  return success();
4573 }
4574 
4575 static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
4576  omp::TargetOp targetOp,
4577  llvm::StringRef parentName = "") {
4578  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
4579 
4580  assert(fileLoc && "No file found from location");
4581  StringRef fileName = fileLoc.getFilename().getValue();
4582 
4583  llvm::sys::fs::UniqueID id;
4584  uint64_t line = fileLoc.getLine();
4585  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
4586  size_t fileHash = llvm::hash_value(fileName.str());
4587  size_t deviceId = 0xdeadf17e;
4588  targetInfo =
4589  llvm::TargetRegionEntryInfo(parentName, deviceId, fileHash, line);
4590  } else {
4591  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
4592  id.getFile(), line);
4593  }
4594 }
4595 
4596 static void
4597 handleDeclareTargetMapVar(MapInfoData &mapData,
4598  LLVM::ModuleTranslation &moduleTranslation,
4599  llvm::IRBuilderBase &builder, llvm::Function *func) {
4600  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
4601  // In the case of declare target mapped variables, the basePointer is
4602  // the reference pointer generated by the convertDeclareTargetAttr
4603  // method. Whereas the kernelValue is the original variable, so for
4604  // the device we must replace all uses of this original global variable
4605  // (stored in kernelValue) with the reference pointer (stored in
4606  // basePointer for declare target mapped variables), as for device the
4607  // data is mapped into this reference pointer and should be loaded
4608  // from it, the original variable is discarded. On host both exist and
4609  // metadata is generated (elsewhere in the convertDeclareTargetAttr)
4610  // function to link the two variables in the runtime and then both the
4611  // reference pointer and the pointer are assigned in the kernel argument
4612  // structure for the host.
4613  if (mapData.IsDeclareTarget[i]) {
4614  // If the original map value is a constant, then we have to make sure all
4615  // of it's uses within the current kernel/function that we are going to
4616  // rewrite are converted to instructions, as we will be altering the old
4617  // use (OriginalValue) from a constant to an instruction, which will be
4618  // illegal and ICE the compiler if the user is a constant expression of
4619  // some kind e.g. a constant GEP.
4620  if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
4621  convertUsersOfConstantsToInstructions(constant, func, false);
4622 
4623  // The users iterator will get invalidated if we modify an element,
4624  // so we populate this vector of uses to alter each user on an
4625  // individual basis to emit its own load (rather than one load for
4626  // all).
4628  for (llvm::User *user : mapData.OriginalValue[i]->users())
4629  userVec.push_back(user);
4630 
4631  for (llvm::User *user : userVec) {
4632  if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
4633  if (insn->getFunction() == func) {
4634  auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
4635  mapData.BasePointers[i]);
4636  load->moveBefore(insn->getIterator());
4637  user->replaceUsesOfWith(mapData.OriginalValue[i], load);
4638  }
4639  }
4640  }
4641  }
4642  }
4643 }
4644 
4645 // The createDeviceArgumentAccessor function generates
// instructions for retrieving (accessing) kernel
4647 // arguments inside of the device kernel for use by
4648 // the kernel. This enables different semantics such as
4649 // the creation of temporary copies of data allowing
4650 // semantics like read-only/no host write back kernel
4651 // arguments.
4652 //
4653 // This currently implements a very light version of Clang's
4654 // EmitParmDecl's handling of direct argument handling as well
4655 // as a portion of the argument access generation based on
4656 // capture types found at the end of emitOutlinedFunctionPrologue
4657 // in Clang. The indirect path handling of EmitParmDecl's may be
4658 // required for future work, but a direct 1-to-1 copy doesn't seem
4659 // possible as the logic is rather scattered throughout Clang's
4660 // lowering and perhaps we wish to deviate slightly.
4661 //
4662 // \param mapData - A container containing vectors of information
4663 // corresponding to the input argument, which should have a
4664 // corresponding entry in the MapInfoData containers
// OriginalValue's.
4666 // \param arg - This is the generated kernel function argument that
// corresponds to the passed in input argument. We generate different
4668 // accesses of this Argument, based on capture type and other Input
4669 // related information.
4670 // \param input - This is the host side value that will be passed to
4671 // the kernel i.e. the kernel input, we rewrite all uses of this within
4672 // the kernel (as we generate the kernel body based on the target's region
// which maintains references to the original input) to the retVal argument
// upon exit of this function inside of the OMPIRBuilder. This interlinks
4675 // the kernel argument to future uses of it in the function providing
// appropriate "glue" instructions in between.
4677 // \param retVal - This is the value that all uses of input inside of the
4678 // kernel will be re-written to, the goal of this function is to generate
4679 // an appropriate location for the kernel argument to be accessed from,
4680 // e.g. ByRef will result in a temporary allocation location and then
4681 // a store of the kernel argument into this allocated memory which
4682 // will then be loaded from, ByCopy will use the allocated memory
4683 // directly.
4684 static llvm::IRBuilderBase::InsertPoint
4686  llvm::Value *input, llvm::Value *&retVal,
4687  llvm::IRBuilderBase &builder,
4688  llvm::OpenMPIRBuilder &ompBuilder,
4689  LLVM::ModuleTranslation &moduleTranslation,
4690  llvm::IRBuilderBase::InsertPoint allocaIP,
4691  llvm::IRBuilderBase::InsertPoint codeGenIP) {
4692  builder.restoreIP(allocaIP);
4693 
4694  omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef;
4695  LLVM::TypeToLLVMIRTranslator typeToLLVMIRTranslator(
4696  ompBuilder.M.getContext());
4697  unsigned alignmentValue = 0;
4698  // Find the associated MapInfoData entry for the current input
4699  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
4700  if (mapData.OriginalValue[i] == input) {
4701  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
4702  capture = mapOp.getMapCaptureType();
4703  // Get information of alignment of mapped object
4704  alignmentValue = typeToLLVMIRTranslator.getPreferredAlignment(
4705  mapOp.getVarType(), ompBuilder.M.getDataLayout());
4706  break;
4707  }
4708 
4709  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
4710  unsigned int defaultAS =
4711  ompBuilder.M.getDataLayout().getProgramAddressSpace();
4712 
4713  // Create the alloca for the argument the current point.
4714  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
4715 
4716  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
4717  v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS));
4718 
4719  builder.CreateStore(&arg, v);
4720 
4721  builder.restoreIP(codeGenIP);
4722 
4723  switch (capture) {
4724  case omp::VariableCaptureKind::ByCopy: {
4725  retVal = v;
4726  break;
4727  }
4728  case omp::VariableCaptureKind::ByRef: {
4729  llvm::LoadInst *loadInst = builder.CreateAlignedLoad(
4730  v->getType(), v,
4731  ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
4732  // CreateAlignedLoad function creates similar LLVM IR:
4733  // %res = load ptr, ptr %input, align 8
4734  // This LLVM IR does not contain information about alignment
4735  // of the loaded value. We need to add !align metadata to unblock
4736  // optimizer. The existence of the !align metadata on the instruction
4737  // tells the optimizer that the value loaded is known to be aligned to
4738  // a boundary specified by the integer value in the metadata node.
4739  // Example:
4740  // %res = load ptr, ptr %input, align 8, !align !align_md_node
4741  // ^ ^
4742  // | |
4743  // alignment of %input address |
4744  // |
4745  // alignment of %res object
4746  if (v->getType()->isPointerTy() && alignmentValue) {
4747  llvm::MDBuilder MDB(builder.getContext());
4748  loadInst->setMetadata(
4749  llvm::LLVMContext::MD_align,
4750  llvm::MDNode::get(builder.getContext(),
4751  MDB.createConstant(llvm::ConstantInt::get(
4752  llvm::Type::getInt64Ty(builder.getContext()),
4753  alignmentValue))));
4754  }
4755  retVal = loadInst;
4756 
4757  break;
4758  }
4759  case omp::VariableCaptureKind::This:
4760  case omp::VariableCaptureKind::VLAType:
4761  // TODO: Consider returning error to use standard reporting for
4762  // unimplemented features.
4763  assert(false && "Currently unsupported capture kind");
4764  break;
4765  }
4766 
4767  return builder.saveIP();
4768 }
4769 
4770 /// Follow uses of `host_eval`-defined block arguments of the given `omp.target`
4771 /// operation and populate output variables with their corresponding host value
4772 /// (i.e. operand evaluated outside of the target region), based on their uses
4773 /// inside of the target region.
4774 ///
4775 /// Loop bounds and steps are only optionally populated, if output vectors are
4776 /// provided.
4777 static void
4778 extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
4779  Value &numTeamsLower, Value &numTeamsUpper,
4780  Value &threadLimit,
4781  llvm::SmallVectorImpl<Value> *lowerBounds = nullptr,
4782  llvm::SmallVectorImpl<Value> *upperBounds = nullptr,
4783  llvm::SmallVectorImpl<Value> *steps = nullptr) {
4784  auto blockArgIface = llvm::cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
4785  for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(),
4786  blockArgIface.getHostEvalBlockArgs())) {
4787  Value hostEvalVar = std::get<0>(item), blockArg = std::get<1>(item);
4788 
4789  for (Operation *user : blockArg.getUsers()) {
4791  .Case([&](omp::TeamsOp teamsOp) {
4792  if (teamsOp.getNumTeamsLower() == blockArg)
4793  numTeamsLower = hostEvalVar;
4794  else if (teamsOp.getNumTeamsUpper() == blockArg)
4795  numTeamsUpper = hostEvalVar;
4796  else if (teamsOp.getThreadLimit() == blockArg)
4797  threadLimit = hostEvalVar;
4798  else
4799  llvm_unreachable("unsupported host_eval use");
4800  })
4801  .Case([&](omp::ParallelOp parallelOp) {
4802  if (parallelOp.getNumThreads() == blockArg)
4803  numThreads = hostEvalVar;
4804  else
4805  llvm_unreachable("unsupported host_eval use");
4806  })
4807  .Case([&](omp::LoopNestOp loopOp) {
4808  auto processBounds =
4809  [&](OperandRange opBounds,
4810  llvm::SmallVectorImpl<Value> *outBounds) -> bool {
4811  bool found = false;
4812  for (auto [i, lb] : llvm::enumerate(opBounds)) {
4813  if (lb == blockArg) {
4814  found = true;
4815  if (outBounds)
4816  (*outBounds)[i] = hostEvalVar;
4817  }
4818  }
4819  return found;
4820  };
4821  bool found =
4822  processBounds(loopOp.getLoopLowerBounds(), lowerBounds);
4823  found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) ||
4824  found;
4825  found = processBounds(loopOp.getLoopSteps(), steps) || found;
4826  (void)found;
4827  assert(found && "unsupported host_eval use");
4828  })
4829  .Default([](Operation *) {
4830  llvm_unreachable("unsupported host_eval use");
4831  });
4832  }
4833  }
4834 }
4835 
4836 /// If \p op is of the given type parameter, return it casted to that type.
4837 /// Otherwise, if its immediate parent operation (or some other higher-level
4838 /// parent, if \p immediateParent is false) is of that type, return that parent
4839 /// casted to the given type.
4840 ///
4841 /// If \p op is \c null or neither it or its parent(s) are of the specified
4842 /// type, return a \c null operation.
4843 template <typename OpTy>
4844 static OpTy castOrGetParentOfType(Operation *op, bool immediateParent = false) {
4845  if (!op)
4846  return OpTy();
4847 
4848  if (OpTy casted = dyn_cast<OpTy>(op))
4849  return casted;
4850 
4851  if (immediateParent)
4852  return dyn_cast_if_present<OpTy>(op->getParentOp());
4853 
4854  return op->getParentOfType<OpTy>();
4855 }
4856 
4857 /// If the given \p value is defined by an \c llvm.mlir.constant operation and
4858 /// it is of an integer type, return its value.
4859 static std::optional<int64_t> extractConstInteger(Value value) {
4860  if (!value)
4861  return std::nullopt;
4862 
4863  if (auto constOp =
4864  dyn_cast_if_present<LLVM::ConstantOp>(value.getDefiningOp()))
4865  if (auto constAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4866  return constAttr.getInt();
4867 
4868  return std::nullopt;
4869 }
4870 
4871 static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl) {
4872  uint64_t sizeInBits = dl.getTypeSizeInBits(type);
4873  uint64_t sizeInBytes = sizeInBits / 8;
4874  return sizeInBytes;
4875 }
4876 
4877 template <typename OpTy>
4878 static uint64_t getReductionDataSize(OpTy &op) {
4879  if (op.getNumReductionVars() > 0) {
4881  collectReductionDecls(op, reductions);
4882 
4884  members.reserve(reductions.size());
4885  for (omp::DeclareReductionOp &red : reductions)
4886  members.push_back(red.getType());
4887  Operation *opp = op.getOperation();
4888  auto structType = mlir::LLVM::LLVMStructType::getLiteral(
4889  opp->getContext(), members, /*isPacked=*/false);
4890  DataLayout dl = DataLayout(opp->getParentOfType<ModuleOp>());
4891  return getTypeByteSize(structType, dl);
4892  }
4893  return 0;
4894 }
4895 
/// Populate default `MinTeams`, `MaxTeams` and `MaxThreads` to their default
/// values as stated by the corresponding clauses, if constant.
///
/// These default values must be set before the creation of the outlined LLVM
/// function for the target region, so that they can be used to initialize the
/// corresponding global `ConfigurationEnvironmentTy` structure.
///
/// \param targetOp the `omp.target` operation being translated.
/// \param capturedOp the innermost OpenMP operation captured by the target
///        region, used to locate `teams`/`parallel`/`simd` clauses.
/// \param attrs output structure of kernel default bounds and exec flags.
/// \param isTargetDevice true when compiling for the device; clause values are
///        then read directly from the captured ops rather than `host_eval`.
/// \param isGPU true when targeting a GPU; enables reduction-buffer sizing.
static void
initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp,
                       llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs,
                       bool isTargetDevice, bool isGPU) {
  // TODO: Handle constant 'if' clauses.

  Value numThreads, numTeamsLower, numTeamsUpper, threadLimit;
  if (!isTargetDevice) {
    extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
                           threadLimit);
  } else {
    // In the target device, values for these clauses are not passed as
    // host_eval, but instead evaluated prior to entry to the region. This
    // ensures values are mapped and available inside of the target region.
    if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
      numTeamsLower = teamsOp.getNumTeamsLower();
      numTeamsUpper = teamsOp.getNumTeamsUpper();
      threadLimit = teamsOp.getThreadLimit();
    }

    if (auto parallelOp = castOrGetParentOfType<omp::ParallelOp>(capturedOp))
      numThreads = parallelOp.getNumThreads();
  }

  // Handle clauses impacting the number of teams.

  // Sentinel convention below: -1 means "unset", 0 means "set but not a
  // compile-time constant".
  int32_t minTeamsVal = 1, maxTeamsVal = -1;
  if (castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
    // TODO: Use `hostNumTeamsLower` to initialize `minTeamsVal`. For now, match
    // clang and set min and max to the same value.
    if (numTeamsUpper) {
      if (auto val = extractConstInteger(numTeamsUpper))
        minTeamsVal = maxTeamsVal = *val;
    } else {
      minTeamsVal = maxTeamsVal = 0;
    }
  } else if (castOrGetParentOfType<omp::ParallelOp>(capturedOp,
                                                    /*immediateParent=*/true) ||
             castOrGetParentOfType<omp::SimdOp>(capturedOp,
                                                /*immediateParent=*/true)) {
    minTeamsVal = maxTeamsVal = 1;
  } else {
    minTeamsVal = maxTeamsVal = -1;
  }

  // Handle clauses impacting the number of threads.

  // Helper: overwrite `result` with the constant value of `clauseValue` if it
  // is a constant, otherwise clamp negative (unset) to 0 (set but unknown).
  auto setMaxValueFromClause = [](Value clauseValue, int32_t &result) {
    if (!clauseValue)
      return;

    if (auto val = extractConstInteger(clauseValue))
      result = *val;

    // Found an applicable clause, so it's not undefined. Mark as unknown
    // because it's not constant.
    if (result < 0)
      result = 0;
  };

  // Extract 'thread_limit' clause from 'target' and 'teams' directives.
  int32_t targetThreadLimitVal = -1, teamsThreadLimitVal = -1;
  setMaxValueFromClause(targetOp.getThreadLimit(), targetThreadLimitVal);
  setMaxValueFromClause(threadLimit, teamsThreadLimitVal);

  // Extract 'max_threads' clause from 'parallel' or set to 1 if it's SIMD.
  int32_t maxThreadsVal = -1;
  if (castOrGetParentOfType<omp::ParallelOp>(capturedOp))
    setMaxValueFromClause(numThreads, maxThreadsVal);
  else if (castOrGetParentOfType<omp::SimdOp>(capturedOp,
                                              /*immediateParent=*/true))
    maxThreadsVal = 1;

  // For max values, < 0 means unset, == 0 means set but unknown. Select the
  // minimum value between 'max_threads' and 'thread_limit' clauses that were
  // set.
  int32_t combinedMaxThreadsVal = targetThreadLimitVal;
  if (combinedMaxThreadsVal < 0 ||
      (teamsThreadLimitVal >= 0 && teamsThreadLimitVal < combinedMaxThreadsVal))
    combinedMaxThreadsVal = teamsThreadLimitVal;

  if (combinedMaxThreadsVal < 0 ||
      (maxThreadsVal >= 0 && maxThreadsVal < combinedMaxThreadsVal))
    combinedMaxThreadsVal = maxThreadsVal;

  // Only GPU targets with a teams construct need a teams-reduction buffer.
  int32_t reductionDataSize = 0;
  if (isGPU && capturedOp) {
    if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp))
      reductionDataSize = getReductionDataSize(teamsOp);
  }

  // Update kernel bounds structure for the `OpenMPIRBuilder` to use.
  omp::TargetRegionFlags kernelFlags = targetOp.getKernelExecFlags(capturedOp);
  assert(
      omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic |
                                               omp::TargetRegionFlags::spmd) &&
      "invalid kernel flags");
  // generic+spmd -> GENERIC_SPMD, generic alone -> GENERIC, otherwise SPMD.
  attrs.ExecFlags =
      omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic)
          ? omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::spmd)
                ? llvm::omp::OMP_TGT_EXEC_MODE_GENERIC_SPMD
                : llvm::omp::OMP_TGT_EXEC_MODE_GENERIC
          : llvm::omp::OMP_TGT_EXEC_MODE_SPMD;
  attrs.MinTeams = minTeamsVal;
  attrs.MaxTeams.front() = maxTeamsVal;
  attrs.MinThreads = 1;
  attrs.MaxThreads.front() = combinedMaxThreadsVal;
  attrs.ReductionDataSize = reductionDataSize;
  // TODO: Allow modified buffer length similar to
  // fopenmp-cuda-teams-reduction-recs-num flag in clang.
  if (attrs.ReductionDataSize != 0)
    attrs.ReductionBufferLength = 1024;
}
5015 
/// Gather LLVM runtime values for all clauses evaluated in the host that are
/// passed to the kernel invocation.
///
/// This function must be called only when compiling for the host. Also, it will
/// only provide correct results if it's called after the body of \c targetOp
/// has been fully generated.
///
/// \param builder IR builder positioned where trip-count math may be emitted.
/// \param moduleTranslation used to map MLIR clause values to LLVM values.
/// \param targetOp the `omp.target` operation whose host_eval clauses to read.
/// \param capturedOp the innermost captured OpenMP op (loop nest lookup).
/// \param attrs output structure of runtime kernel launch bounds.
static void
initTargetRuntimeAttrs(llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation,
                       omp::TargetOp targetOp, Operation *capturedOp,
                       llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs) {
  omp::LoopNestOp loopOp = castOrGetParentOfType<omp::LoopNestOp>(capturedOp);
  unsigned numLoops = loopOp ? loopOp.getNumLoops() : 0;

  Value numThreads, numTeamsLower, numTeamsUpper, teamsThreadLimit;
  llvm::SmallVector<Value> lowerBounds(numLoops), upperBounds(numLoops),
      steps(numLoops);
  extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
                         teamsThreadLimit, &lowerBounds, &upperBounds, &steps);

  // TODO: Handle constant 'if' clauses.
  if (Value targetThreadLimit = targetOp.getThreadLimit())
    attrs.TargetThreadLimit.front() =
        moduleTranslation.lookupValue(targetThreadLimit);

  if (numTeamsLower)
    attrs.MinTeams = moduleTranslation.lookupValue(numTeamsLower);

  if (numTeamsUpper)
    attrs.MaxTeams.front() = moduleTranslation.lookupValue(numTeamsUpper);

  if (teamsThreadLimit)
    attrs.TeamsThreadLimit.front() =
        moduleTranslation.lookupValue(teamsThreadLimit);

  if (numThreads)
    attrs.MaxThreads = moduleTranslation.lookupValue(numThreads);

  // Compute the kernel trip count only when the kernel flags request it.
  if (omp::bitEnumContainsAny(targetOp.getKernelExecFlags(capturedOp),
                              omp::TargetRegionFlags::trip_count)) {
    llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
    attrs.LoopTripCount = nullptr;

    // To calculate the trip count, we multiply together the trip counts of
    // every collapsed canonical loop. We don't need to create the loop nests
    // here, since we're only interested in the trip count.
    for (auto [loopLower, loopUpper, loopStep] :
         llvm::zip_equal(lowerBounds, upperBounds, steps)) {
      llvm::Value *lowerBound = moduleTranslation.lookupValue(loopLower);
      llvm::Value *upperBound = moduleTranslation.lookupValue(loopUpper);
      llvm::Value *step = moduleTranslation.lookupValue(loopStep);

      llvm::OpenMPIRBuilder::LocationDescription loc(builder);
      llvm::Value *tripCount = ompBuilder->calculateCanonicalLoopTripCount(
          loc, lowerBound, upperBound, step, /*IsSigned=*/true,
          loopOp.getLoopInclusive());

      // First loop: seed the accumulator instead of multiplying.
      if (!attrs.LoopTripCount) {
        attrs.LoopTripCount = tripCount;
        continue;
      }

      // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
      attrs.LoopTripCount = builder.CreateMul(attrs.LoopTripCount, tripCount,
                                              {}, /*HasNUW=*/true);
    }
  }
}
5084 
5085 static LogicalResult
5086 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
5087  LLVM::ModuleTranslation &moduleTranslation) {
5088  auto targetOp = cast<omp::TargetOp>(opInst);
5089  if (failed(checkImplementationStatus(opInst)))
5090  return failure();
5091 
5092  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5093  bool isTargetDevice = ompBuilder->Config.isTargetDevice();
5094  bool isGPU = ompBuilder->Config.isGPU();
5095 
5096  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
5097  auto argIface = cast<omp::BlockArgOpenMPOpInterface>(opInst);
5098  auto &targetRegion = targetOp.getRegion();
5099  // Holds the private vars that have been mapped along with the block argument
5100  // that corresponds to the MapInfoOp corresponding to the private var in
5101  // question. So, for instance:
5102  //
5103  // %10 = omp.map.info var_ptr(%6#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, ..)
5104  // omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1)
5105  //
5106  // Then, %10 has been created so that the descriptor can be used by the
5107  // privatizer @box.privatizer on the device side. Here we'd record {%6#0,
5108  // %arg0} in the mappedPrivateVars map.
5109  llvm::DenseMap<Value, Value> mappedPrivateVars;
5110  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
5111  SmallVector<Value> mapVars = targetOp.getMapVars();
5112  SmallVector<Value> hdaVars = targetOp.getHasDeviceAddrVars();
5113  ArrayRef<BlockArgument> mapBlockArgs = argIface.getMapBlockArgs();
5114  ArrayRef<BlockArgument> hdaBlockArgs = argIface.getHasDeviceAddrBlockArgs();
5115  llvm::Function *llvmOutlinedFn = nullptr;
5116 
5117  // TODO: It can also be false if a compile-time constant `false` IF clause is
5118  // specified.
5119  bool isOffloadEntry =
5120  isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
5121 
5122  // For some private variables, the MapsForPrivatizedVariablesPass
5123  // creates MapInfoOp instances. Go through the private variables and
5124  // the mapped variables so that during codegeneration we are able
5125  // to quickly look up the corresponding map variable, if any for each
5126  // private variable.
5127  if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) {
5128  OperandRange privateVars = targetOp.getPrivateVars();
5129  std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
5130  std::optional<DenseI64ArrayAttr> privateMapIndices =
5131  targetOp.getPrivateMapsAttr();
5132 
5133  for (auto [privVarIdx, privVarSymPair] :
5134  llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) {
5135  auto privVar = std::get<0>(privVarSymPair);
5136  auto privSym = std::get<1>(privVarSymPair);
5137 
5138  SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
5139  omp::PrivateClauseOp privatizer =
5140  findPrivatizer(targetOp, privatizerName);
5141 
5142  if (!privatizer.needsMap())
5143  continue;
5144 
5145  mlir::Value mappedValue =
5146  targetOp.getMappedValueForPrivateVar(privVarIdx);
5147  assert(mappedValue && "Expected to find mapped value for a privatized "
5148  "variable that needs mapping");
5149 
5150  // The MapInfoOp defining the map var isn't really needed later.
5151  // So, we don't store it in any datastructure. Instead, we just
5152  // do some sanity checks on it right now.
5153  auto mapInfoOp = mappedValue.getDefiningOp<omp::MapInfoOp>();
5154  [[maybe_unused]] Type varType = mapInfoOp.getVarType();
5155 
5156  // Check #1: Check that the type of the private variable matches
5157  // the type of the variable being mapped.
5158  if (!isa<LLVM::LLVMPointerType>(privVar.getType()))
5159  assert(
5160  varType == privVar.getType() &&
5161  "Type of private var doesn't match the type of the mapped value");
5162 
5163  // Ok, only 1 sanity check for now.
5164  // Record the block argument corresponding to this mapvar.
5165  mappedPrivateVars.insert(
5166  {privVar,
5167  targetRegion.getArgument(argIface.getMapBlockArgsStart() +
5168  (*privateMapIndices)[privVarIdx])});
5169  }
5170  }
5171 
5172  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5173  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
5174  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
5175  llvm::IRBuilderBase::InsertPointGuard guard(builder);
5176  builder.SetCurrentDebugLocation(llvm::DebugLoc());
5177  // Forward target-cpu and target-features function attributes from the
5178  // original function to the new outlined function.
5179  llvm::Function *llvmParentFn =
5180  moduleTranslation.lookupFunction(parentFn.getName());
5181  llvmOutlinedFn = codeGenIP.getBlock()->getParent();
5182  assert(llvmParentFn && llvmOutlinedFn &&
5183  "Both parent and outlined functions must exist at this point");
5184 
5185  if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
5186  attr.isStringAttribute())
5187  llvmOutlinedFn->addFnAttr(attr);
5188 
5189  if (auto attr = llvmParentFn->getFnAttribute("target-features");
5190  attr.isStringAttribute())
5191  llvmOutlinedFn->addFnAttr(attr);
5192 
5193  for (auto [arg, mapOp] : llvm::zip_equal(mapBlockArgs, mapVars)) {
5194  auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
5195  llvm::Value *mapOpValue =
5196  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
5197  moduleTranslation.mapValue(arg, mapOpValue);
5198  }
5199  for (auto [arg, mapOp] : llvm::zip_equal(hdaBlockArgs, hdaVars)) {
5200  auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
5201  llvm::Value *mapOpValue =
5202  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
5203  moduleTranslation.mapValue(arg, mapOpValue);
5204  }
5205 
5206  // Do privatization after moduleTranslation has already recorded
5207  // mapped values.
5208  PrivateVarsInfo privateVarsInfo(targetOp);
5209 
5210  llvm::Expected<llvm::BasicBlock *> afterAllocas =
5211  allocatePrivateVars(builder, moduleTranslation, privateVarsInfo,
5212  allocaIP, &mappedPrivateVars);
5213 
5214  if (failed(handleError(afterAllocas, *targetOp)))
5215  return llvm::make_error<PreviouslyReportedError>();
5216 
5217  builder.restoreIP(codeGenIP);
5218  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo,
5219  &mappedPrivateVars),
5220  *targetOp)
5221  .failed())
5222  return llvm::make_error<PreviouslyReportedError>();
5223 
5224  if (failed(copyFirstPrivateVars(
5225  builder, moduleTranslation, privateVarsInfo.mlirVars,
5226  privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
5227  &mappedPrivateVars)))
5228  return llvm::make_error<PreviouslyReportedError>();
5229 
5230  SmallVector<Region *> privateCleanupRegions;
5231  llvm::transform(privateVarsInfo.privatizers,
5232  std::back_inserter(privateCleanupRegions),
5233  [](omp::PrivateClauseOp privatizer) {
5234  return &privatizer.getDeallocRegion();
5235  });
5236 
5238  targetRegion, "omp.target", builder, moduleTranslation);
5239 
5240  if (!exitBlock)
5241  return exitBlock.takeError();
5242 
5243  builder.SetInsertPoint(*exitBlock);
5244  if (!privateCleanupRegions.empty()) {
5245  if (failed(inlineOmpRegionCleanup(
5246  privateCleanupRegions, privateVarsInfo.llvmVars,
5247  moduleTranslation, builder, "omp.targetop.private.cleanup",
5248  /*shouldLoadCleanupRegionArg=*/false))) {
5249  return llvm::createStringError(
5250  "failed to inline `dealloc` region of `omp.private` "
5251  "op in the target region");
5252  }
5253  return builder.saveIP();
5254  }
5255 
5256  return InsertPointTy(exitBlock.get(), exitBlock.get()->end());
5257  };
5258 
5259  StringRef parentName = parentFn.getName();
5260 
5261  llvm::TargetRegionEntryInfo entryInfo;
5262 
5263  getTargetEntryUniqueInfo(entryInfo, targetOp, parentName);
5264 
5265  MapInfoData mapData;
5266  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
5267  builder, /*useDevPtrOperands=*/{},
5268  /*useDevAddrOperands=*/{}, hdaVars);
5269 
5270  MapInfosTy combinedInfos;
5271  auto genMapInfoCB =
5272  [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) -> MapInfosTy & {
5273  builder.restoreIP(codeGenIP);
5274  genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, true);
5275  return combinedInfos;
5276  };
5277 
5278  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
5279  llvm::Value *&retVal, InsertPointTy allocaIP,
5280  InsertPointTy codeGenIP)
5281  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
5282  llvm::IRBuilderBase::InsertPointGuard guard(builder);
5283  builder.SetCurrentDebugLocation(llvm::DebugLoc());
5284  // We just return the unaltered argument for the host function
5285  // for now, some alterations may be required in the future to
5286  // keep host fallback functions working identically to the device
5287  // version (e.g. pass ByCopy values should be treated as such on
5288  // host and device, currently not always the case)
5289  if (!isTargetDevice) {
5290  retVal = cast<llvm::Value>(&arg);
5291  return codeGenIP;
5292  }
5293 
5294  return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
5295  *ompBuilder, moduleTranslation,
5296  allocaIP, codeGenIP);
5297  };
5298 
5299  llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs runtimeAttrs;
5300  llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs defaultAttrs;
5301  Operation *targetCapturedOp = targetOp.getInnermostCapturedOmpOp();
5302  initTargetDefaultAttrs(targetOp, targetCapturedOp, defaultAttrs,
5303  isTargetDevice, isGPU);
5304 
5305  // Collect host-evaluated values needed to properly launch the kernel from the
5306  // host.
5307  if (!isTargetDevice)
5308  initTargetRuntimeAttrs(builder, moduleTranslation, targetOp,
5309  targetCapturedOp, runtimeAttrs);
5310 
5311  // Pass host-evaluated values as parameters to the kernel / host fallback,
5312  // except if they are constants. In any case, map the MLIR block argument to
5313  // the corresponding LLVM values.
5315  SmallVector<Value> hostEvalVars = targetOp.getHostEvalVars();
5316  ArrayRef<BlockArgument> hostEvalBlockArgs = argIface.getHostEvalBlockArgs();
5317  for (auto [arg, var] : llvm::zip_equal(hostEvalBlockArgs, hostEvalVars)) {
5318  llvm::Value *value = moduleTranslation.lookupValue(var);
5319  moduleTranslation.mapValue(arg, value);
5320 
5321  if (!llvm::isa<llvm::Constant>(value))
5322  kernelInput.push_back(value);
5323  }
5324 
5325  for (size_t i = 0, e = mapData.OriginalValue.size(); i != e; ++i) {
5326  // declare target arguments are not passed to kernels as arguments
5327  // TODO: We currently do not handle cases where a member is explicitly
5328  // passed in as an argument, this will likley need to be handled in
5329  // the near future, rather than using IsAMember, it may be better to
5330  // test if the relevant BlockArg is used within the target region and
5331  // then use that as a basis for exclusion in the kernel inputs.
5332  if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
5333  kernelInput.push_back(mapData.OriginalValue[i]);
5334  }
5335 
5337  buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
5338  moduleTranslation, dds);
5339 
5340  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
5341  findAllocaInsertPoint(builder, moduleTranslation);
5342  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
5343 
5344  llvm::OpenMPIRBuilder::TargetDataInfo info(
5345  /*RequiresDevicePointerInfo=*/false,
5346  /*SeparateBeginEndCalls=*/true);
5347 
5348  auto customMapperCB =
5349  [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
5350  if (!combinedInfos.Mappers[i])
5351  return nullptr;
5352  info.HasMapper = true;
5353  return getOrCreateUserDefinedMapperFunc(combinedInfos.Mappers[i], builder,
5354  moduleTranslation);
5355  };
5356 
5357  llvm::Value *ifCond = nullptr;
5358  if (Value targetIfCond = targetOp.getIfExpr())
5359  ifCond = moduleTranslation.lookupValue(targetIfCond);
5360 
5361  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
5362  moduleTranslation.getOpenMPBuilder()->createTarget(
5363  ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), info, entryInfo,
5364  defaultAttrs, runtimeAttrs, ifCond, kernelInput, genMapInfoCB, bodyCB,
5365  argAccessorCB, customMapperCB, dds, targetOp.getNowait());
5366 
5367  if (failed(handleError(afterIP, opInst)))
5368  return failure();
5369 
5370  builder.restoreIP(*afterIP);
5371 
5372  // Remap access operations to declare target reference pointers for the
5373  // device, essentially generating extra loadop's as necessary
5374  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
5375  handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
5376  llvmOutlinedFn);
5377 
5378  return success();
5379 }
5380 
/// Handles the `omp.declare_target` attribute attached to a function or a
/// global, either erasing host-only wrapper functions on the device pass or
/// registering globals with the OpenMPIRBuilder's offload entry machinery.
static LogicalResult
convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
                         LLVM::ModuleTranslation &moduleTranslation) {
  // Amend omp.declare_target by deleting the IR of the outlined functions
  // created for target regions. They cannot be filtered out from MLIR earlier
  // because the omp.target operation inside must be translated to LLVM, but
  // the wrapper functions themselves must not remain at the end of the
  // process. We know that functions where omp.declare_target does not match
  // omp.is_target_device at this stage can only be wrapper functions because
  // those that aren't are removed earlier as an MLIR transformation pass.
  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
    if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
            op->getParentOfType<ModuleOp>().getOperation())) {
      // Nothing to amend when compiling for the host.
      if (!offloadMod.getIsTargetDevice())
        return success();

      omp::DeclareTargetDeviceType declareType =
          attribute.getDeviceType().getValue();

      // Host-only functions must not survive into the device module; drop
      // references first so erasure does not leave dangling uses.
      if (declareType == omp::DeclareTargetDeviceType::host) {
        llvm::Function *llvmFunc =
            moduleTranslation.lookupFunction(funcOp.getName());
        llvmFunc->dropAllReferences();
        llvmFunc->eraseFromParent();
      }
    }
    return success();
  }

  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
    llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
    // Only act if the global was already translated into the LLVM module.
    if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
      llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
      bool isDeclaration = gOp.isDeclaration();
      bool isExternallyVisible =
          gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
      // File/line info is used to build the unique offload entry name below.
      auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
      llvm::StringRef mangledName = gOp.getSymName();
      auto captureClause =
          convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
      auto deviceClause =
          convertToDeviceClauseKind(attribute.getDeviceType().getValue());
      // unused for MLIR at the moment, required in Clang for book
      // keeping
      std::vector<llvm::GlobalVariable *> generatedRefs;

      // Forward the module's target triple (if any) to the registration call.
      std::vector<llvm::Triple> targetTriple;
      auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>(
          op->getParentOfType<mlir::ModuleOp>()->getAttr(
              LLVM::LLVMDialect::getTargetTripleAttrName()));
      if (targetTripleAttr)
        targetTriple.emplace_back(targetTripleAttr.data());

      // Produces (filename, line) for getTargetEntryUniqueInfo; falls back to
      // ("", 0) when the location carries no file information.
      auto fileInfoCallBack = [&loc]() {
        std::string filename = "";
        std::uint64_t lineNo = 0;

        if (loc) {
          filename = loc.getFilename().str();
          lineNo = loc.getLine();
        }

        return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
                                                     lineNo);
      };

      ompBuilder->registerTargetGlobalVariable(
          captureClause, deviceClause, isDeclaration, isExternallyVisible,
          ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
          generatedRefs, /*OpenMPSimd*/ false, targetTriple,
          /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
          gVal->getType(), gVal);

      // On the device, non-`to` captures (or unified shared memory) also need
      // an indirection variable; materialize it now.
      if (ompBuilder->Config.isTargetDevice() &&
          (attribute.getCaptureClause().getValue() !=
               mlir::omp::DeclareTargetCaptureClause::to ||
           ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
        ompBuilder->getAddrOfDeclareTargetVar(
            captureClause, deviceClause, isDeclaration, isExternallyVisible,
            ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
            generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
            /*GlobalInitializer*/ nullptr,
            /*VariableLinkage*/ nullptr);
      }
    }
  }

  return success();
}
5470 
5471 // Returns true if the operation is inside a TargetOp or
5472 // is part of a declare target function.
5473 static bool isTargetDeviceOp(Operation *op) {
5474  // Assumes no reverse offloading
5475  if (op->getParentOfType<omp::TargetOp>())
5476  return true;
5477 
5478  // Certain operations return results, and whether utilised in host or
5479  // target there is a chance an LLVM Dialect operation depends on it
5480  // by taking it in as an operand, so we must always lower these in
5481  // some manner or result in an ICE (whether they end up in a no-op
5482  // or otherwise).
5483  if (mlir::isa<omp::ThreadprivateOp>(op))
5484  return true;
5485 
5486  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
5487  if (auto declareTargetIface =
5488  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
5489  parentFn.getOperation()))
5490  if (declareTargetIface.isDeclareTarget() &&
5491  declareTargetIface.getDeclareTargetDeviceType() !=
5492  mlir::omp::DeclareTargetDeviceType::host)
5493  return true;
5494 
5495  return false;
5496 }
5497 
5498 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
5499 /// OpenMP runtime calls).
5500 static LogicalResult
5501 convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
5502  LLVM::ModuleTranslation &moduleTranslation) {
5503  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5504 
5505  // For each loop, introduce one stack frame to hold loop information. Ensure
5506  // this is only done for the outermost loop wrapper to prevent introducing
5507  // multiple stack frames for a single loop. Initially set to null, the loop
5508  // information structure is initialized during translation of the nested
5509  // omp.loop_nest operation, making it available to translation of all loop
5510  // wrappers after their body has been successfully translated.
5511  bool isOutermostLoopWrapper =
5512  isa_and_present<omp::LoopWrapperInterface>(op) &&
5513  !dyn_cast_if_present<omp::LoopWrapperInterface>(op->getParentOp());
5514 
5515  if (isOutermostLoopWrapper)
5516  moduleTranslation.stackPush<OpenMPLoopInfoStackFrame>();
5517 
5518  auto result =
5520  .Case([&](omp::BarrierOp op) -> LogicalResult {
5521  if (failed(checkImplementationStatus(*op)))
5522  return failure();
5523 
5524  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
5525  ompBuilder->createBarrier(builder.saveIP(),
5526  llvm::omp::OMPD_barrier);
5527  return handleError(afterIP, *op);
5528  })
5529  .Case([&](omp::TaskyieldOp op) {
5530  if (failed(checkImplementationStatus(*op)))
5531  return failure();
5532 
5533  ompBuilder->createTaskyield(builder.saveIP());
5534  return success();
5535  })
5536  .Case([&](omp::FlushOp op) {
5537  if (failed(checkImplementationStatus(*op)))
5538  return failure();
5539 
5540  // No support in Openmp runtime function (__kmpc_flush) to accept
5541  // the argument list.
5542  // OpenMP standard states the following:
5543  // "An implementation may implement a flush with a list by ignoring
5544  // the list, and treating it the same as a flush without a list."
5545  //
5546  // The argument list is discarded so that, flush with a list is
5547  // treated same as a flush without a list.
5548  ompBuilder->createFlush(builder.saveIP());
5549  return success();
5550  })
5551  .Case([&](omp::ParallelOp op) {
5552  return convertOmpParallel(op, builder, moduleTranslation);
5553  })
5554  .Case([&](omp::MaskedOp) {
5555  return convertOmpMasked(*op, builder, moduleTranslation);
5556  })
5557  .Case([&](omp::MasterOp) {
5558  return convertOmpMaster(*op, builder, moduleTranslation);
5559  })
5560  .Case([&](omp::CriticalOp) {
5561  return convertOmpCritical(*op, builder, moduleTranslation);
5562  })
5563  .Case([&](omp::OrderedRegionOp) {
5564  return convertOmpOrderedRegion(*op, builder, moduleTranslation);
5565  })
5566  .Case([&](omp::OrderedOp) {
5567  return convertOmpOrdered(*op, builder, moduleTranslation);
5568  })
5569  .Case([&](omp::WsloopOp) {
5570  return convertOmpWsloop(*op, builder, moduleTranslation);
5571  })
5572  .Case([&](omp::SimdOp) {
5573  return convertOmpSimd(*op, builder, moduleTranslation);
5574  })
5575  .Case([&](omp::AtomicReadOp) {
5576  return convertOmpAtomicRead(*op, builder, moduleTranslation);
5577  })
5578  .Case([&](omp::AtomicWriteOp) {
5579  return convertOmpAtomicWrite(*op, builder, moduleTranslation);
5580  })
5581  .Case([&](omp::AtomicUpdateOp op) {
5582  return convertOmpAtomicUpdate(op, builder, moduleTranslation);
5583  })
5584  .Case([&](omp::AtomicCaptureOp op) {
5585  return convertOmpAtomicCapture(op, builder, moduleTranslation);
5586  })
5587  .Case([&](omp::CancelOp op) {
5588  return convertOmpCancel(op, builder, moduleTranslation);
5589  })
5590  .Case([&](omp::CancellationPointOp op) {
5591  return convertOmpCancellationPoint(op, builder, moduleTranslation);
5592  })
5593  .Case([&](omp::SectionsOp) {
5594  return convertOmpSections(*op, builder, moduleTranslation);
5595  })
5596  .Case([&](omp::SingleOp op) {
5597  return convertOmpSingle(op, builder, moduleTranslation);
5598  })
5599  .Case([&](omp::TeamsOp op) {
5600  return convertOmpTeams(op, builder, moduleTranslation);
5601  })
5602  .Case([&](omp::TaskOp op) {
5603  return convertOmpTaskOp(op, builder, moduleTranslation);
5604  })
5605  .Case([&](omp::TaskgroupOp op) {
5606  return convertOmpTaskgroupOp(op, builder, moduleTranslation);
5607  })
5608  .Case([&](omp::TaskwaitOp op) {
5609  return convertOmpTaskwaitOp(op, builder, moduleTranslation);
5610  })
5611  .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareMapperOp,
5612  omp::DeclareMapperInfoOp, omp::DeclareReductionOp,
5613  omp::CriticalDeclareOp>([](auto op) {
5614  // `yield` and `terminator` can be just omitted. The block structure
5615  // was created in the region that handles their parent operation.
5616  // `declare_reduction` will be used by reductions and is not
5617  // converted directly, skip it.
5618  // `declare_mapper` and `declare_mapper.info` are handled whenever
5619  // they are referred to through a `map` clause.
5620  // `critical.declare` is only used to declare names of critical
5621  // sections which will be used by `critical` ops and hence can be
5622  // ignored for lowering. The OpenMP IRBuilder will create unique
5623  // name for critical section names.
5624  return success();
5625  })
5626  .Case([&](omp::ThreadprivateOp) {
5627  return convertOmpThreadprivate(*op, builder, moduleTranslation);
5628  })
5629  .Case<omp::TargetDataOp, omp::TargetEnterDataOp,
5630  omp::TargetExitDataOp, omp::TargetUpdateOp>([&](auto op) {
5631  return convertOmpTargetData(op, builder, moduleTranslation);
5632  })
5633  .Case([&](omp::TargetOp) {
5634  return convertOmpTarget(*op, builder, moduleTranslation);
5635  })
5636  .Case([&](omp::DistributeOp) {
5637  return convertOmpDistribute(*op, builder, moduleTranslation);
5638  })
5639  .Case([&](omp::LoopNestOp) {
5640  return convertOmpLoopNest(*op, builder, moduleTranslation);
5641  })
5642  .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
5643  [&](auto op) {
5644  // No-op, should be handled by relevant owning operations e.g.
5645  // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
5646  // etc. and then discarded
5647  return success();
5648  })
5649  .Default([&](Operation *inst) {
5650  return inst->emitError()
5651  << "not yet implemented: " << inst->getName();
5652  });
5653 
5654  if (isOutermostLoopWrapper)
5655  moduleTranslation.stackPop();
5656 
5657  return result;
5658 }
5659 
/// Translates an operation encountered while compiling for a target device.
/// Currently forwards directly to the shared host/target lowering routine.
static LogicalResult
convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation) {
  return convertHostOrTargetOperation(op, builder, moduleTranslation);
}
5665 
5666 static LogicalResult
5667 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
5668  LLVM::ModuleTranslation &moduleTranslation) {
5669  if (isa<omp::TargetOp>(op))
5670  return convertOmpTarget(*op, builder, moduleTranslation);
5671  if (isa<omp::TargetDataOp>(op))
5672  return convertOmpTargetData(op, builder, moduleTranslation);
5673  bool interrupted =
5674  op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
5675  if (isa<omp::TargetOp>(oper)) {
5676  if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
5677  return WalkResult::interrupt();
5678  return WalkResult::skip();
5679  }
5680  if (isa<omp::TargetDataOp>(oper)) {
5681  if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
5682  return WalkResult::interrupt();
5683  return WalkResult::skip();
5684  }
5685 
5686  // Non-target ops might nest target-related ops, therefore, we
5687  // translate them as non-OpenMP scopes. Translating them is needed by
5688  // nested target-related ops since they might need LLVM values defined
5689  // in their parent non-target ops.
5690  if (isa<omp::OpenMPDialect>(oper->getDialect()) &&
5691  oper->getParentOfType<LLVM::LLVMFuncOp>() &&
5692  !oper->getRegions().empty()) {
5693  if (auto blockArgsIface =
5694  dyn_cast<omp::BlockArgOpenMPOpInterface>(oper))
5695  forwardArgs(moduleTranslation, blockArgsIface);
5696  else {
5697  // Here we map entry block arguments of
5698  // non-BlockArgOpenMPOpInterface ops if they can be encountered
5699  // inside of a function and they define any of these arguments.
5700  if (isa<mlir::omp::AtomicUpdateOp>(oper))
5701  for (auto [operand, arg] :
5702  llvm::zip_equal(oper->getOperands(),
5703  oper->getRegion(0).getArguments())) {
5704  moduleTranslation.mapValue(
5705  arg, builder.CreateLoad(
5706  moduleTranslation.convertType(arg.getType()),
5707  moduleTranslation.lookupValue(operand)));
5708  }
5709  }
5710 
5711  if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) {
5712  assert(builder.GetInsertBlock() &&
5713  "No insert block is set for the builder");
5714  for (auto iv : loopNest.getIVs()) {
5715  // Map iv to an undefined value just to keep the IR validity.
5716  moduleTranslation.mapValue(
5718  moduleTranslation.convertType(iv.getType())));
5719  }
5720  }
5721 
5722  for (Region &region : oper->getRegions()) {
5723  // Regions are fake in the sense that they are not a truthful
5724  // translation of the OpenMP construct being converted (e.g. no
5725  // OpenMP runtime calls will be generated). We just need this to
5726  // prepare the kernel invocation args.
5728  auto result = convertOmpOpRegions(
5729  region, oper->getName().getStringRef().str() + ".fake.region",
5730  builder, moduleTranslation, &phis);
5731  if (failed(handleError(result, *oper)))
5732  return WalkResult::interrupt();
5733 
5734  builder.SetInsertPoint(result.get(), result.get()->end());
5735  }
5736 
5737  return WalkResult::skip();
5738  }
5739 
5740  return WalkResult::advance();
5741  }).wasInterrupted();
5742  return failure(interrupted);
5743 }
5744 
5745 namespace {
5746 
5747 /// Implementation of the dialect interface that converts operations belonging
5748 /// to the OpenMP dialect to LLVM IR.
5749 class OpenMPDialectLLVMIRTranslationInterface
5751 public:
5753 
5754  /// Translates the given operation to LLVM IR using the provided IR builder
5755  /// and saving the state in `moduleTranslation`.
5756  LogicalResult
5757  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
5758  LLVM::ModuleTranslation &moduleTranslation) const final;
5759 
5760  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
5761  /// runtime calls, or operation amendments
5762  LogicalResult
5764  NamedAttribute attribute,
5765  LLVM::ModuleTranslation &moduleTranslation) const final;
5766 };
5767 
5768 } // namespace
5769 
5770 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
5771  Operation *op, ArrayRef<llvm::Instruction *> instructions,
5772  NamedAttribute attribute,
5773  LLVM::ModuleTranslation &moduleTranslation) const {
5774  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
5775  attribute.getName())
5776  .Case("omp.is_target_device",
5777  [&](Attribute attr) {
5778  if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
5779  llvm::OpenMPIRBuilderConfig &config =
5780  moduleTranslation.getOpenMPBuilder()->Config;
5781  config.setIsTargetDevice(deviceAttr.getValue());
5782  return success();
5783  }
5784  return failure();
5785  })
5786  .Case("omp.is_gpu",
5787  [&](Attribute attr) {
5788  if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
5789  llvm::OpenMPIRBuilderConfig &config =
5790  moduleTranslation.getOpenMPBuilder()->Config;
5791  config.setIsGPU(gpuAttr.getValue());
5792  return success();
5793  }
5794  return failure();
5795  })
5796  .Case("omp.host_ir_filepath",
5797  [&](Attribute attr) {
5798  if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
5799  llvm::OpenMPIRBuilder *ompBuilder =
5800  moduleTranslation.getOpenMPBuilder();
5801  ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
5802  return success();
5803  }
5804  return failure();
5805  })
5806  .Case("omp.flags",
5807  [&](Attribute attr) {
5808  if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
5809  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
5810  return failure();
5811  })
5812  .Case("omp.version",
5813  [&](Attribute attr) {
5814  if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
5815  llvm::OpenMPIRBuilder *ompBuilder =
5816  moduleTranslation.getOpenMPBuilder();
5817  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
5818  versionAttr.getVersion());
5819  return success();
5820  }
5821  return failure();
5822  })
5823  .Case("omp.declare_target",
5824  [&](Attribute attr) {
5825  if (auto declareTargetAttr =
5826  dyn_cast<omp::DeclareTargetAttr>(attr))
5827  return convertDeclareTargetAttr(op, declareTargetAttr,
5828  moduleTranslation);
5829  return failure();
5830  })
5831  .Case("omp.requires",
5832  [&](Attribute attr) {
5833  if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
5834  using Requires = omp::ClauseRequires;
5835  Requires flags = requiresAttr.getValue();
5836  llvm::OpenMPIRBuilderConfig &config =
5837  moduleTranslation.getOpenMPBuilder()->Config;
5838  config.setHasRequiresReverseOffload(
5839  bitEnumContainsAll(flags, Requires::reverse_offload));
5840  config.setHasRequiresUnifiedAddress(
5841  bitEnumContainsAll(flags, Requires::unified_address));
5842  config.setHasRequiresUnifiedSharedMemory(
5843  bitEnumContainsAll(flags, Requires::unified_shared_memory));
5844  config.setHasRequiresDynamicAllocators(
5845  bitEnumContainsAll(flags, Requires::dynamic_allocators));
5846  return success();
5847  }
5848  return failure();
5849  })
5850  .Case("omp.target_triples",
5851  [&](Attribute attr) {
5852  if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
5853  llvm::OpenMPIRBuilderConfig &config =
5854  moduleTranslation.getOpenMPBuilder()->Config;
5855  config.TargetTriples.clear();
5856  config.TargetTriples.reserve(triplesAttr.size());
5857  for (Attribute tripleAttr : triplesAttr) {
5858  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
5859  config.TargetTriples.emplace_back(tripleStrAttr.getValue());
5860  else
5861  return failure();
5862  }
5863  return success();
5864  }
5865  return failure();
5866  })
5867  .Default([](Attribute) {
5868  // Fall through for omp attributes that do not require lowering.
5869  return success();
5870  })(attribute.getValue());
5871 
5872  return failure();
5873 }
5874 
5875 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
5876 /// (including OpenMP runtime calls).
5877 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
5878  Operation *op, llvm::IRBuilderBase &builder,
5879  LLVM::ModuleTranslation &moduleTranslation) const {
5880 
5881  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5882  if (ompBuilder->Config.isTargetDevice()) {
5883  if (isTargetDeviceOp(op)) {
5884  return convertTargetDeviceOp(op, builder, moduleTranslation);
5885  } else {
5886  return convertTargetOpsInNest(op, builder, moduleTranslation);
5887  }
5888  }
5889  return convertHostOrTargetOperation(op, builder, moduleTranslation);
5890 }
5891 
5893  registry.insert<omp::OpenMPDialect>();
5894  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
5895  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
5896  });
5897 }
5898 
5900  DialectRegistry registry;
5902  context.appendDialectRegistry(registry);
5903 }
union mlir::linalg::@1197::ArityGroupAndKind::Kind kind
@ None
static llvm::Value * getRefPtrIfDeclareTarget(mlir::Value value, LLVM::ModuleTranslation &moduleTranslation)
static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::Function *func)
static LogicalResult convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static llvm::omp::OrderKind convertOrderKind(std::optional< omp::ClauseOrderKind > o)
Convert Order attribute to llvm::omp::OrderKind.
static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp)
static llvm::Expected< llvm::Function * > emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::StringRef mapperFuncName)
static LogicalResult convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::Expected< llvm::Value * > initPrivateVar(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg, llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Initialize a single (first)private variable.
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertToCaptureClauseKind(mlir::omp::DeclareTargetCaptureClause captureClause)
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, bool first)
static OpTy castOrGetParentOfType(Operation *op, bool immediateParent=false)
If op is of the given type parameter, return it casted to that type.
static LogicalResult copyFirstPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< mlir::Value > &mlirPrivateVars, ArrayRef< llvm::Value * > llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static LogicalResult convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered_region' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an omp.atomic.write operation to LLVM IR.
static OwningAtomicReductionGen makeAtomicReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible atomic reduction generator for the given reduction declaration.
static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static llvm::Value * findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Return the llvm::Value * corresponding to the privateVar that is being privatized.
static LogicalResult convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
static void popCancelFinalizationCB(const ArrayRef< llvm::BranchInst * > cancelTerminators, llvm::OpenMPIRBuilder &ompBuilder, const llvm::OpenMPIRBuilder::InsertPointTy &afterIP)
If we cancelled the construct, we should branch to the finalization block of that construct.
static LogicalResult cleanupPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, Location loc, SmallVectorImpl< llvm::Value * > &llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls)
static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static llvm::Expected< llvm::BasicBlock * > allocatePrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Allocate and initialize delayed private variables.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op)
Converts an LLVM dialect binary operation to the corresponding enum value for atomicrmw supported bin...
static llvm::AtomicOrdering convertAtomicOrdering(std::optional< omp::ClauseMemoryOrderKind > ao)
Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static void setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder, llvm::BasicBlock *block=nullptr)
static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult allocAndInitializeReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef)
static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, MapInfosTy &combinedInfo, bool isTargetParams, int mapDataParentIdx=-1)
static LogicalResult convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
static bool isTargetDeviceOp(Operation *op)
static LogicalResult inlineOmpRegionCleanup(llvm::SmallVectorImpl< Region * > &cleanupRegions, llvm::ArrayRef< llvm::Value * > privateVariables, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, StringRef regionName, bool shouldLoadCleanupRegionArg=true)
handling of DeclareReductionOp's cleanup region
static void pushCancelFinalizationCB(SmallVectorImpl< llvm::BranchInst * > &cancelTerminators, llvm::IRBuilderBase &llvmBuilder, llvm::OpenMPIRBuilder &ompBuilder, mlir::Operation *op, llvm::omp::Directive cancelDirective)
Shared implementation of a callback which adds a termiator for the new block created for the branch t...
static void mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, DenseMap< Value, llvm::Value * > &reductionVariableMap, unsigned i)
Map input arguments to reduction initialization region.
static LogicalResult initReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::BasicBlock *latestAllocaBlock, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef, SmallVectorImpl< DeferredStore > &deferredStores)
Inline reductions' init regions.
static LogicalResult convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp)
static llvm::Error initPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static llvm::SmallString< 64 > getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, llvm::OpenMPIRBuilder &ompBuilder)
static OwningReductionGen makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible reduction generator for the given reduction declaration.
static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts the OpenMP parallel operation to LLVM IR.
static llvm::OpenMPIRBuilder::InsertPointTy findAllocaInsertPoint(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Find the insertion point for allocas given the current insertion point for normal operations in the b...
static LogicalResult inlineConvertOmpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::Value * > *continuationBlockArgs=nullptr)
Translates the blocks contained in the given region and appends them at the current insertion poin...
static LogicalResult convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP Threadprivate operation into LLVM IR using OpenMPIRBuilder.
static omp::PrivateClauseOp findPrivatizer(Operation *from, SymbolRefAttr symbolName)
Looks up from the operation from and returns the PrivateClauseOp with name symbolName.
static LogicalResult convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, bool isTargetParams=false)
LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
Lowers the FlagsAttr which is applied to the module on the device pass when offloading,...
static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl)
static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, omp::TargetOp targetOp, llvm::StringRef parentName="")
static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, Value &numTeamsLower, Value &numTeamsUpper, Value &threadLimit, llvm::SmallVectorImpl< Value > *lowerBounds=nullptr, llvm::SmallVectorImpl< Value > *upperBounds=nullptr, llvm::SmallVectorImpl< Value > *steps=nullptr)
Follow uses of host_eval-defined block arguments of the given omp.target operation and populate outpu...
uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl)
std::vector< llvm::Value * > calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, bool isArrayTy, OperandRange bounds)
This function calculates the array/pointer offset for map data provided with bounds operations,...
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static llvm::Expected< llvm::Function * > getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation, omp::BlockArgOpenMPOpInterface blockArgIface)
Maps block arguments from blockArgIface (which are MLIR values) to the corresponding LLVM values of t...
static LogicalResult allocReductionVars(T loop, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, SmallVectorImpl< DeferredStore > &deferredStores, llvm::ArrayRef< bool > isByRefs)
Allocate space for privatized reduction variables.
static ArrayRef< bool > getIsByRef(std::optional< ArrayRef< bool >> attr)
static bool constructIsCancellable(Operation *op)
Returns true if the construct contains omp.cancel or omp.cancellation_point.
static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP)
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind)
Convert ProcBindKind from MLIR-generated enum to LLVM enum.
static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static void collectReductionDecls(T op, SmallVectorImpl< omp::DeclareReductionOp > &reductions)
Populates reductions with reduction declarations used in the given op.
static LogicalResult handleError(llvm::Error error, Operation &op)
static void processMapMembersWithParent(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag)
static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause)
static LogicalResult checkImplementationStatus(Operation &op)
Check whether translation to LLVM IR for the given operation is currently supported.
static LogicalResult createReductionsAndCleanup(OP op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, ArrayRef< llvm::Value * > privateReductionVariables, ArrayRef< bool > isByRef, bool isNowait=false, bool isTeamsReduction=false)
static LogicalResult convertOmpCancellationPoint(omp::CancellationPointOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static uint64_t getReductionDataSize(OpTy &op)
static llvm::CanonicalLoopInfo * findCurrentLoopInfo(LLVM::ModuleTranslation &moduleTranslation)
Find the loop information structure for the loop nest being translated.
static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert omp.atomic.read operation to LLVM IR.
static llvm::omp::Directive convertCancellationConstructType(omp::ClauseCancellationConstructType directive)
static void initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs, bool isTargetDevice, bool isGPU)
Populate default MinTeams, MaxTeams and MaxThreads to their default values as stated by the correspon...
static std::optional< int64_t > extractConstInteger(Value value)
If the given value is defined by an llvm.mlir.constant operation and it is of an integer type,...
static LogicalResult convertIgnoredWrapper(omp::LoopWrapperInterface opInst, LLVM::ModuleTranslation &moduleTranslation)
Helper function to map block arguments defined by ignored loop wrappers to LLVM values and prevent an...
static void initTargetRuntimeAttrs(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs)
Gather LLVM runtime values for all clauses evaluated in the host that are passed to the kernel invoca...
static void collectReductionInfo(T loop, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< OwningReductionGen > &owningReductionGens, SmallVectorImpl< OwningAtomicReductionGen > &owningAtomicReductionGens, const ArrayRef< llvm::Value * > privateReductionVariables, SmallVectorImpl< llvm::OpenMPIRBuilder::ReductionInfo > &reductionInfos)
Collect reduction info.
static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Given an OpenMP MLIR operation, create the corresponding LLVM IR (including OpenMP runtime calls).
static bool checkIfPointerMap(omp::MapInfoOp mapOp)
static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void buildDependData(std::optional< ArrayAttr > dependKinds, OperandRange dependVars, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::OpenMPIRBuilder::DependData > &dds)
static llvm::Expected< llvm::BasicBlock * > convertOmpOpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::PHINode * > *continuationBlockPHIs=nullptr)
Converts the given region that appears within an OpenMP dialect operation to LLVM IR,...
static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
llvm::Value * getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::Value *basePointer, llvm::Type *baseType, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void collectMapDataFromMapOperands(MapInfoData &mapData, SmallVectorImpl< Value > &mapVars, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::IRBuilderBase &builder, ArrayRef< Value > useDevPtrOperands={}, ArrayRef< Value > useDevAddrOperands={}, ArrayRef< Value > hasDevAddrOperands={})
static bool isDeclareTargetLink(mlir::Value value)
#define MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CLASS_NAME)
Definition: TypeID.h:331
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class represents an argument of a Block.
Definition: Value.h:295
Block represents an ordered list of Operations.
Definition: Block.h:33
BlockArgument getArgument(unsigned i)
Definition: Block.h:129
unsigned getNumArguments()
Definition: Block.h:128
Operation & back()
Definition: Block.h:152
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:246
Operation & front()
Definition: Block.h:153
iterator begin()
Definition: Block.h:143
The main mechanism for performing data layout queries.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
An instance of this location represents a tuple of file, line number, and column number.
Definition: Location.h:164
Base class for dialect interfaces providing translation to LLVM IR.
virtual LogicalResult amendOperation(Operation *op, ArrayRef< llvm::Instruction * > instructions, NamedAttribute attribute, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to act on an operation that has dialect attributes from the derive...
virtual LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to provide translation of the operations to LLVM IR.
Concrete CRTP base class for ModuleTranslation stack frames.
Implementation class for module translation.
llvm::Value * lookupValue(Value value) const
Finds an LLVM IR value corresponding to the given MLIR value.
WalkResult stackWalk(llvm::function_ref< WalkResult(T &)> callback)
Calls callback for every ModuleTranslation stack frame of type T starting from the top of the stack.
void stackPush(Args &&...args)
Creates a stack frame of type T on ModuleTranslation stack.
LogicalResult convertBlock(Block &bb, bool ignoreArguments, llvm::IRBuilderBase &builder)
Translates the contents of the given block to LLVM IR using this translator.
SmallVector< llvm::Value * > lookupValues(ValueRange values)
Looks up a list of remapped values.
void mapFunction(StringRef name, llvm::Function *func)
Stores the mapping between a function name and its LLVM IR representation.
llvm::BasicBlock * lookupBlock(Block *block) const
Finds an LLVM IR basic block that corresponds to the given MLIR block.
SymbolTableCollection & symbolTable()
llvm::Type * convertType(Type type)
Converts the type from MLIR LLVM dialect to LLVM.
llvm::OpenMPIRBuilder * getOpenMPBuilder()
Returns the OpenMP IR builder associated with the LLVM IR module being constructed.
llvm::LLVMContext & getLLVMContext() const
Returns the LLVM context in which the IR is being constructed.
llvm::GlobalValue * lookupGlobal(Operation *op)
Finds an LLVM IR global value that corresponds to the given MLIR operation defining a global value.
llvm::Module * getLLVMModule()
Returns the LLVM module in which the IR is being constructed.
llvm::Function * lookupFunction(StringRef name) const
Finds an LLVM IR function by its name.
void mapBlock(Block *mlir, llvm::BasicBlock *llvm)
Stores the mapping between an MLIR block and LLVM IR basic block.
void stackPop()
Pops the last element from the ModuleTranslation stack.
void forgetMapping(Region &region)
Removes the mapping for blocks contained in the region and values defined in these blocks.
void mapValue(Value mlir, llvm::Value *llvm)
Stores the mapping between an MLIR value and its LLVM IR counterpart.
Utility class to translate MLIR LLVM dialect types to LLVM IR.
Definition: TypeToLLVM.h:39
unsigned getPreferredAlignment(Type type, const llvm::DataLayout &layout)
Returns the preferred alignment for the type given the data layout.
Definition: TypeToLLVM.cpp:183
T findInstanceOf()
Return an instance of the given location type if one is nested under the current location.
Definition: Location.h:44
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:66
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:164
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:55
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:179
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:43
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:350
Dialect * getDialect()
Return the dialect this operation is associated with, or nullptr if the associated dialect is not loa...
Definition: Operation.h:220
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:280
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:798
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:346
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:687
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:874
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:539
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
BlockArgListType getArguments()
Definition: Region.h:81
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition: Region.h:200
bool empty()
Definition: Region.h:60
unsigned getNumArguments()
Definition: Region.h:123
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
Block & front()
Definition: Region.h:65
@ Private
The symbol is private and may only be referenced by SymbolRefAttrs local to the operations within the...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:33
static WalkResult advance()
Definition: Visitors.h:51
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: Visitors.h:55
static WalkResult interrupt()
Definition: Visitors.h:50
The OpAsmOpInterface, see OpAsmInterface.td for more details.
Definition: CallGraph.h:229
void connectPHINodes(Region &region, const ModuleTranslation &state)
For all blocks in the region that were converted to LLVM IR using the given ModuleTranslation,...
llvm::Constant * createMappingInformation(Location loc, llvm::OpenMPIRBuilder &builder)
Create a constant string representing the mapping information extracted from the MLIR location inform...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Runtime
Potential runtimes for AMD GPU kernels.
Definition: Runtimes.h:15
inline ::llvm::hash_code hash_value(const PolynomialBase< D, T > &arg)
Definition: Polynomial.h:262
llvm::PointerUnion< NamedAttribute *, NamedProperty *, NamedTypeConstraint * > Argument
Definition: Argument.h:64
Include the generated interface declarations.
SetVector< Block * > getBlocksSortedByDominance(Region &region)
Gets a list of blocks that is sorted according to dominance.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:305
const FrozenRewritePatternSet GreedyRewriteConfig config
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
void registerOpenMPDialectTranslation(DialectRegistry &registry)
Register the OpenMP dialect and the translation from it to the LLVM IR in the given registry;.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
A util to collect info needed to convert delayed privatizers from MLIR to LLVM.
SmallVector< mlir::Value > mlirVars
SmallVector< omp::PrivateClauseOp > privatizers
MutableArrayRef< BlockArgument > blockArgs
SmallVector< llvm::Value * > llvmVars
RAII object calling stackPush/stackPop on construction/destruction.