MLIR  21.0.0git
OpenMPToLLVMIRTranslation.cpp
Go to the documentation of this file.
1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
19 #include "mlir/IR/IRMapping.h"
20 #include "mlir/IR/Operation.h"
21 #include "mlir/Support/LLVM.h"
25 
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/TypeSwitch.h"
30 #include "llvm/Frontend/OpenMP/OMPConstants.h"
31 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DebugInfoMetadata.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/ReplaceConstant.h"
37 #include "llvm/Support/FileSystem.h"
38 #include "llvm/TargetParser/Triple.h"
39 #include "llvm/Transforms/Utils/ModuleUtils.h"
40 
41 #include <any>
42 #include <cstdint>
43 #include <iterator>
44 #include <numeric>
45 #include <optional>
46 #include <utility>
47 
48 using namespace mlir;
49 
50 namespace {
51 static llvm::omp::ScheduleKind
52 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
53  if (!schedKind.has_value())
54  return llvm::omp::OMP_SCHEDULE_Default;
55  switch (schedKind.value()) {
56  case omp::ClauseScheduleKind::Static:
57  return llvm::omp::OMP_SCHEDULE_Static;
58  case omp::ClauseScheduleKind::Dynamic:
59  return llvm::omp::OMP_SCHEDULE_Dynamic;
60  case omp::ClauseScheduleKind::Guided:
61  return llvm::omp::OMP_SCHEDULE_Guided;
62  case omp::ClauseScheduleKind::Auto:
63  return llvm::omp::OMP_SCHEDULE_Auto;
65  return llvm::omp::OMP_SCHEDULE_Runtime;
66  }
67  llvm_unreachable("unhandled schedule clause argument");
68 }
69 
70 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
71 /// insertion points for allocas.
class OpenMPAllocaStackFrame
    : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)

  /// Records the insertion point at which allocas for the enclosing OpenMP
  /// construct must be created; nested translations retrieve it via
  /// ModuleTranslation::stackWalk (see findAllocaInsertPoint below).
  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
      : allocaInsertPoint(allocaIP) {}
  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
};
81 
82 /// Stack frame to hold a \see llvm::CanonicalLoopInfo representing the
83 /// collapsed canonical loop information corresponding to an \c omp.loop_nest
84 /// operation.
class OpenMPLoopInfoStackFrame
    : public LLVM::ModuleTranslation::StackFrameBase<OpenMPLoopInfoStackFrame> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPLoopInfoStackFrame)
  /// Filled in by the translation of the loop body; remains null until the
  /// corresponding omp.loop_nest has been successfully translated.
  llvm::CanonicalLoopInfo *loopInfo = nullptr;
};
91 
92 /// Custom error class to signal translation errors that don't need reporting,
93 /// since encountering them will have already triggered relevant error messages.
94 ///
95 /// Its purpose is to serve as the glue between MLIR failures represented as
96 /// \see LogicalResult instances and \see llvm::Error instances used to
97 /// propagate errors through the \see llvm::OpenMPIRBuilder. Generally, when an
98 /// error of the first type is raised, a message is emitted directly (the \see
99 /// LogicalResult itself does not hold any information). If we need to forward
100 /// this error condition as an \see llvm::Error while avoiding triggering some
101 /// redundant error reporting later on, we need a custom \see llvm::ErrorInfo
102 /// class to just signal this situation has happened.
103 ///
104 /// For example, this class should be used to trigger errors from within
105 /// callbacks passed to the \see OpenMPIRBuilder when they were triggered by the
106 /// translation of their own regions. This unclutters the error log from
107 /// redundant messages.
108 class PreviouslyReportedError
109  : public llvm::ErrorInfo<PreviouslyReportedError> {
110 public:
111  void log(raw_ostream &) const override {
112  // Do not log anything.
113  }
114 
115  std::error_code convertToErrorCode() const override {
116  llvm_unreachable(
117  "PreviouslyReportedError doesn't support ECError conversion");
118  }
119 
120  // Used by ErrorInfo::classID.
121  static char ID;
122 };
123 
125 
126 } // namespace
127 
128 /// Looks up from the operation from and returns the PrivateClauseOp with
129 /// name symbolName
130 static omp::PrivateClauseOp findPrivatizer(Operation *from,
131  SymbolRefAttr symbolName) {
132  omp::PrivateClauseOp privatizer =
133  SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
134  symbolName);
135  assert(privatizer && "privatizer not found in the symbol table");
136  return privatizer;
137 }
138 
139 /// Check whether translation to LLVM IR for the given operation is currently
140 /// supported. If not, descriptive diagnostics will be emitted to let users know
141 /// this is a not-yet-implemented feature.
142 ///
143 /// \returns success if no unimplemented features are needed to translate the
144 /// given operation.
145 static LogicalResult checkImplementationStatus(Operation &op) {
146  auto todo = [&op](StringRef clauseName) {
147  return op.emitError() << "not yet implemented: Unhandled clause "
148  << clauseName << " in " << op.getName()
149  << " operation";
150  };
151 
152  auto checkAllocate = [&todo](auto op, LogicalResult &result) {
153  if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
154  result = todo("allocate");
155  };
156  auto checkBare = [&todo](auto op, LogicalResult &result) {
157  if (op.getBare())
158  result = todo("ompx_bare");
159  };
160  auto checkDepend = [&todo](auto op, LogicalResult &result) {
161  if (!op.getDependVars().empty() || op.getDependKinds())
162  result = todo("depend");
163  };
164  auto checkDevice = [&todo](auto op, LogicalResult &result) {
165  if (op.getDevice())
166  result = todo("device");
167  };
168  auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
169  if (op.getDistScheduleChunkSize())
170  result = todo("dist_schedule with chunk_size");
171  };
172  auto checkHint = [](auto op, LogicalResult &) {
173  if (op.getHint())
174  op.emitWarning("hint clause discarded");
175  };
176  auto checkInReduction = [&todo](auto op, LogicalResult &result) {
177  if (!op.getInReductionVars().empty() || op.getInReductionByref() ||
178  op.getInReductionSyms())
179  result = todo("in_reduction");
180  };
181  auto checkIsDevicePtr = [&todo](auto op, LogicalResult &result) {
182  if (!op.getIsDevicePtrVars().empty())
183  result = todo("is_device_ptr");
184  };
185  auto checkLinear = [&todo](auto op, LogicalResult &result) {
186  if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
187  result = todo("linear");
188  };
189  auto checkNontemporal = [&todo](auto op, LogicalResult &result) {
190  if (!op.getNontemporalVars().empty())
191  result = todo("nontemporal");
192  };
193  auto checkNowait = [&todo](auto op, LogicalResult &result) {
194  if (op.getNowait())
195  result = todo("nowait");
196  };
197  auto checkOrder = [&todo](auto op, LogicalResult &result) {
198  if (op.getOrder() || op.getOrderMod())
199  result = todo("order");
200  };
201  auto checkParLevelSimd = [&todo](auto op, LogicalResult &result) {
202  if (op.getParLevelSimd())
203  result = todo("parallelization-level");
204  };
205  auto checkPriority = [&todo](auto op, LogicalResult &result) {
206  if (op.getPriority())
207  result = todo("priority");
208  };
209  auto checkPrivate = [&todo](auto op, LogicalResult &result) {
210  if constexpr (std::is_same_v<std::decay_t<decltype(op)>, omp::TargetOp>) {
211  // Privatization clauses are supported, except on some situations, so we
212  // need to check here whether any of these unsupported cases are being
213  // translated.
214  if (std::optional<ArrayAttr> privateSyms = op.getPrivateSyms()) {
215  for (Attribute privatizerNameAttr : *privateSyms) {
216  omp::PrivateClauseOp privatizer = findPrivatizer(
217  op.getOperation(), cast<SymbolRefAttr>(privatizerNameAttr));
218 
219  if (privatizer.getDataSharingType() ==
220  omp::DataSharingClauseType::FirstPrivate)
221  result = todo("firstprivate");
222  }
223  }
224  } else {
225  if (!op.getPrivateVars().empty() || op.getPrivateSyms())
226  result = todo("privatization");
227  }
228  };
229  auto checkReduction = [&todo](auto op, LogicalResult &result) {
230  if (isa<omp::TeamsOp>(op) || isa<omp::SimdOp>(op))
231  if (!op.getReductionVars().empty() || op.getReductionByref() ||
232  op.getReductionSyms())
233  result = todo("reduction");
234  if (op.getReductionMod() &&
235  op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
236  result = todo("reduction with modifier");
237  };
238  auto checkTaskReduction = [&todo](auto op, LogicalResult &result) {
239  if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() ||
240  op.getTaskReductionSyms())
241  result = todo("task_reduction");
242  };
243  auto checkUntied = [&todo](auto op, LogicalResult &result) {
244  if (op.getUntied())
245  result = todo("untied");
246  };
247 
248  LogicalResult result = success();
250  .Case([&](omp::DistributeOp op) {
251  checkAllocate(op, result);
252  checkDistSchedule(op, result);
253  checkOrder(op, result);
254  })
255  .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
256  .Case([&](omp::SectionsOp op) {
257  checkAllocate(op, result);
258  checkPrivate(op, result);
259  checkReduction(op, result);
260  })
261  .Case([&](omp::SingleOp op) {
262  checkAllocate(op, result);
263  checkPrivate(op, result);
264  })
265  .Case([&](omp::TeamsOp op) {
266  checkAllocate(op, result);
267  checkPrivate(op, result);
268  checkReduction(op, result);
269  })
270  .Case([&](omp::TaskOp op) {
271  checkAllocate(op, result);
272  checkInReduction(op, result);
273  })
274  .Case([&](omp::TaskgroupOp op) {
275  checkAllocate(op, result);
276  checkTaskReduction(op, result);
277  })
278  .Case([&](omp::TaskwaitOp op) {
279  checkDepend(op, result);
280  checkNowait(op, result);
281  })
282  .Case([&](omp::TaskloopOp op) {
283  // TODO: Add other clauses check
284  checkUntied(op, result);
285  checkPriority(op, result);
286  })
287  .Case([&](omp::WsloopOp op) {
288  checkAllocate(op, result);
289  checkLinear(op, result);
290  checkOrder(op, result);
291  checkReduction(op, result);
292  })
293  .Case([&](omp::ParallelOp op) {
294  checkAllocate(op, result);
295  checkReduction(op, result);
296  })
297  .Case([&](omp::SimdOp op) {
298  checkLinear(op, result);
299  checkNontemporal(op, result);
300  checkReduction(op, result);
301  })
302  .Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp,
303  omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); })
304  .Case<omp::TargetEnterDataOp, omp::TargetExitDataOp, omp::TargetUpdateOp>(
305  [&](auto op) { checkDepend(op, result); })
306  .Case([&](omp::TargetOp op) {
307  checkAllocate(op, result);
308  checkBare(op, result);
309  checkDevice(op, result);
310  checkInReduction(op, result);
311  checkIsDevicePtr(op, result);
312  checkPrivate(op, result);
313  })
314  .Default([](Operation &) {
315  // Assume all clauses for an operation can be translated unless they are
316  // checked above.
317  });
318  return result;
319 }
320 
321 static LogicalResult handleError(llvm::Error error, Operation &op) {
322  LogicalResult result = success();
323  if (error) {
324  llvm::handleAllErrors(
325  std::move(error),
326  [&](const PreviouslyReportedError &) { result = failure(); },
327  [&](const llvm::ErrorInfoBase &err) {
328  result = op.emitError(err.message());
329  });
330  }
331  return result;
332 }
333 
334 template <typename T>
335 static LogicalResult handleError(llvm::Expected<T> &result, Operation &op) {
336  if (!result)
337  return handleError(result.takeError(), op);
338 
339  return success();
340 }
341 
/// Find the insertion point for allocas given the current insertion point for
/// normal operations in the builder.
static llvm::OpenMPIRBuilder::InsertPointTy
findAllocaInsertPoint(llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation) {
  // If there is an alloca insertion point on stack, i.e. we are in a nested
  // operation and a specific point was provided by some surrounding operation,
  // use it.
  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
      [&](OpenMPAllocaStackFrame &frame) {
        // Take the innermost frame's insertion point and stop the walk.
        allocaInsertPoint = frame.allocaInsertPoint;
        return WalkResult::interrupt();
      });
  if (walkResult.wasInterrupted())
    return allocaInsertPoint;

  // Otherwise, insert to the entry block of the surrounding function.
  // If the current IRBuilder InsertPoint is the function's entry, it cannot
  // also be used for alloca insertion which would result in insertion order
  // confusion. Create a new BasicBlock for the Builder and use the entry block
  // for the allocs.
  // TODO: Create a dedicated alloca BasicBlock at function creation such that
  // we do not need to move the current InertPoint here.
  if (builder.GetInsertBlock() ==
      &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
    assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
           "Assuming end of basic block");
    // Split control flow: a fresh "entry" block receives subsequent normal
    // instructions, while the original entry block is reserved for allocas.
    llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
        builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
        builder.GetInsertBlock()->getNextNode());
    builder.CreateBr(entryBB);
    builder.SetInsertPoint(entryBB);
  }

  // Allocas go at the first valid insertion point of the function entry block.
  llvm::BasicBlock &funcEntryBlock =
      builder.GetInsertBlock()->getParent()->getEntryBlock();
  return llvm::OpenMPIRBuilder::InsertPointTy(
      &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
}
382 
383 /// Find the loop information structure for the loop nest being translated. It
384 /// will return a `null` value unless called from the translation function for
385 /// a loop wrapper operation after successfully translating its body.
386 static llvm::CanonicalLoopInfo *
388  llvm::CanonicalLoopInfo *loopInfo = nullptr;
389  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
390  [&](OpenMPLoopInfoStackFrame &frame) {
391  loopInfo = frame.loopInfo;
392  return WalkResult::interrupt();
393  });
394  return loopInfo;
395 }
396 
397 /// Converts the given region that appears within an OpenMP dialect operation to
398 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
399 /// region, and a branch from any block with an successor-less OpenMP terminator
400 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
401 /// of the continuation block if provided.
403  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
404  LLVM::ModuleTranslation &moduleTranslation,
405  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
406  bool isLoopWrapper = isa<omp::LoopWrapperInterface>(region.getParentOp());
407 
408  llvm::BasicBlock *continuationBlock =
409  splitBB(builder, true, "omp.region.cont");
410  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
411 
412  llvm::LLVMContext &llvmContext = builder.getContext();
413  for (Block &bb : region) {
414  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
415  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
416  builder.GetInsertBlock()->getNextNode());
417  moduleTranslation.mapBlock(&bb, llvmBB);
418  }
419 
420  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
421 
422  // Terminators (namely YieldOp) may be forwarding values to the region that
423  // need to be available in the continuation block. Collect the types of these
424  // operands in preparation of creating PHI nodes. This is skipped for loop
425  // wrapper operations, for which we know in advance they have no terminators.
426  SmallVector<llvm::Type *> continuationBlockPHITypes;
427  unsigned numYields = 0;
428 
429  if (!isLoopWrapper) {
430  bool operandsProcessed = false;
431  for (Block &bb : region.getBlocks()) {
432  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
433  if (!operandsProcessed) {
434  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
435  continuationBlockPHITypes.push_back(
436  moduleTranslation.convertType(yield->getOperand(i).getType()));
437  }
438  operandsProcessed = true;
439  } else {
440  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
441  "mismatching number of values yielded from the region");
442  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
443  llvm::Type *operandType =
444  moduleTranslation.convertType(yield->getOperand(i).getType());
445  (void)operandType;
446  assert(continuationBlockPHITypes[i] == operandType &&
447  "values of mismatching types yielded from the region");
448  }
449  }
450  numYields++;
451  }
452  }
453  }
454 
455  // Insert PHI nodes in the continuation block for any values forwarded by the
456  // terminators in this region.
457  if (!continuationBlockPHITypes.empty())
458  assert(
459  continuationBlockPHIs &&
460  "expected continuation block PHIs if converted regions yield values");
461  if (continuationBlockPHIs) {
462  llvm::IRBuilderBase::InsertPointGuard guard(builder);
463  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
464  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
465  for (llvm::Type *ty : continuationBlockPHITypes)
466  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
467  }
468 
469  // Convert blocks one by one in topological order to ensure
470  // defs are converted before uses.
472  for (Block *bb : blocks) {
473  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
474  // Retarget the branch of the entry block to the entry block of the
475  // converted region (regions are single-entry).
476  if (bb->isEntryBlock()) {
477  assert(sourceTerminator->getNumSuccessors() == 1 &&
478  "provided entry block has multiple successors");
479  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
480  "ContinuationBlock is not the successor of the entry block");
481  sourceTerminator->setSuccessor(0, llvmBB);
482  }
483 
484  llvm::IRBuilderBase::InsertPointGuard guard(builder);
485  if (failed(
486  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder)))
487  return llvm::make_error<PreviouslyReportedError>();
488 
489  // Create a direct branch here for loop wrappers to prevent their lack of a
490  // terminator from causing a crash below.
491  if (isLoopWrapper) {
492  builder.CreateBr(continuationBlock);
493  continue;
494  }
495 
496  // Special handling for `omp.yield` and `omp.terminator` (we may have more
497  // than one): they return the control to the parent OpenMP dialect operation
498  // so replace them with the branch to the continuation block. We handle this
499  // here to avoid relying inter-function communication through the
500  // ModuleTranslation class to set up the correct insertion point. This is
501  // also consistent with MLIR's idiom of handling special region terminators
502  // in the same code that handles the region-owning operation.
503  Operation *terminator = bb->getTerminator();
504  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
505  builder.CreateBr(continuationBlock);
506 
507  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
508  (*continuationBlockPHIs)[i]->addIncoming(
509  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
510  }
511  }
512  // After all blocks have been traversed and values mapped, connect the PHI
513  // nodes to the results of preceding blocks.
514  LLVM::detail::connectPHINodes(region, moduleTranslation);
515 
516  // Remove the blocks and values defined in this region from the mapping since
517  // they are not visible outside of this region. This allows the same region to
518  // be converted several times, that is cloned, without clashes, and slightly
519  // speeds up the lookups.
520  moduleTranslation.forgetMapping(region);
521 
522  return continuationBlock;
523 }
524 
525 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
526 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
527  switch (kind) {
528  case omp::ClauseProcBindKind::Close:
529  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
530  case omp::ClauseProcBindKind::Master:
531  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
532  case omp::ClauseProcBindKind::Primary:
533  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
534  case omp::ClauseProcBindKind::Spread:
535  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
536  }
537  llvm_unreachable("Unknown ClauseProcBindKind kind");
538 }
539 
540 /// Maps block arguments from \p blockArgIface (which are MLIR values) to the
541 /// corresponding LLVM values of \p the interface's operands. This is useful
542 /// when an OpenMP region with entry block arguments is converted to LLVM. In
543 /// this case the block arguments are (part of) of the OpenMP region's entry
544 /// arguments and the operands are (part of) of the operands to the OpenMP op
545 /// containing the region.
546 static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation,
547  omp::BlockArgOpenMPOpInterface blockArgIface) {
549  blockArgIface.getBlockArgsPairs(blockArgsPairs);
550  for (auto [var, arg] : blockArgsPairs)
551  moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
552 }
553 
554 /// Helper function to map block arguments defined by ignored loop wrappers to
555 /// LLVM values and prevent any uses of those from triggering null pointer
556 /// dereferences.
557 ///
558 /// This must be called after block arguments of parent wrappers have already
559 /// been mapped to LLVM IR values.
560 static LogicalResult
561 convertIgnoredWrapper(omp::LoopWrapperInterface opInst,
562  LLVM::ModuleTranslation &moduleTranslation) {
563  // Map block arguments directly to the LLVM value associated to the
564  // corresponding operand. This is semantically equivalent to this wrapper not
565  // being present.
567  .Case([&](omp::SimdOp op) {
568  forwardArgs(moduleTranslation,
569  cast<omp::BlockArgOpenMPOpInterface>(*op));
570  op.emitWarning() << "simd information on composite construct discarded";
571  return success();
572  })
573  .Default([&](Operation *op) {
574  return op->emitError() << "cannot ignore wrapper";
575  });
576 }
577 
/// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  auto maskedOp = cast<omp::MaskedOp>(opInst);
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  // Bail out early if any clause on this op is not yet supported.
  if (failed(checkImplementationStatus(opInst)))
    return failure();

  // Body callback invoked by the OpenMPIRBuilder to emit the masked region.
  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
    // MaskedOp has only one region associated with it.
    auto &region = maskedOp.getRegion();
    builder.restoreIP(codeGenIP);
    return convertOmpOpRegions(region, "omp.masked.region", builder,
                               moduleTranslation)
        .takeError();
  };

  // TODO: Perform finalization actions for variables. This has to be
  // called for variables which have destructors/finalizers.
  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };

  // The filter clause selects which thread executes the region; default is
  // thread 0 when no filtered thread id was given.
  llvm::Value *filterVal = nullptr;
  if (auto filterVar = maskedOp.getFilteredThreadId()) {
    filterVal = moduleTranslation.lookupValue(filterVar);
  } else {
    llvm::LLVMContext &llvmContext = builder.getContext();
    filterVal =
        llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), /*V=*/0);
  }
  assert(filterVal != nullptr);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createMasked(ompLoc, bodyGenCB,
                                                         finiCB, filterVal);

  if (failed(handleError(afterIP, opInst)))
    return failure();

  // Continue emitting IR after the masked construct.
  builder.restoreIP(*afterIP);
  return success();
}
621 
622 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
623 static LogicalResult
624 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
625  LLVM::ModuleTranslation &moduleTranslation) {
626  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
627  auto masterOp = cast<omp::MasterOp>(opInst);
628 
629  if (failed(checkImplementationStatus(opInst)))
630  return failure();
631 
632  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
633  // MasterOp has only one region associated with it.
634  auto &region = masterOp.getRegion();
635  builder.restoreIP(codeGenIP);
636  return convertOmpOpRegions(region, "omp.master.region", builder,
637  moduleTranslation)
638  .takeError();
639  };
640 
641  // TODO: Perform finalization actions for variables. This has to be
642  // called for variables which have destructors/finalizers.
643  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
644 
645  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
646  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
647  moduleTranslation.getOpenMPBuilder()->createMaster(ompLoc, bodyGenCB,
648  finiCB);
649 
650  if (failed(handleError(afterIP, opInst)))
651  return failure();
652 
653  builder.restoreIP(*afterIP);
654  return success();
655 }
656 
/// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  auto criticalOp = cast<omp::CriticalOp>(opInst);

  // Bail out early if any clause on this op is not yet supported.
  if (failed(checkImplementationStatus(opInst)))
    return failure();

  // Body callback invoked by the OpenMPIRBuilder to emit the critical region.
  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
    // CriticalOp has only one region associated with it.
    auto &region = cast<omp::CriticalOp>(opInst).getRegion();
    builder.restoreIP(codeGenIP);
    return convertOmpOpRegions(region, "omp.critical.region", builder,
                               moduleTranslation)
        .takeError();
  };

  // TODO: Perform finalization actions for variables. This has to be
  // called for variables which have destructors/finalizers.
  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
  llvm::Constant *hint = nullptr;

  // If it has a name, it probably has a hint too.
  if (criticalOp.getNameAttr()) {
    // The verifiers in OpenMP Dialect guarentee that all the pointers are
    // non-null
    auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
    // Named criticals reference an omp.critical.declare that holds the hint.
    auto criticalDeclareOp =
        SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
                                                                     symbolRef);
    hint =
        llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
                               static_cast<int>(criticalDeclareOp.getHint()));
  }
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createCritical(
          ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint);

  if (failed(handleError(afterIP, opInst)))
    return failure();

  // Continue emitting IR after the critical construct.
  builder.restoreIP(*afterIP);
  return success();
}
706 
707 /// A util to collect info needed to convert delayed privatizers from MLIR to
708 /// LLVM.
710  template <typename OP>
712  : blockArgs(
713  cast<omp::BlockArgOpenMPOpInterface>(*op).getPrivateBlockArgs()) {
714  mlirVars.reserve(blockArgs.size());
715  llvmVars.reserve(blockArgs.size());
716  collectPrivatizationDecls<OP>(op);
717 
718  for (mlir::Value privateVar : op.getPrivateVars())
719  mlirVars.push_back(privateVar);
720  }
721 
726 
727 private:
728  /// Populates `privatizations` with privatization declarations used for the
729  /// given op.
730  template <class OP>
731  void collectPrivatizationDecls(OP op) {
732  std::optional<ArrayAttr> attr = op.getPrivateSyms();
733  if (!attr)
734  return;
735 
736  privatizers.reserve(privatizers.size() + attr->size());
737  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
738  privatizers.push_back(findPrivatizer(op, symbolRef));
739  }
740  }
741 };
742 
743 /// Populates `reductions` with reduction declarations used in the given op.
744 template <typename T>
745 static void
748  std::optional<ArrayAttr> attr = op.getReductionSyms();
749  if (!attr)
750  return;
751 
752  reductions.reserve(reductions.size() + op.getNumReductionVars());
753  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
754  reductions.push_back(
755  SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
756  op, symbolRef));
757  }
758 }
759 
760 /// Translates the blocks contained in the given region and appends them to at
761 /// the current insertion point of `builder`. The operations of the entry block
762 /// are appended to the current insertion block. If set, `continuationBlockArgs`
763 /// is populated with translated values that correspond to the values
764 /// omp.yield'ed from the region.
765 static LogicalResult inlineConvertOmpRegions(
766  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
767  LLVM::ModuleTranslation &moduleTranslation,
768  SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
769  if (region.empty())
770  return success();
771 
772  // Special case for single-block regions that don't create additional blocks:
773  // insert operations without creating additional blocks.
774  if (llvm::hasSingleElement(region)) {
775  llvm::Instruction *potentialTerminator =
776  builder.GetInsertBlock()->empty() ? nullptr
777  : &builder.GetInsertBlock()->back();
778 
779  if (potentialTerminator && potentialTerminator->isTerminator())
780  potentialTerminator->removeFromParent();
781  moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
782 
783  if (failed(moduleTranslation.convertBlock(
784  region.front(), /*ignoreArguments=*/true, builder)))
785  return failure();
786 
787  // The continuation arguments are simply the translated terminator operands.
788  if (continuationBlockArgs)
789  llvm::append_range(
790  *continuationBlockArgs,
791  moduleTranslation.lookupValues(region.front().back().getOperands()));
792 
793  // Drop the mapping that is no longer necessary so that the same region can
794  // be processed multiple times.
795  moduleTranslation.forgetMapping(region);
796 
797  if (potentialTerminator && potentialTerminator->isTerminator()) {
798  llvm::BasicBlock *block = builder.GetInsertBlock();
799  if (block->empty()) {
800  // this can happen for really simple reduction init regions e.g.
801  // %0 = llvm.mlir.constant(0 : i32) : i32
802  // omp.yield(%0 : i32)
803  // because the llvm.mlir.constant (MLIR op) isn't converted into any
804  // llvm op
805  potentialTerminator->insertInto(block, block->begin());
806  } else {
807  potentialTerminator->insertAfter(&block->back());
808  }
809  }
810 
811  return success();
812  }
813 
815  llvm::Expected<llvm::BasicBlock *> continuationBlock =
816  convertOmpOpRegions(region, blockName, builder, moduleTranslation, &phis);
817 
818  if (failed(handleError(continuationBlock, *region.getParentOp())))
819  return failure();
820 
821  if (continuationBlockArgs)
822  llvm::append_range(*continuationBlockArgs, phis);
823  builder.SetInsertPoint(*continuationBlock,
824  (*continuationBlock)->getFirstInsertionPt());
825  return success();
826 }
827 
namespace {
/// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
/// store lambdas with capture.
///
/// Non-atomic combiner: (insertion point, lhs, rhs, result out-param) ->
/// insertion point after the combiner code, or an error.
using OwningReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
        llvm::Value *&)>;
/// Atomic combiner: (insertion point, element type, lhs, rhs) -> insertion
/// point after the atomic update code, or an error.
using OwningAtomicReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
        llvm::Value *)>;
} // namespace
840 
841 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
842 /// reduction declaration. The generator uses `builder` but ignores its
843 /// insertion point.
844 static OwningReductionGen
845 makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
846  LLVM::ModuleTranslation &moduleTranslation) {
847  // The lambda is mutable because we need access to non-const methods of decl
848  // (which aren't actually mutating it), and we must capture decl by-value to
849  // avoid the dangling reference after the parent function returns.
850  OwningReductionGen gen =
851  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
852  llvm::Value *lhs, llvm::Value *rhs,
853  llvm::Value *&result) mutable
854  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
855  moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs);
856  moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs);
857  builder.restoreIP(insertPoint);
859  if (failed(inlineConvertOmpRegions(decl.getReductionRegion(),
860  "omp.reduction.nonatomic.body", builder,
861  moduleTranslation, &phis)))
862  return llvm::createStringError(
863  "failed to inline `combiner` region of `omp.declare_reduction`");
864  result = llvm::getSingleElement(phis);
865  return builder.saveIP();
866  };
867  return gen;
868 }
869 
870 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
871 /// given reduction declaration. The generator uses `builder` but ignores its
872 /// insertion point. Returns null if there is no atomic region available in the
873 /// reduction declaration.
874 static OwningAtomicReductionGen
875 makeAtomicReductionGen(omp::DeclareReductionOp decl,
876  llvm::IRBuilderBase &builder,
877  LLVM::ModuleTranslation &moduleTranslation) {
878  if (decl.getAtomicReductionRegion().empty())
879  return OwningAtomicReductionGen();
880 
881  // The lambda is mutable because we need access to non-const methods of decl
882  // (which aren't actually mutating it), and we must capture decl by-value to
883  // avoid the dangling reference after the parent function returns.
884  OwningAtomicReductionGen atomicGen =
885  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
886  llvm::Value *lhs, llvm::Value *rhs) mutable
887  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
888  moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs);
889  moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs);
890  builder.restoreIP(insertPoint);
892  if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(),
893  "omp.reduction.atomic.body", builder,
894  moduleTranslation, &phis)))
895  return llvm::createStringError(
896  "failed to inline `atomic` region of `omp.declare_reduction`");
897  assert(phis.empty());
898  return builder.saveIP();
899  };
900  return atomicGen;
901 }
902 
903 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
904 static LogicalResult
905 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
906  LLVM::ModuleTranslation &moduleTranslation) {
907  auto orderedOp = cast<omp::OrderedOp>(opInst);
908 
909  if (failed(checkImplementationStatus(opInst)))
910  return failure();
911 
912  omp::ClauseDepend dependType = *orderedOp.getDoacrossDependType();
913  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
914  unsigned numLoops = *orderedOp.getDoacrossNumLoops();
915  SmallVector<llvm::Value *> vecValues =
916  moduleTranslation.lookupValues(orderedOp.getDoacrossDependVars());
917 
918  size_t indexVecValues = 0;
919  while (indexVecValues < vecValues.size()) {
920  SmallVector<llvm::Value *> storeValues;
921  storeValues.reserve(numLoops);
922  for (unsigned i = 0; i < numLoops; i++) {
923  storeValues.push_back(vecValues[indexVecValues]);
924  indexVecValues++;
925  }
926  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
927  findAllocaInsertPoint(builder, moduleTranslation);
928  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
929  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
930  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
931  }
932  return success();
933 }
934 
935 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
936 /// OpenMPIRBuilder.
937 static LogicalResult
938 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
939  LLVM::ModuleTranslation &moduleTranslation) {
940  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
941  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
942 
943  if (failed(checkImplementationStatus(opInst)))
944  return failure();
945 
946  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
947  // OrderedOp has only one region associated with it.
948  auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
949  builder.restoreIP(codeGenIP);
950  return convertOmpOpRegions(region, "omp.ordered.region", builder,
951  moduleTranslation)
952  .takeError();
953  };
954 
955  // TODO: Perform finalization actions for variables. This has to be
956  // called for variables which have destructors/finalizers.
957  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
958 
959  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
960  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
961  moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
962  ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd());
963 
964  if (failed(handleError(afterIP, opInst)))
965  return failure();
966 
967  builder.restoreIP(*afterIP);
968  return success();
969 }
970 
namespace {
/// Contains the arguments for an LLVM store operation
struct DeferredStore {
  DeferredStore(llvm::Value *value, llvm::Value *address)
      : value(value), address(address) {}

  // Value to be stored.
  llvm::Value *value;
  // Address the value will be stored to.
  llvm::Value *address;
};
} // namespace
981 
982 /// Allocate space for privatized reduction variables.
983 /// `deferredStores` contains information to create store operations which needs
984 /// to be inserted after all allocas
985 template <typename T>
986 static LogicalResult
988  llvm::IRBuilderBase &builder,
989  LLVM::ModuleTranslation &moduleTranslation,
990  const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
992  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
993  DenseMap<Value, llvm::Value *> &reductionVariableMap,
994  SmallVectorImpl<DeferredStore> &deferredStores,
995  llvm::ArrayRef<bool> isByRefs) {
996  llvm::IRBuilderBase::InsertPointGuard guard(builder);
997  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
998 
999  // delay creating stores until after all allocas
1000  deferredStores.reserve(loop.getNumReductionVars());
1001 
1002  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
1003  Region &allocRegion = reductionDecls[i].getAllocRegion();
1004  if (isByRefs[i]) {
1005  if (allocRegion.empty())
1006  continue;
1007 
1009  if (failed(inlineConvertOmpRegions(allocRegion, "omp.reduction.alloc",
1010  builder, moduleTranslation, &phis)))
1011  return loop.emitError(
1012  "failed to inline `alloc` region of `omp.declare_reduction`");
1013 
1014  assert(phis.size() == 1 && "expected one allocation to be yielded");
1015  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1016 
1017  // Allocate reduction variable (which is a pointer to the real reduction
1018  // variable allocated in the inlined region)
1019  llvm::Value *var = builder.CreateAlloca(
1020  moduleTranslation.convertType(reductionDecls[i].getType()));
1021  deferredStores.emplace_back(phis[0], var);
1022 
1023  privateReductionVariables[i] = var;
1024  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
1025  reductionVariableMap.try_emplace(loop.getReductionVars()[i], phis[0]);
1026  } else {
1027  assert(allocRegion.empty() &&
1028  "allocaction is implicit for by-val reduction");
1029  llvm::Value *var = builder.CreateAlloca(
1030  moduleTranslation.convertType(reductionDecls[i].getType()));
1031  moduleTranslation.mapValue(reductionArgs[i], var);
1032  privateReductionVariables[i] = var;
1033  reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
1034  }
1035  }
1036 
1037  return success();
1038 }
1039 
1040 /// Map input arguments to reduction initialization region
1041 template <typename T>
1042 static void
1045  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1046  unsigned i) {
1047  // map input argument to the initialization region
1048  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
1049  Region &initializerRegion = reduction.getInitializerRegion();
1050  Block &entry = initializerRegion.front();
1051 
1052  mlir::Value mlirSource = loop.getReductionVars()[i];
1053  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
1054  assert(llvmSource && "lookup reduction var");
1055  moduleTranslation.mapValue(reduction.getInitializerMoldArg(), llvmSource);
1056 
1057  if (entry.getNumArguments() > 1) {
1058  llvm::Value *allocation =
1059  reductionVariableMap.lookup(loop.getReductionVars()[i]);
1060  moduleTranslation.mapValue(reduction.getInitializerAllocArg(), allocation);
1061  }
1062 }
1063 
1064 static void
1065 setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder,
1066  llvm::BasicBlock *block = nullptr) {
1067  if (block == nullptr)
1068  block = builder.GetInsertBlock();
1069 
1070  if (block->empty() || block->getTerminator() == nullptr)
1071  builder.SetInsertPoint(block);
1072  else
1073  builder.SetInsertPoint(block->getTerminator());
1074 }
1075 
1076 /// Inline reductions' `init` regions. This functions assumes that the
1077 /// `builder`'s insertion point is where the user wants the `init` regions to be
1078 /// inlined; i.e. it does not try to find a proper insertion location for the
1079 /// `init` regions. It also leaves the `builder's insertions point in a state
1080 /// where the user can continue the code-gen directly afterwards.
1081 template <typename OP>
1082 static LogicalResult
1084  llvm::IRBuilderBase &builder,
1085  LLVM::ModuleTranslation &moduleTranslation,
1086  llvm::BasicBlock *latestAllocaBlock,
1088  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
1089  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1090  llvm::ArrayRef<bool> isByRef,
1091  SmallVectorImpl<DeferredStore> &deferredStores) {
1092  if (op.getNumReductionVars() == 0)
1093  return success();
1094 
1095  llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
1096  auto allocaIP = llvm::IRBuilderBase::InsertPoint(
1097  latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
1098  builder.restoreIP(allocaIP);
1099  SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars());
1100 
1101  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
1102  if (isByRef[i]) {
1103  if (!reductionDecls[i].getAllocRegion().empty())
1104  continue;
1105 
1106  // TODO: remove after all users of by-ref are updated to use the alloc
1107  // region: Allocate reduction variable (which is a pointer to the real
1108  // reduciton variable allocated in the inlined region)
1109  byRefVars[i] = builder.CreateAlloca(
1110  moduleTranslation.convertType(reductionDecls[i].getType()));
1111  }
1112  }
1113 
1114  setInsertPointForPossiblyEmptyBlock(builder, initBlock);
1115 
1116  // store result of the alloc region to the allocated pointer to the real
1117  // reduction variable
1118  for (auto [data, addr] : deferredStores)
1119  builder.CreateStore(data, addr);
1120 
1121  // Before the loop, store the initial values of reductions into reduction
1122  // variables. Although this could be done after allocas, we don't want to mess
1123  // up with the alloca insertion point.
1124  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
1126 
1127  // map block argument to initializer region
1128  mapInitializationArgs(op, moduleTranslation, reductionDecls,
1129  reductionVariableMap, i);
1130 
1131  if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
1132  "omp.reduction.neutral", builder,
1133  moduleTranslation, &phis)))
1134  return failure();
1135 
1136  assert(phis.size() == 1 && "expected one value to be yielded from the "
1137  "reduction neutral element declaration region");
1138 
1140 
1141  if (isByRef[i]) {
1142  if (!reductionDecls[i].getAllocRegion().empty())
1143  // done in allocReductionVars
1144  continue;
1145 
1146  // TODO: this path can be removed once all users of by-ref are updated to
1147  // use an alloc region
1148 
1149  // Store the result of the inlined region to the allocated reduction var
1150  // ptr
1151  builder.CreateStore(phis[0], byRefVars[i]);
1152 
1153  privateReductionVariables[i] = byRefVars[i];
1154  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
1155  reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
1156  } else {
1157  // for by-ref case the store is inside of the reduction region
1158  builder.CreateStore(phis[0], privateReductionVariables[i]);
1159  // the rest was handled in allocByValReductionVars
1160  }
1161 
1162  // forget the mapping for the initializer region because we might need a
1163  // different mapping if this reduction declaration is re-used for a
1164  // different variable
1165  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
1166  }
1167 
1168  return success();
1169 }
1170 
1171 /// Collect reduction info
1172 template <typename T>
1174  T loop, llvm::IRBuilderBase &builder,
1175  LLVM::ModuleTranslation &moduleTranslation,
1177  SmallVectorImpl<OwningReductionGen> &owningReductionGens,
1178  SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens,
1179  const ArrayRef<llvm::Value *> privateReductionVariables,
1181  unsigned numReductions = loop.getNumReductionVars();
1182 
1183  for (unsigned i = 0; i < numReductions; ++i) {
1184  owningReductionGens.push_back(
1185  makeReductionGen(reductionDecls[i], builder, moduleTranslation));
1186  owningAtomicReductionGens.push_back(
1187  makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
1188  }
1189 
1190  // Collect the reduction information.
1191  reductionInfos.reserve(numReductions);
1192  for (unsigned i = 0; i < numReductions; ++i) {
1193  llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
1194  if (owningAtomicReductionGens[i])
1195  atomicGen = owningAtomicReductionGens[i];
1196  llvm::Value *variable =
1197  moduleTranslation.lookupValue(loop.getReductionVars()[i]);
1198  reductionInfos.push_back(
1199  {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
1200  privateReductionVariables[i],
1201  /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
1202  owningReductionGens[i],
1203  /*ReductionGenClang=*/nullptr, atomicGen});
1204  }
1205 }
1206 
1207 /// handling of DeclareReductionOp's cleanup region
1208 static LogicalResult
1210  llvm::ArrayRef<llvm::Value *> privateVariables,
1211  LLVM::ModuleTranslation &moduleTranslation,
1212  llvm::IRBuilderBase &builder, StringRef regionName,
1213  bool shouldLoadCleanupRegionArg = true) {
1214  for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
1215  if (cleanupRegion->empty())
1216  continue;
1217 
1218  // map the argument to the cleanup region
1219  Block &entry = cleanupRegion->front();
1220 
1221  llvm::Instruction *potentialTerminator =
1222  builder.GetInsertBlock()->empty() ? nullptr
1223  : &builder.GetInsertBlock()->back();
1224  if (potentialTerminator && potentialTerminator->isTerminator())
1225  builder.SetInsertPoint(potentialTerminator);
1226  llvm::Value *privateVarValue =
1227  shouldLoadCleanupRegionArg
1228  ? builder.CreateLoad(
1229  moduleTranslation.convertType(entry.getArgument(0).getType()),
1230  privateVariables[i])
1231  : privateVariables[i];
1232 
1233  moduleTranslation.mapValue(entry.getArgument(0), privateVarValue);
1234 
1235  if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
1236  moduleTranslation)))
1237  return failure();
1238 
1239  // clear block argument mapping in case it needs to be re-created with a
1240  // different source for another use of the same reduction decl
1241  moduleTranslation.forgetMapping(*cleanupRegion);
1242  }
1243  return success();
1244 }
1245 
1246 // TODO: not used by ParallelOp
1247 template <class OP>
1248 static LogicalResult createReductionsAndCleanup(
1249  OP op, llvm::IRBuilderBase &builder,
1250  LLVM::ModuleTranslation &moduleTranslation,
1251  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1253  ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef) {
1254  // Process the reductions if required.
1255  if (op.getNumReductionVars() == 0)
1256  return success();
1257 
1258  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1259 
1260  // Create the reduction generators. We need to own them here because
1261  // ReductionInfo only accepts references to the generators.
1262  SmallVector<OwningReductionGen> owningReductionGens;
1263  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1265  collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
1266  owningReductionGens, owningAtomicReductionGens,
1267  privateReductionVariables, reductionInfos);
1268 
1269  // The call to createReductions below expects the block to have a
1270  // terminator. Create an unreachable instruction to serve as terminator
1271  // and remove it later.
1272  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1273  builder.SetInsertPoint(tempTerminator);
1274  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
1275  ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
1276  isByRef, op.getNowait());
1277 
1278  if (failed(handleError(contInsertPoint, *op)))
1279  return failure();
1280 
1281  if (!contInsertPoint->getBlock())
1282  return op->emitOpError() << "failed to convert reductions";
1283 
1284  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1285  ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);
1286 
1287  if (failed(handleError(afterIP, *op)))
1288  return failure();
1289 
1290  tempTerminator->eraseFromParent();
1291  builder.restoreIP(*afterIP);
1292 
1293  // after the construct, deallocate private reduction variables
1294  SmallVector<Region *> reductionRegions;
1295  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
1296  [](omp::DeclareReductionOp reductionDecl) {
1297  return &reductionDecl.getCleanupRegion();
1298  });
1299  return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
1300  moduleTranslation, builder,
1301  "omp.reduction.cleanup");
1302  return success();
1303 }
1304 
1305 static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
1306  if (!attr)
1307  return {};
1308  return *attr;
1309 }
1310 
1311 // TODO: not used by omp.parallel
1312 template <typename OP>
1313 static LogicalResult allocAndInitializeReductionVars(
1314  OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
1315  LLVM::ModuleTranslation &moduleTranslation,
1316  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1318  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
1319  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1320  llvm::ArrayRef<bool> isByRef) {
1321  if (op.getNumReductionVars() == 0)
1322  return success();
1323 
1324  SmallVector<DeferredStore> deferredStores;
1325 
1326  if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
1327  allocaIP, reductionDecls,
1328  privateReductionVariables, reductionVariableMap,
1329  deferredStores, isByRef)))
1330  return failure();
1331 
1332  return initReductionVars(op, reductionArgs, builder, moduleTranslation,
1333  allocaIP.getBlock(), reductionDecls,
1334  privateReductionVariables, reductionVariableMap,
1335  isByRef, deferredStores);
1336 }
1337 
1338 /// Return the llvm::Value * corresponding to the `privateVar` that
1339 /// is being privatized. It isn't always as simple as looking up
1340 /// moduleTranslation with privateVar. For instance, in case of
1341 /// an allocatable, the descriptor for the allocatable is privatized.
1342 /// This descriptor is mapped using an MapInfoOp. So, this function
1343 /// will return a pointer to the llvm::Value corresponding to the
1344 /// block argument for the mapped descriptor.
1345 static llvm::Value *
1346 findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
1347  LLVM::ModuleTranslation &moduleTranslation,
1348  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1349  if (mappedPrivateVars == nullptr || !mappedPrivateVars->contains(privateVar))
1350  return moduleTranslation.lookupValue(privateVar);
1351 
1352  Value blockArg = (*mappedPrivateVars)[privateVar];
1353  Type privVarType = privateVar.getType();
1354  Type blockArgType = blockArg.getType();
1355  assert(isa<LLVM::LLVMPointerType>(blockArgType) &&
1356  "A block argument corresponding to a mapped var should have "
1357  "!llvm.ptr type");
1358 
1359  if (privVarType == blockArgType)
1360  return moduleTranslation.lookupValue(blockArg);
1361 
1362  // This typically happens when the privatized type is lowered from
1363  // boxchar<KIND> and gets lowered to !llvm.struct<(ptr, i64)>. That is the
1364  // struct/pair is passed by value. But, mapped values are passed only as
1365  // pointers, so before we privatize, we must load the pointer.
1366  if (!isa<LLVM::LLVMPointerType>(privVarType))
1367  return builder.CreateLoad(moduleTranslation.convertType(privVarType),
1368  moduleTranslation.lookupValue(blockArg));
1369 
1370  return moduleTranslation.lookupValue(privateVar);
1371 }
1372 
1373 /// Initialize a single (first)private variable. You probably want to use
1374 /// allocateAndInitPrivateVars instead of this.
1375 /// This returns the private variable which has been initialized. This
1376 /// variable should be mapped before constructing the body of the Op.
1378  llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
1379  omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg,
1380  llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock,
1381  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1382  Region &initRegion = privDecl.getInitRegion();
1383  if (initRegion.empty())
1384  return llvmPrivateVar;
1385 
1386  // map initialization region block arguments
1387  llvm::Value *nonPrivateVar = findAssociatedValue(
1388  mlirPrivVar, builder, moduleTranslation, mappedPrivateVars);
1389  assert(nonPrivateVar);
1390  moduleTranslation.mapValue(privDecl.getInitMoldArg(), nonPrivateVar);
1391  moduleTranslation.mapValue(privDecl.getInitPrivateArg(), llvmPrivateVar);
1392 
1393  // in-place convert the private initialization region
1395  if (failed(inlineConvertOmpRegions(initRegion, "omp.private.init", builder,
1396  moduleTranslation, &phis)))
1397  return llvm::createStringError(
1398  "failed to inline `init` region of `omp.private`");
1399 
1400  assert(phis.size() == 1 && "expected one allocation to be yielded");
1401 
1402  // clear init region block argument mapping in case it needs to be
1403  // re-created with a different source for another use of the same
1404  // reduction decl
1405  moduleTranslation.forgetMapping(initRegion);
1406 
1407  // Prefer the value yielded from the init region to the allocated private
1408  // variable in case the region is operating on arguments by-value (e.g.
1409  // Fortran character boxes).
1410  return phis[0];
1411 }
1412 
1413 static llvm::Error
1414 initPrivateVars(llvm::IRBuilderBase &builder,
1415  LLVM::ModuleTranslation &moduleTranslation,
1416  PrivateVarsInfo &privateVarsInfo,
1417  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1418  if (privateVarsInfo.blockArgs.empty())
1419  return llvm::Error::success();
1420 
1421  llvm::BasicBlock *privInitBlock = splitBB(builder, true, "omp.private.init");
1422  setInsertPointForPossiblyEmptyBlock(builder, privInitBlock);
1423 
1424  for (auto [idx, zip] : llvm::enumerate(llvm::zip_equal(
1425  privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
1426  privateVarsInfo.blockArgs, privateVarsInfo.llvmVars))) {
1427  auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVar] = zip;
1429  builder, moduleTranslation, privDecl, mlirPrivVar, blockArg,
1430  llvmPrivateVar, privInitBlock, mappedPrivateVars);
1431 
1432  if (!privVarOrErr)
1433  return privVarOrErr.takeError();
1434 
1435  llvmPrivateVar = privVarOrErr.get();
1436  moduleTranslation.mapValue(blockArg, llvmPrivateVar);
1437 
1439  }
1440 
1441  return llvm::Error::success();
1442 }
1443 
1444 /// Allocate and initialize delayed private variables. Returns the basic block
1445 /// which comes after all of these allocations. llvm::Value * for each of these
1446 /// private variables are populated in llvmPrivateVars.
1448 allocatePrivateVars(llvm::IRBuilderBase &builder,
1449  LLVM::ModuleTranslation &moduleTranslation,
1450  PrivateVarsInfo &privateVarsInfo,
1451  const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1452  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1453  // Allocate private vars
1454  llvm::Instruction *allocaTerminator = allocaIP.getBlock()->getTerminator();
1455  splitBB(llvm::OpenMPIRBuilder::InsertPointTy(allocaIP.getBlock(),
1456  allocaTerminator->getIterator()),
1457  true, allocaTerminator->getStableDebugLoc(),
1458  "omp.region.after_alloca");
1459 
1460  llvm::IRBuilderBase::InsertPointGuard guard(builder);
1461  // Update the allocaTerminator since the alloca block was split above.
1462  allocaTerminator = allocaIP.getBlock()->getTerminator();
1463  builder.SetInsertPoint(allocaTerminator);
1464  // The new terminator is an uncondition branch created by the splitBB above.
1465  assert(allocaTerminator->getNumSuccessors() == 1 &&
1466  "This is an unconditional branch created by splitBB");
1467 
1468  llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);
1469 
1470  unsigned int allocaAS =
1471  moduleTranslation.getLLVMModule()->getDataLayout().getAllocaAddrSpace();
1472  unsigned int defaultAS = moduleTranslation.getLLVMModule()
1473  ->getDataLayout()
1474  .getProgramAddressSpace();
1475 
1476  for (auto [privDecl, mlirPrivVar, blockArg] :
1477  llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
1478  privateVarsInfo.blockArgs)) {
1479  llvm::Type *llvmAllocType =
1480  moduleTranslation.convertType(privDecl.getType());
1481  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1482  llvm::Value *llvmPrivateVar = builder.CreateAlloca(
1483  llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
1484  if (allocaAS != defaultAS)
1485  llvmPrivateVar = builder.CreateAddrSpaceCast(llvmPrivateVar,
1486  builder.getPtrTy(defaultAS));
1487 
1488  privateVarsInfo.llvmVars.push_back(llvmPrivateVar);
1489  }
1490 
1491  return afterAllocas;
1492 }
1493 
1494 static LogicalResult
1495 copyFirstPrivateVars(llvm::IRBuilderBase &builder,
1496  LLVM::ModuleTranslation &moduleTranslation,
1497  SmallVectorImpl<mlir::Value> &mlirPrivateVars,
1498  ArrayRef<llvm::Value *> llvmPrivateVars,
1499  SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
1500  // Apply copy region for firstprivate.
1501  bool needsFirstprivate =
1502  llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1503  return privOp.getDataSharingType() ==
1504  omp::DataSharingClauseType::FirstPrivate;
1505  });
1506 
1507  if (!needsFirstprivate)
1508  return success();
1509 
1510  llvm::BasicBlock *copyBlock =
1511  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1512  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
1513 
1514  for (auto [decl, mlirVar, llvmVar] :
1515  llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1516  if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1517  continue;
1518 
1519  // copyRegion implements `lhs = rhs`
1520  Region &copyRegion = decl.getCopyRegion();
1521 
1522  // map copyRegion rhs arg
1523  llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
1524  assert(nonPrivateVar);
1525  moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1526 
1527  // map copyRegion lhs arg
1528  moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1529 
1530  // in-place convert copy region
1531  if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
1532  moduleTranslation)))
1533  return decl.emitError("failed to inline `copy` region of `omp.private`");
1534 
1536 
1537  // ignore unused value yielded from copy region
1538 
1539  // clear copy region block argument mapping in case it needs to be
1540  // re-created with different sources for reuse of the same reduction
1541  // decl
1542  moduleTranslation.forgetMapping(copyRegion);
1543  }
1544 
1545  return success();
1546 }
1547 
1548 static LogicalResult
1549 cleanupPrivateVars(llvm::IRBuilderBase &builder,
1550  LLVM::ModuleTranslation &moduleTranslation, Location loc,
1551  SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
1552  SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
1553  // private variable deallocation
1554  SmallVector<Region *> privateCleanupRegions;
1555  llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
1556  [](omp::PrivateClauseOp privatizer) {
1557  return &privatizer.getDeallocRegion();
1558  });
1559 
1560  if (failed(inlineOmpRegionCleanup(
1561  privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1562  "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1563  return mlir::emitError(loc, "failed to inline `dealloc` region of an "
1564  "`omp.private` op in");
1565 
1566  return success();
1567 }
1568 
/// Converts an OpenMP sections construct into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  using StorableBodyGenCallbackTy =
      llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

  auto sectionsOp = cast<omp::SectionsOp>(opInst);

  // Bail out early if the op carries clauses this translation can't handle.
  if (failed(checkImplementationStatus(opInst)))
    return failure();

  llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionByref());
  assert(isByRef.size() == sectionsOp.getNumReductionVars());

  SmallVector<omp::DeclareReductionOp> reductionDecls;
  collectReductionDecls(sectionsOp, reductionDecls);
  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  // One private copy per reduction variable; filled in by the call below.
  SmallVector<llvm::Value *> privateReductionVariables(
      sectionsOp.getNumReductionVars());
  DenseMap<Value, llvm::Value *> reductionVariableMap;

  MutableArrayRef<BlockArgument> reductionArgs =
      cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();

  // NOTE(review): the head of this call (allocating/initializing the
  // reduction variables) is missing from this excerpt — verify upstream.
          sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
          reductionDecls, privateReductionVariables, reductionVariableMap,
          isByRef)))
    return failure();

  // NOTE(review): the declaration of `sectionCBs` (one body-gen callback per
  // `omp.section` region) is missing from this excerpt — verify upstream.

  for (Operation &op : *sectionsOp.getRegion().begin()) {
    auto sectionOp = dyn_cast<omp::SectionOp>(op);
    if (!sectionOp) // omp.terminator
      continue;

    Region &region = sectionOp.getRegion();
    auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation](
                         InsertPointTy allocaIP, InsertPointTy codeGenIP) {
      builder.restoreIP(codeGenIP);

      // map the omp.section reduction block argument to the omp.sections block
      // arguments
      // TODO: this assumes that the only block arguments are reduction
      // variables
      assert(region.getNumArguments() ==
             sectionsOp.getRegion().getNumArguments());
      for (auto [sectionsArg, sectionArg] : llvm::zip_equal(
               sectionsOp.getRegion().getArguments(), region.getArguments())) {
        llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg);
        assert(llvmVal);
        moduleTranslation.mapValue(sectionArg, llvmVal);
      }

      return convertOmpOpRegions(region, "omp.section.region", builder,
                                 moduleTranslation)
          .takeError();
    };
    sectionCBs.push_back(sectionCB);
  }

  // No sections within omp.sections operation - skip generation. This situation
  // is only possible if there is only a terminator operation inside the
  // sections operation
  if (sectionCBs.empty())
    return success();

  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));

  // TODO: Perform appropriate actions according to the data-sharing
  // attribute (shared, private, firstprivate, ...) of variables.
  // Currently defaults to shared.
  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
                    llvm::Value &vPtr, llvm::Value *&replacementValue)
      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
    // Keep using the original variable, i.e. no privatization is performed.
    replacementValue = &vPtr;
    return codeGenIP;
  };

  // TODO: Perform finalization actions for variables. This has to be
  // called for variables which have destructors/finalizers.
  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };

  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createSections(
          ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
          sectionsOp.getNowait());

  if (failed(handleError(afterIP, opInst)))
    return failure();

  builder.restoreIP(*afterIP);

  // Process the reductions if required.
  return createReductionsAndCleanup(sectionsOp, builder, moduleTranslation,
                                    allocaIP, reductionDecls,
                                    privateReductionVariables, isByRef);
}
1673 
/// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  if (failed(checkImplementationStatus(*singleOp)))
    return failure();

  // Generate the single region's body at the insertion point provided by
  // OpenMPIRBuilder.
  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
    builder.restoreIP(codegenIP);
    return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region",
                               builder, moduleTranslation)
        .takeError();
  };
  // No finalization actions are required for `single`.
  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };

  // Handle copyprivate
  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateSyms();
  // NOTE(review): the declarations of `llvmCPVars` and `llvmCPFuncs` are
  // missing from this excerpt — verify upstream.
  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
    // Pair each copyprivate variable with its copy helper function.
    llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
    auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
        singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
    llvmCPFuncs.push_back(
        moduleTranslation.lookupFunction(llvmFuncOp.getName()));
  }

  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createSingle(
          ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars,
          llvmCPFuncs);

  if (failed(handleError(afterIP, *singleOp)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
1716 
// Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder
static LogicalResult
convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
                LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  if (failed(checkImplementationStatus(*op)))
    return failure();

  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
    // NOTE(review): the first line of this statement (presumably saving the
    // alloca insertion point for nested regions, as done for other ops in
    // this file) is missing from this excerpt — verify upstream.
                                        moduleTranslation, allocaIP);
    builder.restoreIP(codegenIP);
    return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
                               moduleTranslation)
        .takeError();
  };

  // Lower each optional clause operand to its LLVM value (null if absent).
  llvm::Value *numTeamsLower = nullptr;
  if (Value numTeamsLowerVar = op.getNumTeamsLower())
    numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);

  llvm::Value *numTeamsUpper = nullptr;
  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
    numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);

  llvm::Value *threadLimit = nullptr;
  if (Value threadLimitVar = op.getThreadLimit())
    threadLimit = moduleTranslation.lookupValue(threadLimitVar);

  llvm::Value *ifExpr = nullptr;
  if (Value ifVar = op.getIfExpr())
    ifExpr = moduleTranslation.lookupValue(ifVar);

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createTeams(
          ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr);

  if (failed(handleError(afterIP, *op)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
1761 
/// Translates the `depend` clause of a task-like op into OpenMPIRBuilder
/// DependData entries, pairing each dependence variable with its kind.
static void
buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
                LLVM::ModuleTranslation &moduleTranslation,
  // NOTE(review): the final parameter line (the output `dds` vector of
  // DependData) is missing from this excerpt — verify upstream.
  if (dependVars.empty())
    return;
  for (auto dep : llvm::zip(dependVars, dependKinds->getValue())) {
    llvm::omp::RTLDependenceKindTy type;
    switch (
        cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
    case mlir::omp::ClauseTaskDepend::taskdependin:
      type = llvm::omp::RTLDependenceKindTy::DepIn;
      break;
    // The OpenMP runtime requires that the codegen for 'depend' clause for
    // 'out' dependency kind must be the same as codegen for 'depend' clause
    // with 'inout' dependency.
    case mlir::omp::ClauseTaskDepend::taskdependout:
    case mlir::omp::ClauseTaskDepend::taskdependinout:
      type = llvm::omp::RTLDependenceKindTy::DepInOut;
      break;
    case mlir::omp::ClauseTaskDepend::taskdependmutexinoutset:
      type = llvm::omp::RTLDependenceKindTy::DepMutexInOutSet;
      break;
    case mlir::omp::ClauseTaskDepend::taskdependinoutset:
      type = llvm::omp::RTLDependenceKindTy::DepInOutSet;
      break;
    };
    llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
    llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
    dds.emplace_back(dd);
  }
}
1794 
namespace {
/// TaskContextStructManager takes care of creating and freeing a structure
/// containing information needed by the task body to execute.
class TaskContextStructManager {
public:
  TaskContextStructManager(llvm::IRBuilderBase &builder,
                           LLVM::ModuleTranslation &moduleTranslation,
      // NOTE(review): the third constructor parameter line (the list of
      // private declarations) is missing from this excerpt — verify upstream.
      : builder{builder}, moduleTranslation{moduleTranslation},
        privateDecls{privateDecls} {}

  /// Creates a heap allocated struct containing space for each private
  /// variable. Invariant: privateVarTypes, privateDecls, and the elements of
  /// the structure should all have the same order (although privateDecls which
  /// do not read from the mold argument are skipped).
  void generateTaskContextStruct();

  /// Create GEPs to access each member of the structure representing a private
  /// variable, adding them to llvmPrivateVars. Null values are added where
  /// private decls were skipped so that the ordering continues to match the
  /// private decls.
  void createGEPsToPrivateVars();

  /// De-allocate the task context structure.
  void freeStructPtr();

  /// Per-variable GEPs into the context struct (null for skipped decls).
  MutableArrayRef<llvm::Value *> getLLVMPrivateVarGEPs() {
    return llvmPrivateVarGEPs;
  }

  /// Pointer to the heap-allocated context struct (null if none was created).
  llvm::Value *getStructPtr() { return structPtr; }

private:
  llvm::IRBuilderBase &builder;
  LLVM::ModuleTranslation &moduleTranslation;
  // NOTE(review): the declaration of the `privateDecls` member is missing
  // from this excerpt — verify upstream.

  /// The type of each member of the structure, in order.
  SmallVector<llvm::Type *> privateVarTypes;

  /// LLVM values for each private variable, or null if that private variable is
  /// not included in the task context structure
  SmallVector<llvm::Value *> llvmPrivateVarGEPs;

  /// A pointer to the structure containing context for this task.
  llvm::Value *structPtr = nullptr;
  /// The type of the structure
  llvm::Type *structTy = nullptr;
};
} // namespace
1845 
1846 void TaskContextStructManager::generateTaskContextStruct() {
1847  if (privateDecls.empty())
1848  return;
1849  privateVarTypes.reserve(privateDecls.size());
1850 
1851  for (omp::PrivateClauseOp &privOp : privateDecls) {
1852  // Skip private variables which can safely be allocated and initialised
1853  // inside of the task
1854  if (!privOp.readsFromMold())
1855  continue;
1856  Type mlirType = privOp.getType();
1857  privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
1858  }
1859 
1860  structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(),
1861  privateVarTypes);
1862 
1863  llvm::DataLayout dataLayout =
1864  builder.GetInsertBlock()->getModule()->getDataLayout();
1865  llvm::Type *intPtrTy = builder.getIntPtrTy(dataLayout);
1866  llvm::Constant *allocSize = llvm::ConstantExpr::getSizeOf(structTy);
1867 
1868  // Heap allocate the structure
1869  structPtr = builder.CreateMalloc(intPtrTy, structTy, allocSize,
1870  /*ArraySize=*/nullptr, /*MallocF=*/nullptr,
1871  "omp.task.context_ptr");
1872 }
1873 
1874 void TaskContextStructManager::createGEPsToPrivateVars() {
1875  if (!structPtr) {
1876  assert(privateVarTypes.empty());
1877  return;
1878  }
1879 
1880  // Create GEPs for each struct member
1881  llvmPrivateVarGEPs.clear();
1882  llvmPrivateVarGEPs.reserve(privateDecls.size());
1883  llvm::Value *zero = builder.getInt32(0);
1884  unsigned i = 0;
1885  for (auto privDecl : privateDecls) {
1886  if (!privDecl.readsFromMold()) {
1887  // Handle this inside of the task so we don't pass unnessecary vars in
1888  llvmPrivateVarGEPs.push_back(nullptr);
1889  continue;
1890  }
1891  llvm::Value *iVal = builder.getInt32(i);
1892  llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
1893  llvmPrivateVarGEPs.push_back(gep);
1894  i += 1;
1895  }
1896 }
1897 
1898 void TaskContextStructManager::freeStructPtr() {
1899  if (!structPtr)
1900  return;
1901 
1902  llvm::IRBuilderBase::InsertPointGuard guard{builder};
1903  // Ensure we don't put the call to free() after the terminator
1904  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1905  builder.CreateFree(structPtr);
1906 }
1907 
/// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  if (failed(checkImplementationStatus(*taskOp)))
    return failure();

  PrivateVarsInfo privateVarsInfo(taskOp);
  TaskContextStructManager taskStructMgr{builder, moduleTranslation,
                                         privateVarsInfo.privatizers};

  // Allocate and copy private variables before creating the task. This avoids
  // accessing invalid memory if (after this scope ends) the private variables
  // are initialized from host variables or if the variables are copied into
  // from host variables (firstprivate). The insertion point is just before
  // where the code for creating and scheduling the task will go. That puts this
  // code outside of the outlined task region, which is what we want because
  // this way the initialization and copy regions are executed immediately while
  // the host variable data are still live.

  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  // Not using splitBB() because that requires the current block to have a
  // terminator.
  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
  llvm::BasicBlock *taskStartBlock = llvm::BasicBlock::Create(
      builder.getContext(), "omp.task.start",
      /*Parent=*/builder.GetInsertBlock()->getParent());
  llvm::Instruction *branchToTaskStartBlock = builder.CreateBr(taskStartBlock);
  builder.SetInsertPoint(branchToTaskStartBlock);

  // Now do this again to make the initialization and copy blocks
  llvm::BasicBlock *copyBlock =
      splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
  llvm::BasicBlock *initBlock =
      splitBB(builder, /*CreateBranch=*/true, "omp.private.init");

  // Now the control flow graph should look like
  // starter_block:
  //   <---- where we started when convertOmpTaskOp was called
  //   br %omp.private.init
  // omp.private.init:
  //   br %omp.private.copy
  // omp.private.copy:
  //   br %omp.task.start
  // omp.task.start:
  //   <---- where we want the insertion point to be when we call createTask()

  // Save the alloca insertion point on ModuleTranslation stack for use in
  // nested regions.
  // NOTE(review): the first line of this statement (a stack-frame save) is
  // missing from this excerpt — verify upstream.
                                        moduleTranslation, allocaIP);

  // Allocate and initialize private variables
  builder.SetInsertPoint(initBlock->getTerminator());

  // Create task variable structure
  taskStructMgr.generateTaskContextStruct();
  // GEPs so that we can initialize the variables. Don't use these GEPs inside
  // of the body otherwise it will be the GEP not the struct which is forwarded
  // to the outlined function. GEPs forwarded in this way are passed in a
  // stack-allocated (by OpenMPIRBuilder) structure which is not safe for tasks
  // which may not be executed until after the current stack frame goes out of
  // scope.
  taskStructMgr.createGEPsToPrivateVars();

  for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
       llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
                       privateVarsInfo.blockArgs,
                       taskStructMgr.getLLVMPrivateVarGEPs())) {
    // To be handled inside the task.
    if (!privDecl.readsFromMold())
      continue;
    assert(llvmPrivateVarAlloc &&
           "reads from mold so shouldn't have been skipped");

    llvm::Expected<llvm::Value *> privateVarOrErr =
        initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
                       blockArg, llvmPrivateVarAlloc, initBlock);
    if (!privateVarOrErr)
      return handleError(privateVarOrErr, *taskOp.getOperation());

    // Keep subsequent stores before the init block's terminator.
    llvm::IRBuilderBase::InsertPointGuard guard(builder);
    builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());

    // TODO: this is a bit of a hack for Fortran character boxes.
    // Character boxes are passed by value into the init region and then the
    // initialized character box is yielded by value. Here we need to store the
    // yielded value into the private allocation, and load the private
    // allocation to match the type expected by region block arguments.
    if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
        !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
      builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);
      // Load it so we have the value pointed to by the GEP
      llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(),
                                               llvmPrivateVarAlloc);
    }
    assert(llvmPrivateVarAlloc->getType() ==
           moduleTranslation.convertType(blockArg.getType()));

    // Mapping blockArg -> llvmPrivateVarAlloc is done inside the body callback
    // so that OpenMPIRBuilder doesn't try to pass each GEP address through a
    // stack allocated structure.
  }

  // firstprivate copy region
  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
  if (failed(copyFirstPrivateVars(
          builder, moduleTranslation, privateVarsInfo.mlirVars,
          taskStructMgr.getLLVMPrivateVarGEPs(), privateVarsInfo.privatizers)))
    return llvm::failure();

  // Set up for call to createTask()
  builder.SetInsertPoint(taskStartBlock);

  auto bodyCB = [&](InsertPointTy allocaIP,
                    InsertPointTy codegenIP) -> llvm::Error {
    // Save the alloca insertion point on ModuleTranslation stack for use in
    // nested regions.
    // NOTE(review): the first line of this statement (a stack-frame save) is
    // missing from this excerpt — verify upstream.
                                          moduleTranslation, allocaIP);

    // translate the body of the task:
    builder.restoreIP(codegenIP);

    // Allocate+initialize, inside the task, the privatizers that don't read
    // from their mold argument (those were deliberately skipped above).
    llvm::BasicBlock *privInitBlock = nullptr;
    privateVarsInfo.llvmVars.resize(privateVarsInfo.blockArgs.size());
    for (auto [i, zip] : llvm::enumerate(llvm::zip_equal(
             privateVarsInfo.blockArgs, privateVarsInfo.privatizers,
             privateVarsInfo.mlirVars))) {
      auto [blockArg, privDecl, mlirPrivVar] = zip;
      // This is handled before the task executes
      if (privDecl.readsFromMold())
        continue;

      llvm::IRBuilderBase::InsertPointGuard guard(builder);
      llvm::Type *llvmAllocType =
          moduleTranslation.convertType(privDecl.getType());
      builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
      llvm::Value *llvmPrivateVar = builder.CreateAlloca(
          llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");

      llvm::Expected<llvm::Value *> privateVarOrError =
          initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
                         blockArg, llvmPrivateVar, privInitBlock);
      if (!privateVarOrError)
        return privateVarOrError.takeError();
      moduleTranslation.mapValue(blockArg, privateVarOrError.get());
      privateVarsInfo.llvmVars[i] = privateVarOrError.get();
    }

    taskStructMgr.createGEPsToPrivateVars();
    for (auto [i, llvmPrivVar] :
         llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) {
      if (!llvmPrivVar) {
        assert(privateVarsInfo.llvmVars[i] &&
               "This is added in the loop above");
        continue;
      }
      privateVarsInfo.llvmVars[i] = llvmPrivVar;
    }

    // Find and map the addresses of each variable within the task context
    // structure
    for (auto [blockArg, llvmPrivateVar, privateDecl] :
         llvm::zip_equal(privateVarsInfo.blockArgs, privateVarsInfo.llvmVars,
                         privateVarsInfo.privatizers)) {
      // This was handled above.
      if (!privateDecl.readsFromMold())
        continue;
      // Fix broken pass-by-value case for Fortran character boxes
      if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
        llvmPrivateVar = builder.CreateLoad(
            moduleTranslation.convertType(blockArg.getType()), llvmPrivateVar);
      }
      assert(llvmPrivateVar->getType() ==
             moduleTranslation.convertType(blockArg.getType()));
      moduleTranslation.mapValue(blockArg, llvmPrivateVar);
    }

    auto continuationBlockOrError = convertOmpOpRegions(
        taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
    if (failed(handleError(continuationBlockOrError, *taskOp)))
      return llvm::make_error<PreviouslyReportedError>();

    builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());

    if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(),
                                  privateVarsInfo.llvmVars,
                                  privateVarsInfo.privatizers)))
      return llvm::make_error<PreviouslyReportedError>();

    // Free heap allocated task context structure at the end of the task.
    taskStructMgr.freeStructPtr();

    return llvm::Error::success();
  };

  // NOTE(review): the declaration of `dds` (the dependence-data vector) is
  // missing from this excerpt — verify upstream.
  buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
                  moduleTranslation, dds);

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createTask(
          ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
          moduleTranslation.lookupValue(taskOp.getFinal()),
          moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
          taskOp.getMergeable(),
          moduleTranslation.lookupValue(taskOp.getEventHandle()),
          moduleTranslation.lookupValue(taskOp.getPriority()));

  if (failed(handleError(afterIP, *taskOp)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
2128 
2129 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
2130 static LogicalResult
2131 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
2132  LLVM::ModuleTranslation &moduleTranslation) {
2133  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2134  if (failed(checkImplementationStatus(*tgOp)))
2135  return failure();
2136 
2137  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
2138  builder.restoreIP(codegenIP);
2139  return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region",
2140  builder, moduleTranslation)
2141  .takeError();
2142  };
2143 
2144  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
2145  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2146  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2147  moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP,
2148  bodyCB);
2149 
2150  if (failed(handleError(afterIP, *tgOp)))
2151  return failure();
2152 
2153  builder.restoreIP(*afterIP);
2154  return success();
2155 }
2156 
2157 static LogicalResult
2158 convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder,
2159  LLVM::ModuleTranslation &moduleTranslation) {
2160  if (failed(checkImplementationStatus(*twOp)))
2161  return failure();
2162 
2163  moduleTranslation.getOpenMPBuilder()->createTaskwait(builder.saveIP());
2164  return success();
2165 }
2166 
/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  auto wsloopOp = cast<omp::WsloopOp>(opInst);
  if (failed(checkImplementationStatus(opInst)))
    return failure();

  auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
  llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref());
  assert(isByRef.size() == wsloopOp.getNumReductionVars());

  // Static is the default.
  auto schedule =
      wsloopOp.getScheduleKind().value_or(omp::ClauseScheduleKind::Static);

  // Find the loop configuration.
  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]);
  llvm::Type *ivType = step->getType();
  llvm::Value *chunk = nullptr;
  if (wsloopOp.getScheduleChunk()) {
    llvm::Value *chunkVar =
        moduleTranslation.lookupValue(wsloopOp.getScheduleChunk());
    // The chunk value is adjusted to the induction variable's type.
    chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
  }

  PrivateVarsInfo privateVarsInfo(wsloopOp);

  SmallVector<omp::DeclareReductionOp> reductionDecls;
  collectReductionDecls(wsloopOp, reductionDecls);
  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  SmallVector<llvm::Value *> privateReductionVariables(
      wsloopOp.getNumReductionVars());

  // NOTE(review): the head of this statement (defining `afterAllocas` from
  // the private-variable allocation) is missing from this excerpt — verify
  // upstream.
          builder, moduleTranslation, privateVarsInfo, allocaIP);
  if (handleError(afterAllocas, opInst).failed())
    return failure();

  DenseMap<Value, llvm::Value *> reductionVariableMap;

  MutableArrayRef<BlockArgument> reductionArgs =
      cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();

  SmallVector<DeferredStore> deferredStores;

  if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
                                moduleTranslation, allocaIP, reductionDecls,
                                privateReductionVariables, reductionVariableMap,
                                deferredStores, isByRef)))
    return failure();

  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
                  opInst)
          .failed())
    return failure();

  if (failed(copyFirstPrivateVars(
          builder, moduleTranslation, privateVarsInfo.mlirVars,
          privateVarsInfo.llvmVars, privateVarsInfo.privatizers)))
    return failure();

  assert(afterAllocas.get()->getSinglePredecessor());
  if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
                               moduleTranslation,
                               afterAllocas.get()->getSinglePredecessor(),
                               reductionDecls, privateReductionVariables,
                               reductionVariableMap, isByRef, deferredStores)))
    return failure();

  // TODO: Handle doacross loops when the ordered clause has a parameter.
  bool isOrdered = wsloopOp.getOrdered().has_value();
  std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
  bool isSimd = wsloopOp.getScheduleSimd();
  bool loopNeedsBarrier = !wsloopOp.getNowait();

  // The only legal way for the direct parent to be omp.distribute is that this
  // represents 'distribute parallel do'. Otherwise, this is a regular
  // worksharing loop.
  llvm::omp::WorksharingLoopType workshareLoopType =
      llvm::isa_and_present<omp::DistributeOp>(opInst.getParentOp())
          ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop
          : llvm::omp::WorksharingLoopType::ForStaticLoop;

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  // NOTE(review): the head of this statement (defining `regionBlock` from the
  // conversion of the loop body region) is missing from this excerpt —
  // verify upstream.
      wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation);

  if (failed(handleError(regionBlock, opInst)))
    return failure();

  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);

  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
      ompBuilder->applyWorkshareLoop(
          ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
          convertToScheduleKind(schedule), chunk, isSimd,
          scheduleMod == omp::ScheduleModifier::monotonic,
          scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
          workshareLoopType);

  if (failed(handleError(wsloopIP, opInst)))
    return failure();

  // Process the reductions if required.
  if (failed(createReductionsAndCleanup(wsloopOp, builder, moduleTranslation,
                                        allocaIP, reductionDecls,
                                        privateReductionVariables, isByRef)))
    return failure();

  return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
                            privateVarsInfo.llvmVars,
                            privateVarsInfo.privatizers);
}
2285 
2286 /// Converts the OpenMP parallel operation to LLVM IR.
2287 static LogicalResult
2288 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
2289  LLVM::ModuleTranslation &moduleTranslation) {
2290  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2291  ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionByref());
2292  assert(isByRef.size() == opInst.getNumReductionVars());
2293  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2294 
2295  if (failed(checkImplementationStatus(*opInst)))
2296  return failure();
2297 
2298  PrivateVarsInfo privateVarsInfo(opInst);
2299 
2300  // Collect reduction declarations
2301  SmallVector<omp::DeclareReductionOp> reductionDecls;
2302  collectReductionDecls(opInst, reductionDecls);
2303  SmallVector<llvm::Value *> privateReductionVariables(
2304  opInst.getNumReductionVars());
2305  SmallVector<DeferredStore> deferredStores;
2306 
2307  auto bodyGenCB = [&](InsertPointTy allocaIP,
2308  InsertPointTy codeGenIP) -> llvm::Error {
2310  builder, moduleTranslation, privateVarsInfo, allocaIP);
2311  if (handleError(afterAllocas, *opInst).failed())
2312  return llvm::make_error<PreviouslyReportedError>();
2313 
2314  // Allocate reduction vars
2315  DenseMap<Value, llvm::Value *> reductionVariableMap;
2316 
2317  MutableArrayRef<BlockArgument> reductionArgs =
2318  cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs();
2319 
2320  allocaIP =
2321  InsertPointTy(allocaIP.getBlock(),
2322  allocaIP.getBlock()->getTerminator()->getIterator());
2323 
2324  if (failed(allocReductionVars(
2325  opInst, reductionArgs, builder, moduleTranslation, allocaIP,
2326  reductionDecls, privateReductionVariables, reductionVariableMap,
2327  deferredStores, isByRef)))
2328  return llvm::make_error<PreviouslyReportedError>();
2329 
2330  assert(afterAllocas.get()->getSinglePredecessor());
2331  builder.restoreIP(codeGenIP);
2332 
2333  if (handleError(
2334  initPrivateVars(builder, moduleTranslation, privateVarsInfo),
2335  *opInst)
2336  .failed())
2337  return llvm::make_error<PreviouslyReportedError>();
2338 
2339  if (failed(copyFirstPrivateVars(
2340  builder, moduleTranslation, privateVarsInfo.mlirVars,
2341  privateVarsInfo.llvmVars, privateVarsInfo.privatizers)))
2342  return llvm::make_error<PreviouslyReportedError>();
2343 
2344  if (failed(
2345  initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
2346  afterAllocas.get()->getSinglePredecessor(),
2347  reductionDecls, privateReductionVariables,
2348  reductionVariableMap, isByRef, deferredStores)))
2349  return llvm::make_error<PreviouslyReportedError>();
2350 
2351  // Save the alloca insertion point on ModuleTranslation stack for use in
2352  // nested regions.
2354  moduleTranslation, allocaIP);
2355 
2356  // ParallelOp has only one region associated with it.
2358  opInst.getRegion(), "omp.par.region", builder, moduleTranslation);
2359  if (!regionBlock)
2360  return regionBlock.takeError();
2361 
2362  // Process the reductions if required.
2363  if (opInst.getNumReductionVars() > 0) {
2364  // Collect reduction info
2365  SmallVector<OwningReductionGen> owningReductionGens;
2366  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
2368  collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
2369  owningReductionGens, owningAtomicReductionGens,
2370  privateReductionVariables, reductionInfos);
2371 
2372  // Move to region cont block
2373  builder.SetInsertPoint((*regionBlock)->getTerminator());
2374 
2375  // Generate reductions from info
2376  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
2377  builder.SetInsertPoint(tempTerminator);
2378 
2379  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
2380  ompBuilder->createReductions(builder.saveIP(), allocaIP,
2381  reductionInfos, isByRef, false);
2382  if (!contInsertPoint)
2383  return contInsertPoint.takeError();
2384 
2385  if (!contInsertPoint->getBlock())
2386  return llvm::make_error<PreviouslyReportedError>();
2387 
2388  tempTerminator->eraseFromParent();
2389  builder.restoreIP(*contInsertPoint);
2390  }
2391 
2392  return llvm::Error::success();
2393  };
2394 
2395  auto privCB = [](InsertPointTy allocaIP, InsertPointTy codeGenIP,
2396  llvm::Value &, llvm::Value &val, llvm::Value *&replVal) {
2397  // tell OpenMPIRBuilder not to do anything. We handled Privatisation in
2398  // bodyGenCB.
2399  replVal = &val;
2400  return codeGenIP;
2401  };
2402 
2403  // TODO: Perform finalization actions for variables. This has to be
2404  // called for variables which have destructors/finalizers.
2405  auto finiCB = [&](InsertPointTy codeGenIP) -> llvm::Error {
2406  InsertPointTy oldIP = builder.saveIP();
2407  builder.restoreIP(codeGenIP);
2408 
2409  // if the reduction has a cleanup region, inline it here to finalize the
2410  // reduction variables
2411  SmallVector<Region *> reductionCleanupRegions;
2412  llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions),
2413  [](omp::DeclareReductionOp reductionDecl) {
2414  return &reductionDecl.getCleanupRegion();
2415  });
2416  if (failed(inlineOmpRegionCleanup(
2417  reductionCleanupRegions, privateReductionVariables,
2418  moduleTranslation, builder, "omp.reduction.cleanup")))
2419  return llvm::createStringError(
2420  "failed to inline `cleanup` region of `omp.declare_reduction`");
2421 
2422  if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(),
2423  privateVarsInfo.llvmVars,
2424  privateVarsInfo.privatizers)))
2425  return llvm::make_error<PreviouslyReportedError>();
2426 
2427  builder.restoreIP(oldIP);
2428  return llvm::Error::success();
2429  };
2430 
2431  llvm::Value *ifCond = nullptr;
2432  if (auto ifVar = opInst.getIfExpr())
2433  ifCond = moduleTranslation.lookupValue(ifVar);
2434  llvm::Value *numThreads = nullptr;
2435  if (auto numThreadsVar = opInst.getNumThreads())
2436  numThreads = moduleTranslation.lookupValue(numThreadsVar);
2437  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
2438  if (auto bind = opInst.getProcBindKind())
2439  pbKind = getProcBindKind(*bind);
2440  // TODO: Is the Parallel construct cancellable?
2441  bool isCancellable = false;
2442 
2443  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2444  findAllocaInsertPoint(builder, moduleTranslation);
2445  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2446 
2447  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2448  ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
2449  ifCond, numThreads, pbKind, isCancellable);
2450 
2451  if (failed(handleError(afterIP, *opInst)))
2452  return failure();
2453 
2454  builder.restoreIP(*afterIP);
2455  return success();
2456 }
2457 
2458 /// Convert Order attribute to llvm::omp::OrderKind.
2459 static llvm::omp::OrderKind
2460 convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
2461  if (!o)
2462  return llvm::omp::OrderKind::OMP_ORDER_unknown;
2463  switch (*o) {
2464  case omp::ClauseOrderKind::Concurrent:
2465  return llvm::omp::OrderKind::OMP_ORDER_concurrent;
2466  }
2467  llvm_unreachable("Unknown ClauseOrderKind kind");
2468 }
2469 
/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
               LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  auto simdOp = cast<omp::SimdOp>(opInst);

  // TODO: Replace this with proper composite translation support.
  // Currently, simd information on composite constructs is ignored, so e.g.
  // 'do/for simd' will be treated the same as a standalone 'do/for'. This is
  // allowed by the spec, since it's equivalent to using a SIMD length of 1.
  if (simdOp.isComposite()) {
    if (failed(convertIgnoredWrapper(simdOp, moduleTranslation)))
      return failure();

    return inlineConvertOmpRegions(simdOp.getRegion(), "omp.simd.region",
                                   builder, moduleTranslation);
  }

  if (failed(checkImplementationStatus(opInst)))
    return failure();

  // Gather the op's private clauses so private copies can be allocated and
  // initialized before the loop body runs.
  PrivateVarsInfo privateVarsInfo(simdOp);

  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  // NOTE(review): the declaration introducing `afterAllocas` (presumably the
  // result of allocating the private variables at `allocaIP`) appears to
  // have been lost in extraction here — confirm against the original file.
      builder, moduleTranslation, privateVarsInfo, allocaIP);
  if (handleError(afterAllocas, opInst).failed())
    return failure();

  if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo),
                  opInst)
          .failed())
    return failure();

  // Translate the optional simdlen/safelen clauses into i64 constants for
  // applySimd; nullptr means the clause is absent.
  llvm::ConstantInt *simdlen = nullptr;
  if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
    simdlen = builder.getInt64(simdlenVar.value());

  llvm::ConstantInt *safelen = nullptr;
  if (std::optional<uint64_t> safelenVar = simdOp.getSafelen())
    safelen = builder.getInt64(safelenVar.value());

  // Build the map of aligned variables -> alignment constants from the
  // aligned clause operands and their parallel array of alignment attributes.
  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
  llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder());
  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
  std::optional<ArrayAttr> alignmentValues = simdOp.getAlignments();
  mlir::OperandRange operands = simdOp.getAlignedVars();
  for (size_t i = 0; i < operands.size(); ++i) {
    llvm::Value *alignment = nullptr;
    llvm::Value *llvmVal = moduleTranslation.lookupValue(operands[i]);
    llvm::Type *ty = llvmVal->getType();

    auto intAttr = cast<IntegerAttr>((*alignmentValues)[i]);
    alignment = builder.getInt64(intAttr.getInt());
    assert(ty->isPointerTy() && "Invalid type for aligned variable");
    assert(alignment && "Invalid alignment value");
    // Emit the load of the aligned pointer in `sourceBlock` (before the
    // loop), then restore the current insertion point.
    auto curInsert = builder.saveIP();
    builder.SetInsertPoint(sourceBlock);
    llvmVal = builder.CreateLoad(ty, llvmVal);
    builder.restoreIP(curInsert);
    alignedVars[llvmVal] = alignment;
  }

  // NOTE(review): the declaration introducing `regionBlock` (the inlined
  // omp.simd region) appears truncated here — confirm against the original
  // file.
      simdOp.getRegion(), "omp.simd.region", builder, moduleTranslation);

  if (failed(handleError(regionBlock, opInst)))
    return failure();

  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
  // Attach simd metadata/clauses to the innermost canonical loop created for
  // the enclosing loop nest.
  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
  ompBuilder->applySimd(loopInfo, alignedVars,
                        simdOp.getIfExpr()
                            ? moduleTranslation.lookupValue(simdOp.getIfExpr())
                            : nullptr,
                        order, simdlen, safelen);

  // Run the `dealloc` regions of the privatizers and return.
  return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
                            privateVarsInfo.llvmVars,
                            privateVarsInfo.privatizers);
}
2554 
/// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  auto loopOp = cast<omp::LoopNestOp>(opInst);

  // Set up the source location value for OpenMP runtime.
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  // Generator of the canonical loop body.
  // NOTE(review): the declarations of the `loopInfos` and `bodyInsertPoints`
  // containers captured by the lambda below appear to have been lost in
  // extraction — confirm against the original file.
  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip,
                     llvm::Value *iv) -> llvm::Error {
    // Make sure further conversions know about the induction variable.
    moduleTranslation.mapValue(
        loopOp.getRegion().front().getArgument(loopInfos.size()), iv);

    // Capture the body insertion point for use in nested loops. BodyIP of the
    // CanonicalLoopInfo always points to the beginning of the entry block of
    // the body.
    bodyInsertPoints.push_back(ip);

    // Only the innermost level of the nest converts the region body; outer
    // levels just record their insertion points.
    if (loopInfos.size() != loopOp.getNumLoops() - 1)
      return llvm::Error::success();

    // Convert the body of the loop.
    builder.restoreIP(ip);
    // NOTE(review): the declaration introducing `regionBlock` appears
    // truncated here — confirm against the original file.
        loopOp.getRegion(), "omp.loop_nest.region", builder, moduleTranslation);
    if (!regionBlock)
      return regionBlock.takeError();

    builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
    return llvm::Error::success();
  };

  // Delegate actual loop construction to the OpenMP IRBuilder.
  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
  // loop, i.e. it has a positive step, uses signed integer semantics.
  // Reconsider this code when the nested loop operation clearly supports more
  // cases.
  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
    llvm::Value *lowerBound =
        moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]);
    llvm::Value *upperBound =
        moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]);
    llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]);

    // Make sure loop trip count are emitted in the preheader of the outermost
    // loop at the latest so that they are all available for the new collapsed
    // loop will be created below.
    llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
    llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
    if (i != 0) {
      // Inner loops are emitted at the body insertion point of the enclosing
      // loop; their trip-count computation goes to the outermost preheader.
      loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
                                                       ompLoc.DL);
      computeIP = loopInfos.front()->getPreheaderIP();
    }

    // NOTE(review): the declaration introducing `loopResult` appears
    // truncated here — confirm against the original file.
        ompBuilder->createCanonicalLoop(
            loc, bodyGen, lowerBound, upperBound, step,
            /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);

    if (failed(handleError(loopResult, *loopOp)))
      return failure();

    loopInfos.push_back(*loopResult);
  }

  // Collapse loops. Store the insertion point because LoopInfos may get
  // invalidated.
  llvm::OpenMPIRBuilder::InsertPointTy afterIP =
      loopInfos.front()->getAfterIP();

  // Update the stack frame created for this loop to point to the resulting loop
  // after applying transformations.
  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
      [&](OpenMPLoopInfoStackFrame &frame) {
        frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
        return WalkResult::interrupt();
      });

  // Continue building IR after the loop. Note that the LoopInfo returned by
  // `collapseLoops` points inside the outermost loop and is intended for
  // potential further loop transformations. Use the insertion point stored
  // before collapsing loops instead.
  builder.restoreIP(afterIP);
  return success();
}
2647 
2648 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
2649 static llvm::AtomicOrdering
2650 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
2651  if (!ao)
2652  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
2653 
2654  switch (*ao) {
2655  case omp::ClauseMemoryOrderKind::Seq_cst:
2656  return llvm::AtomicOrdering::SequentiallyConsistent;
2657  case omp::ClauseMemoryOrderKind::Acq_rel:
2658  return llvm::AtomicOrdering::AcquireRelease;
2659  case omp::ClauseMemoryOrderKind::Acquire:
2660  return llvm::AtomicOrdering::Acquire;
2661  case omp::ClauseMemoryOrderKind::Release:
2662  return llvm::AtomicOrdering::Release;
2663  case omp::ClauseMemoryOrderKind::Relaxed:
2664  return llvm::AtomicOrdering::Monotonic;
2665  }
2666  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
2667 }
2668 
2669 /// Convert omp.atomic.read operation to LLVM IR.
2670 static LogicalResult
2671 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
2672  LLVM::ModuleTranslation &moduleTranslation) {
2673  auto readOp = cast<omp::AtomicReadOp>(opInst);
2674  if (failed(checkImplementationStatus(opInst)))
2675  return failure();
2676 
2677  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2678 
2679  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2680 
2681  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrder());
2682  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
2683  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
2684 
2685  llvm::Type *elementType =
2686  moduleTranslation.convertType(readOp.getElementType());
2687 
2688  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
2689  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
2690  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
2691  return success();
2692 }
2693 
2694 /// Converts an omp.atomic.write operation to LLVM IR.
2695 static LogicalResult
2696 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
2697  LLVM::ModuleTranslation &moduleTranslation) {
2698  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
2699  if (failed(checkImplementationStatus(opInst)))
2700  return failure();
2701 
2702  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2703 
2704  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2705  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder());
2706  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
2707  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
2708  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
2709  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
2710  /*isVolatile=*/false};
2711  builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
2712  return success();
2713 }
2714 
/// Converts an LLVM dialect binary operation to the corresponding enum value
/// for `atomicrmw` supported binary operation.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
  // NOTE(review): the TypeSwitch expression heading this case chain appears
  // to have been lost in extraction — confirm against the original file.
      .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
      .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
      .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
      .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
      .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
      .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
      .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
      .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
      .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
      // Any other op cannot be expressed as a single atomicrmw operation.
      .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
}
2730 
/// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static LogicalResult
convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
                       llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  if (failed(checkImplementationStatus(*opInst)))
    return failure();

  // Convert values and types.
  auto &innerOpList = opInst.getRegion().front().getOperations();
  bool isXBinopExpr{false};
  llvm::AtomicRMWInst::BinOp binop;
  mlir::Value mlirExpr;
  llvm::Value *llvmExpr = nullptr;
  llvm::Value *llvmX = nullptr;
  llvm::Type *llvmXElementType = nullptr;
  if (innerOpList.size() == 2) {
    // The two operations here are the update and the terminator.
    // Since we can identify the update operation, there is a possibility
    // that we can generate the atomicrmw instruction.
    mlir::Operation &innerOp = *opInst.getRegion().front().begin();
    if (!llvm::is_contained(innerOp.getOperands(),
                            opInst.getRegion().getArgument(0))) {
      return opInst.emitError("no atomic update operation with region argument"
                              " as operand found inside atomic.update region");
    }
    binop = convertBinOpToAtomic(innerOp);
    // Record whether `x` is the first operand of the update expression; the
    // IR builder needs this to distinguish `x binop expr` from
    // `expr binop x` for non-commutative operations.
    isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
    mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  } else {
    // Since the update region includes more than one operation
    // we will resort to generating a cmpxchg loop.
    binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
  }
  llvmX = moduleTranslation.lookupValue(opInst.getX());
  llvmXElementType = moduleTranslation.convertType(
      opInst.getRegion().getArgument(0).getType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(opInst.getMemoryOrder());

  // Generate update code. The callback inlines the update region with the
  // region argument bound to the atomically-loaded value of x, and returns
  // the yielded (updated) value to be stored back.
  auto updateFn =
      [&opInst, &moduleTranslation](
          llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    Block &bb = *opInst.getRegion().begin();
    moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
                                     atomicOrdering, binop, updateFn,
                                     isXBinopExpr);

  if (failed(handleError(afterIP, *opInst)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
2809 
/// Converts an omp.atomic.capture operation (an atomic read combined with an
/// atomic update or write) to LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
                        llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  if (failed(checkImplementationStatus(*atomicCaptureOp)))
    return failure();

  mlir::Value mlirExpr;
  bool isXBinopExpr = false, isPostfixUpdate = false;
  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;

  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();

  assert((atomicUpdateOp || atomicWriteOp) &&
         "internal op must be an atomic.update or atomic.write op");

  if (atomicWriteOp) {
    // read-then-write: the captured value is always the pre-write value.
    isPostfixUpdate = true;
    mlirExpr = atomicWriteOp.getExpr();
  } else {
    // Postfix when the update is the second op in the capture region, i.e.
    // the read captured the value before the update (v = x++ style).
    isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
                      atomicCaptureOp.getAtomicUpdateOp().getOperation();
    auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
    // Find the binary update operation that uses the region argument
    // and get the expression to update
    if (innerOpList.size() == 2) {
      // Exactly one update op plus the terminator: the update may be
      // expressible as a single atomicrmw.
      mlir::Operation &innerOp = *atomicUpdateOp.getRegion().front().begin();
      if (!llvm::is_contained(innerOp.getOperands(),
                              atomicUpdateOp.getRegion().getArgument(0))) {
        return atomicUpdateOp.emitError(
            "no atomic update operation with region argument"
            " as operand found inside atomic.update region");
      }
      binop = convertBinOpToAtomic(innerOp);
      // Record whether `x` is the LHS of the update expression; needed for
      // non-commutative operations.
      isXBinopExpr =
          innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
      mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    } else {
      // More complex update region: fall back to a cmpxchg loop.
      binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
    }
  }

  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  llvm::Value *llvmX =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
  llvm::Value *llvmV =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
  llvm::Type *llvmXElementType = moduleTranslation.convertType(
      atomicCaptureOp.getAtomicReadOp().getElementType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(atomicCaptureOp.getMemoryOrder());

  // Callback that produces the updated value: either the write expression,
  // or the result of inlining the update region with the region argument
  // bound to the atomically-loaded value of x.
  auto updateFn =
      [&](llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    if (atomicWriteOp)
      return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
    Block &bb = *atomicUpdateOp.getRegion().begin();
    moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
                               atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicCapture(
          ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
          binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr);

  if (failed(handleError(afterIP, *atomicCaptureOp)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
2904 
2905 /// Converts an OpenMP Threadprivate operation into LLVM IR using
2906 /// OpenMPIRBuilder.
2907 static LogicalResult
2908 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
2909  LLVM::ModuleTranslation &moduleTranslation) {
2910  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2911  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2912  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
2913 
2914  if (failed(checkImplementationStatus(opInst)))
2915  return failure();
2916 
2917  Value symAddr = threadprivateOp.getSymAddr();
2918  auto *symOp = symAddr.getDefiningOp();
2919 
2920  if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
2921  symOp = asCast.getOperand().getDefiningOp();
2922 
2923  if (!isa<LLVM::AddressOfOp>(symOp))
2924  return opInst.emitError("Addressing symbol not found");
2925  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
2926 
2927  LLVM::GlobalOp global =
2928  addressOfOp.getGlobal(moduleTranslation.symbolTable());
2929  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
2930 
2931  if (!ompBuilder->Config.isTargetDevice()) {
2932  llvm::Type *type = globalValue->getValueType();
2933  llvm::TypeSize typeSize =
2934  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
2935  type);
2936  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
2937  llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
2938  ompLoc, globalValue, size, global.getSymName() + ".cache");
2939  moduleTranslation.mapValue(opInst.getResult(0), callInst);
2940  } else {
2941  moduleTranslation.mapValue(opInst.getResult(0), globalValue);
2942  }
2943 
2944  return success();
2945 }
2946 
2947 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
2948 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
2949  switch (deviceClause) {
2950  case mlir::omp::DeclareTargetDeviceType::host:
2951  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
2952  break;
2953  case mlir::omp::DeclareTargetDeviceType::nohost:
2954  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
2955  break;
2956  case mlir::omp::DeclareTargetDeviceType::any:
2957  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
2958  break;
2959  }
2960  llvm_unreachable("unhandled device clause");
2961 }
2962 
/// Translate an MLIR 'declare target' capture clause to the matching
/// offload-entry global-variable entry kind.
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
// NOTE(review): the line naming this function (presumably
// `convertToCaptureClauseKind(`) appears to have been lost in extraction —
// confirm against the original file.
    mlir::omp::DeclareTargetCaptureClause captureClause) {
  switch (captureClause) {
  case mlir::omp::DeclareTargetCaptureClause::to:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
  case mlir::omp::DeclareTargetCaptureClause::link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
  case mlir::omp::DeclareTargetCaptureClause::enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
  }
  llvm_unreachable("unhandled capture clause");
}
2976 
2977 static llvm::SmallString<64>
2978 getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
2979  llvm::OpenMPIRBuilder &ompBuilder) {
2980  llvm::SmallString<64> suffix;
2981  llvm::raw_svector_ostream os(suffix);
2982  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
2983  auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
2984  auto fileInfoCallBack = [&loc]() {
2985  return std::pair<std::string, uint64_t>(
2986  llvm::StringRef(loc.getFilename()), loc.getLine());
2987  };
2988 
2989  os << llvm::format(
2990  "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
2991  }
2992  os << "_decl_tgt_ref_ptr";
2993 
2994  return suffix;
2995 }
2996 
2997 static bool isDeclareTargetLink(mlir::Value value) {
2998  if (auto addressOfOp =
2999  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
3000  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
3001  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
3002  if (auto declareTargetGlobal =
3003  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
3004  if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
3005  mlir::omp::DeclareTargetCaptureClause::link)
3006  return true;
3007  }
3008  return false;
3009 }
3010 
// Returns the reference pointer generated by the lowering of the declare target
// operation in cases where the link clause is used or the to clause is used in
// USM mode.
static llvm::Value *
// NOTE(review): the line naming this function and declaring its first
// parameter (presumably `getRefPtrIfDeclareTarget(mlir::Value value,`)
// appears to have been lost in extraction — confirm against the original.
    LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

  // An easier way to do this may just be to keep track of any pointer
  // references and their mapping to their respective operation
  if (auto addressOfOp =
          llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
    if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
            addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
                addressOfOp.getGlobalName()))) {

      if (auto declareTargetGlobal =
              llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
                  gOp.getOperation())) {

        // In this case, we must utilise the reference pointer generated by the
        // declare target operation, similar to Clang
        if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
             mlir::omp::DeclareTargetCaptureClause::link) ||
            (declareTargetGlobal.getDeclareTargetCaptureClause() ==
                 mlir::omp::DeclareTargetCaptureClause::to &&
             ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
          llvm::SmallString<64> suffix =
              getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);

          // If the global name already carries the suffix, return it
          // directly; otherwise look up the suffixed ref-pointer global.
          if (gOp.getSymName().contains(suffix))
            return moduleTranslation.getLLVMModule()->getNamedValue(
                gOp.getSymName());

          return moduleTranslation.getLLVMModule()->getNamedValue(
              (gOp.getSymName().str() + suffix.str()).str());
        }
      }
    }
  }

  // Not a declare-target link/USM-to global: no ref pointer to return.
  return nullptr;
}
3054 
3055 namespace {
3056 // Append customMappers information to existing MapInfosTy
// Append customMappers information to existing MapInfosTy
struct MapInfosTy : llvm::OpenMPIRBuilder::MapInfosTy {
  // NOTE(review): the declaration of the `Mappers` member used in append()
  // appears to have been lost in extraction — confirm against the original.

  /// Append arrays in \a CurInfo.
  void append(MapInfosTy &curInfo) {
    Mappers.append(curInfo.Mappers.begin(), curInfo.Mappers.end());
    llvm::OpenMPIRBuilder::MapInfosTy::append(curInfo);
  }
};
3066 // A small helper structure to contain data gathered
3067 // for map lowering and coalese it into one area and
3068 // avoiding extra computations such as searches in the
3069 // llvm module for lowered mapped variables or checking
3070 // if something is declare target (and retrieving the
3071 // value) more than neccessary.
struct MapInfoData : MapInfosTy {
  // Per-entry flag: the mapped value is a 'declare target' symbol.
  llvm::SmallVector<bool, 4> IsDeclareTarget;
  // Per-entry flag: the mapping is a member of a larger mapped object.
  llvm::SmallVector<bool, 4> IsAMember;
  // Identify if mapping was added by mapClause or use_device clauses.
  llvm::SmallVector<bool, 4> IsAMapping;
  // NOTE(review): the declarations of the `MapClause` and `OriginalValue`
  // members referenced in append() appear to have been lost in extraction —
  // confirm against the original file.
  // Stripped off array/pointer to get the underlying
  // element type
  // NOTE(review): the `BaseType` member declaration also appears truncated
  // here — confirm against the original file.

  /// Append arrays in \a CurInfo.
  void append(MapInfoData &CurInfo) {
    IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
                           CurInfo.IsDeclareTarget.end());
    MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
    OriginalValue.append(CurInfo.OriginalValue.begin(),
                         CurInfo.OriginalValue.end());
    BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
    MapInfosTy::append(CurInfo);
  }
};
3094 } // namespace
3095 
3096 uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
3097  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
3098  arrTy.getElementType()))
3099  return getArrayElementSizeInBits(nestedArrTy, dl);
3100  return dl.getTypeSizeInBits(arrTy.getElementType());
3101 }
3102 
// This function calculates the size to be offloaded for a specified type, given
// its associated map clause (which can contain bounds information which affects
// the total size), this size is calculated based on the underlying element type
// e.g. given a 1-D array of ints, we will calculate the size from the integer
// type * number of elements in the array. This size can be used in other
// calculations but is ultimately used as an argument to the OpenMP runtimes
// kernel argument structure which is generated through the combinedInfo data
// structures.
// This function is somewhat equivalent to Clang's getExprTypeSize inside of
// CGOpenMPRuntime.cpp.
llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
                            Operation *clauseOp, llvm::Value *basePointer,
                            llvm::Type *baseType, llvm::IRBuilderBase &builder,
                            LLVM::ModuleTranslation &moduleTranslation) {
  if (auto memberClause =
          mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
    // This calculates the size to transfer based on bounds and the underlying
    // element type, provided bounds have been specified (Fortran
    // pointers/allocatables/target and arrays that have sections specified fall
    // into this as well).
    if (!memberClause.getBounds().empty()) {
      llvm::Value *elementCount = builder.getInt64(1);
      for (auto bounds : memberClause.getBounds()) {
        if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
                bounds.getDefiningOp())) {
          // The below calculation for the size to be mapped calculated from the
          // map.info's bounds is: (elemCount * [UB - LB] + 1), later we
          // multiply by the underlying element type's byte size to get the full
          // size to be offloaded based on the bounds.
          elementCount = builder.CreateMul(
              elementCount,
              builder.CreateAdd(
                  builder.CreateSub(
                      moduleTranslation.lookupValue(boundOp.getUpperBound()),
                      moduleTranslation.lookupValue(boundOp.getLowerBound())),
                  builder.getInt64(1)));
        }
      }

      // Utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives
      // the size in inconsistent byte or bit format.
      uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
      // For array types the per-element size is what must be scaled by the
      // element count, not the size of the whole array.
      if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
        underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);

      // The size in bytes x number of elements, the sizeInBytes stored is
      // the underlying type's size, e.g. if ptr<i32>, it'll be the i32's
      // size, so we do some on the fly runtime math to get the size in
      // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
      // some adjustment for members with more complex types.
      return builder.CreateMul(elementCount,
                               builder.getInt64(underlyingTypeSzInBits / 8));
    }
  }

  // No bounds information available: fall back to the static size of the type.
  return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
}
3160 
3162  MapInfoData &mapData, SmallVectorImpl<Value> &mapVars,
3163  LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl,
3164  llvm::IRBuilderBase &builder, ArrayRef<Value> useDevPtrOperands = {},
3165  ArrayRef<Value> useDevAddrOperands = {},
3166  ArrayRef<Value> hasDevAddrOperands = {}) {
3167  auto checkIsAMember = [](const auto &mapVars, auto mapOp) {
3168  // Check if this is a member mapping and correctly assign that it is, if
3169  // it is a member of a larger object.
3170  // TODO: Need better handling of members, and distinguishing of members
3171  // that are implicitly allocated on device vs explicitly passed in as
3172  // arguments.
3173  // TODO: May require some further additions to support nested record
3174  // types, i.e. member maps that can have member maps.
3175  for (Value mapValue : mapVars) {
3176  auto map = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3177  for (auto member : map.getMembers())
3178  if (member == mapOp)
3179  return true;
3180  }
3181  return false;
3182  };
3183 
3184  // Process MapOperands
3185  for (Value mapValue : mapVars) {
3186  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3187  Value offloadPtr =
3188  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3189  mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
3190  mapData.Pointers.push_back(mapData.OriginalValue.back());
3191 
3192  if (llvm::Value *refPtr =
3193  getRefPtrIfDeclareTarget(offloadPtr,
3194  moduleTranslation)) { // declare target
3195  mapData.IsDeclareTarget.push_back(true);
3196  mapData.BasePointers.push_back(refPtr);
3197  } else { // regular mapped variable
3198  mapData.IsDeclareTarget.push_back(false);
3199  mapData.BasePointers.push_back(mapData.OriginalValue.back());
3200  }
3201 
3202  mapData.BaseType.push_back(
3203  moduleTranslation.convertType(mapOp.getVarType()));
3204  mapData.Sizes.push_back(
3205  getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
3206  mapData.BaseType.back(), builder, moduleTranslation));
3207  mapData.MapClause.push_back(mapOp.getOperation());
3208  mapData.Types.push_back(
3209  llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType()));
3210  mapData.Names.push_back(LLVM::createMappingInformation(
3211  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3212  mapData.DevicePointers.push_back(llvm::OpenMPIRBuilder::DeviceInfoTy::None);
3213  if (mapOp.getMapperId())
3214  mapData.Mappers.push_back(
3215  SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>(
3216  mapOp, mapOp.getMapperIdAttr()));
3217  else
3218  mapData.Mappers.push_back(nullptr);
3219  mapData.IsAMapping.push_back(true);
3220  mapData.IsAMember.push_back(checkIsAMember(mapVars, mapOp));
3221  }
3222 
3223  auto findMapInfo = [&mapData](llvm::Value *val,
3224  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
3225  unsigned index = 0;
3226  bool found = false;
3227  for (llvm::Value *basePtr : mapData.OriginalValue) {
3228  if (basePtr == val && mapData.IsAMapping[index]) {
3229  found = true;
3230  mapData.Types[index] |=
3231  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
3232  mapData.DevicePointers[index] = devInfoTy;
3233  }
3234  index++;
3235  }
3236  return found;
3237  };
3238 
3239  // Process useDevPtr(Addr)Operands
3240  auto addDevInfos = [&](const llvm::ArrayRef<Value> &useDevOperands,
3241  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
3242  for (Value mapValue : useDevOperands) {
3243  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3244  Value offloadPtr =
3245  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3246  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
3247 
3248  // Check if map info is already present for this entry.
3249  if (!findMapInfo(origValue, devInfoTy)) {
3250  mapData.OriginalValue.push_back(origValue);
3251  mapData.Pointers.push_back(mapData.OriginalValue.back());
3252  mapData.IsDeclareTarget.push_back(false);
3253  mapData.BasePointers.push_back(mapData.OriginalValue.back());
3254  mapData.BaseType.push_back(
3255  moduleTranslation.convertType(mapOp.getVarType()));
3256  mapData.Sizes.push_back(builder.getInt64(0));
3257  mapData.MapClause.push_back(mapOp.getOperation());
3258  mapData.Types.push_back(
3259  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
3260  mapData.Names.push_back(LLVM::createMappingInformation(
3261  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3262  mapData.DevicePointers.push_back(devInfoTy);
3263  mapData.Mappers.push_back(nullptr);
3264  mapData.IsAMapping.push_back(false);
3265  mapData.IsAMember.push_back(checkIsAMember(useDevOperands, mapOp));
3266  }
3267  }
3268  };
3269 
3270  addDevInfos(useDevAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
3271  addDevInfos(useDevPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
3272 
3273  for (Value mapValue : hasDevAddrOperands) {
3274  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
3275  Value offloadPtr =
3276  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
3277  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
3278  auto mapType =
3279  static_cast<llvm::omp::OpenMPOffloadMappingFlags>(mapOp.getMapType());
3280  auto mapTypeAlways = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
3281 
3282  mapData.OriginalValue.push_back(origValue);
3283  mapData.BasePointers.push_back(origValue);
3284  mapData.Pointers.push_back(origValue);
3285  mapData.IsDeclareTarget.push_back(false);
3286  mapData.BaseType.push_back(
3287  moduleTranslation.convertType(mapOp.getVarType()));
3288  mapData.Sizes.push_back(
3289  builder.getInt64(dl.getTypeSize(mapOp.getVarType())));
3290  mapData.MapClause.push_back(mapOp.getOperation());
3291  if (llvm::to_underlying(mapType & mapTypeAlways)) {
3292  // Descriptors are mapped with the ALWAYS flag, since they can get
3293  // rematerialized, so the address of the decriptor for a given object
3294  // may change from one place to another.
3295  mapData.Types.push_back(mapType);
3296  // Technically it's possible for a non-descriptor mapping to have
3297  // both has-device-addr and ALWAYS, so lookup the mapper in case it
3298  // exists.
3299  if (mapOp.getMapperId()) {
3300  mapData.Mappers.push_back(
3301  SymbolTable::lookupNearestSymbolFrom<omp::DeclareMapperOp>(
3302  mapOp, mapOp.getMapperIdAttr()));
3303  } else {
3304  mapData.Mappers.push_back(nullptr);
3305  }
3306  } else {
3307  mapData.Types.push_back(
3308  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
3309  mapData.Mappers.push_back(nullptr);
3310  }
3311  mapData.Names.push_back(LLVM::createMappingInformation(
3312  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
3313  mapData.DevicePointers.push_back(
3314  llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
3315  mapData.IsAMapping.push_back(false);
3316  mapData.IsAMember.push_back(checkIsAMember(hasDevAddrOperands, mapOp));
3317  }
3318 }
3319 
3320 static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
3321  auto *res = llvm::find(mapData.MapClause, memberOp);
3322  assert(res != mapData.MapClause.end() &&
3323  "MapInfoOp for member not found in MapData, cannot return index");
3324  return std::distance(mapData.MapClause.begin(), res);
3325 }
3326 
// Returns the member MapInfoOp of \p mapInfo whose member-index path sorts
// lexicographically smallest (when \p first is true) or largest (when
// \p first is false), i.e. the first or last mapped member of the parent.
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
                                                    bool first) {
  ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
  // Only 1 member has been mapped, we can return it.
  if (indexAttr.size() == 1)
    return cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp());

  // Sort positions into `indices` so that the requested (first or last)
  // member ends up at the front.
  llvm::SmallVector<size_t> indices(indexAttr.size());
  std::iota(indices.begin(), indices.end(), 0);

  llvm::sort(indices.begin(), indices.end(),
             [&](const size_t a, const size_t b) {
               // Compare the two members' index paths element by element.
               auto memberIndicesA = cast<ArrayAttr>(indexAttr[a]);
               auto memberIndicesB = cast<ArrayAttr>(indexAttr[b]);
               for (const auto it : llvm::zip(memberIndicesA, memberIndicesB)) {
                 int64_t aIndex = cast<IntegerAttr>(std::get<0>(it)).getInt();
                 int64_t bIndex = cast<IntegerAttr>(std::get<1>(it)).getInt();

                 if (aIndex == bIndex)
                   continue;

                 if (aIndex < bIndex)
                   return first;

                 if (aIndex > bIndex)
                   return !first;
               }

               // Iterated up to the end of the shorter member path and the
               // paths were equal so far, so select the member with the
               // lowest index count, i.e. the "parent".
               return memberIndicesA.size() < memberIndicesB.size();
             });

  return llvm::cast<omp::MapInfoOp>(
      mapInfo.getMembers()[indices.front()].getDefiningOp());
}
3364 
3365 /// This function calculates the array/pointer offset for map data provided
3366 /// with bounds operations, e.g. when provided something like the following:
3367 ///
3368 /// Fortran
3369 /// map(tofrom: array(2:5, 3:2))
3370 /// or
3371 /// C++
3372 /// map(tofrom: array[1:4][2:3])
3373 /// We must calculate the initial pointer offset to pass across, this function
3374 /// performs this using bounds.
3375 ///
/// NOTE: While specified in row-major order, the bounds currently need to be
/// flipped for Fortran's column-major array allocation and access (as
/// opposed to C++'s row-major, hence the backwards processing where order is
/// important). This is likely important to keep in mind for the future when
/// we incorporate a C++ frontend: both frontends will need to agree on the
/// ordering of generated bounds operations (one may have to flip them) to
/// make the below lowering frontend agnostic. The offload size
/// calculation may also have to be adjusted for C++.
3384 std::vector<llvm::Value *>
3386  llvm::IRBuilderBase &builder, bool isArrayTy,
3387  OperandRange bounds) {
3388  std::vector<llvm::Value *> idx;
3389  // There's no bounds to calculate an offset from, we can safely
3390  // ignore and return no indices.
3391  if (bounds.empty())
3392  return idx;
3393 
3394  // If we have an array type, then we have its type so can treat it as a
3395  // normal GEP instruction where the bounds operations are simply indexes
3396  // into the array. We currently do reverse order of the bounds, which
3397  // I believe leans more towards Fortran's column-major in memory.
3398  if (isArrayTy) {
3399  idx.push_back(builder.getInt64(0));
3400  for (int i = bounds.size() - 1; i >= 0; --i) {
3401  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3402  bounds[i].getDefiningOp())) {
3403  idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
3404  }
3405  }
3406  } else {
3407  // If we do not have an array type, but we have bounds, then we're dealing
3408  // with a pointer that's being treated like an array and we have the
3409  // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
3410  // address (pointer pointing to the actual data) so we must caclulate the
3411  // offset using a single index which the following two loops attempts to
3412  // compute.
3413 
3414  // Calculates the size offset we need to make per row e.g. first row or
3415  // column only needs to be offset by one, but the next would have to be
3416  // the previous row/column offset multiplied by the extent of current row.
3417  //
3418  // For example ([1][10][100]):
3419  //
3420  // - First row/column we move by 1 for each index increment
3421  // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
3422  // current) for 10 for each index increment
3423  // - Third row/column we would move by 10 (second row/column) *
3424  // (extent/size of current) 100 for 1000 for each index increment
3425  std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
3426  for (size_t i = 1; i < bounds.size(); ++i) {
3427  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3428  bounds[i].getDefiningOp())) {
3429  dimensionIndexSizeOffset.push_back(builder.CreateMul(
3430  moduleTranslation.lookupValue(boundOp.getExtent()),
3431  dimensionIndexSizeOffset[i - 1]));
3432  }
3433  }
3434 
3435  // Now that we have calculated how much we move by per index, we must
3436  // multiply each lower bound offset in indexes by the size offset we
3437  // have calculated in the previous and accumulate the results to get
3438  // our final resulting offset.
3439  for (int i = bounds.size() - 1; i >= 0; --i) {
3440  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3441  bounds[i].getDefiningOp())) {
3442  if (idx.empty())
3443  idx.emplace_back(builder.CreateMul(
3444  moduleTranslation.lookupValue(boundOp.getLowerBound()),
3445  dimensionIndexSizeOffset[i]));
3446  else
3447  idx.back() = builder.CreateAdd(
3448  idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
3449  boundOp.getLowerBound()),
3450  dimensionIndexSizeOffset[i]));
3451  }
3452  }
3453  }
3454 
3455  return idx;
3456 }
3457 
3458 // This creates two insertions into the MapInfosTy data structure for the
3459 // "parent" of a set of members, (usually a container e.g.
3460 // class/structure/derived type) when subsequent members have also been
3461 // explicitly mapped on the same map clause. Certain types, such as Fortran
3462 // descriptors are mapped like this as well, however, the members are
3463 // implicit as far as a user is concerned, but we must explicitly map them
3464 // internally.
3465 //
3466 // This function also returns the memberOfFlag for this particular parent,
// which is utilised in subsequent member mappings (by modifying their map type
3468 // with it) to indicate that a member is part of this parent and should be
3469 // treated by the runtime as such. Important to achieve the correct mapping.
3470 //
3471 // This function borrows a lot from Clang's emitCombinedEntry function
3472 // inside of CGOpenMPRuntime.cpp
3473 static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
3474  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3475  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
3476  MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams) {
3477  // Map the first segment of our structure
3478  combinedInfo.Types.emplace_back(
3479  isTargetParams
3480  ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
3481  : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE);
3482  combinedInfo.DevicePointers.emplace_back(
3483  mapData.DevicePointers[mapDataIndex]);
3484  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIndex]);
3485  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
3486  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
3487  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
3488 
3489  // Calculate size of the parent object being mapped based on the
3490  // addresses at runtime, highAddr - lowAddr = size. This of course
3491  // doesn't factor in allocated data like pointers, hence the further
3492  // processing of members specified by users, or in the case of
3493  // Fortran pointers and allocatables, the mapping of the pointed to
3494  // data by the descriptor (which itself, is a structure containing
3495  // runtime information on the dynamically allocated data).
3496  auto parentClause =
3497  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3498 
3499  llvm::Value *lowAddr, *highAddr;
3500  if (!parentClause.getPartialMap()) {
3501  lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
3502  builder.getPtrTy());
3503  highAddr = builder.CreatePointerCast(
3504  builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
3505  mapData.Pointers[mapDataIndex], 1),
3506  builder.getPtrTy());
3507  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
3508  } else {
3509  auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3510  int firstMemberIdx = getMapDataMemberIdx(
3511  mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
3512  lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
3513  builder.getPtrTy());
3514  int lastMemberIdx = getMapDataMemberIdx(
3515  mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
3516  highAddr = builder.CreatePointerCast(
3517  builder.CreateGEP(mapData.BaseType[lastMemberIdx],
3518  mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
3519  builder.getPtrTy());
3520  combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
3521  }
3522 
3523  llvm::Value *size = builder.CreateIntCast(
3524  builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
3525  builder.getInt64Ty(),
3526  /*isSigned=*/false);
3527  combinedInfo.Sizes.push_back(size);
3528 
3529  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
3530  ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
3531 
3532  // This creates the initial MEMBER_OF mapping that consists of
3533  // the parent/top level container (same as above effectively, except
3534  // with a fixed initial compile time size and separate maptype which
3535  // indicates the true mape type (tofrom etc.). This parent mapping is
3536  // only relevant if the structure in its totality is being mapped,
3537  // otherwise the above suffices.
3538  if (!parentClause.getPartialMap()) {
3539  // TODO: This will need to be expanded to include the whole host of logic
3540  // for the map flags that Clang currently supports (e.g. it should do some
3541  // further case specific flag modifications). For the moment, it handles
3542  // what we support as expected.
3543  llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
3544  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3545  combinedInfo.Types.emplace_back(mapFlag);
3546  combinedInfo.DevicePointers.emplace_back(
3548  combinedInfo.Mappers.emplace_back(nullptr);
3549  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
3550  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
3551  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
3552  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
3553  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
3554  }
3555  return memberOfFlag;
3556 }
3557 
3558 // The intent is to verify if the mapped data being passed is a
3559 // pointer -> pointee that requires special handling in certain cases,
3560 // e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
3561 //
3562 // There may be a better way to verify this, but unfortunately with
3563 // opaque pointers we lose the ability to easily check if something is
3564 // a pointer whilst maintaining access to the underlying type.
3565 static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
3566  // If we have a varPtrPtr field assigned then the underlying type is a pointer
3567  if (mapOp.getVarPtrPtr())
3568  return true;
3569 
3570  // If the map data is declare target with a link clause, then it's represented
3571  // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
3572  // no relation to pointers.
3573  if (isDeclareTargetLink(mapOp.getVarPtr()))
3574  return true;
3575 
3576  return false;
3577 }
3578 
3579 // This function is intended to add explicit mappings of members
3581  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3582  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
3583  MapInfoData &mapData, uint64_t mapDataIndex,
3584  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
3585 
3586  auto parentClause =
3587  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3588 
3589  for (auto mappedMembers : parentClause.getMembers()) {
3590  auto memberClause =
3591  llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp());
3592  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
3593 
3594  assert(memberDataIdx >= 0 && "could not find mapped member of structure");
3595 
3596  // If we're currently mapping a pointer to a block of data, we must
3597  // initially map the pointer, and then attatch/bind the data with a
3598  // subsequent map to the pointer. This segment of code generates the
3599  // pointer mapping, which can in certain cases be optimised out as Clang
3600  // currently does in its lowering. However, for the moment we do not do so,
3601  // in part as we currently have substantially less information on the data
3602  // being mapped at this stage.
3603  if (checkIfPointerMap(memberClause)) {
3604  auto mapFlag =
3605  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
3606  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3607  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
3608  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3609  combinedInfo.Types.emplace_back(mapFlag);
3610  combinedInfo.DevicePointers.emplace_back(
3612  combinedInfo.Mappers.emplace_back(nullptr);
3613  combinedInfo.Names.emplace_back(
3614  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
3615  combinedInfo.BasePointers.emplace_back(
3616  mapData.BasePointers[mapDataIndex]);
3617  combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
3618  combinedInfo.Sizes.emplace_back(builder.getInt64(
3619  moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
3620  }
3621 
3622  // Same MemberOfFlag to indicate its link with parent and other members
3623  // of.
3624  auto mapFlag =
3625  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType());
3626  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3627  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
3628  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3629  if (checkIfPointerMap(memberClause))
3630  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
3631 
3632  combinedInfo.Types.emplace_back(mapFlag);
3633  combinedInfo.DevicePointers.emplace_back(
3634  mapData.DevicePointers[memberDataIdx]);
3635  combinedInfo.Mappers.emplace_back(mapData.Mappers[memberDataIdx]);
3636  combinedInfo.Names.emplace_back(
3637  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
3638  uint64_t basePointerIndex =
3639  checkIfPointerMap(memberClause) ? memberDataIdx : mapDataIndex;
3640  combinedInfo.BasePointers.emplace_back(
3641  mapData.BasePointers[basePointerIndex]);
3642  combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
3643 
3644  llvm::Value *size = mapData.Sizes[memberDataIdx];
3645  if (checkIfPointerMap(memberClause)) {
3646  size = builder.CreateSelect(
3647  builder.CreateIsNull(mapData.Pointers[memberDataIdx]),
3648  builder.getInt64(0), size);
3649  }
3650 
3651  combinedInfo.Sizes.emplace_back(size);
3652  }
3653 }
3654 
3655 static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
3656  MapInfosTy &combinedInfo, bool isTargetParams,
3657  int mapDataParentIdx = -1) {
3658  // Declare Target Mappings are excluded from being marked as
3659  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
3660  // marked with OMP_MAP_PTR_AND_OBJ instead.
3661  auto mapFlag = mapData.Types[mapDataIdx];
3662  auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
3663 
3664  bool isPtrTy = checkIfPointerMap(mapInfoOp);
3665  if (isPtrTy)
3666  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
3667 
3668  if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx])
3669  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3670 
3671  if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy &&
3672  !isPtrTy)
3673  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
3674 
3675  // if we're provided a mapDataParentIdx, then the data being mapped is
3676  // part of a larger object (in a parent <-> member mapping) and in this
3677  // case our BasePointer should be the parent.
3678  if (mapDataParentIdx >= 0)
3679  combinedInfo.BasePointers.emplace_back(
3680  mapData.BasePointers[mapDataParentIdx]);
3681  else
3682  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
3683 
3684  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
3685  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
3686  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]);
3687  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
3688  combinedInfo.Types.emplace_back(mapFlag);
3689  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
3690 }
3691 
// Lowers a map clause that has explicitly mapped members. A partial map
// (parent not itself referenced on the directive) with a single member is
// emitted as an individual map of that member; otherwise the parent is mapped
// first and each member is then emitted carrying the parent's MEMBER_OF flag.
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation,
                                    llvm::IRBuilderBase &builder,
                                    llvm::OpenMPIRBuilder &ompBuilder,
                                    DataLayout &dl, MapInfosTy &combinedInfo,
                                    MapInfoData &mapData, uint64_t mapDataIndex,
                                    bool isTargetParams) {
  auto parentClause =
      llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);

  // If we have a partial map (no parent referenced in the map clauses of the
  // directive, only members) and only a single member, we do not need to bind
  // the map of the member to the parent, we can pass the member separately.
  if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) {
    auto memberClause = llvm::cast<omp::MapInfoOp>(
        parentClause.getMembers()[0].getDefiningOp());
    int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
    // Note: Clang treats arrays with explicit bounds that fall into this
    // category as a parent with map case, however, it seems this isn't a
    // requirement, and processing them as an individual map is fine. So,
    // we will handle them as individual maps for the moment, as it's
    // difficult for us to check this as we always require bounds to be
    // specified currently and it's also marginally more optimal (single
    // map rather than two). The difference may come from the fact that
    // Clang maps array without bounds as pointers (which we do not
    // currently do), whereas we treat them as arrays in all cases
    // currently.
    processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams,
                         mapDataIndex);
    return;
  }

  // Emit the parent entry first, then all member entries tagged with the
  // parent's MEMBER_OF flag.
  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
      mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
                           combinedInfo, mapData, mapDataIndex, isTargetParams);
  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
                              combinedInfo, mapData, mapDataIndex,
                              memberOfParentFlag);
}
3730 
// This is a variation on Clang's GenerateOpenMPCapturedVars, which
// generates different operation (e.g. load/store) combinations for
// arguments to the kernel, based on map capture kinds which are then
// utilised in the combinedInfo in place of the original Map value.
static void
createAlteredByCaptureMap(MapInfoData &mapData,
                          LLVM::ModuleTranslation &moduleTranslation,
                          llvm::IRBuilderBase &builder) {
  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
    // If it's declare target, skip it, it's handled separately.
    if (!mapData.IsDeclareTarget[i]) {
      auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
      omp::VariableCaptureKind captureKind = mapOp.getMapCaptureType();
      bool isPtrTy = checkIfPointerMap(mapOp);

      // Currently handles array sectioning lowerbound case, but more
      // logic may be required in the future. Clang invokes EmitLValue,
      // which has specialised logic for special Clang types such as user
      // defines, so it is possible we will have to extend this for
      // structures or other complex types. As the general idea is that this
      // function mimics some of the logic from Clang that we require for
      // kernel argument passing from host -> device.
      switch (captureKind) {
      case omp::VariableCaptureKind::ByRef: {
        llvm::Value *newV = mapData.Pointers[i];
        std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
            moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
            mapOp.getBounds());
        // Pointer-like mappings are dereferenced before the bounds offset
        // is applied to the pointee.
        if (isPtrTy)
          newV = builder.CreateLoad(builder.getPtrTy(), newV);

        if (!offsetIdx.empty())
          newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
                                           "array_offset");
        mapData.Pointers[i] = newV;
      } break;
      case omp::VariableCaptureKind::ByCopy: {
        llvm::Type *type = mapData.BaseType[i];
        llvm::Value *newV;
        if (mapData.Pointers[i]->getType()->isPointerTy())
          newV = builder.CreateLoad(type, mapData.Pointers[i]);
        else
          newV = mapData.Pointers[i];

        // Non-pointer by-copy values are round-tripped through a temporary
        // alloca (created at the function's alloca insertion point) so the
        // kernel argument is still passed via a pointer-sized slot.
        if (!isPtrTy) {
          auto curInsert = builder.saveIP();
          builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
          auto *memTempAlloc =
              builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
          builder.restoreIP(curInsert);

          builder.CreateStore(newV, memTempAlloc);
          newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
        }

        mapData.Pointers[i] = newV;
        mapData.BasePointers[i] = newV;
      } break;
      case omp::VariableCaptureKind::This:
      case omp::VariableCaptureKind::VLAType:
        mapData.MapClause[i]->emitOpError("Unhandled capture kind");
        break;
      }
    }
  }
}
3797 
// Generate all map related information and fill the combinedInfo.
static void genMapInfos(llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation,
                        DataLayout &dl, MapInfosTy &combinedInfo,
                        MapInfoData &mapData, bool isTargetParams = false) {
  // We wish to modify some of the methods in which arguments are
  // passed based on their capture type by the target region, this can
  // involve generating new loads and stores, which changes the
  // MLIR value to LLVM value mapping, however, we only wish to do this
  // locally for the current function/target and also avoid altering
  // ModuleTranslation, so we remap the base pointer or pointer stored
  // in the map infos corresponding MapInfoData, which is later accessed
  // by genMapInfos and createTarget to help generate the kernel and
  // kernel arg structure. It primarily becomes relevant in cases like
  // bycopy, or byref range'd arrays. In the default case, we simply
  // pass the pointer byref as both basePointer and pointer.
  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
    createAlteredByCaptureMap(mapData, moduleTranslation, builder);

  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

  // We operate under the assumption that all vectors that are
  // required in MapInfoData are of equal lengths (either filled with
  // default constructed data or appropriate information) so we can
  // utilise the size from any component of MapInfoData, if we can't
  // something is missing from the initial MapInfoData construction.
  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
    // NOTE/TODO: We currently do not support arbitrary depth record
    // type mapping.
    if (mapData.IsAMember[i])
      continue;

    auto mapInfoOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[i]);
    // Clauses with explicitly mapped members need combined parent/member
    // handling; everything else is emitted as a stand-alone entry.
    if (!mapInfoOp.getMembers().empty()) {
      processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
                              combinedInfo, mapData, i, isTargetParams);
      continue;
    }

    processIndividualMap(mapData, i, combinedInfo, isTargetParams);
  }
}
3840 
3842 emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder,
3843  LLVM::ModuleTranslation &moduleTranslation,
3844  llvm::StringRef mapperFuncName);
3845 
3847 getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder,
3848  LLVM::ModuleTranslation &moduleTranslation) {
3849  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
3850  std::string mapperFuncName =
3851  moduleTranslation.getOpenMPBuilder()->createPlatformSpecificName(
3852  {"omp_mapper", declMapperOp.getSymName()});
3853 
3854  if (auto *lookupFunc = moduleTranslation.lookupFunction(mapperFuncName))
3855  return lookupFunc;
3856 
3857  return emitUserDefinedMapper(declMapperOp, builder, moduleTranslation,
3858  mapperFuncName);
3859 }
3860 
3862 emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder,
3863  LLVM::ModuleTranslation &moduleTranslation,
3864  llvm::StringRef mapperFuncName) {
3865  auto declMapperOp = cast<omp::DeclareMapperOp>(op);
3866  auto declMapperInfoOp = declMapperOp.getDeclareMapperInfo();
3867  DataLayout dl = DataLayout(declMapperOp->getParentOfType<ModuleOp>());
3868  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3869  llvm::Type *varType = moduleTranslation.convertType(declMapperOp.getType());
3870  SmallVector<Value> mapVars = declMapperInfoOp.getMapVars();
3871 
3872  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3873 
3874  // Fill up the arrays with all the mapped variables.
3875  MapInfosTy combinedInfo;
3876  auto genMapInfoCB =
3877  [&](InsertPointTy codeGenIP, llvm::Value *ptrPHI,
3878  llvm::Value *unused2) -> llvm::OpenMPIRBuilder::MapInfosOrErrorTy {
3879  builder.restoreIP(codeGenIP);
3880  moduleTranslation.mapValue(declMapperOp.getSymVal(), ptrPHI);
3881  moduleTranslation.mapBlock(&declMapperOp.getRegion().front(),
3882  builder.GetInsertBlock());
3883  if (failed(moduleTranslation.convertBlock(declMapperOp.getRegion().front(),
3884  /*ignoreArguments=*/true,
3885  builder)))
3886  return llvm::make_error<PreviouslyReportedError>();
3887  MapInfoData mapData;
3888  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
3889  builder);
3890  genMapInfos(builder, moduleTranslation, dl, combinedInfo, mapData);
3891 
3892  // Drop the mapping that is no longer necessary so that the same region can
3893  // be processed multiple times.
3894  moduleTranslation.forgetMapping(declMapperOp.getRegion());
3895  return combinedInfo;
3896  };
3897 
3898  auto customMapperCB = [&](unsigned i) -> llvm::Expected<llvm::Function *> {
3899  if (!combinedInfo.Mappers[i])
3900  return nullptr;
3901  return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
3902  moduleTranslation);
3903  };
3904 
3905  llvm::Expected<llvm::Function *> newFn = ompBuilder->emitUserDefinedMapper(
3906  genMapInfoCB, varType, mapperFuncName, customMapperCB);
3907  if (!newFn)
3908  return newFn.takeError();
3909  moduleTranslation.mapFunction(mapperFuncName, *newFn);
3910  return *newFn;
3911 }
3912 
3913 static LogicalResult
3914 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
3915  LLVM::ModuleTranslation &moduleTranslation) {
3916  llvm::Value *ifCond = nullptr;
3917  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
3918  SmallVector<Value> mapVars;
3919  SmallVector<Value> useDevicePtrVars;
3920  SmallVector<Value> useDeviceAddrVars;
3921  llvm::omp::RuntimeFunction RTLFn;
3922  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
3923 
3924  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3925  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
3926  /*SeparateBeginEndCalls=*/true);
3927 
3928  LogicalResult result =
3930  .Case([&](omp::TargetDataOp dataOp) {
3931  if (failed(checkImplementationStatus(*dataOp)))
3932  return failure();
3933 
3934  if (auto ifVar = dataOp.getIfExpr())
3935  ifCond = moduleTranslation.lookupValue(ifVar);
3936 
3937  if (auto devId = dataOp.getDevice())
3938  if (auto constOp =
3939  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
3940  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
3941  deviceID = intAttr.getInt();
3942 
3943  mapVars = dataOp.getMapVars();
3944  useDevicePtrVars = dataOp.getUseDevicePtrVars();
3945  useDeviceAddrVars = dataOp.getUseDeviceAddrVars();
3946  return success();
3947  })
3948  .Case([&](omp::TargetEnterDataOp enterDataOp) -> LogicalResult {
3949  if (failed(checkImplementationStatus(*enterDataOp)))
3950  return failure();
3951 
3952  if (auto ifVar = enterDataOp.getIfExpr())
3953  ifCond = moduleTranslation.lookupValue(ifVar);
3954 
3955  if (auto devId = enterDataOp.getDevice())
3956  if (auto constOp =
3957  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
3958  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
3959  deviceID = intAttr.getInt();
3960  RTLFn =
3961  enterDataOp.getNowait()
3962  ? llvm::omp::OMPRTL___tgt_target_data_begin_nowait_mapper
3963  : llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
3964  mapVars = enterDataOp.getMapVars();
3965  info.HasNoWait = enterDataOp.getNowait();
3966  return success();
3967  })
3968  .Case([&](omp::TargetExitDataOp exitDataOp) -> LogicalResult {
3969  if (failed(checkImplementationStatus(*exitDataOp)))
3970  return failure();
3971 
3972  if (auto ifVar = exitDataOp.getIfExpr())
3973  ifCond = moduleTranslation.lookupValue(ifVar);
3974 
3975  if (auto devId = exitDataOp.getDevice())
3976  if (auto constOp =
3977  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
3978  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
3979  deviceID = intAttr.getInt();
3980 
3981  RTLFn = exitDataOp.getNowait()
3982  ? llvm::omp::OMPRTL___tgt_target_data_end_nowait_mapper
3983  : llvm::omp::OMPRTL___tgt_target_data_end_mapper;
3984  mapVars = exitDataOp.getMapVars();
3985  info.HasNoWait = exitDataOp.getNowait();
3986  return success();
3987  })
3988  .Case([&](omp::TargetUpdateOp updateDataOp) -> LogicalResult {
3989  if (failed(checkImplementationStatus(*updateDataOp)))
3990  return failure();
3991 
3992  if (auto ifVar = updateDataOp.getIfExpr())
3993  ifCond = moduleTranslation.lookupValue(ifVar);
3994 
3995  if (auto devId = updateDataOp.getDevice())
3996  if (auto constOp =
3997  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
3998  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
3999  deviceID = intAttr.getInt();
4000 
4001  RTLFn =
4002  updateDataOp.getNowait()
4003  ? llvm::omp::OMPRTL___tgt_target_data_update_nowait_mapper
4004  : llvm::omp::OMPRTL___tgt_target_data_update_mapper;
4005  mapVars = updateDataOp.getMapVars();
4006  info.HasNoWait = updateDataOp.getNowait();
4007  return success();
4008  })
4009  .Default([&](Operation *op) {
4010  llvm_unreachable("unexpected operation");
4011  return failure();
4012  });
4013 
4014  if (failed(result))
4015  return failure();
4016 
4017  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4018  MapInfoData mapData;
4019  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, DL,
4020  builder, useDevicePtrVars, useDeviceAddrVars);
4021 
4022  // Fill up the arrays with all the mapped variables.
4023  MapInfosTy combinedInfo;
4024  auto genMapInfoCB = [&](InsertPointTy codeGenIP) -> MapInfosTy & {
4025  builder.restoreIP(codeGenIP);
4026  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
4027  return combinedInfo;
4028  };
4029 
4030  // Define a lambda to apply mappings between use_device_addr and
4031  // use_device_ptr base pointers, and their associated block arguments.
4032  auto mapUseDevice =
4033  [&moduleTranslation](
4034  llvm::OpenMPIRBuilder::DeviceInfoTy type,
4036  llvm::SmallVectorImpl<Value> &useDeviceVars, MapInfoData &mapInfoData,
4037  llvm::function_ref<llvm::Value *(llvm::Value *)> mapper = nullptr) {
4038  for (auto [arg, useDevVar] :
4039  llvm::zip_equal(blockArgs, useDeviceVars)) {
4040 
4041  auto getMapBasePtr = [](omp::MapInfoOp mapInfoOp) {
4042  return mapInfoOp.getVarPtrPtr() ? mapInfoOp.getVarPtrPtr()
4043  : mapInfoOp.getVarPtr();
4044  };
4045 
4046  auto useDevMap = cast<omp::MapInfoOp>(useDevVar.getDefiningOp());
4047  for (auto [mapClause, devicePointer, basePointer] : llvm::zip_equal(
4048  mapInfoData.MapClause, mapInfoData.DevicePointers,
4049  mapInfoData.BasePointers)) {
4050  auto mapOp = cast<omp::MapInfoOp>(mapClause);
4051  if (getMapBasePtr(mapOp) != getMapBasePtr(useDevMap) ||
4052  devicePointer != type)
4053  continue;
4054 
4055  if (llvm::Value *devPtrInfoMap =
4056  mapper ? mapper(basePointer) : basePointer) {
4057  moduleTranslation.mapValue(arg, devPtrInfoMap);
4058  break;
4059  }
4060  }
4061  }
4062  };
4063 
4064  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
4065  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType)
4066  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
4067  builder.restoreIP(codeGenIP);
4068  assert(isa<omp::TargetDataOp>(op) &&
4069  "BodyGen requested for non TargetDataOp");
4070  auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(op);
4071  Region &region = cast<omp::TargetDataOp>(op).getRegion();
4072  switch (bodyGenType) {
4073  case BodyGenTy::Priv:
4074  // Check if any device ptr/addr info is available
4075  if (!info.DevicePtrInfoMap.empty()) {
4076  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4077  blockArgIface.getUseDeviceAddrBlockArgs(),
4078  useDeviceAddrVars, mapData,
4079  [&](llvm::Value *basePointer) -> llvm::Value * {
4080  if (!info.DevicePtrInfoMap[basePointer].second)
4081  return nullptr;
4082  return builder.CreateLoad(
4083  builder.getPtrTy(),
4084  info.DevicePtrInfoMap[basePointer].second);
4085  });
4086  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4087  blockArgIface.getUseDevicePtrBlockArgs(), useDevicePtrVars,
4088  mapData, [&](llvm::Value *basePointer) {
4089  return info.DevicePtrInfoMap[basePointer].second;
4090  });
4091 
4092  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
4093  moduleTranslation)))
4094  return llvm::make_error<PreviouslyReportedError>();
4095  }
4096  break;
4097  case BodyGenTy::DupNoPriv:
4098  // We must always restoreIP regardless of doing anything the caller
4099  // does not restore it, leading to incorrect (no) branch generation.
4100  builder.restoreIP(codeGenIP);
4101  break;
4102  case BodyGenTy::NoPriv:
4103  // If device info is available then region has already been generated
4104  if (info.DevicePtrInfoMap.empty()) {
4105  // For device pass, if use_device_ptr(addr) mappings were present,
4106  // we need to link them here before codegen.
4107  if (ompBuilder->Config.IsTargetDevice.value_or(false)) {
4108  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
4109  blockArgIface.getUseDeviceAddrBlockArgs(),
4110  useDeviceAddrVars, mapData);
4111  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
4112  blockArgIface.getUseDevicePtrBlockArgs(),
4113  useDevicePtrVars, mapData);
4114  }
4115 
4116  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
4117  moduleTranslation)))
4118  return llvm::make_error<PreviouslyReportedError>();
4119  }
4120  break;
4121  }
4122  return builder.saveIP();
4123  };
4124 
4125  auto customMapperCB =
4126  [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
4127  if (!combinedInfo.Mappers[i])
4128  return nullptr;
4129  info.HasMapper = true;
4130  return getOrCreateUserDefinedMapperFunc(combinedInfo.Mappers[i], builder,
4131  moduleTranslation);
4132  };
4133 
4134  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4135  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4136  findAllocaInsertPoint(builder, moduleTranslation);
4137  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() {
4138  if (isa<omp::TargetDataOp>(op))
4139  return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
4140  builder.getInt64(deviceID), ifCond,
4141  info, genMapInfoCB, customMapperCB,
4142  /*MapperFunc=*/nullptr, bodyGenCB,
4143  /*DeviceAddrCB=*/nullptr);
4144  return ompBuilder->createTargetData(
4145  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
4146  info, genMapInfoCB, customMapperCB, &RTLFn);
4147  }();
4148 
4149  if (failed(handleError(afterIP, *op)))
4150  return failure();
4151 
4152  builder.restoreIP(*afterIP);
4153  return success();
4154 }
4155 
4156 static LogicalResult
4157 convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
4158  LLVM::ModuleTranslation &moduleTranslation) {
4159  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4160  auto distributeOp = cast<omp::DistributeOp>(opInst);
4161  if (failed(checkImplementationStatus(opInst)))
4162  return failure();
4163 
4164  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4165  auto bodyGenCB = [&](InsertPointTy allocaIP,
4166  InsertPointTy codeGenIP) -> llvm::Error {
4167  // Save the alloca insertion point on ModuleTranslation stack for use in
4168  // nested regions.
4170  moduleTranslation, allocaIP);
4171 
4172  // DistributeOp has only one region associated with it.
4173  builder.restoreIP(codeGenIP);
4174  PrivateVarsInfo privVarsInfo(distributeOp);
4175 
4176  llvm::Expected<llvm::BasicBlock *> afterAllocas =
4177  allocatePrivateVars(builder, moduleTranslation, privVarsInfo, allocaIP);
4178  if (handleError(afterAllocas, opInst).failed())
4179  return llvm::make_error<PreviouslyReportedError>();
4180 
4181  if (handleError(initPrivateVars(builder, moduleTranslation, privVarsInfo),
4182  opInst)
4183  .failed())
4184  return llvm::make_error<PreviouslyReportedError>();
4185 
4186  if (failed(copyFirstPrivateVars(
4187  builder, moduleTranslation, privVarsInfo.mlirVars,
4188  privVarsInfo.llvmVars, privVarsInfo.privatizers)))
4189  return llvm::make_error<PreviouslyReportedError>();
4190 
4191  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4192  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4194  convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
4195  builder, moduleTranslation);
4196  if (!regionBlock)
4197  return regionBlock.takeError();
4198  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
4199 
4200  // Skip applying a workshare loop below when translating 'distribute
4201  // parallel do' (it's been already handled by this point while translating
4202  // the nested omp.wsloop).
4203  if (!isa_and_present<omp::WsloopOp>(distributeOp.getNestedWrapper())) {
4204  // TODO: Add support for clauses which are valid for DISTRIBUTE
4205  // constructs. Static schedule is the default.
4206  auto schedule = omp::ClauseScheduleKind::Static;
4207  bool isOrdered = false;
4208  std::optional<omp::ScheduleModifier> scheduleMod;
4209  bool isSimd = false;
4210  llvm::omp::WorksharingLoopType workshareLoopType =
4211  llvm::omp::WorksharingLoopType::DistributeStaticLoop;
4212  bool loopNeedsBarrier = false;
4213  llvm::Value *chunk = nullptr;
4214 
4215  llvm::CanonicalLoopInfo *loopInfo =
4216  findCurrentLoopInfo(moduleTranslation);
4217  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
4218  ompBuilder->applyWorkshareLoop(
4219  ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
4220  convertToScheduleKind(schedule), chunk, isSimd,
4221  scheduleMod == omp::ScheduleModifier::monotonic,
4222  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
4223  workshareLoopType);
4224 
4225  if (!wsloopIP)
4226  return wsloopIP.takeError();
4227  }
4228 
4229  if (failed(cleanupPrivateVars(builder, moduleTranslation,
4230  distributeOp.getLoc(), privVarsInfo.llvmVars,
4231  privVarsInfo.privatizers)))
4232  return llvm::make_error<PreviouslyReportedError>();
4233 
4234  return llvm::Error::success();
4235  };
4236 
4237  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4238  findAllocaInsertPoint(builder, moduleTranslation);
4239  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4240  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
4241  ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
4242 
4243  if (failed(handleError(afterIP, opInst)))
4244  return failure();
4245 
4246  builder.restoreIP(*afterIP);
4247  return success();
4248 }
4249 
4250 /// Lowers the FlagsAttr which is applied to the module on the device
4251 /// pass when offloading, this attribute contains OpenMP RTL globals that can
4252 /// be passed as flags to the frontend, otherwise they are set to default
4253 LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
4254  LLVM::ModuleTranslation &moduleTranslation) {
4255  if (!cast<mlir::ModuleOp>(op))
4256  return failure();
4257 
4258  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4259 
4260  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
4261  attribute.getOpenmpDeviceVersion());
4262 
4263  if (attribute.getNoGpuLib())
4264  return success();
4265 
4266  ompBuilder->createGlobalFlag(
4267  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
4268  "__omp_rtl_debug_kind");
4269  ompBuilder->createGlobalFlag(
4270  attribute
4271  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
4272  ,
4273  "__omp_rtl_assume_teams_oversubscription");
4274  ompBuilder->createGlobalFlag(
4275  attribute
4276  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
4277  ,
4278  "__omp_rtl_assume_threads_oversubscription");
4279  ompBuilder->createGlobalFlag(
4280  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
4281  "__omp_rtl_assume_no_thread_state");
4282  ompBuilder->createGlobalFlag(
4283  attribute
4284  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
4285  ,
4286  "__omp_rtl_assume_no_nested_parallelism");
4287  return success();
4288 }
4289 
4290 static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
4291  omp::TargetOp targetOp,
4292  llvm::StringRef parentName = "") {
4293  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
4294 
4295  assert(fileLoc && "No file found from location");
4296  StringRef fileName = fileLoc.getFilename().getValue();
4297 
4298  llvm::sys::fs::UniqueID id;
4299  uint64_t line = fileLoc.getLine();
4300  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
4301  size_t fileHash = llvm::hash_value(fileName.str());
4302  size_t deviceId = 0xdeadf17e;
4303  targetInfo =
4304  llvm::TargetRegionEntryInfo(parentName, deviceId, fileHash, line);
4305  } else {
4306  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
4307  id.getFile(), line);
4308  }
4309 }
4310 
4311 static void
4312 handleDeclareTargetMapVar(MapInfoData &mapData,
4313  LLVM::ModuleTranslation &moduleTranslation,
4314  llvm::IRBuilderBase &builder, llvm::Function *func) {
4315  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
4316  // In the case of declare target mapped variables, the basePointer is
4317  // the reference pointer generated by the convertDeclareTargetAttr
4318  // method. Whereas the kernelValue is the original variable, so for
4319  // the device we must replace all uses of this original global variable
4320  // (stored in kernelValue) with the reference pointer (stored in
4321  // basePointer for declare target mapped variables), as for device the
4322  // data is mapped into this reference pointer and should be loaded
4323  // from it, the original variable is discarded. On host both exist and
4324  // metadata is generated (elsewhere in the convertDeclareTargetAttr)
4325  // function to link the two variables in the runtime and then both the
4326  // reference pointer and the pointer are assigned in the kernel argument
4327  // structure for the host.
4328  if (mapData.IsDeclareTarget[i]) {
4329  // If the original map value is a constant, then we have to make sure all
4330  // of it's uses within the current kernel/function that we are going to
4331  // rewrite are converted to instructions, as we will be altering the old
4332  // use (OriginalValue) from a constant to an instruction, which will be
4333  // illegal and ICE the compiler if the user is a constant expression of
4334  // some kind e.g. a constant GEP.
4335  if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
4336  convertUsersOfConstantsToInstructions(constant, func, false);
4337 
4338  // The users iterator will get invalidated if we modify an element,
4339  // so we populate this vector of uses to alter each user on an
4340  // individual basis to emit its own load (rather than one load for
4341  // all).
4343  for (llvm::User *user : mapData.OriginalValue[i]->users())
4344  userVec.push_back(user);
4345 
4346  for (llvm::User *user : userVec) {
4347  if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
4348  if (insn->getFunction() == func) {
4349  auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
4350  mapData.BasePointers[i]);
4351  load->moveBefore(insn->getIterator());
4352  user->replaceUsesOfWith(mapData.OriginalValue[i], load);
4353  }
4354  }
4355  }
4356  }
4357  }
4358 }
4359 
4360 // The createDeviceArgumentAccessor function generates
4361 // instructions for retrieving (accessing) kernel
4362 // arguments inside of the device kernel for use by
4363 // the kernel. This enables different semantics such as
4364 // the creation of temporary copies of data allowing
4365 // semantics like read-only/no host write back kernel
4366 // arguments.
4367 //
4368 // This currently implements a very light version of Clang's
4369 // EmitParmDecl's handling of direct argument handling as well
4370 // as a portion of the argument access generation based on
4371 // capture types found at the end of emitOutlinedFunctionPrologue
4372 // in Clang. The indirect path handling of EmitParmDecl's may be
4373 // required for future work, but a direct 1-to-1 copy doesn't seem
4374 // possible as the logic is rather scattered throughout Clang's
4375 // lowering and perhaps we wish to deviate slightly.
4376 //
4377 // \param mapData - A container containing vectors of information
4378 // corresponding to the input argument, which should have a
4379 // corresponding entry in the MapInfoData containers
4380 // OriginalValue's.
4381 // \param arg - This is the generated kernel function argument that
4382 // corresponds to the passed in input argument. We generated different
4383 // accesses of this Argument, based on capture type and other Input
4384 // related information.
4385 // \param input - This is the host side value that will be passed to
4386 // the kernel i.e. the kernel input, we rewrite all uses of this within
4387 // the kernel (as we generate the kernel body based on the target's region
4388 // which maintains references to the original input) to the retVal argument
4389 // upon exit of this function inside of the OMPIRBuilder. This interlinks
4390 // the kernel argument to future uses of it in the function providing
4391 // appropriate "glue" instructions in between.
4392 // \param retVal - This is the value that all uses of input inside of the
4393 // kernel will be re-written to, the goal of this function is to generate
4394 // an appropriate location for the kernel argument to be accessed from,
4395 // e.g. ByRef will result in a temporary allocation location and then
4396 // a store of the kernel argument into this allocated memory which
4397 // will then be loaded from, ByCopy will use the allocated memory
4398 // directly.
4399 static llvm::IRBuilderBase::InsertPoint
4400 createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
4401  llvm::Value *input, llvm::Value *&retVal,
4402  llvm::IRBuilderBase &builder,
4403  llvm::OpenMPIRBuilder &ompBuilder,
4404  LLVM::ModuleTranslation &moduleTranslation,
4405  llvm::IRBuilderBase::InsertPoint allocaIP,
4406  llvm::IRBuilderBase::InsertPoint codeGenIP) {
4407  builder.restoreIP(allocaIP);
4408 
4409  omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef;
4410 
4411  // Find the associated MapInfoData entry for the current input
4412  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
4413  if (mapData.OriginalValue[i] == input) {
4414  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
4415  capture = mapOp.getMapCaptureType();
4416 
4417  break;
4418  }
4419 
4420  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
4421  unsigned int defaultAS =
4422  ompBuilder.M.getDataLayout().getProgramAddressSpace();
4423 
4424  // Create the alloca for the argument the current point.
4425  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
4426 
4427  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
4428  v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS));
4429 
4430  builder.CreateStore(&arg, v);
4431 
4432  builder.restoreIP(codeGenIP);
4433 
4434  switch (capture) {
4435  case omp::VariableCaptureKind::ByCopy: {
4436  retVal = v;
4437  break;
4438  }
4439  case omp::VariableCaptureKind::ByRef: {
4440  retVal = builder.CreateAlignedLoad(
4441  v->getType(), v,
4442  ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
4443  break;
4444  }
4445  case omp::VariableCaptureKind::This:
4446  case omp::VariableCaptureKind::VLAType:
4447  // TODO: Consider returning error to use standard reporting for
4448  // unimplemented features.
4449  assert(false && "Currently unsupported capture kind");
4450  break;
4451  }
4452 
4453  return builder.saveIP();
4454 }
4455 
4456 /// Follow uses of `host_eval`-defined block arguments of the given `omp.target`
4457 /// operation and populate output variables with their corresponding host value
4458 /// (i.e. operand evaluated outside of the target region), based on their uses
4459 /// inside of the target region.
4460 ///
4461 /// Loop bounds and steps are only optionally populated, if output vectors are
4462 /// provided.
4463 static void
4464 extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads,
4465  Value &numTeamsLower, Value &numTeamsUpper,
4466  Value &threadLimit,
4467  llvm::SmallVectorImpl<Value> *lowerBounds = nullptr,
4468  llvm::SmallVectorImpl<Value> *upperBounds = nullptr,
4469  llvm::SmallVectorImpl<Value> *steps = nullptr) {
4470  auto blockArgIface = llvm::cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
4471  for (auto item : llvm::zip_equal(targetOp.getHostEvalVars(),
4472  blockArgIface.getHostEvalBlockArgs())) {
4473  Value hostEvalVar = std::get<0>(item), blockArg = std::get<1>(item);
4474 
4475  for (Operation *user : blockArg.getUsers()) {
4477  .Case([&](omp::TeamsOp teamsOp) {
4478  if (teamsOp.getNumTeamsLower() == blockArg)
4479  numTeamsLower = hostEvalVar;
4480  else if (teamsOp.getNumTeamsUpper() == blockArg)
4481  numTeamsUpper = hostEvalVar;
4482  else if (teamsOp.getThreadLimit() == blockArg)
4483  threadLimit = hostEvalVar;
4484  else
4485  llvm_unreachable("unsupported host_eval use");
4486  })
4487  .Case([&](omp::ParallelOp parallelOp) {
4488  if (parallelOp.getNumThreads() == blockArg)
4489  numThreads = hostEvalVar;
4490  else
4491  llvm_unreachable("unsupported host_eval use");
4492  })
4493  .Case([&](omp::LoopNestOp loopOp) {
4494  auto processBounds =
4495  [&](OperandRange opBounds,
4496  llvm::SmallVectorImpl<Value> *outBounds) -> bool {
4497  bool found = false;
4498  for (auto [i, lb] : llvm::enumerate(opBounds)) {
4499  if (lb == blockArg) {
4500  found = true;
4501  if (outBounds)
4502  (*outBounds)[i] = hostEvalVar;
4503  }
4504  }
4505  return found;
4506  };
4507  bool found =
4508  processBounds(loopOp.getLoopLowerBounds(), lowerBounds);
4509  found = processBounds(loopOp.getLoopUpperBounds(), upperBounds) ||
4510  found;
4511  found = processBounds(loopOp.getLoopSteps(), steps) || found;
4512  (void)found;
4513  assert(found && "unsupported host_eval use");
4514  })
4515  .Default([](Operation *) {
4516  llvm_unreachable("unsupported host_eval use");
4517  });
4518  }
4519  }
4520 }
4521 
4522 /// If \p op is of the given type parameter, return it casted to that type.
4523 /// Otherwise, if its immediate parent operation (or some other higher-level
4524 /// parent, if \p immediateParent is false) is of that type, return that parent
4525 /// casted to the given type.
4526 ///
4527 /// If \p op is \c null or neither it or its parent(s) are of the specified
4528 /// type, return a \c null operation.
4529 template <typename OpTy>
4530 static OpTy castOrGetParentOfType(Operation *op, bool immediateParent = false) {
4531  if (!op)
4532  return OpTy();
4533 
4534  if (OpTy casted = dyn_cast<OpTy>(op))
4535  return casted;
4536 
4537  if (immediateParent)
4538  return dyn_cast_if_present<OpTy>(op->getParentOp());
4539 
4540  return op->getParentOfType<OpTy>();
4541 }
4542 
4543 /// If the given \p value is defined by an \c llvm.mlir.constant operation and
4544 /// it is of an integer type, return its value.
4545 static std::optional<int64_t> extractConstInteger(Value value) {
4546  if (!value)
4547  return std::nullopt;
4548 
4549  if (auto constOp =
4550  dyn_cast_if_present<LLVM::ConstantOp>(value.getDefiningOp()))
4551  if (auto constAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
4552  return constAttr.getInt();
4553 
4554  return std::nullopt;
4555 }
4556 
/// Populate default `MinTeams`, `MaxTeams` and `MaxThreads` to their default
/// values as stated by the corresponding clauses, if constant.
///
/// These default values must be set before the creation of the outlined LLVM
/// function for the target region, so that they can be used to initialize the
/// corresponding global `ConfigurationEnvironmentTy` structure.
///
/// \param targetOp       the `omp.target` operation being translated.
/// \param capturedOp     innermost operation captured by the target region,
///                       inspected for `teams`/`parallel`/`simd` ancestors
///                       whose clauses impact the kernel's default bounds.
/// \param attrs          output structure receiving the computed bounds.
/// \param isTargetDevice when true, clause values are read directly from the
///                       captured operations instead of `host_eval` arguments.
static void
initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp,
                       llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs,
                       bool isTargetDevice) {
  // TODO: Handle constant 'if' clauses.

  Value numThreads, numTeamsLower, numTeamsUpper, threadLimit;
  if (!isTargetDevice) {
    extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
                           threadLimit);
  } else {
    // In the target device, values for these clauses are not passed as
    // host_eval, but instead evaluated prior to entry to the region. This
    // ensures values are mapped and available inside of the target region.
    if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
      numTeamsLower = teamsOp.getNumTeamsLower();
      numTeamsUpper = teamsOp.getNumTeamsUpper();
      threadLimit = teamsOp.getThreadLimit();
    }

    if (auto parallelOp = castOrGetParentOfType<omp::ParallelOp>(capturedOp))
      numThreads = parallelOp.getNumThreads();
  }

  // Handle clauses impacting the number of teams.

  int32_t minTeamsVal = 1, maxTeamsVal = -1;
  if (castOrGetParentOfType<omp::TeamsOp>(capturedOp)) {
    // TODO: Use `hostNumTeamsLower` to initialize `minTeamsVal`. For now, match
    // clang and set min and max to the same value.
    if (numTeamsUpper) {
      if (auto val = extractConstInteger(numTeamsUpper))
        minTeamsVal = maxTeamsVal = *val;
    } else {
      // `teams` present but no `num_teams` clause: set but unknown.
      minTeamsVal = maxTeamsVal = 0;
    }
  } else if (castOrGetParentOfType<omp::ParallelOp>(capturedOp,
                                                    /*immediateParent=*/true) ||
             castOrGetParentOfType<omp::SimdOp>(capturedOp,
                                                /*immediateParent=*/true)) {
    // `parallel`/`simd` directly nested in `target` executes as one team.
    minTeamsVal = maxTeamsVal = 1;
  } else {
    minTeamsVal = maxTeamsVal = -1;
  }

  // Handle clauses impacting the number of threads.

  // Copies a constant clause value into `result`, or marks it as 0 ("set but
  // unknown") when the clause is present but not a compile-time constant.
  auto setMaxValueFromClause = [](Value clauseValue, int32_t &result) {
    if (!clauseValue)
      return;

    if (auto val = extractConstInteger(clauseValue))
      result = *val;

    // Found an applicable clause, so it's not undefined. Mark as unknown
    // because it's not constant.
    if (result < 0)
      result = 0;
  };

  // Extract 'thread_limit' clause from 'target' and 'teams' directives.
  int32_t targetThreadLimitVal = -1, teamsThreadLimitVal = -1;
  setMaxValueFromClause(targetOp.getThreadLimit(), targetThreadLimitVal);
  setMaxValueFromClause(threadLimit, teamsThreadLimitVal);

  // Extract 'max_threads' clause from 'parallel' or set to 1 if it's SIMD.
  int32_t maxThreadsVal = -1;
  if (castOrGetParentOfType<omp::ParallelOp>(capturedOp))
    setMaxValueFromClause(numThreads, maxThreadsVal);
  else if (castOrGetParentOfType<omp::SimdOp>(capturedOp,
                                              /*immediateParent=*/true))
    maxThreadsVal = 1;

  // For max values, < 0 means unset, == 0 means set but unknown. Select the
  // minimum value between 'max_threads' and 'thread_limit' clauses that were
  // set.
  int32_t combinedMaxThreadsVal = targetThreadLimitVal;
  if (combinedMaxThreadsVal < 0 ||
      (teamsThreadLimitVal >= 0 && teamsThreadLimitVal < combinedMaxThreadsVal))
    combinedMaxThreadsVal = teamsThreadLimitVal;

  if (combinedMaxThreadsVal < 0 ||
      (maxThreadsVal >= 0 && maxThreadsVal < combinedMaxThreadsVal))
    combinedMaxThreadsVal = maxThreadsVal;

  // Update kernel bounds structure for the `OpenMPIRBuilder` to use.
  attrs.ExecFlags = targetOp.getKernelExecFlags(capturedOp);
  attrs.MinTeams = minTeamsVal;
  attrs.MaxTeams.front() = maxTeamsVal;
  attrs.MinThreads = 1;
  attrs.MaxThreads.front() = combinedMaxThreadsVal;
}
4655 
/// Gather LLVM runtime values for all clauses evaluated in the host that are
/// passed to the kernel invocation.
///
/// This function must be called only when compiling for the host. Also, it will
/// only provide correct results if it's called after the body of \c targetOp
/// has been fully generated.
static void
initTargetRuntimeAttrs(llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation,
                       omp::TargetOp targetOp, Operation *capturedOp,
                       llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs) {
  omp::LoopNestOp loopOp = castOrGetParentOfType<omp::LoopNestOp>(capturedOp);
  unsigned numLoops = loopOp ? loopOp.getNumLoops() : 0;

  // Values attached through the `host_eval` clause, evaluated on the host
  // prior to launching the kernel.
  Value numThreads, numTeamsLower, numTeamsUpper, teamsThreadLimit;
  llvm::SmallVector<Value> lowerBounds(numLoops), upperBounds(numLoops),
      steps(numLoops);
  extractHostEvalClauses(targetOp, numThreads, numTeamsLower, numTeamsUpper,
                         teamsThreadLimit, &lowerBounds, &upperBounds, &steps);

  // TODO: Handle constant 'if' clauses.
  if (Value targetThreadLimit = targetOp.getThreadLimit())
    attrs.TargetThreadLimit.front() =
        moduleTranslation.lookupValue(targetThreadLimit);

  if (numTeamsLower)
    attrs.MinTeams = moduleTranslation.lookupValue(numTeamsLower);

  if (numTeamsUpper)
    attrs.MaxTeams.front() = moduleTranslation.lookupValue(numTeamsUpper);

  if (teamsThreadLimit)
    attrs.TeamsThreadLimit.front() =
        moduleTranslation.lookupValue(teamsThreadLimit);

  if (numThreads)
    attrs.MaxThreads = moduleTranslation.lookupValue(numThreads);

  // Only kernels not in generic execution mode get a pre-computed loop trip
  // count for the launch.
  if (targetOp.getKernelExecFlags(capturedOp) !=
      llvm::omp::OMP_TGT_EXEC_MODE_GENERIC) {
    llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
    attrs.LoopTripCount = nullptr;

    // To calculate the trip count, we multiply together the trip counts of
    // every collapsed canonical loop. We don't need to create the loop nests
    // here, since we're only interested in the trip count.
    for (auto [loopLower, loopUpper, loopStep] :
         llvm::zip_equal(lowerBounds, upperBounds, steps)) {
      llvm::Value *lowerBound = moduleTranslation.lookupValue(loopLower);
      llvm::Value *upperBound = moduleTranslation.lookupValue(loopUpper);
      llvm::Value *step = moduleTranslation.lookupValue(loopStep);

      llvm::OpenMPIRBuilder::LocationDescription loc(builder);
      llvm::Value *tripCount = ompBuilder->calculateCanonicalLoopTripCount(
          loc, lowerBound, upperBound, step, /*IsSigned=*/true,
          loopOp.getLoopInclusive());

      if (!attrs.LoopTripCount) {
        attrs.LoopTripCount = tripCount;
        continue;
      }

      // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
      attrs.LoopTripCount = builder.CreateMul(attrs.LoopTripCount, tripCount,
                                              {}, /*HasNUW=*/true);
    }
  }
}
4724 
/// Translate an `omp.target` operation to LLVM IR via
/// `OpenMPIRBuilder::createTarget`, setting up map/privatization information,
/// kernel default/runtime attributes, and the callbacks that generate the
/// outlined region body and its argument accessors.
static LogicalResult
convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  auto targetOp = cast<omp::TargetOp>(opInst);
  // Bail out early on clauses this translation does not support yet.
  if (failed(checkImplementationStatus(opInst)))
    return failure();

  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  bool isTargetDevice = ompBuilder->Config.isTargetDevice();

  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
  auto argIface = cast<omp::BlockArgOpenMPOpInterface>(opInst);
  auto &targetRegion = targetOp.getRegion();
  // Holds the private vars that have been mapped along with the block argument
  // that corresponds to the MapInfoOp corresponding to the private var in
  // question. So, for instance:
  //
  // %10 = omp.map.info var_ptr(%6#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, ..)
  // omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1)
  //
  // Then, %10 has been created so that the descriptor can be used by the
  // privatizer @box.privatizer on the device side. Here we'd record {%6#0,
  // %arg0} in the mappedPrivateVars map.
  llvm::DenseMap<Value, Value> mappedPrivateVars;
  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
  SmallVector<Value> mapVars = targetOp.getMapVars();
  SmallVector<Value> hdaVars = targetOp.getHasDeviceAddrVars();
  ArrayRef<BlockArgument> mapBlockArgs = argIface.getMapBlockArgs();
  ArrayRef<BlockArgument> hdaBlockArgs = argIface.getHasDeviceAddrBlockArgs();
  llvm::Function *llvmOutlinedFn = nullptr;

  // TODO: It can also be false if a compile-time constant `false` IF clause is
  // specified.
  bool isOffloadEntry =
      isTargetDevice || !ompBuilder->Config.TargetTriples.empty();

  // For some private variables, the MapsForPrivatizedVariablesPass
  // creates MapInfoOp instances. Go through the private variables and
  // the mapped variables so that during code generation we are able
  // to quickly look up the corresponding map variable, if any for each
  // private variable.
  if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) {
    OperandRange privateVars = targetOp.getPrivateVars();
    std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
    std::optional<DenseI64ArrayAttr> privateMapIndices =
        targetOp.getPrivateMapsAttr();

    for (auto [privVarIdx, privVarSymPair] :
         llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) {
      auto privVar = std::get<0>(privVarSymPair);
      auto privSym = std::get<1>(privVarSymPair);

      SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
      omp::PrivateClauseOp privatizer =
          findPrivatizer(targetOp, privatizerName);

      // Privatizers that don't require a map entry need no bookkeeping here.
      if (!privatizer.needsMap())
        continue;

      mlir::Value mappedValue =
          targetOp.getMappedValueForPrivateVar(privVarIdx);
      assert(mappedValue && "Expected to find mapped value for a privatized "
                            "variable that needs mapping");

      // The MapInfoOp defining the map var isn't really needed later.
      // So, we don't store it in any data structure. Instead, we just
      // do some sanity checks on it right now.
      auto mapInfoOp = mappedValue.getDefiningOp<omp::MapInfoOp>();
      [[maybe_unused]] Type varType = mapInfoOp.getVarType();

      // Check #1: Check that the type of the private variable matches
      // the type of the variable being mapped.
      if (!isa<LLVM::LLVMPointerType>(privVar.getType()))
        assert(
            varType == privVar.getType() &&
            "Type of private var doesn't match the type of the mapped value");

      // Ok, only 1 sanity check for now.
      // Record the block argument corresponding to this map var.
      mappedPrivateVars.insert(
          {privVar,
           targetRegion.getArgument(argIface.getMapBlockArgsStart() +
                                    (*privateMapIndices)[privVarIdx])});
    }
  }

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  // Callback generating the body of the outlined kernel / host fallback.
  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
    llvm::IRBuilderBase::InsertPointGuard guard(builder);
    builder.SetCurrentDebugLocation(llvm::DebugLoc());
    // Forward target-cpu and target-features function attributes from the
    // original function to the new outlined function.
    llvm::Function *llvmParentFn =
        moduleTranslation.lookupFunction(parentFn.getName());
    llvmOutlinedFn = codeGenIP.getBlock()->getParent();
    assert(llvmParentFn && llvmOutlinedFn &&
           "Both parent and outlined functions must exist at this point");

    if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
        attr.isStringAttribute())
      llvmOutlinedFn->addFnAttr(attr);

    if (auto attr = llvmParentFn->getFnAttribute("target-features");
        attr.isStringAttribute())
      llvmOutlinedFn->addFnAttr(attr);

    // Map the entry block arguments for the `map` and `has_device_addr`
    // clauses to the LLVM values of the variables they capture.
    for (auto [arg, mapOp] : llvm::zip_equal(mapBlockArgs, mapVars)) {
      auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
      llvm::Value *mapOpValue =
          moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
      moduleTranslation.mapValue(arg, mapOpValue);
    }
    for (auto [arg, mapOp] : llvm::zip_equal(hdaBlockArgs, hdaVars)) {
      auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
      llvm::Value *mapOpValue =
          moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
      moduleTranslation.mapValue(arg, mapOpValue);
    }

    // Do privatization after moduleTranslation has already recorded
    // mapped values.
    PrivateVarsInfo privateVarsInfo(targetOp);

    llvm::Expected<llvm::BasicBlock *> afterAllocas =
        allocatePrivateVars(builder, moduleTranslation, privateVarsInfo,
                            allocaIP, &mappedPrivateVars);

    if (failed(handleError(afterAllocas, *targetOp)))
      return llvm::make_error<PreviouslyReportedError>();

    builder.restoreIP(codeGenIP);
    if (handleError(initPrivateVars(builder, moduleTranslation, privateVarsInfo,
                                    &mappedPrivateVars),
                    *targetOp)
            .failed())
      return llvm::make_error<PreviouslyReportedError>();

    // Collect the `dealloc` regions of all privatizers so cleanup can be
    // inlined at the exit of the region.
    SmallVector<Region *> privateCleanupRegions;
    llvm::transform(privateVarsInfo.privatizers,
                    std::back_inserter(privateCleanupRegions),
                    [](omp::PrivateClauseOp privatizer) {
                      return &privatizer.getDeallocRegion();
                    });

        targetRegion, "omp.target", builder, moduleTranslation);

    if (!exitBlock)
      return exitBlock.takeError();

    builder.SetInsertPoint(*exitBlock);
    if (!privateCleanupRegions.empty()) {
      if (failed(inlineOmpRegionCleanup(
              privateCleanupRegions, privateVarsInfo.llvmVars,
              moduleTranslation, builder, "omp.targetop.private.cleanup",
              /*shouldLoadCleanupRegionArg=*/false))) {
        return llvm::createStringError(
            "failed to inline `dealloc` region of `omp.private` "
            "op in the target region");
      }
      return builder.saveIP();
    }

    return InsertPointTy(exitBlock.get(), exitBlock.get()->end());
  };

  StringRef parentName = parentFn.getName();

  llvm::TargetRegionEntryInfo entryInfo;

  getTargetEntryUniqueInfo(entryInfo, targetOp, parentName);

  MapInfoData mapData;
  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
                                builder, /*useDevPtrOperands=*/{},
                                /*useDevAddrOperands=*/{}, hdaVars);

  MapInfosTy combinedInfos;
  // Callback computing the combined map information at the given insertion
  // point.
  auto genMapInfoCB =
      [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) -> MapInfosTy & {
    builder.restoreIP(codeGenIP);
    genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, true);
    return combinedInfos;
  };

  // Callback producing the value used inside the region for each kernel
  // argument.
  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
                           llvm::Value *&retVal, InsertPointTy allocaIP,
                           InsertPointTy codeGenIP)
      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
    llvm::IRBuilderBase::InsertPointGuard guard(builder);
    builder.SetCurrentDebugLocation(llvm::DebugLoc());
    // We just return the unaltered argument for the host function
    // for now, some alterations may be required in the future to
    // keep host fallback functions working identically to the device
    // version (e.g. pass ByCopy values should be treated as such on
    // host and device, currently not always the case)
    if (!isTargetDevice) {
      retVal = cast<llvm::Value>(&arg);
      return codeGenIP;
    }

    return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
                                        *ompBuilder, moduleTranslation,
                                        allocaIP, codeGenIP);
  };

  llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs runtimeAttrs;
  llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs defaultAttrs;
  Operation *targetCapturedOp = targetOp.getInnermostCapturedOmpOp();
  initTargetDefaultAttrs(targetOp, targetCapturedOp, defaultAttrs,
                         isTargetDevice);

  // Collect host-evaluated values needed to properly launch the kernel from the
  // host.
  if (!isTargetDevice)
    initTargetRuntimeAttrs(builder, moduleTranslation, targetOp,
                           targetCapturedOp, runtimeAttrs);

  // Pass host-evaluated values as parameters to the kernel / host fallback,
  // except if they are constants. In any case, map the MLIR block argument to
  // the corresponding LLVM values.
  SmallVector<Value> hostEvalVars = targetOp.getHostEvalVars();
  ArrayRef<BlockArgument> hostEvalBlockArgs = argIface.getHostEvalBlockArgs();
  for (auto [arg, var] : llvm::zip_equal(hostEvalBlockArgs, hostEvalVars)) {
    llvm::Value *value = moduleTranslation.lookupValue(var);
    moduleTranslation.mapValue(arg, value);

    if (!llvm::isa<llvm::Constant>(value))
      kernelInput.push_back(value);
  }

  for (size_t i = 0, e = mapData.OriginalValue.size(); i != e; ++i) {
    // declare target arguments are not passed to kernels as arguments
    // TODO: We currently do not handle cases where a member is explicitly
    // passed in as an argument, this will likely need to be handled in
    // the near future, rather than using IsAMember, it may be better to
    // test if the relevant BlockArg is used within the target region and
    // then use that as a basis for exclusion in the kernel inputs.
    if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
      kernelInput.push_back(mapData.OriginalValue[i]);
  }

  buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
                  moduleTranslation, dds);

  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  llvm::OpenMPIRBuilder::TargetDataInfo info(
      /*RequiresDevicePointerInfo=*/false,
      /*SeparateBeginEndCalls=*/true);

  // Callback returning the user-defined mapper function for map entry `i`, if
  // any.
  auto customMapperCB =
      [&](unsigned int i) -> llvm::Expected<llvm::Function *> {
    if (!combinedInfos.Mappers[i])
      return nullptr;
    info.HasMapper = true;
    return getOrCreateUserDefinedMapperFunc(combinedInfos.Mappers[i], builder,
                                            moduleTranslation);
  };

  llvm::Value *ifCond = nullptr;
  if (Value targetIfCond = targetOp.getIfExpr())
    ifCond = moduleTranslation.lookupValue(targetIfCond);

  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createTarget(
          ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), info, entryInfo,
          defaultAttrs, runtimeAttrs, ifCond, kernelInput, genMapInfoCB, bodyCB,
          argAccessorCB, customMapperCB, dds, targetOp.getNowait());

  if (failed(handleError(afterIP, opInst)))
    return failure();

  builder.restoreIP(*afterIP);

  // Remap access operations to declare target reference pointers for the
  // device, essentially generating extra loadop's as necessary
  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
    handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
                              llvmOutlinedFn);

  return success();
}
5013 
/// Apply the `omp.declare_target` attribute during translation: on the device,
/// erase host-only wrapper functions, and register declare-target global
/// variables with the `OpenMPIRBuilder`.
static LogicalResult
convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
                         LLVM::ModuleTranslation &moduleTranslation) {
  // Amend omp.declare_target by deleting the IR of the outlined functions
  // created for target regions. They cannot be filtered out from MLIR earlier
  // because the omp.target operation inside must be translated to LLVM, but
  // the wrapper functions themselves must not remain at the end of the
  // process. We know that functions where omp.declare_target does not match
  // omp.is_target_device at this stage can only be wrapper functions because
  // those that aren't are removed earlier as an MLIR transformation pass.
  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
    if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
            op->getParentOfType<ModuleOp>().getOperation())) {
      // Nothing to do when compiling for the host.
      if (!offloadMod.getIsTargetDevice())
        return success();

      omp::DeclareTargetDeviceType declareType =
          attribute.getDeviceType().getValue();

      if (declareType == omp::DeclareTargetDeviceType::host) {
        // NOTE(review): lookupFunction result is assumed non-null here —
        // verify against the translation flow.
        llvm::Function *llvmFunc =
            moduleTranslation.lookupFunction(funcOp.getName());
        llvmFunc->dropAllReferences();
        llvmFunc->eraseFromParent();
      }
    }
    return success();
  }

  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
    llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
    if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
      llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
      bool isDeclaration = gOp.isDeclaration();
      bool isExternallyVisible =
          gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
      auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
      llvm::StringRef mangledName = gOp.getSymName();
      auto captureClause =
          convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
      auto deviceClause =
          convertToDeviceClauseKind(attribute.getDeviceType().getValue());
      // unused for MLIR at the moment, required in Clang for bookkeeping
      std::vector<llvm::GlobalVariable *> generatedRefs;

      // The module's target triple, if set, is the only triple forwarded.
      std::vector<llvm::Triple> targetTriple;
      auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>(
          op->getParentOfType<mlir::ModuleOp>()->getAttr(
              LLVM::LLVMDialect::getTargetTripleAttrName()));
      if (targetTripleAttr)
        targetTriple.emplace_back(targetTripleAttr.data());

      // Supplies the source file name and line number for the unique entry
      // info; falls back to an empty name and line 0 when the location does
      // not carry file/line information.
      auto fileInfoCallBack = [&loc]() {
        std::string filename = "";
        std::uint64_t lineNo = 0;

        if (loc) {
          filename = loc.getFilename().str();
          lineNo = loc.getLine();
        }

        return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
                                                     lineNo);
      };

      ompBuilder->registerTargetGlobalVariable(
          captureClause, deviceClause, isDeclaration, isExternallyVisible,
          ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
          generatedRefs, /*OpenMPSimd*/ false, targetTriple,
          /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
          gVal->getType(), gVal);

      if (ompBuilder->Config.isTargetDevice() &&
          (attribute.getCaptureClause().getValue() !=
               mlir::omp::DeclareTargetCaptureClause::to ||
           ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
        ompBuilder->getAddrOfDeclareTargetVar(
            captureClause, deviceClause, isDeclaration, isExternallyVisible,
            ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
            generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
            /*GlobalInitializer*/ nullptr,
            /*VariableLinkage*/ nullptr);
      }
    }
  }

  return success();
}
5103 
5104 // Returns true if the operation is inside a TargetOp or
5105 // is part of a declare target function.
5106 static bool isTargetDeviceOp(Operation *op) {
5107  // Assumes no reverse offloading
5108  if (op->getParentOfType<omp::TargetOp>())
5109  return true;
5110 
5111  // Certain operations return results, and whether utilised in host or
5112  // target there is a chance an LLVM Dialect operation depends on it
5113  // by taking it in as an operand, so we must always lower these in
5114  // some manner or result in an ICE (whether they end up in a no-op
5115  // or otherwise).
5116  if (mlir::isa<omp::ThreadprivateOp>(op))
5117  return true;
5118 
5119  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
5120  if (auto declareTargetIface =
5121  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
5122  parentFn.getOperation()))
5123  if (declareTargetIface.isDeclareTarget() &&
5124  declareTargetIface.getDeclareTargetDeviceType() !=
5125  mlir::omp::DeclareTargetDeviceType::host)
5126  return true;
5127 
5128  return false;
5129 }
5130 
/// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
/// OpenMP runtime calls).
static LogicalResult
convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
                             LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

  // For each loop, introduce one stack frame to hold loop information. Ensure
  // this is only done for the outermost loop wrapper to prevent introducing
  // multiple stack frames for a single loop. Initially set to null, the loop
  // information structure is initialized during translation of the nested
  // omp.loop_nest operation, making it available to translation of all loop
  // wrappers after their body has been successfully translated.
  bool isOutermostLoopWrapper =
      isa_and_present<omp::LoopWrapperInterface>(op) &&
      !dyn_cast_if_present<omp::LoopWrapperInterface>(op->getParentOp());

  if (isOutermostLoopWrapper)
    moduleTranslation.stackPush<OpenMPLoopInfoStackFrame>();

  // Dispatch on the concrete operation type; each case delegates to the
  // corresponding convertOmp* helper.
  auto result =
          .Case([&](omp::BarrierOp op) -> LogicalResult {
            if (failed(checkImplementationStatus(*op)))
              return failure();

            llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
                ompBuilder->createBarrier(builder.saveIP(),
                                          llvm::omp::OMPD_barrier);
            return handleError(afterIP, *op);
          })
          .Case([&](omp::TaskyieldOp op) {
            if (failed(checkImplementationStatus(*op)))
              return failure();

            ompBuilder->createTaskyield(builder.saveIP());
            return success();
          })
          .Case([&](omp::FlushOp op) {
            if (failed(checkImplementationStatus(*op)))
              return failure();

            // No support in Openmp runtime function (__kmpc_flush) to accept
            // the argument list.
            // OpenMP standard states the following:
            // "An implementation may implement a flush with a list by ignoring
            // the list, and treating it the same as a flush without a list."
            //
            // The argument list is discarded so that, flush with a list is
            // treated same as a flush without a list.
            ompBuilder->createFlush(builder.saveIP());
            return success();
          })
          .Case([&](omp::ParallelOp op) {
            return convertOmpParallel(op, builder, moduleTranslation);
          })
          .Case([&](omp::MaskedOp) {
            return convertOmpMasked(*op, builder, moduleTranslation);
          })
          .Case([&](omp::MasterOp) {
            return convertOmpMaster(*op, builder, moduleTranslation);
          })
          .Case([&](omp::CriticalOp) {
            return convertOmpCritical(*op, builder, moduleTranslation);
          })
          .Case([&](omp::OrderedRegionOp) {
            return convertOmpOrderedRegion(*op, builder, moduleTranslation);
          })
          .Case([&](omp::OrderedOp) {
            return convertOmpOrdered(*op, builder, moduleTranslation);
          })
          .Case([&](omp::WsloopOp) {
            return convertOmpWsloop(*op, builder, moduleTranslation);
          })
          .Case([&](omp::SimdOp) {
            return convertOmpSimd(*op, builder, moduleTranslation);
          })
          .Case([&](omp::AtomicReadOp) {
            return convertOmpAtomicRead(*op, builder, moduleTranslation);
          })
          .Case([&](omp::AtomicWriteOp) {
            return convertOmpAtomicWrite(*op, builder, moduleTranslation);
          })
          .Case([&](omp::AtomicUpdateOp op) {
            return convertOmpAtomicUpdate(op, builder, moduleTranslation);
          })
          .Case([&](omp::AtomicCaptureOp op) {
            return convertOmpAtomicCapture(op, builder, moduleTranslation);
          })
          .Case([&](omp::SectionsOp) {
            return convertOmpSections(*op, builder, moduleTranslation);
          })
          .Case([&](omp::SingleOp op) {
            return convertOmpSingle(op, builder, moduleTranslation);
          })
          .Case([&](omp::TeamsOp op) {
            return convertOmpTeams(op, builder, moduleTranslation);
          })
          .Case([&](omp::TaskOp op) {
            return convertOmpTaskOp(op, builder, moduleTranslation);
          })
          .Case([&](omp::TaskgroupOp op) {
            return convertOmpTaskgroupOp(op, builder, moduleTranslation);
          })
          .Case([&](omp::TaskwaitOp op) {
            return convertOmpTaskwaitOp(op, builder, moduleTranslation);
          })
          .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareMapperOp,
                omp::DeclareMapperInfoOp, omp::DeclareReductionOp,
                omp::CriticalDeclareOp>([](auto op) {
            // `yield` and `terminator` can be just omitted. The block structure
            // was created in the region that handles their parent operation.
            // `declare_reduction` will be used by reductions and is not
            // converted directly, skip it.
            // `declare_mapper` and `declare_mapper.info` are handled whenever
            // they are referred to through a `map` clause.
            // `critical.declare` is only used to declare names of critical
            // sections which will be used by `critical` ops and hence can be
            // ignored for lowering. The OpenMP IRBuilder will create unique
            // name for critical section names.
            return success();
          })
          .Case([&](omp::ThreadprivateOp) {
            return convertOmpThreadprivate(*op, builder, moduleTranslation);
          })
          .Case<omp::TargetDataOp, omp::TargetEnterDataOp,
                omp::TargetExitDataOp, omp::TargetUpdateOp>([&](auto op) {
            return convertOmpTargetData(op, builder, moduleTranslation);
          })
          .Case([&](omp::TargetOp) {
            return convertOmpTarget(*op, builder, moduleTranslation);
          })
          .Case([&](omp::DistributeOp) {
            return convertOmpDistribute(*op, builder, moduleTranslation);
          })
          .Case([&](omp::LoopNestOp) {
            return convertOmpLoopNest(*op, builder, moduleTranslation);
          })
          .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
              [&](auto op) {
                // No-op, should be handled by relevant owning operations e.g.
                // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
                // etc. and then discarded
                return success();
              })
          .Default([&](Operation *inst) {
            return inst->emitError()
                   << "not yet implemented: " << inst->getName();
          });

  if (isOutermostLoopWrapper)
    moduleTranslation.stackPop();

  return result;
}
5286 
5287 static LogicalResult
5288 convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
5289  LLVM::ModuleTranslation &moduleTranslation) {
5290  return convertHostOrTargetOperation(op, builder, moduleTranslation);
5291 }
5292 
/// Translate only the target-related operations nested under \p op. Other
/// OpenMP operations encountered during the walk are translated as "fake"
/// non-OpenMP scopes so that nested target-related ops can look up the LLVM
/// values their parents define.
static LogicalResult
convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  // Handle the two directly-translatable roots without walking.
  if (isa<omp::TargetOp>(op))
    return convertOmpTarget(*op, builder, moduleTranslation);
  if (isa<omp::TargetDataOp>(op))
    return convertOmpTargetData(op, builder, moduleTranslation);
  bool interrupted =
      op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
          if (isa<omp::TargetOp>(oper)) {
            if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
              return WalkResult::interrupt();
            // Skip the target op's body; it was handled above.
            return WalkResult::skip();
          }
          if (isa<omp::TargetDataOp>(oper)) {
            if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
              return WalkResult::interrupt();
            return WalkResult::skip();
          }

          // Non-target ops might nest target-related ops, therefore, we
          // translate them as non-OpenMP scopes. Translating them is needed by
          // nested target-related ops since they might need LLVM values defined
          // in their parent non-target ops.
          if (isa<omp::OpenMPDialect>(oper->getDialect()) &&
              oper->getParentOfType<LLVM::LLVMFuncOp>() &&
              !oper->getRegions().empty()) {
            if (auto blockArgsIface =
                    dyn_cast<omp::BlockArgOpenMPOpInterface>(oper))
              forwardArgs(moduleTranslation, blockArgsIface);
            else {
              // Here we map entry block arguments of
              // non-BlockArgOpenMPOpInterface ops if they can be encountered
              // inside of a function and they define any of these arguments.
              if (isa<mlir::omp::AtomicUpdateOp>(oper))
                for (auto [operand, arg] :
                     llvm::zip_equal(oper->getOperands(),
                                     oper->getRegion(0).getArguments())) {
                  moduleTranslation.mapValue(
                      arg, builder.CreateLoad(
                               moduleTranslation.convertType(arg.getType()),
                               moduleTranslation.lookupValue(operand)));
                }
            }

            if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) {
              assert(builder.GetInsertBlock() &&
                     "No insert block is set for the builder");
              for (auto iv : loopNest.getIVs()) {
                // Map iv to an undefined value just to keep the IR validity.
                moduleTranslation.mapValue(
                        moduleTranslation.convertType(iv.getType())));
              }
            }

            for (Region &region : oper->getRegions()) {
              // Regions are fake in the sense that they are not a truthful
              // translation of the OpenMP construct being converted (e.g. no
              // OpenMP runtime calls will be generated). We just need this to
              // prepare the kernel invocation args.
              auto result = convertOmpOpRegions(
                  region, oper->getName().getStringRef().str() + ".fake.region",
                  builder, moduleTranslation, &phis);
              if (failed(handleError(result, *oper)))
                return WalkResult::interrupt();

              builder.SetInsertPoint(result.get(), result.get()->end());
            }

            return WalkResult::skip();
          }

          return WalkResult::advance();
        }).wasInterrupted();
  return failure(interrupted);
}
5371 
5372 namespace {
5373 
5374 /// Implementation of the dialect interface that converts operations belonging
5375 /// to the OpenMP dialect to LLVM IR.
5376 class OpenMPDialectLLVMIRTranslationInterface
5378 public:
5380 
5381  /// Translates the given operation to LLVM IR using the provided IR builder
5382  /// and saving the state in `moduleTranslation`.
5383  LogicalResult
5384  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
5385  LLVM::ModuleTranslation &moduleTranslation) const final;
5386 
5387  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
5388  /// runtime calls, or operation amendments
5389  LogicalResult
5391  NamedAttribute attribute,
5392  LLVM::ModuleTranslation &moduleTranslation) const final;
5393 };
5394 
5395 } // namespace
5396 
5397 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
5398  Operation *op, ArrayRef<llvm::Instruction *> instructions,
5399  NamedAttribute attribute,
5400  LLVM::ModuleTranslation &moduleTranslation) const {
5401  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
5402  attribute.getName())
5403  .Case("omp.is_target_device",
5404  [&](Attribute attr) {
5405  if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
5406  llvm::OpenMPIRBuilderConfig &config =
5407  moduleTranslation.getOpenMPBuilder()->Config;
5408  config.setIsTargetDevice(deviceAttr.getValue());
5409  return success();
5410  }
5411  return failure();
5412  })
5413  .Case("omp.is_gpu",
5414  [&](Attribute attr) {
5415  if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
5416  llvm::OpenMPIRBuilderConfig &config =
5417  moduleTranslation.getOpenMPBuilder()->Config;
5418  config.setIsGPU(gpuAttr.getValue());
5419  return success();
5420  }
5421  return failure();
5422  })
5423  .Case("omp.host_ir_filepath",
5424  [&](Attribute attr) {
5425  if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
5426  llvm::OpenMPIRBuilder *ompBuilder =
5427  moduleTranslation.getOpenMPBuilder();
5428  ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
5429  return success();
5430  }
5431  return failure();
5432  })
5433  .Case("omp.flags",
5434  [&](Attribute attr) {
5435  if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
5436  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
5437  return failure();
5438  })
5439  .Case("omp.version",
5440  [&](Attribute attr) {
5441  if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
5442  llvm::OpenMPIRBuilder *ompBuilder =
5443  moduleTranslation.getOpenMPBuilder();
5444  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
5445  versionAttr.getVersion());
5446  return success();
5447  }
5448  return failure();
5449  })
5450  .Case("omp.declare_target",
5451  [&](Attribute attr) {
5452  if (auto declareTargetAttr =
5453  dyn_cast<omp::DeclareTargetAttr>(attr))
5454  return convertDeclareTargetAttr(op, declareTargetAttr,
5455  moduleTranslation);
5456  return failure();
5457  })
5458  .Case("omp.requires",
5459  [&](Attribute attr) {
5460  if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
5461  using Requires = omp::ClauseRequires;
5462  Requires flags = requiresAttr.getValue();
5463  llvm::OpenMPIRBuilderConfig &config =
5464  moduleTranslation.getOpenMPBuilder()->Config;
5465  config.setHasRequiresReverseOffload(
5466  bitEnumContainsAll(flags, Requires::reverse_offload));
5467  config.setHasRequiresUnifiedAddress(
5468  bitEnumContainsAll(flags, Requires::unified_address));
5469  config.setHasRequiresUnifiedSharedMemory(
5470  bitEnumContainsAll(flags, Requires::unified_shared_memory));
5471  config.setHasRequiresDynamicAllocators(
5472  bitEnumContainsAll(flags, Requires::dynamic_allocators));
5473  return success();
5474  }
5475  return failure();
5476  })
5477  .Case("omp.target_triples",
5478  [&](Attribute attr) {
5479  if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
5480  llvm::OpenMPIRBuilderConfig &config =
5481  moduleTranslation.getOpenMPBuilder()->Config;
5482  config.TargetTriples.clear();
5483  config.TargetTriples.reserve(triplesAttr.size());
5484  for (Attribute tripleAttr : triplesAttr) {
5485  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
5486  config.TargetTriples.emplace_back(tripleStrAttr.getValue());
5487  else
5488  return failure();
5489  }
5490  return success();
5491  }
5492  return failure();
5493  })
5494  .Default([](Attribute) {
5495  // Fall through for omp attributes that do not require lowering.
5496  return success();
5497  })(attribute.getValue());
5498 
5499  return failure();
5500 }
5501 
5502 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
5503 /// (including OpenMP runtime calls).
5504 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
5505  Operation *op, llvm::IRBuilderBase &builder,
5506  LLVM::ModuleTranslation &moduleTranslation) const {
5507 
5508  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
5509  if (ompBuilder->Config.isTargetDevice()) {
5510  if (isTargetDeviceOp(op)) {
5511  return convertTargetDeviceOp(op, builder, moduleTranslation);
5512  } else {
5513  return convertTargetOpsInNest(op, builder, moduleTranslation);
5514  }
5515  }
5516  return convertHostOrTargetOperation(op, builder, moduleTranslation);
5517 }
5518 
5520  registry.insert<omp::OpenMPDialect>();
5521  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
5522  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
5523  });
5524 }
5525 
5527  DialectRegistry registry;
5529  context.appendDialectRegistry(registry);
5530 }
union mlir::linalg::@1183::ArityGroupAndKind::Kind kind
@ None
static llvm::Value * getRefPtrIfDeclareTarget(mlir::Value value, LLVM::ModuleTranslation &moduleTranslation)
static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::Function *func)
static LogicalResult convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static llvm::omp::OrderKind convertOrderKind(std::optional< omp::ClauseOrderKind > o)
Convert Order attribute to llvm::omp::OrderKind.
static llvm::Expected< llvm::Function * > emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::StringRef mapperFuncName)
static LogicalResult convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::Expected< llvm::Value * > initPrivateVar(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg, llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Initialize a single (first)private variable.
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertToCaptureClauseKind(mlir::omp::DeclareTargetCaptureClause captureClause)
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, bool first)
static OpTy castOrGetParentOfType(Operation *op, bool immediateParent=false)
If op is of the given type parameter, return it casted to that type.
static LogicalResult convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered_region' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an omp.atomic.write operation to LLVM IR.
static OwningAtomicReductionGen makeAtomicReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible atomic reduction generator for the given reduction declaration.
static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static llvm::Value * findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Return the llvm::Value * corresponding to the privateVar that is being privatized.
static LogicalResult convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
static LogicalResult cleanupPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, Location loc, SmallVectorImpl< llvm::Value * > &llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls)
static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static llvm::Expected< llvm::BasicBlock * > allocatePrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Allocate and initialize delayed private variables.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op)
Converts an LLVM dialect binary operation to the corresponding enum value for atomicrmw supported bin...
static llvm::AtomicOrdering convertAtomicOrdering(std::optional< omp::ClauseMemoryOrderKind > ao)
Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static void setInsertPointForPossiblyEmptyBlock(llvm::IRBuilderBase &builder, llvm::BasicBlock *block=nullptr)
static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult allocAndInitializeReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef)
static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, MapInfosTy &combinedInfo, bool isTargetParams, int mapDataParentIdx=-1)
static LogicalResult convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
static bool isTargetDeviceOp(Operation *op)
static LogicalResult inlineOmpRegionCleanup(llvm::SmallVectorImpl< Region * > &cleanupRegions, llvm::ArrayRef< llvm::Value * > privateVariables, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, StringRef regionName, bool shouldLoadCleanupRegionArg=true)
handling of DeclareReductionOp's cleanup region
static void mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, DenseMap< Value, llvm::Value * > &reductionVariableMap, unsigned i)
Map input arguments to reduction initialization region.
static LogicalResult initReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::BasicBlock *latestAllocaBlock, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef, SmallVectorImpl< DeferredStore > &deferredStores)
Inline reductions' init regions.
static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp)
static llvm::Error initPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, PrivateVarsInfo &privateVarsInfo, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
static llvm::SmallString< 64 > getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, llvm::OpenMPIRBuilder &ompBuilder)
static OwningReductionGen makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible reduction generator for the given reduction declaration.
static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts the OpenMP parallel operation to LLVM IR.
static llvm::OpenMPIRBuilder::InsertPointTy findAllocaInsertPoint(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Find the insertion point for allocas given the current insertion point for normal operations in the b...
static LogicalResult inlineConvertOmpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::Value * > *continuationBlockArgs=nullptr)
Translates the blocks contained in the given region and appends them to at the current insertion poin...
static LogicalResult convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP Threadprivate operation into LLVM IR using OpenMPIRBuilder.
static omp::PrivateClauseOp findPrivatizer(Operation *from, SymbolRefAttr symbolName)
Looks up from the operation from and returns the PrivateClauseOp with name symbolName.
static LogicalResult convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, bool isTargetParams=false)
static LogicalResult createReductionsAndCleanup(OP op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, ArrayRef< llvm::Value * > privateReductionVariables, ArrayRef< bool > isByRef)
LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
Lowers the FlagsAttr which is applied to the module on the device pass when offloading,...
static void getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, omp::TargetOp targetOp, llvm::StringRef parentName="")
static void extractHostEvalClauses(omp::TargetOp targetOp, Value &numThreads, Value &numTeamsLower, Value &numTeamsUpper, Value &threadLimit, llvm::SmallVectorImpl< Value > *lowerBounds=nullptr, llvm::SmallVectorImpl< Value > *upperBounds=nullptr, llvm::SmallVectorImpl< Value > *steps=nullptr)
Follow uses of host_eval-defined block arguments of the given omp.target operation and populate outpu...
uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl)
std::vector< llvm::Value * > calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, bool isArrayTy, OperandRange bounds)
This function calculates the array/pointer offset for map data provided with bounds operations,...
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static llvm::Expected< llvm::Function * > getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation, omp::BlockArgOpenMPOpInterface blockArgIface)
Maps block arguments from blockArgIface (which are MLIR values) to the corresponding LLVM values of t...
static LogicalResult allocReductionVars(T loop, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, SmallVectorImpl< DeferredStore > &deferredStores, llvm::ArrayRef< bool > isByRefs)
Allocate space for privatized reduction variables.
static ArrayRef< bool > getIsByRef(std::optional< ArrayRef< bool >> attr)
static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP)
static LogicalResult copyFirstPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< mlir::Value > &mlirPrivateVars, ArrayRef< llvm::Value * > llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls)
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind)
Convert ProcBindKind from MLIR-generated enum to LLVM enum.
static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static void collectReductionDecls(T op, SmallVectorImpl< omp::DeclareReductionOp > &reductions)
Populates reductions with reduction declarations used in the given op.
static LogicalResult handleError(llvm::Error error, Operation &op)
static void processMapMembersWithParent(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag)
static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause)
static LogicalResult checkImplementationStatus(Operation &op)
Check whether translation to LLVM IR for the given operation is currently supported.
static llvm::CanonicalLoopInfo * findCurrentLoopInfo(LLVM::ModuleTranslation &moduleTranslation)
Find the loop information structure for the loop nest being translated.
static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert omp.atomic.read operation to LLVM IR.
static std::optional< int64_t > extractConstInteger(Value value)
If the given value is defined by an llvm.mlir.constant operation and it is of an integer type,...
static LogicalResult convertIgnoredWrapper(omp::LoopWrapperInterface opInst, LLVM::ModuleTranslation &moduleTranslation)
Helper function to map block arguments defined by ignored loop wrappers to LLVM values and prevent an...
static void initTargetRuntimeAttrs(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs &attrs)
Gather LLVM runtime values for all clauses evaluated in the host that are passed to the kernel invoca...
static void collectReductionInfo(T loop, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< OwningReductionGen > &owningReductionGens, SmallVectorImpl< OwningAtomicReductionGen > &owningAtomicReductionGens, const ArrayRef< llvm::Value * > privateReductionVariables, SmallVectorImpl< llvm::OpenMPIRBuilder::ReductionInfo > &reductionInfos)
Collect reduction info.
static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &attrs, bool isTargetDevice)
Populate default MinTeams, MaxTeams and MaxThreads to their default values as stated by the correspon...
static LogicalResult convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Given an OpenMP MLIR operation, create the corresponding LLVM IR (including OpenMP runtime calls).
static bool checkIfPointerMap(omp::MapInfoOp mapOp)
static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void buildDependData(std::optional< ArrayAttr > dependKinds, OperandRange dependVars, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::OpenMPIRBuilder::DependData > &dds)
static llvm::Expected< llvm::BasicBlock * > convertOmpOpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::PHINode * > *continuationBlockPHIs=nullptr)
Converts the given region that appears within an OpenMP dialect operation to LLVM IR,...
static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
llvm::Value * getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::Value *basePointer, llvm::Type *baseType, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void collectMapDataFromMapOperands(MapInfoData &mapData, SmallVectorImpl< Value > &mapVars, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::IRBuilderBase &builder, ArrayRef< Value > useDevPtrOperands={}, ArrayRef< Value > useDevAddrOperands={}, ArrayRef< Value > hasDevAddrOperands={})
static bool isDeclareTargetLink(mlir::Value value)
#define MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CLASS_NAME)
Definition: TypeID.h:331
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class represents an argument of a Block.
Definition: Value.h:319
Block represents an ordered list of Operations.
Definition: Block.h:33
BlockArgument getArgument(unsigned i)
Definition: Block.h:129
unsigned getNumArguments()
Definition: Block.h:128
Operation & back()
Definition: Block.h:152
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:246
Operation & front()
Definition: Block.h:153
iterator begin()
Definition: Block.h:143
The main mechanism for performing data layout queries.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
An instance of this location represents a tuple of file, line number, and column number.
Definition: Location.h:181
Base class for dialect interfaces providing translation to LLVM IR.
virtual LogicalResult amendOperation(Operation *op, ArrayRef< llvm::Instruction * > instructions, NamedAttribute attribute, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to act on an operation that has dialect attributes from the derive...
virtual LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to provide translation of the operations to LLVM IR.
Concrete CRTP base class for ModuleTranslation stack frames.
Implementation class for module translation.
llvm::Value * lookupValue(Value value) const
Finds an LLVM IR value corresponding to the given MLIR value.
WalkResult stackWalk(llvm::function_ref< WalkResult(T &)> callback)
Calls callback for every ModuleTranslation stack frame of type T starting from the top of the stack.
void stackPush(Args &&...args)
Creates a stack frame of type T on ModuleTranslation stack.
LogicalResult convertBlock(Block &bb, bool ignoreArguments, llvm::IRBuilderBase &builder)
Translates the contents of the given block to LLVM IR using this translator.
SmallVector< llvm::Value * > lookupValues(ValueRange values)
Looks up remapped a list of remapped values.
void mapFunction(StringRef name, llvm::Function *func)
Stores the mapping between a function name and its LLVM IR representation.
llvm::BasicBlock * lookupBlock(Block *block) const
Finds an LLVM IR basic block that corresponds to the given MLIR block.
SymbolTableCollection & symbolTable()
llvm::Type * convertType(Type type)
Converts the type from MLIR LLVM dialect to LLVM.
llvm::OpenMPIRBuilder * getOpenMPBuilder()
Returns the OpenMP IR builder associated with the LLVM IR module being constructed.
llvm::LLVMContext & getLLVMContext() const
Returns the LLVM context in which the IR is being constructed.
llvm::GlobalValue * lookupGlobal(Operation *op)
Finds an LLVM IR global value that corresponds to the given MLIR operation defining a global value.
llvm::Module * getLLVMModule()
Returns the LLVM module in which the IR is being constructed.
llvm::Function * lookupFunction(StringRef name) const
Finds an LLVM IR function by its name.
void mapBlock(Block *mlir, llvm::BasicBlock *llvm)
Stores the mapping between an MLIR block and LLVM IR basic block.
void stackPop()
Pops the last element from the ModuleTranslation stack.
void forgetMapping(Region &region)
Removes the mapping for blocks contained in the region and values defined in these blocks.
void mapValue(Value mlir, llvm::Value *llvm)
Stores the mapping between an MLIR value and its LLVM IR counterpart.
T findInstanceOf()
Return an instance of the given location type if one is nested under the current location.
Definition: Location.h:44
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:66
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:207
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:55
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:222
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:42
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:350
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:280
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:798
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:346
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:687
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:874
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
BlockArgListType getArguments()
Definition: Region.h:81
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition: Region.h:200
bool empty()
Definition: Region.h:60
unsigned getNumArguments()
Definition: Region.h:123
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
Block & front()
Definition: Region.h:65
@ Private
The symbol is private and may only be referenced by SymbolRefAttrs local to the operations within the...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:33
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: Visitors.h:55
static WalkResult interrupt()
Definition: Visitors.h:50
The OpAsmOpInterface, see OpAsmInterface.td for more details.
Definition: CallGraph.h:229
void connectPHINodes(Region &region, const ModuleTranslation &state)
For all blocks in the region that were converted to LLVM IR using the given ModuleTranslation,...
llvm::Constant * createMappingInformation(Location loc, llvm::OpenMPIRBuilder &builder)
Create a constant string representing the mapping information extracted from the MLIR location inform...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Runtime
Potential runtimes for AMD GPU kernels.
Definition: Runtimes.h:15
inline ::llvm::hash_code hash_value(const PolynomialBase< D, T > &arg)
Definition: Polynomial.h:262
Include the generated interface declarations.
SetVector< Block * > getBlocksSortedByDominance(Region &region)
Gets a list of blocks that is sorted according to dominance.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:305
const FrozenRewritePatternSet GreedyRewriteConfig config
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
void registerOpenMPDialectTranslation(DialectRegistry &registry)
Register the OpenMP dialect and the translation from it to the LLVM IR in the given registry;.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
A util to collect info needed to convert delayed privatizers from MLIR to LLVM.
SmallVector< mlir::Value > mlirVars
SmallVector< omp::PrivateClauseOp > privatizers
MutableArrayRef< BlockArgument > blockArgs
SmallVector< llvm::Value * > llvmVars
RAII object calling stackPush/stackPop on construction/destruction.