// MLIR  20.0.0git
// OpenMPToLLVMIRTranslation.cpp
// Go to the documentation of this file.
1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
18 #include "mlir/IR/IRMapping.h"
19 #include "mlir/IR/Operation.h"
20 #include "mlir/Support/LLVM.h"
24 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/TypeSwitch.h"
28 #include "llvm/Frontend/OpenMP/OMPConstants.h"
29 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
30 #include "llvm/IR/DebugInfoMetadata.h"
31 #include "llvm/IR/IRBuilder.h"
32 #include "llvm/IR/ReplaceConstant.h"
33 #include "llvm/Support/FileSystem.h"
34 #include "llvm/TargetParser/Triple.h"
35 #include "llvm/Transforms/Utils/ModuleUtils.h"
36 
37 #include <any>
38 #include <cstdint>
39 #include <iterator>
40 #include <numeric>
41 #include <optional>
42 #include <utility>
43 
44 using namespace mlir;
45 
46 namespace {
47 static llvm::omp::ScheduleKind
48 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
49  if (!schedKind.has_value())
50  return llvm::omp::OMP_SCHEDULE_Default;
51  switch (schedKind.value()) {
52  case omp::ClauseScheduleKind::Static:
53  return llvm::omp::OMP_SCHEDULE_Static;
54  case omp::ClauseScheduleKind::Dynamic:
55  return llvm::omp::OMP_SCHEDULE_Dynamic;
56  case omp::ClauseScheduleKind::Guided:
57  return llvm::omp::OMP_SCHEDULE_Guided;
58  case omp::ClauseScheduleKind::Auto:
59  return llvm::omp::OMP_SCHEDULE_Auto;
61  return llvm::omp::OMP_SCHEDULE_Runtime;
62  }
63  llvm_unreachable("unhandled schedule clause argument");
64 }
65 
/// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
/// insertion points for allocas.
class OpenMPAllocaStackFrame
    : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)

  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
      : allocaInsertPoint(allocaIP) {}

  /// Insertion point to be used for allocas of nested OpenMP regions while
  /// this frame is on the stack (consumed by findAllocaInsertPoint).
  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
};
77 
78 /// ModuleTranslation stack frame containing the partial mapping between MLIR
79 /// values and their LLVM IR equivalents.
80 class OpenMPVarMappingStackFrame
82  OpenMPVarMappingStackFrame> {
83 public:
84  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame)
85 
86  explicit OpenMPVarMappingStackFrame(
87  const DenseMap<Value, llvm::Value *> &mapping)
88  : mapping(mapping) {}
89 
91 };
92 
93 /// Custom error class to signal translation errors that don't need reporting,
94 /// since encountering them will have already triggered relevant error messages.
95 ///
96 /// Its purpose is to serve as the glue between MLIR failures represented as
97 /// \see LogicalResult instances and \see llvm::Error instances used to
98 /// propagate errors through the \see llvm::OpenMPIRBuilder. Generally, when an
99 /// error of the first type is raised, a message is emitted directly (the \see
100 /// LogicalResult itself does not hold any information). If we need to forward
101 /// this error condition as an \see llvm::Error while avoiding triggering some
102 /// redundant error reporting later on, we need a custom \see llvm::ErrorInfo
103 /// class to just signal this situation has happened.
104 ///
105 /// For example, this class should be used to trigger errors from within
106 /// callbacks passed to the \see OpenMPIRBuilder when they were triggered by the
107 /// translation of their own regions. This unclutters the error log from
108 /// redundant messages.
109 class PreviouslyReportedError
110  : public llvm::ErrorInfo<PreviouslyReportedError> {
111 public:
112  void log(raw_ostream &) const override {
113  // Do not log anything.
114  }
115 
116  std::error_code convertToErrorCode() const override {
117  llvm_unreachable(
118  "PreviouslyReportedError doesn't support ECError conversion");
119  }
120 
121  // Used by ErrorInfo::classID.
122  static char ID;
123 };
124 
126 
127 } // namespace
128 
129 /// Looks up from the operation from and returns the PrivateClauseOp with
130 /// name symbolName
131 static omp::PrivateClauseOp findPrivatizer(Operation *from,
132  SymbolRefAttr symbolName) {
133  omp::PrivateClauseOp privatizer =
134  SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
135  symbolName);
136  assert(privatizer && "privatizer not found in the symbol table");
137  return privatizer;
138 }
139 
140 /// Check whether translation to LLVM IR for the given operation is currently
141 /// supported. If not, descriptive diagnostics will be emitted to let users know
142 /// this is a not-yet-implemented feature.
143 ///
144 /// \returns success if no unimplemented features are needed to translate the
145 /// given operation.
146 static LogicalResult checkImplementationStatus(Operation &op) {
147  auto todo = [&op](StringRef clauseName) {
148  return op.emitError() << "not yet implemented: Unhandled clause "
149  << clauseName << " in " << op.getName()
150  << " operation";
151  };
152 
153  auto checkAligned = [&todo](auto op, LogicalResult &result) {
154  if (!op.getAlignedVars().empty() || op.getAlignments())
155  result = todo("aligned");
156  };
157  auto checkAllocate = [&todo](auto op, LogicalResult &result) {
158  if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
159  result = todo("allocate");
160  };
161  auto checkDepend = [&todo](auto op, LogicalResult &result) {
162  if (!op.getDependVars().empty() || op.getDependKinds())
163  result = todo("depend");
164  };
165  auto checkDevice = [&todo](auto op, LogicalResult &result) {
166  if (op.getDevice())
167  result = todo("device");
168  };
169  auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) {
170  if (!op.getHasDeviceAddrVars().empty())
171  result = todo("has_device_addr");
172  };
173  auto checkHint = [](auto op, LogicalResult &) {
174  if (op.getHint())
175  op.emitWarning("hint clause discarded");
176  };
177  auto checkIf = [&todo](auto op, LogicalResult &result) {
178  if (op.getIfExpr())
179  result = todo("if");
180  };
181  auto checkInReduction = [&todo](auto op, LogicalResult &result) {
182  if (!op.getInReductionVars().empty() || op.getInReductionByref() ||
183  op.getInReductionSyms())
184  result = todo("in_reduction");
185  };
186  auto checkIsDevicePtr = [&todo](auto op, LogicalResult &result) {
187  if (!op.getIsDevicePtrVars().empty())
188  result = todo("is_device_ptr");
189  };
190  auto checkLinear = [&todo](auto op, LogicalResult &result) {
191  if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
192  result = todo("linear");
193  };
194  auto checkMergeable = [&todo](auto op, LogicalResult &result) {
195  if (op.getMergeable())
196  result = todo("mergeable");
197  };
198  auto checkNontemporal = [&todo](auto op, LogicalResult &result) {
199  if (!op.getNontemporalVars().empty())
200  result = todo("nontemporal");
201  };
202  auto checkNowait = [&todo](auto op, LogicalResult &result) {
203  if (op.getNowait())
204  result = todo("nowait");
205  };
206  auto checkOrder = [&todo](auto op, LogicalResult &result) {
207  if (op.getOrder() || op.getOrderMod())
208  result = todo("order");
209  };
210  auto checkParLevelSimd = [&todo](auto op, LogicalResult &result) {
211  if (op.getParLevelSimd())
212  result = todo("parallelization-level");
213  };
214  auto checkPriority = [&todo](auto op, LogicalResult &result) {
215  if (op.getPriority())
216  result = todo("priority");
217  };
218  auto checkPrivate = [&todo](auto op, LogicalResult &result) {
219  if (!op.getPrivateVars().empty() || op.getPrivateSyms())
220  result = todo("privatization");
221  };
222  auto checkReduction = [&todo](auto op, LogicalResult &result) {
223  if (!op.getReductionVars().empty() || op.getReductionByref() ||
224  op.getReductionSyms())
225  result = todo("reduction");
226  };
227  auto checkThreadLimit = [&todo](auto op, LogicalResult &result) {
228  if (op.getThreadLimit())
229  result = todo("thread_limit");
230  };
231  auto checkTaskReduction = [&todo](auto op, LogicalResult &result) {
232  if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() ||
233  op.getTaskReductionSyms())
234  result = todo("task_reduction");
235  };
236  auto checkUntied = [&todo](auto op, LogicalResult &result) {
237  if (op.getUntied())
238  result = todo("untied");
239  };
240 
241  LogicalResult result = success();
243  .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
244  .Case([&](omp::SectionsOp op) {
245  checkAllocate(op, result);
246  checkPrivate(op, result);
247  })
248  .Case([&](omp::SingleOp op) {
249  checkAllocate(op, result);
250  checkPrivate(op, result);
251  })
252  .Case([&](omp::TeamsOp op) {
253  checkAllocate(op, result);
254  checkPrivate(op, result);
255  checkReduction(op, result);
256  })
257  .Case([&](omp::TaskOp op) {
258  checkAllocate(op, result);
259  checkInReduction(op, result);
260  checkMergeable(op, result);
261  checkPriority(op, result);
262  checkUntied(op, result);
263  })
264  .Case([&](omp::TaskgroupOp op) {
265  checkAllocate(op, result);
266  checkTaskReduction(op, result);
267  })
268  .Case([&](omp::TaskwaitOp op) {
269  checkDepend(op, result);
270  checkNowait(op, result);
271  })
272  .Case([&](omp::WsloopOp op) {
273  checkAllocate(op, result);
274  checkLinear(op, result);
275  checkOrder(op, result);
276  checkPrivate(op, result);
277  })
278  .Case([&](omp::ParallelOp op) { checkAllocate(op, result); })
279  .Case([&](omp::SimdOp op) {
280  checkAligned(op, result);
281  checkLinear(op, result);
282  checkNontemporal(op, result);
283  checkPrivate(op, result);
284  checkReduction(op, result);
285  })
286  .Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp,
287  omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); })
288  .Case<omp::TargetEnterDataOp, omp::TargetExitDataOp, omp::TargetUpdateOp>(
289  [&](auto op) { checkDepend(op, result); })
290  .Case([&](omp::TargetOp op) {
291  checkAllocate(op, result);
292  checkDevice(op, result);
293  checkHasDeviceAddr(op, result);
294  checkIf(op, result);
295  checkInReduction(op, result);
296  checkIsDevicePtr(op, result);
297  // Privatization clauses are supported, except on some situations, so we
298  // need to check here whether any of these unsupported cases are being
299  // translated.
300  if (std::optional<ArrayAttr> privateSyms = op.getPrivateSyms()) {
301  for (Attribute privatizerNameAttr : *privateSyms) {
302  omp::PrivateClauseOp privatizer = findPrivatizer(
303  op.getOperation(), cast<SymbolRefAttr>(privatizerNameAttr));
304 
305  if (privatizer.getDataSharingType() ==
306  omp::DataSharingClauseType::FirstPrivate)
307  result = todo("firstprivate");
308 
309  if (!privatizer.getDeallocRegion().empty())
310  result = op.emitError("not yet implemented: privatization of "
311  "structures in omp.target operation");
312  }
313  }
314  checkThreadLimit(op, result);
315  })
316  .Default([](Operation &) {
317  // Assume all clauses for an operation can be translated unless they are
318  // checked above.
319  });
320  return result;
321 }
322 
323 static LogicalResult handleError(llvm::Error error, Operation &op) {
324  LogicalResult result = success();
325  if (error) {
326  llvm::handleAllErrors(
327  std::move(error),
328  [&](const PreviouslyReportedError &) { result = failure(); },
329  [&](const llvm::ErrorInfoBase &err) {
330  result = op.emitError(err.message());
331  });
332  }
333  return result;
334 }
335 
336 template <typename T>
337 static LogicalResult handleError(llvm::Expected<T> &result, Operation &op) {
338  if (!result)
339  return handleError(result.takeError(), op);
340 
341  return success();
342 }
343 
/// Find the insertion point for allocas given the current insertion point for
/// normal operations in the builder.
static llvm::OpenMPIRBuilder::InsertPointTy
findAllocaInsertPoint(llvm::IRBuilderBase &builder,
                      const LLVM::ModuleTranslation &moduleTranslation) {
  // If there is an alloca insertion point on stack, i.e. we are in a nested
  // operation and a specific point was provided by some surrounding operation,
  // use it.
  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
  // Walk stops at the innermost (most recently pushed) frame.
  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
      [&](const OpenMPAllocaStackFrame &frame) {
        allocaInsertPoint = frame.allocaInsertPoint;
        return WalkResult::interrupt();
      });
  if (walkResult.wasInterrupted())
    return allocaInsertPoint;

  // Otherwise, insert to the entry block of the surrounding function.
  // If the current IRBuilder InsertPoint is the function's entry, it cannot
  // also be used for alloca insertion which would result in insertion order
  // confusion. Create a new BasicBlock for the Builder and use the entry block
  // for the allocs.
  // TODO: Create a dedicated alloca BasicBlock at function creation such that
  // we do not need to move the current InsertPoint here.
  if (builder.GetInsertBlock() ==
      &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
    assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
           "Assuming end of basic block");
    // Split control flow: branch from the old entry into a fresh block and
    // continue emitting normal code there, leaving the entry for allocas.
    llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
        builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
        builder.GetInsertBlock()->getNextNode());
    builder.CreateBr(entryBB);
    builder.SetInsertPoint(entryBB);
  }

  llvm::BasicBlock &funcEntryBlock =
      builder.GetInsertBlock()->getParent()->getEntryBlock();
  return llvm::OpenMPIRBuilder::InsertPointTy(
      &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
}
384 
385 /// Converts the given region that appears within an OpenMP dialect operation to
386 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
387 /// region, and a branch from any block with an successor-less OpenMP terminator
388 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
389 /// of the continuation block if provided.
391  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
392  LLVM::ModuleTranslation &moduleTranslation,
393  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
394  llvm::BasicBlock *continuationBlock =
395  splitBB(builder, true, "omp.region.cont");
396  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
397 
398  llvm::LLVMContext &llvmContext = builder.getContext();
399  for (Block &bb : region) {
400  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
401  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
402  builder.GetInsertBlock()->getNextNode());
403  moduleTranslation.mapBlock(&bb, llvmBB);
404  }
405 
406  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
407 
408  // Terminators (namely YieldOp) may be forwarding values to the region that
409  // need to be available in the continuation block. Collect the types of these
410  // operands in preparation of creating PHI nodes.
411  SmallVector<llvm::Type *> continuationBlockPHITypes;
412  bool operandsProcessed = false;
413  unsigned numYields = 0;
414  for (Block &bb : region.getBlocks()) {
415  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
416  if (!operandsProcessed) {
417  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
418  continuationBlockPHITypes.push_back(
419  moduleTranslation.convertType(yield->getOperand(i).getType()));
420  }
421  operandsProcessed = true;
422  } else {
423  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
424  "mismatching number of values yielded from the region");
425  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
426  llvm::Type *operandType =
427  moduleTranslation.convertType(yield->getOperand(i).getType());
428  (void)operandType;
429  assert(continuationBlockPHITypes[i] == operandType &&
430  "values of mismatching types yielded from the region");
431  }
432  }
433  numYields++;
434  }
435  }
436 
437  // Insert PHI nodes in the continuation block for any values forwarded by the
438  // terminators in this region.
439  if (!continuationBlockPHITypes.empty())
440  assert(
441  continuationBlockPHIs &&
442  "expected continuation block PHIs if converted regions yield values");
443  if (continuationBlockPHIs) {
444  llvm::IRBuilderBase::InsertPointGuard guard(builder);
445  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
446  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
447  for (llvm::Type *ty : continuationBlockPHITypes)
448  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
449  }
450 
451  // Convert blocks one by one in topological order to ensure
452  // defs are converted before uses.
454  for (Block *bb : blocks) {
455  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
456  // Retarget the branch of the entry block to the entry block of the
457  // converted region (regions are single-entry).
458  if (bb->isEntryBlock()) {
459  assert(sourceTerminator->getNumSuccessors() == 1 &&
460  "provided entry block has multiple successors");
461  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
462  "ContinuationBlock is not the successor of the entry block");
463  sourceTerminator->setSuccessor(0, llvmBB);
464  }
465 
466  llvm::IRBuilderBase::InsertPointGuard guard(builder);
467  if (failed(
468  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder)))
469  return llvm::make_error<PreviouslyReportedError>();
470 
471  // Special handling for `omp.yield` and `omp.terminator` (we may have more
472  // than one): they return the control to the parent OpenMP dialect operation
473  // so replace them with the branch to the continuation block. We handle this
474  // here to avoid relying inter-function communication through the
475  // ModuleTranslation class to set up the correct insertion point. This is
476  // also consistent with MLIR's idiom of handling special region terminators
477  // in the same code that handles the region-owning operation.
478  Operation *terminator = bb->getTerminator();
479  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
480  builder.CreateBr(continuationBlock);
481 
482  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
483  (*continuationBlockPHIs)[i]->addIncoming(
484  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
485  }
486  }
487  // After all blocks have been traversed and values mapped, connect the PHI
488  // nodes to the results of preceding blocks.
489  LLVM::detail::connectPHINodes(region, moduleTranslation);
490 
491  // Remove the blocks and values defined in this region from the mapping since
492  // they are not visible outside of this region. This allows the same region to
493  // be converted several times, that is cloned, without clashes, and slightly
494  // speeds up the lookups.
495  moduleTranslation.forgetMapping(region);
496 
497  return continuationBlock;
498 }
499 
500 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
501 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
502  switch (kind) {
503  case omp::ClauseProcBindKind::Close:
504  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
505  case omp::ClauseProcBindKind::Master:
506  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
507  case omp::ClauseProcBindKind::Primary:
508  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
509  case omp::ClauseProcBindKind::Spread:
510  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
511  }
512  llvm_unreachable("Unknown ClauseProcBindKind kind");
513 }
514 
515 /// Helper function to map block arguments defined by ignored loop wrappers to
516 /// LLVM values and prevent any uses of those from triggering null pointer
517 /// dereferences.
518 ///
519 /// This must be called after block arguments of parent wrappers have already
520 /// been mapped to LLVM IR values.
521 static LogicalResult
522 convertIgnoredWrapper(omp::LoopWrapperInterface &opInst,
523  LLVM::ModuleTranslation &moduleTranslation) {
524  // Map block arguments directly to the LLVM value associated to the
525  // corresponding operand. This is semantically equivalent to this wrapper not
526  // being present.
527  auto forwardArgs =
528  [&moduleTranslation](llvm::ArrayRef<BlockArgument> blockArgs,
529  OperandRange operands) {
530  for (auto [arg, var] : llvm::zip_equal(blockArgs, operands))
531  moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
532  };
533 
535  .Case([&](omp::SimdOp op) {
536  auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(*op);
537  forwardArgs(blockArgIface.getPrivateBlockArgs(), op.getPrivateVars());
538  forwardArgs(blockArgIface.getReductionBlockArgs(),
539  op.getReductionVars());
540  op.emitWarning() << "simd information on composite construct discarded";
541  return success();
542  })
543  .Default([&](Operation *op) {
544  return op->emitError() << "cannot ignore nested wrapper";
545  });
546 }
547 
548 /// Helper function to call \c convertIgnoredWrapper() for all wrappers of the
549 /// given \c loopOp nested inside of \c parentOp. This has the effect of mapping
550 /// entry block arguments defined by these operations to outside values.
551 ///
552 /// It must be called after block arguments of \c parentOp have already been
553 /// mapped themselves.
554 static LogicalResult
555 convertIgnoredWrappers(omp::LoopNestOp loopOp,
556  omp::LoopWrapperInterface parentOp,
557  LLVM::ModuleTranslation &moduleTranslation) {
559  loopOp.gatherWrappers(wrappers);
560 
561  // Process wrappers nested inside of `parentOp` from outermost to innermost.
562  for (auto it =
563  std::next(std::find(wrappers.rbegin(), wrappers.rend(), parentOp));
564  it != wrappers.rend(); ++it) {
565  if (failed(convertIgnoredWrapper(*it, moduleTranslation)))
566  return failure();
567  }
568 
569  return success();
570 }
571 
572 /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
573 static LogicalResult
574 convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
575  LLVM::ModuleTranslation &moduleTranslation) {
576  auto maskedOp = cast<omp::MaskedOp>(opInst);
577  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
578 
579  if (failed(checkImplementationStatus(opInst)))
580  return failure();
581 
582  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
583  // MaskedOp has only one region associated with it.
584  auto &region = maskedOp.getRegion();
585  builder.restoreIP(codeGenIP);
586  return convertOmpOpRegions(region, "omp.masked.region", builder,
587  moduleTranslation)
588  .takeError();
589  };
590 
591  // TODO: Perform finalization actions for variables. This has to be
592  // called for variables which have destructors/finalizers.
593  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
594 
595  llvm::Value *filterVal = nullptr;
596  if (auto filterVar = maskedOp.getFilteredThreadId()) {
597  filterVal = moduleTranslation.lookupValue(filterVar);
598  } else {
599  llvm::LLVMContext &llvmContext = builder.getContext();
600  filterVal =
601  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), /*V=*/0);
602  }
603  assert(filterVal != nullptr);
604  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
605  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
606  moduleTranslation.getOpenMPBuilder()->createMasked(ompLoc, bodyGenCB,
607  finiCB, filterVal);
608 
609  if (failed(handleError(afterIP, opInst)))
610  return failure();
611 
612  builder.restoreIP(*afterIP);
613  return success();
614 }
615 
616 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
617 static LogicalResult
618 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
619  LLVM::ModuleTranslation &moduleTranslation) {
620  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
621  auto masterOp = cast<omp::MasterOp>(opInst);
622 
623  if (failed(checkImplementationStatus(opInst)))
624  return failure();
625 
626  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
627  // MasterOp has only one region associated with it.
628  auto &region = masterOp.getRegion();
629  builder.restoreIP(codeGenIP);
630  return convertOmpOpRegions(region, "omp.master.region", builder,
631  moduleTranslation)
632  .takeError();
633  };
634 
635  // TODO: Perform finalization actions for variables. This has to be
636  // called for variables which have destructors/finalizers.
637  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
638 
639  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
640  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
641  moduleTranslation.getOpenMPBuilder()->createMaster(ompLoc, bodyGenCB,
642  finiCB);
643 
644  if (failed(handleError(afterIP, opInst)))
645  return failure();
646 
647  builder.restoreIP(*afterIP);
648  return success();
649 }
650 
651 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
652 static LogicalResult
653 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
654  LLVM::ModuleTranslation &moduleTranslation) {
655  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
656  auto criticalOp = cast<omp::CriticalOp>(opInst);
657 
658  if (failed(checkImplementationStatus(opInst)))
659  return failure();
660 
661  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
662  // CriticalOp has only one region associated with it.
663  auto &region = cast<omp::CriticalOp>(opInst).getRegion();
664  builder.restoreIP(codeGenIP);
665  return convertOmpOpRegions(region, "omp.critical.region", builder,
666  moduleTranslation)
667  .takeError();
668  };
669 
670  // TODO: Perform finalization actions for variables. This has to be
671  // called for variables which have destructors/finalizers.
672  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
673 
674  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
675  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
676  llvm::Constant *hint = nullptr;
677 
678  // If it has a name, it probably has a hint too.
679  if (criticalOp.getNameAttr()) {
680  // The verifiers in OpenMP Dialect guarentee that all the pointers are
681  // non-null
682  auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
683  auto criticalDeclareOp =
684  SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
685  symbolRef);
686  hint =
687  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
688  static_cast<int>(criticalDeclareOp.getHint()));
689  }
690  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
691  moduleTranslation.getOpenMPBuilder()->createCritical(
692  ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint);
693 
694  if (failed(handleError(afterIP, opInst)))
695  return failure();
696 
697  builder.restoreIP(*afterIP);
698  return success();
699 }
700 
701 /// Populates `privatizations` with privatization declarations used for the
702 /// given op.
703 template <class OP>
705  OP op, SmallVectorImpl<omp::PrivateClauseOp> &privatizations) {
706  std::optional<ArrayAttr> attr = op.getPrivateSyms();
707  if (!attr)
708  return;
709 
710  privatizations.reserve(privatizations.size() + attr->size());
711  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
712  privatizations.push_back(findPrivatizer(op, symbolRef));
713  }
714 }
715 
716 /// Populates `reductions` with reduction declarations used in the given op.
717 template <typename T>
718 static void
721  std::optional<ArrayAttr> attr = op.getReductionSyms();
722  if (!attr)
723  return;
724 
725  reductions.reserve(reductions.size() + op.getNumReductionVars());
726  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
727  reductions.push_back(
728  SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
729  op, symbolRef));
730  }
731 }
732 
733 /// Translates the blocks contained in the given region and appends them to at
734 /// the current insertion point of `builder`. The operations of the entry block
735 /// are appended to the current insertion block. If set, `continuationBlockArgs`
736 /// is populated with translated values that correspond to the values
737 /// omp.yield'ed from the region.
738 static LogicalResult inlineConvertOmpRegions(
739  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
740  LLVM::ModuleTranslation &moduleTranslation,
741  SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
742  if (region.empty())
743  return success();
744 
745  // Special case for single-block regions that don't create additional blocks:
746  // insert operations without creating additional blocks.
747  if (llvm::hasSingleElement(region)) {
748  llvm::Instruction *potentialTerminator =
749  builder.GetInsertBlock()->empty() ? nullptr
750  : &builder.GetInsertBlock()->back();
751 
752  if (potentialTerminator && potentialTerminator->isTerminator())
753  potentialTerminator->removeFromParent();
754  moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
755 
756  if (failed(moduleTranslation.convertBlock(
757  region.front(), /*ignoreArguments=*/true, builder)))
758  return failure();
759 
760  // The continuation arguments are simply the translated terminator operands.
761  if (continuationBlockArgs)
762  llvm::append_range(
763  *continuationBlockArgs,
764  moduleTranslation.lookupValues(region.front().back().getOperands()));
765 
766  // Drop the mapping that is no longer necessary so that the same region can
767  // be processed multiple times.
768  moduleTranslation.forgetMapping(region);
769 
770  if (potentialTerminator && potentialTerminator->isTerminator()) {
771  llvm::BasicBlock *block = builder.GetInsertBlock();
772  if (block->empty()) {
773  // this can happen for really simple reduction init regions e.g.
774  // %0 = llvm.mlir.constant(0 : i32) : i32
775  // omp.yield(%0 : i32)
776  // because the llvm.mlir.constant (MLIR op) isn't converted into any
777  // llvm op
778  potentialTerminator->insertInto(block, block->begin());
779  } else {
780  potentialTerminator->insertAfter(&block->back());
781  }
782  }
783 
784  return success();
785  }
786 
788  llvm::Expected<llvm::BasicBlock *> continuationBlock =
789  convertOmpOpRegions(region, blockName, builder, moduleTranslation, &phis);
790 
791  if (failed(handleError(continuationBlock, *region.getParentOp())))
792  return failure();
793 
794  if (continuationBlockArgs)
795  llvm::append_range(*continuationBlockArgs, phis);
796  builder.SetInsertPoint(*continuationBlock,
797  (*continuationBlock)->getFirstInsertionPt());
798  return success();
799 }
800 
namespace {
/// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
/// store lambdas with capture.
// Non-atomic combiner: (insert point, lhs, rhs, result out-param) -> new
// insert point (see makeReductionGen below).
using OwningReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
        llvm::Value *&)>;
// Atomic combiner; a default-constructed (empty) instance signals that no
// atomic region is available (see makeAtomicReductionGen).
using OwningAtomicReductionGen =
    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
        llvm::Value *)>;
} // namespace
813 
/// Create an OpenMPIRBuilder-compatible reduction generator for the given
/// reduction declaration. The generator uses `builder` but ignores its
/// insertion point.
static OwningReductionGen
makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  // The lambda is mutable because we need access to non-const methods of decl
  // (which aren't actually mutating it), and we must capture decl by-value to
  // avoid the dangling reference after the parent function returns.
  OwningReductionGen gen =
      [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
                llvm::Value *lhs, llvm::Value *rhs,
                llvm::Value *&result) mutable
      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
    // Wire the incoming LLVM values to the block arguments of the combiner
    // region, then inline that region at the requested insertion point.
    moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs);
    moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs);
    builder.restoreIP(insertPoint);
    // NOTE(review): a `SmallVector<llvm::Value *> phis;` declaration appears
    // to have been dropped from this copy of the file right here — confirm
    // against upstream before relying on this text.
    if (failed(inlineConvertOmpRegions(decl.getReductionRegion(),
                                       "omp.reduction.nonatomic.body", builder,
                                       moduleTranslation, &phis)))
      return llvm::createStringError(
          "failed to inline `combiner` region of `omp.declare_reduction`");
    // The combiner region yields exactly one value: the combination result.
    assert(phis.size() == 1);
    result = phis[0];
    return builder.saveIP();
  };
  return gen;
}
843 
/// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
/// given reduction declaration. The generator uses `builder` but ignores its
/// insertion point. Returns null if there is no atomic region available in the
/// reduction declaration.
static OwningAtomicReductionGen
makeAtomicReductionGen(omp::DeclareReductionOp decl,
                       llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  // A default-constructed std::function is null; callers test the returned
  // generator to detect "no atomic version available".
  if (decl.getAtomicReductionRegion().empty())
    return OwningAtomicReductionGen();

  // The lambda is mutable because we need access to non-const methods of decl
  // (which aren't actually mutating it), and we must capture decl by-value to
  // avoid the dangling reference after the parent function returns.
  OwningAtomicReductionGen atomicGen =
      [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
                llvm::Value *lhs, llvm::Value *rhs) mutable
      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
    // Wire the incoming LLVM values to the block arguments of the atomic
    // region, then inline that region at the requested insertion point.
    moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs);
    moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs);
    builder.restoreIP(insertPoint);
    // NOTE(review): a `SmallVector<llvm::Value *> phis;` declaration appears
    // to have been dropped from this copy of the file right here — confirm
    // against upstream before relying on this text.
    if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(),
                                       "omp.reduction.atomic.body", builder,
                                       moduleTranslation, &phis)))
      return llvm::createStringError(
          "failed to inline `atomic` region of `omp.declare_reduction`");
    // The atomic region performs its update as a side effect and yields
    // nothing.
    assert(phis.empty());
    return builder.saveIP();
  };
  return atomicGen;
}
876 
877 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
878 static LogicalResult
879 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
880  LLVM::ModuleTranslation &moduleTranslation) {
881  auto orderedOp = cast<omp::OrderedOp>(opInst);
882 
883  if (failed(checkImplementationStatus(opInst)))
884  return failure();
885 
886  omp::ClauseDepend dependType = *orderedOp.getDoacrossDependType();
887  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
888  unsigned numLoops = *orderedOp.getDoacrossNumLoops();
889  SmallVector<llvm::Value *> vecValues =
890  moduleTranslation.lookupValues(orderedOp.getDoacrossDependVars());
891 
892  size_t indexVecValues = 0;
893  while (indexVecValues < vecValues.size()) {
894  SmallVector<llvm::Value *> storeValues;
895  storeValues.reserve(numLoops);
896  for (unsigned i = 0; i < numLoops; i++) {
897  storeValues.push_back(vecValues[indexVecValues]);
898  indexVecValues++;
899  }
900  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
901  findAllocaInsertPoint(builder, moduleTranslation);
902  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
903  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
904  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
905  }
906  return success();
907 }
908 
909 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
910 /// OpenMPIRBuilder.
911 static LogicalResult
912 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
913  LLVM::ModuleTranslation &moduleTranslation) {
914  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
915  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
916 
917  if (failed(checkImplementationStatus(opInst)))
918  return failure();
919 
920  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
921  // OrderedOp has only one region associated with it.
922  auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
923  builder.restoreIP(codeGenIP);
924  return convertOmpOpRegions(region, "omp.ordered.region", builder,
925  moduleTranslation)
926  .takeError();
927  };
928 
929  // TODO: Perform finalization actions for variables. This has to be
930  // called for variables which have destructors/finalizers.
931  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
932 
933  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
934  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
935  moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
936  ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd());
937 
938  if (failed(handleError(afterIP, opInst)))
939  return failure();
940 
941  builder.restoreIP(*afterIP);
942  return success();
943 }
944 
namespace {
/// Contains the arguments for an LLVM store operation
/// (`builder.CreateStore(value, address)`) whose emission is deferred until
/// after all allocas have been created.
struct DeferredStore {
  DeferredStore(llvm::Value *value, llvm::Value *address)
      : value(value), address(address) {}

  // Value to be stored.
  llvm::Value *value;
  // Destination address of the store.
  llvm::Value *address;
};
} // namespace
955 
/// Allocate space for privatized reduction variables.
/// `deferredStores` contains information to create store operations which needs
/// to be inserted after all allocas
template <typename T>
static LogicalResult
// NOTE(review): the line carrying the function name and leading parameters
// (e.g. `allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,`)
// appears to have been dropped from this copy — confirm against upstream.
    llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
    // NOTE(review): a parameter line (likely the `reductionDecls` vector)
    // appears to have been dropped from this copy here.
    SmallVectorImpl<llvm::Value *> &privateReductionVariables,
    DenseMap<Value, llvm::Value *> &reductionVariableMap,
    SmallVectorImpl<DeferredStore> &deferredStores,
    llvm::ArrayRef<bool> isByRefs) {
  // Restore the caller's insertion point on exit; all allocas emitted here go
  // into the dedicated alloca block.
  llvm::IRBuilderBase::InsertPointGuard guard(builder);
  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());

  // delay creating stores until after all allocas
  deferredStores.reserve(loop.getNumReductionVars());

  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
    Region &allocRegion = reductionDecls[i].getAllocRegion();
    if (isByRefs[i]) {
      // By-ref reductions without an alloc region are handled elsewhere.
      if (allocRegion.empty())
        continue;

      // NOTE(review): a `SmallVector<llvm::Value *> phis;` declaration appears
      // to have been dropped from this copy here.
      if (failed(inlineConvertOmpRegions(allocRegion, "omp.reduction.alloc",
                                         builder, moduleTranslation, &phis)))
        return loop.emitError(
            "failed to inline `alloc` region of `omp.declare_reduction`");

      assert(phis.size() == 1 && "expected one allocation to be yielded");
      // Inlining the region may have moved the insertion point; go back to
      // the alloca block for the pointer allocation below.
      builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());

      // Allocate reduction variable (which is a pointer to the real reduction
      // variable allocated in the inlined region)
      llvm::Value *var = builder.CreateAlloca(
          moduleTranslation.convertType(reductionDecls[i].getType()));
      // The store of the inlined region's result is deferred until all
      // allocas have been emitted.
      deferredStores.emplace_back(phis[0], var);

      privateReductionVariables[i] = var;
      moduleTranslation.mapValue(reductionArgs[i], phis[0]);
      reductionVariableMap.try_emplace(loop.getReductionVars()[i], phis[0]);
    } else {
      assert(allocRegion.empty() &&
             "allocaction is implicit for by-val reduction");
      // NOTE(review): "allocaction" above is a typo in the assert message
      // (should read "allocation") — fix upstream, not in this copy.
      llvm::Value *var = builder.CreateAlloca(
          moduleTranslation.convertType(reductionDecls[i].getType()));
      moduleTranslation.mapValue(reductionArgs[i], var);
      privateReductionVariables[i] = var;
      reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
    }
  }

  return success();
}
1013 
/// Map input arguments to reduction initialization region
template <typename T>
static void
// NOTE(review): the lines carrying the function name and leading parameters
// (e.g. `mapInitializationArgs(T loop, LLVM::ModuleTranslation &...,`) appear
// to have been dropped from this copy — confirm against upstream.
                      DenseMap<Value, llvm::Value *> &reductionVariableMap,
                      unsigned i) {
  // map input argument to the initialization region
  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
  Region &initializerRegion = reduction.getInitializerRegion();
  Block &entry = initializerRegion.front();

  // The i-th reduction variable must already have a translated LLVM value.
  mlir::Value mlirSource = loop.getReductionVars()[i];
  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
  assert(llvmSource && "lookup reduction var");
  moduleTranslation.mapValue(reduction.getInitializerMoldArg(), llvmSource);

  // A second block argument, when present, receives the allocation produced
  // earlier for this reduction variable.
  if (entry.getNumArguments() > 1) {
    llvm::Value *allocation =
        reductionVariableMap.lookup(loop.getReductionVars()[i]);
    moduleTranslation.mapValue(reduction.getInitializerAllocArg(), allocation);
  }
}
1037 
/// Collect reduction info
/// Builds the owning (non-)atomic reduction generators for each reduction
/// declaration of `loop` and assembles the ReductionInfo records consumed by
/// OpenMPIRBuilder::createReductions.
template <typename T>
// NOTE(review): the line carrying the function name (`static void
// collectReductionInfo(`) appears to have been dropped from this copy —
// confirm against upstream.
    T loop, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    // NOTE(review): a parameter line (likely the `reductionDecls` vector)
    // appears to have been dropped from this copy here.
    SmallVectorImpl<OwningReductionGen> &owningReductionGens,
    SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens,
    const ArrayRef<llvm::Value *> privateReductionVariables,
    // NOTE(review): the closing parameter line (likely `reductionInfos`) and
    // the `) {` appear to have been dropped from this copy here.
  unsigned numReductions = loop.getNumReductionVars();

  // The generators must outlive the ReductionInfo records, which hold
  // references to them; hence the caller-owned vectors.
  for (unsigned i = 0; i < numReductions; ++i) {
    owningReductionGens.push_back(
        makeReductionGen(reductionDecls[i], builder, moduleTranslation));
    owningAtomicReductionGens.push_back(
        makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
  }

  // Collect the reduction information.
  reductionInfos.reserve(numReductions);
  for (unsigned i = 0; i < numReductions; ++i) {
    // makeAtomicReductionGen returns a null generator when the declaration
    // has no atomic region; propagate that as a null callback.
    llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
    if (owningAtomicReductionGens[i])
      atomicGen = owningAtomicReductionGens[i];
    llvm::Value *variable =
        moduleTranslation.lookupValue(loop.getReductionVars()[i]);
    reductionInfos.push_back(
        {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
         privateReductionVariables[i],
         /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
         owningReductionGens[i],
         /*ReductionGenClang=*/nullptr, atomicGen});
  }
}
1073 
/// handling of DeclareReductionOp's cleanup region
/// Inlines each non-empty cleanup region, feeding it the matching private
/// variable (loaded from memory first when `shouldLoadCleanupRegionArg` is
/// true, passed as-is otherwise).
static LogicalResult
// NOTE(review): the line carrying the function name and first parameter
// (e.g. `inlineOmpRegionCleanup(llvm::SmallVectorImpl<Region *> &cleanupRegions,`)
// appears to have been dropped from this copy — confirm against upstream.
                       llvm::ArrayRef<llvm::Value *> privateVariables,
                       LLVM::ModuleTranslation &moduleTranslation,
                       llvm::IRBuilderBase &builder, StringRef regionName,
                       bool shouldLoadCleanupRegionArg = true) {
  for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
    if (cleanupRegion->empty())
      continue;

    // map the argument to the cleanup region
    Block &entry = cleanupRegion->front();

    // If the current block already ends in a terminator, emit the cleanup
    // code before it rather than after.
    llvm::Instruction *potentialTerminator =
        builder.GetInsertBlock()->empty() ? nullptr
                                          : &builder.GetInsertBlock()->back();
    if (potentialTerminator && potentialTerminator->isTerminator())
      builder.SetInsertPoint(potentialTerminator);
    llvm::Value *privateVarValue =
        shouldLoadCleanupRegionArg
            ? builder.CreateLoad(
                  moduleTranslation.convertType(entry.getArgument(0).getType()),
                  privateVariables[i])
            : privateVariables[i];

    moduleTranslation.mapValue(entry.getArgument(0), privateVarValue);

    if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
                                       moduleTranslation)))
      return failure();

    // clear block argument mapping in case it needs to be re-created with a
    // different source for another use of the same reduction decl
    moduleTranslation.forgetMapping(*cleanupRegion);
  }
  return success();
}
1112 
1113 // TODO: not used by ParallelOp
1114 template <class OP>
1115 static LogicalResult createReductionsAndCleanup(
1116  OP op, llvm::IRBuilderBase &builder,
1117  LLVM::ModuleTranslation &moduleTranslation,
1118  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1120  ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef) {
1121  // Process the reductions if required.
1122  if (op.getNumReductionVars() == 0)
1123  return success();
1124 
1125  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1126 
1127  // Create the reduction generators. We need to own them here because
1128  // ReductionInfo only accepts references to the generators.
1129  SmallVector<OwningReductionGen> owningReductionGens;
1130  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1132  collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
1133  owningReductionGens, owningAtomicReductionGens,
1134  privateReductionVariables, reductionInfos);
1135 
1136  // The call to createReductions below expects the block to have a
1137  // terminator. Create an unreachable instruction to serve as terminator
1138  // and remove it later.
1139  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1140  builder.SetInsertPoint(tempTerminator);
1141  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
1142  ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
1143  isByRef, op.getNowait());
1144 
1145  if (failed(handleError(contInsertPoint, *op)))
1146  return failure();
1147 
1148  if (!contInsertPoint->getBlock())
1149  return op->emitOpError() << "failed to convert reductions";
1150 
1151  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1152  ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);
1153 
1154  if (failed(handleError(afterIP, *op)))
1155  return failure();
1156 
1157  tempTerminator->eraseFromParent();
1158  builder.restoreIP(*afterIP);
1159 
1160  // after the construct, deallocate private reduction variables
1161  SmallVector<Region *> reductionRegions;
1162  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
1163  [](omp::DeclareReductionOp reductionDecl) {
1164  return &reductionDecl.getCleanupRegion();
1165  });
1166  return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
1167  moduleTranslation, builder,
1168  "omp.reduction.cleanup");
1169  return success();
1170 }
1171 
1172 static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
1173  if (!attr)
1174  return {};
1175  return *attr;
1176 }
1177 
// TODO: not used by omp.parallel
/// Allocate the private reduction variables of `op` (via allocReductionVars),
/// flush the deferred stores, and then initialize each private copy by
/// inlining the declaration's initializer region.
template <typename OP>
static LogicalResult allocAndInitializeReductionVars(
    OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
    // NOTE(review): a parameter line (likely the `reductionDecls` vector)
    // appears to have been dropped from this copy here.
    SmallVectorImpl<llvm::Value *> &privateReductionVariables,
    DenseMap<Value, llvm::Value *> &reductionVariableMap,
    llvm::ArrayRef<bool> isByRef) {
  if (op.getNumReductionVars() == 0)
    return success();

  SmallVector<DeferredStore> deferredStores;

  if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
                                allocaIP, reductionDecls,
                                privateReductionVariables, reductionVariableMap,
                                deferredStores, isByRef)))
    return failure();

  // store result of the alloc region to the allocated pointer to the real
  // reduction variable
  for (auto [data, addr] : deferredStores)
    builder.CreateStore(data, addr);

  // Before the loop, store the initial values of reductions into reduction
  // variables. Although this could be done after allocas, we don't want to mess
  // up with the alloca insertion point.
  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
    // NOTE(review): a `SmallVector<llvm::Value *> phis;` declaration appears
    // to have been dropped from this copy here.

    // map block argument to initializer region
    mapInitializationArgs(op, moduleTranslation, reductionDecls,
                          reductionVariableMap, i);

    if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
                                       "omp.reduction.neutral", builder,
                                       moduleTranslation, &phis)))
      return failure();
    assert(phis.size() == 1 && "expected one value to be yielded from the "
                               "reduction neutral element declaration region");
    if (isByRef[i]) {
      if (!reductionDecls[i].getAllocRegion().empty())
        // done in allocReductionVars
        continue;

      // TODO: this path can be removed once all users of by-ref are updated to
      // use an alloc region

      // Allocate reduction variable (which is a pointer to the real reduction
      // variable allocated in the inlined region)
      llvm::Value *var = builder.CreateAlloca(
          moduleTranslation.convertType(reductionDecls[i].getType()));
      // Store the result of the inlined region to the allocated reduction var
      // ptr
      builder.CreateStore(phis[0], var);

      privateReductionVariables[i] = var;
      moduleTranslation.mapValue(reductionArgs[i], phis[0]);
      reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
    } else {
      // for by-ref case the store is inside of the reduction region
      builder.CreateStore(phis[0], privateReductionVariables[i]);
      // the rest was handled in allocByValReductionVars
    }

    // forget the mapping for the initializer region because we might need a
    // different mapping if this reduction declaration is re-used for a
    // different variable
    moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
  }

  return success();
}
1253 
/// Allocate delayed private variables. Returns the basic block which comes
/// after all of these allocations. llvm::Value * for each of these private
/// variables are populated in llvmPrivateVars.
// NOTE(review): the return-type line (likely
// `static llvm::Expected<llvm::BasicBlock *>`) appears to have been dropped
// from this copy — the function does `return llvm::createStringError(...)`
// and `return afterAllocas;` below, which is consistent with that type.
allocatePrivateVars(llvm::IRBuilderBase &builder,
                    LLVM::ModuleTranslation &moduleTranslation,
                    MutableArrayRef<BlockArgument> privateBlockArgs,
                    // NOTE(review): a parameter line (likely the
                    // `privateDecls` array) appears to have been dropped from
                    // this copy here.
                    MutableArrayRef<mlir::Value> mlirPrivateVars,
                    llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
                    const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
  // Allocate private vars
  llvm::BranchInst *allocaTerminator =
      llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
  builder.SetInsertPoint(allocaTerminator);
  assert(allocaTerminator->getNumSuccessors() == 1 &&
         "This is an unconditional branch created by OpenMPIRBuilder");
  llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);

  // FIXME: Some of the allocation regions do more than just allocating.
  // They read from their block argument (amongst other non-alloca things).
  // When OpenMPIRBuilder outlines the parallel region into a different
  // function it places the loads for live in-values (such as these block
  // arguments) at the end of the entry block (because the entry block is
  // assumed to contain only allocas). Therefore, if we put these complicated
  // alloc blocks in the entry block, these will not dominate the availability
  // of the live-in values they are using. Fix this by adding a latealloc
  // block after the entry block to put these in (this also helps to avoid
  // mixing non-alloca code with allocas).
  // Alloc regions which do not use the block argument can still be placed in
  // the entry block (therefore keeping the allocas together).
  llvm::BasicBlock *privAllocBlock = nullptr;
  if (!privateBlockArgs.empty())
    privAllocBlock = splitBB(builder, true, "omp.private.latealloc");
  for (auto [privDecl, mlirPrivVar, blockArg] :
       llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) {
    Region &allocRegion = privDecl.getAllocRegion();

    // map allocation region block argument
    llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirPrivVar);
    assert(nonPrivateVar);
    moduleTranslation.mapValue(privDecl.getAllocMoldArg(), nonPrivateVar);

    // in-place convert the private allocation region
    // NOTE(review): a `SmallVector<llvm::Value *> phis;` declaration appears
    // to have been dropped from this copy here.
    if (privDecl.getAllocMoldArg().getUses().empty()) {
      // TODO this should use
      // allocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca() so it goes before
      // the code for fetching the thread id. Not doing this for now to avoid
      // test churn.
      builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
    } else {
      builder.SetInsertPoint(privAllocBlock->getTerminator());
    }
    if (failed(inlineConvertOmpRegions(allocRegion, "omp.private.alloc",
                                       builder, moduleTranslation, &phis)))
      return llvm::createStringError(
          "failed to inline `alloc` region of `omp.private`");

    assert(phis.size() == 1 && "expected one allocation to be yielded");

    moduleTranslation.mapValue(blockArg, phis[0]);
    llvmPrivateVars.push_back(phis[0]);

    // clear alloc region block argument mapping in case it needs to be
    // re-created with a different source for another use of the same
    // reduction decl
    moduleTranslation.forgetMapping(allocRegion);
  }
  return afterAllocas;
}
1325 
/// Converts an OpenMP 'sections' construct (and its nested 'section' ops) into
/// LLVM IR using OpenMPIRBuilder, including allocation/initialization and
/// final combination of any reduction clauses.
static LogicalResult
convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  using StorableBodyGenCallbackTy =
      llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

  auto sectionsOp = cast<omp::SectionsOp>(opInst);

  if (failed(checkImplementationStatus(opInst)))
    return failure();

  llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionByref());
  assert(isByRef.size() == sectionsOp.getNumReductionVars());

  SmallVector<omp::DeclareReductionOp> reductionDecls;
  collectReductionDecls(sectionsOp, reductionDecls);
  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  SmallVector<llvm::Value *> privateReductionVariables(
      sectionsOp.getNumReductionVars());
  DenseMap<Value, llvm::Value *> reductionVariableMap;

  MutableArrayRef<BlockArgument> reductionArgs =
      cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();

  // NOTE(review): the line opening this call (likely
  // `if (failed(allocAndInitializeReductionVars(`) appears to have been
  // dropped from this copy — confirm against upstream.
          sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
          reductionDecls, privateReductionVariables, reductionVariableMap,
          isByRef)))
    return failure();

  // Store the mapping between reduction variables and their private copies on
  // ModuleTranslation stack. It can be then recovered when translating
  // omp.reduce operations in a separate call.
  // NOTE(review): the line opening this statement (a ModuleTranslation
  // stack-frame guard taking the arguments below) appears to have been
  // dropped from this copy.
      moduleTranslation, reductionVariableMap);

  // NOTE(review): a `SmallVector<StorableBodyGenCallbackTy> sectionCBs;`
  // declaration appears to have been dropped from this copy here.

  for (Operation &op : *sectionsOp.getRegion().begin()) {
    auto sectionOp = dyn_cast<omp::SectionOp>(op);
    if (!sectionOp) // omp.terminator
      continue;

    Region &region = sectionOp.getRegion();
    auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation](
                         InsertPointTy allocaIP, InsertPointTy codeGenIP) {
      builder.restoreIP(codeGenIP);

      // map the omp.section reduction block argument to the omp.sections block
      // arguments
      // TODO: this assumes that the only block arguments are reduction
      // variables
      assert(region.getNumArguments() ==
             sectionsOp.getRegion().getNumArguments());
      for (auto [sectionsArg, sectionArg] : llvm::zip_equal(
               sectionsOp.getRegion().getArguments(), region.getArguments())) {
        llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg);
        assert(llvmVal);
        moduleTranslation.mapValue(sectionArg, llvmVal);
      }

      return convertOmpOpRegions(region, "omp.section.region", builder,
                                 moduleTranslation)
          .takeError();
    };
    sectionCBs.push_back(sectionCB);
  }

  // No sections within omp.sections operation - skip generation. This situation
  // is only possible if there is only a terminator operation inside the
  // sections operation
  if (sectionCBs.empty())
    return success();

  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));

  // TODO: Perform appropriate actions according to the data-sharing
  // attribute (shared, private, firstprivate, ...) of variables.
  // Currently defaults to shared.
  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
                    llvm::Value &vPtr, llvm::Value *&replacementValue)
      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
    replacementValue = &vPtr;
    return codeGenIP;
  };

  // TODO: Perform finalization actions for variables. This has to be
  // called for variables which have destructors/finalizers.
  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };

  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createSections(
          ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
          sectionsOp.getNowait());

  if (failed(handleError(afterIP, opInst)))
    return failure();

  builder.restoreIP(*afterIP);

  // Process the reductions if required.
  return createReductionsAndCleanup(sectionsOp, builder, moduleTranslation,
                                    allocaIP, reductionDecls,
                                    privateReductionVariables, isByRef);
}
1436 
/// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  if (failed(checkImplementationStatus(*singleOp)))
    return failure();

  // Lower the single region's body at the position the builder provides.
  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
    builder.restoreIP(codegenIP);
    return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region",
                               builder, moduleTranslation)
        .takeError();
  };
  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };

  // Handle copyprivate
  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateSyms();
  // NOTE(review): the declarations of `llvmCPVars` and `llvmCPFuncs`
  // (llvm::Value* and llvm::Function* vectors) appear to have been dropped
  // from this copy here — confirm against upstream.
  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
    // Pair each copyprivate variable with its translated copy function.
    llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
    auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
        singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
    llvmCPFuncs.push_back(
        moduleTranslation.lookupFunction(llvmFuncOp.getName()));
  }

  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createSingle(
          ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars,
          llvmCPFuncs);

  if (failed(handleError(afterIP, *singleOp)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
1479 
// Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder
static LogicalResult
convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
                LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  if (failed(checkImplementationStatus(*op)))
    return failure();

  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
    // NOTE(review): the line opening this statement (a ModuleTranslation
    // stack-frame guard recording `allocaIP` for nested regions) appears to
    // have been dropped from this copy — confirm against upstream.
        moduleTranslation, allocaIP);
    builder.restoreIP(codegenIP);
    return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
                               moduleTranslation)
        .takeError();
  };

  // Each clause operand is optional; translate only those that are present
  // and pass nullptr for the rest.
  llvm::Value *numTeamsLower = nullptr;
  if (Value numTeamsLowerVar = op.getNumTeamsLower())
    numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);

  llvm::Value *numTeamsUpper = nullptr;
  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
    numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);

  llvm::Value *threadLimit = nullptr;
  if (Value threadLimitVar = op.getThreadLimit())
    threadLimit = moduleTranslation.lookupValue(threadLimitVar);

  llvm::Value *ifExpr = nullptr;
  if (Value ifVar = op.getIfExpr())
    ifExpr = moduleTranslation.lookupValue(ifVar);

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createTeams(
          ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr);

  if (failed(handleError(afterIP, *op)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
1524 
/// Translate the `depend` clause operands of a task-like op into
/// OpenMPIRBuilder DependData records appended to `dds`.
static void
buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
                LLVM::ModuleTranslation &moduleTranslation,
                // NOTE(review): the closing parameter line (likely
                // `SmallVectorImpl<llvm::OpenMPIRBuilder::DependData> &dds) {`)
                // appears to have been dropped from this copy — confirm
                // against upstream.
  if (dependVars.empty())
    return;
  // dependVars and dependKinds are parallel lists; zip pairs each variable
  // with its dependency kind attribute.
  for (auto dep : llvm::zip(dependVars, dependKinds->getValue())) {
    llvm::omp::RTLDependenceKindTy type;
    switch (
        cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
    case mlir::omp::ClauseTaskDepend::taskdependin:
      type = llvm::omp::RTLDependenceKindTy::DepIn;
      break;
    // The OpenMP runtime requires that the codegen for 'depend' clause for
    // 'out' dependency kind must be the same as codegen for 'depend' clause
    // with 'inout' dependency.
    case mlir::omp::ClauseTaskDepend::taskdependout:
    case mlir::omp::ClauseTaskDepend::taskdependinout:
      type = llvm::omp::RTLDependenceKindTy::DepInOut;
      break;
    };
    llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
    llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
    dds.emplace_back(dd);
  }
}
1551 
1552 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
1553 static LogicalResult
1554 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
1555  LLVM::ModuleTranslation &moduleTranslation) {
1556  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1557  if (failed(checkImplementationStatus(*taskOp)))
1558  return failure();
1559 
1560  // Collect delayed privatisation declarations
1561  MutableArrayRef<BlockArgument> privateBlockArgs =
1562  cast<omp::BlockArgOpenMPOpInterface>(*taskOp).getPrivateBlockArgs();
1563  SmallVector<mlir::Value> mlirPrivateVars;
1564  SmallVector<llvm::Value *> llvmPrivateVars;
1565  SmallVector<omp::PrivateClauseOp> privateDecls;
1566  mlirPrivateVars.reserve(privateBlockArgs.size());
1567  llvmPrivateVars.reserve(privateBlockArgs.size());
1568  collectPrivatizationDecls(taskOp, privateDecls);
1569  for (mlir::Value privateVar : taskOp.getPrivateVars())
1570  mlirPrivateVars.push_back(privateVar);
1571 
1572  auto bodyCB = [&](InsertPointTy allocaIP,
1573  InsertPointTy codegenIP) -> llvm::Error {
1574  // Save the alloca insertion point on ModuleTranslation stack for use in
1575  // nested regions.
1577  moduleTranslation, allocaIP);
1578 
1580  builder, moduleTranslation, privateBlockArgs, privateDecls,
1581  mlirPrivateVars, llvmPrivateVars, allocaIP);
1582  if (handleError(afterAllocas, *taskOp).failed())
1583  return llvm::make_error<PreviouslyReportedError>();
1584 
1585  // Apply copy region for firstprivate
1586  bool needsFirstPrivate =
1587  llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1588  return privOp.getDataSharingType() ==
1589  omp::DataSharingClauseType::FirstPrivate;
1590  });
1591  if (needsFirstPrivate) {
1592  // Find the end of the allocation blocks
1593  assert(afterAllocas.get()->getSinglePredecessor());
1594  builder.SetInsertPoint(
1595  afterAllocas.get()->getSinglePredecessor()->getTerminator());
1596  llvm::BasicBlock *copyBlock =
1597  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1598  builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
1599  }
1600  for (auto [decl, mlirVar, llvmVar] :
1601  llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1602  if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1603  continue;
1604 
1605  // copyRegion implements `lhs = rhs`
1606  Region &copyRegion = decl.getCopyRegion();
1607 
1608  // map copyRegion rhs arg
1609  llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
1610  assert(nonPrivateVar);
1611  moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1612 
1613  // map copyRegion lhs arg
1614  moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1615 
1616  // in-place convert copy region
1617  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1618  if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
1619  builder, moduleTranslation)))
1620  return llvm::createStringError(
1621  "failed to inline `copy` region of an `omp.private` op in taskOp");
1622 
1623  // ignore unused value yielded from copy region
1624 
1625  // clear copy region block argument mapping in case it needs to be
1626  // re-created with different source for reuse of the same reduction decl
1627  moduleTranslation.forgetMapping(copyRegion);
1628  }
1629 
1630  // translate the body of the task:
1631  builder.restoreIP(codegenIP);
1632  auto continuationBlockOrError = convertOmpOpRegions(
1633  taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
1634  if (failed(handleError(continuationBlockOrError, *taskOp)))
1635  return llvm::make_error<PreviouslyReportedError>();
1636 
1637  // private variable deallocation
1638  SmallVector<Region *> privateCleanupRegions;
1639  llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
1640  [](omp::PrivateClauseOp privatizer) {
1641  return &privatizer.getDeallocRegion();
1642  });
1643 
1644  builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
1645  if (failed(inlineOmpRegionCleanup(
1646  privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1647  "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1648  return llvm::createStringError("failed to inline `dealloc` region of an "
1649  "`omp.private` op in an omp.task");
1650 
1651  return llvm::Error::success();
1652  };
1653 
1655  buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
1656  moduleTranslation, dds);
1657 
1658  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1659  findAllocaInsertPoint(builder, moduleTranslation);
1660  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1661  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1662  moduleTranslation.getOpenMPBuilder()->createTask(
1663  ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
1664  moduleTranslation.lookupValue(taskOp.getFinal()),
1665  moduleTranslation.lookupValue(taskOp.getIfExpr()), dds);
1666 
1667  if (failed(handleError(afterIP, *taskOp)))
1668  return failure();
1669 
1670  builder.restoreIP(*afterIP);
1671  return success();
1672 }
1673 
1674 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
1675 static LogicalResult
1676 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
1677  LLVM::ModuleTranslation &moduleTranslation) {
1678  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1679  if (failed(checkImplementationStatus(*tgOp)))
1680  return failure();
1681 
1682  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1683  builder.restoreIP(codegenIP);
1684  return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region",
1685  builder, moduleTranslation)
1686  .takeError();
1687  };
1688 
1689  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1690  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1691  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1692  moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP,
1693  bodyCB);
1694 
1695  if (failed(handleError(afterIP, *tgOp)))
1696  return failure();
1697 
1698  builder.restoreIP(*afterIP);
1699  return success();
1700 }
1701 
1702 static LogicalResult
1703 convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder,
1704  LLVM::ModuleTranslation &moduleTranslation) {
1705  if (failed(checkImplementationStatus(*twOp)))
1706  return failure();
1707 
1708  moduleTranslation.getOpenMPBuilder()->createTaskwait(builder.saveIP());
1709  return success();
1710 }
1711 
1712 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
1713 static LogicalResult
1714 convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
1715  LLVM::ModuleTranslation &moduleTranslation) {
1716  auto wsloopOp = cast<omp::WsloopOp>(opInst);
1717  if (failed(checkImplementationStatus(opInst)))
1718  return failure();
1719 
1720  auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
1721 
1722  llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref());
1723  assert(isByRef.size() == wsloopOp.getNumReductionVars());
1724 
1725  // Static is the default.
1726  auto schedule =
1727  wsloopOp.getScheduleKind().value_or(omp::ClauseScheduleKind::Static);
1728 
1729  // Find the loop configuration.
1730  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]);
1731  llvm::Type *ivType = step->getType();
1732  llvm::Value *chunk = nullptr;
1733  if (wsloopOp.getScheduleChunk()) {
1734  llvm::Value *chunkVar =
1735  moduleTranslation.lookupValue(wsloopOp.getScheduleChunk());
1736  chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
1737  }
1738 
1739  SmallVector<omp::DeclareReductionOp> reductionDecls;
1740  collectReductionDecls(wsloopOp, reductionDecls);
1741  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1742  findAllocaInsertPoint(builder, moduleTranslation);
1743 
1744  SmallVector<llvm::Value *> privateReductionVariables(
1745  wsloopOp.getNumReductionVars());
1746  DenseMap<Value, llvm::Value *> reductionVariableMap;
1747 
1748  MutableArrayRef<BlockArgument> reductionArgs =
1749  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
1750 
1752  wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP,
1753  reductionDecls, privateReductionVariables, reductionVariableMap,
1754  isByRef)))
1755  return failure();
1756 
1757  // TODO: Replace this with proper composite translation support.
1758  // Currently, all nested wrappers are ignored, so 'do/for simd' will be
1759  // treated the same as a standalone 'do/for'. This is allowed by the spec,
1760  // since it's equivalent to always using a SIMD length of 1.
1761  if (failed(convertIgnoredWrappers(loopOp, wsloopOp, moduleTranslation)))
1762  return failure();
1763 
1764  // Store the mapping between reduction variables and their private copies on
1765  // ModuleTranslation stack. It can be then recovered when translating
1766  // omp.reduce operations in a separate call.
1768  moduleTranslation, reductionVariableMap);
1769 
1770  // Set up the source location value for OpenMP runtime.
1771  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1772 
1773  // Generator of the canonical loop body.
1776  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip,
1777  llvm::Value *iv) -> llvm::Error {
1778  // Make sure further conversions know about the induction variable.
1779  moduleTranslation.mapValue(
1780  loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
1781 
1782  // Capture the body insertion point for use in nested loops. BodyIP of the
1783  // CanonicalLoopInfo always points to the beginning of the entry block of
1784  // the body.
1785  bodyInsertPoints.push_back(ip);
1786 
1787  if (loopInfos.size() != loopOp.getNumLoops() - 1)
1788  return llvm::Error::success();
1789 
1790  // Convert the body of the loop.
1791  builder.restoreIP(ip);
1792  return convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder,
1793  moduleTranslation)
1794  .takeError();
1795  };
1796 
1797  // Delegate actual loop construction to the OpenMP IRBuilder.
1798  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
1799  // loop, i.e. it has a positive step, uses signed integer semantics.
1800  // Reconsider this code when the nested loop operation clearly supports more
1801  // cases.
1802  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1803  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
1804  llvm::Value *lowerBound =
1805  moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]);
1806  llvm::Value *upperBound =
1807  moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]);
1808  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]);
1809 
1810  // Make sure loop trip count are emitted in the preheader of the outermost
1811  // loop at the latest so that they are all available for the new collapsed
1812  // loop will be created below.
1813  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
1814  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
1815  if (i != 0) {
1816  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back());
1817  computeIP = loopInfos.front()->getPreheaderIP();
1818  }
1819 
1821  ompBuilder->createCanonicalLoop(
1822  loc, bodyGen, lowerBound, upperBound, step,
1823  /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);
1824 
1825  if (failed(handleError(loopResult, *loopOp)))
1826  return failure();
1827 
1828  loopInfos.push_back(*loopResult);
1829  }
1830 
1831  // Collapse loops. Store the insertion point because LoopInfos may get
1832  // invalidated.
1833  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
1834  llvm::CanonicalLoopInfo *loopInfo =
1835  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
1836 
1837  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1838 
1839  // TODO: Handle doacross loops when the ordered clause has a parameter.
1840  bool isOrdered = wsloopOp.getOrdered().has_value();
1841  std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
1842  bool isSimd = wsloopOp.getScheduleSimd();
1843 
1844  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
1845  ompBuilder->applyWorkshareLoop(
1846  ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(),
1847  convertToScheduleKind(schedule), chunk, isSimd,
1848  scheduleMod == omp::ScheduleModifier::monotonic,
1849  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered);
1850 
1851  if (failed(handleError(wsloopIP, opInst)))
1852  return failure();
1853 
1854  // Continue building IR after the loop. Note that the LoopInfo returned by
1855  // `collapseLoops` points inside the outermost loop and is intended for
1856  // potential further loop transformations. Use the insertion point stored
1857  // before collapsing loops instead.
1858  builder.restoreIP(afterIP);
1859 
1860  // Process the reductions if required.
1861  return createReductionsAndCleanup(wsloopOp, builder, moduleTranslation,
1862  allocaIP, reductionDecls,
1863  privateReductionVariables, isByRef);
1864 }
1865 
1866 /// Converts the OpenMP parallel operation to LLVM IR.
1867 static LogicalResult
1868 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
1869  LLVM::ModuleTranslation &moduleTranslation) {
1870  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1871  ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionByref());
1872  assert(isByRef.size() == opInst.getNumReductionVars());
1873  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1874 
1875  if (failed(checkImplementationStatus(*opInst)))
1876  return failure();
1877 
1878  // Collect delayed privatization declarations
1879  MutableArrayRef<BlockArgument> privateBlockArgs =
1880  cast<omp::BlockArgOpenMPOpInterface>(*opInst).getPrivateBlockArgs();
1881  SmallVector<mlir::Value> mlirPrivateVars;
1882  SmallVector<llvm::Value *> llvmPrivateVars;
1883  SmallVector<omp::PrivateClauseOp> privateDecls;
1884  mlirPrivateVars.reserve(privateBlockArgs.size());
1885  llvmPrivateVars.reserve(privateBlockArgs.size());
1886  collectPrivatizationDecls(opInst, privateDecls);
1887  for (mlir::Value privateVar : opInst.getPrivateVars())
1888  mlirPrivateVars.push_back(privateVar);
1889 
1890  // Collect reduction declarations
1891  SmallVector<omp::DeclareReductionOp> reductionDecls;
1892  collectReductionDecls(opInst, reductionDecls);
1893  SmallVector<llvm::Value *> privateReductionVariables(
1894  opInst.getNumReductionVars());
1895  SmallVector<DeferredStore> deferredStores;
1896 
1897  auto bodyGenCB = [&](InsertPointTy allocaIP,
1898  InsertPointTy codeGenIP) -> llvm::Error {
1900  builder, moduleTranslation, privateBlockArgs, privateDecls,
1901  mlirPrivateVars, llvmPrivateVars, allocaIP);
1902  if (handleError(afterAllocas, *opInst).failed())
1903  return llvm::make_error<PreviouslyReportedError>();
1904 
1905  // Allocate reduction vars
1906  DenseMap<Value, llvm::Value *> reductionVariableMap;
1907 
1908  MutableArrayRef<BlockArgument> reductionArgs =
1909  cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs();
1910 
1911  allocaIP =
1912  InsertPointTy(allocaIP.getBlock(),
1913  allocaIP.getBlock()->getTerminator()->getIterator());
1914 
1915  if (failed(allocReductionVars(
1916  opInst, reductionArgs, builder, moduleTranslation, allocaIP,
1917  reductionDecls, privateReductionVariables, reductionVariableMap,
1918  deferredStores, isByRef)))
1919  return llvm::make_error<PreviouslyReportedError>();
1920 
1921  // Apply copy region for firstprivate.
1922  bool needsFirstprivate =
1923  llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1924  return privOp.getDataSharingType() ==
1925  omp::DataSharingClauseType::FirstPrivate;
1926  });
1927  if (needsFirstprivate) {
1928  // Find the end of the allocation blocks
1929  assert(afterAllocas.get()->getSinglePredecessor());
1930  builder.SetInsertPoint(
1931  afterAllocas.get()->getSinglePredecessor()->getTerminator());
1932  llvm::BasicBlock *copyBlock =
1933  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1934  builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
1935  }
1936  for (auto [decl, mlirVar, llvmVar] :
1937  llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1938  if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1939  continue;
1940 
1941  // copyRegion implements `lhs = rhs`
1942  Region &copyRegion = decl.getCopyRegion();
1943 
1944  // map copyRegion rhs arg
1945  llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
1946  assert(nonPrivateVar);
1947  moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1948 
1949  // map copyRegion lhs arg
1950  moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1951 
1952  // in-place convert copy region
1953  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1954  if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
1955  builder, moduleTranslation)))
1956  return llvm::createStringError(
1957  "failed to inline `copy` region of `omp.private`");
1958 
1959  // ignore unused value yielded from copy region
1960 
1961  // clear copy region block argument mapping in case it needs to be
1962  // re-created with different sources for reuse of the same reduction
1963  // decl
1964  moduleTranslation.forgetMapping(copyRegion);
1965  }
1966 
1967  // Initialize reduction vars
1968  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1969  llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
1970  allocaIP =
1971  InsertPointTy(allocaIP.getBlock(),
1972  allocaIP.getBlock()->getTerminator()->getIterator());
1973 
1974  builder.restoreIP(allocaIP);
1975  SmallVector<llvm::Value *> byRefVars(opInst.getNumReductionVars());
1976  for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
1977  if (isByRef[i]) {
1978  if (!reductionDecls[i].getAllocRegion().empty())
1979  continue;
1980 
1981  // TODO: remove after all users of by-ref are updated to use the alloc
1982  // region: Allocate reduction variable (which is a pointer to the real
1983  // reduciton variable allocated in the inlined region)
1984  byRefVars[i] = builder.CreateAlloca(
1985  moduleTranslation.convertType(reductionDecls[i].getType()));
1986  }
1987  }
1988 
1989  builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca());
1990 
1991  // insert stores deferred until after all allocas
1992  // these store the results of the alloc region into the allocation for the
1993  // pointer to the reduction variable
1994  for (auto [data, addr] : deferredStores)
1995  builder.CreateStore(data, addr);
1996 
1997  for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
1999 
2000  // map the block argument
2001  mapInitializationArgs(opInst, moduleTranslation, reductionDecls,
2002  reductionVariableMap, i);
2003  if (failed(inlineConvertOmpRegions(
2004  reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
2005  builder, moduleTranslation, &phis)))
2006  return llvm::createStringError(
2007  "failed to inline `init` region of `omp.declare_reduction`");
2008  assert(phis.size() == 1 &&
2009  "expected one value to be yielded from the "
2010  "reduction neutral element declaration region");
2011 
2012  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
2013 
2014  if (isByRef[i]) {
2015  if (!reductionDecls[i].getAllocRegion().empty())
2016  continue;
2017 
2018  // TODO: remove after all users of by-ref are updated to use the alloc
2019 
2020  // Store the result of the inlined region to the allocated reduction var
2021  // ptr
2022  builder.CreateStore(phis[0], byRefVars[i]);
2023 
2024  privateReductionVariables[i] = byRefVars[i];
2025  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
2026  reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]);
2027  } else {
2028  // for by-ref case the store is inside of the reduction init region
2029  builder.CreateStore(phis[0], privateReductionVariables[i]);
2030  // the rest is done in allocByValReductionVars
2031  }
2032 
2033  // clear block argument mapping in case it needs to be re-created with a
2034  // different source for another use of the same reduction decl
2035  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
2036  }
2037 
2038  // Store the mapping between reduction variables and their private copies on
2039  // ModuleTranslation stack. It can be then recovered when translating
2040  // omp.reduce operations in a separate call.
2042  moduleTranslation, reductionVariableMap);
2043 
2044  // Save the alloca insertion point on ModuleTranslation stack for use in
2045  // nested regions.
2047  moduleTranslation, allocaIP);
2048 
2049  // ParallelOp has only one region associated with it.
2050  builder.restoreIP(codeGenIP);
2052  opInst.getRegion(), "omp.par.region", builder, moduleTranslation);
2053  if (!regionBlock)
2054  return regionBlock.takeError();
2055 
2056  // Process the reductions if required.
2057  if (opInst.getNumReductionVars() > 0) {
2058  // Collect reduction info
2059  SmallVector<OwningReductionGen> owningReductionGens;
2060  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
2062  collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
2063  owningReductionGens, owningAtomicReductionGens,
2064  privateReductionVariables, reductionInfos);
2065 
2066  // Move to region cont block
2067  builder.SetInsertPoint((*regionBlock)->getTerminator());
2068 
2069  // Generate reductions from info
2070  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
2071  builder.SetInsertPoint(tempTerminator);
2072 
2073  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
2074  ompBuilder->createReductions(builder.saveIP(), allocaIP,
2075  reductionInfos, isByRef, false);
2076  if (!contInsertPoint)
2077  return contInsertPoint.takeError();
2078 
2079  if (!contInsertPoint->getBlock())
2080  return llvm::make_error<PreviouslyReportedError>();
2081 
2082  tempTerminator->eraseFromParent();
2083  builder.restoreIP(*contInsertPoint);
2084  }
2085  return llvm::Error::success();
2086  };
2087 
2088  auto privCB = [](InsertPointTy allocaIP, InsertPointTy codeGenIP,
2089  llvm::Value &, llvm::Value &val, llvm::Value *&replVal) {
2090  // tell OpenMPIRBuilder not to do anything. We handled Privatisation in
2091  // bodyGenCB.
2092  replVal = &val;
2093  return codeGenIP;
2094  };
2095 
2096  // TODO: Perform finalization actions for variables. This has to be
2097  // called for variables which have destructors/finalizers.
2098  auto finiCB = [&](InsertPointTy codeGenIP) -> llvm::Error {
2099  InsertPointTy oldIP = builder.saveIP();
2100  builder.restoreIP(codeGenIP);
2101 
2102  // if the reduction has a cleanup region, inline it here to finalize the
2103  // reduction variables
2104  SmallVector<Region *> reductionCleanupRegions;
2105  llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions),
2106  [](omp::DeclareReductionOp reductionDecl) {
2107  return &reductionDecl.getCleanupRegion();
2108  });
2109  if (failed(inlineOmpRegionCleanup(
2110  reductionCleanupRegions, privateReductionVariables,
2111  moduleTranslation, builder, "omp.reduction.cleanup")))
2112  return llvm::createStringError(
2113  "failed to inline `cleanup` region of `omp.declare_reduction`");
2114 
2115  SmallVector<Region *> privateCleanupRegions;
2116  llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
2117  [](omp::PrivateClauseOp privatizer) {
2118  return &privatizer.getDeallocRegion();
2119  });
2120 
2121  if (failed(inlineOmpRegionCleanup(
2122  privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
2123  "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
2124  return llvm::createStringError(
2125  "failed to inline `dealloc` region of `omp.private`");
2126 
2127  builder.restoreIP(oldIP);
2128  return llvm::Error::success();
2129  };
2130 
2131  llvm::Value *ifCond = nullptr;
2132  if (auto ifVar = opInst.getIfExpr())
2133  ifCond = moduleTranslation.lookupValue(ifVar);
2134  llvm::Value *numThreads = nullptr;
2135  if (auto numThreadsVar = opInst.getNumThreads())
2136  numThreads = moduleTranslation.lookupValue(numThreadsVar);
2137  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
2138  if (auto bind = opInst.getProcBindKind())
2139  pbKind = getProcBindKind(*bind);
2140  // TODO: Is the Parallel construct cancellable?
2141  bool isCancellable = false;
2142 
2143  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2144  findAllocaInsertPoint(builder, moduleTranslation);
2145  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2146 
2147  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2148  ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
2149  ifCond, numThreads, pbKind, isCancellable);
2150 
2151  if (failed(handleError(afterIP, *opInst)))
2152  return failure();
2153 
2154  builder.restoreIP(*afterIP);
2155  return success();
2156 }
2157 
2158 /// Convert Order attribute to llvm::omp::OrderKind.
2159 static llvm::omp::OrderKind
2160 convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
2161  if (!o)
2162  return llvm::omp::OrderKind::OMP_ORDER_unknown;
2163  switch (*o) {
2164  case omp::ClauseOrderKind::Concurrent:
2165  return llvm::omp::OrderKind::OMP_ORDER_concurrent;
2166  }
2167  llvm_unreachable("Unknown ClauseOrderKind kind");
2168 }
2169 
2170 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
2171 static LogicalResult
2172 convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
2173  LLVM::ModuleTranslation &moduleTranslation) {
2174  auto simdOp = cast<omp::SimdOp>(opInst);
2175  auto loopOp = cast<omp::LoopNestOp>(simdOp.getWrappedLoop());
2176 
2177  if (failed(checkImplementationStatus(opInst)))
2178  return failure();
2179 
2180  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2181 
2182  // Generator of the canonical loop body.
2185  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip,
2186  llvm::Value *iv) -> llvm::Error {
2187  // Make sure further conversions know about the induction variable.
2188  moduleTranslation.mapValue(
2189  loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
2190 
2191  // Capture the body insertion point for use in nested loops. BodyIP of the
2192  // CanonicalLoopInfo always points to the beginning of the entry block of
2193  // the body.
2194  bodyInsertPoints.push_back(ip);
2195 
2196  if (loopInfos.size() != loopOp.getNumLoops() - 1)
2197  return llvm::Error::success();
2198 
2199  // Convert the body of the loop.
2200  builder.restoreIP(ip);
2201  return convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder,
2202  moduleTranslation)
2203  .takeError();
2204  };
2205 
2206  // Delegate actual loop construction to the OpenMP IRBuilder.
2207  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
2208  // loop, i.e. it has a positive step, uses signed integer semantics.
2209  // Reconsider this code when the nested loop operation clearly supports more
2210  // cases.
2211  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2212  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
2213  llvm::Value *lowerBound =
2214  moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]);
2215  llvm::Value *upperBound =
2216  moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]);
2217  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]);
2218 
2219  // Make sure loop trip count are emitted in the preheader of the outermost
2220  // loop at the latest so that they are all available for the new collapsed
2221  // loop will be created below.
2222  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
2223  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
2224  if (i != 0) {
2225  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
2226  ompLoc.DL);
2227  computeIP = loopInfos.front()->getPreheaderIP();
2228  }
2229 
2231  ompBuilder->createCanonicalLoop(
2232  loc, bodyGen, lowerBound, upperBound, step,
2233  /*IsSigned=*/true, /*InclusiveStop=*/true, computeIP);
2234 
2235  if (failed(handleError(loopResult, *loopOp)))
2236  return failure();
2237 
2238  loopInfos.push_back(*loopResult);
2239  }
2240 
2241  // Collapse loops.
2242  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
2243  llvm::CanonicalLoopInfo *loopInfo =
2244  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
2245 
2246  llvm::ConstantInt *simdlen = nullptr;
2247  if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
2248  simdlen = builder.getInt64(simdlenVar.value());
2249 
2250  llvm::ConstantInt *safelen = nullptr;
2251  if (std::optional<uint64_t> safelenVar = simdOp.getSafelen())
2252  safelen = builder.getInt64(safelenVar.value());
2253 
2254  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
2255  llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder());
2256  ompBuilder->applySimd(loopInfo, alignedVars,
2257  simdOp.getIfExpr()
2258  ? moduleTranslation.lookupValue(simdOp.getIfExpr())
2259  : nullptr,
2260  order, simdlen, safelen);
2261 
2262  builder.restoreIP(afterIP);
2263  return success();
2264 }
2265 
2266 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
2267 static llvm::AtomicOrdering
2268 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
2269  if (!ao)
2270  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
2271 
2272  switch (*ao) {
2273  case omp::ClauseMemoryOrderKind::Seq_cst:
2274  return llvm::AtomicOrdering::SequentiallyConsistent;
2275  case omp::ClauseMemoryOrderKind::Acq_rel:
2276  return llvm::AtomicOrdering::AcquireRelease;
2277  case omp::ClauseMemoryOrderKind::Acquire:
2278  return llvm::AtomicOrdering::Acquire;
2279  case omp::ClauseMemoryOrderKind::Release:
2280  return llvm::AtomicOrdering::Release;
2281  case omp::ClauseMemoryOrderKind::Relaxed:
2282  return llvm::AtomicOrdering::Monotonic;
2283  }
2284  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
2285 }
2286 
2287 /// Convert omp.atomic.read operation to LLVM IR.
2288 static LogicalResult
2289 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
2290  LLVM::ModuleTranslation &moduleTranslation) {
2291  auto readOp = cast<omp::AtomicReadOp>(opInst);
2292  if (failed(checkImplementationStatus(opInst)))
2293  return failure();
2294 
2295  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2296 
2297  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2298 
2299  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrder());
2300  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
2301  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
2302 
2303  llvm::Type *elementType =
2304  moduleTranslation.convertType(readOp.getElementType());
2305 
2306  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
2307  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
2308  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
2309  return success();
2310 }
2311 
2312 /// Converts an omp.atomic.write operation to LLVM IR.
2313 static LogicalResult
2314 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
2315  LLVM::ModuleTranslation &moduleTranslation) {
2316  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
2317  if (failed(checkImplementationStatus(opInst)))
2318  return failure();
2319 
2320  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2321 
2322  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2323  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder());
2324  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
2325  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
2326  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
2327  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
2328  /*isVolatile=*/false};
2329  builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
2330  return success();
2331 }
2332 
2333 /// Converts an LLVM dialect binary operation to the corresponding enum value
2334 /// for `atomicrmw` supported binary operation.
2335 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
2337  .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
2338  .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
2339  .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
2340  .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
2341  .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
2342  .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
2343  .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
2344  .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
2345  .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
2346  .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
2347 }
2348 
/// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
///
/// If the update region consists of exactly one update operation plus the
/// terminator and that operation is expressible as an `atomicrmw` binop,
/// the builder may emit an atomicrmw; otherwise it falls back to a
/// cmpxchg loop driven by the `updateFn` callback below.
static LogicalResult
convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
                       llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  if (failed(checkImplementationStatus(*opInst)))
    return failure();

  // Convert values and types.
  auto &innerOpList = opInst.getRegion().front().getOperations();
  bool isXBinopExpr{false};
  llvm::AtomicRMWInst::BinOp binop;
  mlir::Value mlirExpr;
  llvm::Value *llvmExpr = nullptr;
  llvm::Value *llvmX = nullptr;
  llvm::Type *llvmXElementType = nullptr;
  if (innerOpList.size() == 2) {
    // The two operations here are the update and the terminator.
    // Since we can identify the update operation, there is a possibility
    // that we can generate the atomicrmw instruction.
    mlir::Operation &innerOp = *opInst.getRegion().front().begin();
    if (!llvm::is_contained(innerOp.getOperands(),
                            opInst.getRegion().getArgument(0))) {
      return opInst.emitError("no atomic update operation with region argument"
                              " as operand found inside atomic.update region");
    }
    binop = convertBinOpToAtomic(innerOp);
    // Determine whether the form is `x binop expr` (region argument on the
    // left) or `expr binop x`; the other operand is the update expression.
    isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
    mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  } else {
    // Since the update region includes more than one operation
    // we will resort to generating a cmpxchg loop.
    binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
  }
  llvmX = moduleTranslation.lookupValue(opInst.getX());
  // The element type is taken from the region argument, which stands for the
  // loaded value of x inside the update region.
  llvmXElementType = moduleTranslation.convertType(
      opInst.getRegion().getArgument(0).getType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(opInst.getMemoryOrder());

  // Generate update code. The callback translates the update region's block
  // at the builder's current insertion point, with the region argument bound
  // to the atomically loaded value, and returns the value yielded by the
  // region's omp.yield terminator.
  auto updateFn =
      [&opInst, &moduleTranslation](
          llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    Block &bb = *opInst.getRegion().begin();
    moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
                                     atomicOrdering, binop, updateFn,
                                     isXBinopExpr);

  if (failed(handleError(afterIP, *opInst)))
    return failure();

  // Continue emitting IR after the atomic construct.
  builder.restoreIP(*afterIP);
  return success();
}
2427 
/// Converts an omp.atomic.capture operation to LLVM IR using the
/// OpenMPIRBuilder. The capture region pairs an atomic read with either an
/// atomic update or an atomic write; `isPostfixUpdate` records whether the
/// read captures the value before (postfix) or after (prefix) the change.
static LogicalResult
convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
                        llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  if (failed(checkImplementationStatus(*atomicCaptureOp)))
    return failure();

  mlir::Value mlirExpr;
  bool isXBinopExpr = false, isPostfixUpdate = false;
  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;

  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();

  assert((atomicUpdateOp || atomicWriteOp) &&
         "internal op must be an atomic.update or atomic.write op");

  if (atomicWriteOp) {
    // A write always captures the old value, so it behaves as a postfix
    // update with the written expression as the new value.
    isPostfixUpdate = true;
    mlirExpr = atomicWriteOp.getExpr();
  } else {
    // Postfix when the update is the second op in the capture region,
    // i.e. the read happens first.
    isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
                      atomicCaptureOp.getAtomicUpdateOp().getOperation();
    auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
    // Find the binary update operation that uses the region argument
    // and get the expression to update
    if (innerOpList.size() == 2) {
      mlir::Operation &innerOp = *atomicUpdateOp.getRegion().front().begin();
      if (!llvm::is_contained(innerOp.getOperands(),
                              atomicUpdateOp.getRegion().getArgument(0))) {
        return atomicUpdateOp.emitError(
            "no atomic update operation with region argument"
            " as operand found inside atomic.update region");
      }
      binop = convertBinOpToAtomic(innerOp);
      // `x binop expr` vs `expr binop x`: which side carries the region
      // argument decides which operand is the update expression.
      isXBinopExpr =
          innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
      mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    } else {
      // More than one op in the region: fall back to a cmpxchg loop.
      binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
    }
  }

  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  llvm::Value *llvmX =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
  llvm::Value *llvmV =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
  llvm::Type *llvmXElementType = moduleTranslation.convertType(
      atomicCaptureOp.getAtomicReadOp().getElementType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(atomicCaptureOp.getMemoryOrder());

  // Computes the new value for x. For a nested atomic.write this is simply
  // the written expression; for an update, translate the update region with
  // the region argument bound to the atomically loaded value and return the
  // value yielded by its omp.yield terminator.
  auto updateFn =
      [&](llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    if (atomicWriteOp)
      return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
    Block &bb = *atomicUpdateOp.getRegion().begin();
    moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
                               atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicCapture(
          ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
          binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr);

  if (failed(handleError(afterIP, *atomicCaptureOp)))
    return failure();

  // Continue emitting IR after the atomic construct.
  builder.restoreIP(*afterIP);
  return success();
}
2522 
2523 /// Converts an OpenMP Threadprivate operation into LLVM IR using
2524 /// OpenMPIRBuilder.
2525 static LogicalResult
2526 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
2527  LLVM::ModuleTranslation &moduleTranslation) {
2528  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2529  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
2530 
2531  if (failed(checkImplementationStatus(opInst)))
2532  return failure();
2533 
2534  Value symAddr = threadprivateOp.getSymAddr();
2535  auto *symOp = symAddr.getDefiningOp();
2536  if (!isa<LLVM::AddressOfOp>(symOp))
2537  return opInst.emitError("Addressing symbol not found");
2538  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
2539 
2540  LLVM::GlobalOp global =
2541  addressOfOp.getGlobal(moduleTranslation.symbolTable());
2542  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
2543  llvm::Type *type = globalValue->getValueType();
2544  llvm::TypeSize typeSize =
2545  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
2546  type);
2547  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
2548  llvm::StringRef suffix = llvm::StringRef(".cache", 6);
2549  std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str();
2550  llvm::Value *callInst =
2551  moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate(
2552  ompLoc, globalValue, size, cacheName);
2553  moduleTranslation.mapValue(opInst.getResult(0), callInst);
2554  return success();
2555 }
2556 
2557 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
2558 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
2559  switch (deviceClause) {
2560  case mlir::omp::DeclareTargetDeviceType::host:
2561  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
2562  break;
2563  case mlir::omp::DeclareTargetDeviceType::nohost:
2564  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
2565  break;
2566  case mlir::omp::DeclareTargetDeviceType::any:
2567  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
2568  break;
2569  }
2570  llvm_unreachable("unhandled device clause");
2571 }
2572 
2573 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
2575  mlir::omp::DeclareTargetCaptureClause captureClause) {
2576  switch (captureClause) {
2577  case mlir::omp::DeclareTargetCaptureClause::to:
2578  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
2579  case mlir::omp::DeclareTargetCaptureClause::link:
2580  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
2581  case mlir::omp::DeclareTargetCaptureClause::enter:
2582  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
2583  }
2584  llvm_unreachable("unhandled capture clause");
2585 }
2586 
/// Builds the name suffix for the reference pointer global generated for a
/// declare target variable. Private globals additionally embed a hex
/// file-unique ID (derived from the defining file/line) so same-named
/// privates from different files do not collide; the suffix always ends in
/// "_decl_tgt_ref_ptr".
static llvm::SmallString<64>
getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
                             llvm::OpenMPIRBuilder &ompBuilder) {
  llvm::SmallString<64> suffix;
  llvm::raw_svector_ostream os(suffix);
  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
    // NOTE(review): findInstanceOf may return a null FileLineColLoc when the
    // global's location carries no file/line info, and `loc` is then used in
    // the callback below — confirm callers always provide such locations.
    auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
    auto fileInfoCallBack = [&loc]() {
      return std::pair<std::string, uint64_t>(
          llvm::StringRef(loc.getFilename()), loc.getLine());
    };

    // Embed the per-file unique ID computed by the OpenMPIRBuilder.
    os << llvm::format(
        "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
  }
  os << "_decl_tgt_ref_ptr";

  return suffix;
}
2606 
2607 static bool isDeclareTargetLink(mlir::Value value) {
2608  if (auto addressOfOp =
2609  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
2610  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
2611  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
2612  if (auto declareTargetGlobal =
2613  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
2614  if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
2615  mlir::omp::DeclareTargetCaptureClause::link)
2616  return true;
2617  }
2618  return false;
2619 }
2620 
2621 // Returns the reference pointer generated by the lowering of the declare target
2622 // operation in cases where the link clause is used or the to clause is used in
2623 // USM mode.
2624 static llvm::Value *
2626  LLVM::ModuleTranslation &moduleTranslation) {
2627  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2628 
2629  // An easier way to do this may just be to keep track of any pointer
2630  // references and their mapping to their respective operation
2631  if (auto addressOfOp =
2632  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
2633  if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
2634  addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
2635  addressOfOp.getGlobalName()))) {
2636 
2637  if (auto declareTargetGlobal =
2638  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
2639  gOp.getOperation())) {
2640 
2641  // In this case, we must utilise the reference pointer generated by the
2642  // declare target operation, similar to Clang
2643  if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
2644  mlir::omp::DeclareTargetCaptureClause::link) ||
2645  (declareTargetGlobal.getDeclareTargetCaptureClause() ==
2646  mlir::omp::DeclareTargetCaptureClause::to &&
2647  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
2648  llvm::SmallString<64> suffix =
2649  getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
2650 
2651  if (gOp.getSymName().contains(suffix))
2652  return moduleTranslation.getLLVMModule()->getNamedValue(
2653  gOp.getSymName());
2654 
2655  return moduleTranslation.getLLVMModule()->getNamedValue(
2656  (gOp.getSymName().str() + suffix.str()).str());
2657  }
2658  }
2659  }
2660  }
2661 
2662  return nullptr;
2663 }
2664 
2665 namespace {
2666 // A small helper structure to contain data gathered
2667 // for map lowering and coalese it into one area and
2668 // avoiding extra computations such as searches in the
2669 // llvm module for lowered mapped variables or checking
2670 // if something is declare target (and retrieving the
2671 // value) more than neccessary.
2672 struct MapInfoData : llvm::OpenMPIRBuilder::MapInfosTy {
2673  llvm::SmallVector<bool, 4> IsDeclareTarget;
2674  llvm::SmallVector<bool, 4> IsAMember;
2675  // Identify if mapping was added by mapClause or use_device clauses.
2676  llvm::SmallVector<bool, 4> IsAMapping;
2679  // Stripped off array/pointer to get the underlying
2680  // element type
2682 
2683  /// Append arrays in \a CurInfo.
2684  void append(MapInfoData &CurInfo) {
2685  IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
2686  CurInfo.IsDeclareTarget.end());
2687  MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
2688  OriginalValue.append(CurInfo.OriginalValue.begin(),
2689  CurInfo.OriginalValue.end());
2690  BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
2691  llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
2692  }
2693 };
2694 } // namespace
2695 
2696 uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
2697  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
2698  arrTy.getElementType()))
2699  return getArrayElementSizeInBits(nestedArrTy, dl);
2700  return dl.getTypeSizeInBits(arrTy.getElementType());
2701 }
2702 
2703 // This function calculates the size to be offloaded for a specified type, given
2704 // its associated map clause (which can contain bounds information which affects
2705 // the total size), this size is calculated based on the underlying element type
2706 // e.g. given a 1-D array of ints, we will calculate the size from the integer
2707 // type * number of elements in the array. This size can be used in other
2708 // calculations but is ultimately used as an argument to the OpenMP runtimes
2709 // kernel argument structure which is generated through the combinedInfo data
2710 // structures.
2711 // This function is somewhat equivalent to Clang's getExprTypeSize inside of
2712 // CGOpenMPRuntime.cpp.
2713 llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
2714  Operation *clauseOp, llvm::Value *basePointer,
2715  llvm::Type *baseType, llvm::IRBuilderBase &builder,
2716  LLVM::ModuleTranslation &moduleTranslation) {
2717  if (auto memberClause =
2718  mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
2719  // This calculates the size to transfer based on bounds and the underlying
2720  // element type, provided bounds have been specified (Fortran
2721  // pointers/allocatables/target and arrays that have sections specified fall
2722  // into this as well).
2723  if (!memberClause.getBounds().empty()) {
2724  llvm::Value *elementCount = builder.getInt64(1);
2725  for (auto bounds : memberClause.getBounds()) {
2726  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2727  bounds.getDefiningOp())) {
2728  // The below calculation for the size to be mapped calculated from the
2729  // map.info's bounds is: (elemCount * [UB - LB] + 1), later we
2730  // multiply by the underlying element types byte size to get the full
2731  // size to be offloaded based on the bounds
2732  elementCount = builder.CreateMul(
2733  elementCount,
2734  builder.CreateAdd(
2735  builder.CreateSub(
2736  moduleTranslation.lookupValue(boundOp.getUpperBound()),
2737  moduleTranslation.lookupValue(boundOp.getLowerBound())),
2738  builder.getInt64(1)));
2739  }
2740  }
2741 
2742  // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives
2743  // the size in inconsistent byte or bit format.
2744  uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
2745  if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
2746  underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);
2747 
2748  // The size in bytes x number of elements, the sizeInBytes stored is
2749  // the underyling types size, e.g. if ptr<i32>, it'll be the i32's
2750  // size, so we do some on the fly runtime math to get the size in
2751  // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
2752  // some adjustment for members with more complex types.
2753  return builder.CreateMul(elementCount,
2754  builder.getInt64(underlyingTypeSzInBits / 8));
2755  }
2756  }
2757 
2758  return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
2759 }
2760 
2762  MapInfoData &mapData, SmallVectorImpl<Value> &mapVars,
2763  LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl,
2764  llvm::IRBuilderBase &builder, const ArrayRef<Value> &useDevPtrOperands = {},
2765  const ArrayRef<Value> &useDevAddrOperands = {}) {
2766  auto checkIsAMember = [](const auto &mapVars, auto mapOp) {
2767  // Check if this is a member mapping and correctly assign that it is, if
2768  // it is a member of a larger object.
2769  // TODO: Need better handling of members, and distinguishing of members
2770  // that are implicitly allocated on device vs explicitly passed in as
2771  // arguments.
2772  // TODO: May require some further additions to support nested record
2773  // types, i.e. member maps that can have member maps.
2774  for (Value mapValue : mapVars) {
2775  auto map = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
2776  for (auto member : map.getMembers())
2777  if (member == mapOp)
2778  return true;
2779  }
2780  return false;
2781  };
2782 
2783  // Process MapOperands
2784  for (Value mapValue : mapVars) {
2785  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
2786  Value offloadPtr =
2787  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
2788  mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
2789  mapData.Pointers.push_back(mapData.OriginalValue.back());
2790 
2791  if (llvm::Value *refPtr =
2792  getRefPtrIfDeclareTarget(offloadPtr,
2793  moduleTranslation)) { // declare target
2794  mapData.IsDeclareTarget.push_back(true);
2795  mapData.BasePointers.push_back(refPtr);
2796  } else { // regular mapped variable
2797  mapData.IsDeclareTarget.push_back(false);
2798  mapData.BasePointers.push_back(mapData.OriginalValue.back());
2799  }
2800 
2801  mapData.BaseType.push_back(
2802  moduleTranslation.convertType(mapOp.getVarType()));
2803  mapData.Sizes.push_back(
2804  getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
2805  mapData.BaseType.back(), builder, moduleTranslation));
2806  mapData.MapClause.push_back(mapOp.getOperation());
2807  mapData.Types.push_back(
2808  llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value()));
2809  mapData.Names.push_back(LLVM::createMappingInformation(
2810  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
2811  mapData.DevicePointers.push_back(llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2812  mapData.IsAMapping.push_back(true);
2813  mapData.IsAMember.push_back(checkIsAMember(mapVars, mapOp));
2814  }
2815 
2816  auto findMapInfo = [&mapData](llvm::Value *val,
2817  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
2818  unsigned index = 0;
2819  bool found = false;
2820  for (llvm::Value *basePtr : mapData.OriginalValue) {
2821  if (basePtr == val && mapData.IsAMapping[index]) {
2822  found = true;
2823  mapData.Types[index] |=
2824  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
2825  mapData.DevicePointers[index] = devInfoTy;
2826  }
2827  index++;
2828  }
2829  return found;
2830  };
2831 
2832  // Process useDevPtr(Addr)Operands
2833  auto addDevInfos = [&](const llvm::ArrayRef<Value> &useDevOperands,
2834  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
2835  for (Value mapValue : useDevOperands) {
2836  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
2837  Value offloadPtr =
2838  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
2839  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
2840 
2841  // Check if map info is already present for this entry.
2842  if (!findMapInfo(origValue, devInfoTy)) {
2843  mapData.OriginalValue.push_back(origValue);
2844  mapData.Pointers.push_back(mapData.OriginalValue.back());
2845  mapData.IsDeclareTarget.push_back(false);
2846  mapData.BasePointers.push_back(mapData.OriginalValue.back());
2847  mapData.BaseType.push_back(
2848  moduleTranslation.convertType(mapOp.getVarType()));
2849  mapData.Sizes.push_back(builder.getInt64(0));
2850  mapData.MapClause.push_back(mapOp.getOperation());
2851  mapData.Types.push_back(
2852  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
2853  mapData.Names.push_back(LLVM::createMappingInformation(
2854  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
2855  mapData.DevicePointers.push_back(devInfoTy);
2856  mapData.IsAMapping.push_back(false);
2857  mapData.IsAMember.push_back(checkIsAMember(useDevOperands, mapOp));
2858  }
2859  }
2860  };
2861 
2862  addDevInfos(useDevAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
2863  addDevInfos(useDevPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
2864 }
2865 
2866 static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
2867  auto *res = llvm::find(mapData.MapClause, memberOp);
2868  assert(res != mapData.MapClause.end() &&
2869  "MapInfoOp for member not found in MapData, cannot return index");
2870  return std::distance(mapData.MapClause.begin(), res);
2871 }
2872 
2873 static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
2874  bool first) {
2875  ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
2876  // Only 1 member has been mapped, we can return it.
2877  if (indexAttr.size() == 1)
2878  return cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp());
2879 
2880  llvm::SmallVector<size_t> indices(indexAttr.size());
2881  std::iota(indices.begin(), indices.end(), 0);
2882 
2883  llvm::sort(indices.begin(), indices.end(),
2884  [&](const size_t a, const size_t b) {
2885  auto memberIndicesA = cast<ArrayAttr>(indexAttr[a]);
2886  auto memberIndicesB = cast<ArrayAttr>(indexAttr[b]);
2887  for (const auto it : llvm::zip(memberIndicesA, memberIndicesB)) {
2888  int64_t aIndex = cast<IntegerAttr>(std::get<0>(it)).getInt();
2889  int64_t bIndex = cast<IntegerAttr>(std::get<1>(it)).getInt();
2890 
2891  if (aIndex == bIndex)
2892  continue;
2893 
2894  if (aIndex < bIndex)
2895  return first;
2896 
2897  if (aIndex > bIndex)
2898  return !first;
2899  }
2900 
2901  // Iterated the up until the end of the smallest member and
2902  // they were found to be equal up to that point, so select
2903  // the member with the lowest index count, so the "parent"
2904  return memberIndicesA.size() < memberIndicesB.size();
2905  });
2906 
2907  return llvm::cast<omp::MapInfoOp>(
2908  mapInfo.getMembers()[indices.front()].getDefiningOp());
2909 }
2910 
2911 /// This function calculates the array/pointer offset for map data provided
2912 /// with bounds operations, e.g. when provided something like the following:
2913 ///
2914 /// Fortran
2915 /// map(tofrom: array(2:5, 3:2))
2916 /// or
2917 /// C++
2918 /// map(tofrom: array[1:4][2:3])
2919 /// We must calculate the initial pointer offset to pass across, this function
2920 /// performs this using bounds.
2921 ///
2922 /// NOTE: which while specified in row-major order it currently needs to be
2923 /// flipped for Fortran's column order array allocation and access (as
2924 /// opposed to C++'s row-major, hence the backwards processing where order is
2925 /// important). This is likely important to keep in mind for the future when
2926 /// we incorporate a C++ frontend, both frontends will need to agree on the
2927 /// ordering of generated bounds operations (one may have to flip them) to
2928 /// make the below lowering frontend agnostic. The offload size
2929 /// calcualtion may also have to be adjusted for C++.
2930 std::vector<llvm::Value *>
2932  llvm::IRBuilderBase &builder, bool isArrayTy,
2933  OperandRange bounds) {
2934  std::vector<llvm::Value *> idx;
2935  // There's no bounds to calculate an offset from, we can safely
2936  // ignore and return no indices.
2937  if (bounds.empty())
2938  return idx;
2939 
2940  // If we have an array type, then we have its type so can treat it as a
2941  // normal GEP instruction where the bounds operations are simply indexes
2942  // into the array. We currently do reverse order of the bounds, which
2943  // I believe leans more towards Fortran's column-major in memory.
2944  if (isArrayTy) {
2945  idx.push_back(builder.getInt64(0));
2946  for (int i = bounds.size() - 1; i >= 0; --i) {
2947  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
2948  bounds[i].getDefiningOp())) {
2949  idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
2950  }
2951  }
2952  } else {
2953  // If we do not have an array type, but we have bounds, then we're dealing
2954  // with a pointer that's being treated like an array and we have the
2955  // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
2956  // address (pointer pointing to the actual data) so we must caclulate the
2957  // offset using a single index which the following two loops attempts to
2958  // compute.
2959 
2960  // Calculates the size offset we need to make per row e.g. first row or
2961  // column only needs to be offset by one, but the next would have to be
2962  // the previous row/column offset multiplied by the extent of current row.
2963  //
2964  // For example ([1][10][100]):
2965  //
2966  // - First row/column we move by 1 for each index increment
2967  // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
2968  // current) for 10 for each index increment
2969  // - Third row/column we would move by 10 (second row/column) *
2970  // (extent/size of current) 100 for 1000 for each index increment
2971  std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
2972  for (size_t i = 1; i < bounds.size(); ++i) {
2973  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
2974  bounds[i].getDefiningOp())) {
2975  dimensionIndexSizeOffset.push_back(builder.CreateMul(
2976  moduleTranslation.lookupValue(boundOp.getExtent()),
2977  dimensionIndexSizeOffset[i - 1]));
2978  }
2979  }
2980 
2981  // Now that we have calculated how much we move by per index, we must
2982  // multiply each lower bound offset in indexes by the size offset we
2983  // have calculated in the previous and accumulate the results to get
2984  // our final resulting offset.
2985  for (int i = bounds.size() - 1; i >= 0; --i) {
2986  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
2987  bounds[i].getDefiningOp())) {
2988  if (idx.empty())
2989  idx.emplace_back(builder.CreateMul(
2990  moduleTranslation.lookupValue(boundOp.getLowerBound()),
2991  dimensionIndexSizeOffset[i]));
2992  else
2993  idx.back() = builder.CreateAdd(
2994  idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
2995  boundOp.getLowerBound()),
2996  dimensionIndexSizeOffset[i]));
2997  }
2998  }
2999  }
3000 
3001  return idx;
3002 }
3003 
3004 // This creates two insertions into the MapInfosTy data structure for the
3005 // "parent" of a set of members, (usually a container e.g.
3006 // class/structure/derived type) when subsequent members have also been
3007 // explicitly mapped on the same map clause. Certain types, such as Fortran
3008 // descriptors are mapped like this as well, however, the members are
3009 // implicit as far as a user is concerned, but we must explicitly map them
3010 // internally.
3011 //
3012 // This function also returns the memberOfFlag for this particular parent,
3013 // which is utilised in subsequent member mappings (by modifying there map type
3014 // with it) to indicate that a member is part of this parent and should be
3015 // treated by the runtime as such. Important to achieve the correct mapping.
3016 //
3017 // This function borrows a lot from Clang's emitCombinedEntry function
3018 // inside of CGOpenMPRuntime.cpp
3019 static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
3020  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3021  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
3022  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
3023  uint64_t mapDataIndex, bool isTargetParams) {
3024  // Map the first segment of our structure
3025  combinedInfo.Types.emplace_back(
3026  isTargetParams
3027  ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
3028  : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE);
3029  combinedInfo.DevicePointers.emplace_back(
3030  mapData.DevicePointers[mapDataIndex]);
3031  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
3032  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
3033  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
3034 
3035  // Calculate size of the parent object being mapped based on the
3036  // addresses at runtime, highAddr - lowAddr = size. This of course
3037  // doesn't factor in allocated data like pointers, hence the further
3038  // processing of members specified by users, or in the case of
3039  // Fortran pointers and allocatables, the mapping of the pointed to
3040  // data by the descriptor (which itself, is a structure containing
3041  // runtime information on the dynamically allocated data).
3042  auto parentClause =
3043  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3044 
3045  llvm::Value *lowAddr, *highAddr;
3046  if (!parentClause.getPartialMap()) {
3047  lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
3048  builder.getPtrTy());
3049  highAddr = builder.CreatePointerCast(
3050  builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
3051  mapData.Pointers[mapDataIndex], 1),
3052  builder.getPtrTy());
3053  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
3054  } else {
3055  auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3056  int firstMemberIdx = getMapDataMemberIdx(
3057  mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
3058  lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
3059  builder.getPtrTy());
3060  int lastMemberIdx = getMapDataMemberIdx(
3061  mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
3062  highAddr = builder.CreatePointerCast(
3063  builder.CreateGEP(mapData.BaseType[lastMemberIdx],
3064  mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
3065  builder.getPtrTy());
3066  combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
3067  }
3068 
3069  llvm::Value *size = builder.CreateIntCast(
3070  builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
3071  builder.getInt64Ty(),
3072  /*isSigned=*/false);
3073  combinedInfo.Sizes.push_back(size);
3074 
3075  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
3076  ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
3077 
3078  // This creates the initial MEMBER_OF mapping that consists of
3079  // the parent/top level container (same as above effectively, except
3080  // with a fixed initial compile time size and separate maptype which
3081  // indicates the true mape type (tofrom etc.). This parent mapping is
3082  // only relevant if the structure in its totality is being mapped,
3083  // otherwise the above suffices.
3084  if (!parentClause.getPartialMap()) {
3085  // TODO: This will need to be expanded to include the whole host of logic
3086  // for the map flags that Clang currently supports (e.g. it should do some
3087  // further case specific flag modifications). For the moment, it handles
3088  // what we support as expected.
3089  llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
3090  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3091  combinedInfo.Types.emplace_back(mapFlag);
3092  combinedInfo.DevicePointers.emplace_back(
3094  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
3095  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
3096  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
3097  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
3098  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
3099  }
3100  return memberOfFlag;
3101 }
3102 
3103 // The intent is to verify if the mapped data being passed is a
3104 // pointer -> pointee that requires special handling in certain cases,
3105 // e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
3106 //
3107 // There may be a better way to verify this, but unfortunately with
3108 // opaque pointers we lose the ability to easily check if something is
3109 // a pointer whilst maintaining access to the underlying type.
3110 static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
3111  // If we have a varPtrPtr field assigned then the underlying type is a pointer
3112  if (mapOp.getVarPtrPtr())
3113  return true;
3114 
3115  // If the map data is declare target with a link clause, then it's represented
3116  // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
3117  // no relation to pointers.
3118  if (isDeclareTargetLink(mapOp.getVarPtr()))
3119  return true;
3120 
3121  return false;
3122 }
3123 
3124 // This function is intended to add explicit mappings of members
3126  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3127  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
3128  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
3129  uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
3130 
3131  auto parentClause =
3132  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3133 
3134  for (auto mappedMembers : parentClause.getMembers()) {
3135  auto memberClause =
3136  llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp());
3137  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
3138 
3139  assert(memberDataIdx >= 0 && "could not find mapped member of structure");
3140 
3141  // If we're currently mapping a pointer to a block of data, we must
3142  // initially map the pointer, and then attatch/bind the data with a
3143  // subsequent map to the pointer. This segment of code generates the
3144  // pointer mapping, which can in certain cases be optimised out as Clang
3145  // currently does in its lowering. However, for the moment we do not do so,
3146  // in part as we currently have substantially less information on the data
3147  // being mapped at this stage.
3148  if (checkIfPointerMap(memberClause)) {
3149  auto mapFlag = llvm::omp::OpenMPOffloadMappingFlags(
3150  memberClause.getMapType().value());
3151  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3152  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
3153  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3154  combinedInfo.Types.emplace_back(mapFlag);
3155  combinedInfo.DevicePointers.emplace_back(
3157  combinedInfo.Names.emplace_back(
3158  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
3159  combinedInfo.BasePointers.emplace_back(
3160  mapData.BasePointers[mapDataIndex]);
3161  combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
3162  combinedInfo.Sizes.emplace_back(builder.getInt64(
3163  moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
3164  }
3165 
3166  // Same MemberOfFlag to indicate its link with parent and other members
3167  // of.
3168  auto mapFlag =
3169  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType().value());
3170  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3171  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
3172  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3173  if (checkIfPointerMap(memberClause))
3174  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
3175 
3176  combinedInfo.Types.emplace_back(mapFlag);
3177  combinedInfo.DevicePointers.emplace_back(
3178  mapData.DevicePointers[memberDataIdx]);
3179  combinedInfo.Names.emplace_back(
3180  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
3181  uint64_t basePointerIndex =
3182  checkIfPointerMap(memberClause) ? memberDataIdx : mapDataIndex;
3183  combinedInfo.BasePointers.emplace_back(
3184  mapData.BasePointers[basePointerIndex]);
3185  combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
3186  combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
3187  }
3188 }
3189 
3190 static void
3191 processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
3192  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
3193  bool isTargetParams, int mapDataParentIdx = -1) {
3194  // Declare Target Mappings are excluded from being marked as
3195  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
3196  // marked with OMP_MAP_PTR_AND_OBJ instead.
3197  auto mapFlag = mapData.Types[mapDataIdx];
3198  auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
3199 
3200  bool isPtrTy = checkIfPointerMap(mapInfoOp);
3201  if (isPtrTy)
3202  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
3203 
3204  if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx])
3205  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3206 
3207  if (mapInfoOp.getMapCaptureType().value() ==
3208  omp::VariableCaptureKind::ByCopy &&
3209  !isPtrTy)
3210  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
3211 
3212  // if we're provided a mapDataParentIdx, then the data being mapped is
3213  // part of a larger object (in a parent <-> member mapping) and in this
3214  // case our BasePointer should be the parent.
3215  if (mapDataParentIdx >= 0)
3216  combinedInfo.BasePointers.emplace_back(
3217  mapData.BasePointers[mapDataParentIdx]);
3218  else
3219  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
3220 
3221  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
3222  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
3223  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
3224  combinedInfo.Types.emplace_back(mapFlag);
3225  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
3226 }
3227 
3229  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3230  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
3231  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
3232  uint64_t mapDataIndex, bool isTargetParams) {
3233  auto parentClause =
3234  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3235 
3236  // If we have a partial map (no parent referenced in the map clauses of the
3237  // directive, only members) and only a single member, we do not need to bind
3238  // the map of the member to the parent, we can pass the member separately.
3239  if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) {
3240  auto memberClause = llvm::cast<omp::MapInfoOp>(
3241  parentClause.getMembers()[0].getDefiningOp());
3242  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
3243  // Note: Clang treats arrays with explicit bounds that fall into this
3244  // category as a parent with map case, however, it seems this isn't a
3245  // requirement, and processing them as an individual map is fine. So,
3246  // we will handle them as individual maps for the moment, as it's
3247  // difficult for us to check this as we always require bounds to be
3248  // specified currently and it's also marginally more optimal (single
3249  // map rather than two). The difference may come from the fact that
3250  // Clang maps array without bounds as pointers (which we do not
3251  // currently do), whereas we treat them as arrays in all cases
3252  // currently.
3253  processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams,
3254  mapDataIndex);
3255  return;
3256  }
3257 
3258  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
3259  mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
3260  combinedInfo, mapData, mapDataIndex, isTargetParams);
3261  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
3262  combinedInfo, mapData, mapDataIndex,
3263  memberOfParentFlag);
3264 }
3265 
3266 // This is a variation on Clang's GenerateOpenMPCapturedVars, which
3267 // generates different operation (e.g. load/store) combinations for
3268 // arguments to the kernel, based on map capture kinds which are then
3269 // utilised in the combinedInfo in place of the original Map value.
3270 static void
3271 createAlteredByCaptureMap(MapInfoData &mapData,
3272  LLVM::ModuleTranslation &moduleTranslation,
3273  llvm::IRBuilderBase &builder) {
3274  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
3275  // if it's declare target, skip it, it's handled separately.
3276  if (!mapData.IsDeclareTarget[i]) {
3277  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
3278  omp::VariableCaptureKind captureKind =
3279  mapOp.getMapCaptureType().value_or(omp::VariableCaptureKind::ByRef);
3280  bool isPtrTy = checkIfPointerMap(mapOp);
3281 
3282  // Currently handles array sectioning lowerbound case, but more
3283  // logic may be required in the future. Clang invokes EmitLValue,
3284  // which has specialised logic for special Clang types such as user
3285  // defines, so it is possible we will have to extend this for
3286  // structures or other complex types. As the general idea is that this
3287  // function mimics some of the logic from Clang that we require for
3288  // kernel argument passing from host -> device.
3289  switch (captureKind) {
3290  case omp::VariableCaptureKind::ByRef: {
3291  llvm::Value *newV = mapData.Pointers[i];
3292  std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
3293  moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
3294  mapOp.getBounds());
3295  if (isPtrTy)
3296  newV = builder.CreateLoad(builder.getPtrTy(), newV);
3297 
3298  if (!offsetIdx.empty())
3299  newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
3300  "array_offset");
3301  mapData.Pointers[i] = newV;
3302  } break;
3303  case omp::VariableCaptureKind::ByCopy: {
3304  llvm::Type *type = mapData.BaseType[i];
3305  llvm::Value *newV;
3306  if (mapData.Pointers[i]->getType()->isPointerTy())
3307  newV = builder.CreateLoad(type, mapData.Pointers[i]);
3308  else
3309  newV = mapData.Pointers[i];
3310 
3311  if (!isPtrTy) {
3312  auto curInsert = builder.saveIP();
3313  builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
3314  auto *memTempAlloc =
3315  builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
3316  builder.restoreIP(curInsert);
3317 
3318  builder.CreateStore(newV, memTempAlloc);
3319  newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
3320  }
3321 
3322  mapData.Pointers[i] = newV;
3323  mapData.BasePointers[i] = newV;
3324  } break;
3325  case omp::VariableCaptureKind::This:
3326  case omp::VariableCaptureKind::VLAType:
3327  mapData.MapClause[i]->emitOpError("Unhandled capture kind");
3328  break;
3329  }
3330  }
3331  }
3332 }
3333 
3334 // Generate all map related information and fill the combinedInfo.
3335 static void genMapInfos(llvm::IRBuilderBase &builder,
3336  LLVM::ModuleTranslation &moduleTranslation,
3337  DataLayout &dl,
3338  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
3339  MapInfoData &mapData, bool isTargetParams = false) {
3340  // We wish to modify some of the methods in which arguments are
3341  // passed based on their capture type by the target region, this can
3342  // involve generating new loads and stores, which changes the
3343  // MLIR value to LLVM value mapping, however, we only wish to do this
3344  // locally for the current function/target and also avoid altering
3345  // ModuleTranslation, so we remap the base pointer or pointer stored
3346  // in the map infos corresponding MapInfoData, which is later accessed
3347  // by genMapInfos and createTarget to help generate the kernel and
3348  // kernel arg structure. It primarily becomes relevant in cases like
3349  // bycopy, or byref range'd arrays. In the default case, we simply
3350  // pass thee pointer byref as both basePointer and pointer.
3351  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
3352  createAlteredByCaptureMap(mapData, moduleTranslation, builder);
3353 
3354  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3355 
3356  // We operate under the assumption that all vectors that are
3357  // required in MapInfoData are of equal lengths (either filled with
3358  // default constructed data or appropiate information) so we can
3359  // utilise the size from any component of MapInfoData, if we can't
3360  // something is missing from the initial MapInfoData construction.
3361  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
3362  // NOTE/TODO: We currently do not support arbitrary depth record
3363  // type mapping.
3364  if (mapData.IsAMember[i])
3365  continue;
3366 
3367  auto mapInfoOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[i]);
3368  if (!mapInfoOp.getMembers().empty()) {
3369  processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
3370  combinedInfo, mapData, i, isTargetParams);
3371  continue;
3372  }
3373 
3374  processIndividualMap(mapData, i, combinedInfo, isTargetParams);
3375  }
3376 }
3377 
3378 static LogicalResult
3379 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
3380  LLVM::ModuleTranslation &moduleTranslation) {
3381  llvm::Value *ifCond = nullptr;
3382  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
3383  SmallVector<Value> mapVars;
3384  SmallVector<Value> useDevicePtrVars;
3385  SmallVector<Value> useDeviceAddrVars;
3386  llvm::omp::RuntimeFunction RTLFn;
3387  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
3388 
3389  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3390  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
3391  /*SeparateBeginEndCalls=*/true);
3392 
3393  LogicalResult result =
3395  .Case([&](omp::TargetDataOp dataOp) {
3396  if (failed(checkImplementationStatus(*dataOp)))
3397  return failure();
3398 
3399  if (auto ifVar = dataOp.getIfExpr())
3400  ifCond = moduleTranslation.lookupValue(ifVar);
3401 
3402  if (auto devId = dataOp.getDevice())
3403  if (auto constOp =
3404  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
3405  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
3406  deviceID = intAttr.getInt();
3407 
3408  mapVars = dataOp.getMapVars();
3409  useDevicePtrVars = dataOp.getUseDevicePtrVars();
3410  useDeviceAddrVars = dataOp.getUseDeviceAddrVars();
3411  return success();
3412  })
3413  .Case([&](omp::TargetEnterDataOp enterDataOp) -> LogicalResult {
3414  if (failed(checkImplementationStatus(*enterDataOp)))
3415  return failure();
3416 
3417  if (auto ifVar = enterDataOp.getIfExpr())
3418  ifCond = moduleTranslation.lookupValue(ifVar);
3419 
3420  if (auto devId = enterDataOp.getDevice())
3421  if (auto constOp =
3422  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
3423  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
3424  deviceID = intAttr.getInt();
3425  RTLFn =
3426  enterDataOp.getNowait()
3427  ? llvm::omp::OMPRTL___tgt_target_data_begin_nowait_mapper
3428  : llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
3429  mapVars = enterDataOp.getMapVars();
3430  info.HasNoWait = enterDataOp.getNowait();
3431  return success();
3432  })
3433  .Case([&](omp::TargetExitDataOp exitDataOp) -> LogicalResult {
3434  if (failed(checkImplementationStatus(*exitDataOp)))
3435  return failure();
3436 
3437  if (auto ifVar = exitDataOp.getIfExpr())
3438  ifCond = moduleTranslation.lookupValue(ifVar);
3439 
3440  if (auto devId = exitDataOp.getDevice())
3441  if (auto constOp =
3442  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
3443  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
3444  deviceID = intAttr.getInt();
3445 
3446  RTLFn = exitDataOp.getNowait()
3447  ? llvm::omp::OMPRTL___tgt_target_data_end_nowait_mapper
3448  : llvm::omp::OMPRTL___tgt_target_data_end_mapper;
3449  mapVars = exitDataOp.getMapVars();
3450  info.HasNoWait = exitDataOp.getNowait();
3451  return success();
3452  })
3453  .Case([&](omp::TargetUpdateOp updateDataOp) -> LogicalResult {
3454  if (failed(checkImplementationStatus(*updateDataOp)))
3455  return failure();
3456 
3457  if (auto ifVar = updateDataOp.getIfExpr())
3458  ifCond = moduleTranslation.lookupValue(ifVar);
3459 
3460  if (auto devId = updateDataOp.getDevice())
3461  if (auto constOp =
3462  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
3463  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
3464  deviceID = intAttr.getInt();
3465 
3466  RTLFn =
3467  updateDataOp.getNowait()
3468  ? llvm::omp::OMPRTL___tgt_target_data_update_nowait_mapper
3469  : llvm::omp::OMPRTL___tgt_target_data_update_mapper;
3470  mapVars = updateDataOp.getMapVars();
3471  info.HasNoWait = updateDataOp.getNowait();
3472  return success();
3473  })
3474  .Default([&](Operation *op) {
3475  llvm_unreachable("unexpected operation");
3476  return failure();
3477  });
3478 
3479  if (failed(result))
3480  return failure();
3481 
3482  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3483 
3484  MapInfoData mapData;
3485  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, DL,
3486  builder, useDevicePtrVars, useDeviceAddrVars);
3487 
3488  // Fill up the arrays with all the mapped variables.
3489  llvm::OpenMPIRBuilder::MapInfosTy combinedInfo;
3490  auto genMapInfoCB =
3491  [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
3492  builder.restoreIP(codeGenIP);
3493  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
3494  return combinedInfo;
3495  };
3496 
3497  // Define a lambda to apply mappings between use_device_addr and
3498  // use_device_ptr base pointers, and their associated block arguments.
3499  auto mapUseDevice =
3500  [&moduleTranslation](
3501  llvm::OpenMPIRBuilder::DeviceInfoTy type,
3503  llvm::SmallVectorImpl<Value> &useDeviceVars, MapInfoData &mapInfoData,
3504  llvm::function_ref<llvm::Value *(llvm::Value *)> mapper = nullptr) {
3505  for (auto [arg, useDevVar] :
3506  llvm::zip_equal(blockArgs, useDeviceVars)) {
3507 
3508  auto getMapBasePtr = [](omp::MapInfoOp mapInfoOp) {
3509  return mapInfoOp.getVarPtrPtr() ? mapInfoOp.getVarPtrPtr()
3510  : mapInfoOp.getVarPtr();
3511  };
3512 
3513  auto useDevMap = cast<omp::MapInfoOp>(useDevVar.getDefiningOp());
3514  for (auto [mapClause, devicePointer, basePointer] : llvm::zip_equal(
3515  mapInfoData.MapClause, mapInfoData.DevicePointers,
3516  mapInfoData.BasePointers)) {
3517  auto mapOp = cast<omp::MapInfoOp>(mapClause);
3518  if (getMapBasePtr(mapOp) != getMapBasePtr(useDevMap) ||
3519  devicePointer != type)
3520  continue;
3521 
3522  if (llvm::Value *devPtrInfoMap =
3523  mapper ? mapper(basePointer) : basePointer) {
3524  moduleTranslation.mapValue(arg, devPtrInfoMap);
3525  break;
3526  }
3527  }
3528  }
3529  };
3530 
3531  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
3532  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType)
3533  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
3534  assert(isa<omp::TargetDataOp>(op) &&
3535  "BodyGen requested for non TargetDataOp");
3536  auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(op);
3537  Region &region = cast<omp::TargetDataOp>(op).getRegion();
3538  switch (bodyGenType) {
3539  case BodyGenTy::Priv:
3540  // Check if any device ptr/addr info is available
3541  if (!info.DevicePtrInfoMap.empty()) {
3542  builder.restoreIP(codeGenIP);
3543 
3544  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
3545  blockArgIface.getUseDeviceAddrBlockArgs(),
3546  useDeviceAddrVars, mapData,
3547  [&](llvm::Value *basePointer) -> llvm::Value * {
3548  if (!info.DevicePtrInfoMap[basePointer].second)
3549  return nullptr;
3550  return builder.CreateLoad(
3551  builder.getPtrTy(),
3552  info.DevicePtrInfoMap[basePointer].second);
3553  });
3554  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
3555  blockArgIface.getUseDevicePtrBlockArgs(), useDevicePtrVars,
3556  mapData, [&](llvm::Value *basePointer) {
3557  return info.DevicePtrInfoMap[basePointer].second;
3558  });
3559 
3560  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
3561  moduleTranslation)))
3562  return llvm::make_error<PreviouslyReportedError>();
3563  }
3564  break;
3565  case BodyGenTy::DupNoPriv:
3566  break;
3567  case BodyGenTy::NoPriv:
3568  // If device info is available then region has already been generated
3569  if (info.DevicePtrInfoMap.empty()) {
3570  builder.restoreIP(codeGenIP);
3571  // For device pass, if use_device_ptr(addr) mappings were present,
3572  // we need to link them here before codegen.
3573  if (ompBuilder->Config.IsTargetDevice.value_or(false)) {
3574  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
3575  blockArgIface.getUseDeviceAddrBlockArgs(),
3576  useDeviceAddrVars, mapData);
3577  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
3578  blockArgIface.getUseDevicePtrBlockArgs(),
3579  useDevicePtrVars, mapData);
3580  }
3581 
3582  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
3583  moduleTranslation)))
3584  return llvm::make_error<PreviouslyReportedError>();
3585  }
3586  break;
3587  }
3588  return builder.saveIP();
3589  };
3590 
3591  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3592  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3593  findAllocaInsertPoint(builder, moduleTranslation);
3594  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() {
3595  if (isa<omp::TargetDataOp>(op))
3596  return ompBuilder->createTargetData(
3597  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID),
3598  ifCond, info, genMapInfoCB, nullptr, bodyGenCB);
3599  return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
3600  builder.getInt64(deviceID), ifCond,
3601  info, genMapInfoCB, &RTLFn);
3602  }();
3603 
3604  if (failed(handleError(afterIP, *op)))
3605  return failure();
3606 
3607  builder.restoreIP(*afterIP);
3608  return success();
3609 }
3610 
3611 /// Lowers the FlagsAttr which is applied to the module on the device
3612 /// pass when offloading, this attribute contains OpenMP RTL globals that can
3613 /// be passed as flags to the frontend, otherwise they are set to default
3614 LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
3615  LLVM::ModuleTranslation &moduleTranslation) {
3616  if (!cast<mlir::ModuleOp>(op))
3617  return failure();
3618 
3619  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3620 
3621  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
3622  attribute.getOpenmpDeviceVersion());
3623 
3624  if (attribute.getNoGpuLib())
3625  return success();
3626 
3627  ompBuilder->createGlobalFlag(
3628  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
3629  "__omp_rtl_debug_kind");
3630  ompBuilder->createGlobalFlag(
3631  attribute
3632  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
3633  ,
3634  "__omp_rtl_assume_teams_oversubscription");
3635  ompBuilder->createGlobalFlag(
3636  attribute
3637  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
3638  ,
3639  "__omp_rtl_assume_threads_oversubscription");
3640  ompBuilder->createGlobalFlag(
3641  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
3642  "__omp_rtl_assume_no_thread_state");
3643  ompBuilder->createGlobalFlag(
3644  attribute
3645  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
3646  ,
3647  "__omp_rtl_assume_no_nested_parallelism");
3648  return success();
3649 }
3650 
3651 static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
3652  omp::TargetOp targetOp,
3653  llvm::StringRef parentName = "") {
3654  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
3655 
3656  assert(fileLoc && "No file found from location");
3657  StringRef fileName = fileLoc.getFilename().getValue();
3658 
3659  llvm::sys::fs::UniqueID id;
3660  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
3661  targetOp.emitError("Unable to get unique ID for file");
3662  return false;
3663  }
3664 
3665  uint64_t line = fileLoc.getLine();
3666  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
3667  id.getFile(), line);
3668  return true;
3669 }
3670 
3671 static void
3672 handleDeclareTargetMapVar(MapInfoData &mapData,
3673  LLVM::ModuleTranslation &moduleTranslation,
3674  llvm::IRBuilderBase &builder, llvm::Function *func) {
3675  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
3676  // In the case of declare target mapped variables, the basePointer is
3677  // the reference pointer generated by the convertDeclareTargetAttr
3678  // method. Whereas the kernelValue is the original variable, so for
3679  // the device we must replace all uses of this original global variable
3680  // (stored in kernelValue) with the reference pointer (stored in
3681  // basePointer for declare target mapped variables), as for device the
3682  // data is mapped into this reference pointer and should be loaded
3683  // from it, the original variable is discarded. On host both exist and
3684  // metadata is generated (elsewhere in the convertDeclareTargetAttr)
3685  // function to link the two variables in the runtime and then both the
3686  // reference pointer and the pointer are assigned in the kernel argument
3687  // structure for the host.
3688  if (mapData.IsDeclareTarget[i]) {
3689  // If the original map value is a constant, then we have to make sure all
3690  // of it's uses within the current kernel/function that we are going to
3691  // rewrite are converted to instructions, as we will be altering the old
3692  // use (OriginalValue) from a constant to an instruction, which will be
3693  // illegal and ICE the compiler if the user is a constant expression of
3694  // some kind e.g. a constant GEP.
3695  if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
3696  convertUsersOfConstantsToInstructions(constant, func, false);
3697 
3698  // The users iterator will get invalidated if we modify an element,
3699  // so we populate this vector of uses to alter each user on an
3700  // individual basis to emit its own load (rather than one load for
3701  // all).
3703  for (llvm::User *user : mapData.OriginalValue[i]->users())
3704  userVec.push_back(user);
3705 
3706  for (llvm::User *user : userVec) {
3707  if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
3708  if (insn->getFunction() == func) {
3709  auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
3710  mapData.BasePointers[i]);
3711  load->moveBefore(insn);
3712  user->replaceUsesOfWith(mapData.OriginalValue[i], load);
3713  }
3714  }
3715  }
3716  }
3717  }
3718 }
3719 
// The createDeviceArgumentAccessor function generates
// instructions for retrieving (accessing) kernel
// arguments inside of the device kernel for use by
// the kernel. This enables different semantics such as
// the creation of temporary copies of data allowing
// semantics like read-only/no host write back kernel
// arguments.
//
// This currently implements a very light version of Clang's
// EmitParmDecl's handling of direct argument handling as well
// as a portion of the argument access generation based on
// capture types found at the end of emitOutlinedFunctionPrologue
// in Clang. The indirect path handling of EmitParmDecl's may be
// required for future work, but a direct 1-to-1 copy doesn't seem
// possible as the logic is rather scattered throughout Clang's
// lowering and perhaps we wish to deviate slightly.
//
// \param mapData - A container containing vectors of information
// corresponding to the input argument, which should have a
// corresponding entry in the MapInfoData containers
// OriginalValue's.
// \param arg - This is the generated kernel function argument that
// corresponds to the passed in input argument. We generate different
// accesses of this Argument, based on capture type and other input
// related information.
// \param input - This is the host side value that will be passed to
// the kernel i.e. the kernel input, we rewrite all uses of this within
// the kernel (as we generate the kernel body based on the target's region
// which maintains references to the original input) to the retVal argument
// upon exit of this function inside of the OMPIRBuilder. This interlinks
// the kernel argument to future uses of it in the function providing
// appropriate "glue" instructions in between.
// \param retVal - This is the value that all uses of input inside of the
// kernel will be re-written to, the goal of this function is to generate
// an appropriate location for the kernel argument to be accessed from,
// e.g. ByRef will result in a temporary allocation location and then
// a store of the kernel argument into this allocated memory which
// will then be loaded from, ByCopy will use the allocated memory
// directly.
3759 static llvm::IRBuilderBase::InsertPoint
3760 createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
3761  llvm::Value *input, llvm::Value *&retVal,
3762  llvm::IRBuilderBase &builder,
3763  llvm::OpenMPIRBuilder &ompBuilder,
3764  LLVM::ModuleTranslation &moduleTranslation,
3765  llvm::IRBuilderBase::InsertPoint allocaIP,
3766  llvm::IRBuilderBase::InsertPoint codeGenIP) {
3767  builder.restoreIP(allocaIP);
3768 
3769  omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef;
3770 
3771  // Find the associated MapInfoData entry for the current input
3772  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
3773  if (mapData.OriginalValue[i] == input) {
3774  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
3775  capture =
3776  mapOp.getMapCaptureType().value_or(omp::VariableCaptureKind::ByRef);
3777 
3778  break;
3779  }
3780 
3781  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
3782  unsigned int defaultAS =
3783  ompBuilder.M.getDataLayout().getProgramAddressSpace();
3784 
3785  // Create the alloca for the argument the current point.
3786  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
3787 
3788  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
3789  v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS));
3790 
3791  builder.CreateStore(&arg, v);
3792 
3793  builder.restoreIP(codeGenIP);
3794 
3795  switch (capture) {
3796  case omp::VariableCaptureKind::ByCopy: {
3797  retVal = v;
3798  break;
3799  }
3800  case omp::VariableCaptureKind::ByRef: {
3801  retVal = builder.CreateAlignedLoad(
3802  v->getType(), v,
3803  ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
3804  break;
3805  }
3806  case omp::VariableCaptureKind::This:
3807  case omp::VariableCaptureKind::VLAType:
3808  // TODO: Consider returning error to use standard reporting for
3809  // unimplemented features.
3810  assert(false && "Currently unsupported capture kind");
3811  break;
3812  }
3813 
3814  return builder.saveIP();
3815 }
3816 
3817 static LogicalResult
3818 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
3819  LLVM::ModuleTranslation &moduleTranslation) {
3820  auto targetOp = cast<omp::TargetOp>(opInst);
3821  if (failed(checkImplementationStatus(opInst)))
3822  return failure();
3823 
3824  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3825  bool isTargetDevice = ompBuilder->Config.isTargetDevice();
3826  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
3827  auto &targetRegion = targetOp.getRegion();
3828  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
3829  SmallVector<Value> mapVars = targetOp.getMapVars();
3830  ArrayRef<BlockArgument> mapBlockArgs =
3831  cast<omp::BlockArgOpenMPOpInterface>(opInst).getMapBlockArgs();
3832  llvm::Function *llvmOutlinedFn = nullptr;
3833 
3834  // TODO: It can also be false if a compile-time constant `false` IF clause is
3835  // specified.
3836  bool isOffloadEntry =
3837  isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
3838 
3839  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3840  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
3841  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
3842  // Forward target-cpu and target-features function attributes from the
3843  // original function to the new outlined function.
3844  llvm::Function *llvmParentFn =
3845  moduleTranslation.lookupFunction(parentFn.getName());
3846  llvmOutlinedFn = codeGenIP.getBlock()->getParent();
3847  assert(llvmParentFn && llvmOutlinedFn &&
3848  "Both parent and outlined functions must exist at this point");
3849 
3850  if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
3851  attr.isStringAttribute())
3852  llvmOutlinedFn->addFnAttr(attr);
3853 
3854  if (auto attr = llvmParentFn->getFnAttribute("target-features");
3855  attr.isStringAttribute())
3856  llvmOutlinedFn->addFnAttr(attr);
3857 
3858  builder.restoreIP(codeGenIP);
3859  for (auto [arg, mapOp] : llvm::zip_equal(mapBlockArgs, mapVars)) {
3860  auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
3861  llvm::Value *mapOpValue =
3862  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
3863  moduleTranslation.mapValue(arg, mapOpValue);
3864  }
3865 
3866  // Do privatization after moduleTranslation has already recorded
3867  // mapped values.
3868  if (!targetOp.getPrivateVars().empty()) {
3869  builder.restoreIP(allocaIP);
3870 
3871  OperandRange privateVars = targetOp.getPrivateVars();
3872  std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
3873  MutableArrayRef<BlockArgument> privateBlockArgs =
3874  cast<omp::BlockArgOpenMPOpInterface>(opInst).getPrivateBlockArgs();
3875 
3876  for (auto [privVar, privatizerNameAttr, privBlockArg] :
3877  llvm::zip_equal(privateVars, *privateSyms, privateBlockArgs)) {
3878 
3879  SymbolRefAttr privSym = cast<SymbolRefAttr>(privatizerNameAttr);
3880  omp::PrivateClauseOp privatizer = findPrivatizer(&opInst, privSym);
3881  assert(privatizer.getDataSharingType() !=
3882  omp::DataSharingClauseType::FirstPrivate &&
3883  privatizer.getDeallocRegion().empty() &&
3884  "unsupported privatizer");
3885  moduleTranslation.mapValue(privatizer.getAllocMoldArg(),
3886  moduleTranslation.lookupValue(privVar));
3887  Region &allocRegion = privatizer.getAllocRegion();
3888  SmallVector<llvm::Value *, 1> yieldedValues;
3889  if (failed(inlineConvertOmpRegions(
3890  allocRegion, "omp.targetop.privatizer", builder,
3891  moduleTranslation, &yieldedValues))) {
3892  return llvm::createStringError(
3893  "failed to inline `alloc` region of `omp.private`");
3894  }
3895  assert(yieldedValues.size() == 1);
3896  moduleTranslation.mapValue(privBlockArg, yieldedValues.front());
3897  moduleTranslation.forgetMapping(allocRegion);
3898  builder.restoreIP(builder.saveIP());
3899  }
3900  }
3901 
3903  targetRegion, "omp.target", builder, moduleTranslation);
3904  if (!exitBlock)
3905  return exitBlock.takeError();
3906 
3907  builder.SetInsertPoint(*exitBlock);
3908  return builder.saveIP();
3909  };
3910 
3911  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3912  StringRef parentName = parentFn.getName();
3913 
3914  llvm::TargetRegionEntryInfo entryInfo;
3915 
3916  if (!getTargetEntryUniqueInfo(entryInfo, targetOp, parentName))
3917  return failure();
3918 
3919  int32_t defaultValTeams = -1;
3920  int32_t defaultValThreads = 0;
3921 
3922  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3923  findAllocaInsertPoint(builder, moduleTranslation);
3924 
3925  MapInfoData mapData;
3926  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
3927  builder);
3928 
3929  llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
3930  auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
3931  -> llvm::OpenMPIRBuilder::MapInfosTy & {
3932  builder.restoreIP(codeGenIP);
3933  genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, true);
3934  return combinedInfos;
3935  };
3936 
3937  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
3938  llvm::Value *&retVal, InsertPointTy allocaIP,
3939  InsertPointTy codeGenIP)
3940  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
3941  // We just return the unaltered argument for the host function
3942  // for now, some alterations may be required in the future to
3943  // keep host fallback functions working identically to the device
3944  // version (e.g. pass ByCopy values should be treated as such on
3945  // host and device, currently not always the case)
3946  if (!isTargetDevice) {
3947  retVal = cast<llvm::Value>(&arg);
3948  return codeGenIP;
3949  }
3950 
3951  return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
3952  *ompBuilder, moduleTranslation,
3953  allocaIP, codeGenIP);
3954  };
3955 
3957  for (size_t i = 0; i < mapVars.size(); ++i) {
3958  // declare target arguments are not passed to kernels as arguments
3959  // TODO: We currently do not handle cases where a member is explicitly
3960  // passed in as an argument, this will likley need to be handled in
3961  // the near future, rather than using IsAMember, it may be better to
3962  // test if the relevant BlockArg is used within the target region and
3963  // then use that as a basis for exclusion in the kernel inputs.
3964  if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
3965  kernelInput.push_back(mapData.OriginalValue[i]);
3966  }
3967 
3969  buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
3970  moduleTranslation, dds);
3971 
3972  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3973  moduleTranslation.getOpenMPBuilder()->createTarget(
3974  ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo,
3975  defaultValTeams, defaultValThreads, kernelInput, genMapInfoCB, bodyCB,
3976  argAccessorCB, dds, targetOp.getNowait());
3977 
3978  if (failed(handleError(afterIP, opInst)))
3979  return failure();
3980 
3981  builder.restoreIP(*afterIP);
3982 
3983  // Remap access operations to declare target reference pointers for the
3984  // device, essentially generating extra loadop's as necessary
3985  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
3986  handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
3987  llvmOutlinedFn);
3988 
3989  return success();
3990 }
3991 
3992 static LogicalResult
3993 convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
3994  LLVM::ModuleTranslation &moduleTranslation) {
3995  // Amend omp.declare_target by deleting the IR of the outlined functions
3996  // created for target regions. They cannot be filtered out from MLIR earlier
3997  // because the omp.target operation inside must be translated to LLVM, but
3998  // the wrapper functions themselves must not remain at the end of the
3999  // process. We know that functions where omp.declare_target does not match
4000  // omp.is_target_device at this stage can only be wrapper functions because
4001  // those that aren't are removed earlier as an MLIR transformation pass.
4002  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
4003  if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
4004  op->getParentOfType<ModuleOp>().getOperation())) {
4005  if (!offloadMod.getIsTargetDevice())
4006  return success();
4007 
4008  omp::DeclareTargetDeviceType declareType =
4009  attribute.getDeviceType().getValue();
4010 
4011  if (declareType == omp::DeclareTargetDeviceType::host) {
4012  llvm::Function *llvmFunc =
4013  moduleTranslation.lookupFunction(funcOp.getName());
 // NOTE(review): lookupFunction's result is dereferenced without a null
 // check — presumably the function is guaranteed to have been translated
 // by this point; confirm against the translation pipeline.
4014  llvmFunc->dropAllReferences();
4015  llvmFunc->eraseFromParent();
4016  }
4017  }
4018  return success();
4019  }
4020 
 // Globals marked declare target are registered with the offload runtime
 // (and, on the device, may get a reference-pointer indirection).
4021  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
4022  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
4023  if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
4024  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4025  bool isDeclaration = gOp.isDeclaration();
4026  bool isExternallyVisible =
4027  gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
4028  auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
4029  llvm::StringRef mangledName = gOp.getSymName();
4030  auto captureClause =
4031  convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
4032  auto deviceClause =
4033  convertToDeviceClauseKind(attribute.getDeviceType().getValue());
4034  // Unused for MLIR at the moment, required in Clang for
4035  // bookkeeping.
4036  std::vector<llvm::GlobalVariable *> generatedRefs;
4037 
4038  std::vector<llvm::Triple> targetTriple;
4039  auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>(
4040  op->getParentOfType<mlir::ModuleOp>()->getAttr(
4041  LLVM::LLVMDialect::getTargetTripleAttrName()))
4042  if (targetTripleAttr)
4043  targetTriple.emplace_back(targetTripleAttr.data());
4044 
 // Lazily produces (filename, line) from the op's location; falls back to
 // ("", 0) when no FileLineColLoc is attached.
4045  auto fileInfoCallBack = [&loc]() {
4046  std::string filename = "";
4047  std::uint64_t lineNo = 0;
4048 
4049  if (loc) {
4050  filename = loc.getFilename().str();
4051  lineNo = loc.getLine();
4052  }
4053 
4054  return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
4055  lineNo);
4056  };
4057 
4058  ompBuilder->registerTargetGlobalVariable(
4059  captureClause, deviceClause, isDeclaration, isExternallyVisible,
4060  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
4061  generatedRefs, /*OpenMPSimd*/ false, targetTriple,
4062  /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
4063  gVal->getType(), gVal);
4064 
4065  if (ompBuilder->Config.isTargetDevice() &&
4066  (attribute.getCaptureClause().getValue() !=
4067  mlir::omp::DeclareTargetCaptureClause::to ||
4068  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
4069  ompBuilder->getAddrOfDeclareTargetVar(
4070  captureClause, deviceClause, isDeclaration, isExternallyVisible,
4071  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
4072  generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
4073  /*GlobalInitializer*/ nullptr,
4074  /*VariableLinkage*/ nullptr);
4075  }
4076  }
4077  }
4078 
4079  return success();
4080 }
4081 
4082 // Returns true if the operation is inside a TargetOp or
4083 // is part of a declare target function.
4084 static bool isTargetDeviceOp(Operation *op) {
4085  // Assumes no reverse offloading
4086  if (op->getParentOfType<omp::TargetOp>())
4087  return true;
4088 
4089  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
4090  if (auto declareTargetIface =
4091  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
4092  parentFn.getOperation()))
4093  if (declareTargetIface.isDeclareTarget() &&
4094  declareTargetIface.getDeclareTargetDeviceType() !=
4095  mlir::omp::DeclareTargetDeviceType::host)
4096  return true;
4097 
4098  return false;
4099 }
4100 
4101 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
4102 /// (including OpenMP runtime calls).
4103 static LogicalResult
4104 convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
4105  LLVM::ModuleTranslation &moduleTranslation) {
4106 
4107  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4108 
4110  .Case([&](omp::BarrierOp op) -> LogicalResult {
4111  if (failed(checkImplementationStatus(*op)))
4112  return failure();
4113 
4114  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
4115  ompBuilder->createBarrier(builder.saveIP(),
4116  llvm::omp::OMPD_barrier);
4117  return handleError(afterIP, *op);
4118  })
4119  .Case([&](omp::TaskyieldOp op) {
4120  if (failed(checkImplementationStatus(*op)))
4121  return failure();
4122 
4123  ompBuilder->createTaskyield(builder.saveIP());
4124  return success();
4125  })
4126  .Case([&](omp::FlushOp op) {
4127  if (failed(checkImplementationStatus(*op)))
4128  return failure();
4129 
4130  // No support in Openmp runtime function (__kmpc_flush) to accept
4131  // the argument list.
4132  // OpenMP standard states the following:
4133  // "An implementation may implement a flush with a list by ignoring
4134  // the list, and treating it the same as a flush without a list."
4135  //
4136  // The argument list is discarded so that, flush with a list is treated
4137  // same as a flush without a list.
4138  ompBuilder->createFlush(builder.saveIP());
4139  return success();
4140  })
4141  .Case([&](omp::ParallelOp op) {
4142  return convertOmpParallel(op, builder, moduleTranslation);
4143  })
4144  .Case([&](omp::MaskedOp) {
4145  return convertOmpMasked(*op, builder, moduleTranslation);
4146  })
4147  .Case([&](omp::MasterOp) {
4148  return convertOmpMaster(*op, builder, moduleTranslation);
4149  })
4150  .Case([&](omp::CriticalOp) {
4151  return convertOmpCritical(*op, builder, moduleTranslation);
4152  })
4153  .Case([&](omp::OrderedRegionOp) {
4154  return convertOmpOrderedRegion(*op, builder, moduleTranslation);
4155  })
4156  .Case([&](omp::OrderedOp) {
4157  return convertOmpOrdered(*op, builder, moduleTranslation);
4158  })
4159  .Case([&](omp::WsloopOp) {
4160  return convertOmpWsloop(*op, builder, moduleTranslation);
4161  })
4162  .Case([&](omp::SimdOp) {
4163  return convertOmpSimd(*op, builder, moduleTranslation);
4164  })
4165  .Case([&](omp::AtomicReadOp) {
4166  return convertOmpAtomicRead(*op, builder, moduleTranslation);
4167  })
4168  .Case([&](omp::AtomicWriteOp) {
4169  return convertOmpAtomicWrite(*op, builder, moduleTranslation);
4170  })
4171  .Case([&](omp::AtomicUpdateOp op) {
4172  return convertOmpAtomicUpdate(op, builder, moduleTranslation);
4173  })
4174  .Case([&](omp::AtomicCaptureOp op) {
4175  return convertOmpAtomicCapture(op, builder, moduleTranslation);
4176  })
4177  .Case([&](omp::SectionsOp) {
4178  return convertOmpSections(*op, builder, moduleTranslation);
4179  })
4180  .Case([&](omp::SingleOp op) {
4181  return convertOmpSingle(op, builder, moduleTranslation);
4182  })
4183  .Case([&](omp::TeamsOp op) {
4184  return convertOmpTeams(op, builder, moduleTranslation);
4185  })
4186  .Case([&](omp::TaskOp op) {
4187  return convertOmpTaskOp(op, builder, moduleTranslation);
4188  })
4189  .Case([&](omp::TaskgroupOp op) {
4190  return convertOmpTaskgroupOp(op, builder, moduleTranslation);
4191  })
4192  .Case([&](omp::TaskwaitOp op) {
4193  return convertOmpTaskwaitOp(op, builder, moduleTranslation);
4194  })
4195  .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareReductionOp,
4196  omp::CriticalDeclareOp>([](auto op) {
4197  // `yield` and `terminator` can be just omitted. The block structure
4198  // was created in the region that handles their parent operation.
4199  // `declare_reduction` will be used by reductions and is not
4200  // converted directly, skip it.
4201  // `critical.declare` is only used to declare names of critical
4202  // sections which will be used by `critical` ops and hence can be
4203  // ignored for lowering. The OpenMP IRBuilder will create unique
4204  // name for critical section names.
4205  return success();
4206  })
4207  .Case([&](omp::ThreadprivateOp) {
4208  return convertOmpThreadprivate(*op, builder, moduleTranslation);
4209  })
4210  .Case<omp::TargetDataOp, omp::TargetEnterDataOp, omp::TargetExitDataOp,
4211  omp::TargetUpdateOp>([&](auto op) {
4212  return convertOmpTargetData(op, builder, moduleTranslation);
4213  })
4214  .Case([&](omp::TargetOp) {
4215  return convertOmpTarget(*op, builder, moduleTranslation);
4216  })
4217  .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
4218  [&](auto op) {
4219  // No-op, should be handled by relevant owning operations e.g.
4220  // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp etc.
4221  // and then discarded
4222  return success();
4223  })
4224  .Default([&](Operation *inst) {
4225  return inst->emitError() << "not yet implemented: " << inst->getName();
4226  });
4227 }
4228 
// Translates an operation identified as device code (see isTargetDeviceOp);
// currently a thin forwarder to the shared host/target lowering path.
4229 static LogicalResult
4230 convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
4231  LLVM::ModuleTranslation &moduleTranslation) {
4232  return convertHostOrTargetOperation(op, builder, moduleTranslation);
4233 }
4234 
// On the device pass, translate only the offloading constructs (omp.target
// and omp.target_data) found in or under `op`, ignoring surrounding host
// code. Returns failure if any nested translation failed.
4235 static LogicalResult
4236 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
4237  LLVM::ModuleTranslation &moduleTranslation) {
 // Fast path: `op` itself is an offloading construct.
4238  if (isa<omp::TargetOp>(op))
4239  return convertOmpTarget(*op, builder, moduleTranslation);
4240  if (isa<omp::TargetDataOp>(op))
4241  return convertOmpTargetData(op, builder, moduleTranslation);
 // Pre-order walk so that each offloading construct is translated once and
 // its interior is then skipped (WalkResult::skip) rather than revisited;
 // any failure interrupts the walk.
4242  bool interrupted =
4243  op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
4244  if (isa<omp::TargetOp>(oper)) {
4245  if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
4246  return WalkResult::interrupt();
4247  return WalkResult::skip();
4248  }
4249  if (isa<omp::TargetDataOp>(oper)) {
4250  if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
4251  return WalkResult::interrupt();
4252  return WalkResult::skip();
4253  }
4254  return WalkResult::advance();
4255  }).wasInterrupted();
4256  return failure(interrupted);
4257 }
4258 
4259 namespace {
4260 
4261 /// Implementation of the dialect interface that converts operations belonging
4262 /// to the OpenMP dialect to LLVM IR.
4263 class OpenMPDialectLLVMIRTranslationInterface
4265 public:
4267 
4268  /// Translates the given operation to LLVM IR using the provided IR builder
4269  /// and saving the state in `moduleTranslation`.
4270  LogicalResult
4271  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
4272  LLVM::ModuleTranslation &moduleTranslation) const final;
4273 
4274  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
4275  /// runtime calls, or operation amendments
4276  LogicalResult
4278  NamedAttribute attribute,
4279  LLVM::ModuleTranslation &moduleTranslation) const final;
4280 };
4281 
4282 } // namespace
4283 
4284 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
4285  Operation *op, ArrayRef<llvm::Instruction *> instructions,
4286  NamedAttribute attribute,
4287  LLVM::ModuleTranslation &moduleTranslation) const {
4288  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
4289  attribute.getName())
4290  .Case("omp.is_target_device",
4291  [&](Attribute attr) {
4292  if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
4293  llvm::OpenMPIRBuilderConfig &config =
4294  moduleTranslation.getOpenMPBuilder()->Config;
4295  config.setIsTargetDevice(deviceAttr.getValue());
4296  return success();
4297  }
4298  return failure();
4299  })
4300  .Case("omp.is_gpu",
4301  [&](Attribute attr) {
4302  if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
4303  llvm::OpenMPIRBuilderConfig &config =
4304  moduleTranslation.getOpenMPBuilder()->Config;
4305  config.setIsGPU(gpuAttr.getValue());
4306  return success();
4307  }
4308  return failure();
4309  })
4310  .Case("omp.host_ir_filepath",
4311  [&](Attribute attr) {
4312  if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
4313  llvm::OpenMPIRBuilder *ompBuilder =
4314  moduleTranslation.getOpenMPBuilder();
4315  ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
4316  return success();
4317  }
4318  return failure();
4319  })
4320  .Case("omp.flags",
4321  [&](Attribute attr) {
4322  if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
4323  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
4324  return failure();
4325  })
4326  .Case("omp.version",
4327  [&](Attribute attr) {
4328  if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
4329  llvm::OpenMPIRBuilder *ompBuilder =
4330  moduleTranslation.getOpenMPBuilder();
4331  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
4332  versionAttr.getVersion());
4333  return success();
4334  }
4335  return failure();
4336  })
4337  .Case("omp.declare_target",
4338  [&](Attribute attr) {
4339  if (auto declareTargetAttr =
4340  dyn_cast<omp::DeclareTargetAttr>(attr))
4341  return convertDeclareTargetAttr(op, declareTargetAttr,
4342  moduleTranslation);
4343  return failure();
4344  })
4345  .Case("omp.requires",
4346  [&](Attribute attr) {
4347  if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
4348  using Requires = omp::ClauseRequires;
4349  Requires flags = requiresAttr.getValue();
4350  llvm::OpenMPIRBuilderConfig &config =
4351  moduleTranslation.getOpenMPBuilder()->Config;
4352  config.setHasRequiresReverseOffload(
4353  bitEnumContainsAll(flags, Requires::reverse_offload));
4354  config.setHasRequiresUnifiedAddress(
4355  bitEnumContainsAll(flags, Requires::unified_address));
4356  config.setHasRequiresUnifiedSharedMemory(
4357  bitEnumContainsAll(flags, Requires::unified_shared_memory));
4358  config.setHasRequiresDynamicAllocators(
4359  bitEnumContainsAll(flags, Requires::dynamic_allocators));
4360  return success();
4361  }
4362  return failure();
4363  })
4364  .Case("omp.target_triples",
4365  [&](Attribute attr) {
4366  if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
4367  llvm::OpenMPIRBuilderConfig &config =
4368  moduleTranslation.getOpenMPBuilder()->Config;
4369  config.TargetTriples.clear();
4370  config.TargetTriples.reserve(triplesAttr.size());
4371  for (Attribute tripleAttr : triplesAttr) {
4372  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
4373  config.TargetTriples.emplace_back(tripleStrAttr.getValue());
4374  else
4375  return failure();
4376  }
4377  return success();
4378  }
4379  return failure();
4380  })
4381  .Default([](Attribute) {
4382  // Fall through for omp attributes that do not require lowering.
4383  return success();
4384  })(attribute.getValue());
4385 
4386  return failure();
4387 }
4388 
4389 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
4390 /// (including OpenMP runtime calls).
4391 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
4392  Operation *op, llvm::IRBuilderBase &builder,
4393  LLVM::ModuleTranslation &moduleTranslation) const {
4394 
4395  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4396  if (ompBuilder->Config.isTargetDevice()) {
4397  if (isTargetDeviceOp(op)) {
4398  return convertTargetDeviceOp(op, builder, moduleTranslation);
4399  } else {
4400  return convertTargetOpsInNest(op, builder, moduleTranslation);
4401  }
4402  }
4403  return convertHostOrTargetOperation(op, builder, moduleTranslation);
4404 }
4405 
4407  registry.insert<omp::OpenMPDialect>();
4408  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
4409  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
4410  });
4411 }
4412 
4414  DialectRegistry registry;
4416  context.appendDialectRegistry(registry);
4417 }
@ None
static llvm::Value * getRefPtrIfDeclareTarget(mlir::Value value, LLVM::ModuleTranslation &moduleTranslation)
static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::Function *func)
static LogicalResult convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static llvm::omp::OrderKind convertOrderKind(std::optional< omp::ClauseOrderKind > o)
Convert Order attribute to llvm::omp::OrderKind.
static LogicalResult convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertToCaptureClauseKind(mlir::omp::DeclareTargetCaptureClause captureClause)
static llvm::Expected< llvm::BasicBlock * > allocatePrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, MutableArrayRef< BlockArgument > privateBlockArgs, MutableArrayRef< omp::PrivateClauseOp > privateDecls, MutableArrayRef< mlir::Value > mlirPrivateVars, llvm::SmallVectorImpl< llvm::Value * > &llvmPrivateVars, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP)
Allocate delayed private variables.
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, bool first)
static LogicalResult convertIgnoredWrappers(omp::LoopNestOp loopOp, omp::LoopWrapperInterface parentOp, LLVM::ModuleTranslation &moduleTranslation)
Helper function to call convertIgnoredWrapper() for all wrappers of the given loopOp nested inside of...
static LogicalResult convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered_region' operation into LLVM IR using OpenMPIRBuilder.
static llvm::OpenMPIRBuilder::InsertPointTy findAllocaInsertPoint(llvm::IRBuilderBase &builder, const LLVM::ModuleTranslation &moduleTranslation)
Find the insertion point for allocas given the current insertion point for normal operations in the b...
static LogicalResult convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an omp.atomic.write operation to LLVM IR.
static OwningAtomicReductionGen makeAtomicReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible atomic reduction generator for the given reduction declaration.
static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op)
Converts an LLVM dialect binary operation to the corresponding enum value for atomicrmw supported bin...
static llvm::AtomicOrdering convertAtomicOrdering(std::optional< omp::ClauseMemoryOrderKind > ao)
Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult allocAndInitializeReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef)
static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
static bool isTargetDeviceOp(Operation *op)
static LogicalResult inlineOmpRegionCleanup(llvm::SmallVectorImpl< Region * > &cleanupRegions, llvm::ArrayRef< llvm::Value * > privateVariables, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, StringRef regionName, bool shouldLoadCleanupRegionArg=true)
handling of DeclareReductionOp's cleanup region
static void mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, DenseMap< Value, llvm::Value * > &reductionVariableMap, unsigned i)
Map input arguments to reduction initialization region.
static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp)
static llvm::SmallString< 64 > getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, llvm::OpenMPIRBuilder &ompBuilder)
static void collectPrivatizationDecls(OP op, SmallVectorImpl< omp::PrivateClauseOp > &privatizations)
Populates privatizations with privatization declarations used for the given op.
static OwningReductionGen makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible reduction generator for the given reduction declaration.
static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts the OpenMP parallel operation to LLVM IR.
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static LogicalResult inlineConvertOmpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::Value * > *continuationBlockArgs=nullptr)
Translates the blocks contained in the given region and appends them at the current insertion point of the builder.
static LogicalResult convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP Threadprivate operation into LLVM IR using OpenMPIRBuilder.
static omp::PrivateClauseOp findPrivatizer(Operation *from, SymbolRefAttr symbolName)
Looks up from the operation from and returns the PrivateClauseOp with name symbolName.
static LogicalResult convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult createReductionsAndCleanup(OP op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, ArrayRef< llvm::Value * > privateReductionVariables, ArrayRef< bool > isByRef)
LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
Lowers the FlagsAttr which is applied to the module on the device pass when offloading,...
uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl)
std::vector< llvm::Value * > calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, bool isArrayTy, OperandRange bounds)
This function calculates the array/pointer offset for map data provided with bounds operations,...
static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, bool isTargetParams, int mapDataParentIdx=-1)
static LogicalResult allocReductionVars(T loop, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, SmallVectorImpl< DeferredStore > &deferredStores, llvm::ArrayRef< bool > isByRefs)
Allocate space for privatized reduction variables.
static ArrayRef< bool > getIsByRef(std::optional< ArrayRef< bool >> attr)
static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, bool isTargetParams=false)
static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP)
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind)
Convert ProcBindKind from MLIR-generated enum to LLVM enum.
static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void processMapMembersWithParent(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag)
static LogicalResult convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static void collectReductionDecls(T op, SmallVectorImpl< omp::DeclareReductionOp > &reductions)
Populates reductions with reduction declarations used in the given op.
static LogicalResult handleError(llvm::Error error, Operation &op)
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause)
static LogicalResult checkImplementationStatus(Operation &op)
Check whether translation to LLVM IR for the given operation is currently supported.
static LogicalResult convertIgnoredWrapper(omp::LoopWrapperInterface &opInst, LLVM::ModuleTranslation &moduleTranslation)
Helper function to map block arguments defined by ignored loop wrappers to LLVM values and prevent an...
static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert omp.atomic.read operation to LLVM IR.
static void collectMapDataFromMapOperands(MapInfoData &mapData, SmallVectorImpl< Value > &mapVars, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::IRBuilderBase &builder, const ArrayRef< Value > &useDevPtrOperands={}, const ArrayRef< Value > &useDevAddrOperands={})
static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, omp::TargetOp targetOp, llvm::StringRef parentName="")
static void collectReductionInfo(T loop, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< OwningReductionGen > &owningReductionGens, SmallVectorImpl< OwningAtomicReductionGen > &owningAtomicReductionGens, const ArrayRef< llvm::Value * > privateReductionVariables, SmallVectorImpl< llvm::OpenMPIRBuilder::ReductionInfo > &reductionInfos)
Collect reduction info.
static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Given an OpenMP MLIR operation, create the corresponding LLVM IR (including OpenMP runtime calls).
static bool checkIfPointerMap(omp::MapInfoOp mapOp)
static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void buildDependData(std::optional< ArrayAttr > dependKinds, OperandRange dependVars, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::OpenMPIRBuilder::DependData > &dds)
static llvm::Expected< llvm::BasicBlock * > convertOmpOpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::PHINode * > *continuationBlockPHIs=nullptr)
Converts the given region that appears within an OpenMP dialect operation to LLVM IR,...
static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
llvm::Value * getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::Value *basePointer, llvm::Type *baseType, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static bool isDeclareTargetLink(mlir::Value value)
#define MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CLASS_NAME)
Definition: TypeID.h:274
Attributes are known-constant values of operations.
Definition: Attributes.h:25
Block represents an ordered list of Operations.
Definition: Block.h:33
BlockArgument getArgument(unsigned i)
Definition: Block.h:129
unsigned getNumArguments()
Definition: Block.h:128
Operation & back()
Definition: Block.h:152
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:246
Operation & front()
Definition: Block.h:153
iterator begin()
Definition: Block.h:143
The main mechanism for performing data layout queries.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
Base class for dialect interfaces providing translation to LLVM IR.
virtual LogicalResult amendOperation(Operation *op, ArrayRef< llvm::Instruction * > instructions, NamedAttribute attribute, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to act on an operation that has dialect attributes from the derive...
virtual LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to provide translation of the operations to LLVM IR.
Concrete CRTP base class for ModuleTranslation stack frames.
Implementation class for module translation.
llvm::Value * lookupValue(Value value) const
Finds an LLVM IR value corresponding to the given MLIR value.
LogicalResult convertBlock(Block &bb, bool ignoreArguments, llvm::IRBuilderBase &builder)
Translates the contents of the given block to LLVM IR using this translator.
SmallVector< llvm::Value * > lookupValues(ValueRange values)
Looks up the remapped LLVM IR values corresponding to a list of MLIR values.
llvm::BasicBlock * lookupBlock(Block *block) const
Finds an LLVM IR basic block that corresponds to the given MLIR block.
SymbolTableCollection & symbolTable()
llvm::Type * convertType(Type type)
Converts the type from MLIR LLVM dialect to LLVM.
llvm::OpenMPIRBuilder * getOpenMPBuilder()
Returns the OpenMP IR builder associated with the LLVM IR module being constructed.
llvm::LLVMContext & getLLVMContext() const
Returns the LLVM context in which the IR is being constructed.
llvm::GlobalValue * lookupGlobal(Operation *op)
Finds an LLVM IR global value that corresponds to the given MLIR operation defining a global value.
llvm::Module * getLLVMModule()
Returns the LLVM module in which the IR is being constructed.
llvm::Function * lookupFunction(StringRef name) const
Finds an LLVM IR function by its name.
void mapBlock(Block *mlir, llvm::BasicBlock *llvm)
Stores the mapping between an MLIR block and LLVM IR basic block.
WalkResult stackWalk(llvm::function_ref< WalkResult(const T &)> callback) const
Calls callback for every ModuleTranslation stack frame of type T starting from the top of the stack.
void forgetMapping(Region &region)
Removes the mapping for blocks contained in the region and values defined in these blocks.
void mapValue(Value mlir, llvm::Value *llvm)
Stores the mapping between an MLIR value and its LLVM IR counterpart.
T findInstanceOf()
Return an instance of the given location type if one is nested under the current location.
Definition: Location.h:44
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:207
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:49
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:221
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:42
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:345
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:280
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:402
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:793
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:341
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:682
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:373
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
BlockArgListType getArguments()
Definition: Region.h:81
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition: Region.h:200
bool empty()
Definition: Region.h:60
unsigned getNumArguments()
Definition: Region.h:123
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
Block & front()
Definition: Region.h:65
@ Private
The symbol is private and may only be referenced by SymbolRefAttrs local to the operations within the...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:33
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: Visitors.h:55
static WalkResult interrupt()
Definition: Visitors.h:50
The OpAsmOpInterface, see OpAsmInterface.td for more details.
Definition: CallGraph.h:229
void connectPHINodes(Region &region, const ModuleTranslation &state)
For all blocks in the region that were converted to LLVM IR using the given ModuleTranslation,...
llvm::Constant * createMappingInformation(Location loc, llvm::OpenMPIRBuilder &builder)
Create a constant string representing the mapping information extracted from the MLIR location inform...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Runtime
Potential runtimes for AMD GPU kernels.
Definition: Runtimes.h:15
Include the generated interface declarations.
SetVector< Block * > getBlocksSortedByDominance(Region &region)
Gets a list of blocks that is sorted according to dominance.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:305
void registerOpenMPDialectTranslation(DialectRegistry &registry)
Register the OpenMP dialect and the translation from it to the LLVM IR in the given registry;.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
RAII object calling stackPush/stackPop on construction/destruction.