// MLIR 20.0.0git — OpenMPToLLVMIRTranslation.cpp
// NOTE(review): this copy was scraped from the doxygen page ("Go to the
// documentation of this file."); embedded line numbers and dropped lines from
// the scrape have been repaired below.
1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
#include "mlir/Analysis/TopologicalSortUtils.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/Operation.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Target/LLVMIR/Dialect/OpenMPCommon.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
#include "mlir/Transforms/RegionUtils.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <any>
#include <cstdint>
#include <iterator>
#include <numeric>
#include <optional>
#include <utility>
43 
44 using namespace mlir;
45 
46 namespace {
47 static llvm::omp::ScheduleKind
48 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
49  if (!schedKind.has_value())
50  return llvm::omp::OMP_SCHEDULE_Default;
51  switch (schedKind.value()) {
52  case omp::ClauseScheduleKind::Static:
53  return llvm::omp::OMP_SCHEDULE_Static;
54  case omp::ClauseScheduleKind::Dynamic:
55  return llvm::omp::OMP_SCHEDULE_Dynamic;
56  case omp::ClauseScheduleKind::Guided:
57  return llvm::omp::OMP_SCHEDULE_Guided;
58  case omp::ClauseScheduleKind::Auto:
59  return llvm::omp::OMP_SCHEDULE_Auto;
61  return llvm::omp::OMP_SCHEDULE_Runtime;
62  }
63  llvm_unreachable("unhandled schedule clause argument");
64 }
65 
66 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
67 /// insertion points for allocas.
68 class OpenMPAllocaStackFrame
69  : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
70 public:
71  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)
72 
73  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
74  : allocaInsertPoint(allocaIP) {}
75  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
76 };
77 
78 /// ModuleTranslation stack frame containing the partial mapping between MLIR
79 /// values and their LLVM IR equivalents.
80 class OpenMPVarMappingStackFrame
82  OpenMPVarMappingStackFrame> {
83 public:
84  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame)
85 
86  explicit OpenMPVarMappingStackFrame(
87  const DenseMap<Value, llvm::Value *> &mapping)
88  : mapping(mapping) {}
89 
91 };
92 
93 /// Custom error class to signal translation errors that don't need reporting,
94 /// since encountering them will have already triggered relevant error messages.
95 ///
96 /// Its purpose is to serve as the glue between MLIR failures represented as
97 /// \see LogicalResult instances and \see llvm::Error instances used to
98 /// propagate errors through the \see llvm::OpenMPIRBuilder. Generally, when an
99 /// error of the first type is raised, a message is emitted directly (the \see
100 /// LogicalResult itself does not hold any information). If we need to forward
101 /// this error condition as an \see llvm::Error while avoiding triggering some
102 /// redundant error reporting later on, we need a custom \see llvm::ErrorInfo
103 /// class to just signal this situation has happened.
104 ///
105 /// For example, this class should be used to trigger errors from within
106 /// callbacks passed to the \see OpenMPIRBuilder when they were triggered by the
107 /// translation of their own regions. This unclutters the error log from
108 /// redundant messages.
109 class PreviouslyReportedError
110  : public llvm::ErrorInfo<PreviouslyReportedError> {
111 public:
112  void log(raw_ostream &) const override {
113  // Do not log anything.
114  }
115 
116  std::error_code convertToErrorCode() const override {
117  llvm_unreachable(
118  "PreviouslyReportedError doesn't support ECError conversion");
119  }
120 
121  // Used by ErrorInfo::classID.
122  static char ID;
123 };
124 
126 
127 } // namespace
128 
129 /// Looks up from the operation from and returns the PrivateClauseOp with
130 /// name symbolName
131 static omp::PrivateClauseOp findPrivatizer(Operation *from,
132  SymbolRefAttr symbolName) {
133  omp::PrivateClauseOp privatizer =
134  SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
135  symbolName);
136  assert(privatizer && "privatizer not found in the symbol table");
137  return privatizer;
138 }
139 
140 /// Check whether translation to LLVM IR for the given operation is currently
141 /// supported. If not, descriptive diagnostics will be emitted to let users know
142 /// this is a not-yet-implemented feature.
143 ///
144 /// \returns success if no unimplemented features are needed to translate the
145 /// given operation.
146 static LogicalResult checkImplementationStatus(Operation &op) {
147  auto todo = [&op](StringRef clauseName) {
148  return op.emitError() << "not yet implemented: Unhandled clause "
149  << clauseName << " in " << op.getName()
150  << " operation";
151  };
152 
153  auto checkAligned = [&todo](auto op, LogicalResult &result) {
154  if (!op.getAlignedVars().empty() || op.getAlignments())
155  result = todo("aligned");
156  };
157  auto checkAllocate = [&todo](auto op, LogicalResult &result) {
158  if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
159  result = todo("allocate");
160  };
161  auto checkBare = [&todo](auto op, LogicalResult &result) {
162  if (op.getBare())
163  result = todo("ompx_bare");
164  };
165  auto checkDepend = [&todo](auto op, LogicalResult &result) {
166  if (!op.getDependVars().empty() || op.getDependKinds())
167  result = todo("depend");
168  };
169  auto checkDevice = [&todo](auto op, LogicalResult &result) {
170  if (op.getDevice())
171  result = todo("device");
172  };
173  auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) {
174  if (!op.getHasDeviceAddrVars().empty())
175  result = todo("has_device_addr");
176  };
177  auto checkHint = [](auto op, LogicalResult &) {
178  if (op.getHint())
179  op.emitWarning("hint clause discarded");
180  };
181  auto checkIf = [&todo](auto op, LogicalResult &result) {
182  if (op.getIfExpr())
183  result = todo("if");
184  };
185  auto checkInReduction = [&todo](auto op, LogicalResult &result) {
186  if (!op.getInReductionVars().empty() || op.getInReductionByref() ||
187  op.getInReductionSyms())
188  result = todo("in_reduction");
189  };
190  auto checkIsDevicePtr = [&todo](auto op, LogicalResult &result) {
191  if (!op.getIsDevicePtrVars().empty())
192  result = todo("is_device_ptr");
193  };
194  auto checkLinear = [&todo](auto op, LogicalResult &result) {
195  if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
196  result = todo("linear");
197  };
198  auto checkNontemporal = [&todo](auto op, LogicalResult &result) {
199  if (!op.getNontemporalVars().empty())
200  result = todo("nontemporal");
201  };
202  auto checkNowait = [&todo](auto op, LogicalResult &result) {
203  if (op.getNowait())
204  result = todo("nowait");
205  };
206  auto checkOrder = [&todo](auto op, LogicalResult &result) {
207  if (op.getOrder() || op.getOrderMod())
208  result = todo("order");
209  };
210  auto checkParLevelSimd = [&todo](auto op, LogicalResult &result) {
211  if (op.getParLevelSimd())
212  result = todo("parallelization-level");
213  };
214  auto checkPriority = [&todo](auto op, LogicalResult &result) {
215  if (op.getPriority())
216  result = todo("priority");
217  };
218  auto checkPrivate = [&todo](auto op, LogicalResult &result) {
219  if (!op.getPrivateVars().empty() || op.getPrivateSyms())
220  result = todo("privatization");
221  };
222  auto checkReduction = [&todo](auto op, LogicalResult &result) {
223  if (!op.getReductionVars().empty() || op.getReductionByref() ||
224  op.getReductionSyms())
225  result = todo("reduction");
226  };
227  auto checkThreadLimit = [&todo](auto op, LogicalResult &result) {
228  if (op.getThreadLimit())
229  result = todo("thread_limit");
230  };
231  auto checkTaskReduction = [&todo](auto op, LogicalResult &result) {
232  if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() ||
233  op.getTaskReductionSyms())
234  result = todo("task_reduction");
235  };
236  auto checkUntied = [&todo](auto op, LogicalResult &result) {
237  if (op.getUntied())
238  result = todo("untied");
239  };
240 
241  LogicalResult result = success();
243  .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
244  .Case([&](omp::SectionsOp op) {
245  checkAllocate(op, result);
246  checkPrivate(op, result);
247  })
248  .Case([&](omp::SingleOp op) {
249  checkAllocate(op, result);
250  checkPrivate(op, result);
251  })
252  .Case([&](omp::TeamsOp op) {
253  checkAllocate(op, result);
254  checkPrivate(op, result);
255  checkReduction(op, result);
256  })
257  .Case([&](omp::TaskOp op) {
258  checkAllocate(op, result);
259  checkInReduction(op, result);
260  checkPriority(op, result);
261  })
262  .Case([&](omp::TaskgroupOp op) {
263  checkAllocate(op, result);
264  checkTaskReduction(op, result);
265  })
266  .Case([&](omp::TaskwaitOp op) {
267  checkDepend(op, result);
268  checkNowait(op, result);
269  })
270  .Case([&](omp::TaskloopOp op) {
271  // TODO: Add other clauses check
272  checkUntied(op, result);
273  })
274  .Case([&](omp::WsloopOp op) {
275  checkAllocate(op, result);
276  checkLinear(op, result);
277  checkOrder(op, result);
278  })
279  .Case([&](omp::ParallelOp op) { checkAllocate(op, result); })
280  .Case([&](omp::SimdOp op) {
281  checkAligned(op, result);
282  checkLinear(op, result);
283  checkNontemporal(op, result);
284  checkPrivate(op, result);
285  checkReduction(op, result);
286  })
287  .Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp,
288  omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); })
289  .Case<omp::TargetEnterDataOp, omp::TargetExitDataOp, omp::TargetUpdateOp>(
290  [&](auto op) { checkDepend(op, result); })
291  .Case([&](omp::TargetOp op) {
292  checkAllocate(op, result);
293  checkBare(op, result);
294  checkDevice(op, result);
295  checkHasDeviceAddr(op, result);
296  checkIf(op, result);
297  checkInReduction(op, result);
298  checkIsDevicePtr(op, result);
299  // Privatization clauses are supported, except on some situations, so we
300  // need to check here whether any of these unsupported cases are being
301  // translated.
302  if (std::optional<ArrayAttr> privateSyms = op.getPrivateSyms()) {
303  for (Attribute privatizerNameAttr : *privateSyms) {
304  omp::PrivateClauseOp privatizer = findPrivatizer(
305  op.getOperation(), cast<SymbolRefAttr>(privatizerNameAttr));
306 
307  if (privatizer.getDataSharingType() ==
308  omp::DataSharingClauseType::FirstPrivate)
309  result = todo("firstprivate");
310  }
311  }
312  checkThreadLimit(op, result);
313  })
314  .Default([](Operation &) {
315  // Assume all clauses for an operation can be translated unless they are
316  // checked above.
317  });
318  return result;
319 }
320 
321 static LogicalResult handleError(llvm::Error error, Operation &op) {
322  LogicalResult result = success();
323  if (error) {
324  llvm::handleAllErrors(
325  std::move(error),
326  [&](const PreviouslyReportedError &) { result = failure(); },
327  [&](const llvm::ErrorInfoBase &err) {
328  result = op.emitError(err.message());
329  });
330  }
331  return result;
332 }
333 
334 template <typename T>
335 static LogicalResult handleError(llvm::Expected<T> &result, Operation &op) {
336  if (!result)
337  return handleError(result.takeError(), op);
338 
339  return success();
340 }
341 
342 /// Find the insertion point for allocas given the current insertion point for
343 /// normal operations in the builder.
344 static llvm::OpenMPIRBuilder::InsertPointTy
345 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
346  const LLVM::ModuleTranslation &moduleTranslation) {
347  // If there is an alloca insertion point on stack, i.e. we are in a nested
348  // operation and a specific point was provided by some surrounding operation,
349  // use it.
350  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
351  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
352  [&](const OpenMPAllocaStackFrame &frame) {
353  allocaInsertPoint = frame.allocaInsertPoint;
354  return WalkResult::interrupt();
355  });
356  if (walkResult.wasInterrupted())
357  return allocaInsertPoint;
358 
359  // Otherwise, insert to the entry block of the surrounding function.
360  // If the current IRBuilder InsertPoint is the function's entry, it cannot
361  // also be used for alloca insertion which would result in insertion order
362  // confusion. Create a new BasicBlock for the Builder and use the entry block
363  // for the allocs.
364  // TODO: Create a dedicated alloca BasicBlock at function creation such that
365  // we do not need to move the current InertPoint here.
366  if (builder.GetInsertBlock() ==
367  &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
368  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
369  "Assuming end of basic block");
370  llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
371  builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
372  builder.GetInsertBlock()->getNextNode());
373  builder.CreateBr(entryBB);
374  builder.SetInsertPoint(entryBB);
375  }
376 
377  llvm::BasicBlock &funcEntryBlock =
378  builder.GetInsertBlock()->getParent()->getEntryBlock();
379  return llvm::OpenMPIRBuilder::InsertPointTy(
380  &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
381 }
382 
383 /// Converts the given region that appears within an OpenMP dialect operation to
384 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
385 /// region, and a branch from any block with an successor-less OpenMP terminator
386 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
387 /// of the continuation block if provided.
389  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
390  LLVM::ModuleTranslation &moduleTranslation,
391  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
392  llvm::BasicBlock *continuationBlock =
393  splitBB(builder, true, "omp.region.cont");
394  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
395 
396  llvm::LLVMContext &llvmContext = builder.getContext();
397  for (Block &bb : region) {
398  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
399  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
400  builder.GetInsertBlock()->getNextNode());
401  moduleTranslation.mapBlock(&bb, llvmBB);
402  }
403 
404  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
405 
406  // Terminators (namely YieldOp) may be forwarding values to the region that
407  // need to be available in the continuation block. Collect the types of these
408  // operands in preparation of creating PHI nodes.
409  SmallVector<llvm::Type *> continuationBlockPHITypes;
410  bool operandsProcessed = false;
411  unsigned numYields = 0;
412  for (Block &bb : region.getBlocks()) {
413  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
414  if (!operandsProcessed) {
415  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
416  continuationBlockPHITypes.push_back(
417  moduleTranslation.convertType(yield->getOperand(i).getType()));
418  }
419  operandsProcessed = true;
420  } else {
421  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
422  "mismatching number of values yielded from the region");
423  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
424  llvm::Type *operandType =
425  moduleTranslation.convertType(yield->getOperand(i).getType());
426  (void)operandType;
427  assert(continuationBlockPHITypes[i] == operandType &&
428  "values of mismatching types yielded from the region");
429  }
430  }
431  numYields++;
432  }
433  }
434 
435  // Insert PHI nodes in the continuation block for any values forwarded by the
436  // terminators in this region.
437  if (!continuationBlockPHITypes.empty())
438  assert(
439  continuationBlockPHIs &&
440  "expected continuation block PHIs if converted regions yield values");
441  if (continuationBlockPHIs) {
442  llvm::IRBuilderBase::InsertPointGuard guard(builder);
443  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
444  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
445  for (llvm::Type *ty : continuationBlockPHITypes)
446  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
447  }
448 
449  // Convert blocks one by one in topological order to ensure
450  // defs are converted before uses.
452  for (Block *bb : blocks) {
453  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
454  // Retarget the branch of the entry block to the entry block of the
455  // converted region (regions are single-entry).
456  if (bb->isEntryBlock()) {
457  assert(sourceTerminator->getNumSuccessors() == 1 &&
458  "provided entry block has multiple successors");
459  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
460  "ContinuationBlock is not the successor of the entry block");
461  sourceTerminator->setSuccessor(0, llvmBB);
462  }
463 
464  llvm::IRBuilderBase::InsertPointGuard guard(builder);
465  if (failed(
466  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder)))
467  return llvm::make_error<PreviouslyReportedError>();
468 
469  // Special handling for `omp.yield` and `omp.terminator` (we may have more
470  // than one): they return the control to the parent OpenMP dialect operation
471  // so replace them with the branch to the continuation block. We handle this
472  // here to avoid relying inter-function communication through the
473  // ModuleTranslation class to set up the correct insertion point. This is
474  // also consistent with MLIR's idiom of handling special region terminators
475  // in the same code that handles the region-owning operation.
476  Operation *terminator = bb->getTerminator();
477  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
478  builder.CreateBr(continuationBlock);
479 
480  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
481  (*continuationBlockPHIs)[i]->addIncoming(
482  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
483  }
484  }
485  // After all blocks have been traversed and values mapped, connect the PHI
486  // nodes to the results of preceding blocks.
487  LLVM::detail::connectPHINodes(region, moduleTranslation);
488 
489  // Remove the blocks and values defined in this region from the mapping since
490  // they are not visible outside of this region. This allows the same region to
491  // be converted several times, that is cloned, without clashes, and slightly
492  // speeds up the lookups.
493  moduleTranslation.forgetMapping(region);
494 
495  return continuationBlock;
496 }
497 
498 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
499 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
500  switch (kind) {
501  case omp::ClauseProcBindKind::Close:
502  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
503  case omp::ClauseProcBindKind::Master:
504  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
505  case omp::ClauseProcBindKind::Primary:
506  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
507  case omp::ClauseProcBindKind::Spread:
508  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
509  }
510  llvm_unreachable("Unknown ClauseProcBindKind kind");
511 }
512 
513 /// Helper function to map block arguments defined by ignored loop wrappers to
514 /// LLVM values and prevent any uses of those from triggering null pointer
515 /// dereferences.
516 ///
517 /// This must be called after block arguments of parent wrappers have already
518 /// been mapped to LLVM IR values.
519 static LogicalResult
520 convertIgnoredWrapper(omp::LoopWrapperInterface &opInst,
521  LLVM::ModuleTranslation &moduleTranslation) {
522  // Map block arguments directly to the LLVM value associated to the
523  // corresponding operand. This is semantically equivalent to this wrapper not
524  // being present.
525  auto forwardArgs =
526  [&moduleTranslation](llvm::ArrayRef<BlockArgument> blockArgs,
527  OperandRange operands) {
528  for (auto [arg, var] : llvm::zip_equal(blockArgs, operands))
529  moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
530  };
531 
533  .Case([&](omp::SimdOp op) {
534  auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(*op);
535  forwardArgs(blockArgIface.getPrivateBlockArgs(), op.getPrivateVars());
536  forwardArgs(blockArgIface.getReductionBlockArgs(),
537  op.getReductionVars());
538  op.emitWarning() << "simd information on composite construct discarded";
539  return success();
540  })
541  .Default([&](Operation *op) {
542  return op->emitError() << "cannot ignore nested wrapper";
543  });
544 }
545 
546 /// Helper function to call \c convertIgnoredWrapper() for all wrappers of the
547 /// given \c loopOp nested inside of \c parentOp. This has the effect of mapping
548 /// entry block arguments defined by these operations to outside values.
549 ///
550 /// It must be called after block arguments of \c parentOp have already been
551 /// mapped themselves.
552 static LogicalResult
553 convertIgnoredWrappers(omp::LoopNestOp loopOp,
554  omp::LoopWrapperInterface parentOp,
555  LLVM::ModuleTranslation &moduleTranslation) {
557  loopOp.gatherWrappers(wrappers);
558 
559  // Process wrappers nested inside of `parentOp` from outermost to innermost.
560  for (auto it =
561  std::next(std::find(wrappers.rbegin(), wrappers.rend(), parentOp));
562  it != wrappers.rend(); ++it) {
563  if (failed(convertIgnoredWrapper(*it, moduleTranslation)))
564  return failure();
565  }
566 
567  return success();
568 }
569 
570 /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
571 static LogicalResult
572 convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
573  LLVM::ModuleTranslation &moduleTranslation) {
574  auto maskedOp = cast<omp::MaskedOp>(opInst);
575  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
576 
577  if (failed(checkImplementationStatus(opInst)))
578  return failure();
579 
580  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
581  // MaskedOp has only one region associated with it.
582  auto &region = maskedOp.getRegion();
583  builder.restoreIP(codeGenIP);
584  return convertOmpOpRegions(region, "omp.masked.region", builder,
585  moduleTranslation)
586  .takeError();
587  };
588 
589  // TODO: Perform finalization actions for variables. This has to be
590  // called for variables which have destructors/finalizers.
591  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
592 
593  llvm::Value *filterVal = nullptr;
594  if (auto filterVar = maskedOp.getFilteredThreadId()) {
595  filterVal = moduleTranslation.lookupValue(filterVar);
596  } else {
597  llvm::LLVMContext &llvmContext = builder.getContext();
598  filterVal =
599  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), /*V=*/0);
600  }
601  assert(filterVal != nullptr);
602  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
603  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
604  moduleTranslation.getOpenMPBuilder()->createMasked(ompLoc, bodyGenCB,
605  finiCB, filterVal);
606 
607  if (failed(handleError(afterIP, opInst)))
608  return failure();
609 
610  builder.restoreIP(*afterIP);
611  return success();
612 }
613 
614 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
615 static LogicalResult
616 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
617  LLVM::ModuleTranslation &moduleTranslation) {
618  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
619  auto masterOp = cast<omp::MasterOp>(opInst);
620 
621  if (failed(checkImplementationStatus(opInst)))
622  return failure();
623 
624  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
625  // MasterOp has only one region associated with it.
626  auto &region = masterOp.getRegion();
627  builder.restoreIP(codeGenIP);
628  return convertOmpOpRegions(region, "omp.master.region", builder,
629  moduleTranslation)
630  .takeError();
631  };
632 
633  // TODO: Perform finalization actions for variables. This has to be
634  // called for variables which have destructors/finalizers.
635  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
636 
637  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
638  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
639  moduleTranslation.getOpenMPBuilder()->createMaster(ompLoc, bodyGenCB,
640  finiCB);
641 
642  if (failed(handleError(afterIP, opInst)))
643  return failure();
644 
645  builder.restoreIP(*afterIP);
646  return success();
647 }
648 
649 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
650 static LogicalResult
651 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
652  LLVM::ModuleTranslation &moduleTranslation) {
653  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
654  auto criticalOp = cast<omp::CriticalOp>(opInst);
655 
656  if (failed(checkImplementationStatus(opInst)))
657  return failure();
658 
659  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
660  // CriticalOp has only one region associated with it.
661  auto &region = cast<omp::CriticalOp>(opInst).getRegion();
662  builder.restoreIP(codeGenIP);
663  return convertOmpOpRegions(region, "omp.critical.region", builder,
664  moduleTranslation)
665  .takeError();
666  };
667 
668  // TODO: Perform finalization actions for variables. This has to be
669  // called for variables which have destructors/finalizers.
670  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
671 
672  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
673  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
674  llvm::Constant *hint = nullptr;
675 
676  // If it has a name, it probably has a hint too.
677  if (criticalOp.getNameAttr()) {
678  // The verifiers in OpenMP Dialect guarentee that all the pointers are
679  // non-null
680  auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
681  auto criticalDeclareOp =
682  SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
683  symbolRef);
684  hint =
685  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
686  static_cast<int>(criticalDeclareOp.getHint()));
687  }
688  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
689  moduleTranslation.getOpenMPBuilder()->createCritical(
690  ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint);
691 
692  if (failed(handleError(afterIP, opInst)))
693  return failure();
694 
695  builder.restoreIP(*afterIP);
696  return success();
697 }
698 
699 /// Populates `privatizations` with privatization declarations used for the
700 /// given op.
701 template <class OP>
703  OP op, SmallVectorImpl<omp::PrivateClauseOp> &privatizations) {
704  std::optional<ArrayAttr> attr = op.getPrivateSyms();
705  if (!attr)
706  return;
707 
708  privatizations.reserve(privatizations.size() + attr->size());
709  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
710  privatizations.push_back(findPrivatizer(op, symbolRef));
711  }
712 }
713 
714 /// Populates `reductions` with reduction declarations used in the given op.
715 template <typename T>
716 static void
719  std::optional<ArrayAttr> attr = op.getReductionSyms();
720  if (!attr)
721  return;
722 
723  reductions.reserve(reductions.size() + op.getNumReductionVars());
724  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
725  reductions.push_back(
726  SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
727  op, symbolRef));
728  }
729 }
730 
731 /// Translates the blocks contained in the given region and appends them to at
732 /// the current insertion point of `builder`. The operations of the entry block
733 /// are appended to the current insertion block. If set, `continuationBlockArgs`
734 /// is populated with translated values that correspond to the values
735 /// omp.yield'ed from the region.
736 static LogicalResult inlineConvertOmpRegions(
737  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
738  LLVM::ModuleTranslation &moduleTranslation,
739  SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
740  if (region.empty())
741  return success();
742 
743  // Special case for single-block regions that don't create additional blocks:
744  // insert operations without creating additional blocks.
745  if (llvm::hasSingleElement(region)) {
746  llvm::Instruction *potentialTerminator =
747  builder.GetInsertBlock()->empty() ? nullptr
748  : &builder.GetInsertBlock()->back();
749 
750  if (potentialTerminator && potentialTerminator->isTerminator())
751  potentialTerminator->removeFromParent();
752  moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
753 
754  if (failed(moduleTranslation.convertBlock(
755  region.front(), /*ignoreArguments=*/true, builder)))
756  return failure();
757 
758  // The continuation arguments are simply the translated terminator operands.
759  if (continuationBlockArgs)
760  llvm::append_range(
761  *continuationBlockArgs,
762  moduleTranslation.lookupValues(region.front().back().getOperands()));
763 
764  // Drop the mapping that is no longer necessary so that the same region can
765  // be processed multiple times.
766  moduleTranslation.forgetMapping(region);
767 
768  if (potentialTerminator && potentialTerminator->isTerminator()) {
769  llvm::BasicBlock *block = builder.GetInsertBlock();
770  if (block->empty()) {
771  // this can happen for really simple reduction init regions e.g.
772  // %0 = llvm.mlir.constant(0 : i32) : i32
773  // omp.yield(%0 : i32)
774  // because the llvm.mlir.constant (MLIR op) isn't converted into any
775  // llvm op
776  potentialTerminator->insertInto(block, block->begin());
777  } else {
778  potentialTerminator->insertAfter(&block->back());
779  }
780  }
781 
782  return success();
783  }
784 
786  llvm::Expected<llvm::BasicBlock *> continuationBlock =
787  convertOmpOpRegions(region, blockName, builder, moduleTranslation, &phis);
788 
789  if (failed(handleError(continuationBlock, *region.getParentOp())))
790  return failure();
791 
792  if (continuationBlockArgs)
793  llvm::append_range(*continuationBlockArgs, phis);
794  builder.SetInsertPoint(*continuationBlock,
795  (*continuationBlock)->getFirstInsertionPt());
796  return success();
797 }
798 
799 namespace {
800 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
801 /// store lambdas with capture.
802 using OwningReductionGen =
803  std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
804  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
805  llvm::Value *&)>;
806 using OwningAtomicReductionGen =
807  std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
808  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
809  llvm::Value *)>;
810 } // namespace
811 
812 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
813 /// reduction declaration. The generator uses `builder` but ignores its
814 /// insertion point.
815 static OwningReductionGen
816 makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
817  LLVM::ModuleTranslation &moduleTranslation) {
818  // The lambda is mutable because we need access to non-const methods of decl
819  // (which aren't actually mutating it), and we must capture decl by-value to
820  // avoid the dangling reference after the parent function returns.
821  OwningReductionGen gen =
822  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
823  llvm::Value *lhs, llvm::Value *rhs,
824  llvm::Value *&result) mutable
825  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
826  moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs);
827  moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs);
828  builder.restoreIP(insertPoint);
830  if (failed(inlineConvertOmpRegions(decl.getReductionRegion(),
831  "omp.reduction.nonatomic.body", builder,
832  moduleTranslation, &phis)))
833  return llvm::createStringError(
834  "failed to inline `combiner` region of `omp.declare_reduction`");
835  assert(phis.size() == 1);
836  result = phis[0];
837  return builder.saveIP();
838  };
839  return gen;
840 }
841 
842 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
843 /// given reduction declaration. The generator uses `builder` but ignores its
844 /// insertion point. Returns null if there is no atomic region available in the
845 /// reduction declaration.
846 static OwningAtomicReductionGen
847 makeAtomicReductionGen(omp::DeclareReductionOp decl,
848  llvm::IRBuilderBase &builder,
849  LLVM::ModuleTranslation &moduleTranslation) {
850  if (decl.getAtomicReductionRegion().empty())
851  return OwningAtomicReductionGen();
852 
853  // The lambda is mutable because we need access to non-const methods of decl
854  // (which aren't actually mutating it), and we must capture decl by-value to
855  // avoid the dangling reference after the parent function returns.
856  OwningAtomicReductionGen atomicGen =
857  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
858  llvm::Value *lhs, llvm::Value *rhs) mutable
859  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
860  moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs);
861  moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs);
862  builder.restoreIP(insertPoint);
864  if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(),
865  "omp.reduction.atomic.body", builder,
866  moduleTranslation, &phis)))
867  return llvm::createStringError(
868  "failed to inline `atomic` region of `omp.declare_reduction`");
869  assert(phis.empty());
870  return builder.saveIP();
871  };
872  return atomicGen;
873 }
874 
875 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
876 static LogicalResult
877 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
878  LLVM::ModuleTranslation &moduleTranslation) {
879  auto orderedOp = cast<omp::OrderedOp>(opInst);
880 
881  if (failed(checkImplementationStatus(opInst)))
882  return failure();
883 
884  omp::ClauseDepend dependType = *orderedOp.getDoacrossDependType();
885  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
886  unsigned numLoops = *orderedOp.getDoacrossNumLoops();
887  SmallVector<llvm::Value *> vecValues =
888  moduleTranslation.lookupValues(orderedOp.getDoacrossDependVars());
889 
890  size_t indexVecValues = 0;
891  while (indexVecValues < vecValues.size()) {
892  SmallVector<llvm::Value *> storeValues;
893  storeValues.reserve(numLoops);
894  for (unsigned i = 0; i < numLoops; i++) {
895  storeValues.push_back(vecValues[indexVecValues]);
896  indexVecValues++;
897  }
898  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
899  findAllocaInsertPoint(builder, moduleTranslation);
900  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
901  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
902  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
903  }
904  return success();
905 }
906 
907 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
908 /// OpenMPIRBuilder.
909 static LogicalResult
910 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
911  LLVM::ModuleTranslation &moduleTranslation) {
912  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
913  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
914 
915  if (failed(checkImplementationStatus(opInst)))
916  return failure();
917 
918  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
919  // OrderedOp has only one region associated with it.
920  auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
921  builder.restoreIP(codeGenIP);
922  return convertOmpOpRegions(region, "omp.ordered.region", builder,
923  moduleTranslation)
924  .takeError();
925  };
926 
927  // TODO: Perform finalization actions for variables. This has to be
928  // called for variables which have destructors/finalizers.
929  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
930 
931  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
932  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
933  moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
934  ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd());
935 
936  if (failed(handleError(afterIP, opInst)))
937  return failure();
938 
939  builder.restoreIP(*afterIP);
940  return success();
941 }
942 
namespace {
/// Contains the arguments for an LLVM store operation
/// whose emission is deferred until a later point in translation (e.g. until
/// after all allocas have been created).
/// NOTE: member order matters — callers destructure this struct with
/// structured bindings as [value, address].
struct DeferredStore {
  DeferredStore(llvm::Value *value, llvm::Value *address)
      : value(value), address(address) {}

  llvm::Value *value;   // the value to store
  llvm::Value *address; // the location to store it to
};
} // namespace
953 
954 /// Allocate space for privatized reduction variables.
955 /// `deferredStores` contains information to create store operations which needs
956 /// to be inserted after all allocas
957 template <typename T>
958 static LogicalResult
960  llvm::IRBuilderBase &builder,
961  LLVM::ModuleTranslation &moduleTranslation,
962  const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
964  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
965  DenseMap<Value, llvm::Value *> &reductionVariableMap,
966  SmallVectorImpl<DeferredStore> &deferredStores,
967  llvm::ArrayRef<bool> isByRefs) {
968  llvm::IRBuilderBase::InsertPointGuard guard(builder);
969  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
970 
971  // delay creating stores until after all allocas
972  deferredStores.reserve(loop.getNumReductionVars());
973 
974  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
975  Region &allocRegion = reductionDecls[i].getAllocRegion();
976  if (isByRefs[i]) {
977  if (allocRegion.empty())
978  continue;
979 
981  if (failed(inlineConvertOmpRegions(allocRegion, "omp.reduction.alloc",
982  builder, moduleTranslation, &phis)))
983  return loop.emitError(
984  "failed to inline `alloc` region of `omp.declare_reduction`");
985 
986  assert(phis.size() == 1 && "expected one allocation to be yielded");
987  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
988 
989  // Allocate reduction variable (which is a pointer to the real reduction
990  // variable allocated in the inlined region)
991  llvm::Value *var = builder.CreateAlloca(
992  moduleTranslation.convertType(reductionDecls[i].getType()));
993  deferredStores.emplace_back(phis[0], var);
994 
995  privateReductionVariables[i] = var;
996  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
997  reductionVariableMap.try_emplace(loop.getReductionVars()[i], phis[0]);
998  } else {
999  assert(allocRegion.empty() &&
1000  "allocaction is implicit for by-val reduction");
1001  llvm::Value *var = builder.CreateAlloca(
1002  moduleTranslation.convertType(reductionDecls[i].getType()));
1003  moduleTranslation.mapValue(reductionArgs[i], var);
1004  privateReductionVariables[i] = var;
1005  reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
1006  }
1007  }
1008 
1009  return success();
1010 }
1011 
1012 /// Map input arguments to reduction initialization region
1013 template <typename T>
1014 static void
1017  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1018  unsigned i) {
1019  // map input argument to the initialization region
1020  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
1021  Region &initializerRegion = reduction.getInitializerRegion();
1022  Block &entry = initializerRegion.front();
1023 
1024  mlir::Value mlirSource = loop.getReductionVars()[i];
1025  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
1026  assert(llvmSource && "lookup reduction var");
1027  moduleTranslation.mapValue(reduction.getInitializerMoldArg(), llvmSource);
1028 
1029  if (entry.getNumArguments() > 1) {
1030  llvm::Value *allocation =
1031  reductionVariableMap.lookup(loop.getReductionVars()[i]);
1032  moduleTranslation.mapValue(reduction.getInitializerAllocArg(), allocation);
1033  }
1034 }
1035 
1036 template <typename OP>
1037 static LogicalResult
1039  llvm::IRBuilderBase &builder,
1040  LLVM::ModuleTranslation &moduleTranslation,
1041  llvm::BasicBlock *latestAllocaBlock,
1043  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
1044  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1045  llvm::ArrayRef<bool> isByRef,
1046  SmallVectorImpl<DeferredStore> &deferredStores) {
1047  if (op.getNumReductionVars() == 0)
1048  return success();
1049 
1050  llvm::IRBuilderBase::InsertPointGuard guard(builder);
1051 
1052  builder.SetInsertPoint(latestAllocaBlock->getTerminator());
1053  llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
1054  auto allocaIP = llvm::IRBuilderBase::InsertPoint(
1055  latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
1056  builder.restoreIP(allocaIP);
1057  SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars());
1058 
1059  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
1060  if (isByRef[i]) {
1061  if (!reductionDecls[i].getAllocRegion().empty())
1062  continue;
1063 
1064  // TODO: remove after all users of by-ref are updated to use the alloc
1065  // region: Allocate reduction variable (which is a pointer to the real
1066  // reduciton variable allocated in the inlined region)
1067  byRefVars[i] = builder.CreateAlloca(
1068  moduleTranslation.convertType(reductionDecls[i].getType()));
1069  }
1070  }
1071 
1072  builder.SetInsertPoint(&*initBlock->getFirstNonPHIOrDbgOrAlloca());
1073 
1074  // store result of the alloc region to the allocated pointer to the real
1075  // reduction variable
1076  for (auto [data, addr] : deferredStores)
1077  builder.CreateStore(data, addr);
1078 
1079  // Before the loop, store the initial values of reductions into reduction
1080  // variables. Although this could be done after allocas, we don't want to mess
1081  // up with the alloca insertion point.
1082  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
1084 
1085  // map block argument to initializer region
1086  mapInitializationArgs(op, moduleTranslation, reductionDecls,
1087  reductionVariableMap, i);
1088 
1089  if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
1090  "omp.reduction.neutral", builder,
1091  moduleTranslation, &phis)))
1092  return failure();
1093 
1094  assert(phis.size() == 1 && "expected one value to be yielded from the "
1095  "reduction neutral element declaration region");
1096 
1097  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1098 
1099  if (isByRef[i]) {
1100  if (!reductionDecls[i].getAllocRegion().empty())
1101  // done in allocReductionVars
1102  continue;
1103 
1104  // TODO: this path can be removed once all users of by-ref are updated to
1105  // use an alloc region
1106 
1107  // Store the result of the inlined region to the allocated reduction var
1108  // ptr
1109  builder.CreateStore(phis[0], byRefVars[i]);
1110 
1111  privateReductionVariables[i] = byRefVars[i];
1112  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
1113  reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
1114  } else {
1115  // for by-ref case the store is inside of the reduction region
1116  builder.CreateStore(phis[0], privateReductionVariables[i]);
1117  // the rest was handled in allocByValReductionVars
1118  }
1119 
1120  // forget the mapping for the initializer region because we might need a
1121  // different mapping if this reduction declaration is re-used for a
1122  // different variable
1123  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
1124  }
1125 
1126  return success();
1127 }
1128 
1129 /// Collect reduction info
1130 template <typename T>
1132  T loop, llvm::IRBuilderBase &builder,
1133  LLVM::ModuleTranslation &moduleTranslation,
1135  SmallVectorImpl<OwningReductionGen> &owningReductionGens,
1136  SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens,
1137  const ArrayRef<llvm::Value *> privateReductionVariables,
1139  unsigned numReductions = loop.getNumReductionVars();
1140 
1141  for (unsigned i = 0; i < numReductions; ++i) {
1142  owningReductionGens.push_back(
1143  makeReductionGen(reductionDecls[i], builder, moduleTranslation));
1144  owningAtomicReductionGens.push_back(
1145  makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
1146  }
1147 
1148  // Collect the reduction information.
1149  reductionInfos.reserve(numReductions);
1150  for (unsigned i = 0; i < numReductions; ++i) {
1151  llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
1152  if (owningAtomicReductionGens[i])
1153  atomicGen = owningAtomicReductionGens[i];
1154  llvm::Value *variable =
1155  moduleTranslation.lookupValue(loop.getReductionVars()[i]);
1156  reductionInfos.push_back(
1157  {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
1158  privateReductionVariables[i],
1159  /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
1160  owningReductionGens[i],
1161  /*ReductionGenClang=*/nullptr, atomicGen});
1162  }
1163 }
1164 
1165 /// handling of DeclareReductionOp's cleanup region
1166 static LogicalResult
1168  llvm::ArrayRef<llvm::Value *> privateVariables,
1169  LLVM::ModuleTranslation &moduleTranslation,
1170  llvm::IRBuilderBase &builder, StringRef regionName,
1171  bool shouldLoadCleanupRegionArg = true) {
1172  for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
1173  if (cleanupRegion->empty())
1174  continue;
1175 
1176  // map the argument to the cleanup region
1177  Block &entry = cleanupRegion->front();
1178 
1179  llvm::Instruction *potentialTerminator =
1180  builder.GetInsertBlock()->empty() ? nullptr
1181  : &builder.GetInsertBlock()->back();
1182  if (potentialTerminator && potentialTerminator->isTerminator())
1183  builder.SetInsertPoint(potentialTerminator);
1184  llvm::Value *privateVarValue =
1185  shouldLoadCleanupRegionArg
1186  ? builder.CreateLoad(
1187  moduleTranslation.convertType(entry.getArgument(0).getType()),
1188  privateVariables[i])
1189  : privateVariables[i];
1190 
1191  moduleTranslation.mapValue(entry.getArgument(0), privateVarValue);
1192 
1193  if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
1194  moduleTranslation)))
1195  return failure();
1196 
1197  // clear block argument mapping in case it needs to be re-created with a
1198  // different source for another use of the same reduction decl
1199  moduleTranslation.forgetMapping(*cleanupRegion);
1200  }
1201  return success();
1202 }
1203 
1204 // TODO: not used by ParallelOp
1205 template <class OP>
1206 static LogicalResult createReductionsAndCleanup(
1207  OP op, llvm::IRBuilderBase &builder,
1208  LLVM::ModuleTranslation &moduleTranslation,
1209  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1211  ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef) {
1212  // Process the reductions if required.
1213  if (op.getNumReductionVars() == 0)
1214  return success();
1215 
1216  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1217 
1218  // Create the reduction generators. We need to own them here because
1219  // ReductionInfo only accepts references to the generators.
1220  SmallVector<OwningReductionGen> owningReductionGens;
1221  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1223  collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
1224  owningReductionGens, owningAtomicReductionGens,
1225  privateReductionVariables, reductionInfos);
1226 
1227  // The call to createReductions below expects the block to have a
1228  // terminator. Create an unreachable instruction to serve as terminator
1229  // and remove it later.
1230  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1231  builder.SetInsertPoint(tempTerminator);
1232  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
1233  ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
1234  isByRef, op.getNowait());
1235 
1236  if (failed(handleError(contInsertPoint, *op)))
1237  return failure();
1238 
1239  if (!contInsertPoint->getBlock())
1240  return op->emitOpError() << "failed to convert reductions";
1241 
1242  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1243  ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);
1244 
1245  if (failed(handleError(afterIP, *op)))
1246  return failure();
1247 
1248  tempTerminator->eraseFromParent();
1249  builder.restoreIP(*afterIP);
1250 
1251  // after the construct, deallocate private reduction variables
1252  SmallVector<Region *> reductionRegions;
1253  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
1254  [](omp::DeclareReductionOp reductionDecl) {
1255  return &reductionDecl.getCleanupRegion();
1256  });
1257  return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
1258  moduleTranslation, builder,
1259  "omp.reduction.cleanup");
1260  return success();
1261 }
1262 
1263 static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
1264  if (!attr)
1265  return {};
1266  return *attr;
1267 }
1268 
1269 // TODO: not used by omp.parallel
1270 template <typename OP>
1271 static LogicalResult allocAndInitializeReductionVars(
1272  OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
1273  LLVM::ModuleTranslation &moduleTranslation,
1274  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1276  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
1277  DenseMap<Value, llvm::Value *> &reductionVariableMap,
1278  llvm::ArrayRef<bool> isByRef) {
1279  if (op.getNumReductionVars() == 0)
1280  return success();
1281 
1282  llvm::IRBuilderBase::InsertPointGuard guard(builder);
1283  SmallVector<DeferredStore> deferredStores;
1284 
1285  if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
1286  allocaIP, reductionDecls,
1287  privateReductionVariables, reductionVariableMap,
1288  deferredStores, isByRef)))
1289  return failure();
1290 
1291  return initReductionVars(op, reductionArgs, builder, moduleTranslation,
1292  allocaIP.getBlock(), reductionDecls,
1293  privateReductionVariables, reductionVariableMap,
1294  isByRef, deferredStores);
1295 }
1296 
1297 /// Return the llvm::Value * corresponding to the `privateVar` that
1298 /// is being privatized. It isn't always as simple as looking up
1299 /// moduleTranslation with privateVar. For instance, in case of
1300 /// an allocatable, the descriptor for the allocatable is privatized.
1301 /// This descriptor is mapped using an MapInfoOp. So, this function
1302 /// will return a pointer to the llvm::Value corresponding to the
1303 /// block argument for the mapped descriptor.
1304 static llvm::Value *
1305 findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
1306  LLVM::ModuleTranslation &moduleTranslation,
1307  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1308  if (mappedPrivateVars == nullptr || !mappedPrivateVars->contains(privateVar))
1309  return moduleTranslation.lookupValue(privateVar);
1310 
1311  Value blockArg = (*mappedPrivateVars)[privateVar];
1312  Type privVarType = privateVar.getType();
1313  Type blockArgType = blockArg.getType();
1314  assert(isa<LLVM::LLVMPointerType>(blockArgType) &&
1315  "A block argument corresponding to a mapped var should have "
1316  "!llvm.ptr type");
1317 
1318  if (privVarType == blockArgType)
1319  return moduleTranslation.lookupValue(blockArg);
1320 
1321  // This typically happens when the privatized type is lowered from
1322  // boxchar<KIND> and gets lowered to !llvm.struct<(ptr, i64)>. That is the
1323  // struct/pair is passed by value. But, mapped values are passed only as
1324  // pointers, so before we privatize, we must load the pointer.
1325  if (!isa<LLVM::LLVMPointerType>(privVarType))
1326  return builder.CreateLoad(moduleTranslation.convertType(privVarType),
1327  moduleTranslation.lookupValue(blockArg));
1328 
1329  return moduleTranslation.lookupValue(privateVar);
1330 }
1331 
1332 /// Allocate delayed private variables. Returns the basic block which comes
1333 /// after all of these allocations. llvm::Value * for each of these private
1334 /// variables are populated in llvmPrivateVars.
1336 allocatePrivateVars(llvm::IRBuilderBase &builder,
1337  LLVM::ModuleTranslation &moduleTranslation,
1338  MutableArrayRef<BlockArgument> privateBlockArgs,
1340  MutableArrayRef<mlir::Value> mlirPrivateVars,
1341  llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
1342  const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
1343  llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
1344  llvm::IRBuilderBase::InsertPointGuard guard(builder);
1345  // Allocate private vars
1346  llvm::BranchInst *allocaTerminator =
1347  llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
1348  builder.SetInsertPoint(allocaTerminator);
1349  assert(allocaTerminator->getNumSuccessors() == 1 &&
1350  "This is an unconditional branch created by OpenMPIRBuilder");
1351  llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);
1352 
1353  // FIXME: Some of the allocation regions do more than just allocating.
1354  // They read from their block argument (amongst other non-alloca things).
1355  // When OpenMPIRBuilder outlines the parallel region into a different
1356  // function it places the loads for live in-values (such as these block
1357  // arguments) at the end of the entry block (because the entry block is
1358  // assumed to contain only allocas). Therefore, if we put these complicated
1359  // alloc blocks in the entry block, these will not dominate the availability
1360  // of the live-in values they are using. Fix this by adding a latealloc
1361  // block after the entry block to put these in (this also helps to avoid
1362  // mixing non-alloca code with allocas).
1363  // Alloc regions which do not use the block argument can still be placed in
1364  // the entry block (therefore keeping the allocas together).
1365  llvm::BasicBlock *privAllocBlock = nullptr;
1366  if (!privateBlockArgs.empty())
1367  privAllocBlock = splitBB(builder, true, "omp.private.latealloc");
1368  for (auto [privDecl, mlirPrivVar, blockArg] :
1369  llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) {
1370  Region &allocRegion = privDecl.getAllocRegion();
1371 
1372  // map allocation region block argument
1373  llvm::Value *nonPrivateVar = findAssociatedValue(
1374  mlirPrivVar, builder, moduleTranslation, mappedPrivateVars);
1375  assert(nonPrivateVar);
1376  moduleTranslation.mapValue(privDecl.getAllocMoldArg(), nonPrivateVar);
1377 
1378  // in-place convert the private allocation region
1380  if (privDecl.getAllocMoldArg().getUses().empty()) {
1381  // TODO this should use
1382  // allocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca() so it goes before
1383  // the code for fetching the thread id. Not doing this for now to avoid
1384  // test churn.
1385  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1386  } else {
1387  builder.SetInsertPoint(privAllocBlock->getTerminator());
1388  }
1389 
1390  if (failed(inlineConvertOmpRegions(allocRegion, "omp.private.alloc",
1391  builder, moduleTranslation, &phis)))
1392  return llvm::createStringError(
1393  "failed to inline `alloc` region of `omp.private`");
1394 
1395  assert(phis.size() == 1 && "expected one allocation to be yielded");
1396 
1397  moduleTranslation.mapValue(blockArg, phis[0]);
1398  llvmPrivateVars.push_back(phis[0]);
1399 
1400  // clear alloc region block argument mapping in case it needs to be
1401  // re-created with a different source for another use of the same
1402  // reduction decl
1403  moduleTranslation.forgetMapping(allocRegion);
1404  }
1405  return afterAllocas;
1406 }
1407 
1408 static LogicalResult
1409 initFirstPrivateVars(llvm::IRBuilderBase &builder,
1410  LLVM::ModuleTranslation &moduleTranslation,
1411  SmallVectorImpl<mlir::Value> &mlirPrivateVars,
1412  SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
1414  llvm::BasicBlock *afterAllocas) {
1415  llvm::IRBuilderBase::InsertPointGuard guard(builder);
1416  // Apply copy region for firstprivate.
1417  bool needsFirstprivate =
1418  llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1419  return privOp.getDataSharingType() ==
1420  omp::DataSharingClauseType::FirstPrivate;
1421  });
1422 
1423  if (!needsFirstprivate)
1424  return success();
1425 
1426  assert(afterAllocas->getSinglePredecessor());
1427 
1428  // Find the end of the allocation blocks
1429  builder.SetInsertPoint(afterAllocas->getSinglePredecessor()->getTerminator());
1430  llvm::BasicBlock *copyBlock =
1431  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1432  builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
1433 
1434  for (auto [decl, mlirVar, llvmVar] :
1435  llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
1436  if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
1437  continue;
1438 
1439  // copyRegion implements `lhs = rhs`
1440  Region &copyRegion = decl.getCopyRegion();
1441 
1442  // map copyRegion rhs arg
1443  llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
1444  assert(nonPrivateVar);
1445  moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
1446 
1447  // map copyRegion lhs arg
1448  moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
1449 
1450  // in-place convert copy region
1451  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1452  if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
1453  moduleTranslation)))
1454  return decl.emitError("failed to inline `copy` region of `omp.private`");
1455 
1456  // ignore unused value yielded from copy region
1457 
1458  // clear copy region block argument mapping in case it needs to be
1459  // re-created with different sources for reuse of the same reduction
1460  // decl
1461  moduleTranslation.forgetMapping(copyRegion);
1462  }
1463 
1464  return success();
1465 }
1466 
1467 static LogicalResult
1468 cleanupPrivateVars(llvm::IRBuilderBase &builder,
1469  LLVM::ModuleTranslation &moduleTranslation, Location loc,
1470  SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
1471  SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
1472  // private variable deallocation
1473  SmallVector<Region *> privateCleanupRegions;
1474  llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
1475  [](omp::PrivateClauseOp privatizer) {
1476  return &privatizer.getDeallocRegion();
1477  });
1478 
1479  if (failed(inlineOmpRegionCleanup(
1480  privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1481  "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1482  return mlir::emitError(loc, "failed to inline `dealloc` region of an "
1483  "`omp.private` op in");
1484 
1485  return success();
1486 }
1487 
1488 static LogicalResult
1489 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
1490  LLVM::ModuleTranslation &moduleTranslation) {
1491  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1492  using StorableBodyGenCallbackTy =
1493  llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
1494 
1495  auto sectionsOp = cast<omp::SectionsOp>(opInst);
1496 
1497  if (failed(checkImplementationStatus(opInst)))
1498  return failure();
1499 
1500  llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionByref());
1501  assert(isByRef.size() == sectionsOp.getNumReductionVars());
1502 
1503  SmallVector<omp::DeclareReductionOp> reductionDecls;
1504  collectReductionDecls(sectionsOp, reductionDecls);
1505  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1506  findAllocaInsertPoint(builder, moduleTranslation);
1507 
1508  SmallVector<llvm::Value *> privateReductionVariables(
1509  sectionsOp.getNumReductionVars());
1510  DenseMap<Value, llvm::Value *> reductionVariableMap;
1511 
1512  MutableArrayRef<BlockArgument> reductionArgs =
1513  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
1514 
1516  sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
1517  reductionDecls, privateReductionVariables, reductionVariableMap,
1518  isByRef)))
1519  return failure();
1520 
1521  // Store the mapping between reduction variables and their private copies on
1522  // ModuleTranslation stack. It can be then recovered when translating
1523  // omp.reduce operations in a separate call.
1525  moduleTranslation, reductionVariableMap);
1526 
1528 
1529  for (Operation &op : *sectionsOp.getRegion().begin()) {
1530  auto sectionOp = dyn_cast<omp::SectionOp>(op);
1531  if (!sectionOp) // omp.terminator
1532  continue;
1533 
1534  Region &region = sectionOp.getRegion();
1535  auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation](
1536  InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1537  builder.restoreIP(codeGenIP);
1538 
1539  // map the omp.section reduction block argument to the omp.sections block
1540  // arguments
1541  // TODO: this assumes that the only block arguments are reduction
1542  // variables
1543  assert(region.getNumArguments() ==
1544  sectionsOp.getRegion().getNumArguments());
1545  for (auto [sectionsArg, sectionArg] : llvm::zip_equal(
1546  sectionsOp.getRegion().getArguments(), region.getArguments())) {
1547  llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg);
1548  assert(llvmVal);
1549  moduleTranslation.mapValue(sectionArg, llvmVal);
1550  }
1551 
1552  return convertOmpOpRegions(region, "omp.section.region", builder,
1553  moduleTranslation)
1554  .takeError();
1555  };
1556  sectionCBs.push_back(sectionCB);
1557  }
1558 
1559  // No sections within omp.sections operation - skip generation. This situation
1560  // is only possible if there is only a terminator operation inside the
1561  // sections operation
1562  if (sectionCBs.empty())
1563  return success();
1564 
1565  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
1566 
1567  // TODO: Perform appropriate actions according to the data-sharing
1568  // attribute (shared, private, firstprivate, ...) of variables.
1569  // Currently defaults to shared.
1570  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
1571  llvm::Value &vPtr, llvm::Value *&replacementValue)
1572  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
1573  replacementValue = &vPtr;
1574  return codeGenIP;
1575  };
1576 
1577  // TODO: Perform finalization actions for variables. This has to be
1578  // called for variables which have destructors/finalizers.
1579  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1580 
1581  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1582  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1583  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1584  moduleTranslation.getOpenMPBuilder()->createSections(
1585  ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
1586  sectionsOp.getNowait());
1587 
1588  if (failed(handleError(afterIP, opInst)))
1589  return failure();
1590 
1591  builder.restoreIP(*afterIP);
1592 
1593  // Process the reductions if required.
1594  return createReductionsAndCleanup(sectionsOp, builder, moduleTranslation,
1595  allocaIP, reductionDecls,
1596  privateReductionVariables, isByRef);
1597 }
1598 
1599 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
1600 static LogicalResult
1601 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
1602  LLVM::ModuleTranslation &moduleTranslation) {
1603  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1604  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1605 
1606  if (failed(checkImplementationStatus(*singleOp)))
1607  return failure();
1608 
// Body callback: translate the contents of the `single` region at the
// codegen insertion point supplied by the OpenMPIRBuilder.
1609  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1610  builder.restoreIP(codegenIP);
1611  return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region",
1612  builder, moduleTranslation)
1613  .takeError();
1614  };
// Finalization callback: nothing to finalize for `single` today.
1615  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
1616 
1617  // Handle copyprivate
1618  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
1619  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateSyms();
// NOTE(review): extraction dropped original lines 1620-1621 here, which
// declared the `llvmCPVars` / `llvmCPFuncs` SmallVectors used below —
// restore from upstream before building.
1622  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
1623  llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]))
1624  auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
1625  singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
1626  llvmCPFuncs.push_back(
1627  moduleTranslation.lookupFunction(llvmFuncOp.getName()));
1628  }
1629 
1630  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1631  moduleTranslation.getOpenMPBuilder()->createSingle(
1632  ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars,
1633  llvmCPFuncs);
1634 
1635  if (failed(handleError(afterIP, *singleOp)))
1636  return failure();
1637 
1638  builder.restoreIP(*afterIP);
1639  return success();
1640 }
1641 
1642 // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder
1643 static LogicalResult
1644 convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
1645  LLVM::ModuleTranslation &moduleTranslation) {
1646  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1647  if (failed(checkImplementationStatus(*op)))
1648  return failure();
1649 
// Body callback: record the alloca insertion point for nested regions,
// then translate the teams region at the supplied codegen point.
1650  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
// NOTE(review): extraction dropped original line 1651 here (the
// ModuleTranslation stack-frame construction that consumes
// `moduleTranslation, allocaIP` below) — restore from upstream.
1652  moduleTranslation, allocaIP);
1653  builder.restoreIP(codegenIP);
1654  return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
1655  moduleTranslation)
1656  .takeError();
1657  };
1658 
// Clause operands are optional; only look up LLVM values for the ones
// present on the op, leaving the rest as nullptr for createTeams.
1659  llvm::Value *numTeamsLower = nullptr;
1660  if (Value numTeamsLowerVar = op.getNumTeamsLower())
1661  numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);
1662 
1663  llvm::Value *numTeamsUpper = nullptr;
1664  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
1665  numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);
1666 
1667  llvm::Value *threadLimit = nullptr;
1668  if (Value threadLimitVar = op.getThreadLimit())
1669  threadLimit = moduleTranslation.lookupValue(threadLimitVar);
1670 
1671  llvm::Value *ifExpr = nullptr;
1672  if (Value ifVar = op.getIfExpr())
1673  ifExpr = moduleTranslation.lookupValue(ifVar);
1674 
1675  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1676  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1677  moduleTranslation.getOpenMPBuilder()->createTeams(
1678  ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr);
1679 
1680  if (failed(handleError(afterIP, *op)))
1681  return failure();
1682 
1683  builder.restoreIP(*afterIP);
1684  return success();
1685 }
1686 
/// Translate the `depend` clause of a task-like operation into
/// OpenMPIRBuilder DependData entries, appended to the output vector.
1687 static void
1688 buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
1689  LLVM::ModuleTranslation &moduleTranslation,
// NOTE(review): extraction dropped original line 1690 here — the final
// parameter (the `dds` output SmallVector of DependData) — restore from
// upstream.
1691  if (dependVars.empty())
1692  return;
1693  for (auto dep : llvm::zip(dependVars, dependKinds->getValue())) {
1694  llvm::omp::RTLDependenceKindTy type;
1695  switch (
1696  cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
1697  case mlir::omp::ClauseTaskDepend::taskdependin:
1698  type = llvm::omp::RTLDependenceKindTy::DepIn;
1699  break;
1700  // The OpenMP runtime requires that the codegen for 'depend' clause for
1701  // 'out' dependency kind must be the same as codegen for 'depend' clause
1702  // with 'inout' dependency.
1703  case mlir::omp::ClauseTaskDepend::taskdependout:
1704  case mlir::omp::ClauseTaskDepend::taskdependinout:
1705  type = llvm::omp::RTLDependenceKindTy::DepInOut;
1706  break;
1707  };
1708  llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
1709  llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
1710  dds.emplace_back(dd);
1711  }
1712 }
1713 
1714 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
1715 static LogicalResult
1716 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
1717  LLVM::ModuleTranslation &moduleTranslation) {
1718  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1719  if (failed(checkImplementationStatus(*taskOp)))
1720  return failure();
1721 
1722  // Collect delayed privatisation declarations
1723  MutableArrayRef<BlockArgument> privateBlockArgs =
1724  cast<omp::BlockArgOpenMPOpInterface>(*taskOp).getPrivateBlockArgs();
1725  SmallVector<mlir::Value> mlirPrivateVars;
1726  SmallVector<llvm::Value *> llvmPrivateVars;
1727  SmallVector<omp::PrivateClauseOp> privateDecls;
1728  mlirPrivateVars.reserve(privateBlockArgs.size());
1729  llvmPrivateVars.reserve(privateBlockArgs.size());
1730  collectPrivatizationDecls(taskOp, privateDecls);
1731  for (mlir::Value privateVar : taskOp.getPrivateVars())
1732  mlirPrivateVars.push_back(privateVar);
1733 
// Body callback: allocate/init private copies, translate the task region,
// then clean the private copies up at the continuation point.
1734  auto bodyCB = [&](InsertPointTy allocaIP,
1735  InsertPointTy codegenIP) -> llvm::Error {
1736  // Save the alloca insertion point on ModuleTranslation stack for use in
1737  // nested regions.
// NOTE(review): extraction dropped original line 1738 (the stack-frame
// construction consuming `moduleTranslation, allocaIP`) — restore.
1739  moduleTranslation, allocaIP);
1740 
// NOTE(review): extraction dropped original line 1741 (the declaration of
// `afterAllocas` as the result of allocatePrivateVars(...)) — restore.
1742  builder, moduleTranslation, privateBlockArgs, privateDecls,
1743  mlirPrivateVars, llvmPrivateVars, allocaIP);
1744  if (handleError(afterAllocas, *taskOp).failed())
1745  return llvm::make_error<PreviouslyReportedError>();
1746 
1747  if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
1748  llvmPrivateVars, privateDecls,
1749  afterAllocas.get())))
1750  return llvm::make_error<PreviouslyReportedError>();
1751 
1752  // translate the body of the task:
1753  builder.restoreIP(codegenIP);
1754  auto continuationBlockOrError = convertOmpOpRegions(
1755  taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
1756  if (failed(handleError(continuationBlockOrError, *taskOp)))
1757  return llvm::make_error<PreviouslyReportedError>();
1758 
1759  builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
1760 
1761  if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(),
1762  llvmPrivateVars, privateDecls)))
1763  return llvm::make_error<PreviouslyReportedError>();
1764 
1765  return llvm::Error::success();
1766  };
1767 
// NOTE(review): extraction dropped original line 1768 here (the declaration
// of the `dds` DependData vector filled by buildDependData) — restore.
1769  buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
1770  moduleTranslation, dds);
1771 
1772  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1773  findAllocaInsertPoint(builder, moduleTranslation);
1774  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1775  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1776  moduleTranslation.getOpenMPBuilder()->createTask(
1777  ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
1778  moduleTranslation.lookupValue(taskOp.getFinal()),
1779  moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
1780  taskOp.getMergeable(),
1781  moduleTranslation.lookupValue(taskOp.getEventHandle()));
1782 
1783  if (failed(handleError(afterIP, *taskOp)))
1784  return failure();
1785 
1786  builder.restoreIP(*afterIP);
1787  return success();
1788 }
1789 
1790 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
1791 static LogicalResult
1792 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
1793  LLVM::ModuleTranslation &moduleTranslation) {
1794  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1795  if (failed(checkImplementationStatus(*tgOp)))
1796  return failure();
1797 
1798  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1799  builder.restoreIP(codegenIP);
1800  return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region",
1801  builder, moduleTranslation)
1802  .takeError();
1803  };
1804 
1805  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1806  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1807  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1808  moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP,
1809  bodyCB);
1810 
1811  if (failed(handleError(afterIP, *tgOp)))
1812  return failure();
1813 
1814  builder.restoreIP(*afterIP);
1815  return success();
1816 }
1817 
1818 static LogicalResult
1819 convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder,
1820  LLVM::ModuleTranslation &moduleTranslation) {
1821  if (failed(checkImplementationStatus(*twOp)))
1822  return failure();
1823 
1824  moduleTranslation.getOpenMPBuilder()->createTaskwait(builder.saveIP());
1825  return success();
1826 }
1827 
1828 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
1829 static LogicalResult
1830 convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
1831  LLVM::ModuleTranslation &moduleTranslation) {
1832  auto wsloopOp = cast<omp::WsloopOp>(opInst);
1833  if (failed(checkImplementationStatus(opInst)))
1834  return failure();
1835 
1836  auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
1837  llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref());
1838  assert(isByRef.size() == wsloopOp.getNumReductionVars());
1839 
1840  // Static is the default.
1841  auto schedule =
1842  wsloopOp.getScheduleKind().value_or(omp::ClauseScheduleKind::Static);
1843 
1844  // Find the loop configuration.
1845  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]);
1846  llvm::Type *ivType = step->getType();
1847  llvm::Value *chunk = nullptr;
1848  if (wsloopOp.getScheduleChunk()) {
1849  llvm::Value *chunkVar =
1850  moduleTranslation.lookupValue(wsloopOp.getScheduleChunk());
// Chunk size is coerced to the induction-variable type expected by the
// runtime.
1851  chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
1852  }
1853 
// Collect delayed privatization declarations and the MLIR private vars.
1854  MutableArrayRef<BlockArgument> privateBlockArgs =
1855  cast<omp::BlockArgOpenMPOpInterface>(*wsloopOp).getPrivateBlockArgs();
1856  SmallVector<mlir::Value> mlirPrivateVars;
1857  SmallVector<llvm::Value *> llvmPrivateVars;
1858  SmallVector<omp::PrivateClauseOp> privateDecls;
1859  mlirPrivateVars.reserve(privateBlockArgs.size());
1860  llvmPrivateVars.reserve(privateBlockArgs.size());
1861  collectPrivatizationDecls(wsloopOp, privateDecls);
1862 
1863  for (mlir::Value privateVar : wsloopOp.getPrivateVars())
1864  mlirPrivateVars.push_back(privateVar);
1865 
1866  SmallVector<omp::DeclareReductionOp> reductionDecls;
1867  collectReductionDecls(wsloopOp, reductionDecls);
1868  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1869  findAllocaInsertPoint(builder, moduleTranslation);
1870 
1871  SmallVector<llvm::Value *> privateReductionVariables(
1872  wsloopOp.getNumReductionVars());
1873 
1874  splitBB(llvm::OpenMPIRBuilder::InsertPointTy(
1875  allocaIP.getBlock(),
1876  allocaIP.getBlock()->getTerminator()->getIterator()),
1877  true, "omp.region.after_alloca");
1878 
// NOTE(review): extraction dropped original line 1879 here (the declaration
// of `afterAllocas` as the result of allocatePrivateVars(...)) — restore.
1880  builder, moduleTranslation, privateBlockArgs, privateDecls,
1881  mlirPrivateVars, llvmPrivateVars, allocaIP);
1882  if (handleError(afterAllocas, opInst).failed())
1883  return failure();
1884 
1885  DenseMap<Value, llvm::Value *> reductionVariableMap;
1886 
1887  MutableArrayRef<BlockArgument> reductionArgs =
1888  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
1889 
1890  SmallVector<DeferredStore> deferredStores;
1891 
1892  if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
1893  moduleTranslation, allocaIP, reductionDecls,
1894  privateReductionVariables, reductionVariableMap,
1895  deferredStores, isByRef)))
1896  return failure();
1897 
1898  if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
1899  llvmPrivateVars, privateDecls,
1900  afterAllocas.get())))
1901  return failure();
1902 
1903  assert(afterAllocas.get()->getSinglePredecessor());
1904  if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
1905  moduleTranslation,
1906  afterAllocas.get()->getSinglePredecessor(),
1907  reductionDecls, privateReductionVariables,
1908  reductionVariableMap, isByRef, deferredStores)))
1909  return failure();
1910 
1911  // TODO: Replace this with proper composite translation support.
1912  // Currently, all nested wrappers are ignored, so 'do/for simd' will be
1913  // treated the same as a standalone 'do/for'. This is allowed by the spec,
1914  // since it's equivalent to always using a SIMD length of 1.
1915  if (failed(convertIgnoredWrappers(loopOp, wsloopOp, moduleTranslation)))
1916  return failure();
1917 
1918  // Store the mapping between reduction variables and their private copies on
1919  // ModuleTranslation stack. It can be then recovered when translating
1920  // omp.reduce operations in a separate call.
// NOTE(review): extraction dropped original line 1921 (the stack-frame
// construction consuming `moduleTranslation, reductionVariableMap`) — restore.
1922  moduleTranslation, reductionVariableMap);
1923 
1924  // Set up the source location value for OpenMP runtime.
1925  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1926 
1927  // Generator of the canonical loop body.
// NOTE(review): extraction dropped original lines 1928-1929 here (the
// declarations of the `loopInfos` and `bodyInsertPoints` vectors used by
// the lambda below) — restore from upstream.
1930  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip,
1931  llvm::Value *iv) -> llvm::Error {
1932  // Make sure further conversions know about the induction variable.
1933  moduleTranslation.mapValue(
1934  loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
1935 
1936  // Capture the body insertion point for use in nested loops. BodyIP of the
1937  // CanonicalLoopInfo always points to the beginning of the entry block of
1938  // the body.
1939  bodyInsertPoints.push_back(ip);
1940 
1941  if (loopInfos.size() != loopOp.getNumLoops() - 1)
1942  return llvm::Error::success();
1943 
1944  // Convert the body of the loop.
1945  builder.restoreIP(ip);
1946  return convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder,
1947  moduleTranslation)
1948  .takeError();
1949  };
1950 
1951  // Delegate actual loop construction to the OpenMP IRBuilder.
1952  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
1953  // loop, i.e. it has a positive step, uses signed integer semantics.
1954  // Reconsider this code when the nested loop operation clearly supports more
1955  // cases.
1956  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1957  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
1958  llvm::Value *lowerBound =
1959  moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]);
1960  llvm::Value *upperBound =
1961  moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]);
1962  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]);
1963 
1964  // Make sure loop trip count are emitted in the preheader of the outermost
1965  // loop at the latest so that they are all available for the new collapsed
1966  // loop will be created below.
1967  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
1968  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
1969  if (i != 0) {
1970  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back());
1971  computeIP = loopInfos.front()->getPreheaderIP();
1972  }
1973 
// NOTE(review): extraction dropped original line 1974 (the declaration of
// `loopResult` receiving createCanonicalLoop's result) — restore.
1975  ompBuilder->createCanonicalLoop(
1976  loc, bodyGen, lowerBound, upperBound, step,
1977  /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);
1978 
1979  if (failed(handleError(loopResult, *loopOp)))
1980  return failure();
1981 
1982  loopInfos.push_back(*loopResult);
1983  }
1984 
1985  // Collapse loops. Store the insertion point because LoopInfos may get
1986  // invalidated.
1987  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
1988  llvm::CanonicalLoopInfo *loopInfo =
1989  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
1990 
1991  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1992 
1993  // TODO: Handle doacross loops when the ordered clause has a parameter.
1994  bool isOrdered = wsloopOp.getOrdered().has_value();
1995  std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
1996  bool isSimd = wsloopOp.getScheduleSimd();
1997 
1998  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
1999  ompBuilder->applyWorkshareLoop(
2000  ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(),
2001  convertToScheduleKind(schedule), chunk, isSimd,
2002  scheduleMod == omp::ScheduleModifier::monotonic,
2003  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered);
2004 
2005  if (failed(handleError(wsloopIP, opInst)))
2006  return failure();
2007 
2008  // Continue building IR after the loop. Note that the LoopInfo returned by
2009  // `collapseLoops` points inside the outermost loop and is intended for
2010  // potential further loop transformations. Use the insertion point stored
2011  // before collapsing loops instead.
2012  builder.restoreIP(afterIP);
2013 
2014  // Process the reductions if required.
2015  if (failed(createReductionsAndCleanup(wsloopOp, builder, moduleTranslation,
2016  allocaIP, reductionDecls,
2017  privateReductionVariables, isByRef)))
2018  return failure();
2019 
2020  return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
2021  llvmPrivateVars, privateDecls);
2022 }
2023 
2024 /// Converts the OpenMP parallel operation to LLVM IR.
2025 static LogicalResult
2026 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
2027  LLVM::ModuleTranslation &moduleTranslation) {
2028  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2029  ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionByref());
2030  assert(isByRef.size() == opInst.getNumReductionVars());
2031  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2032 
2033  if (failed(checkImplementationStatus(*opInst)))
2034  return failure();
2035 
2036  // Collect delayed privatization declarations
2037  MutableArrayRef<BlockArgument> privateBlockArgs =
2038  cast<omp::BlockArgOpenMPOpInterface>(*opInst).getPrivateBlockArgs();
2039  SmallVector<mlir::Value> mlirPrivateVars;
2040  SmallVector<llvm::Value *> llvmPrivateVars;
2041  SmallVector<omp::PrivateClauseOp> privateDecls;
2042  mlirPrivateVars.reserve(privateBlockArgs.size());
2043  llvmPrivateVars.reserve(privateBlockArgs.size());
2044  collectPrivatizationDecls(opInst, privateDecls);
2045  for (mlir::Value privateVar : opInst.getPrivateVars())
2046  mlirPrivateVars.push_back(privateVar);
2047 
2048  // Collect reduction declarations
2049  SmallVector<omp::DeclareReductionOp> reductionDecls;
2050  collectReductionDecls(opInst, reductionDecls);
2051  SmallVector<llvm::Value *> privateReductionVariables(
2052  opInst.getNumReductionVars());
2053  SmallVector<DeferredStore> deferredStores;
2054 
// Body callback: allocate/init privates and reduction vars, translate the
// parallel region, then emit reduction combination at its continuation.
2055  auto bodyGenCB = [&](InsertPointTy allocaIP,
2056  InsertPointTy codeGenIP) -> llvm::Error {
// NOTE(review): extraction dropped original line 2057 here (the declaration
// of `afterAllocas` as the result of allocatePrivateVars(...)) — restore.
2058  builder, moduleTranslation, privateBlockArgs, privateDecls,
2059  mlirPrivateVars, llvmPrivateVars, allocaIP);
2060  if (handleError(afterAllocas, *opInst).failed())
2061  return llvm::make_error<PreviouslyReportedError>();
2062 
2063  // Allocate reduction vars
2064  DenseMap<Value, llvm::Value *> reductionVariableMap;
2065 
2066  MutableArrayRef<BlockArgument> reductionArgs =
2067  cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs();
2068 
2069  allocaIP =
2070  InsertPointTy(allocaIP.getBlock(),
2071  allocaIP.getBlock()->getTerminator()->getIterator());
2072 
2073  if (failed(allocReductionVars(
2074  opInst, reductionArgs, builder, moduleTranslation, allocaIP,
2075  reductionDecls, privateReductionVariables, reductionVariableMap,
2076  deferredStores, isByRef)))
2077  return llvm::make_error<PreviouslyReportedError>();
2078 
2079  if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
2080  llvmPrivateVars, privateDecls,
2081  afterAllocas.get())))
2082  return llvm::make_error<PreviouslyReportedError>();
2083 
2084  assert(afterAllocas.get()->getSinglePredecessor());
2085  if (failed(
2086  initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
2087  afterAllocas.get()->getSinglePredecessor(),
2088  reductionDecls, privateReductionVariables,
2089  reductionVariableMap, isByRef, deferredStores)))
2090  return llvm::make_error<PreviouslyReportedError>();
2091 
2092  // Store the mapping between reduction variables and their private copies on
2093  // ModuleTranslation stack. It can be then recovered when translating
2094  // omp.reduce operations in a separate call.
// NOTE(review): extraction dropped original line 2095 (the stack-frame
// construction consuming `moduleTranslation, reductionVariableMap`) — restore.
2096  moduleTranslation, reductionVariableMap);
2097 
2098  // Save the alloca insertion point on ModuleTranslation stack for use in
2099  // nested regions.
// NOTE(review): extraction dropped original line 2100 (the stack-frame
// construction consuming `moduleTranslation, allocaIP`) — restore.
2101  moduleTranslation, allocaIP);
2102 
2103  // ParallelOp has only one region associated with it.
2104  builder.restoreIP(codeGenIP);
// NOTE(review): extraction dropped original line 2105 (the declaration of
// `regionBlock` as the result of convertOmpOpRegions(...)) — restore.
2106  opInst.getRegion(), "omp.par.region", builder, moduleTranslation);
2107  if (!regionBlock)
2108  return regionBlock.takeError();
2109 
2110  // Process the reductions if required.
2111  if (opInst.getNumReductionVars() > 0) {
2112  // Collect reduction info
2113  SmallVector<OwningReductionGen> owningReductionGens;
2114  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
// NOTE(review): extraction dropped original line 2115 (the declaration of
// the `reductionInfos` vector filled by collectReductionInfo) — restore.
2116  collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
2117  owningReductionGens, owningAtomicReductionGens,
2118  privateReductionVariables, reductionInfos);
2119 
2120  // Move to region cont block
2121  builder.SetInsertPoint((*regionBlock)->getTerminator());
2122 
2123  // Generate reductions from info
2124  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
2125  builder.SetInsertPoint(tempTerminator);
2126 
2127  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
2128  ompBuilder->createReductions(builder.saveIP(), allocaIP,
2129  reductionInfos, isByRef, false);
2130  if (!contInsertPoint)
2131  return contInsertPoint.takeError();
2132 
2133  if (!contInsertPoint->getBlock())
2134  return llvm::make_error<PreviouslyReportedError>();
2135 
2136  tempTerminator->eraseFromParent();
2137  builder.restoreIP(*contInsertPoint);
2138  }
2139  return llvm::Error::success();
2140  };
2141 
2142  auto privCB = [](InsertPointTy allocaIP, InsertPointTy codeGenIP,
2143  llvm::Value &, llvm::Value &val, llvm::Value *&replVal) {
2144  // tell OpenMPIRBuilder not to do anything. We handled Privatisation in
2145  // bodyGenCB.
2146  replVal = &val;
2147  return codeGenIP;
2148  };
2149 
2150  // TODO: Perform finalization actions for variables. This has to be
2151  // called for variables which have destructors/finalizers.
2152  auto finiCB = [&](InsertPointTy codeGenIP) -> llvm::Error {
2153  InsertPointTy oldIP = builder.saveIP();
2154  builder.restoreIP(codeGenIP);
2155 
2156  // if the reduction has a cleanup region, inline it here to finalize the
2157  // reduction variables
2158  SmallVector<Region *> reductionCleanupRegions;
2159  llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions),
2160  [](omp::DeclareReductionOp reductionDecl) {
2161  return &reductionDecl.getCleanupRegion();
2162  });
2163  if (failed(inlineOmpRegionCleanup(
2164  reductionCleanupRegions, privateReductionVariables,
2165  moduleTranslation, builder, "omp.reduction.cleanup")))
2166  return llvm::createStringError(
2167  "failed to inline `cleanup` region of `omp.declare_reduction`");
2168 
2169  if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(),
2170  llvmPrivateVars, privateDecls)))
2171  return llvm::make_error<PreviouslyReportedError>();
2172 
2173  builder.restoreIP(oldIP);
2174  return llvm::Error::success();
2175  };
2176 
// Optional clauses: only resolve LLVM values for those present on the op.
2177  llvm::Value *ifCond = nullptr;
2178  if (auto ifVar = opInst.getIfExpr())
2179  ifCond = moduleTranslation.lookupValue(ifVar);
2180  llvm::Value *numThreads = nullptr;
2181  if (auto numThreadsVar = opInst.getNumThreads())
2182  numThreads = moduleTranslation.lookupValue(numThreadsVar);
2183  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
2184  if (auto bind = opInst.getProcBindKind())
2185  pbKind = getProcBindKind(*bind);
2186  // TODO: Is the Parallel construct cancellable?
2187  bool isCancellable = false;
2188 
2189  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2190  findAllocaInsertPoint(builder, moduleTranslation);
2191  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2192 
2193  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
2194  ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
2195  ifCond, numThreads, pbKind, isCancellable);
2196 
2197  if (failed(handleError(afterIP, *opInst)))
2198  return failure();
2199 
2200  builder.restoreIP(*afterIP);
2201  return success();
2202 }
2203 
2204 /// Convert Order attribute to llvm::omp::OrderKind.
2205 static llvm::omp::OrderKind
2206 convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
2207  if (!o)
2208  return llvm::omp::OrderKind::OMP_ORDER_unknown;
2209  switch (*o) {
2210  case omp::ClauseOrderKind::Concurrent:
2211  return llvm::omp::OrderKind::OMP_ORDER_concurrent;
2212  }
2213  llvm_unreachable("Unknown ClauseOrderKind kind");
2214 }
2215 
2216 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
2217 static LogicalResult
2218 convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
2219  LLVM::ModuleTranslation &moduleTranslation) {
2220  auto simdOp = cast<omp::SimdOp>(opInst);
2221  auto loopOp = cast<omp::LoopNestOp>(simdOp.getWrappedLoop());
2222 
2223  if (failed(checkImplementationStatus(opInst)))
2224  return failure();
2225 
2226  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2227 
2228  // Generator of the canonical loop body.
// NOTE(review): extraction dropped original lines 2229-2230 here (the
// declarations of the `loopInfos` and `bodyInsertPoints` vectors used by
// the lambda below) — restore from upstream.
2231  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip,
2232  llvm::Value *iv) -> llvm::Error {
2233  // Make sure further conversions know about the induction variable.
2234  moduleTranslation.mapValue(
2235  loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
2236 
2237  // Capture the body insertion point for use in nested loops. BodyIP of the
2238  // CanonicalLoopInfo always points to the beginning of the entry block of
2239  // the body.
2240  bodyInsertPoints.push_back(ip);
2241 
2242  if (loopInfos.size() != loopOp.getNumLoops() - 1)
2243  return llvm::Error::success();
2244 
2245  // Convert the body of the loop.
2246  builder.restoreIP(ip);
2247  return convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder,
2248  moduleTranslation)
2249  .takeError();
2250  };
2251 
2252  // Delegate actual loop construction to the OpenMP IRBuilder.
2253  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
2254  // loop, i.e. it has a positive step, uses signed integer semantics.
2255  // Reconsider this code when the nested loop operation clearly supports more
2256  // cases.
2257  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2258  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
2259  llvm::Value *lowerBound =
2260  moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]);
2261  llvm::Value *upperBound =
2262  moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]);
2263  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]);
2264 
2265  // Make sure loop trip count are emitted in the preheader of the outermost
2266  // loop at the latest so that they are all available for the new collapsed
2267  // loop will be created below.
2268  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
2269  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
2270  if (i != 0) {
2271  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
2272  ompLoc.DL);
2273  computeIP = loopInfos.front()->getPreheaderIP();
2274  }
2275 
// NOTE(review): extraction dropped original line 2276 (the declaration of
// `loopResult` receiving createCanonicalLoop's result) — restore.
2277  ompBuilder->createCanonicalLoop(
2278  loc, bodyGen, lowerBound, upperBound, step,
2279  /*IsSigned=*/true, /*InclusiveStop=*/true, computeIP);
2280 
2281  if (failed(handleError(loopResult, *loopOp)))
2282  return failure();
2283 
2284  loopInfos.push_back(*loopResult);
2285  }
2286 
2287  // Collapse loops.
2288  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
2289  llvm::CanonicalLoopInfo *loopInfo =
2290  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
2291 
// simdlen/safelen clauses are optional; absent means nullptr to applySimd.
2292  llvm::ConstantInt *simdlen = nullptr;
2293  if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
2294  simdlen = builder.getInt64(simdlenVar.value());
2295 
2296  llvm::ConstantInt *safelen = nullptr;
2297  if (std::optional<uint64_t> safelenVar = simdOp.getSafelen())
2298  safelen = builder.getInt64(safelenVar.value());
2299 
2300  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
2301  llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder());
2302  ompBuilder->applySimd(loopInfo, alignedVars,
2303  simdOp.getIfExpr()
2304  ? moduleTranslation.lookupValue(simdOp.getIfExpr())
2305  : nullptr,
2306  order, simdlen, safelen);
2307 
2308  builder.restoreIP(afterIP);
2309  return success();
2310 }
2311 
2312 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
2313 static llvm::AtomicOrdering
2314 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
2315  if (!ao)
2316  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
2317 
2318  switch (*ao) {
2319  case omp::ClauseMemoryOrderKind::Seq_cst:
2320  return llvm::AtomicOrdering::SequentiallyConsistent;
2321  case omp::ClauseMemoryOrderKind::Acq_rel:
2322  return llvm::AtomicOrdering::AcquireRelease;
2323  case omp::ClauseMemoryOrderKind::Acquire:
2324  return llvm::AtomicOrdering::Acquire;
2325  case omp::ClauseMemoryOrderKind::Release:
2326  return llvm::AtomicOrdering::Release;
2327  case omp::ClauseMemoryOrderKind::Relaxed:
2328  return llvm::AtomicOrdering::Monotonic;
2329  }
2330  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
2331 }
2332 
2333 /// Convert omp.atomic.read operation to LLVM IR.
2334 static LogicalResult
2335 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
2336  LLVM::ModuleTranslation &moduleTranslation) {
2337  auto readOp = cast<omp::AtomicReadOp>(opInst);
2338  if (failed(checkImplementationStatus(opInst)))
2339  return failure();
2340 
2341  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2342 
2343  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2344 
2345  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrder());
2346  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
2347  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
2348 
2349  llvm::Type *elementType =
2350  moduleTranslation.convertType(readOp.getElementType());
2351 
2352  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
2353  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
2354  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
2355  return success();
2356 }
2357 
2358 /// Converts an omp.atomic.write operation to LLVM IR.
2359 static LogicalResult
2360 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
2361  LLVM::ModuleTranslation &moduleTranslation) {
2362  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
2363  if (failed(checkImplementationStatus(opInst)))
2364  return failure();
2365 
2366  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2367 
2368  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2369  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder());
2370  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
2371  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
2372  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
2373  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
2374  /*isVolatile=*/false};
2375  builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
2376  return success();
2377 }
2378 
2379 /// Converts an LLVM dialect binary operation to the corresponding enum value
2380 /// for `atomicrmw` supported binary operation.
2381 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
2383  .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
2384  .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
2385  .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
2386  .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
2387  .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
2388  .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
2389  .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
2390  .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
2391  .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
2392  .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
2393 }
2394 
/// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
///
/// If the update region consists of a single recognizable binary operation on
/// the region argument, the OpenMPIRBuilder may emit a native `atomicrmw`;
/// otherwise it falls back to a compare-exchange loop driven by `updateFn`.
static LogicalResult
convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
                       llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  if (failed(checkImplementationStatus(*opInst)))
    return failure();

  // Convert values and types.
  auto &innerOpList = opInst.getRegion().front().getOperations();
  bool isXBinopExpr{false};
  llvm::AtomicRMWInst::BinOp binop;
  mlir::Value mlirExpr;
  llvm::Value *llvmExpr = nullptr;
  llvm::Value *llvmX = nullptr;
  llvm::Type *llvmXElementType = nullptr;
  if (innerOpList.size() == 2) {
    // The two operations here are the update and the terminator.
    // Since we can identify the update operation, there is a possibility
    // that we can generate the atomicrmw instruction.
    mlir::Operation &innerOp = *opInst.getRegion().front().begin();
    if (!llvm::is_contained(innerOp.getOperands(),
                            opInst.getRegion().getArgument(0))) {
      return opInst.emitError("no atomic update operation with region argument"
                              " as operand found inside atomic.update region");
    }
    binop = convertBinOpToAtomic(innerOp);
    // Record which side of the binary op the atomic variable `x` is on
    // (`x binop expr` vs `expr binop x`) — needed for non-commutative ops.
    isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
    mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  } else {
    // Since the update region includes more than one operation
    // we will resort to generating a cmpxchg loop.
    binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
  }
  llvmX = moduleTranslation.lookupValue(opInst.getX());
  llvmXElementType = moduleTranslation.convertType(
      opInst.getRegion().getArgument(0).getType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(opInst.getMemoryOrder());

  // Generate update code. The callback is invoked by the IR builder with the
  // current atomic value; it inlines the update region and returns the value
  // yielded by the region's omp.yield terminator.
  auto updateFn =
      [&opInst, &moduleTranslation](
          llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    Block &bb = *opInst.getRegion().begin();
    // Map the region argument to the loaded atomic value before converting
    // the block, so uses inside the region resolve correctly.
    moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
                                     atomicOrdering, binop, updateFn,
                                     isXBinopExpr);

  if (failed(handleError(afterIP, *opInst)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
2473 
/// Converts an omp.atomic.capture operation to LLVM IR via OpenMPIRBuilder.
///
/// The capture region pairs an atomic read with either an atomic update or an
/// atomic write. Whether the captured value is the old or the new value is
/// determined by the order of the two inner ops (`isPostfixUpdate`).
static LogicalResult
convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
                        llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  if (failed(checkImplementationStatus(*atomicCaptureOp)))
    return failure();

  mlir::Value mlirExpr;
  bool isXBinopExpr = false, isPostfixUpdate = false;
  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;

  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();

  assert((atomicUpdateOp || atomicWriteOp) &&
         "internal op must be an atomic.update or atomic.write op");

  if (atomicWriteOp) {
    // A write always captures the value before the store.
    isPostfixUpdate = true;
    mlirExpr = atomicWriteOp.getExpr();
  } else {
    // The capture is "postfix" (captures the old value) when the update is
    // the second op in the capture region.
    isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
                      atomicCaptureOp.getAtomicUpdateOp().getOperation();
    auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
    // Find the binary update operation that uses the region argument
    // and get the expression to update
    if (innerOpList.size() == 2) {
      mlir::Operation &innerOp = *atomicUpdateOp.getRegion().front().begin();
      if (!llvm::is_contained(innerOp.getOperands(),
                              atomicUpdateOp.getRegion().getArgument(0))) {
        return atomicUpdateOp.emitError(
            "no atomic update operation with region argument"
            " as operand found inside atomic.update region");
      }
      binop = convertBinOpToAtomic(innerOp);
      // Record which side of the binary op the atomic variable is on;
      // matters for non-commutative operations.
      isXBinopExpr =
          innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
      mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    } else {
      // More than one op in the update region: fall back to a cmpxchg loop.
      binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
    }
  }

  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  llvm::Value *llvmX =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
  llvm::Value *llvmV =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
  llvm::Type *llvmXElementType = moduleTranslation.convertType(
      atomicCaptureOp.getAtomicReadOp().getElementType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(atomicCaptureOp.getMemoryOrder());

  // Callback that computes the updated value: for a write it is simply the
  // written expression; for an update it inlines the update region with the
  // region argument bound to the current atomic value.
  auto updateFn =
      [&](llvm::Value *atomicx,
          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
    if (atomicWriteOp)
      return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
    Block &bb = *atomicUpdateOp.getRegion().begin();
    moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
                               atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
      return llvm::make_error<PreviouslyReportedError>();

    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      ompBuilder->createAtomicCapture(
          ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
          binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr);

  if (failed(handleError(afterIP, *atomicCaptureOp)))
    return failure();

  builder.restoreIP(*afterIP);
  return success();
}
2568 
2569 /// Converts an OpenMP Threadprivate operation into LLVM IR using
2570 /// OpenMPIRBuilder.
2571 static LogicalResult
2572 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
2573  LLVM::ModuleTranslation &moduleTranslation) {
2574  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2575  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
2576 
2577  if (failed(checkImplementationStatus(opInst)))
2578  return failure();
2579 
2580  Value symAddr = threadprivateOp.getSymAddr();
2581  auto *symOp = symAddr.getDefiningOp();
2582  if (!isa<LLVM::AddressOfOp>(symOp))
2583  return opInst.emitError("Addressing symbol not found");
2584  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
2585 
2586  LLVM::GlobalOp global =
2587  addressOfOp.getGlobal(moduleTranslation.symbolTable());
2588  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
2589  llvm::Type *type = globalValue->getValueType();
2590  llvm::TypeSize typeSize =
2591  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
2592  type);
2593  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
2594  llvm::StringRef suffix = llvm::StringRef(".cache", 6);
2595  std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str();
2596  llvm::Value *callInst =
2597  moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate(
2598  ompLoc, globalValue, size, cacheName);
2599  moduleTranslation.mapValue(opInst.getResult(0), callInst);
2600  return success();
2601 }
2602 
2603 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
2604 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
2605  switch (deviceClause) {
2606  case mlir::omp::DeclareTargetDeviceType::host:
2607  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
2608  break;
2609  case mlir::omp::DeclareTargetDeviceType::nohost:
2610  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
2611  break;
2612  case mlir::omp::DeclareTargetDeviceType::any:
2613  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
2614  break;
2615  }
2616  llvm_unreachable("unhandled device clause");
2617 }
2618 
2619 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
2621  mlir::omp::DeclareTargetCaptureClause captureClause) {
2622  switch (captureClause) {
2623  case mlir::omp::DeclareTargetCaptureClause::to:
2624  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
2625  case mlir::omp::DeclareTargetCaptureClause::link:
2626  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
2627  case mlir::omp::DeclareTargetCaptureClause::enter:
2628  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
2629  }
2630  llvm_unreachable("unhandled capture clause");
2631 }
2632 
2633 static llvm::SmallString<64>
2634 getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
2635  llvm::OpenMPIRBuilder &ompBuilder) {
2636  llvm::SmallString<64> suffix;
2637  llvm::raw_svector_ostream os(suffix);
2638  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
2639  auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
2640  auto fileInfoCallBack = [&loc]() {
2641  return std::pair<std::string, uint64_t>(
2642  llvm::StringRef(loc.getFilename()), loc.getLine());
2643  };
2644 
2645  os << llvm::format(
2646  "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
2647  }
2648  os << "_decl_tgt_ref_ptr";
2649 
2650  return suffix;
2651 }
2652 
2653 static bool isDeclareTargetLink(mlir::Value value) {
2654  if (auto addressOfOp =
2655  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
2656  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
2657  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
2658  if (auto declareTargetGlobal =
2659  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
2660  if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
2661  mlir::omp::DeclareTargetCaptureClause::link)
2662  return true;
2663  }
2664  return false;
2665 }
2666 
2667 // Returns the reference pointer generated by the lowering of the declare target
2668 // operation in cases where the link clause is used or the to clause is used in
2669 // USM mode.
2670 static llvm::Value *
2672  LLVM::ModuleTranslation &moduleTranslation) {
2673  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2674 
2675  // An easier way to do this may just be to keep track of any pointer
2676  // references and their mapping to their respective operation
2677  if (auto addressOfOp =
2678  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
2679  if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
2680  addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
2681  addressOfOp.getGlobalName()))) {
2682 
2683  if (auto declareTargetGlobal =
2684  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
2685  gOp.getOperation())) {
2686 
2687  // In this case, we must utilise the reference pointer generated by the
2688  // declare target operation, similar to Clang
2689  if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
2690  mlir::omp::DeclareTargetCaptureClause::link) ||
2691  (declareTargetGlobal.getDeclareTargetCaptureClause() ==
2692  mlir::omp::DeclareTargetCaptureClause::to &&
2693  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
2694  llvm::SmallString<64> suffix =
2695  getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
2696 
2697  if (gOp.getSymName().contains(suffix))
2698  return moduleTranslation.getLLVMModule()->getNamedValue(
2699  gOp.getSymName());
2700 
2701  return moduleTranslation.getLLVMModule()->getNamedValue(
2702  (gOp.getSymName().str() + suffix.str()).str());
2703  }
2704  }
2705  }
2706  }
2707 
2708  return nullptr;
2709 }
2710 
2711 namespace {
// A small helper structure to contain data gathered
// for map lowering, coalescing it into one place and
// avoiding extra computations such as searches in the
// llvm module for lowered mapped variables or checking
// if something is declare target (and retrieving the
// value) more than necessary.
2718 struct MapInfoData : llvm::OpenMPIRBuilder::MapInfosTy {
2719  llvm::SmallVector<bool, 4> IsDeclareTarget;
2720  llvm::SmallVector<bool, 4> IsAMember;
2721  // Identify if mapping was added by mapClause or use_device clauses.
2722  llvm::SmallVector<bool, 4> IsAMapping;
2725  // Stripped off array/pointer to get the underlying
2726  // element type
2728 
2729  /// Append arrays in \a CurInfo.
2730  void append(MapInfoData &CurInfo) {
2731  IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
2732  CurInfo.IsDeclareTarget.end());
2733  MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
2734  OriginalValue.append(CurInfo.OriginalValue.begin(),
2735  CurInfo.OriginalValue.end());
2736  BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
2737  llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
2738  }
2739 };
2740 } // namespace
2741 
2742 uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
2743  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
2744  arrTy.getElementType()))
2745  return getArrayElementSizeInBits(nestedArrTy, dl);
2746  return dl.getTypeSizeInBits(arrTy.getElementType());
2747 }
2748 
// This function calculates the size to be offloaded for a specified type,
// given its associated map clause (which can contain bounds information which
// affects the total size), this size is calculated based on the underlying
// element type e.g. given a 1-D array of ints, we will calculate the size
// from the integer type * number of elements in the array. This size can be
// used in other calculations but is ultimately used as an argument to the
// OpenMP runtime's kernel argument structure which is generated through the
// combinedInfo data structures.
// This function is somewhat equivalent to Clang's getExprTypeSize inside of
// CGOpenMPRuntime.cpp.
llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
                            Operation *clauseOp, llvm::Value *basePointer,
                            llvm::Type *baseType, llvm::IRBuilderBase &builder,
                            LLVM::ModuleTranslation &moduleTranslation) {
  if (auto memberClause =
          mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
    // This calculates the size to transfer based on bounds and the underlying
    // element type, provided bounds have been specified (Fortran
    // pointers/allocatables/target and arrays that have sections specified
    // fall into this as well).
    if (!memberClause.getBounds().empty()) {
      llvm::Value *elementCount = builder.getInt64(1);
      for (auto bounds : memberClause.getBounds()) {
        if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
                bounds.getDefiningOp())) {
          // The below calculation for the size to be mapped calculated from
          // the map.info's bounds is: (elemCount * [UB - LB] + 1), later we
          // multiply by the underlying element type's byte size to get the
          // full size to be offloaded based on the bounds
          elementCount = builder.CreateMul(
              elementCount,
              builder.CreateAdd(
                  builder.CreateSub(
                      moduleTranslation.lookupValue(boundOp.getUpperBound()),
                      moduleTranslation.lookupValue(boundOp.getLowerBound())),
                  builder.getInt64(1)));
        }
      }

      // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize
      // gives the size in inconsistent byte or bit format.
      uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
      // For arrays, size by the innermost element type, not the whole array.
      if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
        underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);

      // The size in bytes x number of elements, the sizeInBytes stored is
      // the underlying type's size, e.g. if ptr<i32>, it'll be the i32's
      // size, so we do some on the fly runtime math to get the size in
      // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
      // some adjustment for members with more complex types.
      return builder.CreateMul(elementCount,
                               builder.getInt64(underlyingTypeSzInBits / 8));
    }
  }

  // No usable bounds: fall back to the static size of the type itself.
  return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
}
2806 
2808  MapInfoData &mapData, SmallVectorImpl<Value> &mapVars,
2809  LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl,
2810  llvm::IRBuilderBase &builder, const ArrayRef<Value> &useDevPtrOperands = {},
2811  const ArrayRef<Value> &useDevAddrOperands = {}) {
2812  auto checkIsAMember = [](const auto &mapVars, auto mapOp) {
2813  // Check if this is a member mapping and correctly assign that it is, if
2814  // it is a member of a larger object.
2815  // TODO: Need better handling of members, and distinguishing of members
2816  // that are implicitly allocated on device vs explicitly passed in as
2817  // arguments.
2818  // TODO: May require some further additions to support nested record
2819  // types, i.e. member maps that can have member maps.
2820  for (Value mapValue : mapVars) {
2821  auto map = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
2822  for (auto member : map.getMembers())
2823  if (member == mapOp)
2824  return true;
2825  }
2826  return false;
2827  };
2828 
2829  // Process MapOperands
2830  for (Value mapValue : mapVars) {
2831  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
2832  Value offloadPtr =
2833  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
2834  mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
2835  mapData.Pointers.push_back(mapData.OriginalValue.back());
2836 
2837  if (llvm::Value *refPtr =
2838  getRefPtrIfDeclareTarget(offloadPtr,
2839  moduleTranslation)) { // declare target
2840  mapData.IsDeclareTarget.push_back(true);
2841  mapData.BasePointers.push_back(refPtr);
2842  } else { // regular mapped variable
2843  mapData.IsDeclareTarget.push_back(false);
2844  mapData.BasePointers.push_back(mapData.OriginalValue.back());
2845  }
2846 
2847  mapData.BaseType.push_back(
2848  moduleTranslation.convertType(mapOp.getVarType()));
2849  mapData.Sizes.push_back(
2850  getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
2851  mapData.BaseType.back(), builder, moduleTranslation));
2852  mapData.MapClause.push_back(mapOp.getOperation());
2853  mapData.Types.push_back(
2854  llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value()));
2855  mapData.Names.push_back(LLVM::createMappingInformation(
2856  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
2857  mapData.DevicePointers.push_back(llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2858  mapData.IsAMapping.push_back(true);
2859  mapData.IsAMember.push_back(checkIsAMember(mapVars, mapOp));
2860  }
2861 
2862  auto findMapInfo = [&mapData](llvm::Value *val,
2863  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
2864  unsigned index = 0;
2865  bool found = false;
2866  for (llvm::Value *basePtr : mapData.OriginalValue) {
2867  if (basePtr == val && mapData.IsAMapping[index]) {
2868  found = true;
2869  mapData.Types[index] |=
2870  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
2871  mapData.DevicePointers[index] = devInfoTy;
2872  }
2873  index++;
2874  }
2875  return found;
2876  };
2877 
2878  // Process useDevPtr(Addr)Operands
2879  auto addDevInfos = [&](const llvm::ArrayRef<Value> &useDevOperands,
2880  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
2881  for (Value mapValue : useDevOperands) {
2882  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
2883  Value offloadPtr =
2884  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
2885  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
2886 
2887  // Check if map info is already present for this entry.
2888  if (!findMapInfo(origValue, devInfoTy)) {
2889  mapData.OriginalValue.push_back(origValue);
2890  mapData.Pointers.push_back(mapData.OriginalValue.back());
2891  mapData.IsDeclareTarget.push_back(false);
2892  mapData.BasePointers.push_back(mapData.OriginalValue.back());
2893  mapData.BaseType.push_back(
2894  moduleTranslation.convertType(mapOp.getVarType()));
2895  mapData.Sizes.push_back(builder.getInt64(0));
2896  mapData.MapClause.push_back(mapOp.getOperation());
2897  mapData.Types.push_back(
2898  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
2899  mapData.Names.push_back(LLVM::createMappingInformation(
2900  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
2901  mapData.DevicePointers.push_back(devInfoTy);
2902  mapData.IsAMapping.push_back(false);
2903  mapData.IsAMember.push_back(checkIsAMember(useDevOperands, mapOp));
2904  }
2905  }
2906  };
2907 
2908  addDevInfos(useDevAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
2909  addDevInfos(useDevPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
2910 }
2911 
2912 static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
2913  auto *res = llvm::find(mapData.MapClause, memberOp);
2914  assert(res != mapData.MapClause.end() &&
2915  "MapInfoOp for member not found in MapData, cannot return index");
2916  return std::distance(mapData.MapClause.begin(), res);
2917 }
2918 
2919 static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
2920  bool first) {
2921  ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
2922  // Only 1 member has been mapped, we can return it.
2923  if (indexAttr.size() == 1)
2924  return cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp());
2925 
2926  llvm::SmallVector<size_t> indices(indexAttr.size());
2927  std::iota(indices.begin(), indices.end(), 0);
2928 
2929  llvm::sort(indices.begin(), indices.end(),
2930  [&](const size_t a, const size_t b) {
2931  auto memberIndicesA = cast<ArrayAttr>(indexAttr[a]);
2932  auto memberIndicesB = cast<ArrayAttr>(indexAttr[b]);
2933  for (const auto it : llvm::zip(memberIndicesA, memberIndicesB)) {
2934  int64_t aIndex = cast<IntegerAttr>(std::get<0>(it)).getInt();
2935  int64_t bIndex = cast<IntegerAttr>(std::get<1>(it)).getInt();
2936 
2937  if (aIndex == bIndex)
2938  continue;
2939 
2940  if (aIndex < bIndex)
2941  return first;
2942 
2943  if (aIndex > bIndex)
2944  return !first;
2945  }
2946 
2947  // Iterated the up until the end of the smallest member and
2948  // they were found to be equal up to that point, so select
2949  // the member with the lowest index count, so the "parent"
2950  return memberIndicesA.size() < memberIndicesB.size();
2951  });
2952 
2953  return llvm::cast<omp::MapInfoOp>(
2954  mapInfo.getMembers()[indices.front()].getDefiningOp());
2955 }
2956 
2957 /// This function calculates the array/pointer offset for map data provided
2958 /// with bounds operations, e.g. when provided something like the following:
2959 ///
2960 /// Fortran
2961 /// map(tofrom: array(2:5, 3:2))
2962 /// or
2963 /// C++
2964 /// map(tofrom: array[1:4][2:3])
2965 /// We must calculate the initial pointer offset to pass across, this function
2966 /// performs this using bounds.
2967 ///
2968 /// NOTE: which while specified in row-major order it currently needs to be
2969 /// flipped for Fortran's column order array allocation and access (as
2970 /// opposed to C++'s row-major, hence the backwards processing where order is
2971 /// important). This is likely important to keep in mind for the future when
2972 /// we incorporate a C++ frontend, both frontends will need to agree on the
2973 /// ordering of generated bounds operations (one may have to flip them) to
2974 /// make the below lowering frontend agnostic. The offload size
2975 /// calcualtion may also have to be adjusted for C++.
2976 std::vector<llvm::Value *>
2978  llvm::IRBuilderBase &builder, bool isArrayTy,
2979  OperandRange bounds) {
2980  std::vector<llvm::Value *> idx;
2981  // There's no bounds to calculate an offset from, we can safely
2982  // ignore and return no indices.
2983  if (bounds.empty())
2984  return idx;
2985 
2986  // If we have an array type, then we have its type so can treat it as a
2987  // normal GEP instruction where the bounds operations are simply indexes
2988  // into the array. We currently do reverse order of the bounds, which
2989  // I believe leans more towards Fortran's column-major in memory.
2990  if (isArrayTy) {
2991  idx.push_back(builder.getInt64(0));
2992  for (int i = bounds.size() - 1; i >= 0; --i) {
2993  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
2994  bounds[i].getDefiningOp())) {
2995  idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
2996  }
2997  }
2998  } else {
2999  // If we do not have an array type, but we have bounds, then we're dealing
3000  // with a pointer that's being treated like an array and we have the
3001  // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
3002  // address (pointer pointing to the actual data) so we must caclulate the
3003  // offset using a single index which the following two loops attempts to
3004  // compute.
3005 
3006  // Calculates the size offset we need to make per row e.g. first row or
3007  // column only needs to be offset by one, but the next would have to be
3008  // the previous row/column offset multiplied by the extent of current row.
3009  //
3010  // For example ([1][10][100]):
3011  //
3012  // - First row/column we move by 1 for each index increment
3013  // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
3014  // current) for 10 for each index increment
3015  // - Third row/column we would move by 10 (second row/column) *
3016  // (extent/size of current) 100 for 1000 for each index increment
3017  std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
3018  for (size_t i = 1; i < bounds.size(); ++i) {
3019  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3020  bounds[i].getDefiningOp())) {
3021  dimensionIndexSizeOffset.push_back(builder.CreateMul(
3022  moduleTranslation.lookupValue(boundOp.getExtent()),
3023  dimensionIndexSizeOffset[i - 1]));
3024  }
3025  }
3026 
3027  // Now that we have calculated how much we move by per index, we must
3028  // multiply each lower bound offset in indexes by the size offset we
3029  // have calculated in the previous and accumulate the results to get
3030  // our final resulting offset.
3031  for (int i = bounds.size() - 1; i >= 0; --i) {
3032  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
3033  bounds[i].getDefiningOp())) {
3034  if (idx.empty())
3035  idx.emplace_back(builder.CreateMul(
3036  moduleTranslation.lookupValue(boundOp.getLowerBound()),
3037  dimensionIndexSizeOffset[i]));
3038  else
3039  idx.back() = builder.CreateAdd(
3040  idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
3041  boundOp.getLowerBound()),
3042  dimensionIndexSizeOffset[i]));
3043  }
3044  }
3045  }
3046 
3047  return idx;
3048 }
3049 
3050 // This creates two insertions into the MapInfosTy data structure for the
3051 // "parent" of a set of members, (usually a container e.g.
3052 // class/structure/derived type) when subsequent members have also been
3053 // explicitly mapped on the same map clause. Certain types, such as Fortran
3054 // descriptors are mapped like this as well, however, the members are
3055 // implicit as far as a user is concerned, but we must explicitly map them
3056 // internally.
3057 //
// This function also returns the memberOfFlag for this particular parent,
// which is utilised in subsequent member mappings (by modifying their map type
// with it) to indicate that a member is part of this parent and should be
// treated by the runtime as such. Important to achieve the correct mapping.
3062 //
3063 // This function borrows a lot from Clang's emitCombinedEntry function
3064 // inside of CGOpenMPRuntime.cpp
3065 static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
3066  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3067  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
3068  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
3069  uint64_t mapDataIndex, bool isTargetParams) {
3070  // Map the first segment of our structure
3071  combinedInfo.Types.emplace_back(
3072  isTargetParams
3073  ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
3074  : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE);
3075  combinedInfo.DevicePointers.emplace_back(
3076  mapData.DevicePointers[mapDataIndex]);
3077  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
3078  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
3079  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
3080 
3081  // Calculate size of the parent object being mapped based on the
3082  // addresses at runtime, highAddr - lowAddr = size. This of course
3083  // doesn't factor in allocated data like pointers, hence the further
3084  // processing of members specified by users, or in the case of
3085  // Fortran pointers and allocatables, the mapping of the pointed to
3086  // data by the descriptor (which itself, is a structure containing
3087  // runtime information on the dynamically allocated data).
3088  auto parentClause =
3089  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3090 
3091  llvm::Value *lowAddr, *highAddr;
3092  if (!parentClause.getPartialMap()) {
3093  lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
3094  builder.getPtrTy());
3095  highAddr = builder.CreatePointerCast(
3096  builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
3097  mapData.Pointers[mapDataIndex], 1),
3098  builder.getPtrTy());
3099  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
3100  } else {
3101  auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3102  int firstMemberIdx = getMapDataMemberIdx(
3103  mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
3104  lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
3105  builder.getPtrTy());
3106  int lastMemberIdx = getMapDataMemberIdx(
3107  mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
3108  highAddr = builder.CreatePointerCast(
3109  builder.CreateGEP(mapData.BaseType[lastMemberIdx],
3110  mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
3111  builder.getPtrTy());
3112  combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
3113  }
3114 
3115  llvm::Value *size = builder.CreateIntCast(
3116  builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
3117  builder.getInt64Ty(),
3118  /*isSigned=*/false);
3119  combinedInfo.Sizes.push_back(size);
3120 
3121  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
3122  ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
3123 
3124  // This creates the initial MEMBER_OF mapping that consists of
3125  // the parent/top level container (same as above effectively, except
3126  // with a fixed initial compile time size and separate maptype which
3127  // indicates the true mape type (tofrom etc.). This parent mapping is
3128  // only relevant if the structure in its totality is being mapped,
3129  // otherwise the above suffices.
3130  if (!parentClause.getPartialMap()) {
3131  // TODO: This will need to be expanded to include the whole host of logic
3132  // for the map flags that Clang currently supports (e.g. it should do some
3133  // further case specific flag modifications). For the moment, it handles
3134  // what we support as expected.
3135  llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
3136  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3137  combinedInfo.Types.emplace_back(mapFlag);
3138  combinedInfo.DevicePointers.emplace_back(
3140  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
3141  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
3142  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
3143  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
3144  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
3145  }
3146  return memberOfFlag;
3147 }
3148 
3149 // The intent is to verify if the mapped data being passed is a
3150 // pointer -> pointee that requires special handling in certain cases,
3151 // e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
3152 //
3153 // There may be a better way to verify this, but unfortunately with
3154 // opaque pointers we lose the ability to easily check if something is
3155 // a pointer whilst maintaining access to the underlying type.
3156 static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
3157  // If we have a varPtrPtr field assigned then the underlying type is a pointer
3158  if (mapOp.getVarPtrPtr())
3159  return true;
3160 
3161  // If the map data is declare target with a link clause, then it's represented
3162  // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
3163  // no relation to pointers.
3164  if (isDeclareTargetLink(mapOp.getVarPtr()))
3165  return true;
3166 
3167  return false;
3168 }
3169 
3170 // This function is intended to add explicit mappings of members
3172  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3173  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
3174  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
3175  uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
3176 
3177  auto parentClause =
3178  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3179 
3180  for (auto mappedMembers : parentClause.getMembers()) {
3181  auto memberClause =
3182  llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp());
3183  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
3184 
3185  assert(memberDataIdx >= 0 && "could not find mapped member of structure");
3186 
3187  // If we're currently mapping a pointer to a block of data, we must
3188  // initially map the pointer, and then attatch/bind the data with a
3189  // subsequent map to the pointer. This segment of code generates the
3190  // pointer mapping, which can in certain cases be optimised out as Clang
3191  // currently does in its lowering. However, for the moment we do not do so,
3192  // in part as we currently have substantially less information on the data
3193  // being mapped at this stage.
3194  if (checkIfPointerMap(memberClause)) {
3195  auto mapFlag = llvm::omp::OpenMPOffloadMappingFlags(
3196  memberClause.getMapType().value());
3197  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3198  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
3199  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3200  combinedInfo.Types.emplace_back(mapFlag);
3201  combinedInfo.DevicePointers.emplace_back(
3203  combinedInfo.Names.emplace_back(
3204  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
3205  combinedInfo.BasePointers.emplace_back(
3206  mapData.BasePointers[mapDataIndex]);
3207  combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
3208  combinedInfo.Sizes.emplace_back(builder.getInt64(
3209  moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
3210  }
3211 
3212  // Same MemberOfFlag to indicate its link with parent and other members
3213  // of.
3214  auto mapFlag =
3215  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType().value());
3216  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3217  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
3218  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
3219  if (checkIfPointerMap(memberClause))
3220  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
3221 
3222  combinedInfo.Types.emplace_back(mapFlag);
3223  combinedInfo.DevicePointers.emplace_back(
3224  mapData.DevicePointers[memberDataIdx]);
3225  combinedInfo.Names.emplace_back(
3226  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
3227  uint64_t basePointerIndex =
3228  checkIfPointerMap(memberClause) ? memberDataIdx : mapDataIndex;
3229  combinedInfo.BasePointers.emplace_back(
3230  mapData.BasePointers[basePointerIndex]);
3231  combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
3232  combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
3233  }
3234 }
3235 
3236 static void
3237 processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
3238  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
3239  bool isTargetParams, int mapDataParentIdx = -1) {
3240  // Declare Target Mappings are excluded from being marked as
3241  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
3242  // marked with OMP_MAP_PTR_AND_OBJ instead.
3243  auto mapFlag = mapData.Types[mapDataIdx];
3244  auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
3245 
3246  bool isPtrTy = checkIfPointerMap(mapInfoOp);
3247  if (isPtrTy)
3248  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
3249 
3250  if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx])
3251  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
3252 
3253  if (mapInfoOp.getMapCaptureType().value() ==
3254  omp::VariableCaptureKind::ByCopy &&
3255  !isPtrTy)
3256  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
3257 
3258  // if we're provided a mapDataParentIdx, then the data being mapped is
3259  // part of a larger object (in a parent <-> member mapping) and in this
3260  // case our BasePointer should be the parent.
3261  if (mapDataParentIdx >= 0)
3262  combinedInfo.BasePointers.emplace_back(
3263  mapData.BasePointers[mapDataParentIdx]);
3264  else
3265  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
3266 
3267  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
3268  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
3269  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
3270  combinedInfo.Types.emplace_back(mapFlag);
3271  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
3272 }
3273 
3275  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
3276  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
3277  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
3278  uint64_t mapDataIndex, bool isTargetParams) {
3279  auto parentClause =
3280  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
3281 
3282  // If we have a partial map (no parent referenced in the map clauses of the
3283  // directive, only members) and only a single member, we do not need to bind
3284  // the map of the member to the parent, we can pass the member separately.
3285  if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) {
3286  auto memberClause = llvm::cast<omp::MapInfoOp>(
3287  parentClause.getMembers()[0].getDefiningOp());
3288  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
3289  // Note: Clang treats arrays with explicit bounds that fall into this
3290  // category as a parent with map case, however, it seems this isn't a
3291  // requirement, and processing them as an individual map is fine. So,
3292  // we will handle them as individual maps for the moment, as it's
3293  // difficult for us to check this as we always require bounds to be
3294  // specified currently and it's also marginally more optimal (single
3295  // map rather than two). The difference may come from the fact that
3296  // Clang maps array without bounds as pointers (which we do not
3297  // currently do), whereas we treat them as arrays in all cases
3298  // currently.
3299  processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams,
3300  mapDataIndex);
3301  return;
3302  }
3303 
3304  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
3305  mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
3306  combinedInfo, mapData, mapDataIndex, isTargetParams);
3307  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
3308  combinedInfo, mapData, mapDataIndex,
3309  memberOfParentFlag);
3310 }
3311 
3312 // This is a variation on Clang's GenerateOpenMPCapturedVars, which
3313 // generates different operation (e.g. load/store) combinations for
3314 // arguments to the kernel, based on map capture kinds which are then
3315 // utilised in the combinedInfo in place of the original Map value.
3316 static void
3317 createAlteredByCaptureMap(MapInfoData &mapData,
3318  LLVM::ModuleTranslation &moduleTranslation,
3319  llvm::IRBuilderBase &builder) {
3320  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
3321  // if it's declare target, skip it, it's handled separately.
3322  if (!mapData.IsDeclareTarget[i]) {
3323  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
3324  omp::VariableCaptureKind captureKind =
3325  mapOp.getMapCaptureType().value_or(omp::VariableCaptureKind::ByRef);
3326  bool isPtrTy = checkIfPointerMap(mapOp);
3327 
3328  // Currently handles array sectioning lowerbound case, but more
3329  // logic may be required in the future. Clang invokes EmitLValue,
3330  // which has specialised logic for special Clang types such as user
3331  // defines, so it is possible we will have to extend this for
3332  // structures or other complex types. As the general idea is that this
3333  // function mimics some of the logic from Clang that we require for
3334  // kernel argument passing from host -> device.
3335  switch (captureKind) {
3336  case omp::VariableCaptureKind::ByRef: {
3337  llvm::Value *newV = mapData.Pointers[i];
3338  std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
3339  moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
3340  mapOp.getBounds());
3341  if (isPtrTy)
3342  newV = builder.CreateLoad(builder.getPtrTy(), newV);
3343 
3344  if (!offsetIdx.empty())
3345  newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
3346  "array_offset");
3347  mapData.Pointers[i] = newV;
3348  } break;
3349  case omp::VariableCaptureKind::ByCopy: {
3350  llvm::Type *type = mapData.BaseType[i];
3351  llvm::Value *newV;
3352  if (mapData.Pointers[i]->getType()->isPointerTy())
3353  newV = builder.CreateLoad(type, mapData.Pointers[i]);
3354  else
3355  newV = mapData.Pointers[i];
3356 
3357  if (!isPtrTy) {
3358  auto curInsert = builder.saveIP();
3359  builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
3360  auto *memTempAlloc =
3361  builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
3362  builder.restoreIP(curInsert);
3363 
3364  builder.CreateStore(newV, memTempAlloc);
3365  newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
3366  }
3367 
3368  mapData.Pointers[i] = newV;
3369  mapData.BasePointers[i] = newV;
3370  } break;
3371  case omp::VariableCaptureKind::This:
3372  case omp::VariableCaptureKind::VLAType:
3373  mapData.MapClause[i]->emitOpError("Unhandled capture kind");
3374  break;
3375  }
3376  }
3377  }
3378 }
3379 
3380 // Generate all map related information and fill the combinedInfo.
3381 static void genMapInfos(llvm::IRBuilderBase &builder,
3382  LLVM::ModuleTranslation &moduleTranslation,
3383  DataLayout &dl,
3384  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
3385  MapInfoData &mapData, bool isTargetParams = false) {
3386  // We wish to modify some of the methods in which arguments are
3387  // passed based on their capture type by the target region, this can
3388  // involve generating new loads and stores, which changes the
3389  // MLIR value to LLVM value mapping, however, we only wish to do this
3390  // locally for the current function/target and also avoid altering
3391  // ModuleTranslation, so we remap the base pointer or pointer stored
3392  // in the map infos corresponding MapInfoData, which is later accessed
3393  // by genMapInfos and createTarget to help generate the kernel and
3394  // kernel arg structure. It primarily becomes relevant in cases like
3395  // bycopy, or byref range'd arrays. In the default case, we simply
3396  // pass thee pointer byref as both basePointer and pointer.
3397  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
3398  createAlteredByCaptureMap(mapData, moduleTranslation, builder);
3399 
3400  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3401 
3402  // We operate under the assumption that all vectors that are
3403  // required in MapInfoData are of equal lengths (either filled with
3404  // default constructed data or appropiate information) so we can
3405  // utilise the size from any component of MapInfoData, if we can't
3406  // something is missing from the initial MapInfoData construction.
3407  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
3408  // NOTE/TODO: We currently do not support arbitrary depth record
3409  // type mapping.
3410  if (mapData.IsAMember[i])
3411  continue;
3412 
3413  auto mapInfoOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[i]);
3414  if (!mapInfoOp.getMembers().empty()) {
3415  processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
3416  combinedInfo, mapData, i, isTargetParams);
3417  continue;
3418  }
3419 
3420  processIndividualMap(mapData, i, combinedInfo, isTargetParams);
3421  }
3422 }
3423 
3424 static LogicalResult
3425 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
3426  LLVM::ModuleTranslation &moduleTranslation) {
3427  llvm::Value *ifCond = nullptr;
3428  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
3429  SmallVector<Value> mapVars;
3430  SmallVector<Value> useDevicePtrVars;
3431  SmallVector<Value> useDeviceAddrVars;
3432  llvm::omp::RuntimeFunction RTLFn;
3433  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
3434 
3435  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3436  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
3437  /*SeparateBeginEndCalls=*/true);
3438 
3439  LogicalResult result =
3441  .Case([&](omp::TargetDataOp dataOp) {
3442  if (failed(checkImplementationStatus(*dataOp)))
3443  return failure();
3444 
3445  if (auto ifVar = dataOp.getIfExpr())
3446  ifCond = moduleTranslation.lookupValue(ifVar);
3447 
3448  if (auto devId = dataOp.getDevice())
3449  if (auto constOp =
3450  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
3451  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
3452  deviceID = intAttr.getInt();
3453 
3454  mapVars = dataOp.getMapVars();
3455  useDevicePtrVars = dataOp.getUseDevicePtrVars();
3456  useDeviceAddrVars = dataOp.getUseDeviceAddrVars();
3457  return success();
3458  })
3459  .Case([&](omp::TargetEnterDataOp enterDataOp) -> LogicalResult {
3460  if (failed(checkImplementationStatus(*enterDataOp)))
3461  return failure();
3462 
3463  if (auto ifVar = enterDataOp.getIfExpr())
3464  ifCond = moduleTranslation.lookupValue(ifVar);
3465 
3466  if (auto devId = enterDataOp.getDevice())
3467  if (auto constOp =
3468  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
3469  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
3470  deviceID = intAttr.getInt();
3471  RTLFn =
3472  enterDataOp.getNowait()
3473  ? llvm::omp::OMPRTL___tgt_target_data_begin_nowait_mapper
3474  : llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
3475  mapVars = enterDataOp.getMapVars();
3476  info.HasNoWait = enterDataOp.getNowait();
3477  return success();
3478  })
3479  .Case([&](omp::TargetExitDataOp exitDataOp) -> LogicalResult {
3480  if (failed(checkImplementationStatus(*exitDataOp)))
3481  return failure();
3482 
3483  if (auto ifVar = exitDataOp.getIfExpr())
3484  ifCond = moduleTranslation.lookupValue(ifVar);
3485 
3486  if (auto devId = exitDataOp.getDevice())
3487  if (auto constOp =
3488  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
3489  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
3490  deviceID = intAttr.getInt();
3491 
3492  RTLFn = exitDataOp.getNowait()
3493  ? llvm::omp::OMPRTL___tgt_target_data_end_nowait_mapper
3494  : llvm::omp::OMPRTL___tgt_target_data_end_mapper;
3495  mapVars = exitDataOp.getMapVars();
3496  info.HasNoWait = exitDataOp.getNowait();
3497  return success();
3498  })
3499  .Case([&](omp::TargetUpdateOp updateDataOp) -> LogicalResult {
3500  if (failed(checkImplementationStatus(*updateDataOp)))
3501  return failure();
3502 
3503  if (auto ifVar = updateDataOp.getIfExpr())
3504  ifCond = moduleTranslation.lookupValue(ifVar);
3505 
3506  if (auto devId = updateDataOp.getDevice())
3507  if (auto constOp =
3508  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
3509  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
3510  deviceID = intAttr.getInt();
3511 
3512  RTLFn =
3513  updateDataOp.getNowait()
3514  ? llvm::omp::OMPRTL___tgt_target_data_update_nowait_mapper
3515  : llvm::omp::OMPRTL___tgt_target_data_update_mapper;
3516  mapVars = updateDataOp.getMapVars();
3517  info.HasNoWait = updateDataOp.getNowait();
3518  return success();
3519  })
3520  .Default([&](Operation *op) {
3521  llvm_unreachable("unexpected operation");
3522  return failure();
3523  });
3524 
3525  if (failed(result))
3526  return failure();
3527 
3528  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3529 
3530  MapInfoData mapData;
3531  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, DL,
3532  builder, useDevicePtrVars, useDeviceAddrVars);
3533 
3534  // Fill up the arrays with all the mapped variables.
3535  llvm::OpenMPIRBuilder::MapInfosTy combinedInfo;
3536  auto genMapInfoCB =
3537  [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
3538  builder.restoreIP(codeGenIP);
3539  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
3540  return combinedInfo;
3541  };
3542 
3543  // Define a lambda to apply mappings between use_device_addr and
3544  // use_device_ptr base pointers, and their associated block arguments.
3545  auto mapUseDevice =
3546  [&moduleTranslation](
3547  llvm::OpenMPIRBuilder::DeviceInfoTy type,
3549  llvm::SmallVectorImpl<Value> &useDeviceVars, MapInfoData &mapInfoData,
3550  llvm::function_ref<llvm::Value *(llvm::Value *)> mapper = nullptr) {
3551  for (auto [arg, useDevVar] :
3552  llvm::zip_equal(blockArgs, useDeviceVars)) {
3553 
3554  auto getMapBasePtr = [](omp::MapInfoOp mapInfoOp) {
3555  return mapInfoOp.getVarPtrPtr() ? mapInfoOp.getVarPtrPtr()
3556  : mapInfoOp.getVarPtr();
3557  };
3558 
3559  auto useDevMap = cast<omp::MapInfoOp>(useDevVar.getDefiningOp());
3560  for (auto [mapClause, devicePointer, basePointer] : llvm::zip_equal(
3561  mapInfoData.MapClause, mapInfoData.DevicePointers,
3562  mapInfoData.BasePointers)) {
3563  auto mapOp = cast<omp::MapInfoOp>(mapClause);
3564  if (getMapBasePtr(mapOp) != getMapBasePtr(useDevMap) ||
3565  devicePointer != type)
3566  continue;
3567 
3568  if (llvm::Value *devPtrInfoMap =
3569  mapper ? mapper(basePointer) : basePointer) {
3570  moduleTranslation.mapValue(arg, devPtrInfoMap);
3571  break;
3572  }
3573  }
3574  }
3575  };
3576 
3577  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
3578  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType)
3579  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
3580  assert(isa<omp::TargetDataOp>(op) &&
3581  "BodyGen requested for non TargetDataOp");
3582  auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(op);
3583  Region &region = cast<omp::TargetDataOp>(op).getRegion();
3584  switch (bodyGenType) {
3585  case BodyGenTy::Priv:
3586  // Check if any device ptr/addr info is available
3587  if (!info.DevicePtrInfoMap.empty()) {
3588  builder.restoreIP(codeGenIP);
3589 
3590  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
3591  blockArgIface.getUseDeviceAddrBlockArgs(),
3592  useDeviceAddrVars, mapData,
3593  [&](llvm::Value *basePointer) -> llvm::Value * {
3594  if (!info.DevicePtrInfoMap[basePointer].second)
3595  return nullptr;
3596  return builder.CreateLoad(
3597  builder.getPtrTy(),
3598  info.DevicePtrInfoMap[basePointer].second);
3599  });
3600  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
3601  blockArgIface.getUseDevicePtrBlockArgs(), useDevicePtrVars,
3602  mapData, [&](llvm::Value *basePointer) {
3603  return info.DevicePtrInfoMap[basePointer].second;
3604  });
3605 
3606  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
3607  moduleTranslation)))
3608  return llvm::make_error<PreviouslyReportedError>();
3609  }
3610  break;
3611  case BodyGenTy::DupNoPriv:
3612  break;
3613  case BodyGenTy::NoPriv:
3614  // If device info is available then region has already been generated
3615  if (info.DevicePtrInfoMap.empty()) {
3616  builder.restoreIP(codeGenIP);
3617  // For device pass, if use_device_ptr(addr) mappings were present,
3618  // we need to link them here before codegen.
3619  if (ompBuilder->Config.IsTargetDevice.value_or(false)) {
3620  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
3621  blockArgIface.getUseDeviceAddrBlockArgs(),
3622  useDeviceAddrVars, mapData);
3623  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
3624  blockArgIface.getUseDevicePtrBlockArgs(),
3625  useDevicePtrVars, mapData);
3626  }
3627 
3628  if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
3629  moduleTranslation)))
3630  return llvm::make_error<PreviouslyReportedError>();
3631  }
3632  break;
3633  }
3634  return builder.saveIP();
3635  };
3636 
3637  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3638  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3639  findAllocaInsertPoint(builder, moduleTranslation);
3640  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() {
3641  if (isa<omp::TargetDataOp>(op))
3642  return ompBuilder->createTargetData(
3643  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID),
3644  ifCond, info, genMapInfoCB, nullptr, bodyGenCB);
3645  return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
3646  builder.getInt64(deviceID), ifCond,
3647  info, genMapInfoCB, &RTLFn);
3648  }();
3649 
3650  if (failed(handleError(afterIP, *op)))
3651  return failure();
3652 
3653  builder.restoreIP(*afterIP);
3654  return success();
3655 }
3656 
3657 /// Lowers the FlagsAttr which is applied to the module on the device
3658 /// pass when offloading, this attribute contains OpenMP RTL globals that can
3659 /// be passed as flags to the frontend, otherwise they are set to default
3660 LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
3661  LLVM::ModuleTranslation &moduleTranslation) {
3662  if (!cast<mlir::ModuleOp>(op))
3663  return failure();
3664 
3665  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3666 
3667  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
3668  attribute.getOpenmpDeviceVersion());
3669 
3670  if (attribute.getNoGpuLib())
3671  return success();
3672 
3673  ompBuilder->createGlobalFlag(
3674  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
3675  "__omp_rtl_debug_kind");
3676  ompBuilder->createGlobalFlag(
3677  attribute
3678  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
3679  ,
3680  "__omp_rtl_assume_teams_oversubscription");
3681  ompBuilder->createGlobalFlag(
3682  attribute
3683  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
3684  ,
3685  "__omp_rtl_assume_threads_oversubscription");
3686  ompBuilder->createGlobalFlag(
3687  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
3688  "__omp_rtl_assume_no_thread_state");
3689  ompBuilder->createGlobalFlag(
3690  attribute
3691  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
3692  ,
3693  "__omp_rtl_assume_no_nested_parallelism");
3694  return success();
3695 }
3696 
3697 static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
3698  omp::TargetOp targetOp,
3699  llvm::StringRef parentName = "") {
3700  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
3701 
3702  assert(fileLoc && "No file found from location");
3703  StringRef fileName = fileLoc.getFilename().getValue();
3704 
3705  llvm::sys::fs::UniqueID id;
3706  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
3707  targetOp.emitError("Unable to get unique ID for file");
3708  return false;
3709  }
3710 
3711  uint64_t line = fileLoc.getLine();
3712  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
3713  id.getFile(), line);
3714  return true;
3715 }
3716 
3717 static void
3718 handleDeclareTargetMapVar(MapInfoData &mapData,
3719  LLVM::ModuleTranslation &moduleTranslation,
3720  llvm::IRBuilderBase &builder, llvm::Function *func) {
3721  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
3722  // In the case of declare target mapped variables, the basePointer is
3723  // the reference pointer generated by the convertDeclareTargetAttr
3724  // method. Whereas the kernelValue is the original variable, so for
3725  // the device we must replace all uses of this original global variable
3726  // (stored in kernelValue) with the reference pointer (stored in
3727  // basePointer for declare target mapped variables), as for device the
3728  // data is mapped into this reference pointer and should be loaded
3729  // from it, the original variable is discarded. On host both exist and
3730  // metadata is generated (elsewhere in the convertDeclareTargetAttr)
3731  // function to link the two variables in the runtime and then both the
3732  // reference pointer and the pointer are assigned in the kernel argument
3733  // structure for the host.
3734  if (mapData.IsDeclareTarget[i]) {
3735  // If the original map value is a constant, then we have to make sure all
3736  // of it's uses within the current kernel/function that we are going to
3737  // rewrite are converted to instructions, as we will be altering the old
3738  // use (OriginalValue) from a constant to an instruction, which will be
3739  // illegal and ICE the compiler if the user is a constant expression of
3740  // some kind e.g. a constant GEP.
3741  if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
3742  convertUsersOfConstantsToInstructions(constant, func, false);
3743 
3744  // The users iterator will get invalidated if we modify an element,
3745  // so we populate this vector of uses to alter each user on an
3746  // individual basis to emit its own load (rather than one load for
3747  // all).
3749  for (llvm::User *user : mapData.OriginalValue[i]->users())
3750  userVec.push_back(user);
3751 
3752  for (llvm::User *user : userVec) {
3753  if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
3754  if (insn->getFunction() == func) {
3755  auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
3756  mapData.BasePointers[i]);
3757  load->moveBefore(insn);
3758  user->replaceUsesOfWith(mapData.OriginalValue[i], load);
3759  }
3760  }
3761  }
3762  }
3763  }
3764 }
3765 
3766 // The createDeviceArgumentAccessor function generates
3767 // instructions for retrieving (acessing) kernel
3768 // arguments inside of the device kernel for use by
3769 // the kernel. This enables different semantics such as
3770 // the creation of temporary copies of data allowing
3771 // semantics like read-only/no host write back kernel
3772 // arguments.
3773 //
3774 // This currently implements a very light version of Clang's
3775 // EmitParmDecl's handling of direct argument handling as well
3776 // as a portion of the argument access generation based on
3777 // capture types found at the end of emitOutlinedFunctionPrologue
3778 // in Clang. The indirect path handling of EmitParmDecl's may be
3779 // required for future work, but a direct 1-to-1 copy doesn't seem
3780 // possible as the logic is rather scattered throughout Clang's
3781 // lowering and perhaps we wish to deviate slightly.
3782 //
3783 // \param mapData - A container containing vectors of information
3784 // corresponding to the input argument, which should have a
3785 // corresponding entry in the MapInfoData containers
3786 // OriginalValue's.
3787 // \param arg - This is the generated kernel function argument that
3788 // corresponds to the passed in input argument. We generate different
3789 // accesses of this Argument, based on capture type and other Input
3790 // related information.
3791 // \param input - This is the host side value that will be passed to
3792 // the kernel i.e. the kernel input, we rewrite all uses of this within
3793 // the kernel (as we generate the kernel body based on the target's region
3794 // which maintains references to the original input) to the retVal argument
3795 // upon exit of this function inside of the OMPIRBuilder. This interlinks
3796 // the kernel argument to future uses of it in the function providing
3797 // appropriate "glue" instructions in between.
3798 // \param retVal - This is the value that all uses of input inside of the
3799 // kernel will be re-written to, the goal of this function is to generate
3800 // an appropriate location for the kernel argument to be accessed from,
3801 // e.g. ByRef will result in a temporary allocation location and then
3802 // a store of the kernel argument into this allocated memory which
3803 // will then be loaded from, ByCopy will use the allocated memory
3804 // directly.
3805 static llvm::IRBuilderBase::InsertPoint
3806 createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
3807  llvm::Value *input, llvm::Value *&retVal,
3808  llvm::IRBuilderBase &builder,
3809  llvm::OpenMPIRBuilder &ompBuilder,
3810  LLVM::ModuleTranslation &moduleTranslation,
3811  llvm::IRBuilderBase::InsertPoint allocaIP,
3812  llvm::IRBuilderBase::InsertPoint codeGenIP) {
3813  builder.restoreIP(allocaIP);
3814 
3815  omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef;
3816 
3817  // Find the associated MapInfoData entry for the current input
3818  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
3819  if (mapData.OriginalValue[i] == input) {
3820  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
3821  capture =
3822  mapOp.getMapCaptureType().value_or(omp::VariableCaptureKind::ByRef);
3823 
3824  break;
3825  }
3826 
3827  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
3828  unsigned int defaultAS =
3829  ompBuilder.M.getDataLayout().getProgramAddressSpace();
3830 
3831  // Create the alloca for the argument the current point.
3832  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
3833 
3834  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
3835  v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS));
3836 
3837  builder.CreateStore(&arg, v);
3838 
3839  builder.restoreIP(codeGenIP);
3840 
3841  switch (capture) {
3842  case omp::VariableCaptureKind::ByCopy: {
3843  retVal = v;
3844  break;
3845  }
3846  case omp::VariableCaptureKind::ByRef: {
3847  retVal = builder.CreateAlignedLoad(
3848  v->getType(), v,
3849  ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
3850  break;
3851  }
3852  case omp::VariableCaptureKind::This:
3853  case omp::VariableCaptureKind::VLAType:
3854  // TODO: Consider returning error to use standard reporting for
3855  // unimplemented features.
3856  assert(false && "Currently unsupported capture kind");
3857  break;
3858  }
3859 
3860  return builder.saveIP();
3861 }
3862 
3863 static LogicalResult
3864 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
3865  LLVM::ModuleTranslation &moduleTranslation) {
3866  auto targetOp = cast<omp::TargetOp>(opInst);
3867  if (failed(checkImplementationStatus(opInst)))
3868  return failure();
3869 
3870  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3871  bool isTargetDevice = ompBuilder->Config.isTargetDevice();
3872  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
3873  auto &targetRegion = targetOp.getRegion();
3874  // Holds the private vars that have been mapped along with the block argument
3875  // that corresponds to the MapInfoOp corresponding to the private var in
3876  // question. So, for instance:
3877  //
3878  // %10 = omp.map.info var_ptr(%6#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, ..)
3879  // omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1)
3880  //
3881  // Then, %10 has been created so that the descriptor can be used by the
3882  // privatizer @box.privatizer on the device side. Here we'd record {%6#0,
3883  // %arg0} in the mappedPrivateVars map.
3884  llvm::DenseMap<Value, Value> mappedPrivateVars;
3885  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
3886  SmallVector<Value> mapVars = targetOp.getMapVars();
3887  ArrayRef<BlockArgument> mapBlockArgs =
3888  cast<omp::BlockArgOpenMPOpInterface>(opInst).getMapBlockArgs();
3889  llvm::Function *llvmOutlinedFn = nullptr;
3890 
3891  // TODO: It can also be false if a compile-time constant `false` IF clause is
3892  // specified.
3893  bool isOffloadEntry =
3894  isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
3895 
3896  // For some private variables, the MapsForPrivatizedVariablesPass
3897  // creates MapInfoOp instances. Go through the private variables and
3898  // the mapped variables so that during codegeneration we are able
3899  // to quickly look up the corresponding map variable, if any for each
3900  // private variable.
3901  if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) {
3902  auto argIface = llvm::cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
3903  OperandRange privateVars = targetOp.getPrivateVars();
3904  std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
3905  std::optional<DenseI64ArrayAttr> privateMapIndices =
3906  targetOp.getPrivateMapsAttr();
3907 
3908  for (auto [privVarIdx, privVarSymPair] :
3909  llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) {
3910  auto privVar = std::get<0>(privVarSymPair);
3911  auto privSym = std::get<1>(privVarSymPair);
3912 
3913  SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
3914  omp::PrivateClauseOp privatizer =
3915  findPrivatizer(targetOp, privatizerName);
3916 
3917  if (!privatizer.needsMap())
3918  continue;
3919 
3920  mlir::Value mappedValue =
3921  targetOp.getMappedValueForPrivateVar(privVarIdx);
3922  assert(mappedValue && "Expected to find mapped value for a privatized "
3923  "variable that needs mapping");
3924 
3925  // The MapInfoOp defining the map var isn't really needed later.
3926  // So, we don't store it in any datastructure. Instead, we just
3927  // do some sanity checks on it right now.
3928  auto mapInfoOp = mappedValue.getDefiningOp<omp::MapInfoOp>();
3929  [[maybe_unused]] Type varType = mapInfoOp.getVarType();
3930 
3931  // Check #1: Check that the type of the private variable matches
3932  // the type of the variable being mapped.
3933  if (!isa<LLVM::LLVMPointerType>(privVar.getType()))
3934  assert(
3935  varType == privVar.getType() &&
3936  "Type of private var doesn't match the type of the mapped value");
3937 
3938  // Ok, only 1 sanity check for now.
3939  // Record the block argument corresponding to this mapvar.
3940  mappedPrivateVars.insert(
3941  {privVar,
3942  targetRegion.getArgument(argIface.getMapBlockArgsStart() +
3943  (*privateMapIndices)[privVarIdx])});
3944  }
3945  }
3946 
3947  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3948  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
3949  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
3950  // Forward target-cpu and target-features function attributes from the
3951  // original function to the new outlined function.
3952  llvm::Function *llvmParentFn =
3953  moduleTranslation.lookupFunction(parentFn.getName());
3954  llvmOutlinedFn = codeGenIP.getBlock()->getParent();
3955  assert(llvmParentFn && llvmOutlinedFn &&
3956  "Both parent and outlined functions must exist at this point");
3957 
3958  if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
3959  attr.isStringAttribute())
3960  llvmOutlinedFn->addFnAttr(attr);
3961 
3962  if (auto attr = llvmParentFn->getFnAttribute("target-features");
3963  attr.isStringAttribute())
3964  llvmOutlinedFn->addFnAttr(attr);
3965 
3966  for (auto [arg, mapOp] : llvm::zip_equal(mapBlockArgs, mapVars)) {
3967  auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
3968  llvm::Value *mapOpValue =
3969  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
3970  moduleTranslation.mapValue(arg, mapOpValue);
3971  }
3972 
3973  // Do privatization after moduleTranslation has already recorded
3974  // mapped values.
3975  MutableArrayRef<BlockArgument> privateBlockArgs =
3976  cast<omp::BlockArgOpenMPOpInterface>(opInst).getPrivateBlockArgs();
3977  SmallVector<mlir::Value> mlirPrivateVars;
3978  SmallVector<llvm::Value *> llvmPrivateVars;
3979  SmallVector<omp::PrivateClauseOp> privateDecls;
3980  mlirPrivateVars.reserve(privateBlockArgs.size());
3981  llvmPrivateVars.reserve(privateBlockArgs.size());
3982  collectPrivatizationDecls(targetOp, privateDecls);
3983  for (mlir::Value privateVar : targetOp.getPrivateVars())
3984  mlirPrivateVars.push_back(privateVar);
3985 
3987  builder, moduleTranslation, privateBlockArgs, privateDecls,
3988  mlirPrivateVars, llvmPrivateVars, allocaIP, &mappedPrivateVars);
3989 
3990  if (failed(handleError(afterAllocas, *targetOp)))
3991  return llvm::make_error<PreviouslyReportedError>();
3992 
3993  SmallVector<Region *> privateCleanupRegions;
3994  llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
3995  [](omp::PrivateClauseOp privatizer) {
3996  return &privatizer.getDeallocRegion();
3997  });
3998 
3999  builder.restoreIP(codeGenIP);
4001  targetRegion, "omp.target", builder, moduleTranslation);
4002 
4003  if (!exitBlock)
4004  return exitBlock.takeError();
4005 
4006  builder.SetInsertPoint(*exitBlock);
4007  if (!privateCleanupRegions.empty()) {
4008  if (failed(inlineOmpRegionCleanup(
4009  privateCleanupRegions, llvmPrivateVars, moduleTranslation,
4010  builder, "omp.targetop.private.cleanup",
4011  /*shouldLoadCleanupRegionArg=*/false))) {
4012  return llvm::createStringError(
4013  "failed to inline `dealloc` region of `omp.private` "
4014  "op in the target region");
4015  }
4016  }
4017 
4018  return InsertPointTy(exitBlock.get(), exitBlock.get()->end());
4019  };
4020 
4021  StringRef parentName = parentFn.getName();
4022 
4023  llvm::TargetRegionEntryInfo entryInfo;
4024 
4025  if (!getTargetEntryUniqueInfo(entryInfo, targetOp, parentName))
4026  return failure();
4027 
4028  int32_t defaultValTeams = -1;
4029  int32_t defaultValThreads = 0;
4030 
4031  MapInfoData mapData;
4032  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
4033  builder);
4034 
4035  llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
4036  auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
4037  -> llvm::OpenMPIRBuilder::MapInfosTy & {
4038  builder.restoreIP(codeGenIP);
4039  genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, true);
4040  return combinedInfos;
4041  };
4042 
4043  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
4044  llvm::Value *&retVal, InsertPointTy allocaIP,
4045  InsertPointTy codeGenIP)
4046  -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
4047  // We just return the unaltered argument for the host function
4048  // for now, some alterations may be required in the future to
4049  // keep host fallback functions working identically to the device
4050  // version (e.g. pass ByCopy values should be treated as such on
4051  // host and device, currently not always the case)
4052  if (!isTargetDevice) {
4053  retVal = cast<llvm::Value>(&arg);
4054  return codeGenIP;
4055  }
4056 
4057  return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
4058  *ompBuilder, moduleTranslation,
4059  allocaIP, codeGenIP);
4060  };
4061 
4063  for (size_t i = 0; i < mapVars.size(); ++i) {
4064  // declare target arguments are not passed to kernels as arguments
4065  // TODO: We currently do not handle cases where a member is explicitly
4066  // passed in as an argument, this will likely need to be handled in
4067  // the near future, rather than using IsAMember, it may be better to
4068  // test if the relevant BlockArg is used within the target region and
4069  // then use that as a basis for exclusion in the kernel inputs.
4070  if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
4071  kernelInput.push_back(mapData.OriginalValue[i]);
4072  }
4073 
4075  buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
4076  moduleTranslation, dds);
4077 
4078  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
4079  findAllocaInsertPoint(builder, moduleTranslation);
4080  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
4081 
4082  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
4083  moduleTranslation.getOpenMPBuilder()->createTarget(
4084  ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo,
4085  defaultValTeams, defaultValThreads, kernelInput, genMapInfoCB, bodyCB,
4086  argAccessorCB, dds, targetOp.getNowait());
4087 
4088  if (failed(handleError(afterIP, opInst)))
4089  return failure();
4090 
4091  builder.restoreIP(*afterIP);
4092 
4093  // Remap access operations to declare target reference pointers for the
4094  // device, essentially generating extra loadop's as necessary
4095  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
4096  handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
4097  llvmOutlinedFn);
4098 
4099  return success();
4100 }
4101 
4102 static LogicalResult
4103 convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
4104  LLVM::ModuleTranslation &moduleTranslation) {
4105  // Amend omp.declare_target by deleting the IR of the outlined functions
4106  // created for target regions. They cannot be filtered out from MLIR earlier
4107  // because the omp.target operation inside must be translated to LLVM, but
4108  // the wrapper functions themselves must not remain at the end of the
4109  // process. We know that functions where omp.declare_target does not match
4110  // omp.is_target_device at this stage can only be wrapper functions because
4111  // those that aren't are removed earlier as an MLIR transformation pass.
4112  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
4113  if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
4114  op->getParentOfType<ModuleOp>().getOperation())) {
4115  if (!offloadMod.getIsTargetDevice())
4116  return success();
4117 
4118  omp::DeclareTargetDeviceType declareType =
4119  attribute.getDeviceType().getValue();
4120 
4121  if (declareType == omp::DeclareTargetDeviceType::host) {
4122  llvm::Function *llvmFunc =
4123  moduleTranslation.lookupFunction(funcOp.getName());
4124  llvmFunc->dropAllReferences();
4125  llvmFunc->eraseFromParent();
4126  }
4127  }
4128  return success();
4129  }
4130 
4131  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
4132  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
4133  if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
4134  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4135  bool isDeclaration = gOp.isDeclaration();
4136  bool isExternallyVisible =
4137  gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
4138  auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
4139  llvm::StringRef mangledName = gOp.getSymName();
4140  auto captureClause =
4141  convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
4142  auto deviceClause =
4143  convertToDeviceClauseKind(attribute.getDeviceType().getValue());
4144  // unused for MLIR at the moment, required in Clang for book
4145  // keeping
4146  std::vector<llvm::GlobalVariable *> generatedRefs;
4147 
4148  std::vector<llvm::Triple> targetTriple;
4149  auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>(
4150  op->getParentOfType<mlir::ModuleOp>()->getAttr(
4151  LLVM::LLVMDialect::getTargetTripleAttrName()));
4152  if (targetTripleAttr)
4153  targetTriple.emplace_back(targetTripleAttr.data());
4154 
4155  auto fileInfoCallBack = [&loc]() {
4156  std::string filename = "";
4157  std::uint64_t lineNo = 0;
4158 
4159  if (loc) {
4160  filename = loc.getFilename().str();
4161  lineNo = loc.getLine();
4162  }
4163 
4164  return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
4165  lineNo);
4166  };
4167 
4168  ompBuilder->registerTargetGlobalVariable(
4169  captureClause, deviceClause, isDeclaration, isExternallyVisible,
4170  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
4171  generatedRefs, /*OpenMPSimd*/ false, targetTriple,
4172  /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
4173  gVal->getType(), gVal);
4174 
4175  if (ompBuilder->Config.isTargetDevice() &&
4176  (attribute.getCaptureClause().getValue() !=
4177  mlir::omp::DeclareTargetCaptureClause::to ||
4178  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
4179  ompBuilder->getAddrOfDeclareTargetVar(
4180  captureClause, deviceClause, isDeclaration, isExternallyVisible,
4181  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
4182  generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
4183  /*GlobalInitializer*/ nullptr,
4184  /*VariableLinkage*/ nullptr);
4185  }
4186  }
4187  }
4188 
4189  return success();
4190 }
4191 
4192 // Returns true if the operation is inside a TargetOp or
4193 // is part of a declare target function.
4194 static bool isTargetDeviceOp(Operation *op) {
4195  // Assumes no reverse offloading
4196  if (op->getParentOfType<omp::TargetOp>())
4197  return true;
4198 
4199  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
4200  if (auto declareTargetIface =
4201  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
4202  parentFn.getOperation()))
4203  if (declareTargetIface.isDeclareTarget() &&
4204  declareTargetIface.getDeclareTargetDeviceType() !=
4205  mlir::omp::DeclareTargetDeviceType::host)
4206  return true;
4207 
4208  return false;
4209 }
4210 
4211 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
4212 /// (including OpenMP runtime calls).
4213 static LogicalResult
4214 convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
4215  LLVM::ModuleTranslation &moduleTranslation) {
4216 
4217  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4218 
4220  .Case([&](omp::BarrierOp op) -> LogicalResult {
4221  if (failed(checkImplementationStatus(*op)))
4222  return failure();
4223 
4224  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
4225  ompBuilder->createBarrier(builder.saveIP(),
4226  llvm::omp::OMPD_barrier);
4227  return handleError(afterIP, *op);
4228  })
4229  .Case([&](omp::TaskyieldOp op) {
4230  if (failed(checkImplementationStatus(*op)))
4231  return failure();
4232 
4233  ompBuilder->createTaskyield(builder.saveIP());
4234  return success();
4235  })
4236  .Case([&](omp::FlushOp op) {
4237  if (failed(checkImplementationStatus(*op)))
4238  return failure();
4239 
4240  // No support in Openmp runtime function (__kmpc_flush) to accept
4241  // the argument list.
4242  // OpenMP standard states the following:
4243  // "An implementation may implement a flush with a list by ignoring
4244  // the list, and treating it the same as a flush without a list."
4245  //
4246  // The argument list is discarded so that, flush with a list is treated
4247  // same as a flush without a list.
4248  ompBuilder->createFlush(builder.saveIP());
4249  return success();
4250  })
4251  .Case([&](omp::ParallelOp op) {
4252  return convertOmpParallel(op, builder, moduleTranslation);
4253  })
4254  .Case([&](omp::MaskedOp) {
4255  return convertOmpMasked(*op, builder, moduleTranslation);
4256  })
4257  .Case([&](omp::MasterOp) {
4258  return convertOmpMaster(*op, builder, moduleTranslation);
4259  })
4260  .Case([&](omp::CriticalOp) {
4261  return convertOmpCritical(*op, builder, moduleTranslation);
4262  })
4263  .Case([&](omp::OrderedRegionOp) {
4264  return convertOmpOrderedRegion(*op, builder, moduleTranslation);
4265  })
4266  .Case([&](omp::OrderedOp) {
4267  return convertOmpOrdered(*op, builder, moduleTranslation);
4268  })
4269  .Case([&](omp::WsloopOp) {
4270  return convertOmpWsloop(*op, builder, moduleTranslation);
4271  })
4272  .Case([&](omp::SimdOp) {
4273  return convertOmpSimd(*op, builder, moduleTranslation);
4274  })
4275  .Case([&](omp::AtomicReadOp) {
4276  return convertOmpAtomicRead(*op, builder, moduleTranslation);
4277  })
4278  .Case([&](omp::AtomicWriteOp) {
4279  return convertOmpAtomicWrite(*op, builder, moduleTranslation);
4280  })
4281  .Case([&](omp::AtomicUpdateOp op) {
4282  return convertOmpAtomicUpdate(op, builder, moduleTranslation);
4283  })
4284  .Case([&](omp::AtomicCaptureOp op) {
4285  return convertOmpAtomicCapture(op, builder, moduleTranslation);
4286  })
4287  .Case([&](omp::SectionsOp) {
4288  return convertOmpSections(*op, builder, moduleTranslation);
4289  })
4290  .Case([&](omp::SingleOp op) {
4291  return convertOmpSingle(op, builder, moduleTranslation);
4292  })
4293  .Case([&](omp::TeamsOp op) {
4294  return convertOmpTeams(op, builder, moduleTranslation);
4295  })
4296  .Case([&](omp::TaskOp op) {
4297  return convertOmpTaskOp(op, builder, moduleTranslation);
4298  })
4299  .Case([&](omp::TaskgroupOp op) {
4300  return convertOmpTaskgroupOp(op, builder, moduleTranslation);
4301  })
4302  .Case([&](omp::TaskwaitOp op) {
4303  return convertOmpTaskwaitOp(op, builder, moduleTranslation);
4304  })
4305  .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareReductionOp,
4306  omp::CriticalDeclareOp>([](auto op) {
4307  // `yield` and `terminator` can be just omitted. The block structure
4308  // was created in the region that handles their parent operation.
4309  // `declare_reduction` will be used by reductions and is not
4310  // converted directly, skip it.
4311  // `critical.declare` is only used to declare names of critical
4312  // sections which will be used by `critical` ops and hence can be
4313  // ignored for lowering. The OpenMP IRBuilder will create unique
4314  // name for critical section names.
4315  return success();
4316  })
4317  .Case([&](omp::ThreadprivateOp) {
4318  return convertOmpThreadprivate(*op, builder, moduleTranslation);
4319  })
4320  .Case<omp::TargetDataOp, omp::TargetEnterDataOp, omp::TargetExitDataOp,
4321  omp::TargetUpdateOp>([&](auto op) {
4322  return convertOmpTargetData(op, builder, moduleTranslation);
4323  })
4324  .Case([&](omp::TargetOp) {
4325  return convertOmpTarget(*op, builder, moduleTranslation);
4326  })
4327  .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
4328  [&](auto op) {
4329  // No-op, should be handled by relevant owning operations e.g.
4330  // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp etc.
4331  // and then discarded
4332  return success();
4333  })
4334  .Default([&](Operation *inst) {
4335  return inst->emitError() << "not yet implemented: " << inst->getName();
4336  });
4337 }
4338 
4339 static LogicalResult
4340 convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
4341  LLVM::ModuleTranslation &moduleTranslation) {
4342  return convertHostOrTargetOperation(op, builder, moduleTranslation);
4343 }
4344 
4345 static LogicalResult
4346 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
4347  LLVM::ModuleTranslation &moduleTranslation) {
4348  if (isa<omp::TargetOp>(op))
4349  return convertOmpTarget(*op, builder, moduleTranslation);
4350  if (isa<omp::TargetDataOp>(op))
4351  return convertOmpTargetData(op, builder, moduleTranslation);
4352  bool interrupted =
4353  op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
4354  if (isa<omp::TargetOp>(oper)) {
4355  if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
4356  return WalkResult::interrupt();
4357  return WalkResult::skip();
4358  }
4359  if (isa<omp::TargetDataOp>(oper)) {
4360  if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
4361  return WalkResult::interrupt();
4362  return WalkResult::skip();
4363  }
4364  return WalkResult::advance();
4365  }).wasInterrupted();
4366  return failure(interrupted);
4367 }
4368 
4369 namespace {
4370 
4371 /// Implementation of the dialect interface that converts operations belonging
4372 /// to the OpenMP dialect to LLVM IR.
4373 class OpenMPDialectLLVMIRTranslationInterface
4375 public:
4377 
4378  /// Translates the given operation to LLVM IR using the provided IR builder
4379  /// and saving the state in `moduleTranslation`.
4380  LogicalResult
4381  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
4382  LLVM::ModuleTranslation &moduleTranslation) const final;
4383 
4384  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
4385  /// runtime calls, or operation amendments
4386  LogicalResult
4388  NamedAttribute attribute,
4389  LLVM::ModuleTranslation &moduleTranslation) const final;
4390 };
4391 
4392 } // namespace
4393 
4394 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
4395  Operation *op, ArrayRef<llvm::Instruction *> instructions,
4396  NamedAttribute attribute,
4397  LLVM::ModuleTranslation &moduleTranslation) const {
4398  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
4399  attribute.getName())
4400  .Case("omp.is_target_device",
4401  [&](Attribute attr) {
4402  if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
4403  llvm::OpenMPIRBuilderConfig &config =
4404  moduleTranslation.getOpenMPBuilder()->Config;
4405  config.setIsTargetDevice(deviceAttr.getValue());
4406  return success();
4407  }
4408  return failure();
4409  })
4410  .Case("omp.is_gpu",
4411  [&](Attribute attr) {
4412  if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
4413  llvm::OpenMPIRBuilderConfig &config =
4414  moduleTranslation.getOpenMPBuilder()->Config;
4415  config.setIsGPU(gpuAttr.getValue());
4416  return success();
4417  }
4418  return failure();
4419  })
4420  .Case("omp.host_ir_filepath",
4421  [&](Attribute attr) {
4422  if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
4423  llvm::OpenMPIRBuilder *ompBuilder =
4424  moduleTranslation.getOpenMPBuilder();
4425  ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
4426  return success();
4427  }
4428  return failure();
4429  })
4430  .Case("omp.flags",
4431  [&](Attribute attr) {
4432  if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
4433  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
4434  return failure();
4435  })
4436  .Case("omp.version",
4437  [&](Attribute attr) {
4438  if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
4439  llvm::OpenMPIRBuilder *ompBuilder =
4440  moduleTranslation.getOpenMPBuilder();
4441  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
4442  versionAttr.getVersion());
4443  return success();
4444  }
4445  return failure();
4446  })
4447  .Case("omp.declare_target",
4448  [&](Attribute attr) {
4449  if (auto declareTargetAttr =
4450  dyn_cast<omp::DeclareTargetAttr>(attr))
4451  return convertDeclareTargetAttr(op, declareTargetAttr,
4452  moduleTranslation);
4453  return failure();
4454  })
4455  .Case("omp.requires",
4456  [&](Attribute attr) {
4457  if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
4458  using Requires = omp::ClauseRequires;
4459  Requires flags = requiresAttr.getValue();
4460  llvm::OpenMPIRBuilderConfig &config =
4461  moduleTranslation.getOpenMPBuilder()->Config;
4462  config.setHasRequiresReverseOffload(
4463  bitEnumContainsAll(flags, Requires::reverse_offload));
4464  config.setHasRequiresUnifiedAddress(
4465  bitEnumContainsAll(flags, Requires::unified_address));
4466  config.setHasRequiresUnifiedSharedMemory(
4467  bitEnumContainsAll(flags, Requires::unified_shared_memory));
4468  config.setHasRequiresDynamicAllocators(
4469  bitEnumContainsAll(flags, Requires::dynamic_allocators));
4470  return success();
4471  }
4472  return failure();
4473  })
4474  .Case("omp.target_triples",
4475  [&](Attribute attr) {
4476  if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
4477  llvm::OpenMPIRBuilderConfig &config =
4478  moduleTranslation.getOpenMPBuilder()->Config;
4479  config.TargetTriples.clear();
4480  config.TargetTriples.reserve(triplesAttr.size());
4481  for (Attribute tripleAttr : triplesAttr) {
4482  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
4483  config.TargetTriples.emplace_back(tripleStrAttr.getValue());
4484  else
4485  return failure();
4486  }
4487  return success();
4488  }
4489  return failure();
4490  })
4491  .Default([](Attribute) {
4492  // Fall through for omp attributes that do not require lowering.
4493  return success();
4494  })(attribute.getValue());
4495 
4496  return failure();
4497 }
4498 
4499 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
4500 /// (including OpenMP runtime calls).
4501 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
4502  Operation *op, llvm::IRBuilderBase &builder,
4503  LLVM::ModuleTranslation &moduleTranslation) const {
4504 
4505  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
4506  if (ompBuilder->Config.isTargetDevice()) {
4507  if (isTargetDeviceOp(op)) {
4508  return convertTargetDeviceOp(op, builder, moduleTranslation);
4509  } else {
4510  return convertTargetOpsInNest(op, builder, moduleTranslation);
4511  }
4512  }
4513  return convertHostOrTargetOperation(op, builder, moduleTranslation);
4514 }
4515 
4517  registry.insert<omp::OpenMPDialect>();
4518  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
4519  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
4520  });
4521 }
4522 
4524  DialectRegistry registry;
4526  context.appendDialectRegistry(registry);
4527 }
@ None
static llvm::Value * getRefPtrIfDeclareTarget(mlir::Value value, LLVM::ModuleTranslation &moduleTranslation)
static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::Function *func)
static LogicalResult convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static llvm::omp::OrderKind convertOrderKind(std::optional< omp::ClauseOrderKind > o)
Convert Order attribute to llvm::omp::OrderKind.
static LogicalResult convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertToCaptureClauseKind(mlir::omp::DeclareTargetCaptureClause captureClause)
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, bool first)
static LogicalResult convertIgnoredWrappers(omp::LoopNestOp loopOp, omp::LoopWrapperInterface parentOp, LLVM::ModuleTranslation &moduleTranslation)
Helper function to call convertIgnoredWrapper() for all wrappers of the given loopOp nested inside of...
static LogicalResult convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered_region' operation into LLVM IR using OpenMPIRBuilder.
static llvm::OpenMPIRBuilder::InsertPointTy findAllocaInsertPoint(llvm::IRBuilderBase &builder, const LLVM::ModuleTranslation &moduleTranslation)
Find the insertion point for allocas given the current insertion point for normal operations in the b...
static LogicalResult convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an omp.atomic.write operation to LLVM IR.
static OwningAtomicReductionGen makeAtomicReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible atomic reduction generator for the given reduction declaration.
static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static llvm::Value * findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Return the llvm::Value * corresponding to the privateVar that is being privatized.
static LogicalResult convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult cleanupPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, Location loc, SmallVectorImpl< llvm::Value * > &llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls)
static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op)
Converts an LLVM dialect binary operation to the corresponding enum value for atomicrmw supported bin...
static llvm::AtomicOrdering convertAtomicOrdering(std::optional< omp::ClauseMemoryOrderKind > ao)
Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult allocAndInitializeReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef)
static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
static llvm::Expected< llvm::BasicBlock * > allocatePrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, MutableArrayRef< BlockArgument > privateBlockArgs, MutableArrayRef< omp::PrivateClauseOp > privateDecls, MutableArrayRef< mlir::Value > mlirPrivateVars, llvm::SmallVectorImpl< llvm::Value * > &llvmPrivateVars, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, llvm::DenseMap< Value, Value > *mappedPrivateVars=nullptr)
Allocate delayed private variables.
static LogicalResult convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
static bool isTargetDeviceOp(Operation *op)
static LogicalResult inlineOmpRegionCleanup(llvm::SmallVectorImpl< Region * > &cleanupRegions, llvm::ArrayRef< llvm::Value * > privateVariables, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, StringRef regionName, bool shouldLoadCleanupRegionArg=true)
Handles a DeclareReductionOp's cleanup region.
static void mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, DenseMap< Value, llvm::Value * > &reductionVariableMap, unsigned i)
Map input arguments to reduction initialization region.
static LogicalResult initReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::BasicBlock *latestAllocaBlock, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef, SmallVectorImpl< DeferredStore > &deferredStores)
static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp)
static llvm::SmallString< 64 > getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, llvm::OpenMPIRBuilder &ompBuilder)
static void collectPrivatizationDecls(OP op, SmallVectorImpl< omp::PrivateClauseOp > &privatizations)
Populates privatizations with privatization declarations used for the given op.
static OwningReductionGen makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible reduction generator for the given reduction declaration.
static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts the OpenMP parallel operation to LLVM IR.
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static LogicalResult inlineConvertOmpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::Value * > *continuationBlockArgs=nullptr)
Translates the blocks contained in the given region and appends them at the current insertion poin...
static LogicalResult convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP Threadprivate operation into LLVM IR using OpenMPIRBuilder.
static omp::PrivateClauseOp findPrivatizer(Operation *from, SymbolRefAttr symbolName)
Looks up from the operation from and returns the PrivateClauseOp with name symbolName.
static LogicalResult convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult createReductionsAndCleanup(OP op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, ArrayRef< llvm::Value * > privateReductionVariables, ArrayRef< bool > isByRef)
LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
Lowers the FlagsAttr which is applied to the module on the device pass when offloading,...
uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl)
std::vector< llvm::Value * > calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, bool isArrayTy, OperandRange bounds)
This function calculates the array/pointer offset for map data provided with bounds operations,...
static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, bool isTargetParams, int mapDataParentIdx=-1)
static LogicalResult allocReductionVars(T loop, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, SmallVectorImpl< DeferredStore > &deferredStores, llvm::ArrayRef< bool > isByRefs)
Allocate space for privatized reduction variables.
static ArrayRef< bool > getIsByRef(std::optional< ArrayRef< bool >> attr)
static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, bool isTargetParams=false)
static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP)
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind)
Convert ProcBindKind from MLIR-generated enum to LLVM enum.
static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void processMapMembersWithParent(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag)
static LogicalResult convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static void collectReductionDecls(T op, SmallVectorImpl< omp::DeclareReductionOp > &reductions)
Populates reductions with reduction declarations used in the given op.
static LogicalResult handleError(llvm::Error error, Operation &op)
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause)
static LogicalResult checkImplementationStatus(Operation &op)
Check whether translation to LLVM IR for the given operation is currently supported.
static LogicalResult convertIgnoredWrapper(omp::LoopWrapperInterface &opInst, LLVM::ModuleTranslation &moduleTranslation)
Helper function to map block arguments defined by ignored loop wrappers to LLVM values and prevent an...
static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert omp.atomic.read operation to LLVM IR.
static void collectMapDataFromMapOperands(MapInfoData &mapData, SmallVectorImpl< Value > &mapVars, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::IRBuilderBase &builder, const ArrayRef< Value > &useDevPtrOperands={}, const ArrayRef< Value > &useDevAddrOperands={})
static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, omp::TargetOp targetOp, llvm::StringRef parentName="")
static void collectReductionInfo(T loop, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< OwningReductionGen > &owningReductionGens, SmallVectorImpl< OwningAtomicReductionGen > &owningAtomicReductionGens, const ArrayRef< llvm::Value * > privateReductionVariables, SmallVectorImpl< llvm::OpenMPIRBuilder::ReductionInfo > &reductionInfos)
Collect reduction info.
static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Given an OpenMP MLIR operation, create the corresponding LLVM IR (including OpenMP runtime calls).
static bool checkIfPointerMap(omp::MapInfoOp mapOp)
static LogicalResult initFirstPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< mlir::Value > &mlirPrivateVars, SmallVectorImpl< llvm::Value * > &llvmPrivateVars, SmallVectorImpl< omp::PrivateClauseOp > &privateDecls, llvm::BasicBlock *afterAllocas)
static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void buildDependData(std::optional< ArrayAttr > dependKinds, OperandRange dependVars, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::OpenMPIRBuilder::DependData > &dds)
static llvm::Expected< llvm::BasicBlock * > convertOmpOpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::PHINode * > *continuationBlockPHIs=nullptr)
Converts the given region that appears within an OpenMP dialect operation to LLVM IR,...
static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
llvm::Value * getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::Value *basePointer, llvm::Type *baseType, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static bool isDeclareTargetLink(mlir::Value value)
#define MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CLASS_NAME)
Definition: TypeID.h:274
Attributes are known-constant values of operations.
Definition: Attributes.h:25
Block represents an ordered list of Operations.
Definition: Block.h:33
BlockArgument getArgument(unsigned i)
Definition: Block.h:129
unsigned getNumArguments()
Definition: Block.h:128
Operation & back()
Definition: Block.h:152
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:246
Operation & front()
Definition: Block.h:153
iterator begin()
Definition: Block.h:143
The main mechanism for performing data layout queries.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
An instance of this location represents a tuple of file, line number, and column number.
Definition: Location.h:181
Base class for dialect interfaces providing translation to LLVM IR.
virtual LogicalResult amendOperation(Operation *op, ArrayRef< llvm::Instruction * > instructions, NamedAttribute attribute, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to act on an operation that has dialect attributes from the derive...
virtual LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to provide translation of the operations to LLVM IR.
Concrete CRTP base class for ModuleTranslation stack frames.
Implementation class for module translation.
llvm::Value * lookupValue(Value value) const
Finds an LLVM IR value corresponding to the given MLIR value.
LogicalResult convertBlock(Block &bb, bool ignoreArguments, llvm::IRBuilderBase &builder)
Translates the contents of the given block to LLVM IR using this translator.
SmallVector< llvm::Value * > lookupValues(ValueRange values)
Looks up a list of remapped values.
llvm::BasicBlock * lookupBlock(Block *block) const
Finds an LLVM IR basic block that corresponds to the given MLIR block.
SymbolTableCollection & symbolTable()
llvm::Type * convertType(Type type)
Converts the type from MLIR LLVM dialect to LLVM.
llvm::OpenMPIRBuilder * getOpenMPBuilder()
Returns the OpenMP IR builder associated with the LLVM IR module being constructed.
llvm::LLVMContext & getLLVMContext() const
Returns the LLVM context in which the IR is being constructed.
llvm::GlobalValue * lookupGlobal(Operation *op)
Finds an LLVM IR global value that corresponds to the given MLIR operation defining a global value.
llvm::Module * getLLVMModule()
Returns the LLVM module in which the IR is being constructed.
llvm::Function * lookupFunction(StringRef name) const
Finds an LLVM IR function by its name.
void mapBlock(Block *mlir, llvm::BasicBlock *llvm)
Stores the mapping between an MLIR block and LLVM IR basic block.
WalkResult stackWalk(llvm::function_ref< WalkResult(const T &)> callback) const
Calls callback for every ModuleTranslation stack frame of type T starting from the top of the stack.
void forgetMapping(Region &region)
Removes the mapping for blocks contained in the region and values defined in these blocks.
void mapValue(Value mlir, llvm::Value *llvm)
Stores the mapping between an MLIR value and its LLVM IR counterpart.
T findInstanceOf()
Return an instance of the given location type if one is nested under the current location.
Definition: Location.h:44
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:66
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:207
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:49
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:221
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:42
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:350
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:280
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:798
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:346
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:687
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
BlockArgListType getArguments()
Definition: Region.h:81
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition: Region.h:200
bool empty()
Definition: Region.h:60
unsigned getNumArguments()
Definition: Region.h:123
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
Block & front()
Definition: Region.h:65
@ Private
The symbol is private and may only be referenced by SymbolRefAttrs local to the operations within the...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:33
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: Visitors.h:55
static WalkResult interrupt()
Definition: Visitors.h:50
The OpAsmOpInterface, see OpAsmInterface.td for more details.
Definition: CallGraph.h:229
void connectPHINodes(Region &region, const ModuleTranslation &state)
For all blocks in the region that were converted to LLVM IR using the given ModuleTranslation,...
llvm::Constant * createMappingInformation(Location loc, llvm::OpenMPIRBuilder &builder)
Create a constant string representing the mapping information extracted from the MLIR location inform...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Runtime
Potential runtimes for AMD GPU kernels.
Definition: Runtimes.h:15
Include the generated interface declarations.
SetVector< Block * > getBlocksSortedByDominance(Region &region)
Gets a list of blocks that is sorted according to dominance.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:305
const FrozenRewritePatternSet GreedyRewriteConfig config
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
void registerOpenMPDialectTranslation(DialectRegistry &registry)
Register the OpenMP dialect and the translation from it to the LLVM IR in the given registry;.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
RAII object calling stackPush/stackPop on construction/destruction.