// OpenMPToLLVMIRTranslation.cpp — part of MLIR (llvm-project, 20.0.0git).
// (Doxygen export banner removed; the original license header follows.)
//===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a translation between the MLIR OpenMP dialect and LLVM
// IR.
//
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/TopologicalSortUtils.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/Operation.h"
#include "mlir/Support/LLVM.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <any>
#include <cstdint>
#include <iterator>
#include <numeric>
#include <optional>
#include <utility>

using namespace mlir;
46 namespace {
47 static llvm::omp::ScheduleKind
48 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
49  if (!schedKind.has_value())
50  return llvm::omp::OMP_SCHEDULE_Default;
51  switch (schedKind.value()) {
52  case omp::ClauseScheduleKind::Static:
53  return llvm::omp::OMP_SCHEDULE_Static;
54  case omp::ClauseScheduleKind::Dynamic:
55  return llvm::omp::OMP_SCHEDULE_Dynamic;
56  case omp::ClauseScheduleKind::Guided:
57  return llvm::omp::OMP_SCHEDULE_Guided;
58  case omp::ClauseScheduleKind::Auto:
59  return llvm::omp::OMP_SCHEDULE_Auto;
61  return llvm::omp::OMP_SCHEDULE_Runtime;
62  }
63  llvm_unreachable("unhandled schedule clause argument");
64 }
65 
66 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
67 /// insertion points for allocas.
68 class OpenMPAllocaStackFrame
69  : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
70 public:
71  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)
72 
73  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
74  : allocaInsertPoint(allocaIP) {}
75  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
76 };
77 
78 /// ModuleTranslation stack frame containing the partial mapping between MLIR
79 /// values and their LLVM IR equivalents.
80 class OpenMPVarMappingStackFrame
82  OpenMPVarMappingStackFrame> {
83 public:
84  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame)
85 
86  explicit OpenMPVarMappingStackFrame(
87  const DenseMap<Value, llvm::Value *> &mapping)
88  : mapping(mapping) {}
89 
91 };
92 } // namespace
93 
94 /// Find the insertion point for allocas given the current insertion point for
95 /// normal operations in the builder.
96 static llvm::OpenMPIRBuilder::InsertPointTy
97 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
98  const LLVM::ModuleTranslation &moduleTranslation) {
99  // If there is an alloca insertion point on stack, i.e. we are in a nested
100  // operation and a specific point was provided by some surrounding operation,
101  // use it.
102  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
103  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
104  [&](const OpenMPAllocaStackFrame &frame) {
105  allocaInsertPoint = frame.allocaInsertPoint;
106  return WalkResult::interrupt();
107  });
108  if (walkResult.wasInterrupted())
109  return allocaInsertPoint;
110 
111  // Otherwise, insert to the entry block of the surrounding function.
112  // If the current IRBuilder InsertPoint is the function's entry, it cannot
113  // also be used for alloca insertion which would result in insertion order
114  // confusion. Create a new BasicBlock for the Builder and use the entry block
115  // for the allocs.
116  // TODO: Create a dedicated alloca BasicBlock at function creation such that
117  // we do not need to move the current InertPoint here.
118  if (builder.GetInsertBlock() ==
119  &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
120  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
121  "Assuming end of basic block");
122  llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
123  builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
124  builder.GetInsertBlock()->getNextNode());
125  builder.CreateBr(entryBB);
126  builder.SetInsertPoint(entryBB);
127  }
128 
129  llvm::BasicBlock &funcEntryBlock =
130  builder.GetInsertBlock()->getParent()->getEntryBlock();
131  return llvm::OpenMPIRBuilder::InsertPointTy(
132  &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
133 }
134 
135 /// Converts the given region that appears within an OpenMP dialect operation to
136 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
137 /// region, and a branch from any block with an successor-less OpenMP terminator
138 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
139 /// of the continuation block if provided.
140 static llvm::BasicBlock *convertOmpOpRegions(
141  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
142  LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
143  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
144  llvm::BasicBlock *continuationBlock =
145  splitBB(builder, true, "omp.region.cont");
146  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
147 
148  llvm::LLVMContext &llvmContext = builder.getContext();
149  for (Block &bb : region) {
150  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
151  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
152  builder.GetInsertBlock()->getNextNode());
153  moduleTranslation.mapBlock(&bb, llvmBB);
154  }
155 
156  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
157 
158  // Terminators (namely YieldOp) may be forwarding values to the region that
159  // need to be available in the continuation block. Collect the types of these
160  // operands in preparation of creating PHI nodes.
161  SmallVector<llvm::Type *> continuationBlockPHITypes;
162  bool operandsProcessed = false;
163  unsigned numYields = 0;
164  for (Block &bb : region.getBlocks()) {
165  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
166  if (!operandsProcessed) {
167  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
168  continuationBlockPHITypes.push_back(
169  moduleTranslation.convertType(yield->getOperand(i).getType()));
170  }
171  operandsProcessed = true;
172  } else {
173  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
174  "mismatching number of values yielded from the region");
175  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
176  llvm::Type *operandType =
177  moduleTranslation.convertType(yield->getOperand(i).getType());
178  (void)operandType;
179  assert(continuationBlockPHITypes[i] == operandType &&
180  "values of mismatching types yielded from the region");
181  }
182  }
183  numYields++;
184  }
185  }
186 
187  // Insert PHI nodes in the continuation block for any values forwarded by the
188  // terminators in this region.
189  if (!continuationBlockPHITypes.empty())
190  assert(
191  continuationBlockPHIs &&
192  "expected continuation block PHIs if converted regions yield values");
193  if (continuationBlockPHIs) {
194  llvm::IRBuilderBase::InsertPointGuard guard(builder);
195  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
196  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
197  for (llvm::Type *ty : continuationBlockPHITypes)
198  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
199  }
200 
201  // Convert blocks one by one in topological order to ensure
202  // defs are converted before uses.
204  for (Block *bb : blocks) {
205  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
206  // Retarget the branch of the entry block to the entry block of the
207  // converted region (regions are single-entry).
208  if (bb->isEntryBlock()) {
209  assert(sourceTerminator->getNumSuccessors() == 1 &&
210  "provided entry block has multiple successors");
211  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
212  "ContinuationBlock is not the successor of the entry block");
213  sourceTerminator->setSuccessor(0, llvmBB);
214  }
215 
216  llvm::IRBuilderBase::InsertPointGuard guard(builder);
217  if (failed(
218  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) {
219  bodyGenStatus = failure();
220  return continuationBlock;
221  }
222 
223  // Special handling for `omp.yield` and `omp.terminator` (we may have more
224  // than one): they return the control to the parent OpenMP dialect operation
225  // so replace them with the branch to the continuation block. We handle this
226  // here to avoid relying inter-function communication through the
227  // ModuleTranslation class to set up the correct insertion point. This is
228  // also consistent with MLIR's idiom of handling special region terminators
229  // in the same code that handles the region-owning operation.
230  Operation *terminator = bb->getTerminator();
231  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
232  builder.CreateBr(continuationBlock);
233 
234  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
235  (*continuationBlockPHIs)[i]->addIncoming(
236  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
237  }
238  }
239  // After all blocks have been traversed and values mapped, connect the PHI
240  // nodes to the results of preceding blocks.
241  LLVM::detail::connectPHINodes(region, moduleTranslation);
242 
243  // Remove the blocks and values defined in this region from the mapping since
244  // they are not visible outside of this region. This allows the same region to
245  // be converted several times, that is cloned, without clashes, and slightly
246  // speeds up the lookups.
247  moduleTranslation.forgetMapping(region);
248 
249  return continuationBlock;
250 }
251 
252 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
253 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
254  switch (kind) {
255  case omp::ClauseProcBindKind::Close:
256  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
257  case omp::ClauseProcBindKind::Master:
258  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
259  case omp::ClauseProcBindKind::Primary:
260  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
261  case omp::ClauseProcBindKind::Spread:
262  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
263  }
264  llvm_unreachable("Unknown ClauseProcBindKind kind");
265 }
266 
267 /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
268 static LogicalResult
269 convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
270  LLVM::ModuleTranslation &moduleTranslation) {
271  auto maskedOp = cast<omp::MaskedOp>(opInst);
272  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
273  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
274  // relying on captured variables.
275  LogicalResult bodyGenStatus = success();
276 
277  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
278  // MaskedOp has only one region associated with it.
279  auto &region = maskedOp.getRegion();
280  builder.restoreIP(codeGenIP);
281  convertOmpOpRegions(region, "omp.masked.region", builder, moduleTranslation,
282  bodyGenStatus);
283  };
284 
285  // TODO: Perform finalization actions for variables. This has to be
286  // called for variables which have destructors/finalizers.
287  auto finiCB = [&](InsertPointTy codeGenIP) {};
288 
289  llvm::Value *filterVal = nullptr;
290  if (auto filterVar = maskedOp.getFilteredThreadId()) {
291  filterVal = moduleTranslation.lookupValue(filterVar);
292  } else {
293  llvm::LLVMContext &llvmContext = builder.getContext();
294  filterVal =
295  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), /*V=*/0);
296  }
297  assert(filterVal != nullptr);
298  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
299  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMasked(
300  ompLoc, bodyGenCB, finiCB, filterVal));
301  return success();
302 }
303 
304 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
305 static LogicalResult
306 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
307  LLVM::ModuleTranslation &moduleTranslation) {
308  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
309  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
310  // relying on captured variables.
311  LogicalResult bodyGenStatus = success();
312 
313  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
314  // MasterOp has only one region associated with it.
315  auto &region = cast<omp::MasterOp>(opInst).getRegion();
316  builder.restoreIP(codeGenIP);
317  convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation,
318  bodyGenStatus);
319  };
320 
321  // TODO: Perform finalization actions for variables. This has to be
322  // called for variables which have destructors/finalizers.
323  auto finiCB = [&](InsertPointTy codeGenIP) {};
324 
325  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
326  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
327  ompLoc, bodyGenCB, finiCB));
328  return success();
329 }
330 
331 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
332 static LogicalResult
333 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
334  LLVM::ModuleTranslation &moduleTranslation) {
335  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
336  auto criticalOp = cast<omp::CriticalOp>(opInst);
337  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
338  // relying on captured variables.
339  LogicalResult bodyGenStatus = success();
340 
341  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
342  // CriticalOp has only one region associated with it.
343  auto &region = cast<omp::CriticalOp>(opInst).getRegion();
344  builder.restoreIP(codeGenIP);
345  convertOmpOpRegions(region, "omp.critical.region", builder,
346  moduleTranslation, bodyGenStatus);
347  };
348 
349  // TODO: Perform finalization actions for variables. This has to be
350  // called for variables which have destructors/finalizers.
351  auto finiCB = [&](InsertPointTy codeGenIP) {};
352 
353  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
354  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
355  llvm::Constant *hint = nullptr;
356 
357  // If it has a name, it probably has a hint too.
358  if (criticalOp.getNameAttr()) {
359  // The verifiers in OpenMP Dialect guarentee that all the pointers are
360  // non-null
361  auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
362  auto criticalDeclareOp =
363  SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
364  symbolRef);
365  hint =
366  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
367  static_cast<int>(criticalDeclareOp.getHint()));
368  }
369  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(
370  ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint));
371  return success();
372 }
373 
374 /// Looks up from the operation from and returns the PrivateClauseOp with
375 /// name symbolName
376 static omp::PrivateClauseOp findPrivatizer(Operation *from,
377  SymbolRefAttr symbolName) {
378  omp::PrivateClauseOp privatizer =
379  SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
380  symbolName);
381  assert(privatizer && "privatizer not found in the symbol table");
382  return privatizer;
383 }
384 
385 /// Populates `privatizations` with privatization declarations used for the
386 /// given op.
387 /// TODO: generalise beyond ParallelOp
389  omp::ParallelOp op, SmallVectorImpl<omp::PrivateClauseOp> &privatizations) {
390  std::optional<ArrayAttr> attr = op.getPrivateSyms();
391  if (!attr)
392  return;
393 
394  privatizations.reserve(privatizations.size() + attr->size());
395  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
396  privatizations.push_back(findPrivatizer(op, symbolRef));
397  }
398 }
399 
400 /// Populates `reductions` with reduction declarations used in the given op.
401 template <typename T>
402 static void
405  std::optional<ArrayAttr> attr = op.getReductionSyms();
406  if (!attr)
407  return;
408 
409  reductions.reserve(reductions.size() + op.getNumReductionVars());
410  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
411  reductions.push_back(
412  SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
413  op, symbolRef));
414  }
415 }
416 
417 /// Translates the blocks contained in the given region and appends them to at
418 /// the current insertion point of `builder`. The operations of the entry block
419 /// are appended to the current insertion block. If set, `continuationBlockArgs`
420 /// is populated with translated values that correspond to the values
421 /// omp.yield'ed from the region.
422 static LogicalResult inlineConvertOmpRegions(
423  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
424  LLVM::ModuleTranslation &moduleTranslation,
425  SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
426  if (region.empty())
427  return success();
428 
429  // Special case for single-block regions that don't create additional blocks:
430  // insert operations without creating additional blocks.
431  if (llvm::hasSingleElement(region)) {
432  llvm::Instruction *potentialTerminator =
433  builder.GetInsertBlock()->empty() ? nullptr
434  : &builder.GetInsertBlock()->back();
435 
436  if (potentialTerminator && potentialTerminator->isTerminator())
437  potentialTerminator->removeFromParent();
438  moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
439 
440  if (failed(moduleTranslation.convertBlock(
441  region.front(), /*ignoreArguments=*/true, builder)))
442  return failure();
443 
444  // The continuation arguments are simply the translated terminator operands.
445  if (continuationBlockArgs)
446  llvm::append_range(
447  *continuationBlockArgs,
448  moduleTranslation.lookupValues(region.front().back().getOperands()));
449 
450  // Drop the mapping that is no longer necessary so that the same region can
451  // be processed multiple times.
452  moduleTranslation.forgetMapping(region);
453 
454  if (potentialTerminator && potentialTerminator->isTerminator()) {
455  llvm::BasicBlock *block = builder.GetInsertBlock();
456  if (block->empty()) {
457  // this can happen for really simple reduction init regions e.g.
458  // %0 = llvm.mlir.constant(0 : i32) : i32
459  // omp.yield(%0 : i32)
460  // because the llvm.mlir.constant (MLIR op) isn't converted into any
461  // llvm op
462  potentialTerminator->insertInto(block, block->begin());
463  } else {
464  potentialTerminator->insertAfter(&block->back());
465  }
466  }
467 
468  return success();
469  }
470 
471  LogicalResult bodyGenStatus = success();
473  llvm::BasicBlock *continuationBlock = convertOmpOpRegions(
474  region, blockName, builder, moduleTranslation, bodyGenStatus, &phis);
475  if (failed(bodyGenStatus))
476  return failure();
477  if (continuationBlockArgs)
478  llvm::append_range(*continuationBlockArgs, phis);
479  builder.SetInsertPoint(continuationBlock,
480  continuationBlock->getFirstInsertionPt());
481  return success();
482 }
483 
484 namespace {
485 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
486 /// store lambdas with capture.
487 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
488  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
489  llvm::Value *&)>;
490 using OwningAtomicReductionGen =
491  std::function<llvm::OpenMPIRBuilder::InsertPointTy(
492  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
493  llvm::Value *)>;
494 } // namespace
495 
496 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
497 /// reduction declaration. The generator uses `builder` but ignores its
498 /// insertion point.
499 static OwningReductionGen
500 makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
501  LLVM::ModuleTranslation &moduleTranslation) {
502  // The lambda is mutable because we need access to non-const methods of decl
503  // (which aren't actually mutating it), and we must capture decl by-value to
504  // avoid the dangling reference after the parent function returns.
505  OwningReductionGen gen =
506  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
507  llvm::Value *lhs, llvm::Value *rhs,
508  llvm::Value *&result) mutable {
509  moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs);
510  moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs);
511  builder.restoreIP(insertPoint);
513  if (failed(inlineConvertOmpRegions(decl.getReductionRegion(),
514  "omp.reduction.nonatomic.body",
515  builder, moduleTranslation, &phis)))
516  return llvm::OpenMPIRBuilder::InsertPointTy();
517  assert(phis.size() == 1);
518  result = phis[0];
519  return builder.saveIP();
520  };
521  return gen;
522 }
523 
524 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
525 /// given reduction declaration. The generator uses `builder` but ignores its
526 /// insertion point. Returns null if there is no atomic region available in the
527 /// reduction declaration.
528 static OwningAtomicReductionGen
529 makeAtomicReductionGen(omp::DeclareReductionOp decl,
530  llvm::IRBuilderBase &builder,
531  LLVM::ModuleTranslation &moduleTranslation) {
532  if (decl.getAtomicReductionRegion().empty())
533  return OwningAtomicReductionGen();
534 
535  // The lambda is mutable because we need access to non-const methods of decl
536  // (which aren't actually mutating it), and we must capture decl by-value to
537  // avoid the dangling reference after the parent function returns.
538  OwningAtomicReductionGen atomicGen =
539  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
540  llvm::Value *lhs, llvm::Value *rhs) mutable {
541  moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs);
542  moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs);
543  builder.restoreIP(insertPoint);
545  if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(),
546  "omp.reduction.atomic.body", builder,
547  moduleTranslation, &phis)))
548  return llvm::OpenMPIRBuilder::InsertPointTy();
549  assert(phis.empty());
550  return builder.saveIP();
551  };
552  return atomicGen;
553 }
554 
555 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
556 static LogicalResult
557 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
558  LLVM::ModuleTranslation &moduleTranslation) {
559  auto orderedOp = cast<omp::OrderedOp>(opInst);
560 
561  omp::ClauseDepend dependType = *orderedOp.getDoacrossDependType();
562  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
563  unsigned numLoops = *orderedOp.getDoacrossNumLoops();
564  SmallVector<llvm::Value *> vecValues =
565  moduleTranslation.lookupValues(orderedOp.getDoacrossDependVars());
566 
567  size_t indexVecValues = 0;
568  while (indexVecValues < vecValues.size()) {
569  SmallVector<llvm::Value *> storeValues;
570  storeValues.reserve(numLoops);
571  for (unsigned i = 0; i < numLoops; i++) {
572  storeValues.push_back(vecValues[indexVecValues]);
573  indexVecValues++;
574  }
575  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
576  findAllocaInsertPoint(builder, moduleTranslation);
577  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
578  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
579  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
580  }
581  return success();
582 }
583 
584 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
585 /// OpenMPIRBuilder.
586 static LogicalResult
587 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
588  LLVM::ModuleTranslation &moduleTranslation) {
589  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
590  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
591 
592  // TODO: The code generation for ordered simd directive is not supported yet.
593  if (orderedRegionOp.getParLevelSimd())
594  return failure();
595 
596  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
597  // relying on captured variables.
598  LogicalResult bodyGenStatus = success();
599 
600  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
601  // OrderedOp has only one region associated with it.
602  auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
603  builder.restoreIP(codeGenIP);
604  convertOmpOpRegions(region, "omp.ordered.region", builder,
605  moduleTranslation, bodyGenStatus);
606  };
607 
608  // TODO: Perform finalization actions for variables. This has to be
609  // called for variables which have destructors/finalizers.
610  auto finiCB = [&](InsertPointTy codeGenIP) {};
611 
612  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
613  builder.restoreIP(
614  moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
615  ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd()));
616  return bodyGenStatus;
617 }
618 
619 namespace {
620 /// Contains the arguments for an LLVM store operation
621 struct DeferredStore {
622  DeferredStore(llvm::Value *value, llvm::Value *address)
623  : value(value), address(address) {}
624 
625  llvm::Value *value;
626  llvm::Value *address;
627 };
628 } // namespace
629 
630 /// Allocate space for privatized reduction variables.
631 /// `deferredStores` contains information to create store operations which needs
632 /// to be inserted after all allocas
633 template <typename T>
634 static LogicalResult
636  llvm::IRBuilderBase &builder,
637  LLVM::ModuleTranslation &moduleTranslation,
638  const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
640  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
641  DenseMap<Value, llvm::Value *> &reductionVariableMap,
642  SmallVectorImpl<DeferredStore> &deferredStores,
643  llvm::ArrayRef<bool> isByRefs) {
644  llvm::IRBuilderBase::InsertPointGuard guard(builder);
645  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
646 
647  // delay creating stores until after all allocas
648  deferredStores.reserve(loop.getNumReductionVars());
649 
650  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
651  Region &allocRegion = reductionDecls[i].getAllocRegion();
652  if (isByRefs[i]) {
653  if (allocRegion.empty())
654  continue;
655 
657  if (failed(inlineConvertOmpRegions(allocRegion, "omp.reduction.alloc",
658  builder, moduleTranslation, &phis)))
659  return failure();
660  assert(phis.size() == 1 && "expected one allocation to be yielded");
661 
662  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
663 
664  // Allocate reduction variable (which is a pointer to the real reduction
665  // variable allocated in the inlined region)
666  llvm::Value *var = builder.CreateAlloca(
667  moduleTranslation.convertType(reductionDecls[i].getType()));
668  deferredStores.emplace_back(phis[0], var);
669 
670  privateReductionVariables[i] = var;
671  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
672  reductionVariableMap.try_emplace(loop.getReductionVars()[i], phis[0]);
673  } else {
674  assert(allocRegion.empty() &&
675  "allocaction is implicit for by-val reduction");
676  llvm::Value *var = builder.CreateAlloca(
677  moduleTranslation.convertType(reductionDecls[i].getType()));
678  moduleTranslation.mapValue(reductionArgs[i], var);
679  privateReductionVariables[i] = var;
680  reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
681  }
682  }
683 
684  return success();
685 }
686 
687 /// Map input arguments to reduction initialization region
688 template <typename T>
689 static void
692  DenseMap<Value, llvm::Value *> &reductionVariableMap,
693  unsigned i) {
694  // map input argument to the initialization region
695  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
696  Region &initializerRegion = reduction.getInitializerRegion();
697  Block &entry = initializerRegion.front();
698 
699  mlir::Value mlirSource = loop.getReductionVars()[i];
700  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
701  assert(llvmSource && "lookup reduction var");
702  moduleTranslation.mapValue(reduction.getInitializerMoldArg(), llvmSource);
703 
704  if (entry.getNumArguments() > 1) {
705  llvm::Value *allocation =
706  reductionVariableMap.lookup(loop.getReductionVars()[i]);
707  moduleTranslation.mapValue(reduction.getInitializerAllocArg(), allocation);
708  }
709 }
710 
711 /// Collect reduction info
712 template <typename T>
714  T loop, llvm::IRBuilderBase &builder,
715  LLVM::ModuleTranslation &moduleTranslation,
717  SmallVectorImpl<OwningReductionGen> &owningReductionGens,
718  SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens,
719  const ArrayRef<llvm::Value *> privateReductionVariables,
721  unsigned numReductions = loop.getNumReductionVars();
722 
723  for (unsigned i = 0; i < numReductions; ++i) {
724  owningReductionGens.push_back(
725  makeReductionGen(reductionDecls[i], builder, moduleTranslation));
726  owningAtomicReductionGens.push_back(
727  makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
728  }
729 
730  // Collect the reduction information.
731  reductionInfos.reserve(numReductions);
732  for (unsigned i = 0; i < numReductions; ++i) {
733  llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
734  if (owningAtomicReductionGens[i])
735  atomicGen = owningAtomicReductionGens[i];
736  llvm::Value *variable =
737  moduleTranslation.lookupValue(loop.getReductionVars()[i]);
738  reductionInfos.push_back(
739  {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
740  privateReductionVariables[i],
741  /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
742  owningReductionGens[i],
743  /*ReductionGenClang=*/nullptr, atomicGen});
744  }
745 }
746 
747 /// handling of DeclareReductionOp's cleanup region
// Inlines each non-empty cleanup region, mapping its single entry-block
// argument to the matching entry of `privateVariables`. When
// `shouldLoadCleanupRegionArg` is true the private variable is first loaded
// (the region expects a value); otherwise the pointer itself is passed.
// Returns failure if any region fails to convert.
// NOTE(review): the declaration line carrying this function's name and its
// first parameter (doxygen line 749, the cleanup-region list) is missing from
// this extraction — confirm against the upstream source.
748 static LogicalResult
750  llvm::ArrayRef<llvm::Value *> privateVariables,
751  LLVM::ModuleTranslation &moduleTranslation,
752  llvm::IRBuilderBase &builder, StringRef regionName,
753  bool shouldLoadCleanupRegionArg = true) {
754  for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
      // Declarations without a cleanup region need no work.
755  if (cleanupRegion->empty())
756  continue;
757 
758  // map the argument to the cleanup region
759  Block &entry = cleanupRegion->front();
760 
      // If the current block already ends in a terminator, emit the inlined
      // cleanup code before it rather than after it.
761  llvm::Instruction *potentialTerminator =
762  builder.GetInsertBlock()->empty() ? nullptr
763  : &builder.GetInsertBlock()->back();
764  if (potentialTerminator && potentialTerminator->isTerminator())
765  builder.SetInsertPoint(potentialTerminator);
      // Pass either the loaded value or the raw pointer, depending on what the
      // cleanup region's block argument expects.
766  llvm::Value *privateVarValue =
767  shouldLoadCleanupRegionArg
768  ? builder.CreateLoad(
769  moduleTranslation.convertType(entry.getArgument(0).getType()),
770  privateVariables[i])
771  : privateVariables[i];
772 
773  moduleTranslation.mapValue(entry.getArgument(0), privateVarValue);
774 
775  if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
776  moduleTranslation)))
777  return failure();
778 
779  // clear block argument mapping in case it needs to be re-created with a
780  // different source for another use of the same reduction decl
781  moduleTranslation.forgetMapping(*cleanupRegion);
782  }
783  return success();
784 }
785 
786 // TODO: not used by ParallelOp
787 template <class OP>
788 static LogicalResult createReductionsAndCleanup(
789  OP op, llvm::IRBuilderBase &builder,
790  LLVM::ModuleTranslation &moduleTranslation,
791  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
793  ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef) {
794  // Process the reductions if required.
795  if (op.getNumReductionVars() == 0)
796  return success();
797 
798  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
799 
800  // Create the reduction generators. We need to own them here because
801  // ReductionInfo only accepts references to the generators.
802  SmallVector<OwningReductionGen> owningReductionGens;
803  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
805  collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
806  owningReductionGens, owningAtomicReductionGens,
807  privateReductionVariables, reductionInfos);
808 
809  // The call to createReductions below expects the block to have a
810  // terminator. Create an unreachable instruction to serve as terminator
811  // and remove it later.
812  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
813  builder.SetInsertPoint(tempTerminator);
814  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
815  ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
816  isByRef, op.getNowait());
817  if (!contInsertPoint.getBlock())
818  return op->emitOpError() << "failed to convert reductions";
819  auto nextInsertionPoint =
820  ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
821  tempTerminator->eraseFromParent();
822  builder.restoreIP(nextInsertionPoint);
823 
824  // after the construct, deallocate private reduction variables
825  SmallVector<Region *> reductionRegions;
826  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
827  [](omp::DeclareReductionOp reductionDecl) {
828  return &reductionDecl.getCleanupRegion();
829  });
830  return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
831  moduleTranslation, builder,
832  "omp.reduction.cleanup");
833  return success();
834 }
835 
836 static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
837  if (!attr)
838  return {};
839  return *attr;
840 }
841 
842 // TODO: not used by omp.parallel
// Allocates the private reduction variables for `op` and initializes each one
// by inlining the corresponding declaration's initializer ("neutral element")
// region. Fills `privateReductionVariables` and `reductionVariableMap`.
// NOTE(review): two declaration lines are missing from this extraction — the
// `reductionDecls` parameter (doxygen line 848) and the per-iteration `phis`
// vector (doxygen line 872) — confirm against the upstream source.
843 template <typename OP>
844 static LogicalResult allocAndInitializeReductionVars(
845  OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
846  LLVM::ModuleTranslation &moduleTranslation,
847  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
849  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
850  DenseMap<Value, llvm::Value *> &reductionVariableMap,
851  llvm::ArrayRef<bool> isByRef) {
      // Fast path: no reduction clauses on this construct.
852  if (op.getNumReductionVars() == 0)
853  return success();
854 
855  SmallVector<DeferredStore> deferredStores;
856 
857  if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
858  allocaIP, reductionDecls,
859  privateReductionVariables, reductionVariableMap,
860  deferredStores, isByRef)))
861  return failure();
862 
863  // store result of the alloc region to the allocated pointer to the real
864  // reduction variable
865  for (auto [data, addr] : deferredStores)
866  builder.CreateStore(data, addr);
867 
868  // Before the loop, store the initial values of reductions into reduction
869  // variables. Although this could be done after allocas, we don't want to mess
870  // up with the alloca insertion point.
871  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
873 
874  // map block argument to initializer region
875  mapInitializationArgs(op, moduleTranslation, reductionDecls,
876  reductionVariableMap, i);
877 
      // Inline the initializer region; it yields the neutral element into
      // `phis` (declared on the missing doxygen line 872 above).
878  if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
879  "omp.reduction.neutral", builder,
880  moduleTranslation, &phis)))
881  return failure();
882  assert(phis.size() == 1 && "expected one value to be yielded from the "
883  "reduction neutral element declaration region");
884  if (isByRef[i]) {
885  if (!reductionDecls[i].getAllocRegion().empty())
886  // done in allocReductionVars
887  continue;
888 
889  // TODO: this path can be removed once all users of by-ref are updated to
890  // use an alloc region
891 
892  // Allocate reduction variable (which is a pointer to the real reduction
893  // variable allocated in the inlined region)
894  llvm::Value *var = builder.CreateAlloca(
895  moduleTranslation.convertType(reductionDecls[i].getType()));
896  // Store the result of the inlined region to the allocated reduction var
897  // ptr
898  builder.CreateStore(phis[0], var);
899 
900  privateReductionVariables[i] = var;
901  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
902  reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
903  } else {
904  // for by-ref case the store is inside of the reduction region
905  builder.CreateStore(phis[0], privateReductionVariables[i]);
906  // the rest was handled in allocByValReductionVars
907  }
908 
909  // forget the mapping for the initializer region because we might need a
910  // different mapping if this reduction declaration is re-used for a
911  // different variable
912  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
913  }
914 
915  return success();
916 }
917 
// Converts an omp.sections construct (and its nested omp.section regions) to
// LLVM IR via OpenMPIRBuilder::createSections. Reductions are allocated and
// initialized up front and combined/cleaned up afterwards.
// NOTE(review): several declaration lines are missing from this extraction
// (doxygen lines 937, 949, 958, 962) — they are flagged inline below; confirm
// against the upstream source.
918 static LogicalResult
919 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
920  LLVM::ModuleTranslation &moduleTranslation) {
921  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
922  using StorableBodyGenCallbackTy =
923  llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
924 
925  auto sectionsOp = cast<omp::SectionsOp>(opInst);
926 
927  // TODO: Support the following clauses: private, firstprivate, lastprivate,
928  // allocate
929  if (!sectionsOp.getAllocateVars().empty() ||
930  !sectionsOp.getAllocatorVars().empty() ||
931  !sectionsOp.getPrivateVars().empty() || sectionsOp.getPrivateSyms())
932  return opInst.emitError("unhandled clauses for translation to LLVM IR");
933 
934  llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionByref());
935  assert(isByRef.size() == sectionsOp.getNumReductionVars());
936 
      // NOTE(review): the `reductionDecls` vector declaration (doxygen line
      // 937) is missing here.
938  collectReductionDecls(sectionsOp, reductionDecls);
939  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
940  findAllocaInsertPoint(builder, moduleTranslation);
941 
942  SmallVector<llvm::Value *> privateReductionVariables(
943  sectionsOp.getNumReductionVars());
944  DenseMap<Value, llvm::Value *> reductionVariableMap;
945 
946  MutableArrayRef<BlockArgument> reductionArgs =
947  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
948 
      // NOTE(review): the head of this call (doxygen line 949, presumably
      // `if (failed(allocAndInitializeReductionVars(`) is missing here.
950  sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
951  reductionDecls, privateReductionVariables, reductionVariableMap,
952  isByRef)))
953  return failure();
954 
955  // Store the mapping between reduction variables and their private copies on
956  // ModuleTranslation stack. It can be then recovered when translating
957  // omp.reduce operations in a separate call.
      // NOTE(review): the stack-frame construction line (doxygen line 958) is
      // missing here.
959  moduleTranslation, reductionVariableMap);
960 
961  LogicalResult bodyGenStatus = success();
      // NOTE(review): the `sectionCBs` vector declaration (doxygen line 962)
      // is missing here.
963 
      // Build one body-generation callback per nested omp.section.
964  for (Operation &op : *sectionsOp.getRegion().begin()) {
965  auto sectionOp = dyn_cast<omp::SectionOp>(op);
966  if (!sectionOp) // omp.terminator
967  continue;
968 
969  Region &region = sectionOp.getRegion();
970  auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation,
971  &bodyGenStatus](InsertPointTy allocaIP,
972  InsertPointTy codeGenIP) {
973  builder.restoreIP(codeGenIP);
974 
975  // map the omp.section reduction block argument to the omp.sections block
976  // arguments
977  // TODO: this assumes that the only block arguments are reduction
978  // variables
979  assert(region.getNumArguments() ==
980  sectionsOp.getRegion().getNumArguments());
981  for (auto [sectionsArg, sectionArg] : llvm::zip_equal(
982  sectionsOp.getRegion().getArguments(), region.getArguments())) {
983  llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg);
984  assert(llvmVal);
985  moduleTranslation.mapValue(sectionArg, llvmVal);
986  }
987 
988  convertOmpOpRegions(region, "omp.section.region", builder,
989  moduleTranslation, bodyGenStatus);
990  };
991  sectionCBs.push_back(sectionCB);
992  }
993 
994  // No sections within omp.sections operation - skip generation. This situation
995  // is only possible if there is only a terminator operation inside the
996  // sections operation
997  if (sectionCBs.empty())
998  return success();
999 
1000  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
1001 
1002  // TODO: Perform appropriate actions according to the data-sharing
1003  // attribute (shared, private, firstprivate, ...) of variables.
1004  // Currently defaults to shared.
1005  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
1006  llvm::Value &vPtr,
1007  llvm::Value *&replacementValue) -> InsertPointTy {
1008  replacementValue = &vPtr;
1009  return codeGenIP;
1010  };
1011 
1012  // TODO: Perform finalization actions for variables. This has to be
1013  // called for variables which have destructors/finalizers.
1014  auto finiCB = [&](InsertPointTy codeGenIP) {};
1015 
1016  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1017  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1018  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
1019  ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
1020  sectionsOp.getNowait()));
1021 
1022  if (failed(bodyGenStatus))
1023  return bodyGenStatus;
1024 
1025  // Process the reductions if required.
1026  return createReductionsAndCleanup(sectionsOp, builder, moduleTranslation,
1027  allocaIP, reductionDecls,
1028  privateReductionVariables, isByRef);
1029 }
1030 
1031 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
// Privatization clauses are rejected; copyprivate variables and their
// associated copy functions are collected and forwarded to createSingle.
// NOTE(review): the declarations of the copyprivate vectors (doxygen lines
// 1051-1052, presumably `llvmCPVars` and `llvmCPFuncs`) are missing from this
// extraction — confirm against the upstream source.
1032 static LogicalResult
1033 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
1034  LLVM::ModuleTranslation &moduleTranslation) {
1035  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1036  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1037  LogicalResult bodyGenStatus = success();
1038  if (!singleOp.getPrivateVars().empty() || singleOp.getPrivateSyms())
1039  return singleOp.emitError("unhandled clauses for translation to LLVM IR");
1040 
      // Body callback: inline the single region at the codegen point.
1041  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1042  builder.restoreIP(codegenIP);
1043  convertOmpOpRegions(singleOp.getRegion(), "omp.single.region", builder,
1044  moduleTranslation, bodyGenStatus);
1045  };
1046  auto finiCB = [&](InsertPointTy codeGenIP) {};
1047 
1048  // Handle copyprivate
1049  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
1050  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateSyms();
1053  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
1054  llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
      // Resolve the copy-function symbol to its translated LLVM function.
1055  auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
1056  singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
1057  llvmCPFuncs.push_back(
1058  moduleTranslation.lookupFunction(llvmFuncOp.getName()));
1059  }
1060 
1061  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle(
1062  ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars, llvmCPFuncs));
1063  return bodyGenStatus;
1064 }
1065 
1066 // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder
// Unsupported clauses are rejected; num_teams bounds, thread_limit and the if
// expression are looked up (nullptr when absent) and passed to createTeams.
// NOTE(review): one line inside the body callback (doxygen line 1077,
// presumably a ModuleTranslation stack-frame save for the alloca insertion
// point) is missing from this extraction — confirm against upstream.
1067 static LogicalResult
1068 convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
1069  LLVM::ModuleTranslation &moduleTranslation) {
1070  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1071  LogicalResult bodyGenStatus = success();
1072  if (!op.getAllocatorVars().empty() || op.getReductionSyms() ||
1073  !op.getPrivateVars().empty() || op.getPrivateSyms())
1074  return op.emitError("unhandled clauses for translation to LLVM IR");
1075 
1076  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1078  moduleTranslation, allocaIP);
1079  builder.restoreIP(codegenIP);
1080  convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
1081  moduleTranslation, bodyGenStatus);
1082  };
1083 
      // Each clause value is optional; nullptr signals "not present" to the
      // OpenMPIRBuilder.
1084  llvm::Value *numTeamsLower = nullptr;
1085  if (Value numTeamsLowerVar = op.getNumTeamsLower())
1086  numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);
1087 
1088  llvm::Value *numTeamsUpper = nullptr;
1089  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
1090  numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);
1091 
1092  llvm::Value *threadLimit = nullptr;
1093  if (Value threadLimitVar = op.getThreadLimit())
1094  threadLimit = moduleTranslation.lookupValue(threadLimitVar);
1095 
1096  llvm::Value *ifExpr = nullptr;
1097  if (Value ifVar = op.getIfExpr())
1098  ifExpr = moduleTranslation.lookupValue(ifVar);
1099 
1100  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1101  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams(
1102  ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr));
1103  return bodyGenStatus;
1104 }
1105 
// Translates the `depend` clause entries (variable + kind attribute pairs)
// into OpenMPIRBuilder DependData records, appended to the output vector.
// NOTE(review): the output-parameter line (doxygen line 1109, presumably
// `SmallVectorImpl<llvm::OpenMPIRBuilder::DependData> &dds) {`) is missing
// from this extraction — confirm against the upstream source.
1106 static void
1107 buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
1108  LLVM::ModuleTranslation &moduleTranslation,
1110  if (dependVars.empty())
1111  return;
1112  for (auto dep : llvm::zip(dependVars, dependKinds->getValue())) {
1113  llvm::omp::RTLDependenceKindTy type;
1114  switch (
1115  cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
1116  case mlir::omp::ClauseTaskDepend::taskdependin:
1117  type = llvm::omp::RTLDependenceKindTy::DepIn;
1118  break;
1119  // The OpenMP runtime requires that the codegen for 'depend' clause for
1120  // 'out' dependency kind must be the same as codegen for 'depend' clause
1121  // with 'inout' dependency.
1122  case mlir::omp::ClauseTaskDepend::taskdependout:
1123  case mlir::omp::ClauseTaskDepend::taskdependinout:
1124  type = llvm::omp::RTLDependenceKindTy::DepInOut;
1125  break;
1126  };
1127  llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
1128  llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
1129  dds.emplace_back(dd);
1130  }
1131 }
1132 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
// Rejects unsupported clauses, builds the dependence list via buildDependData,
// and delegates outlining of the task body to createTask.
// NOTE(review): two lines are missing from this extraction — the stack-frame
// save inside the body callback (doxygen line 1147) and the `dds` vector
// declaration (doxygen line 1155) — confirm against the upstream source.
1133 static LogicalResult
1134 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
1135  LLVM::ModuleTranslation &moduleTranslation) {
1136  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1137  LogicalResult bodyGenStatus = success();
1138  if (taskOp.getUntiedAttr() || taskOp.getMergeableAttr() ||
1139  taskOp.getInReductionSyms() || taskOp.getPriority() ||
1140  !taskOp.getAllocateVars().empty() || !taskOp.getPrivateVars().empty() ||
1141  taskOp.getPrivateSyms()) {
1142  return taskOp.emitError("unhandled clauses for translation to LLVM IR");
1143  }
1144  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1145  // Save the alloca insertion point on ModuleTranslation stack for use in
1146  // nested regions.
1148  moduleTranslation, allocaIP);
1149 
1150  builder.restoreIP(codegenIP);
1151  convertOmpOpRegions(taskOp.getRegion(), "omp.task.region", builder,
1152  moduleTranslation, bodyGenStatus);
1153  };
1154 
      // Collect depend-clause entries into `dds` (declared on the missing
      // doxygen line 1155 above).
1156  buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
1157  moduleTranslation, dds);
1158 
1159  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1160  findAllocaInsertPoint(builder, moduleTranslation);
1161  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1162  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask(
1163  ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
1164  moduleTranslation.lookupValue(taskOp.getFinal()),
1165  moduleTranslation.lookupValue(taskOp.getIfExpr()), dds));
1166  return bodyGenStatus;
1167 }
1168 
1169 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
1170 static LogicalResult
1171 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
1172  LLVM::ModuleTranslation &moduleTranslation) {
1173  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1174  LogicalResult bodyGenStatus = success();
1175  if (!tgOp.getTaskReductionVars().empty() || !tgOp.getAllocateVars().empty()) {
1176  return tgOp.emitError("unhandled clauses for translation to LLVM IR");
1177  }
1178  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1179  builder.restoreIP(codegenIP);
1180  convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region", builder,
1181  moduleTranslation, bodyGenStatus);
1182  };
1183  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1184  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1185  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTaskgroup(
1186  ompLoc, allocaIP, bodyCB));
1187  return bodyGenStatus;
1188 }
1189 
1190 static LogicalResult
1191 convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder,
1192  LLVM::ModuleTranslation &moduleTranslation) {
1193  if (!twOp.getDependVars().empty() || twOp.getDependKinds() ||
1194  twOp.getNowait())
1195  return twOp.emitError("unhandled clauses for translation to LLVM IR");
1196 
1197  moduleTranslation.getOpenMPBuilder()->createTaskwait(builder.saveIP());
1198  return success();
1199 }
1200 
1201 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
// Builds one canonical loop per omp.loop_nest dimension, collapses them, and
// applies the workshare-loop scheduling; reductions are allocated/initialized
// before the loop and combined/cleaned up afterwards.
// NOTE(review): several lines are missing from this extraction (doxygen lines
// 1245, 1254, 1263-1264) — they are flagged inline below; confirm against the
// upstream source.
1202 static LogicalResult
1203 convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
1204  LLVM::ModuleTranslation &moduleTranslation) {
1205  auto wsloopOp = cast<omp::WsloopOp>(opInst);
1206  if (!wsloopOp.getAllocateVars().empty() ||
1207  !wsloopOp.getAllocatorVars().empty() ||
1208  !wsloopOp.getPrivateVars().empty() || wsloopOp.getPrivateSyms())
1209  return opInst.emitError("unhandled clauses for translation to LLVM IR");
1210 
1211  // FIXME: Here any other nested wrappers (e.g. omp.simd) are skipped, so
1212  // codegen for composite constructs like 'DO/FOR SIMD' will be the same as for
1213  // 'DO/FOR'.
1214  auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
1215 
1216  llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref());
1217  assert(isByRef.size() == wsloopOp.getNumReductionVars());
1218 
1219  // Static is the default.
1220  auto schedule =
1221  wsloopOp.getScheduleKind().value_or(omp::ClauseScheduleKind::Static);
1222 
1223  // Find the loop configuration.
1224  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]);
1225  llvm::Type *ivType = step->getType();
1226  llvm::Value *chunk = nullptr;
1227  if (wsloopOp.getScheduleChunk()) {
      // The chunk expression is coerced to the induction-variable type.
1228  llvm::Value *chunkVar =
1229  moduleTranslation.lookupValue(wsloopOp.getScheduleChunk());
1230  chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
1231  }
1232 
1233  SmallVector<omp::DeclareReductionOp> reductionDecls;
1234  collectReductionDecls(wsloopOp, reductionDecls);
1235  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1236  findAllocaInsertPoint(builder, moduleTranslation);
1237 
1238  SmallVector<llvm::Value *> privateReductionVariables(
1239  wsloopOp.getNumReductionVars());
1240  DenseMap<Value, llvm::Value *> reductionVariableMap;
1241 
1242  MutableArrayRef<BlockArgument> reductionArgs =
1243  cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
1244 
      // NOTE(review): the head of this call (doxygen line 1245, presumably
      // `if (failed(allocAndInitializeReductionVars(`) is missing here.
1246  wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP,
1247  reductionDecls, privateReductionVariables, reductionVariableMap,
1248  isByRef)))
1249  return failure();
1250 
1251  // Store the mapping between reduction variables and their private copies on
1252  // ModuleTranslation stack. It can be then recovered when translating
1253  // omp.reduce operations in a separate call.
      // NOTE(review): the stack-frame construction line (doxygen line 1254) is
      // missing here.
1255  moduleTranslation, reductionVariableMap);
1256 
1257  // Set up the source location value for OpenMP runtime.
1258  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1259 
1260  // Generator of the canonical loop body.
1261  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1262  // relying on captured variables.
      // NOTE(review): the declarations of `loopInfos` and `bodyInsertPoints`
      // (doxygen lines 1263-1264) are missing here.
1265  LogicalResult bodyGenStatus = success();
1266  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
1267  // Make sure further conversions know about the induction variable.
1268  moduleTranslation.mapValue(
1269  loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
1270 
1271  // Capture the body insertion point for use in nested loops. BodyIP of the
1272  // CanonicalLoopInfo always points to the beginning of the entry block of
1273  // the body.
1274  bodyInsertPoints.push_back(ip);
1275 
      // Only the innermost loop converts the region body.
1276  if (loopInfos.size() != loopOp.getNumLoops() - 1)
1277  return;
1278 
1279  // Convert the body of the loop.
1280  builder.restoreIP(ip);
1281  convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder,
1282  moduleTranslation, bodyGenStatus);
1283  };
1284 
1285  // Delegate actual loop construction to the OpenMP IRBuilder.
1286  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
1287  // loop, i.e. it has a positive step, uses signed integer semantics.
1288  // Reconsider this code when the nested loop operation clearly supports more
1289  // cases.
1290  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1291  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
1292  llvm::Value *lowerBound =
1293  moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]);
1294  llvm::Value *upperBound =
1295  moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]);
1296  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]);
1297 
1298  // Make sure loop trip count are emitted in the preheader of the outermost
1299  // loop at the latest so that they are all available for the new collapsed
1300  // loop will be created below.
1301  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
1302  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
1303  if (i != 0) {
1304  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back());
1305  computeIP = loopInfos.front()->getPreheaderIP();
1306  }
1307  loopInfos.push_back(ompBuilder->createCanonicalLoop(
1308  loc, bodyGen, lowerBound, upperBound, step,
1309  /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP));
1310 
1311  if (failed(bodyGenStatus))
1312  return failure();
1313  }
1314 
1315  // Collapse loops. Store the insertion point because LoopInfos may get
1316  // invalidated.
1317  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
1318  llvm::CanonicalLoopInfo *loopInfo =
1319  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
1320 
1321  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1322 
1323  // TODO: Handle doacross loops when the ordered clause has a parameter.
1324  bool isOrdered = wsloopOp.getOrdered().has_value();
1325  std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
1326  bool isSimd = wsloopOp.getScheduleSimd();
1327 
1328  ompBuilder->applyWorkshareLoop(
1329  ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(),
1330  convertToScheduleKind(schedule), chunk, isSimd,
1331  scheduleMod == omp::ScheduleModifier::monotonic,
1332  scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered);
1333 
1334  // Continue building IR after the loop. Note that the LoopInfo returned by
1335  // `collapseLoops` points inside the outermost loop and is intended for
1336  // potential further loop transformations. Use the insertion point stored
1337  // before collapsing loops instead.
1338  builder.restoreIP(afterIP);
1339 
1340  // Process the reductions if required.
1341  return createReductionsAndCleanup(wsloopOp, builder, moduleTranslation,
1342  allocaIP, reductionDecls,
1343  privateReductionVariables, isByRef);
1344 }
1345 
1346 /// Converts the OpenMP parallel operation to LLVM IR.
1347 static LogicalResult
1348 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
1349  LLVM::ModuleTranslation &moduleTranslation) {
1350  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1351  ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionByref());
1352  assert(isByRef.size() == opInst.getNumReductionVars());
1353 
1354  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1355  // relying on captured variables.
1356  LogicalResult bodyGenStatus = success();
1357  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1358 
1359  // Collect delayed privatization declarations
1360  MutableArrayRef<BlockArgument> privateBlockArgs =
1361  cast<omp::BlockArgOpenMPOpInterface>(*opInst).getPrivateBlockArgs();
1362  SmallVector<llvm::Value *> llvmPrivateVars;
1363  SmallVector<omp::PrivateClauseOp> privateDecls;
1364  llvmPrivateVars.reserve(privateBlockArgs.size());
1365  privateDecls.reserve(privateBlockArgs.size());
1366  collectPrivatizationDecls(opInst, privateDecls);
1367 
1368  // Collect reduction declarations
1369  SmallVector<omp::DeclareReductionOp> reductionDecls;
1370  collectReductionDecls(opInst, reductionDecls);
1371  SmallVector<llvm::Value *> privateReductionVariables(
1372  opInst.getNumReductionVars());
1373  SmallVector<DeferredStore> deferredStores;
1374 
1375  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1376  // Allocate private vars
1377  llvm::BranchInst *allocaTerminator =
1378  llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
1379  builder.SetInsertPoint(allocaTerminator);
1380  assert(allocaTerminator->getNumSuccessors() == 1 &&
1381  "This is an unconditional branch created by OpenMPIRBuilder");
1382  llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);
1383 
1384  // FIXME: Some of the allocation regions do more than just allocating.
1385  // They read from their block argument (amongst other non-alloca things).
1386  // When OpenMPIRBuilder outlines the parallel region into a different
1387  // function it places the loads for live in-values (such as these block
1388  // arguments) at the end of the entry block (because the entry block is
1389  // assumed to contain only allocas). Therefore, if we put these complicated
1390  // alloc blocks in the entry block, these will not dominate the availability
1391  // of the live-in values they are using. Fix this by adding a latealloc
1392  // block after the entry block to put these in (this also helps to avoid
1393  // mixing non-alloca code with allocas).
1394  // Alloc regions which do not use the block argument can still be placed in
1395  // the entry block (therefore keeping the allocas together).
1396  llvm::BasicBlock *privAllocBlock = nullptr;
1397  if (!privateBlockArgs.empty())
1398  privAllocBlock = splitBB(builder, true, "omp.private.latealloc");
1399  for (unsigned i = 0; i < privateBlockArgs.size(); ++i) {
1400  Region &allocRegion = privateDecls[i].getAllocRegion();
1401 
1402  // map allocation region block argument
1403  llvm::Value *nonPrivateVar =
1404  moduleTranslation.lookupValue(opInst.getPrivateVars()[i]);
1405  assert(nonPrivateVar);
1406  moduleTranslation.mapValue(privateDecls[i].getAllocMoldArg(),
1407  nonPrivateVar);
1408 
1409  // in-place convert the private allocation region
1411  if (privateDecls[i].getAllocMoldArg().getUses().empty()) {
1412  // TODO this should use
1413  // allocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca() so it goes before
1414  // the code for fetching the thread id. Not doing this for now to avoid
1415  // test churn.
1416  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1417  } else {
1418  builder.SetInsertPoint(privAllocBlock->getTerminator());
1419  }
1420  if (failed(inlineConvertOmpRegions(allocRegion, "omp.private.alloc",
1421  builder, moduleTranslation, &phis))) {
1422  bodyGenStatus = failure();
1423  return;
1424  }
1425  assert(phis.size() == 1 && "expected one allocation to be yielded");
1426 
1427  moduleTranslation.mapValue(privateBlockArgs[i], phis[0]);
1428  llvmPrivateVars.push_back(phis[0]);
1429 
1430  // clear alloc region block argument mapping in case it needs to be
1431  // re-created with a different source for another use of the same
1432  // reduction decl
1433  moduleTranslation.forgetMapping(allocRegion);
1434  }
1435 
1436  // Allocate reduction vars
1437  DenseMap<Value, llvm::Value *> reductionVariableMap;
1438 
1439  MutableArrayRef<BlockArgument> reductionArgs =
1440  cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs();
1441 
1442  allocaIP =
1443  InsertPointTy(allocaIP.getBlock(),
1444  allocaIP.getBlock()->getTerminator()->getIterator());
1445 
1446  if (failed(allocReductionVars(
1447  opInst, reductionArgs, builder, moduleTranslation, allocaIP,
1448  reductionDecls, privateReductionVariables, reductionVariableMap,
1449  deferredStores, isByRef)))
1450  bodyGenStatus = failure();
1451 
1452  // Apply copy region for firstprivate.
1453  bool needsFirstprivate =
1454  llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
1455  return privOp.getDataSharingType() ==
1456  omp::DataSharingClauseType::FirstPrivate;
1457  });
1458  if (needsFirstprivate) {
1459  // Find the end of the allocation blocks
1460  assert(afterAllocas->getSinglePredecessor());
1461  builder.SetInsertPoint(
1462  afterAllocas->getSinglePredecessor()->getTerminator());
1463  llvm::BasicBlock *copyBlock =
1464  splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1465  builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
1466  }
1467  for (unsigned i = 0; i < privateBlockArgs.size(); ++i) {
1468  if (privateDecls[i].getDataSharingType() !=
1469  omp::DataSharingClauseType::FirstPrivate)
1470  continue;
1471 
1472  // copyRegion implements `lhs = rhs`
1473  Region &copyRegion = privateDecls[i].getCopyRegion();
1474 
1475  // map copyRegion rhs arg
1476  llvm::Value *nonPrivateVar =
1477  moduleTranslation.lookupValue(opInst.getPrivateVars()[i]);
1478  assert(nonPrivateVar);
1479  moduleTranslation.mapValue(privateDecls[i].getCopyMoldArg(),
1480  nonPrivateVar);
1481 
1482  // map copyRegion lhs arg
1483  moduleTranslation.mapValue(privateDecls[i].getCopyPrivateArg(),
1484  llvmPrivateVars[i]);
1485 
1486  // in-place convert copy region
1487  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1488  if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
1489  builder, moduleTranslation))) {
1490  bodyGenStatus = failure();
1491  return;
1492  }
1493 
1494  // ignore unused value yielded from copy region
1495 
1496  // clear copy region block argument mapping in case it needs to be
1497  // re-created with different sources for reuse of the same reduction
1498  // decl
1499  moduleTranslation.forgetMapping(copyRegion);
1500  }
1501 
1502  // Initialize reduction vars
1503  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1504  llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
1505  allocaIP =
1506  InsertPointTy(allocaIP.getBlock(),
1507  allocaIP.getBlock()->getTerminator()->getIterator());
1508 
1509  builder.restoreIP(allocaIP);
1510  SmallVector<llvm::Value *> byRefVars(opInst.getNumReductionVars());
1511  for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
1512  if (isByRef[i]) {
1513  if (!reductionDecls[i].getAllocRegion().empty())
1514  continue;
1515 
1516  // TODO: remove after all users of by-ref are updated to use the alloc
1517  // region: Allocate reduction variable (which is a pointer to the real
1518  // reduciton variable allocated in the inlined region)
1519  byRefVars[i] = builder.CreateAlloca(
1520  moduleTranslation.convertType(reductionDecls[i].getType()));
1521  }
1522  }
1523 
1524  builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca());
1525 
1526  // insert stores deferred until after all allocas
1527  // these store the results of the alloc region into the allocation for the
1528  // pointer to the reduction variable
1529  for (auto [data, addr] : deferredStores)
1530  builder.CreateStore(data, addr);
1531 
1532  for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
1534 
1535  // map the block argument
1536  mapInitializationArgs(opInst, moduleTranslation, reductionDecls,
1537  reductionVariableMap, i);
1538  if (failed(inlineConvertOmpRegions(
1539  reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
1540  builder, moduleTranslation, &phis)))
1541  bodyGenStatus = failure();
1542  assert(phis.size() == 1 &&
1543  "expected one value to be yielded from the "
1544  "reduction neutral element declaration region");
1545 
1546  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1547 
1548  if (isByRef[i]) {
1549  if (!reductionDecls[i].getAllocRegion().empty())
1550  continue;
1551 
1552  // TODO: remove after all users of by-ref are updated to use the alloc
1553 
1554  // Store the result of the inlined region to the allocated reduction var
1555  // ptr
1556  builder.CreateStore(phis[0], byRefVars[i]);
1557 
1558  privateReductionVariables[i] = byRefVars[i];
1559  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
1560  reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]);
1561  } else {
1562  // for by-ref case the store is inside of the reduction init region
1563  builder.CreateStore(phis[0], privateReductionVariables[i]);
1564  // the rest is done in allocByValReductionVars
1565  }
1566 
1567  // clear block argument mapping in case it needs to be re-created with a
1568  // different source for another use of the same reduction decl
1569  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
1570  }
1571 
1572  // Store the mapping between reduction variables and their private copies on
1573  // ModuleTranslation stack. It can be then recovered when translating
1574  // omp.reduce operations in a separate call.
1576  moduleTranslation, reductionVariableMap);
1577 
1578  // Save the alloca insertion point on ModuleTranslation stack for use in
1579  // nested regions.
1581  moduleTranslation, allocaIP);
1582 
1583  // ParallelOp has only one region associated with it.
1584  builder.restoreIP(codeGenIP);
1585  auto regionBlock =
1586  convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
1587  moduleTranslation, bodyGenStatus);
1588 
1589  // Process the reductions if required.
1590  if (opInst.getNumReductionVars() > 0) {
1591  // Collect reduction info
1592  SmallVector<OwningReductionGen> owningReductionGens;
1593  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1595  collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
1596  owningReductionGens, owningAtomicReductionGens,
1597  privateReductionVariables, reductionInfos);
1598 
1599  // Move to region cont block
1600  builder.SetInsertPoint(regionBlock->getTerminator());
1601 
1602  // Generate reductions from info
1603  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1604  builder.SetInsertPoint(tempTerminator);
1605 
1606  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
1607  ompBuilder->createReductions(builder.saveIP(), allocaIP,
1608  reductionInfos, isByRef, false);
1609  if (!contInsertPoint.getBlock()) {
1610  bodyGenStatus = opInst->emitOpError() << "failed to convert reductions";
1611  return;
1612  }
1613 
1614  tempTerminator->eraseFromParent();
1615  builder.restoreIP(contInsertPoint);
1616  }
1617  };
1618 
1619  auto privCB = [](InsertPointTy allocaIP, InsertPointTy codeGenIP,
1620  llvm::Value &, llvm::Value &val, llvm::Value *&replVal) {
1621  // tell OpenMPIRBuilder not to do anything. We handled Privatisation in
1622  // bodyGenCB.
1623  replVal = &val;
1624  return codeGenIP;
1625  };
1626 
1627  // TODO: Perform finalization actions for variables. This has to be
1628  // called for variables which have destructors/finalizers.
1629  auto finiCB = [&](InsertPointTy codeGenIP) {
1630  InsertPointTy oldIP = builder.saveIP();
1631  builder.restoreIP(codeGenIP);
1632 
1633  // if the reduction has a cleanup region, inline it here to finalize the
1634  // reduction variables
1635  SmallVector<Region *> reductionCleanupRegions;
1636  llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions),
1637  [](omp::DeclareReductionOp reductionDecl) {
1638  return &reductionDecl.getCleanupRegion();
1639  });
1640  if (failed(inlineOmpRegionCleanup(
1641  reductionCleanupRegions, privateReductionVariables,
1642  moduleTranslation, builder, "omp.reduction.cleanup")))
1643  bodyGenStatus = failure();
1644 
1645  SmallVector<Region *> privateCleanupRegions;
1646  llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
1647  [](omp::PrivateClauseOp privatizer) {
1648  return &privatizer.getDeallocRegion();
1649  });
1650 
1651  if (failed(inlineOmpRegionCleanup(
1652  privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1653  "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1654  bodyGenStatus = failure();
1655 
1656  builder.restoreIP(oldIP);
1657  };
1658 
1659  llvm::Value *ifCond = nullptr;
1660  if (auto ifVar = opInst.getIfExpr())
1661  ifCond = moduleTranslation.lookupValue(ifVar);
1662  llvm::Value *numThreads = nullptr;
1663  if (auto numThreadsVar = opInst.getNumThreads())
1664  numThreads = moduleTranslation.lookupValue(numThreadsVar);
1665  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
1666  if (auto bind = opInst.getProcBindKind())
1667  pbKind = getProcBindKind(*bind);
1668  // TODO: Is the Parallel construct cancellable?
1669  bool isCancellable = false;
1670 
1671  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1672  findAllocaInsertPoint(builder, moduleTranslation);
1673  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1674 
1675  builder.restoreIP(
1676  ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
1677  ifCond, numThreads, pbKind, isCancellable));
1678 
1679  return bodyGenStatus;
1680 }
1681 
1682 /// Convert Order attribute to llvm::omp::OrderKind.
1683 static llvm::omp::OrderKind
1684 convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
1685  if (!o)
1686  return llvm::omp::OrderKind::OMP_ORDER_unknown;
1687  switch (*o) {
1688  case omp::ClauseOrderKind::Concurrent:
1689  return llvm::omp::OrderKind::OMP_ORDER_concurrent;
1690  }
1691  llvm_unreachable("Unknown ClauseOrderKind kind");
1692 }
1693 
1694 static LogicalResult simdOpSupported(omp::SimdOp op) {
1695  if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
1696  return op.emitError("linear clause not yet supported");
1697 
1698  if (!op.getPrivateVars().empty() || op.getPrivateSyms())
1699  return op.emitError("privatization clauses not yet supported");
1700 
1701  if (!op.getReductionVars().empty() || op.getReductionByref() ||
1702  op.getReductionSyms())
1703  return op.emitError("reduction clause not yet supported");
1704 
1705  return success();
1706 }
1707 
/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
///
/// The wrapped omp.loop_nest is lowered to a nest of canonical loops which are
/// then collapsed into a single loop; SIMD vectorization metadata (order,
/// simdlen, safelen, if-condition) is attached to the collapsed loop.
static LogicalResult
convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
               LLVM::ModuleTranslation &moduleTranslation) {
  auto simdOp = cast<omp::SimdOp>(opInst);
  auto loopOp = cast<omp::LoopNestOp>(simdOp.getWrappedLoop());

  // Bail out early on clauses that the translation does not support yet.
  if (failed(simdOpSupported(simdOp)))
    return failure();

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  // Generator of the canonical loop body.
  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
  // relying on captured variables.
  LogicalResult bodyGenStatus = success();
  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
    // Make sure further conversions know about the induction variable.
    moduleTranslation.mapValue(
        loopOp.getRegion().front().getArgument(loopInfos.size()), iv);

    // Capture the body insertion point for use in nested loops. BodyIP of the
    // CanonicalLoopInfo always points to the beginning of the entry block of
    // the body.
    bodyInsertPoints.push_back(ip);

    // Only the innermost invocation translates the region; outer invocations
    // just record their insertion points for the next nesting level.
    if (loopInfos.size() != loopOp.getNumLoops() - 1)
      return;

    // Convert the body of the loop.
    builder.restoreIP(ip);
    convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder,
                        moduleTranslation, bodyGenStatus);
  };

  // Delegate actual loop construction to the OpenMP IRBuilder.
  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
  // loop, i.e. it has a positive step, uses signed integer semantics.
  // Reconsider this code when the nested loop operation clearly supports more
  // cases.
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
    llvm::Value *lowerBound =
        moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[i]);
    llvm::Value *upperBound =
        moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[i]);
    llvm::Value *step = moduleTranslation.lookupValue(loopOp.getLoopSteps()[i]);

    // Make sure loop trip count are emitted in the preheader of the outermost
    // loop at the latest so that they are all available for the new collapsed
    // loop will be created below.
    llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
    llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
    if (i != 0) {
      // Nested loops are emitted at the enclosing loop's body insertion point;
      // their trip-count computation goes into the outermost preheader.
      loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
                                                       ompLoc.DL);
      computeIP = loopInfos.front()->getPreheaderIP();
    }
    loopInfos.push_back(ompBuilder->createCanonicalLoop(
        loc, bodyGen, lowerBound, upperBound, step,
        /*IsSigned=*/true, /*Inclusive=*/true, computeIP));

    if (failed(bodyGenStatus))
      return failure();
  }

  // Collapse loops. Capture the insertion point after the loop nest first,
  // before the loop structure is rewritten.
  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
  llvm::CanonicalLoopInfo *loopInfo =
      ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});

  // Materialize the optional simdlen/safelen clause values as i64 constants.
  llvm::ConstantInt *simdlen = nullptr;
  if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
    simdlen = builder.getInt64(simdlenVar.value());

  llvm::ConstantInt *safelen = nullptr;
  if (std::optional<uint64_t> safelenVar = simdOp.getSafelen())
    safelen = builder.getInt64(safelenVar.value());

  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
  llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder());
  // Attach the SIMD vectorization information to the collapsed loop; the
  // if-expression, when present, conditions vectorization at runtime.
  ompBuilder->applySimd(loopInfo, alignedVars,
                        simdOp.getIfExpr()
                            ? moduleTranslation.lookupValue(simdOp.getIfExpr())
                            : nullptr,
                        order, simdlen, safelen);

  builder.restoreIP(afterIP);
  return success();
}
1800 
1801 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
1802 static llvm::AtomicOrdering
1803 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
1804  if (!ao)
1805  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
1806 
1807  switch (*ao) {
1808  case omp::ClauseMemoryOrderKind::Seq_cst:
1809  return llvm::AtomicOrdering::SequentiallyConsistent;
1810  case omp::ClauseMemoryOrderKind::Acq_rel:
1811  return llvm::AtomicOrdering::AcquireRelease;
1812  case omp::ClauseMemoryOrderKind::Acquire:
1813  return llvm::AtomicOrdering::Acquire;
1814  case omp::ClauseMemoryOrderKind::Release:
1815  return llvm::AtomicOrdering::Release;
1816  case omp::ClauseMemoryOrderKind::Relaxed:
1817  return llvm::AtomicOrdering::Monotonic;
1818  }
1819  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
1820 }
1821 
1822 /// Convert omp.atomic.read operation to LLVM IR.
1823 static LogicalResult
1824 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
1825  LLVM::ModuleTranslation &moduleTranslation) {
1826 
1827  auto readOp = cast<omp::AtomicReadOp>(opInst);
1828  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1829 
1830  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1831 
1832  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrder());
1833  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
1834  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
1835 
1836  llvm::Type *elementType =
1837  moduleTranslation.convertType(readOp.getElementType());
1838 
1839  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
1840  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
1841  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
1842  return success();
1843 }
1844 
1845 /// Converts an omp.atomic.write operation to LLVM IR.
1846 static LogicalResult
1847 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
1848  LLVM::ModuleTranslation &moduleTranslation) {
1849  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
1850  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1851 
1852  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1853  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder());
1854  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
1855  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
1856  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
1857  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
1858  /*isVolatile=*/false};
1859  builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
1860  return success();
1861 }
1862 
/// Converts an LLVM dialect binary operation to the corresponding enum value
/// for `atomicrmw` supported binary operation.
/// Operations with no atomicrmw equivalent map to BAD_BINOP, which callers
/// use as the signal to fall back to a cmpxchg loop.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
      .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
      .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
      .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
      .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
      .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
      .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
      .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
      .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
      .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
      // Anything else cannot be expressed as a single atomicrmw.
      .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
}
1878 
/// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
///
/// When the update region consists of exactly one operation (plus the
/// terminator) operating on the region argument, a single `atomicrmw` may be
/// emitted; otherwise the IRBuilder falls back to a cmpxchg loop, with the
/// region body regenerated inside the loop by the `updateFn` callback.
static LogicalResult
convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
                       llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

  // Convert values and types.
  auto &innerOpList = opInst.getRegion().front().getOperations();
  bool isXBinopExpr{false};
  llvm::AtomicRMWInst::BinOp binop;
  mlir::Value mlirExpr;
  llvm::Value *llvmExpr = nullptr;
  llvm::Value *llvmX = nullptr;
  llvm::Type *llvmXElementType = nullptr;
  if (innerOpList.size() == 2) {
    // The two operations here are the update and the terminator.
    // Since we can identify the update operation, there is a possibility
    // that we can generate the atomicrmw instruction.
    mlir::Operation &innerOp = *opInst.getRegion().front().begin();
    if (!llvm::is_contained(innerOp.getOperands(),
                            opInst.getRegion().getArgument(0))) {
      return opInst.emitError("no atomic update operation with region argument"
                              " as operand found inside atomic.update region");
    }
    binop = convertBinOpToAtomic(innerOp);
    // Record whether `x` is the left operand of the update expression so that
    // non-commutative operations (e.g. sub) are emitted in the right order.
    isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
    mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  } else {
    // Since the update region includes more than one operation
    // we will resort to generating a cmpxchg loop.
    binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
  }
  llvmX = moduleTranslation.lookupValue(opInst.getX());
  llvmXElementType = moduleTranslation.convertType(
      opInst.getRegion().getArgument(0).getType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(opInst.getMemoryOrder());

  // Generate update code.
  // The callback translates the region body with the region argument mapped
  // to the current atomic value; it returns the value to be stored back.
  LogicalResult updateGenStatus = success();
  auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus](
                      llvm::Value *atomicx,
                      llvm::IRBuilder<> &builder) -> llvm::Value * {
    Block &bb = *opInst.getRegion().begin();
    moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
      updateGenStatus = (opInst.emitError()
                         << "unable to convert update operation to llvm IR");
      return nullptr;
    }
    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    // The yielded value is the updated value to store atomically.
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  builder.restoreIP(ompBuilder->createAtomicUpdate(
      ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn,
      isXBinopExpr));
  return updateGenStatus;
}
1951 
/// Converts an omp.atomic.capture operation to LLVM IR.
///
/// The capture region contains an atomic.read paired with either an
/// atomic.update or an atomic.write; the order of the two inner operations
/// determines whether the captured value is the old (postfix) or the new
/// (prefix) value of `x`.
static LogicalResult
convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
                        llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  mlir::Value mlirExpr;
  bool isXBinopExpr = false, isPostfixUpdate = false;
  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;

  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();

  assert((atomicUpdateOp || atomicWriteOp) &&
         "internal op must be an atomic.update or atomic.write op");

  if (atomicWriteOp) {
    // A write always captures the previous value of `x`.
    isPostfixUpdate = true;
    mlirExpr = atomicWriteOp.getExpr();
  } else {
    // Postfix capture: the update is the second operation, so the read
    // observed the value before the update.
    isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
                      atomicCaptureOp.getAtomicUpdateOp().getOperation();
    auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
    // Find the binary update operation that uses the region argument
    // and get the expression to update
    if (innerOpList.size() == 2) {
      mlir::Operation &innerOp = *atomicUpdateOp.getRegion().front().begin();
      if (!llvm::is_contained(innerOp.getOperands(),
                              atomicUpdateOp.getRegion().getArgument(0))) {
        return atomicUpdateOp.emitError(
            "no atomic update operation with region argument"
            " as operand found inside atomic.update region");
      }
      binop = convertBinOpToAtomic(innerOp);
      // Track operand order for non-commutative operations.
      isXBinopExpr =
          innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
      mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
    } else {
      // More than one operation in the update region: fall back to a
      // cmpxchg-loop lowering.
      binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
    }
  }

  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
  llvm::Value *llvmX =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
  llvm::Value *llvmV =
      moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
  llvm::Type *llvmXElementType = moduleTranslation.convertType(
      atomicCaptureOp.getAtomicReadOp().getElementType());
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};
  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
                                                      /*isSigned=*/false,
                                                      /*isVolatile=*/false};

  llvm::AtomicOrdering atomicOrdering =
      convertAtomicOrdering(atomicCaptureOp.getMemoryOrder());

  // Callback used by the IRBuilder when a cmpxchg loop is needed; it
  // regenerates the update-region body with the region argument bound to the
  // current atomic value and returns the value to store back.
  LogicalResult updateGenStatus = success();
  auto updateFn = [&](llvm::Value *atomicx,
                      llvm::IRBuilder<> &builder) -> llvm::Value * {
    if (atomicWriteOp)
      return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
    Block &bb = *atomicUpdateOp.getRegion().begin();
    moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
                               atomicx);
    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
    if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
      updateGenStatus = (atomicUpdateOp.emitError()
                         << "unable to convert update operation to llvm IR");
      return nullptr;
    }
    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
    assert(yieldop && yieldop.getResults().size() == 1 &&
           "terminator must be omp.yield op and it must have exactly one "
           "argument");
    return moduleTranslation.lookupValue(yieldop.getResults()[0]);
  };

  // Handle ambiguous alloca, if any.
  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  builder.restoreIP(ompBuilder->createAtomicCapture(
      ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
      binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr));
  return updateGenStatus;
}
2039 
2040 /// Converts an OpenMP Threadprivate operation into LLVM IR using
2041 /// OpenMPIRBuilder.
2042 static LogicalResult
2043 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
2044  LLVM::ModuleTranslation &moduleTranslation) {
2045  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2046  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
2047 
2048  Value symAddr = threadprivateOp.getSymAddr();
2049  auto *symOp = symAddr.getDefiningOp();
2050  if (!isa<LLVM::AddressOfOp>(symOp))
2051  return opInst.emitError("Addressing symbol not found");
2052  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
2053 
2054  LLVM::GlobalOp global =
2055  addressOfOp.getGlobal(moduleTranslation.symbolTable());
2056  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
2057  llvm::Type *type = globalValue->getValueType();
2058  llvm::TypeSize typeSize =
2059  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
2060  type);
2061  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
2062  llvm::StringRef suffix = llvm::StringRef(".cache", 6);
2063  std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str();
2064  llvm::Value *callInst =
2065  moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate(
2066  ompLoc, globalValue, size, cacheName);
2067  moduleTranslation.mapValue(opInst.getResult(0), callInst);
2068  return success();
2069 }
2070 
2071 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
2072 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
2073  switch (deviceClause) {
2074  case mlir::omp::DeclareTargetDeviceType::host:
2075  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
2076  break;
2077  case mlir::omp::DeclareTargetDeviceType::nohost:
2078  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
2079  break;
2080  case mlir::omp::DeclareTargetDeviceType::any:
2081  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
2082  break;
2083  }
2084  llvm_unreachable("unhandled device clause");
2085 }
2086 
/// Map an omp.declare_target capture clause (to/link/enter) to the
/// corresponding offload-entries global-variable entry kind.
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
    mlir::omp::DeclareTargetCaptureClause captureClause) {
  switch (captureClause) {
  case mlir::omp::DeclareTargetCaptureClause::to:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
  case mlir::omp::DeclareTargetCaptureClause::link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
  case mlir::omp::DeclareTargetCaptureClause::enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
  }
  llvm_unreachable("unhandled capture clause");
}
2100 
2101 static llvm::SmallString<64>
2102 getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
2103  llvm::OpenMPIRBuilder &ompBuilder) {
2104  llvm::SmallString<64> suffix;
2105  llvm::raw_svector_ostream os(suffix);
2106  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
2107  auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
2108  auto fileInfoCallBack = [&loc]() {
2109  return std::pair<std::string, uint64_t>(
2110  llvm::StringRef(loc.getFilename()), loc.getLine());
2111  };
2112 
2113  os << llvm::format(
2114  "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
2115  }
2116  os << "_decl_tgt_ref_ptr";
2117 
2118  return suffix;
2119 }
2120 
2121 static bool isDeclareTargetLink(mlir::Value value) {
2122  if (auto addressOfOp =
2123  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
2124  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
2125  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
2126  if (auto declareTargetGlobal =
2127  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
2128  if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
2129  mlir::omp::DeclareTargetCaptureClause::link)
2130  return true;
2131  }
2132  return false;
2133 }
2134 
// Returns the reference pointer generated by the lowering of the declare target
// operation in cases where the link clause is used or the to clause is used in
// USM mode. Returns nullptr when the value does not refer to such a global.
static llvm::Value *
                         LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

  // An easier way to do this may just be to keep track of any pointer
  // references and their mapping to their respective operation
  if (auto addressOfOp =
          llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
    if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
            addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
                addressOfOp.getGlobalName()))) {

      if (auto declareTargetGlobal =
              llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
                  gOp.getOperation())) {

        // In this case, we must utilise the reference pointer generated by the
        // declare target operation, similar to Clang
        if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
             mlir::omp::DeclareTargetCaptureClause::link) ||
            (declareTargetGlobal.getDeclareTargetCaptureClause() ==
                 mlir::omp::DeclareTargetCaptureClause::to &&
             ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
          llvm::SmallString<64> suffix =
              getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);

          // The symbol may itself already be the ref-pointer (its name ends
          // in the suffix); otherwise look up "<name><suffix>".
          if (gOp.getSymName().contains(suffix))
            return moduleTranslation.getLLVMModule()->getNamedValue(
                gOp.getSymName());

          return moduleTranslation.getLLVMModule()->getNamedValue(
              (gOp.getSymName().str() + suffix.str()).str());
        }
      }
    }
  }

  // Not a declare-target link/USM-to global: no reference pointer to use.
  return nullptr;
}
2178 
namespace {
// A small helper structure to contain data gathered
// for map lowering and coalesce it into one area,
// avoiding extra computations such as searches in the
// llvm module for lowered mapped variables or checking
// if something is declare target (and retrieving the
// value) more than necessary.
struct MapInfoData : llvm::OpenMPIRBuilder::MapInfosTy {
  // Per-map flag: the mapped value is a declare-target symbol.
  llvm::SmallVector<bool, 4> IsDeclareTarget;
  // Per-map flag: the mapped value is a member of a larger mapped object.
  llvm::SmallVector<bool, 4> IsAMember;
  // Identify if mapping was added by mapClause or use_device clauses.
  llvm::SmallVector<bool, 4> IsAMapping;
  // Stripped off array/pointer to get the underlying
  // element type
 
  /// Append arrays in \a CurInfo.
  /// Extends every parallel vector of this structure (including the
  /// base-class MapInfosTy arrays) with the entries from \a CurInfo.
  void append(MapInfoData &CurInfo) {
    IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
                           CurInfo.IsDeclareTarget.end());
    MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
    OriginalValue.append(CurInfo.OriginalValue.begin(),
                         CurInfo.OriginalValue.end());
    BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
    llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
  }
};
} // namespace
2209 
2210 uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
2211  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
2212  arrTy.getElementType()))
2213  return getArrayElementSizeInBits(nestedArrTy, dl);
2214  return dl.getTypeSizeInBits(arrTy.getElementType());
2215 }
2216 
// This function calculates the size to be offloaded for a specified type, given
// its associated map clause (which can contain bounds information which affects
// the total size), this size is calculated based on the underlying element type
// e.g. given a 1-D array of ints, we will calculate the size from the integer
// type * number of elements in the array. This size can be used in other
// calculations but is ultimately used as an argument to the OpenMP runtimes
// kernel argument structure which is generated through the combinedInfo data
// structures.
// This function is somewhat equivalent to Clang's getExprTypeSize inside of
// CGOpenMPRuntime.cpp.
llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
                            Operation *clauseOp, llvm::Value *basePointer,
                            llvm::Type *baseType, llvm::IRBuilderBase &builder,
                            LLVM::ModuleTranslation &moduleTranslation) {
  if (auto memberClause =
          mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
    // This calculates the size to transfer based on bounds and the underlying
    // element type, provided bounds have been specified (Fortran
    // pointers/allocatables/target and arrays that have sections specified fall
    // into this as well).
    if (!memberClause.getBounds().empty()) {
      // Runtime product of the extents of every bounded dimension.
      llvm::Value *elementCount = builder.getInt64(1);
      for (auto bounds : memberClause.getBounds()) {
        if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
                bounds.getDefiningOp())) {
          // The below calculation for the size to be mapped calculated from the
          // map.info's bounds is: (elemCount * [UB - LB] + 1), later we
          // multiply by the underlying element types byte size to get the full
          // size to be offloaded based on the bounds
          elementCount = builder.CreateMul(
              elementCount,
              builder.CreateAdd(
                  builder.CreateSub(
                      moduleTranslation.lookupValue(boundOp.getUpperBound()),
                      moduleTranslation.lookupValue(boundOp.getLowerBound())),
                  builder.getInt64(1)));
        }
      }

      // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives
      // the size in inconsistent byte or bit format.
      uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
      // For arrays the relevant size is that of the innermost element type.
      if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
        underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);

      // The size in bytes x number of elements, the sizeInBytes stored is
      // the underlying types size, e.g. if ptr<i32>, it'll be the i32's
      // size, so we do some on the fly runtime math to get the size in
      // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
      // some adjustment for members with more complex types.
      return builder.CreateMul(elementCount,
                               builder.getInt64(underlyingTypeSzInBits / 8));
    }
  }

  // No bounds information: fall back to the static size of the type.
  return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
}
2274 
2276  MapInfoData &mapData, SmallVectorImpl<Value> &mapVars,
2277  LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl,
2278  llvm::IRBuilderBase &builder, const ArrayRef<Value> &useDevPtrOperands = {},
2279  const ArrayRef<Value> &useDevAddrOperands = {}) {
2280  auto checkIsAMember = [](const auto &mapVars, auto mapOp) {
2281  // Check if this is a member mapping and correctly assign that it is, if
2282  // it is a member of a larger object.
2283  // TODO: Need better handling of members, and distinguishing of members
2284  // that are implicitly allocated on device vs explicitly passed in as
2285  // arguments.
2286  // TODO: May require some further additions to support nested record
2287  // types, i.e. member maps that can have member maps.
2288  for (Value mapValue : mapVars) {
2289  auto map = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
2290  for (auto member : map.getMembers())
2291  if (member == mapOp)
2292  return true;
2293  }
2294  return false;
2295  };
2296 
2297  // Process MapOperands
2298  for (Value mapValue : mapVars) {
2299  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
2300  Value offloadPtr =
2301  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
2302  mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
2303  mapData.Pointers.push_back(mapData.OriginalValue.back());
2304 
2305  if (llvm::Value *refPtr =
2306  getRefPtrIfDeclareTarget(offloadPtr,
2307  moduleTranslation)) { // declare target
2308  mapData.IsDeclareTarget.push_back(true);
2309  mapData.BasePointers.push_back(refPtr);
2310  } else { // regular mapped variable
2311  mapData.IsDeclareTarget.push_back(false);
2312  mapData.BasePointers.push_back(mapData.OriginalValue.back());
2313  }
2314 
2315  mapData.BaseType.push_back(
2316  moduleTranslation.convertType(mapOp.getVarType()));
2317  mapData.Sizes.push_back(
2318  getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
2319  mapData.BaseType.back(), builder, moduleTranslation));
2320  mapData.MapClause.push_back(mapOp.getOperation());
2321  mapData.Types.push_back(
2322  llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value()));
2323  mapData.Names.push_back(LLVM::createMappingInformation(
2324  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
2325  mapData.DevicePointers.push_back(llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2326  mapData.IsAMapping.push_back(true);
2327  mapData.IsAMember.push_back(checkIsAMember(mapVars, mapOp));
2328  }
2329 
2330  auto findMapInfo = [&mapData](llvm::Value *val,
2331  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
2332  unsigned index = 0;
2333  bool found = false;
2334  for (llvm::Value *basePtr : mapData.OriginalValue) {
2335  if (basePtr == val && mapData.IsAMapping[index]) {
2336  found = true;
2337  mapData.Types[index] |=
2338  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
2339  mapData.DevicePointers[index] = devInfoTy;
2340  }
2341  index++;
2342  }
2343  return found;
2344  };
2345 
2346  // Process useDevPtr(Addr)Operands
2347  auto addDevInfos = [&](const llvm::ArrayRef<Value> &useDevOperands,
2348  llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
2349  for (Value mapValue : useDevOperands) {
2350  auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
2351  Value offloadPtr =
2352  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
2353  llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
2354 
2355  // Check if map info is already present for this entry.
2356  if (!findMapInfo(origValue, devInfoTy)) {
2357  mapData.OriginalValue.push_back(origValue);
2358  mapData.Pointers.push_back(mapData.OriginalValue.back());
2359  mapData.IsDeclareTarget.push_back(false);
2360  mapData.BasePointers.push_back(mapData.OriginalValue.back());
2361  mapData.BaseType.push_back(
2362  moduleTranslation.convertType(mapOp.getVarType()));
2363  mapData.Sizes.push_back(builder.getInt64(0));
2364  mapData.MapClause.push_back(mapOp.getOperation());
2365  mapData.Types.push_back(
2366  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
2367  mapData.Names.push_back(LLVM::createMappingInformation(
2368  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
2369  mapData.DevicePointers.push_back(devInfoTy);
2370  mapData.IsAMapping.push_back(false);
2371  mapData.IsAMember.push_back(checkIsAMember(useDevOperands, mapOp));
2372  }
2373  }
2374  };
2375 
2376  addDevInfos(useDevAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
2377  addDevInfos(useDevPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
2378 }
2379 
2380 static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
2381  auto *res = llvm::find(mapData.MapClause, memberOp);
2382  assert(res != mapData.MapClause.end() &&
2383  "MapInfoOp for member not found in MapData, cannot return index");
2384  return std::distance(mapData.MapClause.begin(), res);
2385 }
2386 
2387 static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
2388  bool first) {
2389  DenseIntElementsAttr indexAttr = mapInfo.getMembersIndexAttr();
2390 
2391  // Only 1 member has been mapped, we can return it.
2392  if (indexAttr.size() == 1)
2393  if (auto mapOp =
2394  dyn_cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp()))
2395  return mapOp;
2396 
2397  llvm::ArrayRef<int64_t> shape = indexAttr.getShapedType().getShape();
2398  llvm::SmallVector<size_t> indices(shape[0]);
2399  std::iota(indices.begin(), indices.end(), 0);
2400 
2401  llvm::sort(indices.begin(), indices.end(),
2402  [&](const size_t a, const size_t b) {
2403  auto indexValues = indexAttr.getValues<int32_t>();
2404  for (int i = 0; i < shape[1]; ++i) {
2405  int aIndex = indexValues[a * shape[1] + i];
2406  int bIndex = indexValues[b * shape[1] + i];
2407 
2408  if (aIndex == bIndex)
2409  continue;
2410 
2411  if (aIndex != -1 && bIndex == -1)
2412  return false;
2413 
2414  if (aIndex == -1 && bIndex != -1)
2415  return true;
2416 
2417  // A is earlier in the record type layout than B
2418  if (aIndex < bIndex)
2419  return first;
2420 
2421  if (bIndex < aIndex)
2422  return !first;
2423  }
2424 
2425  // Iterated the entire list and couldn't make a decision, all
2426  // elements were likely the same. Return false, since the sort
2427  // comparator should return false for equal elements.
2428  return false;
2429  });
2430 
2431  return llvm::cast<omp::MapInfoOp>(
2432  mapInfo.getMembers()[indices.front()].getDefiningOp());
2433 }
2434 
2435 /// This function calculates the array/pointer offset for map data provided
2436 /// with bounds operations, e.g. when provided something like the following:
2437 ///
2438 /// Fortran
2439 /// map(tofrom: array(2:5, 3:2))
2440 /// or
2441 /// C++
2442 /// map(tofrom: array[1:4][2:3])
2443 /// We must calculate the initial pointer offset to pass across, this function
2444 /// performs this using bounds.
2445 ///
2446 /// NOTE: which while specified in row-major order it currently needs to be
2447 /// flipped for Fortran's column order array allocation and access (as
2448 /// opposed to C++'s row-major, hence the backwards processing where order is
2449 /// important). This is likely important to keep in mind for the future when
2450 /// we incorporate a C++ frontend, both frontends will need to agree on the
2451 /// ordering of generated bounds operations (one may have to flip them) to
2452 /// make the below lowering frontend agnostic. The offload size
/// calculation may also have to be adjusted for C++.
2454 std::vector<llvm::Value *>
2456  llvm::IRBuilderBase &builder, bool isArrayTy,
2457  OperandRange bounds) {
2458  std::vector<llvm::Value *> idx;
2459  // There's no bounds to calculate an offset from, we can safely
2460  // ignore and return no indices.
2461  if (bounds.empty())
2462  return idx;
2463 
2464  // If we have an array type, then we have its type so can treat it as a
2465  // normal GEP instruction where the bounds operations are simply indexes
2466  // into the array. We currently do reverse order of the bounds, which
2467  // I believe leans more towards Fortran's column-major in memory.
2468  if (isArrayTy) {
2469  idx.push_back(builder.getInt64(0));
2470  for (int i = bounds.size() - 1; i >= 0; --i) {
2471  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
2472  bounds[i].getDefiningOp())) {
2473  idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
2474  }
2475  }
2476  } else {
2477  // If we do not have an array type, but we have bounds, then we're dealing
2478  // with a pointer that's being treated like an array and we have the
2479  // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
2480  // address (pointer pointing to the actual data) so we must caclulate the
2481  // offset using a single index which the following two loops attempts to
2482  // compute.
2483 
2484  // Calculates the size offset we need to make per row e.g. first row or
2485  // column only needs to be offset by one, but the next would have to be
2486  // the previous row/column offset multiplied by the extent of current row.
2487  //
2488  // For example ([1][10][100]):
2489  //
2490  // - First row/column we move by 1 for each index increment
2491  // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
2492  // current) for 10 for each index increment
2493  // - Third row/column we would move by 10 (second row/column) *
2494  // (extent/size of current) 100 for 1000 for each index increment
2495  std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
2496  for (size_t i = 1; i < bounds.size(); ++i) {
2497  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
2498  bounds[i].getDefiningOp())) {
2499  dimensionIndexSizeOffset.push_back(builder.CreateMul(
2500  moduleTranslation.lookupValue(boundOp.getExtent()),
2501  dimensionIndexSizeOffset[i - 1]));
2502  }
2503  }
2504 
2505  // Now that we have calculated how much we move by per index, we must
2506  // multiply each lower bound offset in indexes by the size offset we
2507  // have calculated in the previous and accumulate the results to get
2508  // our final resulting offset.
2509  for (int i = bounds.size() - 1; i >= 0; --i) {
2510  if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
2511  bounds[i].getDefiningOp())) {
2512  if (idx.empty())
2513  idx.emplace_back(builder.CreateMul(
2514  moduleTranslation.lookupValue(boundOp.getLowerBound()),
2515  dimensionIndexSizeOffset[i]));
2516  else
2517  idx.back() = builder.CreateAdd(
2518  idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
2519  boundOp.getLowerBound()),
2520  dimensionIndexSizeOffset[i]));
2521  }
2522  }
2523  }
2524 
2525  return idx;
2526 }
2527 
2528 // This creates two insertions into the MapInfosTy data structure for the
2529 // "parent" of a set of members, (usually a container e.g.
2530 // class/structure/derived type) when subsequent members have also been
2531 // explicitly mapped on the same map clause. Certain types, such as Fortran
2532 // descriptors are mapped like this as well, however, the members are
2533 // implicit as far as a user is concerned, but we must explicitly map them
2534 // internally.
2535 //
2536 // This function also returns the memberOfFlag for this particular parent,
// which is utilised in subsequent member mappings (by modifying their map type
2538 // with it) to indicate that a member is part of this parent and should be
2539 // treated by the runtime as such. Important to achieve the correct mapping.
2540 //
2541 // This function borrows a lot from Clang's emitCombinedEntry function
2542 // inside of CGOpenMPRuntime.cpp
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
    LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
    llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
    llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
    uint64_t mapDataIndex, bool isTargetParams) {
  // Map the first segment of our structure. Only entries passed as kernel
  // arguments receive the TARGET_PARAM flag.
  combinedInfo.Types.emplace_back(
      isTargetParams
          ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
          : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE);
  combinedInfo.DevicePointers.emplace_back(
      mapData.DevicePointers[mapDataIndex]);
  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
      mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);

  // Calculate size of the parent object being mapped based on the
  // addresses at runtime, highAddr - lowAddr = size. This of course
  // doesn't factor in allocated data like pointers, hence the further
  // processing of members specified by users, or in the case of
  // Fortran pointers and allocatables, the mapping of the pointed to
  // data by the descriptor (which itself, is a structure containing
  // runtime information on the dynamically allocated data).
  auto parentClause =
      llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);

  llvm::Value *lowAddr, *highAddr;
  if (!parentClause.getPartialMap()) {
    // Whole-object map: the runtime span is [object, object + 1).
    lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
                                        builder.getPtrTy());
    highAddr = builder.CreatePointerCast(
        builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
                                   mapData.Pointers[mapDataIndex], 1),
        builder.getPtrTy());
    combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
  } else {
    // Partial map (only members are mapped): the span runs from the member
    // laid out first in the record to one past the member laid out last.
    auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
    int firstMemberIdx = getMapDataMemberIdx(
        mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
    lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
                                        builder.getPtrTy());
    int lastMemberIdx = getMapDataMemberIdx(
        mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
    highAddr = builder.CreatePointerCast(
        builder.CreateGEP(mapData.BaseType[lastMemberIdx],
                          mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
        builder.getPtrTy());
    combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
  }

  // Size is computed at runtime as a pointer difference in i8 units.
  llvm::Value *size = builder.CreateIntCast(
      builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
      builder.getInt64Ty(),
      /*isSigned=*/false);
  combinedInfo.Sizes.push_back(size);

  // TODO: This will need to be expanded to include the whole host of logic for
  // the map flags that Clang currently supports (e.g. it should take the map
  // flag of the parent map flag, remove the OMP_MAP_TARGET_PARAM and do some
  // further case specific flag modifications). For the moment, it handles what
  // we support as expected.
  llvm::omp::OpenMPOffloadMappingFlags mapFlag =
      llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;

  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
      ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);

  // This creates the initial MEMBER_OF mapping that consists of
  // the parent/top level container (same as above effectively, except
  // with a fixed initial compile time size and separate maptype which
  // indicates the true map type (tofrom etc.). This parent mapping is
  // only relevant if the structure in its totality is being mapped,
  // otherwise the above suffices.
  if (!parentClause.getPartialMap()) {
    combinedInfo.Types.emplace_back(mapFlag);
    // NOTE(review): the DeviceInfoTy argument of the following emplace_back
    // appears truncated in this extraction — verify against upstream source.
    combinedInfo.DevicePointers.emplace_back(
    combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
        mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
    combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
    combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
    combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
  }
  return memberOfFlag;
}
2629 
2630 // The intent is to verify if the mapped data being passed is a
2631 // pointer -> pointee that requires special handling in certain cases,
2632 // e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
2633 //
2634 // There may be a better way to verify this, but unfortunately with
2635 // opaque pointers we lose the ability to easily check if something is
2636 // a pointer whilst maintaining access to the underlying type.
2637 static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
2638  // If we have a varPtrPtr field assigned then the underlying type is a pointer
2639  if (mapOp.getVarPtrPtr())
2640  return true;
2641 
2642  // If the map data is declare target with a link clause, then it's represented
2643  // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
2644  // no relation to pointers.
2645  if (isDeclareTargetLink(mapOp.getVarPtr()))
2646  return true;
2647 
2648  return false;
2649 }
2650 
2651 // This function is intended to add explicit mappings of members
2653  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
2654  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
2655  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
2656  uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
2657 
2658  auto parentClause =
2659  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
2660 
2661  for (auto mappedMembers : parentClause.getMembers()) {
2662  auto memberClause =
2663  llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp());
2664  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
2665 
2666  assert(memberDataIdx >= 0 && "could not find mapped member of structure");
2667 
2668  // Same MemberOfFlag to indicate its link with parent and other members
2669  // of.
2670  auto mapFlag =
2671  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType().value());
2672  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
2673  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
2674  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
2675  if (checkIfPointerMap(memberClause))
2676  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
2677 
2678  combinedInfo.Types.emplace_back(mapFlag);
2679  combinedInfo.DevicePointers.emplace_back(
2680  mapData.DevicePointers[memberDataIdx]);
2681  combinedInfo.Names.emplace_back(
2682  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
2683  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
2684  combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
2685  combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
2686  }
2687 }
2688 
2689 static void
2690 processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
2691  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
2692  bool isTargetParams, int mapDataParentIdx = -1) {
2693  // Declare Target Mappings are excluded from being marked as
2694  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
2695  // marked with OMP_MAP_PTR_AND_OBJ instead.
2696  auto mapFlag = mapData.Types[mapDataIdx];
2697  auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
2698 
2699  bool isPtrTy = checkIfPointerMap(mapInfoOp);
2700  if (isPtrTy)
2701  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
2702 
2703  if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx])
2704  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
2705 
2706  if (mapInfoOp.getMapCaptureType().value() ==
2707  omp::VariableCaptureKind::ByCopy &&
2708  !isPtrTy)
2709  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
2710 
2711  // if we're provided a mapDataParentIdx, then the data being mapped is
2712  // part of a larger object (in a parent <-> member mapping) and in this
2713  // case our BasePointer should be the parent.
2714  if (mapDataParentIdx >= 0)
2715  combinedInfo.BasePointers.emplace_back(
2716  mapData.BasePointers[mapDataParentIdx]);
2717  else
2718  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
2719 
2720  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
2721  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
2722  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
2723  combinedInfo.Types.emplace_back(mapFlag);
2724  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
2725 }
2726 
2728  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
2729  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
2730  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
2731  uint64_t mapDataIndex, bool isTargetParams) {
2732  auto parentClause =
2733  llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
2734 
2735  // If we have a partial map (no parent referenced in the map clauses of the
2736  // directive, only members) and only a single member, we do not need to bind
2737  // the map of the member to the parent, we can pass the member separately.
2738  if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) {
2739  auto memberClause = llvm::cast<omp::MapInfoOp>(
2740  parentClause.getMembers()[0].getDefiningOp());
2741  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
2742  // Note: Clang treats arrays with explicit bounds that fall into this
2743  // category as a parent with map case, however, it seems this isn't a
2744  // requirement, and processing them as an individual map is fine. So,
2745  // we will handle them as individual maps for the moment, as it's
2746  // difficult for us to check this as we always require bounds to be
2747  // specified currently and it's also marginally more optimal (single
2748  // map rather than two). The difference may come from the fact that
2749  // Clang maps array without bounds as pointers (which we do not
2750  // currently do), whereas we treat them as arrays in all cases
2751  // currently.
2752  processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams,
2753  mapDataIndex);
2754  return;
2755  }
2756 
2757  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
2758  mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
2759  combinedInfo, mapData, mapDataIndex, isTargetParams);
2760  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
2761  combinedInfo, mapData, mapDataIndex,
2762  memberOfParentFlag);
2763 }
2764 
// This is a variation on Clang's GenerateOpenMPCapturedVars, which
// generates different operation (e.g. load/store) combinations for
// arguments to the kernel, based on map capture kinds which are then
// utilised in the combinedInfo in place of the original Map value.
static void
createAlteredByCaptureMap(MapInfoData &mapData,
                          LLVM::ModuleTranslation &moduleTranslation,
                          llvm::IRBuilderBase &builder) {
  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
    // if it's declare target, skip it, it's handled separately.
    if (!mapData.IsDeclareTarget[i]) {
      auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
      // A missing capture type defaults to capture-by-reference.
      omp::VariableCaptureKind captureKind =
          mapOp.getMapCaptureType().value_or(omp::VariableCaptureKind::ByRef);
      bool isPtrTy = checkIfPointerMap(mapOp);

      // Currently handles array sectioning lowerbound case, but more
      // logic may be required in the future. Clang invokes EmitLValue,
      // which has specialised logic for special Clang types such as user
      // defines, so it is possible we will have to extend this for
      // structures or other complex types. As the general idea is that this
      // function mimics some of the logic from Clang that we require for
      // kernel argument passing from host -> device.
      switch (captureKind) {
      case omp::VariableCaptureKind::ByRef: {
        llvm::Value *newV = mapData.Pointers[i];
        // Compute the bounds-derived offset (e.g. an array section's lower
        // bound) to apply to the pointer before handing it across.
        std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
            moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
            mapOp.getBounds());
        // For pointer-like maps, load the data pointer first so the GEP
        // below indexes the pointee rather than the pointer's storage.
        if (isPtrTy)
          newV = builder.CreateLoad(builder.getPtrTy(), newV);

        if (!offsetIdx.empty())
          newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
                                           "array_offset");
        mapData.Pointers[i] = newV;
      } break;
      case omp::VariableCaptureKind::ByCopy: {
        llvm::Type *type = mapData.BaseType[i];
        llvm::Value *newV;
        // If we still hold a pointer, load the value it refers to; otherwise
        // the value is already materialised.
        if (mapData.Pointers[i]->getType()->isPointerTy())
          newV = builder.CreateLoad(type, mapData.Pointers[i]);
        else
          newV = mapData.Pointers[i];

        // Non-pointer by-copy values are round-tripped through a temporary
        // alloca (created at the function's alloca insertion point, with the
        // current insertion point saved and restored around it) and re-read
        // as a pointer-typed value.
        if (!isPtrTy) {
          auto curInsert = builder.saveIP();
          builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
          auto *memTempAlloc =
              builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
          builder.restoreIP(curInsert);

          builder.CreateStore(newV, memTempAlloc);
          newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
        }

        mapData.Pointers[i] = newV;
        mapData.BasePointers[i] = newV;
      } break;
      case omp::VariableCaptureKind::This:
      case omp::VariableCaptureKind::VLAType:
        // NOTE(review): these kinds only emit a diagnostic and translation
        // carries on; confirm whether a hard failure is intended here.
        mapData.MapClause[i]->emitOpError("Unhandled capture kind");
        break;
      }
    }
  }
}
2832 
2833 // Generate all map related information and fill the combinedInfo.
2834 static void genMapInfos(llvm::IRBuilderBase &builder,
2835  LLVM::ModuleTranslation &moduleTranslation,
2836  DataLayout &dl,
2837  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
2838  MapInfoData &mapData, bool isTargetParams = false) {
2839  // We wish to modify some of the methods in which arguments are
2840  // passed based on their capture type by the target region, this can
2841  // involve generating new loads and stores, which changes the
2842  // MLIR value to LLVM value mapping, however, we only wish to do this
2843  // locally for the current function/target and also avoid altering
2844  // ModuleTranslation, so we remap the base pointer or pointer stored
2845  // in the map infos corresponding MapInfoData, which is later accessed
2846  // by genMapInfos and createTarget to help generate the kernel and
2847  // kernel arg structure. It primarily becomes relevant in cases like
2848  // bycopy, or byref range'd arrays. In the default case, we simply
2849  // pass thee pointer byref as both basePointer and pointer.
2850  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
2851  createAlteredByCaptureMap(mapData, moduleTranslation, builder);
2852 
2853  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2854 
2855  // We operate under the assumption that all vectors that are
2856  // required in MapInfoData are of equal lengths (either filled with
2857  // default constructed data or appropiate information) so we can
2858  // utilise the size from any component of MapInfoData, if we can't
2859  // something is missing from the initial MapInfoData construction.
2860  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
2861  // NOTE/TODO: We currently do not support arbitrary depth record
2862  // type mapping.
2863  if (mapData.IsAMember[i])
2864  continue;
2865 
2866  auto mapInfoOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[i]);
2867  if (!mapInfoOp.getMembers().empty()) {
2868  processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
2869  combinedInfo, mapData, i, isTargetParams);
2870  continue;
2871  }
2872 
2873  processIndividualMap(mapData, i, combinedInfo, isTargetParams);
2874  }
2875 }
2876 
2877 static LogicalResult
2878 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
2879  LLVM::ModuleTranslation &moduleTranslation) {
2880  llvm::Value *ifCond = nullptr;
2881  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
2882  SmallVector<Value> mapVars;
2883  SmallVector<Value> useDevicePtrVars;
2884  SmallVector<Value> useDeviceAddrVars;
2885  llvm::omp::RuntimeFunction RTLFn;
2886  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
2887 
2888  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2889  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
2890  /*SeparateBeginEndCalls=*/true);
2891 
2892  LogicalResult result =
2894  .Case([&](omp::TargetDataOp dataOp) {
2895  if (auto ifVar = dataOp.getIfExpr())
2896  ifCond = moduleTranslation.lookupValue(ifVar);
2897 
2898  if (auto devId = dataOp.getDevice())
2899  if (auto constOp =
2900  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2901  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2902  deviceID = intAttr.getInt();
2903 
2904  mapVars = dataOp.getMapVars();
2905  useDevicePtrVars = dataOp.getUseDevicePtrVars();
2906  useDeviceAddrVars = dataOp.getUseDeviceAddrVars();
2907  return success();
2908  })
2909  .Case([&](omp::TargetEnterDataOp enterDataOp) {
2910  if (!enterDataOp.getDependVars().empty())
2911  return (LogicalResult)(enterDataOp.emitError(
2912  "`depend` is not supported yet"));
2913 
2914  if (auto ifVar = enterDataOp.getIfExpr())
2915  ifCond = moduleTranslation.lookupValue(ifVar);
2916 
2917  if (auto devId = enterDataOp.getDevice())
2918  if (auto constOp =
2919  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2920  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2921  deviceID = intAttr.getInt();
2922  RTLFn =
2923  enterDataOp.getNowait()
2924  ? llvm::omp::OMPRTL___tgt_target_data_begin_nowait_mapper
2925  : llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
2926  mapVars = enterDataOp.getMapVars();
2927  info.HasNoWait = enterDataOp.getNowait();
2928  return success();
2929  })
2930  .Case([&](omp::TargetExitDataOp exitDataOp) {
2931  if (!exitDataOp.getDependVars().empty())
2932  return (LogicalResult)(exitDataOp.emitError(
2933  "`depend` is not supported yet"));
2934 
2935  if (auto ifVar = exitDataOp.getIfExpr())
2936  ifCond = moduleTranslation.lookupValue(ifVar);
2937 
2938  if (auto devId = exitDataOp.getDevice())
2939  if (auto constOp =
2940  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2941  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2942  deviceID = intAttr.getInt();
2943 
2944  RTLFn = exitDataOp.getNowait()
2945  ? llvm::omp::OMPRTL___tgt_target_data_end_nowait_mapper
2946  : llvm::omp::OMPRTL___tgt_target_data_end_mapper;
2947  mapVars = exitDataOp.getMapVars();
2948  info.HasNoWait = exitDataOp.getNowait();
2949  return success();
2950  })
2951  .Case([&](omp::TargetUpdateOp updateDataOp) {
2952  if (!updateDataOp.getDependVars().empty())
2953  return (LogicalResult)(updateDataOp.emitError(
2954  "`depend` is not supported yet"));
2955 
2956  if (auto ifVar = updateDataOp.getIfExpr())
2957  ifCond = moduleTranslation.lookupValue(ifVar);
2958 
2959  if (auto devId = updateDataOp.getDevice())
2960  if (auto constOp =
2961  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2962  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2963  deviceID = intAttr.getInt();
2964 
2965  RTLFn =
2966  updateDataOp.getNowait()
2967  ? llvm::omp::OMPRTL___tgt_target_data_update_nowait_mapper
2968  : llvm::omp::OMPRTL___tgt_target_data_update_mapper;
2969  mapVars = updateDataOp.getMapVars();
2970  info.HasNoWait = updateDataOp.getNowait();
2971  return success();
2972  })
2973  .Default([&](Operation *op) {
2974  return op->emitError("unsupported OpenMP operation: ")
2975  << op->getName();
2976  });
2977 
2978  if (failed(result))
2979  return failure();
2980 
2981  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2982 
2983  MapInfoData mapData;
2984  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, DL,
2985  builder, useDevicePtrVars, useDeviceAddrVars);
2986 
2987  // Fill up the arrays with all the mapped variables.
2988  llvm::OpenMPIRBuilder::MapInfosTy combinedInfo;
2989  auto genMapInfoCB =
2990  [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
2991  builder.restoreIP(codeGenIP);
2992  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
2993  return combinedInfo;
2994  };
2995 
2996  // Define a lambda to apply mappings between use_device_addr and
2997  // use_device_ptr base pointers, and their associated block arguments.
2998  auto mapUseDevice =
2999  [&moduleTranslation](
3000  llvm::OpenMPIRBuilder::DeviceInfoTy type,
3002  llvm::OpenMPIRBuilder::MapValuesArrayTy &basePointers,
3003  llvm::OpenMPIRBuilder::MapDeviceInfoArrayTy &devicePointers,
3004  llvm::function_ref<llvm::Value *(llvm::Value *)> mapper = nullptr) {
3005  // Get a range to iterate over `basePointers` after filtering based on
3006  // `devicePointers` and the given device info type.
3007  auto basePtrRange = llvm::map_range(
3008  llvm::make_filter_range(
3009  llvm::zip_equal(basePointers, devicePointers),
3010  [type](auto x) { return std::get<1>(x) == type; }),
3011  [](auto x) { return std::get<0>(x); });
3012 
3013  // Map block arguments to the corresponding processed base pointer. If
3014  // a mapper is not specified, map the block argument to the base pointer
3015  // directly.
3016  for (auto [arg, basePointer] : llvm::zip_equal(blockArgs, basePtrRange))
3017  moduleTranslation.mapValue(arg, mapper ? mapper(basePointer)
3018  : basePointer);
3019  };
3020 
3021  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
3022  LogicalResult bodyGenStatus = success();
3023  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType) {
3024  assert(isa<omp::TargetDataOp>(op) &&
3025  "BodyGen requested for non TargetDataOp");
3026  auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(op);
3027  Region &region = cast<omp::TargetDataOp>(op).getRegion();
3028  switch (bodyGenType) {
3029  case BodyGenTy::Priv:
3030  // Check if any device ptr/addr info is available
3031  if (!info.DevicePtrInfoMap.empty()) {
3032  builder.restoreIP(codeGenIP);
3033 
3034  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
3035  blockArgIface.getUseDeviceAddrBlockArgs(),
3036  combinedInfo.BasePointers, combinedInfo.DevicePointers,
3037  [&](llvm::Value *basePointer) -> llvm::Value * {
3038  return builder.CreateLoad(
3039  builder.getPtrTy(),
3040  info.DevicePtrInfoMap[basePointer].second);
3041  });
3042  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
3043  blockArgIface.getUseDevicePtrBlockArgs(),
3044  combinedInfo.BasePointers, combinedInfo.DevicePointers,
3045  [&](llvm::Value *basePointer) {
3046  return info.DevicePtrInfoMap[basePointer].second;
3047  });
3048 
3049  bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region",
3050  builder, moduleTranslation);
3051  }
3052  break;
3053  case BodyGenTy::DupNoPriv:
3054  break;
3055  case BodyGenTy::NoPriv:
3056  // If device info is available then region has already been generated
3057  if (info.DevicePtrInfoMap.empty()) {
3058  builder.restoreIP(codeGenIP);
3059  // For device pass, if use_device_ptr(addr) mappings were present,
3060  // we need to link them here before codegen.
3061  if (ompBuilder->Config.IsTargetDevice.value_or(false)) {
3062  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
3063  blockArgIface.getUseDeviceAddrBlockArgs(),
3064  mapData.BasePointers, mapData.DevicePointers);
3065  mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
3066  blockArgIface.getUseDevicePtrBlockArgs(),
3067  mapData.BasePointers, mapData.DevicePointers);
3068  }
3069 
3070  bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region",
3071  builder, moduleTranslation);
3072  }
3073  break;
3074  }
3075  return builder.saveIP();
3076  };
3077 
3078  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3079  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3080  findAllocaInsertPoint(builder, moduleTranslation);
3081  if (isa<omp::TargetDataOp>(op)) {
3082  builder.restoreIP(ompBuilder->createTargetData(
3083  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
3084  info, genMapInfoCB, nullptr, bodyGenCB));
3085  } else {
3086  builder.restoreIP(ompBuilder->createTargetData(
3087  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
3088  info, genMapInfoCB, &RTLFn));
3089  }
3090 
3091  return bodyGenStatus;
3092 }
3093 
3094 /// Lowers the FlagsAttr which is applied to the module on the device
3095 /// pass when offloading, this attribute contains OpenMP RTL globals that can
3096 /// be passed as flags to the frontend, otherwise they are set to default
3097 LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
3098  LLVM::ModuleTranslation &moduleTranslation) {
3099  if (!cast<mlir::ModuleOp>(op))
3100  return failure();
3101 
3102  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3103 
3104  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
3105  attribute.getOpenmpDeviceVersion());
3106 
3107  if (attribute.getNoGpuLib())
3108  return success();
3109 
3110  ompBuilder->createGlobalFlag(
3111  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
3112  "__omp_rtl_debug_kind");
3113  ompBuilder->createGlobalFlag(
3114  attribute
3115  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
3116  ,
3117  "__omp_rtl_assume_teams_oversubscription");
3118  ompBuilder->createGlobalFlag(
3119  attribute
3120  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
3121  ,
3122  "__omp_rtl_assume_threads_oversubscription");
3123  ompBuilder->createGlobalFlag(
3124  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
3125  "__omp_rtl_assume_no_thread_state");
3126  ompBuilder->createGlobalFlag(
3127  attribute
3128  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
3129  ,
3130  "__omp_rtl_assume_no_nested_parallelism");
3131  return success();
3132 }
3133 
3134 static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
3135  omp::TargetOp targetOp,
3136  llvm::StringRef parentName = "") {
3137  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
3138 
3139  assert(fileLoc && "No file found from location");
3140  StringRef fileName = fileLoc.getFilename().getValue();
3141 
3142  llvm::sys::fs::UniqueID id;
3143  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
3144  targetOp.emitError("Unable to get unique ID for file");
3145  return false;
3146  }
3147 
3148  uint64_t line = fileLoc.getLine();
3149  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
3150  id.getFile(), line);
3151  return true;
3152 }
3153 
3154 static bool targetOpSupported(Operation &opInst) {
3155  auto targetOp = cast<omp::TargetOp>(opInst);
3156  if (targetOp.getIfExpr()) {
3157  opInst.emitError("If clause not yet supported");
3158  return false;
3159  }
3160 
3161  if (targetOp.getDevice()) {
3162  opInst.emitError("Device clause not yet supported");
3163  return false;
3164  }
3165 
3166  if (targetOp.getThreadLimit()) {
3167  opInst.emitError("Thread limit clause not yet supported");
3168  return false;
3169  }
3170 
3171  if (!targetOp.getAllocateVars().empty() ||
3172  !targetOp.getAllocatorVars().empty()) {
3173  opInst.emitError("Allocate clause not yet supported");
3174  return false;
3175  }
3176 
3177  if (!targetOp.getInReductionVars().empty() ||
3178  targetOp.getInReductionByref() || targetOp.getInReductionSyms()) {
3179  opInst.emitError("In reduction clause not yet supported");
3180  return false;
3181  }
3182 
3183  return true;
3184 }
3185 
/// On the device pass, rewrites every in-kernel use of a declare target
/// mapped variable to go through its generated reference pointer, inserting a
/// fresh load immediately before each use.
static void
handleDeclareTargetMapVar(MapInfoData &mapData,
                          LLVM::ModuleTranslation &moduleTranslation,
                          llvm::IRBuilderBase &builder, llvm::Function *func) {
  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
    // In the case of declare target mapped variables, the basePointer is
    // the reference pointer generated by the convertDeclareTargetAttr
    // method. Whereas the kernelValue is the original variable, so for
    // the device we must replace all uses of this original global variable
    // (stored in kernelValue) with the reference pointer (stored in
    // basePointer for declare target mapped variables), as for device the
    // data is mapped into this reference pointer and should be loaded
    // from it, the original variable is discarded. On host both exist and
    // metadata is generated (elsewhere in the convertDeclareTargetAttr)
    // function to link the two variables in the runtime and then both the
    // reference pointer and the pointer are assigned in the kernel argument
    // structure for the host.
    if (mapData.IsDeclareTarget[i]) {
      // If the original map value is a constant, then we have to make sure all
      // of it's uses within the current kernel/function that we are going to
      // rewrite are converted to instructions, as we will be altering the old
      // use (OriginalValue) from a constant to an instruction, which will be
      // illegal and ICE the compiler if the user is a constant expression of
      // some kind e.g. a constant GEP.
      if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
        convertUsersOfConstantsToInstructions(constant, func, false);

      // The users iterator will get invalidated if we modify an element,
      // so we populate this vector of uses to alter each user on an
      // individual basis to emit its own load (rather than one load for
      // all).
      for (llvm::User *user : mapData.OriginalValue[i]->users())
        userVec.push_back(user);

      // Only rewrite uses that are instructions inside the outlined kernel
      // `func`; each such use gets its own load of the reference pointer
      // placed directly before it.
      for (llvm::User *user : userVec) {
        if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
          if (insn->getFunction() == func) {
            auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
                                            mapData.BasePointers[i]);
            load->moveBefore(insn);
            user->replaceUsesOfWith(mapData.OriginalValue[i], load);
          }
        }
      }
    }
  }
}
3234 
3235 // The createDeviceArgumentAccessor function generates
// instructions for retrieving (accessing) kernel
3237 // arguments inside of the device kernel for use by
3238 // the kernel. This enables different semantics such as
3239 // the creation of temporary copies of data allowing
3240 // semantics like read-only/no host write back kernel
3241 // arguments.
3242 //
3243 // This currently implements a very light version of Clang's
3244 // EmitParmDecl's handling of direct argument handling as well
3245 // as a portion of the argument access generation based on
3246 // capture types found at the end of emitOutlinedFunctionPrologue
3247 // in Clang. The indirect path handling of EmitParmDecl's may be
3248 // required for future work, but a direct 1-to-1 copy doesn't seem
3249 // possible as the logic is rather scattered throughout Clang's
3250 // lowering and perhaps we wish to deviate slightly.
3251 //
3252 // \param mapData - A container containing vectors of information
3253 // corresponding to the input argument, which should have a
3254 // corresponding entry in the MapInfoData containers
// OriginalValue entries.
3256 // \param arg - This is the generated kernel function argument that
// corresponds to the passed-in input argument. We generate different
3258 // accesses of this Argument, based on capture type and other Input
3259 // related information.
3260 // \param input - This is the host side value that will be passed to
3261 // the kernel i.e. the kernel input, we rewrite all uses of this within
3262 // the kernel (as we generate the kernel body based on the target's region
// which maintains references to the original input) to the retVal argument
// upon exit of this function inside of the OMPIRBuilder. This interlinks
3265 // the kernel argument to future uses of it in the function providing
// appropriate "glue" instructions in between.
3267 // \param retVal - This is the value that all uses of input inside of the
3268 // kernel will be re-written to, the goal of this function is to generate
3269 // an appropriate location for the kernel argument to be accessed from,
3270 // e.g. ByRef will result in a temporary allocation location and then
3271 // a store of the kernel argument into this allocated memory which
3272 // will then be loaded from, ByCopy will use the allocated memory
3273 // directly.
3274 static llvm::IRBuilderBase::InsertPoint
3275 createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
3276  llvm::Value *input, llvm::Value *&retVal,
3277  llvm::IRBuilderBase &builder,
3278  llvm::OpenMPIRBuilder &ompBuilder,
3279  LLVM::ModuleTranslation &moduleTranslation,
3280  llvm::IRBuilderBase::InsertPoint allocaIP,
3281  llvm::IRBuilderBase::InsertPoint codeGenIP) {
3282  builder.restoreIP(allocaIP);
3283 
3284  omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef;
3285 
3286  // Find the associated MapInfoData entry for the current input
3287  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
3288  if (mapData.OriginalValue[i] == input) {
3289  auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
3290  capture =
3291  mapOp.getMapCaptureType().value_or(omp::VariableCaptureKind::ByRef);
3292 
3293  break;
3294  }
3295 
3296  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
3297  unsigned int defaultAS =
3298  ompBuilder.M.getDataLayout().getProgramAddressSpace();
3299 
3300  // Create the alloca for the argument the current point.
3301  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
3302 
3303  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
3304  v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS));
3305 
3306  builder.CreateStore(&arg, v);
3307 
3308  builder.restoreIP(codeGenIP);
3309 
3310  switch (capture) {
3311  case omp::VariableCaptureKind::ByCopy: {
3312  retVal = v;
3313  break;
3314  }
3315  case omp::VariableCaptureKind::ByRef: {
3316  retVal = builder.CreateAlignedLoad(
3317  v->getType(), v,
3318  ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
3319  break;
3320  }
3321  case omp::VariableCaptureKind::This:
3322  case omp::VariableCaptureKind::VLAType:
3323  assert(false && "Currently unsupported capture kind");
3324  break;
3325  }
3326 
3327  return builder.saveIP();
3328 }
3329 
/// Converts an `omp.target` operation to LLVM IR by outlining its region into
/// a kernel via OpenMPIRBuilder::createTarget, wiring up map clauses,
/// privatization, and kernel arguments along the way.
static LogicalResult
convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {

  // Bail out early on clauses the translation cannot handle yet.
  if (!targetOpSupported(opInst))
    return failure();

  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  bool isTargetDevice = ompBuilder->Config.isTargetDevice();
  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
  auto targetOp = cast<omp::TargetOp>(opInst);
  auto &targetRegion = targetOp.getRegion();
  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
  SmallVector<Value> mapVars = targetOp.getMapVars();
  ArrayRef<BlockArgument> mapBlockArgs =
      cast<omp::BlockArgOpenMPOpInterface>(opInst).getMapBlockArgs();
  // Filled in by bodyCB once the outlined function is known; used afterwards
  // for declare target remapping on the device.
  llvm::Function *llvmOutlinedFn = nullptr;

  // TODO: It can also be false if a compile-time constant `false` IF clause is
  // specified.
  bool isOffloadEntry =
      isTargetDevice || !ompBuilder->Config.TargetTriples.empty();

  LogicalResult bodyGenStatus = success();
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  // Generates the body of the outlined kernel: forwards function attributes,
  // maps block arguments, performs privatization, then inlines the target
  // region.
  auto bodyCB = [&](InsertPointTy allocaIP,
                    InsertPointTy codeGenIP) -> InsertPointTy {
    // Forward target-cpu and target-features function attributes from the
    // original function to the new outlined function.
    llvm::Function *llvmParentFn =
        moduleTranslation.lookupFunction(parentFn.getName());
    llvmOutlinedFn = codeGenIP.getBlock()->getParent();
    assert(llvmParentFn && llvmOutlinedFn &&
           "Both parent and outlined functions must exist at this point");

    if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
        attr.isStringAttribute())
      llvmOutlinedFn->addFnAttr(attr);

    if (auto attr = llvmParentFn->getFnAttribute("target-features");
        attr.isStringAttribute())
      llvmOutlinedFn->addFnAttr(attr);

    // Map each entry block argument of the target region to the LLVM value of
    // the variable pointer of its corresponding map clause.
    builder.restoreIP(codeGenIP);
    for (auto [arg, mapOp] : llvm::zip_equal(mapBlockArgs, mapVars)) {
      auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
      llvm::Value *mapOpValue =
          moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
      moduleTranslation.mapValue(arg, mapOpValue);
    }

    // Do privatization after moduleTranslation has already recorded
    // mapped values.
    if (!targetOp.getPrivateVars().empty()) {
      builder.restoreIP(allocaIP);

      OperandRange privateVars = targetOp.getPrivateVars();
      std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
      MutableArrayRef<BlockArgument> privateBlockArgs =
          cast<omp::BlockArgOpenMPOpInterface>(opInst).getPrivateBlockArgs();

      for (auto [privVar, privatizerNameAttr, privBlockArg] :
           llvm::zip_equal(privateVars, *privateSyms, privateBlockArgs)) {

        SymbolRefAttr privSym = cast<SymbolRefAttr>(privatizerNameAttr);
        omp::PrivateClauseOp privatizer = findPrivatizer(&opInst, privSym);
        // Only simple (non-firstprivate, no dealloc region) privatizers are
        // supported in target regions so far.
        if (privatizer.getDataSharingType() ==
                omp::DataSharingClauseType::FirstPrivate ||
            !privatizer.getDeallocRegion().empty()) {
          opInst.emitError("Translation of omp.target from MLIR to LLVMIR "
                           "failed because translation of firstprivate and "
                           " private allocatables is not supported yet");
          bodyGenStatus = failure();
        } else {
          // Inline the privatizer's `alloc` region with its mold argument
          // bound to the original variable; the region yields the privatized
          // copy that the region block argument should refer to.
          moduleTranslation.mapValue(privatizer.getAllocMoldArg(),
                                     moduleTranslation.lookupValue(privVar));
          Region &allocRegion = privatizer.getAllocRegion();
          SmallVector<llvm::Value *, 1> yieldedValues;
          if (failed(inlineConvertOmpRegions(
                  allocRegion, "omp.targetop.privatizer", builder,
                  moduleTranslation, &yieldedValues))) {
            opInst.emitError(
                "failed to inline `alloc` region of an `omp.private` "
                "op in the target region");
            bodyGenStatus = failure();
          } else {
            assert(yieldedValues.size() == 1);
            moduleTranslation.mapValue(privBlockArg, yieldedValues.front());
          }
          // The privatizer region may be reused by other ops; drop its value
          // mappings now that its yield has been consumed.
          moduleTranslation.forgetMapping(allocRegion);
          builder.restoreIP(builder.saveIP());
        }
      }
    }
    llvm::BasicBlock *exitBlock = convertOmpOpRegions(
        targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
    builder.SetInsertPoint(exitBlock);
    return builder.saveIP();
  };

  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  StringRef parentName = parentFn.getName();

  llvm::TargetRegionEntryInfo entryInfo;

  if (!getTargetEntryUniqueInfo(entryInfo, targetOp, parentName))
    return failure();

  // Defaults forwarded to createTarget when no teams/thread_limit clauses are
  // present.
  int32_t defaultValTeams = -1;
  int32_t defaultValThreads = 0;

  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);

  MapInfoData mapData;
  collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
                                builder);

  // Callback that lowers the collected map data into the combined map info
  // arrays consumed by the OMPIRBuilder.
  llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
  auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
      -> llvm::OpenMPIRBuilder::MapInfosTy & {
    builder.restoreIP(codeGenIP);
    genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, true);
    return combinedInfos;
  };

  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
                           llvm::Value *&retVal, InsertPointTy allocaIP,
                           InsertPointTy codeGenIP) {
    // We just return the unaltered argument for the host function
    // for now, some alterations may be required in the future to
    // keep host fallback functions working identically to the device
    // version (e.g. pass ByCopy values should be treated as such on
    // host and device, currently not always the case)
    if (!isTargetDevice) {
      retVal = cast<llvm::Value>(&arg);
      return codeGenIP;
    }

    return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
                                        *ompBuilder, moduleTranslation,
                                        allocaIP, codeGenIP);
  };

  for (size_t i = 0; i < mapVars.size(); ++i) {
    // declare target arguments are not passed to kernels as arguments
    // TODO: We currently do not handle cases where a member is explicitly
    // passed in as an argument, this will likely need to be handled in
    // the near future, rather than using IsAMember, it may be better to
    // test if the relevant BlockArg is used within the target region and
    // then use that as a basis for exclusion in the kernel inputs.
    if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
      kernelInput.push_back(mapData.OriginalValue[i]);
  }
  buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
                  moduleTranslation, dds);

  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget(
      ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo,
      defaultValTeams, defaultValThreads, kernelInput, genMapInfoCB, bodyCB,
      argAccessorCB, dds, targetOp.getNowait()));

  // Remap access operations to declare target reference pointers for the
  // device, essentially generating extra loadop's as necessary
  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
    handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
                              llvmOutlinedFn);

  return bodyGenStatus;
}
3502 
/// Processes the `omp.declare_target` attribute attached to a function or
/// global: erases host-only wrapper functions on the device pass, and
/// registers declare target globals with the OpenMPIRBuilder.
static LogicalResult
convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
                         LLVM::ModuleTranslation &moduleTranslation) {
  // Amend omp.declare_target by deleting the IR of the outlined functions
  // created for target regions. They cannot be filtered out from MLIR earlier
  // because the omp.target operation inside must be translated to LLVM, but
  // the wrapper functions themselves must not remain at the end of the
  // process. We know that functions where omp.declare_target does not match
  // omp.is_target_device at this stage can only be wrapper functions because
  // those that aren't are removed earlier as an MLIR transformation pass.
  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
    if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
            op->getParentOfType<ModuleOp>().getOperation())) {
      // Nothing to clean up on the host pass.
      if (!offloadMod.getIsTargetDevice())
        return success();

      omp::DeclareTargetDeviceType declareType =
          attribute.getDeviceType().getValue();

      // A host-only declare target function on the device pass is a wrapper:
      // drop its LLVM IR entirely.
      if (declareType == omp::DeclareTargetDeviceType::host) {
        llvm::Function *llvmFunc =
            moduleTranslation.lookupFunction(funcOp.getName());
        llvmFunc->dropAllReferences();
        llvmFunc->eraseFromParent();
      }
    }
    return success();
  }

  // Global variable case: register the global with the OMPIRBuilder so the
  // offload entry metadata (and, on device, the reference pointer) gets
  // generated.
  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
    llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
    if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
      llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
      bool isDeclaration = gOp.isDeclaration();
      bool isExternallyVisible =
          gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
      auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
      llvm::StringRef mangledName = gOp.getSymName();
      auto captureClause =
          convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
      auto deviceClause =
          convertToDeviceClauseKind(attribute.getDeviceType().getValue());
      // unused for MLIR at the moment, required in Clang for book
      // keeping
      std::vector<llvm::GlobalVariable *> generatedRefs;

      // Collect the module's target triple (if any) for the registration call.
      std::vector<llvm::Triple> targetTriple;
      auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>(
          op->getParentOfType<mlir::ModuleOp>()->getAttr(
              LLVM::LLVMDialect::getTargetTripleAttrName()));
      if (targetTripleAttr)
        targetTriple.emplace_back(targetTripleAttr.data());

      // Supplies file/line info for the offload entry; falls back to an empty
      // filename and line 0 when no FileLineColLoc is available.
      auto fileInfoCallBack = [&loc]() {
        std::string filename = "";
        std::uint64_t lineNo = 0;

        if (loc) {
          filename = loc.getFilename().str();
          lineNo = loc.getLine();
        }

        return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
                                                     lineNo);
      };

      ompBuilder->registerTargetGlobalVariable(
          captureClause, deviceClause, isDeclaration, isExternallyVisible,
          ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
          generatedRefs, /*OpenMPSimd*/ false, targetTriple,
          /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
          gVal->getType(), gVal);

      // On device, non-`to` captures (or unified shared memory) additionally
      // need a reference pointer the kernel can load the data through.
      if (ompBuilder->Config.isTargetDevice() &&
          (attribute.getCaptureClause().getValue() !=
               mlir::omp::DeclareTargetCaptureClause::to ||
           ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
        ompBuilder->getAddrOfDeclareTargetVar(
            captureClause, deviceClause, isDeclaration, isExternallyVisible,
            ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
            generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
            /*GlobalInitializer*/ nullptr,
            /*VariableLinkage*/ nullptr);
      }
    }
  }

  return success();
}
3592 
3593 // Returns true if the operation is inside a TargetOp or
3594 // is part of a declare target function.
3595 static bool isTargetDeviceOp(Operation *op) {
3596  // Assumes no reverse offloading
3597  if (op->getParentOfType<omp::TargetOp>())
3598  return true;
3599 
3600  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
3601  if (auto declareTargetIface =
3602  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
3603  parentFn.getOperation()))
3604  if (declareTargetIface.isDeclareTarget() &&
3605  declareTargetIface.getDeclareTargetDeviceType() !=
3606  mlir::omp::DeclareTargetDeviceType::host)
3607  return true;
3608 
3609  return false;
3610 }
3611 
/// Given an OpenMP MLIR operation, create the corresponding LLVM IR
/// (including OpenMP runtime calls).
static LogicalResult
convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
                             LLVM::ModuleTranslation &moduleTranslation) {

  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

      // Standalone directives lowered directly through the OMPIRBuilder.
      .Case([&](omp::BarrierOp) {
        ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
        return success();
      })
      .Case([&](omp::TaskyieldOp) {
        ompBuilder->createTaskyield(builder.saveIP());
        return success();
      })
      .Case([&](omp::FlushOp) {
        // No support in Openmp runtime function (__kmpc_flush) to accept
        // the argument list.
        // OpenMP standard states the following:
        // "An implementation may implement a flush with a list by ignoring
        // the list, and treating it the same as a flush without a list."
        //
        // The argument list is discarded so that, flush with a list is treated
        // same as a flush without a list.
        ompBuilder->createFlush(builder.saveIP());
        return success();
      })
      // Region-carrying constructs, each handled by a dedicated converter.
      .Case([&](omp::ParallelOp op) {
        return convertOmpParallel(op, builder, moduleTranslation);
      })
      .Case([&](omp::MaskedOp) {
        return convertOmpMasked(*op, builder, moduleTranslation);
      })
      .Case([&](omp::MasterOp) {
        return convertOmpMaster(*op, builder, moduleTranslation);
      })
      .Case([&](omp::CriticalOp) {
        return convertOmpCritical(*op, builder, moduleTranslation);
      })
      .Case([&](omp::OrderedRegionOp) {
        return convertOmpOrderedRegion(*op, builder, moduleTranslation);
      })
      .Case([&](omp::OrderedOp) {
        return convertOmpOrdered(*op, builder, moduleTranslation);
      })
      .Case([&](omp::WsloopOp) {
        return convertOmpWsloop(*op, builder, moduleTranslation);
      })
      .Case([&](omp::SimdOp) {
        return convertOmpSimd(*op, builder, moduleTranslation);
      })
      .Case([&](omp::AtomicReadOp) {
        return convertOmpAtomicRead(*op, builder, moduleTranslation);
      })
      .Case([&](omp::AtomicWriteOp) {
        return convertOmpAtomicWrite(*op, builder, moduleTranslation);
      })
      .Case([&](omp::AtomicUpdateOp op) {
        return convertOmpAtomicUpdate(op, builder, moduleTranslation);
      })
      .Case([&](omp::AtomicCaptureOp op) {
        return convertOmpAtomicCapture(op, builder, moduleTranslation);
      })
      .Case([&](omp::SectionsOp) {
        return convertOmpSections(*op, builder, moduleTranslation);
      })
      .Case([&](omp::SingleOp op) {
        return convertOmpSingle(op, builder, moduleTranslation);
      })
      .Case([&](omp::TeamsOp op) {
        return convertOmpTeams(op, builder, moduleTranslation);
      })
      .Case([&](omp::TaskOp op) {
        return convertOmpTaskOp(op, builder, moduleTranslation);
      })
      .Case([&](omp::TaskgroupOp op) {
        return convertOmpTaskgroupOp(op, builder, moduleTranslation);
      })
      .Case([&](omp::TaskwaitOp op) {
        return convertOmpTaskwaitOp(op, builder, moduleTranslation);
      })
      .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareReductionOp,
            omp::CriticalDeclareOp>([](auto op) {
        // `yield` and `terminator` can be just omitted. The block structure
        // was created in the region that handles their parent operation.
        // `declare_reduction` will be used by reductions and is not
        // converted directly, skip it.
        // `critical.declare` is only used to declare names of critical
        // sections which will be used by `critical` ops and hence can be
        // ignored for lowering. The OpenMP IRBuilder will create unique
        // name for critical section names.
        return success();
      })
      .Case([&](omp::ThreadprivateOp) {
        return convertOmpThreadprivate(*op, builder, moduleTranslation);
      })
      // All four target data variants share one converter.
      .Case<omp::TargetDataOp, omp::TargetEnterDataOp, omp::TargetExitDataOp,
            omp::TargetUpdateOp>([&](auto op) {
        return convertOmpTargetData(op, builder, moduleTranslation);
      })
      .Case([&](omp::TargetOp) {
        return convertOmpTarget(*op, builder, moduleTranslation);
      })
      .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
          [&](auto op) {
            // No-op, should be handled by relevant owning operations e.g.
            // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp etc.
            // and then discarded
            return success();
          })
      .Default([&](Operation *inst) {
        // Anything not listed above is an OpenMP op we cannot lower yet.
        return inst->emitError("unsupported OpenMP operation: ")
               << inst->getName();
      });
}
3729 
3730 static LogicalResult
3731 convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
3732  LLVM::ModuleTranslation &moduleTranslation) {
3733  return convertHostOrTargetOperation(op, builder, moduleTranslation);
3734 }
3735 
3736 static LogicalResult
3737 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
3738  LLVM::ModuleTranslation &moduleTranslation) {
3739  if (isa<omp::TargetOp>(op))
3740  return convertOmpTarget(*op, builder, moduleTranslation);
3741  if (isa<omp::TargetDataOp>(op))
3742  return convertOmpTargetData(op, builder, moduleTranslation);
3743  bool interrupted =
3744  op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
3745  if (isa<omp::TargetOp>(oper)) {
3746  if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
3747  return WalkResult::interrupt();
3748  return WalkResult::skip();
3749  }
3750  if (isa<omp::TargetDataOp>(oper)) {
3751  if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
3752  return WalkResult::interrupt();
3753  return WalkResult::skip();
3754  }
3755  return WalkResult::advance();
3756  }).wasInterrupted();
3757  return failure(interrupted);
3758 }
3759 
3760 namespace {
3761 
3762 /// Implementation of the dialect interface that converts operations belonging
3763 /// to the OpenMP dialect to LLVM IR.
3764 class OpenMPDialectLLVMIRTranslationInterface
3766 public:
3768 
3769  /// Translates the given operation to LLVM IR using the provided IR builder
3770  /// and saving the state in `moduleTranslation`.
3771  LogicalResult
3772  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
3773  LLVM::ModuleTranslation &moduleTranslation) const final;
3774 
3775  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
3776  /// runtime calls, or operation amendments
3777  LogicalResult
3779  NamedAttribute attribute,
3780  LLVM::ModuleTranslation &moduleTranslation) const final;
3781 };
3782 
3783 } // namespace
3784 
3785 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
3786  Operation *op, ArrayRef<llvm::Instruction *> instructions,
3787  NamedAttribute attribute,
3788  LLVM::ModuleTranslation &moduleTranslation) const {
3789  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
3790  attribute.getName())
3791  .Case("omp.is_target_device",
3792  [&](Attribute attr) {
3793  if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
3794  llvm::OpenMPIRBuilderConfig &config =
3795  moduleTranslation.getOpenMPBuilder()->Config;
3796  config.setIsTargetDevice(deviceAttr.getValue());
3797  return success();
3798  }
3799  return failure();
3800  })
3801  .Case("omp.is_gpu",
3802  [&](Attribute attr) {
3803  if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
3804  llvm::OpenMPIRBuilderConfig &config =
3805  moduleTranslation.getOpenMPBuilder()->Config;
3806  config.setIsGPU(gpuAttr.getValue());
3807  return success();
3808  }
3809  return failure();
3810  })
3811  .Case("omp.host_ir_filepath",
3812  [&](Attribute attr) {
3813  if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
3814  llvm::OpenMPIRBuilder *ompBuilder =
3815  moduleTranslation.getOpenMPBuilder();
3816  ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
3817  return success();
3818  }
3819  return failure();
3820  })
3821  .Case("omp.flags",
3822  [&](Attribute attr) {
3823  if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
3824  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
3825  return failure();
3826  })
3827  .Case("omp.version",
3828  [&](Attribute attr) {
3829  if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
3830  llvm::OpenMPIRBuilder *ompBuilder =
3831  moduleTranslation.getOpenMPBuilder();
3832  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
3833  versionAttr.getVersion());
3834  return success();
3835  }
3836  return failure();
3837  })
3838  .Case("omp.declare_target",
3839  [&](Attribute attr) {
3840  if (auto declareTargetAttr =
3841  dyn_cast<omp::DeclareTargetAttr>(attr))
3842  return convertDeclareTargetAttr(op, declareTargetAttr,
3843  moduleTranslation);
3844  return failure();
3845  })
3846  .Case("omp.requires",
3847  [&](Attribute attr) {
3848  if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
3849  using Requires = omp::ClauseRequires;
3850  Requires flags = requiresAttr.getValue();
3851  llvm::OpenMPIRBuilderConfig &config =
3852  moduleTranslation.getOpenMPBuilder()->Config;
3853  config.setHasRequiresReverseOffload(
3854  bitEnumContainsAll(flags, Requires::reverse_offload));
3855  config.setHasRequiresUnifiedAddress(
3856  bitEnumContainsAll(flags, Requires::unified_address));
3857  config.setHasRequiresUnifiedSharedMemory(
3858  bitEnumContainsAll(flags, Requires::unified_shared_memory));
3859  config.setHasRequiresDynamicAllocators(
3860  bitEnumContainsAll(flags, Requires::dynamic_allocators));
3861  return success();
3862  }
3863  return failure();
3864  })
3865  .Case("omp.target_triples",
3866  [&](Attribute attr) {
3867  if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
3868  llvm::OpenMPIRBuilderConfig &config =
3869  moduleTranslation.getOpenMPBuilder()->Config;
3870  config.TargetTriples.clear();
3871  config.TargetTriples.reserve(triplesAttr.size());
3872  for (Attribute tripleAttr : triplesAttr) {
3873  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
3874  config.TargetTriples.emplace_back(tripleStrAttr.getValue());
3875  else
3876  return failure();
3877  }
3878  return success();
3879  }
3880  return failure();
3881  })
3882  .Default([](Attribute) {
3883  // Fall through for omp attributes that do not require lowering.
3884  return success();
3885  })(attribute.getValue());
3886 
3887  return failure();
3888 }
3889 
3890 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
3891 /// (including OpenMP runtime calls).
3892 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
3893  Operation *op, llvm::IRBuilderBase &builder,
3894  LLVM::ModuleTranslation &moduleTranslation) const {
3895 
3896  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3897  if (ompBuilder->Config.isTargetDevice()) {
3898  if (isTargetDeviceOp(op)) {
3899  return convertTargetDeviceOp(op, builder, moduleTranslation);
3900  } else {
3901  return convertTargetOpsInNest(op, builder, moduleTranslation);
3902  }
3903  }
3904  return convertHostOrTargetOperation(op, builder, moduleTranslation);
3905 }
3906 
3908  registry.insert<omp::OpenMPDialect>();
3909  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
3910  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
3911  });
3912 }
3913 
3915  DialectRegistry registry;
3917  context.appendDialectRegistry(registry);
3918 }
@ None
static llvm::Value * getRefPtrIfDeclareTarget(mlir::Value value, LLVM::ModuleTranslation &moduleTranslation)
static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::Function *func)
static LogicalResult convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static llvm::omp::OrderKind convertOrderKind(std::optional< omp::ClauseOrderKind > o)
Convert Order attribute to llvm::omp::OrderKind.
static LogicalResult convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertToCaptureClauseKind(mlir::omp::DeclareTargetCaptureClause captureClause)
static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, bool first)
static LogicalResult convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered_region' operation into LLVM IR using OpenMPIRBuilder.
static llvm::OpenMPIRBuilder::InsertPointTy findAllocaInsertPoint(llvm::IRBuilderBase &builder, const LLVM::ModuleTranslation &moduleTranslation)
Find the insertion point for allocas given the current insertion point for normal operations in the b...
static LogicalResult convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an omp.atomic.write operation to LLVM IR.
static OwningAtomicReductionGen makeAtomicReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible atomic reduction generator for the given reduction declaration.
static LogicalResult simdOpSupported(omp::SimdOp op)
static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static void collectPrivatizationDecls(omp::ParallelOp op, SmallVectorImpl< omp::PrivateClauseOp > &privatizations)
Populates privatizations with privatization declarations used for the given op.
static LogicalResult convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op)
Converts an LLVM dialect binary operation to the corresponding enum value for atomicrmw supported bin...
static llvm::AtomicOrdering convertAtomicOrdering(std::optional< omp::ClauseMemoryOrderKind > ao)
Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult allocAndInitializeReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef)
static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
static bool isTargetDeviceOp(Operation *op)
static LogicalResult inlineOmpRegionCleanup(llvm::SmallVectorImpl< Region * > &cleanupRegions, llvm::ArrayRef< llvm::Value * > privateVariables, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, StringRef regionName, bool shouldLoadCleanupRegionArg=true)
handling of DeclareReductionOp's cleanup region
static void mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, DenseMap< Value, llvm::Value * > &reductionVariableMap, unsigned i)
Map input arguments to reduction initialization region.
static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp)
static llvm::SmallString< 64 > getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, llvm::OpenMPIRBuilder &ompBuilder)
static OwningReductionGen makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible reduction generator for the given reduction declaration.
static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts the OpenMP parallel operation to LLVM IR.
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static LogicalResult inlineConvertOmpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::Value * > *continuationBlockArgs=nullptr)
Translates the blocks contained in the given region and appends them at the current insertion poin...
static LogicalResult convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP Threadprivate operation into LLVM IR using OpenMPIRBuilder.
static omp::PrivateClauseOp findPrivatizer(Operation *from, SymbolRefAttr symbolName)
Looks up from the operation from and returns the PrivateClauseOp with name symbolName.
static LogicalResult convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult createReductionsAndCleanup(OP op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, ArrayRef< llvm::Value * > privateReductionVariables, ArrayRef< bool > isByRef)
LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
Lowers the FlagsAttr which is applied to the module on the device pass when offloading,...
uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl)
std::vector< llvm::Value * > calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, bool isArrayTy, OperandRange bounds)
This function calculates the array/pointer offset for map data provided with bounds operations,...
static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, bool isTargetParams, int mapDataParentIdx=-1)
static LogicalResult allocReductionVars(T loop, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, SmallVectorImpl< DeferredStore > &deferredStores, llvm::ArrayRef< bool > isByRefs)
Allocate space for privatized reduction variables.
static ArrayRef< bool > getIsByRef(std::optional< ArrayRef< bool >> attr)
static bool targetOpSupported(Operation &opInst)
static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, bool isTargetParams=false)
static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP)
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind)
Convert ProcBindKind from MLIR-generated enum to LLVM enum.
static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void processMapMembersWithParent(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag)
static LogicalResult convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static void collectReductionDecls(T op, SmallVectorImpl< omp::DeclareReductionOp > &reductions)
Populates reductions with reduction declarations used in the given op.
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause)
static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert omp.atomic.read operation to LLVM IR.
static void collectMapDataFromMapOperands(MapInfoData &mapData, SmallVectorImpl< Value > &mapVars, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::IRBuilderBase &builder, const ArrayRef< Value > &useDevPtrOperands={}, const ArrayRef< Value > &useDevAddrOperands={})
static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, omp::TargetOp targetOp, llvm::StringRef parentName="")
static void collectReductionInfo(T loop, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< OwningReductionGen > &owningReductionGens, SmallVectorImpl< OwningAtomicReductionGen > &owningAtomicReductionGens, const ArrayRef< llvm::Value * > privateReductionVariables, SmallVectorImpl< llvm::OpenMPIRBuilder::ReductionInfo > &reductionInfos)
Collect reduction info.
static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Given an OpenMP MLIR operation, create the corresponding LLVM IR (including OpenMP runtime calls).
static bool checkIfPointerMap(omp::MapInfoOp mapOp)
static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void buildDependData(std::optional< ArrayAttr > dependKinds, OperandRange dependVars, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::OpenMPIRBuilder::DependData > &dds)
static llvm::BasicBlock * convertOmpOpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, SmallVectorImpl< llvm::PHINode * > *continuationBlockPHIs=nullptr)
Converts the given region that appears within an OpenMP dialect operation to LLVM IR,...
static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
llvm::Value * getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::Value *basePointer, llvm::Type *baseType, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static bool isDeclareTargetLink(mlir::Value value)
#define MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CLASS_NAME)
Definition: TypeID.h:274
Attributes are known-constant values of operations.
Definition: Attributes.h:25
Block represents an ordered list of Operations.
Definition: Block.h:31
BlockArgument getArgument(unsigned i)
Definition: Block.h:127
unsigned getNumArguments()
Definition: Block.h:126
Operation & back()
Definition: Block.h:150
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:243
Operation & front()
Definition: Block.h:151
iterator begin()
Definition: Block.h:141
The main mechanism for performing data layout queries.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
An attribute that represents a reference to a dense integer vector or tensor object.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
Base class for dialect interfaces providing translation to LLVM IR.
virtual LogicalResult amendOperation(Operation *op, ArrayRef< llvm::Instruction * > instructions, NamedAttribute attribute, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to act on an operation that has dialect attributes from the derive...
virtual LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to provide translation of the operations to LLVM IR.
Concrete CRTP base class for ModuleTranslation stack frames.
Implementation class for module translation.
llvm::Value * lookupValue(Value value) const
Finds an LLVM IR value corresponding to the given MLIR value.
LogicalResult convertBlock(Block &bb, bool ignoreArguments, llvm::IRBuilderBase &builder)
Translates the contents of the given block to LLVM IR using this translator.
SmallVector< llvm::Value * > lookupValues(ValueRange values)
Looks up a list of remapped values corresponding to the given MLIR values.
llvm::BasicBlock * lookupBlock(Block *block) const
Finds an LLVM IR basic block that corresponds to the given MLIR block.
SymbolTableCollection & symbolTable()
llvm::Type * convertType(Type type)
Converts the type from MLIR LLVM dialect to LLVM.
llvm::OpenMPIRBuilder * getOpenMPBuilder()
Returns the OpenMP IR builder associated with the LLVM IR module being constructed.
llvm::LLVMContext & getLLVMContext() const
Returns the LLVM context in which the IR is being constructed.
llvm::GlobalValue * lookupGlobal(Operation *op)
Finds an LLVM IR global value that corresponds to the given MLIR operation defining a global value.
llvm::Module * getLLVMModule()
Returns the LLVM module in which the IR is being constructed.
llvm::Function * lookupFunction(StringRef name) const
Finds an LLVM IR function by its name.
void mapBlock(Block *mlir, llvm::BasicBlock *llvm)
Stores the mapping between an MLIR block and LLVM IR basic block.
WalkResult stackWalk(llvm::function_ref< WalkResult(const T &)> callback) const
Calls callback for every ModuleTranslation stack frame of type T starting from the top of the stack.
void forgetMapping(Region &region)
Removes the mapping for blocks contained in the region and values defined in these blocks.
void mapValue(Value mlir, llvm::Value *llvm)
Stores the mapping between an MLIR value and its LLVM IR counterpart.
T findInstanceOf()
Return an instance of the given location type if one is nested under the current location.
Definition: Location.h:44
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:207
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:49
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:221
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:42
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:345
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:402
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:793
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:341
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:682
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:373
InFlightDiagnostic emitOpError(const Twine &message={})
Emit an error with the op name prefixed, like "'dim' op " which is convenient for verifiers.
Definition: Operation.cpp:671
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
BlockArgListType getArguments()
Definition: Region.h:81
bool empty()
Definition: Region.h:60
unsigned getNumArguments()
Definition: Region.h:123
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
Block & front()
Definition: Region.h:65
@ Private
The symbol is private and may only be referenced by SymbolRefAttrs local to the operations within the...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:33
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: Visitors.h:55
static WalkResult interrupt()
Definition: Visitors.h:50
The OpAsmOpInterface, see OpAsmInterface.td for more details.
Definition: CallGraph.h:229
void connectPHINodes(Region &region, const ModuleTranslation &state)
For all blocks in the region that were converted to LLVM IR using the given ModuleTranslation,...
llvm::Constant * createMappingInformation(Location loc, llvm::OpenMPIRBuilder &builder)
Create a constant string representing the mapping information extracted from the MLIR location inform...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Runtime
Potential runtimes for AMD GPU kernels.
Definition: Runtimes.h:15
Include the generated interface declarations.
SetVector< Block * > getBlocksSortedByDominance(Region &region)
Gets a list of blocks that is sorted according to dominance.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:305
void registerOpenMPDialectTranslation(DialectRegistry &registry)
Register the OpenMP dialect and the translation from it to the LLVM IR in the given registry;.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
RAII object calling stackPush/stackPop on construction/destruction.