OpenMPToLLVMIRTranslation.cpp
1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
17 #include "mlir/IR/IRMapping.h"
18 #include "mlir/IR/Operation.h"
19 #include "mlir/Support/LLVM.h"
24 
25 #include "llvm/ADT/SetVector.h"
26 #include "llvm/ADT/TypeSwitch.h"
27 #include "llvm/Frontend/OpenMP/OMPConstants.h"
28 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
29 #include "llvm/IR/DebugInfoMetadata.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/TargetParser/Triple.h"
33 #include "llvm/Transforms/Utils/ModuleUtils.h"
34 
35 #include <any>
36 #include <optional>
37 #include <utility>
38 
39 using namespace mlir;
40 
41 namespace {
42 static llvm::omp::ScheduleKind
43 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
44  if (!schedKind.has_value())
45  return llvm::omp::OMP_SCHEDULE_Default;
46  switch (schedKind.value()) {
47  case omp::ClauseScheduleKind::Static:
48  return llvm::omp::OMP_SCHEDULE_Static;
49  case omp::ClauseScheduleKind::Dynamic:
50  return llvm::omp::OMP_SCHEDULE_Dynamic;
51  case omp::ClauseScheduleKind::Guided:
52  return llvm::omp::OMP_SCHEDULE_Guided;
53  case omp::ClauseScheduleKind::Auto:
54  return llvm::omp::OMP_SCHEDULE_Auto;
55  case omp::ClauseScheduleKind::Runtime:
56  return llvm::omp::OMP_SCHEDULE_Runtime;
57  }
58  llvm_unreachable("unhandled schedule clause argument");
59 }
60 
61 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
62 /// insertion points for allocas.
63 class OpenMPAllocaStackFrame
64  : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
65 public:
66  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)
67 
68  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
69  : allocaInsertPoint(allocaIP) {}
70  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
71 };
72 
73 /// ModuleTranslation stack frame containing the partial mapping between MLIR
74 /// values and their LLVM IR equivalents.
75 class OpenMPVarMappingStackFrame
76  : public LLVM::ModuleTranslation::StackFrameBase<
77  OpenMPVarMappingStackFrame> {
78 public:
79  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame)
80 
81  explicit OpenMPVarMappingStackFrame(
82  const DenseMap<Value, llvm::Value *> &mapping)
83  : mapping(mapping) {}
84 
85  DenseMap<Value, llvm::Value *> mapping;
86 };
87 } // namespace
88 
89 /// Find the insertion point for allocas given the current insertion point for
90 /// normal operations in the builder.
91 static llvm::OpenMPIRBuilder::InsertPointTy
92 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
93  const LLVM::ModuleTranslation &moduleTranslation) {
94  // If there is an alloca insertion point on the stack, i.e. we are in a
95  // nested operation and a specific point was provided by some surrounding
96  // operation, use it.
97  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
98  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
99  [&](const OpenMPAllocaStackFrame &frame) {
100  allocaInsertPoint = frame.allocaInsertPoint;
101  return WalkResult::interrupt();
102  });
103  if (walkResult.wasInterrupted())
104  return allocaInsertPoint;
105 
106  // Otherwise, insert to the entry block of the surrounding function.
107  // If the current IRBuilder InsertPoint is the function's entry, it cannot
108  // also be used for alloca insertion, which would result in insertion order
109  // confusion. Create a new BasicBlock for the Builder and use the entry block
110  // for the allocas.
111  // TODO: Create a dedicated alloca BasicBlock at function creation such that
112  // we do not need to move the current InsertPoint here.
113  if (builder.GetInsertBlock() ==
114  &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
115  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
116  "Assuming end of basic block");
117  llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
118  builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
119  builder.GetInsertBlock()->getNextNode());
120  builder.CreateBr(entryBB);
121  builder.SetInsertPoint(entryBB);
122  }
123 
124  llvm::BasicBlock &funcEntryBlock =
125  builder.GetInsertBlock()->getParent()->getEntryBlock();
126  return llvm::OpenMPIRBuilder::InsertPointTy(
127  &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
128 }
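// Illustrative sketch (not upstream code): a surrounding construct typically
// publishes its alloca insertion point through the stack frame above so that
// nested conversions calling findAllocaInsertPoint() reuse it, mirroring the
// SaveStack idiom used later in this file:
//
//   LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
//       moduleTranslation, allocaIP);
//   // ... convert nested OpenMP ops; their allocas now land at `allocaIP`.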
129 
130 /// Converts the given region that appears within an OpenMP dialect operation to
131 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
132 /// region, and a branch from any block with a successor-less OpenMP terminator
133 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
134 /// of the continuation block if provided.
135 static llvm::BasicBlock *convertOmpOpRegions(
136  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
137  LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
138  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
139  llvm::BasicBlock *continuationBlock =
140  splitBB(builder, true, "omp.region.cont");
141  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
142 
143  llvm::LLVMContext &llvmContext = builder.getContext();
144  for (Block &bb : region) {
145  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
146  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
147  builder.GetInsertBlock()->getNextNode());
148  moduleTranslation.mapBlock(&bb, llvmBB);
149  }
150 
151  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
152 
153  // Terminators (namely YieldOp) may forward values out of the region that
154  // need to be available in the continuation block. Collect the types of these
155  // operands in preparation for creating PHI nodes.
156  SmallVector<llvm::Type *> continuationBlockPHITypes;
157  bool operandsProcessed = false;
158  unsigned numYields = 0;
159  for (Block &bb : region.getBlocks()) {
160  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
161  if (!operandsProcessed) {
162  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
163  continuationBlockPHITypes.push_back(
164  moduleTranslation.convertType(yield->getOperand(i).getType()));
165  }
166  operandsProcessed = true;
167  } else {
168  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
169  "mismatching number of values yielded from the region");
170  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
171  llvm::Type *operandType =
172  moduleTranslation.convertType(yield->getOperand(i).getType());
173  (void)operandType;
174  assert(continuationBlockPHITypes[i] == operandType &&
175  "values of mismatching types yielded from the region");
176  }
177  }
178  numYields++;
179  }
180  }
181 
182  // Insert PHI nodes in the continuation block for any values forwarded by the
183  // terminators in this region.
184  if (!continuationBlockPHITypes.empty())
185  assert(
186  continuationBlockPHIs &&
187  "expected continuation block PHIs if converted regions yield values");
188  if (continuationBlockPHIs) {
189  llvm::IRBuilderBase::InsertPointGuard guard(builder);
190  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
191  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
192  for (llvm::Type *ty : continuationBlockPHITypes)
193  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
194  }
195 
196  // Convert blocks one by one in topological order to ensure
197  // defs are converted before uses.
198  SetVector<Block *> blocks = getBlocksSortedByDominance(region);
199  for (Block *bb : blocks) {
200  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
201  // Retarget the branch of the entry block to the entry block of the
202  // converted region (regions are single-entry).
203  if (bb->isEntryBlock()) {
204  assert(sourceTerminator->getNumSuccessors() == 1 &&
205  "provided entry block has multiple successors");
206  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
207  "ContinuationBlock is not the successor of the entry block");
208  sourceTerminator->setSuccessor(0, llvmBB);
209  }
210 
211  llvm::IRBuilderBase::InsertPointGuard guard(builder);
212  if (failed(
213  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) {
214  bodyGenStatus = failure();
215  return continuationBlock;
216  }
217 
218  // Special handling for `omp.yield` and `omp.terminator` (we may have more
219  // than one): they return control to the parent OpenMP dialect operation, so
220  // we replace them with a branch to the continuation block. We handle this
221  // here to avoid relying on inter-function communication through the
222  // ModuleTranslation class to set up the correct insertion point. This is
223  // also consistent with MLIR's idiom of handling special region terminators
224  // in the same code that handles the region-owning operation.
225  Operation *terminator = bb->getTerminator();
226  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
227  builder.CreateBr(continuationBlock);
228 
229  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
230  (*continuationBlockPHIs)[i]->addIncoming(
231  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
232  }
233  }
234  // After all blocks have been traversed and values mapped, connect the PHI
235  // nodes to the results of preceding blocks.
236  LLVM::detail::connectPHINodes(region, moduleTranslation);
237 
238  // Remove the blocks and values defined in this region from the mapping since
239  // they are not visible outside of this region. This allows the same region to
240  // be converted several times (i.e. cloned) without clashes, and slightly
241  // speeds up the lookups.
242  moduleTranslation.forgetMapping(region);
243 
244  return continuationBlock;
245 }
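// For illustration only (hypothetical region, not taken from a test): a region
// with two blocks whose `omp.yield` terminators both forward an i32 value,
//
//   ^bb1: ... omp.yield(%a : i32)
//   ^bb2: ... omp.yield(%b : i32)
//
// is lowered so that both blocks branch to `omp.region.cont`, which merges the
// forwarded values through a single PHI node, roughly
//   %res = phi i32 [ %a.llvm, %bb1.llvm ], [ %b.llvm, %bb2.llvm ]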
246 
247 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
248 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
249  switch (kind) {
250  case omp::ClauseProcBindKind::Close:
251  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
252  case omp::ClauseProcBindKind::Master:
253  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
254  case omp::ClauseProcBindKind::Primary:
255  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
256  case omp::ClauseProcBindKind::Spread:
257  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
258  }
259  llvm_unreachable("Unknown ClauseProcBindKind kind");
260 }
261 
262 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
263 static LogicalResult
264 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
265  LLVM::ModuleTranslation &moduleTranslation) {
266  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
267  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
268  // relying on captured variables.
269  LogicalResult bodyGenStatus = success();
270 
271  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
272  // MasterOp has only one region associated with it.
273  auto &region = cast<omp::MasterOp>(opInst).getRegion();
274  builder.restoreIP(codeGenIP);
275  convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation,
276  bodyGenStatus);
277  };
278 
279  // TODO: Perform finalization actions for variables. This has to be
280  // called for variables which have destructors/finalizers.
281  auto finiCB = [&](InsertPointTy codeGenIP) {};
282 
283  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
284  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
285  ompLoc, bodyGenCB, finiCB));
286  return success();
287 }
288 
289 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
290 static LogicalResult
291 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
292  LLVM::ModuleTranslation &moduleTranslation) {
293  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
294  auto criticalOp = cast<omp::CriticalOp>(opInst);
295  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
296  // relying on captured variables.
297  LogicalResult bodyGenStatus = success();
298 
299  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
300  // CriticalOp has only one region associated with it.
301  auto &region = cast<omp::CriticalOp>(opInst).getRegion();
302  builder.restoreIP(codeGenIP);
303  convertOmpOpRegions(region, "omp.critical.region", builder,
304  moduleTranslation, bodyGenStatus);
305  };
306 
307  // TODO: Perform finalization actions for variables. This has to be
308  // called for variables which have destructors/finalizers.
309  auto finiCB = [&](InsertPointTy codeGenIP) {};
310 
311  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
312  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
313  llvm::Constant *hint = nullptr;
314 
315  // If the critical construct has a name, look up its declaration to get the hint.
316  if (criticalOp.getNameAttr()) {
317  // The verifiers in OpenMP Dialect guarantee that all the pointers are
318  // non-null.
319  auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
320  auto criticalDeclareOp =
321  SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
322  symbolRef);
323  hint = llvm::ConstantInt::get(
324  llvm::Type::getInt32Ty(llvmContext),
325  static_cast<int>(criticalDeclareOp.getHintVal()));
326  }
327  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(
328  ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint));
329  return success();
330 }
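// For illustration, a named critical construct is assumed to look roughly like
// the following MLIR (hypothetical names and hint):
//
//   omp.critical.declare @mutex hint(contended)
//   ...
//   omp.critical(@mutex) { ... omp.terminator }
//
// The symbol reference resolves to the omp.critical.declare operation and its
// hint value is forwarded to createCritical() as an i32 constant.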
331 
332 /// Returns a reduction declaration that corresponds to the given reduction
333 /// operation in the given container. Currently only supports reductions inside
334 /// WsloopOp and ParallelOp but can be easily extended as long as the given
335 /// construct implements getNumReductionVars.
336 template <typename T>
337 static std::optional<omp::DeclareReductionOp>
338 findReductionDeclInContainer(T container, omp::ReductionOp reduction) {
339  for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) {
340  if (container.getReductionVars()[i] != reduction.getAccumulator())
341  continue;
342 
343  SymbolRefAttr reductionSymbol =
344  cast<SymbolRefAttr>((*container.getReductions())[i]);
345  auto declareOp =
346  SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
347  container, reductionSymbol);
348  return declareOp;
349  }
350  return std::nullopt;
351 }
352 
353 /// Searches for a reduction in a provided region and the regions
354 /// it is nested in.
355 static omp::DeclareReductionOp findReductionDecl(Operation &containerOp,
356  omp::ReductionOp reduction) {
357  std::optional<omp::DeclareReductionOp> declareOp = std::nullopt;
358  Operation *container = &containerOp;
359 
360  while (!declareOp.has_value() && container) {
361  // Check if current container is supported for reductions searches
362  if (auto par = dyn_cast<omp::ParallelOp>(*container)) {
363  declareOp = findReductionDeclInContainer(par, reduction);
364  } else if (auto loop = dyn_cast<omp::WsloopOp>(*container)) {
365  declareOp = findReductionDeclInContainer(loop, reduction);
366  } else {
367  break;
368  }
369 
370  // See if we can search the parent operation for reductions as well.
371  container = container->getParentOp();
372  }
373 
374  assert(declareOp.has_value() &&
375  "reduction operation must be associated with a declaration");
376 
377  return *declareOp;
378 }
379 
380 /// Populates `reductions` with reduction declarations used in the given loop.
381 template <typename T>
382 static void
383 collectReductionDecls(T loop,
384  SmallVectorImpl<omp::DeclareReductionOp> &reductions) {
385  std::optional<ArrayAttr> attr = loop.getReductions();
386  if (!attr)
387  return;
388 
389  reductions.reserve(reductions.size() + loop.getNumReductionVars());
390  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
391  reductions.push_back(
392  SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
393  loop, symbolRef));
394  }
395 }
396 
397 /// Translates the blocks contained in the given region and appends them at
398 /// the current insertion point of `builder`. The operations of the entry block
399 /// are appended to the current insertion block. If set, `continuationBlockArgs`
400 /// is populated with translated values that correspond to the values
401 /// omp.yield'ed from the region.
402 static LogicalResult inlineConvertOmpRegions(
403  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
404  LLVM::ModuleTranslation &moduleTranslation,
405  SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
406  if (region.empty())
407  return success();
408 
409  // Special case for single-block regions: insert the operations directly at
410  // the current insertion point without creating additional blocks.
411  if (llvm::hasSingleElement(region)) {
412  llvm::Instruction *potentialTerminator =
413  builder.GetInsertBlock()->empty() ? nullptr
414  : &builder.GetInsertBlock()->back();
415 
416  if (potentialTerminator && potentialTerminator->isTerminator())
417  potentialTerminator->removeFromParent();
418  moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
419 
420  if (failed(moduleTranslation.convertBlock(
421  region.front(), /*ignoreArguments=*/true, builder)))
422  return failure();
423 
424  // The continuation arguments are simply the translated terminator operands.
425  if (continuationBlockArgs)
426  llvm::append_range(
427  *continuationBlockArgs,
428  moduleTranslation.lookupValues(region.front().back().getOperands()));
429 
430  // Drop the mapping that is no longer necessary so that the same region can
431  // be processed multiple times.
432  moduleTranslation.forgetMapping(region);
433 
434  if (potentialTerminator && potentialTerminator->isTerminator())
435  potentialTerminator->insertAfter(&builder.GetInsertBlock()->back());
436 
437  return success();
438  }
439 
440  LogicalResult bodyGenStatus = success();
441  SmallVector<llvm::PHINode *> phis;
442  llvm::BasicBlock *continuationBlock = convertOmpOpRegions(
443  region, blockName, builder, moduleTranslation, bodyGenStatus, &phis);
444  if (failed(bodyGenStatus))
445  return failure();
446  if (continuationBlockArgs)
447  llvm::append_range(*continuationBlockArgs, phis);
448  builder.SetInsertPoint(continuationBlock,
449  continuationBlock->getFirstInsertionPt());
450  return success();
451 }
452 
453 namespace {
454 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
455 /// store lambdas with capture.
456 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
457  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
458  llvm::Value *&)>;
459 using OwningAtomicReductionGen =
460  std::function<llvm::OpenMPIRBuilder::InsertPointTy(
461  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
462  llvm::Value *)>;
463 } // namespace
464 
465 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
466 /// reduction declaration. The generator uses `builder` but ignores its
467 /// insertion point.
468 static OwningReductionGen
469 makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
470  LLVM::ModuleTranslation &moduleTranslation) {
471  // The lambda is mutable because we need access to non-const methods of decl
472  // (which aren't actually mutating it), and we must capture decl by-value to
473  // avoid the dangling reference after the parent function returns.
474  OwningReductionGen gen =
475  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
476  llvm::Value *lhs, llvm::Value *rhs,
477  llvm::Value *&result) mutable {
478  Region &reductionRegion = decl.getReductionRegion();
479  moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs);
480  moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs);
481  builder.restoreIP(insertPoint);
482  SmallVector<llvm::Value *> phis;
483  if (failed(inlineConvertOmpRegions(reductionRegion,
484  "omp.reduction.nonatomic.body",
485  builder, moduleTranslation, &phis)))
486  return llvm::OpenMPIRBuilder::InsertPointTy();
487  assert(phis.size() == 1);
488  result = phis[0];
489  return builder.saveIP();
490  };
491  return gen;
492 }
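// For illustration, assuming a reduction declaration whose combiner adds two
// f32 values (hypothetical MLIR, abbreviated):
//
//   omp.declare_reduction @add_f32 : f32
//   ...
//   combiner {
//   ^bb0(%lhs: f32, %rhs: f32):
//     %sum = llvm.fadd %lhs, %rhs : f32
//     omp.yield(%sum : f32)
//   }
//
// the generator above maps %lhs/%rhs to the incoming LLVM values, inlines the
// combiner at `insertPoint`, and returns the single yielded value via `result`.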
493 
494 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
495 /// given reduction declaration. The generator uses `builder` but ignores its
496 /// insertion point. Returns null if there is no atomic region available in the
497 /// reduction declaration.
498 static OwningAtomicReductionGen
499 makeAtomicReductionGen(omp::DeclareReductionOp decl,
500  llvm::IRBuilderBase &builder,
501  LLVM::ModuleTranslation &moduleTranslation) {
502  if (decl.getAtomicReductionRegion().empty())
503  return OwningAtomicReductionGen();
504 
505  // The lambda is mutable because we need access to non-const methods of decl
506  // (which aren't actually mutating it), and we must capture decl by-value to
507  // avoid the dangling reference after the parent function returns.
508  OwningAtomicReductionGen atomicGen =
509  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
510  llvm::Value *lhs, llvm::Value *rhs) mutable {
511  Region &atomicRegion = decl.getAtomicReductionRegion();
512  moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs);
513  moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs);
514  builder.restoreIP(insertPoint);
515  SmallVector<llvm::Value *> phis;
516  if (failed(inlineConvertOmpRegions(atomicRegion,
517  "omp.reduction.atomic.body", builder,
518  moduleTranslation, &phis)))
519  return llvm::OpenMPIRBuilder::InsertPointTy();
520  assert(phis.empty());
521  return builder.saveIP();
522  };
523  return atomicGen;
524 }
525 
526 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
527 static LogicalResult
528 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
529  LLVM::ModuleTranslation &moduleTranslation) {
530  auto orderedOp = cast<omp::OrderedOp>(opInst);
531 
532  omp::ClauseDepend dependType = *orderedOp.getDependTypeVal();
533  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
534  unsigned numLoops = *orderedOp.getNumLoopsVal();
535  SmallVector<llvm::Value *> vecValues =
536  moduleTranslation.lookupValues(orderedOp.getDependVecVars());
537 
538  size_t indexVecValues = 0;
539  while (indexVecValues < vecValues.size()) {
540  SmallVector<llvm::Value *> storeValues;
541  storeValues.reserve(numLoops);
542  for (unsigned i = 0; i < numLoops; i++) {
543  storeValues.push_back(vecValues[indexVecValues]);
544  indexVecValues++;
545  }
546  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
547  findAllocaInsertPoint(builder, moduleTranslation);
548  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
549  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
550  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
551  }
552  return success();
553 }
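// Worked example of the chunking above (illustrative numbers only): with
// numLoops == 2 and four dependence vector values {%a, %b, %c, %d}, the loop
// forms the groups {%a, %b} and {%c, %d} and emits two createOrderedDepend()
// calls, one per sink/source entry of a depth-two doacross loop nest.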
554 
555 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
556 /// OpenMPIRBuilder.
557 static LogicalResult
558 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
559  LLVM::ModuleTranslation &moduleTranslation) {
560  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
561  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
562 
563  // TODO: The code generation for the ordered simd directive is not supported yet.
564  if (orderedRegionOp.getSimd())
565  return failure();
566 
567  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
568  // relying on captured variables.
569  LogicalResult bodyGenStatus = success();
570 
571  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
572  // OrderedOp has only one region associated with it.
573  auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
574  builder.restoreIP(codeGenIP);
575  convertOmpOpRegions(region, "omp.ordered.region", builder,
576  moduleTranslation, bodyGenStatus);
577  };
578 
579  // TODO: Perform finalization actions for variables. This has to be
580  // called for variables which have destructors/finalizers.
581  auto finiCB = [&](InsertPointTy codeGenIP) {};
582 
583  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
584  builder.restoreIP(
585  moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
586  ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getSimd()));
587  return bodyGenStatus;
588 }
589 
590 static LogicalResult
591 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
592  LLVM::ModuleTranslation &moduleTranslation) {
593  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
594  using StorableBodyGenCallbackTy =
595  llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
596 
597  auto sectionsOp = cast<omp::SectionsOp>(opInst);
598 
599  // TODO: Support the following clauses: private, firstprivate, lastprivate,
600  // reduction, allocate
601  if (!sectionsOp.getReductionVars().empty() || sectionsOp.getReductions() ||
602  !sectionsOp.getAllocateVars().empty() ||
603  !sectionsOp.getAllocatorsVars().empty())
604  return emitError(sectionsOp.getLoc())
605  << "reduction and allocate clauses are not supported for sections "
606  "construct";
607 
608  LogicalResult bodyGenStatus = success();
609  SmallVector<StorableBodyGenCallbackTy> sectionCBs;
610 
611  for (Operation &op : *sectionsOp.getRegion().begin()) {
612  auto sectionOp = dyn_cast<omp::SectionOp>(op);
613  if (!sectionOp) // omp.terminator
614  continue;
615 
616  Region &region = sectionOp.getRegion();
617  auto sectionCB = [&region, &builder, &moduleTranslation, &bodyGenStatus](
618  InsertPointTy allocaIP, InsertPointTy codeGenIP) {
619  builder.restoreIP(codeGenIP);
620  convertOmpOpRegions(region, "omp.section.region", builder,
621  moduleTranslation, bodyGenStatus);
622  };
623  sectionCBs.push_back(sectionCB);
624  }
625 
626  // No sections within the omp.sections operation - skip generation. This
627  // situation is only possible if there is only a terminator operation inside
628  // the sections operation.
629  if (sectionCBs.empty())
630  return success();
631 
632  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
633 
634  // TODO: Perform appropriate actions according to the data-sharing
635  // attribute (shared, private, firstprivate, ...) of variables.
636  // Currently defaults to shared.
637  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
638  llvm::Value &vPtr,
639  llvm::Value *&replacementValue) -> InsertPointTy {
640  replacementValue = &vPtr;
641  return codeGenIP;
642  };
643 
644  // TODO: Perform finalization actions for variables. This has to be
645  // called for variables which have destructors/finalizers.
646  auto finiCB = [&](InsertPointTy codeGenIP) {};
647 
648  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
649  findAllocaInsertPoint(builder, moduleTranslation);
650  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
651  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
652  ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
653  sectionsOp.getNowait()));
654  return bodyGenStatus;
655 }
656 
657 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
658 static LogicalResult
659 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
660  LLVM::ModuleTranslation &moduleTranslation) {
661  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
662  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
663  LogicalResult bodyGenStatus = success();
664  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
665  builder.restoreIP(codegenIP);
666  convertOmpOpRegions(singleOp.getRegion(), "omp.single.region", builder,
667  moduleTranslation, bodyGenStatus);
668  };
669  auto finiCB = [&](InsertPointTy codeGenIP) {};
670 
671  // Handle copyprivate
672  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
673  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateFuncs();
674  SmallVector<llvm::Value *> llvmCPVars;
675  SmallVector<llvm::Function *> llvmCPFuncs;
676  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
677  llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
678  auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
679  singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
680  llvmCPFuncs.push_back(
681  moduleTranslation.lookupFunction(llvmFuncOp.getName()));
682  }
683 
684  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle(
685  ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars, llvmCPFuncs));
686  return bodyGenStatus;
687 }
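// Illustrative pairing for the copyprivate lowering above (hypothetical
// clause): for `copyprivate(%x -> @copy_x : !llvm.ptr)`, the i-th entry of
// llvmCPVars is the translated `%x` and the i-th entry of llvmCPFuncs is the
// LLVM function for `@copy_x`; createSingle() uses these pairs to broadcast
// the executing thread's copy to all other threads.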
688 
689 /// Converts an OpenMP teams construct into LLVM IR using OpenMPIRBuilder.
690 static LogicalResult
691 convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
692  LLVM::ModuleTranslation &moduleTranslation) {
693  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
694  LogicalResult bodyGenStatus = success();
695  if (!op.getAllocatorsVars().empty() || op.getReductions())
696  return op.emitError("unhandled clauses for translation to LLVM IR");
697 
698  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
699  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
700  moduleTranslation, allocaIP);
701  builder.restoreIP(codegenIP);
702  convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
703  moduleTranslation, bodyGenStatus);
704  };
705 
706  llvm::Value *numTeamsLower = nullptr;
707  if (Value numTeamsLowerVar = op.getNumTeamsLower())
708  numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);
709 
710  llvm::Value *numTeamsUpper = nullptr;
711  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
712  numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);
713 
714  llvm::Value *threadLimit = nullptr;
715  if (Value threadLimitVar = op.getThreadLimit())
716  threadLimit = moduleTranslation.lookupValue(threadLimitVar);
717 
718  llvm::Value *ifExpr = nullptr;
719  if (Value ifExprVar = op.getIfExpr())
720  ifExpr = moduleTranslation.lookupValue(ifExprVar);
721 
722  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
723  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams(
724  ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr));
725  return bodyGenStatus;
726 }
727 
728 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
729 static LogicalResult
730 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
731  LLVM::ModuleTranslation &moduleTranslation) {
732  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
733  LogicalResult bodyGenStatus = success();
734  if (taskOp.getUntiedAttr() || taskOp.getMergeableAttr() ||
735  taskOp.getInReductions() || taskOp.getPriority() ||
736  !taskOp.getAllocateVars().empty()) {
737  return taskOp.emitError("unhandled clauses for translation to LLVM IR");
738  }
739  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
740  // Save the alloca insertion point on ModuleTranslation stack for use in
741  // nested regions.
742  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
743  moduleTranslation, allocaIP);
744 
745  builder.restoreIP(codegenIP);
746  convertOmpOpRegions(taskOp.getRegion(), "omp.task.region", builder,
747  moduleTranslation, bodyGenStatus);
748  };
749 
750  SmallVector<llvm::OpenMPIRBuilder::DependData> dds;
751  if (!taskOp.getDependVars().empty() && taskOp.getDepends()) {
752  for (auto dep :
753  llvm::zip(taskOp.getDependVars(), taskOp.getDepends()->getValue())) {
754  llvm::omp::RTLDependenceKindTy type;
755  switch (
756  cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
757  case mlir::omp::ClauseTaskDepend::taskdependin:
758  type = llvm::omp::RTLDependenceKindTy::DepIn;
759  break;
760  // The OpenMP runtime requires that the codegen for 'depend' clause for
761  // 'out' dependency kind must be the same as codegen for 'depend' clause
762  // with 'inout' dependency.
763  case mlir::omp::ClauseTaskDepend::taskdependout:
764  case mlir::omp::ClauseTaskDepend::taskdependinout:
765  type = llvm::omp::RTLDependenceKindTy::DepInOut;
766  break;
767  };
768  llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
769  llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
770  dds.emplace_back(dd);
771  }
772  }
773 
774  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
775  findAllocaInsertPoint(builder, moduleTranslation);
776  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
777  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask(
778  ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
779  moduleTranslation.lookupValue(taskOp.getFinalExpr()),
780  moduleTranslation.lookupValue(taskOp.getIfExpr()), dds));
781  return bodyGenStatus;
782 }
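// For illustration (hypothetical clause): `depend(taskdependin -> %p : !llvm.ptr)`
// becomes a DependData entry of kind DepIn holding the translated pointer,
// while both `out` and `inout` dependencies are encoded as DepInOut, as
// required by the OpenMP runtime (see the switch above).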
783 
784 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
785 static LogicalResult
786 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
787  LLVM::ModuleTranslation &moduleTranslation) {
788  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
789  LogicalResult bodyGenStatus = success();
790  if (!tgOp.getTaskReductionVars().empty() || !tgOp.getAllocateVars().empty()) {
791  return tgOp.emitError("unhandled clauses for translation to LLVM IR");
792  }
793  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
794  builder.restoreIP(codegenIP);
795  convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region", builder,
796  moduleTranslation, bodyGenStatus);
797  };
798  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
799  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
800  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTaskgroup(
801  ompLoc, allocaIP, bodyCB));
802  return bodyGenStatus;
803 }
804 
805 /// Allocate space for privatized reduction variables.
806 template <typename T>
807 static void allocByValReductionVars(
808  T loop, llvm::IRBuilderBase &builder,
809  LLVM::ModuleTranslation &moduleTranslation,
810  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
811  SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
812  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
813  DenseMap<Value, llvm::Value *> &reductionVariableMap) {
814  llvm::IRBuilderBase::InsertPointGuard guard(builder);
815  builder.restoreIP(allocaIP);
816  auto args =
817  loop.getRegion().getArguments().take_back(loop.getNumReductionVars());
818 
819  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
820  llvm::Value *var = builder.CreateAlloca(
821  moduleTranslation.convertType(reductionDecls[i].getType()));
822  moduleTranslation.mapValue(args[i], var);
823  privateReductionVariables.push_back(var);
824  reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
825  }
826 }
827 
828 /// Map input argument to all reduction initialization regions
829 template <typename T>
830 static void
831 mapInitializationArg(T loop, LLVM::ModuleTranslation &moduleTranslation,
832  SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
833  unsigned i) {
834  // map input argument to the initialization region
835  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
836  Region &initializerRegion = reduction.getInitializerRegion();
837  Block &entry = initializerRegion.front();
838  assert(entry.getNumArguments() == 1 &&
839  "the initialization region has one argument");
840 
841  mlir::Value mlirSource = loop.getReductionVars()[i];
842  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
843  assert(llvmSource && "lookup reduction var");
844  moduleTranslation.mapValue(entry.getArgument(0), llvmSource);
845 }
846 
847 /// Collect reduction info
848 template <typename T>
849 static void collectReductionInfo(
850  T loop, llvm::IRBuilderBase &builder,
851  LLVM::ModuleTranslation &moduleTranslation,
852  SmallVector<omp::DeclareReductionOp> &reductionDecls,
853  SmallVector<OwningReductionGen> &owningReductionGens,
854  SmallVector<OwningAtomicReductionGen> &owningAtomicReductionGens,
855  const SmallVector<llvm::Value *> &privateReductionVariables,
856  SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> &reductionInfos) {
857  unsigned numReductions = loop.getNumReductionVars();
858 
859  for (unsigned i = 0; i < numReductions; ++i) {
860  owningReductionGens.push_back(
861  makeReductionGen(reductionDecls[i], builder, moduleTranslation));
862  owningAtomicReductionGens.push_back(
863  makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
864  }
865 
866  // Collect the reduction information.
867  reductionInfos.reserve(numReductions);
868  for (unsigned i = 0; i < numReductions; ++i) {
869  llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
870  if (owningAtomicReductionGens[i])
871  atomicGen = owningAtomicReductionGens[i];
872  llvm::Value *variable =
873  moduleTranslation.lookupValue(loop.getReductionVars()[i]);
874  reductionInfos.push_back(
875  {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
876  privateReductionVariables[i], owningReductionGens[i], atomicGen});
877  }
878 }
879 
880 /// Handling of DeclareReductionOp's cleanup region.
881 static LogicalResult inlineReductionCleanup(
882  llvm::SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
883  llvm::ArrayRef<llvm::Value *> privateReductionVariables,
884  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder) {
885  for (auto [i, reductionDecl] : llvm::enumerate(reductionDecls)) {
886  Region &cleanupRegion = reductionDecl.getCleanupRegion();
887  if (cleanupRegion.empty())
888  continue;
889 
890  // map the argument to the cleanup region
891  Block &entry = cleanupRegion.front();
892 
893  llvm::Instruction *potentialTerminator =
894  builder.GetInsertBlock()->empty() ? nullptr
895  : &builder.GetInsertBlock()->back();
896  if (potentialTerminator && potentialTerminator->isTerminator())
897  builder.SetInsertPoint(potentialTerminator);
898  llvm::Value *reductionVar = builder.CreateLoad(
899  moduleTranslation.convertType(entry.getArgument(0).getType()),
900  privateReductionVariables[i]);
901 
902  moduleTranslation.mapValue(entry.getArgument(0), reductionVar);
903 
904  if (failed(inlineConvertOmpRegions(cleanupRegion, "omp.reduction.cleanup",
905  builder, moduleTranslation)))
906  return failure();
907 
908  // clear block argument mapping in case it needs to be re-created with a
909  // different source for another use of the same reduction decl
910  moduleTranslation.forgetMapping(cleanupRegion);
911  }
912  return success();
913 }
914 
915 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
916 static LogicalResult
917 convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
918  LLVM::ModuleTranslation &moduleTranslation) {
919  auto loop = cast<omp::WsloopOp>(opInst);
920  const bool isByRef = loop.getByref();
921  // TODO: this should be in the op verifier instead.
922  if (loop.getLowerBound().empty())
923  return failure();
924 
925  // Static is the default.
926  auto schedule =
927  loop.getScheduleVal().value_or(omp::ClauseScheduleKind::Static);
928 
929  // Find the loop configuration.
930  llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[0]);
931  llvm::Type *ivType = step->getType();
932  llvm::Value *chunk = nullptr;
933  if (loop.getScheduleChunkVar()) {
934  llvm::Value *chunkVar =
935  moduleTranslation.lookupValue(loop.getScheduleChunkVar());
936  chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
937  }
938 
939  SmallVector<omp::DeclareReductionOp> reductionDecls;
940  collectReductionDecls(loop, reductionDecls);
941  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
942  findAllocaInsertPoint(builder, moduleTranslation);
943 
944  SmallVector<llvm::Value *> privateReductionVariables;
945  DenseMap<Value, llvm::Value *> reductionVariableMap;
946  if (!isByRef) {
947  allocByValReductionVars(loop, builder, moduleTranslation, allocaIP,
948  reductionDecls, privateReductionVariables,
949  reductionVariableMap);
950  }
951 
952  // Before the loop, store the initial values of reductions into reduction
953  // variables. Although this could be done after allocas, we don't want to
954  // interfere with the alloca insertion point.
955  MutableArrayRef<BlockArgument> reductionArgs =
956  loop.getRegion().getArguments().take_back(loop.getNumReductionVars());
957  for (unsigned i = 0; i < loop.getNumReductionVars(); ++i) {
958  SmallVector<llvm::Value *> phis;
959 
960  // map block argument to initializer region
961  mapInitializationArg(loop, moduleTranslation, reductionDecls, i);
962 
963  if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
964  "omp.reduction.neutral", builder,
965  moduleTranslation, &phis)))
966  return failure();
967  assert(phis.size() == 1 && "expected one value to be yielded from the "
968  "reduction neutral element declaration region");
969  if (isByRef) {
970  // Allocate reduction variable (which is a pointer to the real reduction
971  // variable allocated in the inlined region)
972  llvm::Value *var = builder.CreateAlloca(
973  moduleTranslation.convertType(reductionDecls[i].getType()));
974  // Store the result of the inlined region to the allocated reduction var
975  // ptr
976  builder.CreateStore(phis[0], var);
977 
978  privateReductionVariables.push_back(var);
979  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
980  reductionVariableMap.try_emplace(loop.getReductionVars()[i], phis[0]);
981  } else {
982  // for by-ref case the store is inside of the reduction region
983  builder.CreateStore(phis[0], privateReductionVariables[i]);
984  // the rest was handled in allocByValReductionVars
985  }
986 
987  // forget the mapping for the initializer region because we might need a
988  // different mapping if this reduction declaration is re-used for a
989  // different variable
990  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
991  }
992 
993  // Store the mapping between reduction variables and their private copies on
994  // ModuleTranslation stack. It can be then recovered when translating
995  // omp.reduce operations in a separate call.
996  LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
997  moduleTranslation, reductionVariableMap);
998 
999  // Set up the source location value for OpenMP runtime.
1000  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1001 
1002  // Generator of the canonical loop body.
1003  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1004  // relying on captured variables.
1005  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
1006  SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
1007  LogicalResult bodyGenStatus = success();
1008  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
1009  // Make sure further conversions know about the induction variable.
1010  moduleTranslation.mapValue(
1011  loop.getRegion().front().getArgument(loopInfos.size()), iv);
1012 
1013  // Capture the body insertion point for use in nested loops. BodyIP of the
1014  // CanonicalLoopInfo always points to the beginning of the entry block of
1015  // the body.
1016  bodyInsertPoints.push_back(ip);
1017 
1018  if (loopInfos.size() != loop.getNumLoops() - 1)
1019  return;
1020 
1021  // Convert the body of the loop.
1022  builder.restoreIP(ip);
1023  convertOmpOpRegions(loop.getRegion(), "omp.wsloop.region", builder,
1024  moduleTranslation, bodyGenStatus);
1025  };
1026 
1027  // Delegate actual loop construction to the OpenMP IRBuilder.
1028  // TODO: this currently assumes Wsloop is semantically similar to SCF loop,
1029  // i.e. it has a positive step, uses signed integer semantics. Reconsider
1030  // this code when Wsloop clearly supports more cases.
1031  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1032  for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
1033  llvm::Value *lowerBound =
1034  moduleTranslation.lookupValue(loop.getLowerBound()[i]);
1035  llvm::Value *upperBound =
1036  moduleTranslation.lookupValue(loop.getUpperBound()[i]);
1037  llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[i]);
1038 
1039  // Make sure loop trip counts are emitted in the preheader of the outermost
1040  // loop at the latest so that they are all available when the new collapsed
1041  // loop is created below.
1042  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
1043  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
1044  if (i != 0) {
1045  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back());
1046  computeIP = loopInfos.front()->getPreheaderIP();
1047  }
1048  loopInfos.push_back(ompBuilder->createCanonicalLoop(
1049  loc, bodyGen, lowerBound, upperBound, step,
1050  /*IsSigned=*/true, loop.getInclusive(), computeIP));
1051 
1052  if (failed(bodyGenStatus))
1053  return failure();
1054  }
1055 
1056  // Collapse loops. Store the insertion point because LoopInfos may get
1057  // invalidated.
1058  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
1059  llvm::CanonicalLoopInfo *loopInfo =
1060  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
1061 
1062  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1063 
1064  // TODO: Handle doacross loops when the ordered clause has a parameter.
1065  bool isOrdered = loop.getOrderedVal().has_value();
1066  std::optional<omp::ScheduleModifier> scheduleModifier =
1067  loop.getScheduleModifier();
1068  bool isSimd = loop.getSimdModifier();
1069 
1070  ompBuilder->applyWorkshareLoop(
1071  ompLoc.DL, loopInfo, allocaIP, !loop.getNowait(),
1072  convertToScheduleKind(schedule), chunk, isSimd,
1073  scheduleModifier == omp::ScheduleModifier::monotonic,
1074  scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered);
1075 
1076  // Continue building IR after the loop. Note that the LoopInfo returned by
1077  // `collapseLoops` points inside the outermost loop and is intended for
1078  // potential further loop transformations. Use the insertion point stored
1079  // before collapsing loops instead.
1080  builder.restoreIP(afterIP);
1081 
1082  // Process the reductions if required.
1083  if (loop.getNumReductionVars() == 0)
1084  return success();
1085 
1086  // Create the reduction generators. We need to own them here because
1087  // ReductionInfo only accepts references to the generators.
1088  SmallVector<OwningReductionGen> owningReductionGens;
1089  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1090  SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
1091  collectReductionInfo(loop, builder, moduleTranslation, reductionDecls,
1092  owningReductionGens, owningAtomicReductionGens,
1093  privateReductionVariables, reductionInfos);
1094 
1095  // The call to createReductions below expects the block to have a
1096  // terminator. Create an unreachable instruction to serve as terminator
1097  // and remove it later.
1098  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1099  builder.SetInsertPoint(tempTerminator);
1100  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
1101  ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
1102  loop.getNowait(), isByRef);
1103  if (!contInsertPoint.getBlock())
1104  return loop->emitOpError() << "failed to convert reductions";
1105  auto nextInsertionPoint =
1106  ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
1107  tempTerminator->eraseFromParent();
1108  builder.restoreIP(nextInsertionPoint);
1109 
1110  // after the workshare loop, deallocate private reduction variables
1111  return inlineReductionCleanup(reductionDecls, privateReductionVariables,
1112  moduleTranslation, builder);
1113 }
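// Rough shape of the code generated above for a collapsed two-level wsloop
// (illustrative summary): each createCanonicalLoop() call yields one
// CanonicalLoopInfo, collapseLoops() rewrites them into a single loop over the
// product of the trip counts, and applyWorkshareLoop() wraps that loop in the
// appropriate runtime calls for the schedule (e.g. __kmpc_for_static_init /
// __kmpc_for_static_fini for a static schedule). Reductions, if present, are
// then combined by createReductions() followed by an OMPD_for barrier.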
1114 
1115 /// A RAII class that on construction replaces the region arguments of the
1116 /// parallel op (which correspond to private variables) with the actual private
1117 /// variables they correspond to. This prepares the parallel op so that it
1118 /// matches what is expected by the OMPIRBuilder.
1119 ///
1120 /// On destruction, it restores the original state of the operation so that on
1121 /// the MLIR side, the op is not affected by conversion to LLVM IR.
1122 class OmpParallelOpConversionManager {
1123 public:
1124  OmpParallelOpConversionManager(omp::ParallelOp opInst)
1125  : region(opInst.getRegion()), privateVars(opInst.getPrivateVars()),
1126  privateArgBeginIdx(opInst.getNumReductionVars()),
1127  privateArgEndIdx(privateArgBeginIdx + privateVars.size()) {
1128  auto privateVarsIt = privateVars.begin();
1129 
1130  for (size_t argIdx = privateArgBeginIdx; argIdx < privateArgEndIdx;
1131  ++argIdx, ++privateVarsIt)
1132  mlir::replaceAllUsesInRegionWith(region.getArgument(argIdx),
1133  *privateVarsIt, region);
1134  }
1135 
1136  ~OmpParallelOpConversionManager() {
1137  auto privateVarsIt = privateVars.begin();
1138 
1139  for (size_t argIdx = privateArgBeginIdx; argIdx < privateArgEndIdx;
1140  ++argIdx, ++privateVarsIt)
1141  mlir::replaceAllUsesInRegionWith(*privateVarsIt,
1142  region.getArgument(argIdx), region);
1143  }
1144 
1145 private:
1146  Region &region;
1147  OperandRange privateVars;
1148  unsigned privateArgBeginIdx;
1149  unsigned privateArgEndIdx;
1150 };
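// Usage sketch, mirroring the call in convertOmpParallel below: constructing
//   OmpParallelOpConversionManager raii(opInst);
// rewrites uses of the private-variable block arguments to the underlying
// values for the duration of the translation, and the destructor restores the
// block-argument form once `raii` goes out of scope.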
1151 
1152 /// Converts the OpenMP parallel operation to LLVM IR.
1153 static LogicalResult
1154 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
1155  LLVM::ModuleTranslation &moduleTranslation) {
1156  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1157  OmpParallelOpConversionManager raii(opInst);
1158  const bool isByRef = opInst.getByref();
1159 
1160  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1161  // relying on captured variables.
1162  LogicalResult bodyGenStatus = success();
1163  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1164 
1165  // Collect reduction declarations
1166  SmallVector<omp::DeclareReductionOp> reductionDecls;
1167  collectReductionDecls(opInst, reductionDecls);
1168  SmallVector<llvm::Value *> privateReductionVariables;
1169 
1170  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1171  // Allocate reduction vars
1172  DenseMap<Value, llvm::Value *> reductionVariableMap;
1173  if (!isByRef) {
1174  allocByValReductionVars(opInst, builder, moduleTranslation, allocaIP,
1175  reductionDecls, privateReductionVariables,
1176  reductionVariableMap);
1177  }
1178 
1179  // Initialize reduction vars
1180  builder.restoreIP(allocaIP);
1181  MutableArrayRef<BlockArgument> reductionArgs =
1182  opInst.getRegion().getArguments().take_back(
1183  opInst.getNumReductionVars());
1184  for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
1185  SmallVector<llvm::Value *> phis;
1186 
1187  // map the block argument
1188  mapInitializationArg(opInst, moduleTranslation, reductionDecls, i);
1189  if (failed(inlineConvertOmpRegions(
1190  reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
1191  builder, moduleTranslation, &phis)))
1192  bodyGenStatus = failure();
1193  assert(phis.size() == 1 &&
1194  "expected one value to be yielded from the "
1195  "reduction neutral element declaration region");
1196  builder.restoreIP(allocaIP);
1197 
1198  if (isByRef) {
1199  // Allocate reduction variable (which is a pointer to the real reduction
1200  // variable allocated in the inlined region)
1201  llvm::Value *var = builder.CreateAlloca(
1202  moduleTranslation.convertType(reductionDecls[i].getType()));
1203  // Store the result of the inlined region to the allocated reduction var
1204  // ptr
1205  builder.CreateStore(phis[0], var);
1206 
1207  privateReductionVariables.push_back(var);
1208  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
1209  reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]);
1210  } else {
1211  // for by-ref case the store is inside of the reduction init region
1212  builder.CreateStore(phis[0], privateReductionVariables[i]);
1213  // the rest is done in allocByValReductionVars
1214  }
1215 
1216  // clear block argument mapping in case it needs to be re-created with a
1217  // different source for another use of the same reduction decl
1218  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
1219  }
1220 
1221  // Store the mapping between reduction variables and their private copies on
1222  // ModuleTranslation stack. It can be then recovered when translating
1223  // omp.reduce operations in a separate call.
1224  LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
1225  moduleTranslation, reductionVariableMap);
1226 
1227  // Save the alloca insertion point on ModuleTranslation stack for use in
1228  // nested regions.
1229  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
1230  moduleTranslation, allocaIP);
1231 
1232  // ParallelOp has only one region associated with it.
1233  builder.restoreIP(codeGenIP);
1234  auto regionBlock =
1235  convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
1236  moduleTranslation, bodyGenStatus);
1237 
1238  // Process the reductions if required.
1239  if (opInst.getNumReductionVars() > 0) {
1240  // Collect reduction info
1241  SmallVector<OwningReductionGen> owningReductionGens;
1242  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1243  SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
1244  collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
1245  owningReductionGens, owningAtomicReductionGens,
1246  privateReductionVariables, reductionInfos);
1247 
1248  // Move to region cont block
1249  builder.SetInsertPoint(regionBlock->getTerminator());
1250 
1251  // Generate reductions from info
1252  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1253  builder.SetInsertPoint(tempTerminator);
1254 
1255  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
1256  ompBuilder->createReductions(builder.saveIP(), allocaIP,
1257  reductionInfos, false, isByRef);
1258  if (!contInsertPoint.getBlock()) {
1259  bodyGenStatus = opInst->emitOpError() << "failed to convert reductions";
1260  return;
1261  }
1262 
1263  tempTerminator->eraseFromParent();
1264  builder.restoreIP(contInsertPoint);
1265  }
1266  };
1267 
1268  // TODO: Perform appropriate actions according to the data-sharing
1269  // attribute (shared, private, firstprivate, ...) of variables.
1270  // Currently shared and private are supported.
1271  auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
1272  llvm::Value &, llvm::Value &vPtr,
1273  llvm::Value *&replacementValue) -> InsertPointTy {
1274  replacementValue = &vPtr;
1275 
1276  // If this is a private value, this lambda will return the corresponding
1277  // mlir value and its `PrivateClauseOp`. Otherwise, empty values are
1278  // returned.
1279  auto [privVar, privatizerClone] =
1280  [&]() -> std::pair<mlir::Value, omp::PrivateClauseOp> {
1281  if (!opInst.getPrivateVars().empty()) {
1282  auto privVars = opInst.getPrivateVars();
1283  auto privatizers = opInst.getPrivatizers();
1284 
1285  for (auto [privVar, privatizerAttr] :
1286  llvm::zip_equal(privVars, *privatizers)) {
1287  // Find the MLIR private variable corresponding to the LLVM value
1288  // being privatized.
1289  llvm::Value *llvmPrivVar = moduleTranslation.lookupValue(privVar);
1290  if (llvmPrivVar != &vPtr)
1291  continue;
1292 
1293  SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerAttr);
1294  omp::PrivateClauseOp privatizer =
1295  SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
1296  opInst, privSym);
1297 
1298  // Clone the privatizer in case it is used by more than one parallel
1299  // region. The privatizer is processed in-place (see below) before it
1300  // gets inlined in the parallel region and therefore processing the
1301  // original op is dangerous.
1302  return {privVar, privatizer.clone()};
1303  }
1304  }
1305 
1306  return {mlir::Value(), omp::PrivateClauseOp()};
1307  }();
1308 
1309  if (privVar) {
1310  Region &allocRegion = privatizerClone.getAllocRegion();
1311 
1312  // If this is a `firstprivate` clause, prepare the `omp.private` op by:
1313  if (privatizerClone.getDataSharingType() ==
1314  omp::DataSharingClauseType::FirstPrivate) {
1315  auto oldAllocBackBlock = std::prev(allocRegion.end());
1316  omp::YieldOp oldAllocYieldOp =
1317  llvm::cast<omp::YieldOp>(oldAllocBackBlock->getTerminator());
1318 
1319  Region &copyRegion = privatizerClone.getCopyRegion();
1320 
1321  mlir::IRRewriter copyCloneBuilder(&moduleTranslation.getContext());
1322  // 1. Cloning the `copy` region to the end of the `alloc` region.
1323  copyCloneBuilder.cloneRegionBefore(copyRegion, allocRegion,
1324  allocRegion.end());
1325 
1326  auto newCopyRegionFrontBlock = std::next(oldAllocBackBlock);
1327  // 2. Merging the last `alloc` block with the first block in the `copy`
1328  // region clone.
1329  // 3. Re-mapping the first argument of the `copy` region to be the
1330  // argument of the `alloc` region and the second argument of the `copy`
1331  // region to be the yielded value of the `alloc` region (this is the
1332  // private clone of the privatized value).
1333  copyCloneBuilder.mergeBlocks(
1334  &*newCopyRegionFrontBlock, &*oldAllocBackBlock,
1335  {allocRegion.getArgument(0), oldAllocYieldOp.getOperand(0)});
1336 
1337  // 4. The old terminator of the `alloc` region is not needed anymore, so
1338  // delete it.
1339  oldAllocYieldOp.erase();
1340  }
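 // Illustrative sketch (editorial addition, not from the original source):
 // for a hypothetical firstprivate privatizer of the form
 //   omp.private {type = firstprivate} @x.priv : !llvm.ptr alloc {
 //   ^bb0(%orig: !llvm.ptr):
 //     %copy = llvm.alloca ...
 //     omp.yield(%copy : !llvm.ptr)
 //   } copy {
 //   ^bb0(%orig: !llvm.ptr, %copy: !llvm.ptr):
 //     ... load from %orig, store to %copy ...
 //     omp.yield(%copy : !llvm.ptr)
 //   }
 // the steps above leave a single `alloc` region that allocates the private
 // copy and then performs the copy, with the cloned `copy` block's arguments
 // remapped to the `alloc` argument and its yielded value.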
1341 
1342  // Replace the privatizer block argument with the MLIR value being
1343  // privatized. This way, the body of the privatizer will be changed from
1344  // using the region/block argument to the value being privatized.
1345  auto allocRegionArg = allocRegion.getArgument(0);
1346  replaceAllUsesInRegionWith(allocRegionArg, privVar, allocRegion);
1347 
1348  auto oldIP = builder.saveIP();
1349  builder.restoreIP(allocaIP);
1350 
1351  SmallVector<llvm::Value *, 1> yieldedValues;
1352  if (failed(inlineConvertOmpRegions(allocRegion, "omp.privatizer", builder,
1353  moduleTranslation, &yieldedValues))) {
1354  opInst.emitError("failed to inline `alloc` region of an `omp.private` "
1355  "op in the parallel region");
1356  bodyGenStatus = failure();
1357  } else {
1358  assert(yieldedValues.size() == 1);
1359  replacementValue = yieldedValues.front();
1360  }
1361 
1362  privatizerClone.erase();
1363  builder.restoreIP(oldIP);
1364  }
1365 
1366  return codeGenIP;
1367  };
1368 
1369  // TODO: Perform finalization actions for variables. This has to be
1370  // called for variables which have destructors/finalizers.
1371  auto finiCB = [&](InsertPointTy codeGenIP) {
1372  InsertPointTy oldIP = builder.saveIP();
1373  builder.restoreIP(codeGenIP);
1374 
1375  // if the reduction has a cleanup region, inline it here to finalize the
1376  // reduction variables
1377  if (failed(inlineReductionCleanup(reductionDecls, privateReductionVariables,
1378  moduleTranslation, builder)))
1379  bodyGenStatus = failure();
1380 
1381  builder.restoreIP(oldIP);
1382  };
1383 
1384  llvm::Value *ifCond = nullptr;
1385  if (auto ifExprVar = opInst.getIfExprVar())
1386  ifCond = moduleTranslation.lookupValue(ifExprVar);
1387  llvm::Value *numThreads = nullptr;
1388  if (auto numThreadsVar = opInst.getNumThreadsVar())
1389  numThreads = moduleTranslation.lookupValue(numThreadsVar);
1390  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
1391  if (auto bind = opInst.getProcBindVal())
1392  pbKind = getProcBindKind(*bind);
1393  // TODO: Is the Parallel construct cancellable?
1394  bool isCancellable = false;
1395 
1396  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1397  findAllocaInsertPoint(builder, moduleTranslation);
1398  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1399 
1400  builder.restoreIP(
1401  ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
1402  ifCond, numThreads, pbKind, isCancellable));
1403 
1404  return bodyGenStatus;
1405 }
1406 
1407 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
1408 static LogicalResult
1409 convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
1410  LLVM::ModuleTranslation &moduleTranslation) {
1411  auto loop = cast<omp::SimdLoopOp>(opInst);
1412 
1413  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1414 
1415  // Generator of the canonical loop body.
1416  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1417  // relying on captured variables.
1418  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
1419  SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
1420  LogicalResult bodyGenStatus = success();
1421  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
1422  // Make sure further conversions know about the induction variable.
1423  moduleTranslation.mapValue(
1424  loop.getRegion().front().getArgument(loopInfos.size()), iv);
1425 
1426  // Capture the body insertion point for use in nested loops. BodyIP of the
1427  // CanonicalLoopInfo always points to the beginning of the entry block of
1428  // the body.
1429  bodyInsertPoints.push_back(ip);
1430 
1431  if (loopInfos.size() != loop.getNumLoops() - 1)
1432  return;
1433 
1434  // Convert the body of the loop.
1435  builder.restoreIP(ip);
1436  convertOmpOpRegions(loop.getRegion(), "omp.simdloop.region", builder,
1437  moduleTranslation, bodyGenStatus);
1438  };
1439 
1440  // Delegate actual loop construction to the OpenMP IRBuilder.
1441  // TODO: this currently assumes SimdLoop is semantically similar to SCF loop,
1442  // i.e. it has a positive step and uses signed integer semantics. Reconsider
1443  // this code when SimdLoop clearly supports more cases.
1444  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1445  for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
1446  llvm::Value *lowerBound =
1447  moduleTranslation.lookupValue(loop.getLowerBound()[i]);
1448  llvm::Value *upperBound =
1449  moduleTranslation.lookupValue(loop.getUpperBound()[i]);
1450  llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[i]);
1451 
1452  // Make sure loop trip counts are emitted in the preheader of the outermost
1453  // loop at the latest so that they are all available for the new collapsed
1454  // loop that will be created below.
1455  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
1456  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
1457  if (i != 0) {
1458  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
1459  ompLoc.DL);
1460  computeIP = loopInfos.front()->getPreheaderIP();
1461  }
1462  loopInfos.push_back(ompBuilder->createCanonicalLoop(
1463  loc, bodyGen, lowerBound, upperBound, step,
1464  /*IsSigned=*/true, /*Inclusive=*/true, computeIP));
1465 
1466  if (failed(bodyGenStatus))
1467  return failure();
1468  }
1469 
1470  // Collapse loops.
1471  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
1472  llvm::CanonicalLoopInfo *loopInfo =
1473  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
1474 
1475  llvm::ConstantInt *simdlen = nullptr;
1476  if (std::optional<uint64_t> simdlenVar = loop.getSimdlen())
1477  simdlen = builder.getInt64(simdlenVar.value());
1478 
1479  llvm::ConstantInt *safelen = nullptr;
1480  if (std::optional<uint64_t> safelenVar = loop.getSafelen())
1481  safelen = builder.getInt64(safelenVar.value());
1482 
1483  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
1484  ompBuilder->applySimd(
1485  loopInfo, alignedVars,
1486  loop.getIfExpr() ? moduleTranslation.lookupValue(loop.getIfExpr())
1487  : nullptr,
1488  llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen);
1489 
1490  builder.restoreIP(afterIP);
1491  return success();
1492 }
1493 
1494 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
1495 static llvm::AtomicOrdering
1496 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
1497  if (!ao)
1498  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
1499 
1500  switch (*ao) {
1501  case omp::ClauseMemoryOrderKind::Seq_cst:
1502  return llvm::AtomicOrdering::SequentiallyConsistent;
1503  case omp::ClauseMemoryOrderKind::Acq_rel:
1504  return llvm::AtomicOrdering::AcquireRelease;
1505  case omp::ClauseMemoryOrderKind::Acquire:
1506  return llvm::AtomicOrdering::Acquire;
1507  case omp::ClauseMemoryOrderKind::Release:
1508  return llvm::AtomicOrdering::Release;
1509  case omp::ClauseMemoryOrderKind::Relaxed:
1510  return llvm::AtomicOrdering::Monotonic;
1511  }
1512  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
1513 }
1514 
1515 /// Convert omp.atomic.read operation to LLVM IR.
1516 static LogicalResult
1517 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
1518  LLVM::ModuleTranslation &moduleTranslation) {
1519 
1520  auto readOp = cast<omp::AtomicReadOp>(opInst);
1521  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1522 
1523  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1524 
1525  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrderVal());
1526  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
1527  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
1528 
1529  llvm::Type *elementType =
1530  moduleTranslation.convertType(readOp.getElementType());
1531 
1532  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
1533  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
1534  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
1535  return success();
1536 }
1537 
1538 /// Converts an omp.atomic.write operation to LLVM IR.
1539 static LogicalResult
1540 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
1541  LLVM::ModuleTranslation &moduleTranslation) {
1542  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
1543  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1544 
1545  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1546  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrderVal());
1547  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
1548  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
1549  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
1550  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
1551  /*isVolatile=*/false};
1552  builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
1553  return success();
1554 }
1555 
1556 /// Converts an LLVM dialect binary operation to the corresponding enum value
1557 /// for `atomicrmw` supported binary operation.
1558 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
1559  return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op)
1560  .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
1561  .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
1562  .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
1563  .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
1564  .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
1565  .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
1566  .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
1567  .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
1568  .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
1569  .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
1570 }
1571 
1572 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
1573 static LogicalResult
1574 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
1575  llvm::IRBuilderBase &builder,
1576  LLVM::ModuleTranslation &moduleTranslation) {
1577  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1578 
1579  // Convert values and types.
1580  auto &innerOpList = opInst.getRegion().front().getOperations();
1581  bool isRegionArgUsed{false}, isXBinopExpr{false};
1582  llvm::AtomicRMWInst::BinOp binop;
1583  mlir::Value mlirExpr;
1584  // Find the binary update operation that uses the region argument
1585  // and get the expression to update
1586  for (Operation &innerOp : innerOpList) {
1587  if (innerOp.getNumOperands() == 2) {
1588  binop = convertBinOpToAtomic(innerOp);
1589  if (!llvm::is_contained(innerOp.getOperands(),
1590  opInst.getRegion().getArgument(0)))
1591  continue;
1592  isRegionArgUsed = true;
1593  isXBinopExpr = innerOp.getNumOperands() > 0 &&
1594  innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
1595  mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
1596  break;
1597  }
1598  }
1599  if (!isRegionArgUsed)
1600  return opInst.emitError("no atomic update operation with region argument"
1601  " as operand found inside atomic.update region");
1602 
1603  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1604  llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.getX());
1605  llvm::Type *llvmXElementType = moduleTranslation.convertType(
1606  opInst.getRegion().getArgument(0).getType());
1607  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1608  /*isSigned=*/false,
1609  /*isVolatile=*/false};
1610 
1611  llvm::AtomicOrdering atomicOrdering =
1612  convertAtomicOrdering(opInst.getMemoryOrderVal());
1613 
1614  // Generate update code.
1615  LogicalResult updateGenStatus = success();
1616  auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus](
1617  llvm::Value *atomicx,
1618  llvm::IRBuilder<> &builder) -> llvm::Value * {
1619  Block &bb = *opInst.getRegion().begin();
1620  moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
1621  moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1622  if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1623  updateGenStatus = (opInst.emitError()
1624  << "unable to convert update operation to llvm IR");
1625  return nullptr;
1626  }
1627  omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1628  assert(yieldop && yieldop.getResults().size() == 1 &&
1629  "terminator must be omp.yield op and it must have exactly one "
1630  "argument");
1631  return moduleTranslation.lookupValue(yieldop.getResults()[0]);
1632  };
1633 
1634  // Handle ambiguous alloca, if any.
1635  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1636  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1637  builder.restoreIP(ompBuilder->createAtomicUpdate(
1638  ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn,
1639  isXBinopExpr));
1640  return updateGenStatus;
1641 }
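 // Illustrative example (editorial addition): for an update region such as
 //   omp.atomic.update %x : !llvm.ptr {
 //   ^bb0(%xval: i32):
 //     %newval = llvm.add %xval, %expr : i32
 //     omp.yield(%newval : i32)
 //   }
 // the search above selects the `llvm.add` as the update operation, sets
 // isXBinopExpr because %xval is its first operand, and forwards %expr as
 // llvmExpr to createAtomicUpdate.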
1642 
1643 static LogicalResult
1644 convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
1645  llvm::IRBuilderBase &builder,
1646  LLVM::ModuleTranslation &moduleTranslation) {
1647  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1648  mlir::Value mlirExpr;
1649  bool isXBinopExpr = false, isPostfixUpdate = false;
1650  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
1651 
1652  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
1653  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();
1654 
1655  assert((atomicUpdateOp || atomicWriteOp) &&
1656  "internal op must be an atomic.update or atomic.write op");
1657 
1658  if (atomicWriteOp) {
1659  isPostfixUpdate = true;
1660  mlirExpr = atomicWriteOp.getExpr();
1661  } else {
1662  isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
1663  atomicCaptureOp.getAtomicUpdateOp().getOperation();
1664  auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
1665  bool isRegionArgUsed{false};
1666  // Find the binary update operation that uses the region argument
1667  // and get the expression to update
1668  for (Operation &innerOp : innerOpList) {
1669  if (innerOp.getNumOperands() == 2) {
1670  binop = convertBinOpToAtomic(innerOp);
1671  if (!llvm::is_contained(innerOp.getOperands(),
1672  atomicUpdateOp.getRegion().getArgument(0)))
1673  continue;
1674  isRegionArgUsed = true;
1675  isXBinopExpr =
1676  innerOp.getNumOperands() > 0 &&
1677  innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
1678  mlirExpr =
1679  (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
1680  break;
1681  }
1682  }
1683  if (!isRegionArgUsed)
1684  return atomicUpdateOp.emitError(
1685  "no atomic update operation with region argument"
1686  " as operand found inside atomic.update region");
1687  }
1688 
1689  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1690  llvm::Value *llvmX =
1691  moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
1692  llvm::Value *llvmV =
1693  moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
1694  llvm::Type *llvmXElementType = moduleTranslation.convertType(
1695  atomicCaptureOp.getAtomicReadOp().getElementType());
1696  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1697  /*isSigned=*/false,
1698  /*isVolatile=*/false};
1699  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
1700  /*isSigned=*/false,
1701  /*isVolatile=*/false};
1702 
1703  llvm::AtomicOrdering atomicOrdering =
1704  convertAtomicOrdering(atomicCaptureOp.getMemoryOrderVal());
1705 
1706  LogicalResult updateGenStatus = success();
1707  auto updateFn = [&](llvm::Value *atomicx,
1708  llvm::IRBuilder<> &builder) -> llvm::Value * {
1709  if (atomicWriteOp)
1710  return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
1711  Block &bb = *atomicUpdateOp.getRegion().begin();
1712  moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
1713  atomicx);
1714  moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1715  if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1716  updateGenStatus = (atomicUpdateOp.emitError()
1717  << "unable to convert update operation to llvm IR");
1718  return nullptr;
1719  }
1720  omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1721  assert(yieldop && yieldop.getResults().size() == 1 &&
1722  "terminator must be omp.yield op and it must have exactly one "
1723  "argument");
1724  return moduleTranslation.lookupValue(yieldop.getResults()[0]);
1725  };
1726 
1727  // Handle ambiguous alloca, if any.
1728  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1729  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1730  builder.restoreIP(ompBuilder->createAtomicCapture(
1731  ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
1732  binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr));
1733  return updateGenStatus;
1734 }
1735 
1736 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
1737 /// mapping between reduction variables and their private equivalents to have
1738 /// been stored on the ModuleTranslation stack. Currently only supports
1739 /// reduction within WsloopOp and ParallelOp, but can be easily extended.
1740 static LogicalResult
1741 convertOmpReductionOp(omp::ReductionOp reductionOp,
1742  llvm::IRBuilderBase &builder,
1743  LLVM::ModuleTranslation &moduleTranslation) {
1744  // Find the declaration that corresponds to the reduction op.
1745  omp::DeclareReductionOp declaration;
1746  Operation *reductionParent = reductionOp->getParentOp();
1747  if (dyn_cast<omp::ParallelOp>(reductionParent) ||
1748  dyn_cast<omp::WsloopOp>(reductionParent)) {
1749  declaration = findReductionDecl(*reductionParent, reductionOp);
1750  } else {
1751  llvm_unreachable("Unhandled reduction container");
1752  }
1753  assert(declaration && "could not find reduction declaration");
1754 
1755  // Retrieve the mapping between reduction variables and their private
1756  // equivalents.
1757  const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr;
1758  moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(
1759  [&](const OpenMPVarMappingStackFrame &frame) {
1760  if (frame.mapping.contains(reductionOp.getAccumulator())) {
1761  reductionVariableMap = &frame.mapping;
1762  return WalkResult::interrupt();
1763  }
1764  return WalkResult::advance();
1765  });
1766  assert(reductionVariableMap && "couldn't find private reduction variables");
1767  // Translate the reduction operation by emitting the body of the corresponding
1768  // reduction declaration.
1769  Region &reductionRegion = declaration.getReductionRegion();
1770  llvm::Value *privateReductionVar =
1771  reductionVariableMap->lookup(reductionOp.getAccumulator());
1772  llvm::Value *reductionVal = builder.CreateLoad(
1773  moduleTranslation.convertType(reductionOp.getOperand().getType()),
1774  privateReductionVar);
1775 
1776  moduleTranslation.mapValue(reductionRegion.front().getArgument(0),
1777  reductionVal);
1778  moduleTranslation.mapValue(
1779  reductionRegion.front().getArgument(1),
1780  moduleTranslation.lookupValue(reductionOp.getOperand()));
1781 
1782  SmallVector<llvm::Value *> phis;
1783  if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body",
1784  builder, moduleTranslation, &phis)))
1785  return failure();
1786  assert(phis.size() == 1 && "expected one value to be yielded from "
1787  "the reduction body declaration region");
1788  builder.CreateStore(phis[0], privateReductionVar);
1789  return success();
1790 }
1791 
1792 /// Converts an OpenMP Threadprivate operation into LLVM IR using
1793 /// OpenMPIRBuilder.
1794 static LogicalResult
1795 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
1796  LLVM::ModuleTranslation &moduleTranslation) {
1797  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1798  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
1799 
1800  Value symAddr = threadprivateOp.getSymAddr();
1801  auto *symOp = symAddr.getDefiningOp();
1802  if (!isa<LLVM::AddressOfOp>(symOp))
1803  return opInst.emitError("Addressing symbol not found");
1804  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
1805 
1806  LLVM::GlobalOp global =
1807  addressOfOp.getGlobal(moduleTranslation.symbolTable());
1808  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
1809  llvm::Type *type = globalValue->getValueType();
1810  llvm::TypeSize typeSize =
1811  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
1812  type);
1813  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
1814  llvm::StringRef suffix = llvm::StringRef(".cache", 6);
1815  std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str();
1816  llvm::Value *callInst =
1817  moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate(
1818  ompLoc, globalValue, size, cacheName);
1819  moduleTranslation.mapValue(opInst.getResult(0), callInst);
1820  return success();
1821 }
1822 
1823 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1824 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
1825  switch (deviceClause) {
1826  case mlir::omp::DeclareTargetDeviceType::host:
1827  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1828  break;
1829  case mlir::omp::DeclareTargetDeviceType::nohost:
1830  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1831  break;
1832  case mlir::omp::DeclareTargetDeviceType::any:
1833  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1834  break;
1835  }
1836  llvm_unreachable("unhandled device clause");
1837 }
1838 
1839 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1840 convertToCaptureClauseKind(
1841  mlir::omp::DeclareTargetCaptureClause captureClause) {
1842  switch (captureClause) {
1843  case mlir::omp::DeclareTargetCaptureClause::to:
1844  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1845  case mlir::omp::DeclareTargetCaptureClause::link:
1846  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1847  case mlir::omp::DeclareTargetCaptureClause::enter:
1848  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1849  }
1850  llvm_unreachable("unhandled capture clause");
1851 }
1852 
1853 static llvm::SmallString<64>
1854 getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
1855  llvm::OpenMPIRBuilder &ompBuilder) {
1856  llvm::SmallString<64> suffix;
1857  llvm::raw_svector_ostream os(suffix);
1858  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
1859  auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
1860  auto fileInfoCallBack = [&loc]() {
1861  return std::pair<std::string, uint64_t>(
1862  llvm::StringRef(loc.getFilename()), loc.getLine());
1863  };
1864 
1865  os << llvm::format(
1866  "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
1867  }
1868  os << "_decl_tgt_ref_ptr";
1869 
1870  return suffix;
1871 }
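 // Editorial note: for a private-visibility global defined at, say, foo.f90:12
 // this produces a suffix of the form "_<file-id-hex>_decl_tgt_ref_ptr", while
 // a non-private global simply receives "_decl_tgt_ref_ptr".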
1872 
1873 static bool isDeclareTargetLink(mlir::Value value) {
1874  if (auto addressOfOp =
1875  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
1876  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
1877  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
1878  if (auto declareTargetGlobal =
1879  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
1880  if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
1881  mlir::omp::DeclareTargetCaptureClause::link)
1882  return true;
1883  }
1884  return false;
1885 }
1886 
1887 // Returns the reference pointer generated by the lowering of the declare target
1888 // operation in cases where the link clause is used or the to clause is used in
1889 // USM mode.
1890 static llvm::Value *
1891 getRefPtrIfDeclareTarget(mlir::Value value,
1892  LLVM::ModuleTranslation &moduleTranslation) {
1893  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1894 
1895  // An easier way to do this may just be to keep track of any pointer
1896  // references and their mapping to their respective operations.
1897  if (auto addressOfOp =
1898  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
1899  if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
1900  addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
1901  addressOfOp.getGlobalName()))) {
1902 
1903  if (auto declareTargetGlobal =
1904  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
1905  gOp.getOperation())) {
1906 
1907  // In this case, we must utilise the reference pointer generated by the
1908  // declare target operation, similar to Clang.
1909  if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
1910  mlir::omp::DeclareTargetCaptureClause::link) ||
1911  (declareTargetGlobal.getDeclareTargetCaptureClause() ==
1912  mlir::omp::DeclareTargetCaptureClause::to &&
1913  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
1914  llvm::SmallString<64> suffix =
1915  getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
1916 
1917  if (gOp.getSymName().contains(suffix))
1918  return moduleTranslation.getLLVMModule()->getNamedValue(
1919  gOp.getSymName());
1920 
1921  return moduleTranslation.getLLVMModule()->getNamedValue(
1922  (gOp.getSymName().str() + suffix.str()).str());
1923  }
1924  }
1925  }
1926  }
1927 
1928  return nullptr;
1929 }
1930 
1931 // A small helper structure to contain data gathered
1932 // for map lowering and coalesce it into one area,
1933 // avoiding extra computations such as searches in the
1934 // llvm module for lowered mapped variables or checking
1935 // if something is declare target (and retrieving the
1936 // value) more than necessary.
1937 struct MapInfoData : llvm::OpenMPIRBuilder::MapInfosTy {
1938  llvm::SmallVector<bool, 4> IsDeclareTarget;
1939  llvm::SmallVector<bool, 4> IsAMember;
1940  llvm::SmallVector<mlir::Operation *, 4> MapClause;
1941  llvm::SmallVector<llvm::Value *, 4> OriginalValue;
1942  // Stripped off array/pointer to get the underlying
1943  // element type
1944  llvm::SmallVector<llvm::Type *, 4> BaseType;
1945 
1946  /// Append arrays in \a CurInfo.
1947  void append(MapInfoData &CurInfo) {
1948  IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
1949  CurInfo.IsDeclareTarget.end());
1950  MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
1951  OriginalValue.append(CurInfo.OriginalValue.begin(),
1952  CurInfo.OriginalValue.end());
1953  BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
1954  llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
1955  }
1956 };
1957 
1958 uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
1959  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
1960  arrTy.getElementType()))
1961  return getArrayElementSizeInBits(nestedArrTy, dl);
1962  return dl.getTypeSizeInBits(arrTy.getElementType());
1963 }
1964 
1965 // This function calculates the size to be offloaded for a specified type,
1966 // given its associated map clause (which can contain bounds information that
1967 // affects the total size). The size is calculated based on the underlying
1968 // element type, e.g. given a 1-D array of ints, we calculate the size from
1969 // the integer type * number of elements in the array. This size can be used
1970 // in other calculations but is ultimately used as an argument to the OpenMP
1971 // runtime's kernel argument structure, which is generated through the
1972 // combinedInfo data structures.
1973 // This function is somewhat equivalent to Clang's getExprTypeSize inside of
1974 // CGOpenMPRuntime.cpp.
1975 llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
1976  Operation *clauseOp, llvm::Value *basePointer,
1977  llvm::Type *baseType, llvm::IRBuilderBase &builder,
1978  LLVM::ModuleTranslation &moduleTranslation) {
1979  // We use getTypeSizeInBits instead of getTypeSize because getTypeSize
1980  // reports the size in an inconsistent byte or bit format.
1981  uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
1982  if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
1983  underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);
1984 
1985  if (auto memberClause =
1986  mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
1987  // This calculates the size to transfer based on bounds and the underlying
1988  // element type, provided bounds have been specified (Fortran
1989  // pointers/allocatables/target and arrays that have sections specified fall
1990  // into this as well).
1991  if (!memberClause.getBounds().empty()) {
1992  llvm::Value *elementCount = builder.getInt64(1);
1993  for (auto bounds : memberClause.getBounds()) {
1994  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
1995  bounds.getDefiningOp())) {
1996  // The size to be mapped is calculated from the map_info's bounds as
1997  // elemCount *= (UB - LB) + 1 for each bound; later we multiply by the
1998  // underlying element type's byte size to get the full size to be
1999  // offloaded based on the bounds.
2000  elementCount = builder.CreateMul(
2001  elementCount,
2002  builder.CreateAdd(
2003  builder.CreateSub(
2004  moduleTranslation.lookupValue(boundOp.getUpperBound()),
2005  moduleTranslation.lookupValue(boundOp.getLowerBound())),
2006  builder.getInt64(1)));
2007  }
2008  }
2009 
2010  // The size in bytes x number of elements; the sizeInBytes stored is
2011  // the underlying type's size, e.g. if ptr<i32>, it'll be the i32's
2012  // size, so we do some on-the-fly runtime math to get the size in
2013  // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
2014  // some adjustment for members with more complex types.
2015  return builder.CreateMul(elementCount,
2016  builder.getInt64(underlyingTypeSzInBits / 8));
2017  }
2018  }
2019 
2020  return builder.getInt64(underlyingTypeSzInBits / 8);
2021 }
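 // Worked example (editorial addition, assuming an i32 element type): for a
 // map with two bounds ops of (lb = 1, ub = 4) and (lb = 0, ub = 9), the code
 // above computes elementCount = 1 * ((4 - 1) + 1) * ((9 - 0) + 1) = 40 and
 // returns 40 * (32 / 8) = 160 bytes to be offloaded.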
2022 
2023 void collectMapDataFromMapOperands(MapInfoData &mapData,
2024  llvm::SmallVectorImpl<Value> &mapOperands,
2025  LLVM::ModuleTranslation &moduleTranslation,
2026  DataLayout &dl,
2027  llvm::IRBuilderBase &builder) {
2028  for (mlir::Value mapValue : mapOperands) {
2029  if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2030  mapValue.getDefiningOp())) {
2031  mlir::Value offloadPtr =
2032  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
2033  mapData.OriginalValue.push_back(
2034  moduleTranslation.lookupValue(offloadPtr));
2035  mapData.Pointers.push_back(mapData.OriginalValue.back());
2036 
2037  if (llvm::Value *refPtr =
2038  getRefPtrIfDeclareTarget(offloadPtr,
2039  moduleTranslation)) { // declare target
2040  mapData.IsDeclareTarget.push_back(true);
2041  mapData.BasePointers.push_back(refPtr);
2042  } else { // regular mapped variable
2043  mapData.IsDeclareTarget.push_back(false);
2044  mapData.BasePointers.push_back(mapData.OriginalValue.back());
2045  }
2046 
2047  mapData.BaseType.push_back(
2048  moduleTranslation.convertType(mapOp.getVarType()));
2049  mapData.Sizes.push_back(getSizeInBytes(
2050  dl, mapOp.getVarType(), mapOp, mapData.BasePointers.back(),
2051  mapData.BaseType.back(), builder, moduleTranslation));
2052  mapData.MapClause.push_back(mapOp.getOperation());
2053  mapData.Types.push_back(
2054  llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value()));
2055  mapData.Names.push_back(LLVM::createMappingInformation(
2056  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
2057  mapData.DevicePointers.push_back(
2058  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2059 
2060  // Check if this is a member mapping and, if it is a member of a larger
2061  // object, mark it as such.
2062  // TODO: Need better handling of members, and distinguishing of members
2063  // that are implicitly allocated on device vs explicitly passed in as
2064  // arguments.
2065  // TODO: May require some further additions to support nested record
2066  // types, i.e. member maps that can have member maps.
2067  mapData.IsAMember.push_back(false);
2068  for (mlir::Value mapValue : mapOperands) {
2069  if (auto map = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2070  mapValue.getDefiningOp())) {
2071  for (auto member : map.getMembers()) {
2072  if (member == mapOp) {
2073  mapData.IsAMember.back() = true;
2074  }
2075  }
2076  }
2077  }
2078  }
2079  }
2080 }
2081 
2082 /// This function calculates the array/pointer offset for map data provided
2083 /// with bounds operations, e.g. when provided something like the following:
2084 ///
2085 /// Fortran
2086 /// map(tofrom: array(2:5, 3:2))
2087 /// or
2088 /// C++
2089 /// map(tofrom: array[1:4][2:3])
2090 /// We must calculate the initial pointer offset to pass across; this function
2091 /// performs this using bounds.
2092 ///
2093 /// NOTE: while bounds are specified in row-major order, they currently need
2094 /// to be flipped for Fortran's column-major array allocation and access (as
2095 /// opposed to C++'s row-major ordering, hence the backwards processing where
2096 /// order is important). This is likely important to keep in mind for the
2097 /// future when we incorporate a C++ frontend; both frontends will need to
2098 /// agree on the ordering of generated bounds operations (one may have to
2099 /// flip them) to make the below lowering frontend agnostic. The offload size
2100 /// calculation may also have to be adjusted for C++.
2101 std::vector<llvm::Value *>
2102 calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
2103  llvm::IRBuilderBase &builder, bool isArrayTy,
2104  mlir::OperandRange bounds) {
2105  std::vector<llvm::Value *> idx;
2106  // There are no bounds to calculate an offset from; we can safely
2107  // ignore them and return no indices.
2108  if (bounds.empty())
2109  return idx;
2110 
2111  // If we have an array type, then we have its type so we can treat it as a
2112  // normal GEP instruction where the bounds operations are simply indexes
2113  // into the array. We currently process the bounds in reverse order, which
2114  // I believe leans more towards Fortran's column-major layout in memory.
2115  if (isArrayTy) {
2116  idx.push_back(builder.getInt64(0));
2117  for (int i = bounds.size() - 1; i >= 0; --i) {
2118  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2119  bounds[i].getDefiningOp())) {
2120  idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
2121  }
2122  }
2123  } else {
2124  // If we do not have an array type, but we have bounds, then we're dealing
2125  // with a pointer that's being treated like an array and we have the
2126  // underlying type, e.g. an i32 or f64, such as a Fortran descriptor base
2127  // address (a pointer to the actual data), so we must calculate the
2128  // offset using a single index, which the following two loops attempt to
2129  // compute.
2130 
2131  // Calculate the stride we need to move by per row/column: the first only
2132  // needs to be offset by one, but each subsequent one must be offset by
2133  // the previous row/column's stride multiplied by the extent of the
2134  // current row/column.
2135  //
2136  // For example ([1][10][100]):
2137  //
2138  // - First row/column we move by 1 for each index increment
2139  // - Second row/column we move by 1 (previous stride) * 10 (current
2140  //   extent), i.e. by 10, for each index increment
2141  // - Third row/column we move by 10 * 100, i.e. by 1000, per increment
2142  std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
2143  for (size_t i = 1; i < bounds.size(); ++i) {
2144  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2145  bounds[i].getDefiningOp())) {
2146  dimensionIndexSizeOffset.push_back(builder.CreateMul(
2147  moduleTranslation.lookupValue(boundOp.getExtent()),
2148  dimensionIndexSizeOffset[i - 1]));
2149  }
2150  }
2151 
2152  // Now that we have calculated how much we move by per index, we must
2153  // multiply each lower bound offset by the stride calculated in the
2154  // previous loop and accumulate the results to get our final resulting
2155  // offset.
2156  for (int i = bounds.size() - 1; i >= 0; --i) {
2157  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2158  bounds[i].getDefiningOp())) {
2159  if (idx.empty())
2160  idx.emplace_back(builder.CreateMul(
2161  moduleTranslation.lookupValue(boundOp.getLowerBound()),
2162  dimensionIndexSizeOffset[i]));
2163  else
2164  idx.back() = builder.CreateAdd(
2165  idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
2166  boundOp.getLowerBound()),
2167  dimensionIndexSizeOffset[i]));
2168  }
2169  }
2170  }
2171 
2172  return idx;
2173 }
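 // Worked trace (editorial addition): for the pointer (non-array) branch with
 // two bounds ops where bounds[1] has extent 10, and lower bounds of 2 for
 // bounds[0] and 3 for bounds[1], the strides become {1, 10} and the single
 // returned index is 3 * 10 + 2 * 1 = 32 elements into the underlying data.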
2174 
2175 // This creates two insertions into the MapInfosTy data structure for the
2176 // "parent" of a set of members, (usually a container e.g.
2177 // class/structure/derived type) when subsequent members have also been
2178 // explicitly mapped on the same map clause. Certain types, such as Fortran
2179 // descriptors are mapped like this as well, however, the members are
2180 // implicit as far as a user is concerned, but we must explicitly map them
2181 // internally.
2182 //
2183 // This function also returns the memberOfFlag for this particular parent,
2184 // which is utilised in subsequent member mappings (by modifying their map
2185 // type with it) to indicate that a member is part of this parent and should
2186 // be treated by the runtime as such; this is important for correct mapping.
2187 static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
2188  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
2189  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
2190  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
2191  uint64_t mapDataIndex, bool isTargetParams) {
2192  // Map the first segment of our structure
2193  combinedInfo.Types.emplace_back(
2194  isTargetParams
2195  ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
2196  : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE);
2197  combinedInfo.DevicePointers.emplace_back(
2198  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2199  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
2200  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
2201  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
2202  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
2203 
2204  // Calculate size of the parent object being mapped based on the
2205  // addresses at runtime, highAddr - lowAddr = size. This of course
2206  // doesn't factor in allocated data like pointers, hence the further
2207  // processing of members specified by users, or in the case of
2208  // Fortran pointers and allocatables, the mapping of the pointed to
2209  // data by the descriptor (which itself, is a structure containing
2210  // runtime information on the dynamically allocated data).
2211  llvm::Value *lowAddr = builder.CreatePointerCast(
2212  mapData.Pointers[mapDataIndex], builder.getPtrTy());
2213  llvm::Value *highAddr = builder.CreatePointerCast(
2214  builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
2215  mapData.Pointers[mapDataIndex], 1),
2216  builder.getPtrTy());
2217  llvm::Value *size = builder.CreateIntCast(
2218  builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
2219  builder.getInt64Ty(),
2220  /*isSigned=*/false);
2221  combinedInfo.Sizes.push_back(size);
2222 
2223  // This creates the initial MEMBER_OF mapping that consists of
2224  // the parent/top level container (same as above effectively, except
2225  // with a fixed initial compile time size and a separate maptype which
2226  // indicates the true map type (tofrom etc.) and that it is a part
2227  // of a larger mapping and indicating the link between it and its
2228  // members that are also explicitly mapped).
2229  llvm::omp::OpenMPOffloadMappingFlags mapFlag =
2230  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
2231  if (isTargetParams)
2232  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
2233 
2234  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
2235  ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
2236  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
2237 
2238  combinedInfo.Types.emplace_back(mapFlag);
2239  combinedInfo.DevicePointers.emplace_back(
2240  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2241  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
2242  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
2243  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
2244  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
2245  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
2246 
2247  return memberOfFlag;
2248 }
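 // Editorial summary: the net effect is two entries for the parent object,
 // one (optionally OMP_MAP_TARGET_PARAM) whose size is the runtime
 // highAddr - lowAddr difference, and a second OMP_MAP_TO entry whose
 // MEMBER_OF bits reference the first; the same memberOfFlag is returned so
 // explicit member mappings can be linked back to this parent.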
2249 
2250 // The intent is to verify if the mapped data being passed is a
2251 // pointer -> pointee that requires special handling in certain cases,
2252 // e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
2253 //
2254 // There may be a better way to verify this, but unfortunately with
2255 // opaque pointers we lose the ability to easily check if something is
2256 // a pointer whilst maintaining access to the underlying type.
2257 static bool checkIfPointerMap(mlir::omp::MapInfoOp mapOp) {
2258  // If we have a varPtrPtr field assigned then the underlying type is a pointer
2259  if (mapOp.getVarPtrPtr())
2260  return true;
2261 
2262  // If the map data is declare target with a link clause, then it's represented
2263  // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
2264  // no relation to pointers.
2265  if (isDeclareTargetLink(mapOp.getVarPtr()))
2266  return true;
2267 
2268  return false;
2269 }
2270 
2271 // This function is intended to add explicit mappings of members
2272 static void processMapMembersWithParent(
2273  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
2274  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
2275  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
2276  uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
2277 
2278  auto parentClause =
2279  mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
2280 
2281  for (auto mappedMembers : parentClause.getMembers()) {
2282  auto memberClause =
2283  mlir::dyn_cast<mlir::omp::MapInfoOp>(mappedMembers.getDefiningOp());
2284  int memberDataIdx = -1;
2285  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
2286  if (mapData.MapClause[i] == memberClause)
2287  memberDataIdx = i;
2288  }
2289 
2290  assert(memberDataIdx >= 0 && "could not find mapped member of structure");
2291 
2292  // Use the same MemberOfFlag to indicate its link with its parent and the
2293  // other members, and flag that it's part of a pointer-and-object coupling.
2294  auto mapFlag =
2295  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType().value());
2296  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
2297  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
2298  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
2299  if (checkIfPointerMap(memberClause))
2300  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
2301 
2302  combinedInfo.Types.emplace_back(mapFlag);
2303  combinedInfo.DevicePointers.emplace_back(
2304  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2305  combinedInfo.Names.emplace_back(
2306  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
2307 
2308  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[memberDataIdx]);
2309  combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
2310  combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
2311  }
2312 }
2313 
2314 static void processMapWithMembersOf(
2315  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
2316  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
2317  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
2318  uint64_t mapDataIndex, bool isTargetParams) {
2319  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
2320  mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
2321  combinedInfo, mapData, mapDataIndex, isTargetParams);
2322  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
2323  combinedInfo, mapData, mapDataIndex,
2324  memberOfParentFlag);
2325 }
2326 
2327 // This is a variation on Clang's GenerateOpenMPCapturedVars, which
2328 // generates different operation (e.g. load/store) combinations for
2329 // arguments to the kernel, based on map capture kinds which are then
2330 // utilised in the combinedInfo in place of the original Map value.
2331 static void
2332 createAlteredByCaptureMap(MapInfoData &mapData,
2333  LLVM::ModuleTranslation &moduleTranslation,
2334  llvm::IRBuilderBase &builder) {
2335  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
2336  // If it's declare target, skip it; it's handled separately.
2337  if (!mapData.IsDeclareTarget[i]) {
2338  auto mapOp =
2339  mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(mapData.MapClause[i]);
2340  mlir::omp::VariableCaptureKind captureKind =
2341  mapOp.getMapCaptureType().value_or(
2342  mlir::omp::VariableCaptureKind::ByRef);
2343  bool isPtrTy = checkIfPointerMap(mapOp);
2344 
2345  // Currently handles the array sectioning lowerbound case, but more
2346  // logic may be required in the future. Clang invokes EmitLValue,
2347  // which has specialised logic for special Clang types such as
2348  // user-defined types, so it is possible we will have to extend this
2349  // for structures or other complex types. The general idea is that this
2350  // function mimics some of the logic from Clang that we require for
2351  // kernel argument passing from host -> device.
2352  switch (captureKind) {
2353  case mlir::omp::VariableCaptureKind::ByRef: {
2354  llvm::Value *newV = mapData.Pointers[i];
2355  std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
2356  moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
2357  mapOp.getBounds());
2358  if (isPtrTy)
2359  newV = builder.CreateLoad(builder.getPtrTy(), newV);
2360 
2361  if (!offsetIdx.empty())
2362  newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
2363  "array_offset");
2364  mapData.Pointers[i] = newV;
2365  } break;
2366  case mlir::omp::VariableCaptureKind::ByCopy: {
2367  llvm::Type *type = mapData.BaseType[i];
2368  llvm::Value *newV;
2369  if (mapData.Pointers[i]->getType()->isPointerTy())
2370  newV = builder.CreateLoad(type, mapData.Pointers[i]);
2371  else
2372  newV = mapData.Pointers[i];
2373 
2374  if (!isPtrTy) {
2375  auto curInsert = builder.saveIP();
2376  builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
2377  auto *memTempAlloc =
2378  builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
2379  builder.restoreIP(curInsert);
2380 
2381  builder.CreateStore(newV, memTempAlloc);
2382  newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
2383  }
2384 
2385  mapData.Pointers[i] = newV;
2386  mapData.BasePointers[i] = newV;
2387  } break;
2388  case mlir::omp::VariableCaptureKind::This:
2389  case mlir::omp::VariableCaptureKind::VLAType:
2390  mapData.MapClause[i]->emitOpError("Unhandled capture kind");
2391  break;
2392  }
2393  }
2394  }
2395 }
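 // Editorial sketch of the effect: a ByCopy capture of a scalar (e.g. an i32)
 // is loaded, stored into a ".casted" alloca at the function's alloca
 // insertion point and re-loaded as an opaque pointer suitable for the kernel
 // argument structure, whereas a ByRef capture keeps the pointer (loading
 // through it first if it is itself a pointer map) and only applies any
 // bounds-derived "array_offset" GEP.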
2396 
2397 // Generate all map related information and fill the combinedInfo.
2398 static void genMapInfos(llvm::IRBuilderBase &builder,
2399  LLVM::ModuleTranslation &moduleTranslation,
2400  DataLayout &dl,
2401  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
2402  MapInfoData &mapData,
2403  const SmallVector<Value> &devPtrOperands = {},
2404  const SmallVector<Value> &devAddrOperands = {},
2405  bool isTargetParams = false) {
2406  // We wish to modify some of the methods in which arguments are
2407  // passed based on their capture type by the target region. This can
2408  // involve generating new loads and stores, which changes the
2409  // MLIR value to LLVM value mapping; however, we only wish to do this
2410  // locally for the current function/target and also avoid altering
2411  // ModuleTranslation, so we remap the base pointer or pointer stored
2412  // in the map info's corresponding MapInfoData, which is later accessed
2413  // by genMapInfos and createTarget to help generate the kernel and
2414  // kernel arg structure. It primarily becomes relevant in cases like
2415  // bycopy, or byref range'd arrays. In the default case, we simply
2416  // pass the pointer byref as both basePointer and pointer.
2417  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
2418  createAlteredByCaptureMap(mapData, moduleTranslation, builder);
2419 
2420  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2421 
2422  auto fail = [&combinedInfo]() -> void {
2423  combinedInfo.BasePointers.clear();
2424  combinedInfo.Pointers.clear();
2425  combinedInfo.DevicePointers.clear();
2426  combinedInfo.Sizes.clear();
2427  combinedInfo.Types.clear();
2428  combinedInfo.Names.clear();
2429  };
2430 
2431  // We operate under the assumption that all vectors that are
2432  // required in MapInfoData are of equal lengths (either filled with
2433  // default constructed data or appropriate information) so we can
2434  // utilise the size from any component of MapInfoData; if we can't,
2435  // something is missing from the initial MapInfoData construction.
2436  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
2437  // NOTE/TODO: We currently do not handle member mapping separately from its
2438  // parent or explicit mapping of a parent and member in the same operation;
2439  // this will need to change in the near future. For now we primarily handle
2440  // descriptor mapping from Fortran, generalised as mapping record types
2441  // with implicit member maps. This lowering needs further generalisation to
2442  // fully support Fortran derived types, and C/C++ structures and classes.
2443  if (mapData.IsAMember[i])
2444  continue;
2445 
2446  auto mapInfoOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[i]);
2447  if (!mapInfoOp.getMembers().empty()) {
2448  processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
2449  combinedInfo, mapData, i, isTargetParams);
2450  continue;
2451  }
2452 
2453  auto mapFlag = mapData.Types[i];
2454  bool isPtrTy = checkIfPointerMap(mapInfoOp);
2455  if (isPtrTy)
2456  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
2457 
2458  // Declare Target Mappings are excluded from being marked as
2459  // OMP_MAP_TARGET_PARAM as they are not passed as parameters.
2460  if (isTargetParams && !mapData.IsDeclareTarget[i])
2461  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
2462 
2463  if (auto mapInfoOp = dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[i]))
2464  if (mapInfoOp.getMapCaptureType().value() ==
2465  mlir::omp::VariableCaptureKind::ByCopy &&
2466  !isPtrTy)
2467  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
2468 
2469  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[i]);
2470  combinedInfo.Pointers.emplace_back(mapData.Pointers[i]);
2471  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[i]);
2472  combinedInfo.Names.emplace_back(mapData.Names[i]);
2473  combinedInfo.Types.emplace_back(mapFlag);
2474  combinedInfo.Sizes.emplace_back(mapData.Sizes[i]);
2475  }
2476 
2477  auto findMapInfo = [&combinedInfo](llvm::Value *val, unsigned &index) {
2478  index = 0;
2479  for (llvm::Value *basePtr : combinedInfo.BasePointers) {
2480  if (basePtr == val)
2481  return true;
2482  index++;
2483  }
2484  return false;
2485  };
2486 
2487  auto addDevInfos = [&, fail](auto devOperands, auto devOpType) -> void {
2488  for (const auto &devOp : devOperands) {
2489  // TODO: Only LLVMPointerTypes are handled.
2490  if (!devOp.getType().template isa<LLVM::LLVMPointerType>())
2491  return fail();
2492 
2493  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devOp);
2494 
2495  // Check if map info is already present for this entry.
2496  unsigned infoIndex;
2497  if (findMapInfo(mapOpValue, infoIndex)) {
2498  combinedInfo.Types[infoIndex] |=
2499  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
2500  combinedInfo.DevicePointers[infoIndex] = devOpType;
2501  } else {
2502  combinedInfo.BasePointers.emplace_back(mapOpValue);
2503  combinedInfo.Pointers.emplace_back(mapOpValue);
2504  combinedInfo.DevicePointers.emplace_back(devOpType);
2505  combinedInfo.Names.emplace_back(
2506  LLVM::createMappingInformation(devOp.getLoc(), *ompBuilder));
2507  combinedInfo.Types.emplace_back(
2508  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
2509  combinedInfo.Sizes.emplace_back(builder.getInt64(0));
2510  }
2511  }
2512  };
2513 
2514  addDevInfos(devPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
2515  addDevInfos(devAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
2516 }
2517 
2518 static LogicalResult
2519 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
2520  LLVM::ModuleTranslation &moduleTranslation) {
2521  llvm::Value *ifCond = nullptr;
2522  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
2523  SmallVector<Value> mapOperands;
2524  SmallVector<Value> useDevPtrOperands;
2525  SmallVector<Value> useDevAddrOperands;
2526  llvm::omp::RuntimeFunction RTLFn;
2527  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
2528 
2529  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2530 
2531  LogicalResult result =
2532  llvm::TypeSwitch<Operation *, LogicalResult>(op)
2533  .Case([&](omp::TargetDataOp dataOp) {
2534  if (auto ifExprVar = dataOp.getIfExpr())
2535  ifCond = moduleTranslation.lookupValue(ifExprVar);
2536 
2537  if (auto devId = dataOp.getDevice())
2538  if (auto constOp =
2539  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2540  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2541  deviceID = intAttr.getInt();
2542 
2543  mapOperands = dataOp.getMapOperands();
2544  useDevPtrOperands = dataOp.getUseDevicePtr();
2545  useDevAddrOperands = dataOp.getUseDeviceAddr();
2546  return success();
2547  })
2548  .Case([&](omp::TargetEnterDataOp enterDataOp) {
2549  if (enterDataOp.getNowait())
2550  return (LogicalResult)(enterDataOp.emitError(
2551  "`nowait` is not supported yet"));
2552 
2553  if (auto ifExprVar = enterDataOp.getIfExpr())
2554  ifCond = moduleTranslation.lookupValue(ifExprVar);
2555 
2556  if (auto devId = enterDataOp.getDevice())
2557  if (auto constOp =
2558  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2559  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2560  deviceID = intAttr.getInt();
2561  RTLFn = llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
2562  mapOperands = enterDataOp.getMapOperands();
2563  return success();
2564  })
2565  .Case([&](omp::TargetExitDataOp exitDataOp) {
2566  if (exitDataOp.getNowait())
2567  return (LogicalResult)(exitDataOp.emitError(
2568  "`nowait` is not supported yet"));
2569 
2570  if (auto ifExprVar = exitDataOp.getIfExpr())
2571  ifCond = moduleTranslation.lookupValue(ifExprVar);
2572 
2573  if (auto devId = exitDataOp.getDevice())
2574  if (auto constOp =
2575  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2576  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2577  deviceID = intAttr.getInt();
2578 
2579  RTLFn = llvm::omp::OMPRTL___tgt_target_data_end_mapper;
2580  mapOperands = exitDataOp.getMapOperands();
2581  return success();
2582  })
2583  .Case([&](omp::TargetUpdateOp updateDataOp) {
2584  if (updateDataOp.getNowait())
2585  return (LogicalResult)(updateDataOp.emitError(
2586  "`nowait` is not supported yet"));
2587 
2588  if (auto ifExprVar = updateDataOp.getIfExpr())
2589  ifCond = moduleTranslation.lookupValue(ifExprVar);
2590 
2591  if (auto devId = updateDataOp.getDevice())
2592  if (auto constOp =
2593  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2594  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2595  deviceID = intAttr.getInt();
2596 
2597  RTLFn = llvm::omp::OMPRTL___tgt_target_data_update_mapper;
2598  mapOperands = updateDataOp.getMapOperands();
2599  return success();
2600  })
2601  .Default([&](Operation *op) {
2602  return op->emitError("unsupported OpenMP operation: ")
2603  << op->getName();
2604  });
2605 
2606  if (failed(result))
2607  return failure();
2608 
2609  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2610 
2611  MapInfoData mapData;
2612  collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, DL,
2613  builder);
2614 
2615  // Fill up the arrays with all the mapped variables.
2616  llvm::OpenMPIRBuilder::MapInfosTy combinedInfo;
2617  auto genMapInfoCB =
2618  [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
2619  builder.restoreIP(codeGenIP);
2620  if (auto dataOp = dyn_cast<omp::TargetDataOp>(op)) {
2621  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData,
2622  useDevPtrOperands, useDevAddrOperands);
2623  } else {
2624  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
2625  }
2626  return combinedInfo;
2627  };
2628 
2629  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
2630  /*SeparateBeginEndCalls=*/true);
2631 
2632  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
2633  LogicalResult bodyGenStatus = success();
2634  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType) {
2635  assert(isa<omp::TargetDataOp>(op) &&
2636  "BodyGen requested for non TargetDataOp");
2637  Region &region = cast<omp::TargetDataOp>(op).getRegion();
2638  switch (bodyGenType) {
2639  case BodyGenTy::Priv:
2640  // Check if any device ptr/addr info is available
2641  if (!info.DevicePtrInfoMap.empty()) {
2642  builder.restoreIP(codeGenIP);
2643  unsigned argIndex = 0;
2644  for (auto &devPtrOp : useDevPtrOperands) {
2645  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devPtrOp);
2646  const auto &arg = region.front().getArgument(argIndex);
2647  moduleTranslation.mapValue(arg,
2648  info.DevicePtrInfoMap[mapOpValue].second);
2649  argIndex++;
2650  }
2651 
2652  for (auto &devAddrOp : useDevAddrOperands) {
2653  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devAddrOp);
2654  const auto &arg = region.front().getArgument(argIndex);
2655  auto *LI = builder.CreateLoad(
2656  builder.getPtrTy(), info.DevicePtrInfoMap[mapOpValue].second);
2657  moduleTranslation.mapValue(arg, LI);
2658  argIndex++;
2659  }
2660 
2661  bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region",
2662  builder, moduleTranslation);
2663  }
2664  break;
2665  case BodyGenTy::DupNoPriv:
2666  break;
2667  case BodyGenTy::NoPriv:
2668  // If device info is available then the region has already been generated
2669  if (info.DevicePtrInfoMap.empty()) {
2670  builder.restoreIP(codeGenIP);
2671  bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region",
2672  builder, moduleTranslation);
2673  }
2674  break;
2675  }
2676  return builder.saveIP();
2677  };
2678 
2679  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2680  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2681  findAllocaInsertPoint(builder, moduleTranslation);
2682  if (isa<omp::TargetDataOp>(op)) {
2683  builder.restoreIP(ompBuilder->createTargetData(
2684  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
2685  info, genMapInfoCB, nullptr, bodyGenCB));
2686  } else {
2687  builder.restoreIP(ompBuilder->createTargetData(
2688  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
2689  info, genMapInfoCB, &RTLFn));
2690  }
2691 
2692  return bodyGenStatus;
2693 }
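
// Note (editorial summary, not generated output): omp.target_data is lowered
// through createTargetData with a body callback and paired begin/end mapper
// calls, while omp.target_enter_data, omp.target_exit_data and
// omp.target_update lower to single calls to the runtime functions selected
// above (__tgt_target_data_begin_mapper, __tgt_target_data_end_mapper and
// __tgt_target_data_update_mapper respectively).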
2694 
2695 /// Lowers the FlagsAttr, which is applied to the module on the device
2696 /// pass when offloading. This attribute contains OpenMP RTL globals that can
2697 /// be passed as flags to the frontend; otherwise they are set to defaults.
2698 LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
2699  LLVM::ModuleTranslation &moduleTranslation) {
2700  if (!dyn_cast<mlir::ModuleOp>(op))
2701  return failure();
2702 
2703  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2704 
2705  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
2706  attribute.getOpenmpDeviceVersion());
2707 
2708  if (attribute.getNoGpuLib())
2709  return success();
2710 
2711  ompBuilder->createGlobalFlag(
2712  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
2713  "__omp_rtl_debug_kind");
2714  ompBuilder->createGlobalFlag(
2715  attribute
2716  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
2717  ,
2718  "__omp_rtl_assume_teams_oversubscription");
2719  ompBuilder->createGlobalFlag(
2720  attribute
2721  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
2722  ,
2723  "__omp_rtl_assume_threads_oversubscription");
2724  ompBuilder->createGlobalFlag(
2725  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
2726  "__omp_rtl_assume_no_thread_state");
2727  ompBuilder->createGlobalFlag(
2728  attribute
2729  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
2730  ,
2731  "__omp_rtl_assume_no_nested_parallelism");
2732  return success();
2733 }
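
// Illustrative example (attribute syntax abbreviated, not taken from a test):
// a device module annotated with
//   omp.flags = #omp.flags<debug_kind = 1, assume_threads_oversubscription = true>
// would get the "openmp-device" module flag plus device RTL globals such as
// __omp_rtl_debug_kind = 1 and __omp_rtl_assume_threads_oversubscription = 1
// emitted by the createGlobalFlag calls above.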
2734 
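/// Populates `targetInfo` with a unique identifier for the given target
/// region, derived from the file (device/file unique ID) and line of the op's
/// FileLineColLoc plus the parent function name. Returns false and emits an
/// error if the file's unique ID cannot be obtained.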
2735 static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
2736  omp::TargetOp targetOp,
2737  llvm::StringRef parentName = "") {
2738  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
2739 
2740  assert(fileLoc && "No file found from location");
2741  StringRef fileName = fileLoc.getFilename().getValue();
2742 
2743  llvm::sys::fs::UniqueID id;
2744  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
2745  targetOp.emitError("Unable to get unique ID for file");
2746  return false;
2747  }
2748 
2749  uint64_t line = fileLoc.getLine();
2750  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
2751  id.getFile(), line);
2752  return true;
2753 }
2754 
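/// Returns true if the omp.target operation only uses clauses that this
/// translation currently supports; otherwise emits an error for the offending
/// clause (if, device, thread_limit or nowait) and returns false.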
2755 static bool targetOpSupported(Operation &opInst) {
2756  auto targetOp = cast<omp::TargetOp>(opInst);
2757  if (targetOp.getIfExpr()) {
2758  opInst.emitError("If clause not yet supported");
2759  return false;
2760  }
2761 
2762  if (targetOp.getDevice()) {
2763  opInst.emitError("Device clause not yet supported");
2764  return false;
2765  }
2766 
2767  if (targetOp.getThreadLimit()) {
2768  opInst.emitError("Thread limit clause not yet supported");
2769  return false;
2770  }
2771 
2772  if (targetOp.getNowait()) {
2773  opInst.emitError("Nowait clause not yet supported");
2774  return false;
2775  }
2776 
2777  return true;
2778 }
2779 
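/// For device compilation, rewrite uses of declare-target globals that were
/// mapped into a target region so that they are accessed through the
/// generated reference pointer instead (see the comment inside the loop
/// below for the full rationale).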
2780 static void
2781 handleDeclareTargetMapVar(MapInfoData &mapData,
2782  LLVM::ModuleTranslation &moduleTranslation,
2783  llvm::IRBuilderBase &builder) {
2784  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
2785  // In the case of declare target mapped variables, the basePointer is
2786  // the reference pointer generated by the convertDeclareTargetAttr
2787  // method, whereas the kernelValue is the original variable. For the
2788  // device we must therefore replace all uses of the original global
2789  // variable (stored in kernelValue) with the reference pointer (stored
2790  // in basePointer for declare target mapped variables), because on the
2791  // device the data is mapped into this reference pointer and should be
2792  // loaded from it; the original variable is discarded. On the host both
2793  // exist, and metadata is generated (elsewhere in the
2794  // convertDeclareTargetAttr function) to link the two variables in the
2795  // runtime; both the reference pointer and the pointer are then assigned
2796  // in the kernel argument structure for the host.
2797  if (mapData.IsDeclareTarget[i]) {
2798  // The users iterator will get invalidated if we modify an element,
2799  // so we populate this vector of uses to alter each user on an individual
2800  // basis to emit its own load (rather than one load for all).
2801  llvm::SmallVector<llvm::User *> userVec;
2802  for (llvm::User *user : mapData.OriginalValue[i]->users())
2803  userVec.push_back(user);
2804 
2805  for (llvm::User *user : userVec) {
2806  if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
2807  auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
2808  mapData.BasePointers[i]);
2809  load->moveBefore(insn);
2810  user->replaceUsesOfWith(mapData.OriginalValue[i], load);
2811  }
2812  }
2813  }
2814  }
2815 }
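
// A rough sketch of the rewrite performed above (illustrative IR only; names
// and types are assumptions rather than copied from generated output):
//
//   before:  store i32 1, ptr @global_var
//   after:   %ref = load ptr, ptr @global_var_decl_tgt_ref_ptr
//            store i32 1, ptr %ref
//
// i.e. each device-side use of the original global is redirected through a
// load of the declare-target reference pointer created for it.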
2816 
2817 // The createDeviceArgumentAccessor function generates
2818 // instructions for retrieving (accessing) kernel
2819 // arguments inside of the device kernel for use by
2820 // the kernel. This enables different semantics such as
2821 // the creation of temporary copies of data allowing
2822 // semantics like read-only/no host write back kernel
2823 // arguments.
2824 //
2825 // This currently implements a very light version of Clang's
2826 // EmitParmDecl handling of direct arguments, as well
2827 // as a portion of the argument access generation based on
2828 // capture types found at the end of emitOutlinedFunctionPrologue
2829 // in Clang. The indirect path handling of EmitParmDecl may be
2830 // required for future work, but a direct 1-to-1 copy doesn't seem
2831 // possible as the logic is rather scattered throughout Clang's
2832 // lowering and perhaps we wish to deviate slightly.
2833 //
2834 // \param mapData - A container containing vectors of information
2835 // corresponding to the input argument, which should have a
2836 // corresponding entry in the MapInfoData container's
2837 // OriginalValue vector.
2838 // \param arg - This is the generated kernel function argument that
2839 // corresponds to the passed-in input argument. We generate different
2840 // accesses of this Argument based on its capture type and other
2841 // input-related information.
2842 // \param input - This is the host-side value that will be passed to
2843 // the kernel, i.e. the kernel input. We rewrite all uses of it within
2844 // the kernel (as we generate the kernel body based on the target's region,
2845 // which maintains references to the original input) to the retVal argument
2846 // upon exit of this function inside of the OMPIRBuilder. This links
2847 // the kernel argument to future uses of it in the function, providing
2848 // appropriate "glue" instructions in between.
2849 // \param retVal - This is the value that all uses of input inside of the
2850 // kernel will be rewritten to. The goal of this function is to generate
2851 // an appropriate location for the kernel argument to be accessed from:
2852 // ByRef will result in a temporary allocation and then
2853 // a store of the kernel argument into this allocated memory, which
2854 // will then be loaded from, whereas ByCopy will use the allocated memory
2855 // directly.
2856 static llvm::IRBuilderBase::InsertPoint
2857 createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
2858  llvm::Value *input, llvm::Value *&retVal,
2859  llvm::IRBuilderBase &builder,
2860  llvm::OpenMPIRBuilder &ompBuilder,
2861  LLVM::ModuleTranslation &moduleTranslation,
2862  llvm::IRBuilderBase::InsertPoint allocaIP,
2863  llvm::IRBuilderBase::InsertPoint codeGenIP) {
2864  builder.restoreIP(allocaIP);
2865 
2866  mlir::omp::VariableCaptureKind capture =
2867  mlir::omp::VariableCaptureKind::ByRef;
2868 
2869  // Find the associated MapInfoData entry for the current input
2870  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
2871  if (mapData.OriginalValue[i] == input) {
2872  if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2873  mapData.MapClause[i])) {
2874  capture = mapOp.getMapCaptureType().value_or(
2875  mlir::omp::VariableCaptureKind::ByRef);
2876  }
2877 
2878  break;
2879  }
2880 
2881  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
2882  unsigned int defaultAS =
2883  ompBuilder.M.getDataLayout().getProgramAddressSpace();
2884 
2885  // Create the alloca for the argument at the current insertion point.
2886  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
2887 
2888  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
2889  v = builder.CreatePointerBitCastOrAddrSpaceCast(
2890  v, arg.getType()->getPointerTo(defaultAS));
2891 
2892  builder.CreateStore(&arg, v);
2893 
2894  builder.restoreIP(codeGenIP);
2895 
2896  switch (capture) {
2897  case mlir::omp::VariableCaptureKind::ByCopy: {
2898  retVal = v;
2899  break;
2900  }
2901  case mlir::omp::VariableCaptureKind::ByRef: {
2902  retVal = builder.CreateAlignedLoad(
2903  v->getType(), v,
2904  ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
2905  break;
2906  }
2907  case mlir::omp::VariableCaptureKind::This:
2908  case mlir::omp::VariableCaptureKind::VLAType:
2909  assert(false && "Currently unsupported capture kind");
2910  break;
2911  }
2912 
2913  return builder.saveIP();
2914 }
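
// Rough sketch of the two supported capture kinds above (assumed shapes, not
// copied from generated code): given a kernel argument %arg,
//   %tmp = alloca ...            ; created at allocaIP
//   store %arg, %tmp
//   ByCopy: retVal = %tmp        ; the temporary itself is handed back
//   ByRef:  retVal = load %tmp   ; the stored argument value is reloaded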
2915 
2916 static LogicalResult
2917 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
2918  LLVM::ModuleTranslation &moduleTranslation) {
2919 
2920  if (!targetOpSupported(opInst))
2921  return failure();
2922 
2923  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
2924  auto targetOp = cast<omp::TargetOp>(opInst);
2925  auto &targetRegion = targetOp.getRegion();
2926  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
2927  SmallVector<Value> mapOperands = targetOp.getMapOperands();
2928 
2929  LogicalResult bodyGenStatus = success();
2930  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2931  auto bodyCB = [&](InsertPointTy allocaIP,
2932  InsertPointTy codeGenIP) -> InsertPointTy {
2933  // Forward target-cpu and target-features function attributes from the
2934  // original function to the new outlined function.
2935  llvm::Function *llvmParentFn =
2936  moduleTranslation.lookupFunction(parentFn.getName());
2937  llvm::Function *llvmOutlinedFn = codeGenIP.getBlock()->getParent();
2938  assert(llvmParentFn && llvmOutlinedFn &&
2939  "Both parent and outlined functions must exist at this point");
2940 
2941  if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
2942  attr.isStringAttribute())
2943  llvmOutlinedFn->addFnAttr(attr);
2944 
2945  if (auto attr = llvmParentFn->getFnAttribute("target-features");
2946  attr.isStringAttribute())
2947  llvmOutlinedFn->addFnAttr(attr);
2948 
2949  builder.restoreIP(codeGenIP);
2950  unsigned argIndex = 0;
2951  for (auto &mapOp : mapOperands) {
2952  auto mapInfoOp =
2953  mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
2954  llvm::Value *mapOpValue =
2955  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
2956  const auto &arg = targetRegion.front().getArgument(argIndex);
2957  moduleTranslation.mapValue(arg, mapOpValue);
2958  argIndex++;
2959  }
2960  llvm::BasicBlock *exitBlock = convertOmpOpRegions(
2961  targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
2962  builder.SetInsertPoint(exitBlock);
2963  return builder.saveIP();
2964  };
2965 
2966  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2967  StringRef parentName = parentFn.getName();
2968 
2969  llvm::TargetRegionEntryInfo entryInfo;
2970 
2971  if (!getTargetEntryUniqueInfo(entryInfo, targetOp, parentName))
2972  return failure();
2973 
2974  int32_t defaultValTeams = -1;
2975  int32_t defaultValThreads = 0;
2976 
2977  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2978  findAllocaInsertPoint(builder, moduleTranslation);
2979 
2980  MapInfoData mapData;
2981  collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, dl,
2982  builder);
2983 
2984  llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
2985  auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
2986  -> llvm::OpenMPIRBuilder::MapInfosTy & {
2987  builder.restoreIP(codeGenIP);
2988  genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, {}, {},
2989  true);
2990  return combinedInfos;
2991  };
2992 
2993  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
2994  llvm::Value *&retVal, InsertPointTy allocaIP,
2995  InsertPointTy codeGenIP) {
2996  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2997 
2998  // We just return the unaltered argument for the host function
2999  // for now; some alterations may be required in the future to
3000  // keep host fallback functions working identically to the device
3001  // version (e.g. values passed ByCopy should be treated as such on
3002  // both host and device, which is currently not always the case).
3003  if (!ompBuilder->Config.isTargetDevice()) {
3004  retVal = cast<llvm::Value>(&arg);
3005  return codeGenIP;
3006  }
3007 
3008  return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
3009  *ompBuilder, moduleTranslation,
3010  allocaIP, codeGenIP);
3011  };
3012 
3013  llvm::SmallVector<llvm::Value *> kernelInput;
3014  for (size_t i = 0; i < mapOperands.size(); ++i) {
3015  // Declare target arguments are not passed to kernels as arguments.
3016  // TODO: We currently do not handle cases where a member is explicitly
3017  // passed in as an argument; this will likely need to be handled in
3018  // the near future. Rather than using IsAMember, it may be better to
3019  // test if the relevant BlockArg is used within the target region and
3020  // then use that as a basis for exclusion in the kernel inputs.
3021  if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
3022  kernelInput.push_back(mapData.OriginalValue[i]);
3023  }
3024 
3025  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget(
3026  ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams,
3027  defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB));
3028 
3029  // Remap access operations to declare target reference pointers for the
3030  // device, essentially generating extra load operations as necessary.
3031  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
3032  handleDeclareTargetMapVar(mapData, moduleTranslation, builder);
3033 
3034  return bodyGenStatus;
3035 }
3036 
3037 static LogicalResult
3038 convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
3039  LLVM::ModuleTranslation &moduleTranslation) {
3040  // Amend omp.declare_target by deleting the IR of the outlined functions
3041  // created for target regions. They cannot be filtered out from MLIR earlier
3042  // because the omp.target operation inside must be translated to LLVM, but
3043  // the wrapper functions themselves must not remain at the end of the
3044  // process. We know that functions where omp.declare_target does not match
3045  // omp.is_target_device at this stage can only be wrapper functions because
3046  // those that aren't are removed earlier by an MLIR transformation pass.
3047  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
3048  if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
3049  op->getParentOfType<ModuleOp>().getOperation())) {
3050  if (!offloadMod.getIsTargetDevice())
3051  return success();
3052 
3053  omp::DeclareTargetDeviceType declareType =
3054  attribute.getDeviceType().getValue();
3055 
3056  if (declareType == omp::DeclareTargetDeviceType::host) {
3057  llvm::Function *llvmFunc =
3058  moduleTranslation.lookupFunction(funcOp.getName());
3059  llvmFunc->dropAllReferences();
3060  llvmFunc->eraseFromParent();
3061  }
3062  }
3063  return success();
3064  }
3065 
3066  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
3067  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
3068  if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
3069  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3070  bool isDeclaration = gOp.isDeclaration();
3071  bool isExternallyVisible =
3072  gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
3073  auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
3074  llvm::StringRef mangledName = gOp.getSymName();
3075  auto captureClause =
3076  convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
3077  auto deviceClause =
3078  convertToDeviceClauseKind(attribute.getDeviceType().getValue());
3079  // Unused for MLIR at the moment, required in Clang for
3080  // bookkeeping.
3081  std::vector<llvm::GlobalVariable *> generatedRefs;
3082 
3083  std::vector<llvm::Triple> targetTriple;
3084  auto targetTripleAttr =
3085  op->getParentOfType<mlir::ModuleOp>()
3086  ->getAttr(LLVM::LLVMDialect::getTargetTripleAttrName())
3087  .dyn_cast_or_null<mlir::StringAttr>();
3088  if (targetTripleAttr)
3089  targetTriple.emplace_back(targetTripleAttr.data());
3090 
3091  auto fileInfoCallBack = [&loc]() {
3092  std::string filename = "";
3093  std::uint64_t lineNo = 0;
3094 
3095  if (loc) {
3096  filename = loc.getFilename().str();
3097  lineNo = loc.getLine();
3098  }
3099 
3100  return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
3101  lineNo);
3102  };
3103 
3104  ompBuilder->registerTargetGlobalVariable(
3105  captureClause, deviceClause, isDeclaration, isExternallyVisible,
3106  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
3107  generatedRefs, /*OpenMPSimd*/ false, targetTriple,
3108  /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
3109  gVal->getType(), gVal);
3110 
3111  if (ompBuilder->Config.isTargetDevice() &&
3112  (attribute.getCaptureClause().getValue() !=
3113  mlir::omp::DeclareTargetCaptureClause::to ||
3114  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
3115  ompBuilder->getAddrOfDeclareTargetVar(
3116  captureClause, deviceClause, isDeclaration, isExternallyVisible,
3117  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
3118  generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
3119  /*GlobalInitializer*/ nullptr,
3120  /*VariableLinkage*/ nullptr);
3121  }
3122  }
3123  }
3124 
3125  return success();
3126 }
3127 
3128 // Returns true if the operation is inside a TargetOp or
3129 // is part of a declare target function.
3130 static bool isTargetDeviceOp(Operation *op) {
3131  // Assumes no reverse offloading
3132  if (op->getParentOfType<omp::TargetOp>())
3133  return true;
3134 
3135  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
3136  if (auto declareTargetIface =
3137  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
3138  parentFn.getOperation()))
3139  if (declareTargetIface.isDeclareTarget() &&
3140  declareTargetIface.getDeclareTargetDeviceType() !=
3141  mlir::omp::DeclareTargetDeviceType::host)
3142  return true;
3143 
3144  return false;
3145 }
3146 
3147 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
3148 /// (including OpenMP runtime calls).
3149 static LogicalResult
3150 convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
3151  LLVM::ModuleTranslation &moduleTranslation) {
3152 
3153  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3154 
3155  return llvm::TypeSwitch<Operation *, LogicalResult>(op)
3156  .Case([&](omp::BarrierOp) {
3157  ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
3158  return success();
3159  })
3160  .Case([&](omp::TaskwaitOp) {
3161  ompBuilder->createTaskwait(builder.saveIP());
3162  return success();
3163  })
3164  .Case([&](omp::TaskyieldOp) {
3165  ompBuilder->createTaskyield(builder.saveIP());
3166  return success();
3167  })
3168  .Case([&](omp::FlushOp) {
3169  // The OpenMP runtime function (__kmpc_flush) does not accept
3170  // an argument list.
3171  // The OpenMP standard states the following:
3172  // "An implementation may implement a flush with a list by ignoring
3173  // the list, and treating it the same as a flush without a list."
3174  //
3175  // The argument list is therefore discarded so that a flush with a
3176  // list is treated the same as a flush without a list.
3177  ompBuilder->createFlush(builder.saveIP());
3178  return success();
3179  })
3180  .Case([&](omp::ParallelOp op) {
3181  return convertOmpParallel(op, builder, moduleTranslation);
3182  })
3183  .Case([&](omp::ReductionOp reductionOp) {
3184  return convertOmpReductionOp(reductionOp, builder, moduleTranslation);
3185  })
3186  .Case([&](omp::MasterOp) {
3187  return convertOmpMaster(*op, builder, moduleTranslation);
3188  })
3189  .Case([&](omp::CriticalOp) {
3190  return convertOmpCritical(*op, builder, moduleTranslation);
3191  })
3192  .Case([&](omp::OrderedRegionOp) {
3193  return convertOmpOrderedRegion(*op, builder, moduleTranslation);
3194  })
3195  .Case([&](omp::OrderedOp) {
3196  return convertOmpOrdered(*op, builder, moduleTranslation);
3197  })
3198  .Case([&](omp::WsloopOp) {
3199  return convertOmpWsloop(*op, builder, moduleTranslation);
3200  })
3201  .Case([&](omp::SimdLoopOp) {
3202  return convertOmpSimdLoop(*op, builder, moduleTranslation);
3203  })
3204  .Case([&](omp::AtomicReadOp) {
3205  return convertOmpAtomicRead(*op, builder, moduleTranslation);
3206  })
3207  .Case([&](omp::AtomicWriteOp) {
3208  return convertOmpAtomicWrite(*op, builder, moduleTranslation);
3209  })
3210  .Case([&](omp::AtomicUpdateOp op) {
3211  return convertOmpAtomicUpdate(op, builder, moduleTranslation);
3212  })
3213  .Case([&](omp::AtomicCaptureOp op) {
3214  return convertOmpAtomicCapture(op, builder, moduleTranslation);
3215  })
3216  .Case([&](omp::SectionsOp) {
3217  return convertOmpSections(*op, builder, moduleTranslation);
3218  })
3219  .Case([&](omp::SingleOp op) {
3220  return convertOmpSingle(op, builder, moduleTranslation);
3221  })
3222  .Case([&](omp::TeamsOp op) {
3223  return convertOmpTeams(op, builder, moduleTranslation);
3224  })
3225  .Case([&](omp::TaskOp op) {
3226  return convertOmpTaskOp(op, builder, moduleTranslation);
3227  })
3228  .Case([&](omp::TaskgroupOp op) {
3229  return convertOmpTaskgroupOp(op, builder, moduleTranslation);
3230  })
3231  .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareReductionOp,
3232  omp::CriticalDeclareOp>([](auto op) {
3233  // `yield` and `terminator` can be just omitted. The block structure
3234  // was created in the region that handles their parent operation.
3235  // `declare_reduction` will be used by reductions and is not
3236  // converted directly, skip it.
3237  // `critical.declare` is only used to declare names of critical
3238  // sections which will be used by `critical` ops and hence can be
3239  // ignored for lowering. The OpenMP IRBuilder will create a unique
3240  // name for each critical section.
3241  return success();
3242  })
3243  .Case([&](omp::ThreadprivateOp) {
3244  return convertOmpThreadprivate(*op, builder, moduleTranslation);
3245  })
3246  .Case<omp::TargetDataOp, omp::TargetEnterDataOp, omp::TargetExitDataOp,
3247  omp::TargetUpdateOp>([&](auto op) {
3248  return convertOmpTargetData(op, builder, moduleTranslation);
3249  })
3250  .Case([&](omp::TargetOp) {
3251  return convertOmpTarget(*op, builder, moduleTranslation);
3252  })
3253  .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
3254  [&](auto op) {
3255  // No-op; these are handled by the relevant owning operations, e.g.
3256  // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp, etc.,
3257  // and then discarded.
3258  return success();
3259  })
3260  .Default([&](Operation *inst) {
3261  return inst->emitError("unsupported OpenMP operation: ")
3262  << inst->getName();
3263  });
3264 }
3265 
3266 static LogicalResult
3267 convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
3268  LLVM::ModuleTranslation &moduleTranslation) {
3269  return convertHostOrTargetOperation(op, builder, moduleTranslation);
3270 }
3271 
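/// When compiling for the target device, walk a host operation and translate
/// only the target-related operations nested inside it (omp.target and the
/// target data operations); everything else is skipped.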
3272 static LogicalResult
3273 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
3274  LLVM::ModuleTranslation &moduleTranslation) {
3275  if (isa<omp::TargetOp>(op))
3276  return convertOmpTarget(*op, builder, moduleTranslation);
3277  if (isa<omp::TargetDataOp>(op))
3278  return convertOmpTargetData(op, builder, moduleTranslation);
3279  bool interrupted =
3280  op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
3281  if (isa<omp::TargetOp>(oper)) {
3282  if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
3283  return WalkResult::interrupt();
3284  return WalkResult::skip();
3285  }
3286  if (isa<omp::TargetDataOp>(oper)) {
3287  if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
3288  return WalkResult::interrupt();
3289  return WalkResult::skip();
3290  }
3291  return WalkResult::advance();
3292  }).wasInterrupted();
3293  return failure(interrupted);
3294 }
3295 
3296 namespace {
3297 
3298 /// Implementation of the dialect interface that converts operations belonging
3299 /// to the OpenMP dialect to LLVM IR.
3300 class OpenMPDialectLLVMIRTranslationInterface
3301  : public LLVMTranslationDialectInterface {
3302 public:
3303  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
3304 
3305  /// Translates the given operation to LLVM IR using the provided IR builder
3306  /// and saving the state in `moduleTranslation`.
3307  LogicalResult
3308  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
3309  LLVM::ModuleTranslation &moduleTranslation) const final;
3310 
3311  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
3312  /// runtime calls, or operation amendments.
3313  LogicalResult
3314  amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
3315  NamedAttribute attribute,
3316  LLVM::ModuleTranslation &moduleTranslation) const final;
3317 };
3318 
3319 } // namespace
3320 
3321 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
3322  Operation *op, ArrayRef<llvm::Instruction *> instructions,
3323  NamedAttribute attribute,
3324  LLVM::ModuleTranslation &moduleTranslation) const {
3325  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
3326  attribute.getName())
3327  .Case("omp.is_target_device",
3328  [&](Attribute attr) {
3329  if (auto deviceAttr = attr.dyn_cast<BoolAttr>()) {
3330  llvm::OpenMPIRBuilderConfig &config =
3331  moduleTranslation.getOpenMPBuilder()->Config;
3332  config.setIsTargetDevice(deviceAttr.getValue());
3333  return success();
3334  }
3335  return failure();
3336  })
3337  .Case("omp.is_gpu",
3338  [&](Attribute attr) {
3339  if (auto gpuAttr = attr.dyn_cast<BoolAttr>()) {
3340  llvm::OpenMPIRBuilderConfig &config =
3341  moduleTranslation.getOpenMPBuilder()->Config;
3342  config.setIsGPU(gpuAttr.getValue());
3343  return success();
3344  }
3345  return failure();
3346  })
3347  .Case("omp.host_ir_filepath",
3348  [&](Attribute attr) {
3349  if (auto filepathAttr = attr.dyn_cast<StringAttr>()) {
3350  llvm::OpenMPIRBuilder *ompBuilder =
3351  moduleTranslation.getOpenMPBuilder();
3352  ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
3353  return success();
3354  }
3355  return failure();
3356  })
3357  .Case("omp.flags",
3358  [&](Attribute attr) {
3359  if (auto rtlAttr = attr.dyn_cast<omp::FlagsAttr>())
3360  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
3361  return failure();
3362  })
3363  .Case("omp.version",
3364  [&](Attribute attr) {
3365  if (auto versionAttr = attr.dyn_cast<omp::VersionAttr>()) {
3366  llvm::OpenMPIRBuilder *ompBuilder =
3367  moduleTranslation.getOpenMPBuilder();
3368  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
3369  versionAttr.getVersion());
3370  return success();
3371  }
3372  return failure();
3373  })
3374  .Case("omp.declare_target",
3375  [&](Attribute attr) {
3376  if (auto declareTargetAttr =
3377  attr.dyn_cast<omp::DeclareTargetAttr>())
3378  return convertDeclareTargetAttr(op, declareTargetAttr,
3379  moduleTranslation);
3380  return failure();
3381  })
3382  .Case("omp.requires",
3383  [&](Attribute attr) {
3384  if (auto requiresAttr =
3385  attr.dyn_cast<omp::ClauseRequiresAttr>()) {
3386  using Requires = omp::ClauseRequires;
3387  Requires flags = requiresAttr.getValue();
3388  llvm::OpenMPIRBuilderConfig &config =
3389  moduleTranslation.getOpenMPBuilder()->Config;
3390  config.setHasRequiresReverseOffload(
3391  bitEnumContainsAll(flags, Requires::reverse_offload));
3392  config.setHasRequiresUnifiedAddress(
3393  bitEnumContainsAll(flags, Requires::unified_address));
3394  config.setHasRequiresUnifiedSharedMemory(
3395  bitEnumContainsAll(flags, Requires::unified_shared_memory));
3396  config.setHasRequiresDynamicAllocators(
3397  bitEnumContainsAll(flags, Requires::dynamic_allocators));
3398  return success();
3399  }
3400  return failure();
3401  })
3402  .Default([](Attribute) {
3403  // Fall through for omp attributes that do not require lowering.
3404  return success();
3405  })(attribute.getValue());
3406 
3407  return failure();
3408 }
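
// Illustrative example (abbreviated MLIR, not taken from a test): a module
// such as
//   module attributes {omp.is_target_device = true,
//                      omp.version = #omp.version<version = 50>} { ... }
// reaches amendOperation once per discardable attribute; the
// "omp.is_target_device" case updates the OpenMPIRBuilder config and
// "omp.version" adds the "openmp" module flag, as handled in the cases above.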
3409 
3410 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
3411 /// (including OpenMP runtime calls).
3412 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
3413  Operation *op, llvm::IRBuilderBase &builder,
3414  LLVM::ModuleTranslation &moduleTranslation) const {
3415 
3416  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3417  if (ompBuilder->Config.isTargetDevice()) {
3418  if (isTargetDeviceOp(op)) {
3419  return convertTargetDeviceOp(op, builder, moduleTranslation);
3420  } else {
3421  return convertTargetOpsInNest(op, builder, moduleTranslation);
3422  }
3423  }
3424 
3425  return convertHostOrTargetOperation(op, builder, moduleTranslation);
3426 }
3427 
3428 void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {
3429  registry.insert<omp::OpenMPDialect>();
3430  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
3431  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
3432  });
3433 }
3434 
3435 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) {
3436  DialectRegistry registry;
3437  registerOpenMPDialectTranslation(registry);
3438  context.appendDialectRegistry(registry);
3439 }