1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
17 #include "mlir/IR/IRMapping.h"
18 #include "mlir/IR/Operation.h"
19 #include "mlir/Support/LLVM.h"
24 
25 #include "llvm/ADT/SetVector.h"
26 #include "llvm/ADT/TypeSwitch.h"
27 #include "llvm/Frontend/OpenMP/OMPConstants.h"
28 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
29 #include "llvm/IR/DebugInfoMetadata.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/TargetParser/Triple.h"
33 #include "llvm/Transforms/Utils/ModuleUtils.h"
34 
35 #include <any>
36 #include <optional>
37 #include <utility>
38 
39 using namespace mlir;
40 
41 namespace {
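/// Converts the MLIR schedule clause kind to the corresponding OpenMPIRBuilder
/// schedule kind, defaulting to OMP_SCHEDULE_Default when no schedule clause
/// is present.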
42 static llvm::omp::ScheduleKind
43 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
44  if (!schedKind.has_value())
45  return llvm::omp::OMP_SCHEDULE_Default;
46  switch (schedKind.value()) {
47  case omp::ClauseScheduleKind::Static:
48  return llvm::omp::OMP_SCHEDULE_Static;
49  case omp::ClauseScheduleKind::Dynamic:
50  return llvm::omp::OMP_SCHEDULE_Dynamic;
51  case omp::ClauseScheduleKind::Guided:
52  return llvm::omp::OMP_SCHEDULE_Guided;
53  case omp::ClauseScheduleKind::Auto:
54  return llvm::omp::OMP_SCHEDULE_Auto;
55  case omp::ClauseScheduleKind::Runtime:
56  return llvm::omp::OMP_SCHEDULE_Runtime;
57  }
58  llvm_unreachable("unhandled schedule clause argument");
59 }
60 
61 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
62 /// insertion points for allocas.
63 class OpenMPAllocaStackFrame
64  : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
65 public:
66  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)
67 
68  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
69  : allocaInsertPoint(allocaIP) {}
70  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
71 };
72 
73 /// ModuleTranslation stack frame containing the partial mapping between MLIR
74 /// values and their LLVM IR equivalents.
75 class OpenMPVarMappingStackFrame
76  : public LLVM::ModuleTranslation::StackFrameBase<
77  OpenMPVarMappingStackFrame> {
78 public:
79  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame)
80 
81  explicit OpenMPVarMappingStackFrame(
82  const DenseMap<Value, llvm::Value *> &mapping)
83  : mapping(mapping) {}
84 
85  DenseMap<Value, llvm::Value *> mapping;
86 };
87 } // namespace
88 
89 /// Find the insertion point for allocas given the current insertion point for
90 /// normal operations in the builder.
91 static llvm::OpenMPIRBuilder::InsertPointTy
92 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
93  const LLVM::ModuleTranslation &moduleTranslation) {
94  // If there is an alloca insertion point on stack, i.e. we are in a nested
95  // operation and a specific point was provided by some surrounding operation,
96  // use it.
97  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
98  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
99  [&](const OpenMPAllocaStackFrame &frame) {
100  allocaInsertPoint = frame.allocaInsertPoint;
101  return WalkResult::interrupt();
102  });
103  if (walkResult.wasInterrupted())
104  return allocaInsertPoint;
105 
106  // Otherwise, insert to the entry block of the surrounding function.
107  // If the current IRBuilder InsertPoint is the function's entry, it cannot
108  // also be used for alloca insertion which would result in insertion order
109  // confusion. Create a new BasicBlock for the Builder and use the entry block
110  // for the allocs.
111  // TODO: Create a dedicated alloca BasicBlock at function creation such that
112  // we do not need to move the current InsertPoint here.
113  if (builder.GetInsertBlock() ==
114  &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
115  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
116  "Assuming end of basic block");
117  llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
118  builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
119  builder.GetInsertBlock()->getNextNode());
120  builder.CreateBr(entryBB);
121  builder.SetInsertPoint(entryBB);
122  }
123 
124  llvm::BasicBlock &funcEntryBlock =
125  builder.GetInsertBlock()->getParent()->getEntryBlock();
126  return llvm::OpenMPIRBuilder::InsertPointTy(
127  &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
128 }
129 
130 /// Converts the given region that appears within an OpenMP dialect operation to
131 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
132 // region, and a branch from any block with a successor-less OpenMP terminator
133 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
134 /// of the continuation block if provided.
135 static llvm::BasicBlock *convertOmpOpRegions(
136  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
137  LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
138  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
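 // Split the current block; the newly created block becomes the continuation
 // block to which control flows once the region has been executed.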
139  llvm::BasicBlock *continuationBlock =
140  splitBB(builder, true, "omp.region.cont");
141  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
142 
143  llvm::LLVMContext &llvmContext = builder.getContext();
144  for (Block &bb : region) {
145  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
146  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
147  builder.GetInsertBlock()->getNextNode());
148  moduleTranslation.mapBlock(&bb, llvmBB);
149  }
150 
151  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
152 
153  // Terminators (namely YieldOp) may be forwarding values to the region that
154  // need to be available in the continuation block. Collect the types of these
155  // operands in preparation of creating PHI nodes.
156  SmallVector<llvm::Type *> continuationBlockPHITypes;
157  bool operandsProcessed = false;
158  unsigned numYields = 0;
159  for (Block &bb : region.getBlocks()) {
160  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
161  if (!operandsProcessed) {
162  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
163  continuationBlockPHITypes.push_back(
164  moduleTranslation.convertType(yield->getOperand(i).getType()));
165  }
166  operandsProcessed = true;
167  } else {
168  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
169  "mismatching number of values yielded from the region");
170  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
171  llvm::Type *operandType =
172  moduleTranslation.convertType(yield->getOperand(i).getType());
173  (void)operandType;
174  assert(continuationBlockPHITypes[i] == operandType &&
175  "values of mismatching types yielded from the region");
176  }
177  }
178  numYields++;
179  }
180  }
181 
182  // Insert PHI nodes in the continuation block for any values forwarded by the
183  // terminators in this region.
184  if (!continuationBlockPHITypes.empty())
185  assert(
186  continuationBlockPHIs &&
187  "expected continuation block PHIs if converted regions yield values");
188  if (continuationBlockPHIs) {
189  llvm::IRBuilderBase::InsertPointGuard guard(builder);
190  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
191  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
192  for (llvm::Type *ty : continuationBlockPHITypes)
193  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
194  }
195 
196  // Convert blocks one by one in topological order to ensure
197  // defs are converted before uses.
198  SetVector<Block *> blocks = getBlocksSortedByDominance(region);
199  for (Block *bb : blocks) {
200  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
201  // Retarget the branch of the entry block to the entry block of the
202  // converted region (regions are single-entry).
203  if (bb->isEntryBlock()) {
204  assert(sourceTerminator->getNumSuccessors() == 1 &&
205  "provided entry block has multiple successors");
206  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
207  "ContinuationBlock is not the successor of the entry block");
208  sourceTerminator->setSuccessor(0, llvmBB);
209  }
210 
211  llvm::IRBuilderBase::InsertPointGuard guard(builder);
212  if (failed(
213  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) {
214  bodyGenStatus = failure();
215  return continuationBlock;
216  }
217 
218  // Special handling for `omp.yield` and `omp.terminator` (we may have more
219  // than one): they return control to the parent OpenMP dialect operation so
220  // we replace them with a branch to the continuation block. We handle this
221  // here to avoid relying on inter-function communication through the
222  // ModuleTranslation class to set up the correct insertion point. This is
223  // also consistent with MLIR's idiom of handling special region terminators
224  // in the same code that handles the region-owning operation.
225  Operation *terminator = bb->getTerminator();
226  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
227  builder.CreateBr(continuationBlock);
228 
229  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
230  (*continuationBlockPHIs)[i]->addIncoming(
231  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
232  }
233  }
234  // After all blocks have been traversed and values mapped, connect the PHI
235  // nodes to the results of preceding blocks.
236  LLVM::detail::connectPHINodes(region, moduleTranslation);
237 
238  // Remove the blocks and values defined in this region from the mapping since
239  // they are not visible outside of this region. This allows the same region to
240  // be converted several times (i.e., cloned) without clashes, and slightly
241  // speeds up the lookups.
242  moduleTranslation.forgetMapping(region);
243 
244  return continuationBlock;
245 }
246 
247 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
248 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
249  switch (kind) {
250  case omp::ClauseProcBindKind::Close:
251  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
252  case omp::ClauseProcBindKind::Master:
253  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
254  case omp::ClauseProcBindKind::Primary:
255  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
256  case omp::ClauseProcBindKind::Spread:
257  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
258  }
259  llvm_unreachable("Unknown ClauseProcBindKind kind");
260 }
261 
262 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
263 static LogicalResult
264 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
265  LLVM::ModuleTranslation &moduleTranslation) {
266  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
267  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
268  // relying on captured variables.
269  LogicalResult bodyGenStatus = success();
270 
271  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
272  // MasterOp has only one region associated with it.
273  auto &region = cast<omp::MasterOp>(opInst).getRegion();
274  builder.restoreIP(codeGenIP);
275  convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation,
276  bodyGenStatus);
277  };
278 
279  // TODO: Perform finalization actions for variables. This has to be
280  // called for variables which have destructors/finalizers.
281  auto finiCB = [&](InsertPointTy codeGenIP) {};
282 
283  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
284  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
285  ompLoc, bodyGenCB, finiCB));
286  return success();
287 }
288 
289 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
290 static LogicalResult
291 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
292  LLVM::ModuleTranslation &moduleTranslation) {
293  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
294  auto criticalOp = cast<omp::CriticalOp>(opInst);
295  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
296  // relying on captured variables.
297  LogicalResult bodyGenStatus = success();
298 
299  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
300  // CriticalOp has only one region associated with it.
301  auto &region = cast<omp::CriticalOp>(opInst).getRegion();
302  builder.restoreIP(codeGenIP);
303  convertOmpOpRegions(region, "omp.critical.region", builder,
304  moduleTranslation, bodyGenStatus);
305  };
306 
307  // TODO: Perform finalization actions for variables. This has to be
308  // called for variables which have destructors/finalizers.
309  auto finiCB = [&](InsertPointTy codeGenIP) {};
310 
311  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
312  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
313  llvm::Constant *hint = nullptr;
314 
315  // If it has a name, it probably has a hint too.
316  if (criticalOp.getNameAttr()) {
317  // The verifiers in the OpenMP dialect guarantee that all the pointers are
318  // non-null.
319  auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
320  auto criticalDeclareOp =
321  SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
322  symbolRef);
323  hint = llvm::ConstantInt::get(
324  llvm::Type::getInt32Ty(llvmContext),
325  static_cast<int>(criticalDeclareOp.getHintVal()));
326  }
327  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(
328  ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint));
329  return success();
330 }
331 
332 /// Returns a reduction declaration that corresponds to the given reduction
333 /// operation in the given container. Currently only supports reductions inside
334 /// WsloopOp and ParallelOp but can be easily extended as long as the given
335 /// construct implements getNumReductionVars.
336 template <typename T>
337 static std::optional<omp::DeclareReductionOp>
338 findReductionDeclInContainer(T container, omp::ReductionOp reduction) {
339  for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) {
340  if (container.getReductionVars()[i] != reduction.getAccumulator())
341  continue;
342 
343  SymbolRefAttr reductionSymbol =
344  cast<SymbolRefAttr>((*container.getReductions())[i]);
345  auto declareOp =
346  SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
347  container, reductionSymbol);
348  return declareOp;
349  }
350  return std::nullopt;
351 }
352 
353 /// Searches for a reduction in a provided region and the regions
354 /// it is nested in
355 static omp::DeclareReductionOp findReductionDecl(Operation &containerOp,
356  omp::ReductionOp reduction) {
357  std::optional<omp::DeclareReductionOp> declareOp = std::nullopt;
358  Operation *container = &containerOp;
359 
360  while (!declareOp.has_value() && container) {
361  // Check if current container is supported for reductions searches
362  if (auto par = dyn_cast<omp::ParallelOp>(*container)) {
363  declareOp = findReductionDeclInContainer(par, reduction);
364  } else if (auto loop = dyn_cast<omp::WsloopOp>(*container)) {
365  declareOp = findReductionDeclInContainer(loop, reduction);
366  } else {
367  break;
368  }
369 
370  // See if we can search parent for reductions as well
371  container = containerOp.getParentOp();
372  }
373 
374  assert(declareOp.has_value() &&
375  "reduction operation must be associated with a declaration");
376 
377  return *declareOp;
378 }
379 
380 /// Populates `reductions` with reduction declarations used in the given loop.
381 template <typename T>
382 static void
383 collectReductionDecls(T loop,
384  SmallVectorImpl<omp::DeclareReductionOp> &reductions) {
385  std::optional<ArrayAttr> attr = loop.getReductions();
386  if (!attr)
387  return;
388 
389  reductions.reserve(reductions.size() + loop.getNumReductionVars());
390  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
391  reductions.push_back(
392  SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
393  loop, symbolRef));
394  }
395 }
396 
397 /// Translates the blocks contained in the given region and appends them at
398 /// the current insertion point of `builder`. The operations of the entry block
399 /// are appended to the current insertion block. If set, `continuationBlockArgs`
400 /// is populated with translated values that correspond to the values
401 /// omp.yield'ed from the region.
402 static LogicalResult inlineConvertOmpRegions(
403  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
404  LLVM::ModuleTranslation &moduleTranslation,
405  SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
406  if (region.empty())
407  return success();
408 
409  // Special case for single-block regions: insert the operations directly into
410  // the current insertion block without creating additional blocks.
411  if (llvm::hasSingleElement(region)) {
412  llvm::Instruction *potentialTerminator =
413  builder.GetInsertBlock()->empty() ? nullptr
414  : &builder.GetInsertBlock()->back();
415 
416  if (potentialTerminator && potentialTerminator->isTerminator())
417  potentialTerminator->removeFromParent();
418  moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
419 
420  if (failed(moduleTranslation.convertBlock(
421  region.front(), /*ignoreArguments=*/true, builder)))
422  return failure();
423 
424  // The continuation arguments are simply the translated terminator operands.
425  if (continuationBlockArgs)
426  llvm::append_range(
427  *continuationBlockArgs,
428  moduleTranslation.lookupValues(region.front().back().getOperands()));
429 
430  // Drop the mapping that is no longer necessary so that the same region can
431  // be processed multiple times.
432  moduleTranslation.forgetMapping(region);
433 
434  if (potentialTerminator && potentialTerminator->isTerminator())
435  potentialTerminator->insertAfter(&builder.GetInsertBlock()->back());
436 
437  return success();
438  }
439 
440  LogicalResult bodyGenStatus = success();
441  SmallVector<llvm::PHINode *> phis;
442  llvm::BasicBlock *continuationBlock = convertOmpOpRegions(
443  region, blockName, builder, moduleTranslation, bodyGenStatus, &phis);
444  if (failed(bodyGenStatus))
445  return failure();
446  if (continuationBlockArgs)
447  llvm::append_range(*continuationBlockArgs, phis);
448  builder.SetInsertPoint(continuationBlock,
449  continuationBlock->getFirstInsertionPt());
450  return success();
451 }
452 
453 namespace {
454 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
455 /// store lambdas with capture.
456 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
457  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
458  llvm::Value *&)>;
459 using OwningAtomicReductionGen =
460  std::function<llvm::OpenMPIRBuilder::InsertPointTy(
461  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
462  llvm::Value *)>;
463 } // namespace
464 
465 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
466 /// reduction declaration. The generator uses `builder` but ignores its
467 /// insertion point.
468 static OwningReductionGen
469 makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
470  LLVM::ModuleTranslation &moduleTranslation) {
471  // The lambda is mutable because we need access to non-const methods of decl
472  // (which aren't actually mutating it), and we must capture decl by-value to
473  // avoid the dangling reference after the parent function returns.
474  OwningReductionGen gen =
475  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
476  llvm::Value *lhs, llvm::Value *rhs,
477  llvm::Value *&result) mutable {
478  Region &reductionRegion = decl.getReductionRegion();
479  moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs);
480  moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs);
481  builder.restoreIP(insertPoint);
482  SmallVector<llvm::Value *> phis;
483  if (failed(inlineConvertOmpRegions(reductionRegion,
484  "omp.reduction.nonatomic.body",
485  builder, moduleTranslation, &phis)))
486  return llvm::OpenMPIRBuilder::InsertPointTy();
487  assert(phis.size() == 1);
488  result = phis[0];
489  return builder.saveIP();
490  };
491  return gen;
492 }
493 
494 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
495 /// given reduction declaration. The generator uses `builder` but ignores its
496 /// insertion point. Returns null if there is no atomic region available in the
497 /// reduction declaration.
498 static OwningAtomicReductionGen
499 makeAtomicReductionGen(omp::DeclareReductionOp decl,
500  llvm::IRBuilderBase &builder,
501  LLVM::ModuleTranslation &moduleTranslation) {
502  if (decl.getAtomicReductionRegion().empty())
503  return OwningAtomicReductionGen();
504 
505  // The lambda is mutable because we need access to non-const methods of decl
506  // (which aren't actually mutating it), and we must capture decl by-value to
507  // avoid the dangling reference after the parent function returns.
508  OwningAtomicReductionGen atomicGen =
509  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
510  llvm::Value *lhs, llvm::Value *rhs) mutable {
511  Region &atomicRegion = decl.getAtomicReductionRegion();
512  moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs);
513  moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs);
514  builder.restoreIP(insertPoint);
515  SmallVector<llvm::Value *> phis;
516  if (failed(inlineConvertOmpRegions(atomicRegion,
517  "omp.reduction.atomic.body", builder,
518  moduleTranslation, &phis)))
519  return llvm::OpenMPIRBuilder::InsertPointTy();
520  assert(phis.empty());
521  return builder.saveIP();
522  };
523  return atomicGen;
524 }
525 
526 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
527 static LogicalResult
528 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
529  LLVM::ModuleTranslation &moduleTranslation) {
530  auto orderedOp = cast<omp::OrderedOp>(opInst);
531 
532  omp::ClauseDepend dependType = *orderedOp.getDependTypeVal();
533  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
534  unsigned numLoops = *orderedOp.getNumLoopsVal();
535  SmallVector<llvm::Value *> vecValues =
536  moduleTranslation.lookupValues(orderedOp.getDependVecVars());
537 
538  size_t indexVecValues = 0;
539  while (indexVecValues < vecValues.size()) {
540  SmallVector<llvm::Value *> storeValues;
541  storeValues.reserve(numLoops);
542  for (unsigned i = 0; i < numLoops; i++) {
543  storeValues.push_back(vecValues[indexVecValues]);
544  indexVecValues++;
545  }
546  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
547  findAllocaInsertPoint(builder, moduleTranslation);
548  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
549  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
550  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
551  }
552  return success();
553 }
554 
555 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
556 /// OpenMPIRBuilder.
557 static LogicalResult
558 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
559  LLVM::ModuleTranslation &moduleTranslation) {
560  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
561  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
562 
563  // TODO: The code generation for ordered simd directive is not supported yet.
564  if (orderedRegionOp.getSimd())
565  return failure();
566 
567  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
568  // relying on captured variables.
569  LogicalResult bodyGenStatus = success();
570 
571  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
572  // OrderedOp has only one region associated with it.
573  auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
574  builder.restoreIP(codeGenIP);
575  convertOmpOpRegions(region, "omp.ordered.region", builder,
576  moduleTranslation, bodyGenStatus);
577  };
578 
579  // TODO: Perform finalization actions for variables. This has to be
580  // called for variables which have destructors/finalizers.
581  auto finiCB = [&](InsertPointTy codeGenIP) {};
582 
583  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
584  builder.restoreIP(
585  moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
586  ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getSimd()));
587  return bodyGenStatus;
588 }
589 
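/// Converts an OpenMP 'sections' construct into LLVM IR using OpenMPIRBuilder.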
590 static LogicalResult
591 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
592  LLVM::ModuleTranslation &moduleTranslation) {
593  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
594  using StorableBodyGenCallbackTy =
595  llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
596 
597  auto sectionsOp = cast<omp::SectionsOp>(opInst);
598 
599  // TODO: Support the following clauses: private, firstprivate, lastprivate,
600  // reduction, allocate
601  if (!sectionsOp.getReductionVars().empty() || sectionsOp.getReductions() ||
602  !sectionsOp.getAllocateVars().empty() ||
603  !sectionsOp.getAllocatorsVars().empty())
604  return emitError(sectionsOp.getLoc())
605  << "reduction and allocate clauses are not supported for sections "
606  "construct";
607 
608  LogicalResult bodyGenStatus = success();
609  SmallVector<StorableBodyGenCallbackTy> sectionCBs;
610 
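 // Collect a body-generation callback for every omp.section nested in the
 // construct; the terminator of the sections region is skipped.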
611  for (Operation &op : *sectionsOp.getRegion().begin()) {
612  auto sectionOp = dyn_cast<omp::SectionOp>(op);
613  if (!sectionOp) // omp.terminator
614  continue;
615 
616  Region &region = sectionOp.getRegion();
617  auto sectionCB = [&region, &builder, &moduleTranslation, &bodyGenStatus](
618  InsertPointTy allocaIP, InsertPointTy codeGenIP) {
619  builder.restoreIP(codeGenIP);
620  convertOmpOpRegions(region, "omp.section.region", builder,
621  moduleTranslation, bodyGenStatus);
622  };
623  sectionCBs.push_back(sectionCB);
624  }
625 
626  // No sections within the omp.sections operation - skip generation. This
627  // situation is only possible if there is only a terminator operation inside
628  // the sections operation.
629  if (sectionCBs.empty())
630  return success();
631 
632  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
633 
634  // TODO: Perform appropriate actions according to the data-sharing
635  // attribute (shared, private, firstprivate, ...) of variables.
636  // Currently defaults to shared.
637  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
638  llvm::Value &vPtr,
639  llvm::Value *&replacementValue) -> InsertPointTy {
640  replacementValue = &vPtr;
641  return codeGenIP;
642  };
643 
644  // TODO: Perform finalization actions for variables. This has to be
645  // called for variables which have destructors/finalizers.
646  auto finiCB = [&](InsertPointTy codeGenIP) {};
647 
648  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
649  findAllocaInsertPoint(builder, moduleTranslation);
650  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
651  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
652  ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
653  sectionsOp.getNowait()));
654  return bodyGenStatus;
655 }
656 
657 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
658 static LogicalResult
659 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
660  LLVM::ModuleTranslation &moduleTranslation) {
661  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
662  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
663  LogicalResult bodyGenStatus = success();
664  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
665  builder.restoreIP(codegenIP);
666  convertOmpOpRegions(singleOp.getRegion(), "omp.single.region", builder,
667  moduleTranslation, bodyGenStatus);
668  };
669  auto finiCB = [&](InsertPointTy codeGenIP) {};
670 
671  // Handle copyprivate
672  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
673  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateFuncs();
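 // Look up the translated copyprivate variables and their corresponding copy
 // functions.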
674  SmallVector<llvm::Value *> llvmCPVars;
675  SmallVector<llvm::Function *> llvmCPFuncs;
676  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
677  llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
678  auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
679  singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
680  llvmCPFuncs.push_back(
681  moduleTranslation.lookupFunction(llvmFuncOp.getName()));
682  }
683 
684  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle(
685  ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars, llvmCPFuncs));
686  return bodyGenStatus;
687 }
688 
689 /// Converts an OpenMP teams construct into LLVM IR using OpenMPIRBuilder.
690 static LogicalResult
691 convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
692  LLVM::ModuleTranslation &moduleTranslation) {
693  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
694  LogicalResult bodyGenStatus = success();
695  if (!op.getAllocatorsVars().empty() || op.getReductions())
696  return op.emitError("unhandled clauses for translation to LLVM IR");
697 
698  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
699  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
700  moduleTranslation, allocaIP);
701  builder.restoreIP(codegenIP);
702  convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
703  moduleTranslation, bodyGenStatus);
704  };
705 
706  llvm::Value *numTeamsLower = nullptr;
707  if (Value numTeamsLowerVar = op.getNumTeamsLower())
708  numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);
709 
710  llvm::Value *numTeamsUpper = nullptr;
711  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
712  numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);
713 
714  llvm::Value *threadLimit = nullptr;
715  if (Value threadLimitVar = op.getThreadLimit())
716  threadLimit = moduleTranslation.lookupValue(threadLimitVar);
717 
718  llvm::Value *ifExpr = nullptr;
719  if (Value ifExprVar = op.getIfExpr())
720  ifExpr = moduleTranslation.lookupValue(ifExprVar);
721 
722  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
723  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams(
724  ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr));
725  return bodyGenStatus;
726 }
727 
728 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
729 static LogicalResult
730 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
731  LLVM::ModuleTranslation &moduleTranslation) {
732  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
733  LogicalResult bodyGenStatus = success();
734  if (taskOp.getUntiedAttr() || taskOp.getMergeableAttr() ||
735  taskOp.getInReductions() || taskOp.getPriority() ||
736  !taskOp.getAllocateVars().empty()) {
737  return taskOp.emitError("unhandled clauses for translation to LLVM IR");
738  }
739  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
740  // Save the alloca insertion point on ModuleTranslation stack for use in
741  // nested regions.
742  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
743  moduleTranslation, allocaIP);
744 
745  builder.restoreIP(codegenIP);
746  convertOmpOpRegions(taskOp.getRegion(), "omp.task.region", builder,
747  moduleTranslation, bodyGenStatus);
748  };
749 
751  if (!taskOp.getDependVars().empty() && taskOp.getDepends()) {
752  for (auto dep :
753  llvm::zip(taskOp.getDependVars(), taskOp.getDepends()->getValue())) {
754  llvm::omp::RTLDependenceKindTy type;
755  switch (
756  cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
757  case mlir::omp::ClauseTaskDepend::taskdependin:
758  type = llvm::omp::RTLDependenceKindTy::DepIn;
759  break;
760  // The OpenMP runtime requires that the codegen for the 'depend' clause with
761  // an 'out' dependency kind be the same as the codegen for the 'depend'
762  // clause with an 'inout' dependency kind.
763  case mlir::omp::ClauseTaskDepend::taskdependout:
764  case mlir::omp::ClauseTaskDepend::taskdependinout:
765  type = llvm::omp::RTLDependenceKindTy::DepInOut;
766  break;
767  };
768  llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
769  llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
770  dds.emplace_back(dd);
771  }
772  }
773 
774  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
775  findAllocaInsertPoint(builder, moduleTranslation);
776  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
777  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask(
778  ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
779  moduleTranslation.lookupValue(taskOp.getFinalExpr()),
780  moduleTranslation.lookupValue(taskOp.getIfExpr()), dds));
781  return bodyGenStatus;
782 }
783 
784 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
785 static LogicalResult
786 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
787  LLVM::ModuleTranslation &moduleTranslation) {
788  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
789  LogicalResult bodyGenStatus = success();
790  if (!tgOp.getTaskReductionVars().empty() || !tgOp.getAllocateVars().empty()) {
791  return tgOp.emitError("unhandled clauses for translation to LLVM IR");
792  }
793  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
794  builder.restoreIP(codegenIP);
795  convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region", builder,
796  moduleTranslation, bodyGenStatus);
797  };
798  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
799  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
800  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTaskgroup(
801  ompLoc, allocaIP, bodyCB));
802  return bodyGenStatus;
803 }
804 
805 /// Allocate space for privatized reduction variables.
806 template <typename T>
807 static void allocByValReductionVars(
808  T loop, llvm::IRBuilderBase &builder,
809  LLVM::ModuleTranslation &moduleTranslation,
810  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
811  SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
812  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
813  DenseMap<Value, llvm::Value *> &reductionVariableMap) {
814  llvm::IRBuilderBase::InsertPointGuard guard(builder);
815  builder.restoreIP(allocaIP);
816  auto args =
817  loop.getRegion().getArguments().take_back(loop.getNumReductionVars());
818 
819  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
820  llvm::Value *var = builder.CreateAlloca(
821  moduleTranslation.convertType(reductionDecls[i].getType()));
822  moduleTranslation.mapValue(args[i], var);
823  privateReductionVariables.push_back(var);
824  reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
825  }
826 }
827 
828 /// Map input argument to all reduction initialization regions
829 template <typename T>
830 static void
831 mapInitializationArg(T loop, LLVM::ModuleTranslation &moduleTranslation,
832  SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
833  unsigned i) {
834  // map input argument to the initialization region
835  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
836  Region &initializerRegion = reduction.getInitializerRegion();
837  Block &entry = initializerRegion.front();
838  assert(entry.getNumArguments() == 1 &&
839  "the initialization region has one argument");
840 
841  mlir::Value mlirSource = loop.getReductionVars()[i];
842  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
843  assert(llvmSource && "lookup reduction var");
844  moduleTranslation.mapValue(entry.getArgument(0), llvmSource);
845 }
846 
847 /// Collect reduction info
848 template <typename T>
849 static void collectReductionInfo(
850  T loop, llvm::IRBuilderBase &builder,
851  LLVM::ModuleTranslation &moduleTranslation,
852  SmallVector<omp::DeclareReductionOp> &reductionDecls,
853  SmallVector<OwningReductionGen> &owningReductionGens,
854  SmallVector<OwningAtomicReductionGen> &owningAtomicReductionGens,
855  const SmallVector<llvm::Value *> &privateReductionVariables,
856  SmallVectorImpl<llvm::OpenMPIRBuilder::ReductionInfo> &reductionInfos) {
857  unsigned numReductions = loop.getNumReductionVars();
858 
859  for (unsigned i = 0; i < numReductions; ++i) {
860  owningReductionGens.push_back(
861  makeReductionGen(reductionDecls[i], builder, moduleTranslation));
862  owningAtomicReductionGens.push_back(
863  makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
864  }
865 
866  // Collect the reduction information.
867  reductionInfos.reserve(numReductions);
868  for (unsigned i = 0; i < numReductions; ++i) {
869  llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
870  if (owningAtomicReductionGens[i])
871  atomicGen = owningAtomicReductionGens[i];
872  llvm::Value *variable =
873  moduleTranslation.lookupValue(loop.getReductionVars()[i]);
874  reductionInfos.push_back(
875  {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
876  privateReductionVariables[i], owningReductionGens[i], atomicGen});
877  }
878 }
879 
880 /// Inlines the cleanup regions of the given reduction declarations to finalize the privatized reduction variables.
881 static LogicalResult inlineReductionCleanup(
882  llvm::SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
883  llvm::ArrayRef<llvm::Value *> privateReductionVariables,
884  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder) {
885  for (auto [i, reductionDecl] : llvm::enumerate(reductionDecls)) {
886  Region &cleanupRegion = reductionDecl.getCleanupRegion();
887  if (cleanupRegion.empty())
888  continue;
889 
890  // map the argument to the cleanup region
891  Block &entry = cleanupRegion.front();
892 
893  llvm::Instruction *potentialTerminator =
894  builder.GetInsertBlock()->empty() ? nullptr
895  : &builder.GetInsertBlock()->back();
896  if (potentialTerminator && potentialTerminator->isTerminator())
897  builder.SetInsertPoint(potentialTerminator);
898  llvm::Value *reductionVar = builder.CreateLoad(
899  moduleTranslation.convertType(entry.getArgument(0).getType()),
900  privateReductionVariables[i]);
901 
902  moduleTranslation.mapValue(entry.getArgument(0), reductionVar);
903 
904  if (failed(inlineConvertOmpRegions(cleanupRegion, "omp.reduction.cleanup",
905  builder, moduleTranslation)))
906  return failure();
907 
908  // clear block argument mapping in case it needs to be re-created with a
909  // different source for another use of the same reduction decl
910  moduleTranslation.forgetMapping(cleanupRegion);
911  }
912  return success();
913 }
914 
915 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
916 static LogicalResult
917 convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
918  LLVM::ModuleTranslation &moduleTranslation) {
919  auto wsloopOp = cast<omp::WsloopOp>(opInst);
920  auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
921  const bool isByRef = wsloopOp.getByref();
922 
923  // TODO: this should be in the op verifier instead.
924  if (loopOp.getLowerBound().empty())
925  return failure();
926 
927  // Static is the default.
928  auto schedule =
929  wsloopOp.getScheduleVal().value_or(omp::ClauseScheduleKind::Static);
930 
931  // Find the loop configuration.
932  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[0]);
933  llvm::Type *ivType = step->getType();
934  llvm::Value *chunk = nullptr;
935  if (wsloopOp.getScheduleChunkVar()) {
936  llvm::Value *chunkVar =
937  moduleTranslation.lookupValue(wsloopOp.getScheduleChunkVar());
938  chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
939  }
940 
941  SmallVector<omp::DeclareReductionOp> reductionDecls;
942  collectReductionDecls(wsloopOp, reductionDecls);
943  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
944  findAllocaInsertPoint(builder, moduleTranslation);
945 
946  SmallVector<llvm::Value *> privateReductionVariables;
947  DenseMap<Value, llvm::Value *> reductionVariableMap;
948  if (!isByRef) {
949  allocByValReductionVars(wsloopOp, builder, moduleTranslation, allocaIP,
950  reductionDecls, privateReductionVariables,
951  reductionVariableMap);
952  }
953 
954  // Before the loop, store the initial values of reductions into reduction
955  // variables. Although this could be done after allocas, we don't want to
956  // mess with the alloca insertion point.
957  ArrayRef<BlockArgument> reductionArgs = wsloopOp.getRegion().getArguments();
958  for (unsigned i = 0; i < wsloopOp.getNumReductionVars(); ++i) {
959  SmallVector<llvm::Value *> phis;
960 
961  // map block argument to initializer region
962  mapInitializationArg(wsloopOp, moduleTranslation, reductionDecls, i);
963 
964  if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
965  "omp.reduction.neutral", builder,
966  moduleTranslation, &phis)))
967  return failure();
968  assert(phis.size() == 1 && "expected one value to be yielded from the "
969  "reduction neutral element declaration region");
970  if (isByRef) {
971  // Allocate reduction variable (which is a pointer to the real reduction
972  // variable allocated in the inlined region)
973  llvm::Value *var = builder.CreateAlloca(
974  moduleTranslation.convertType(reductionDecls[i].getType()));
975  // Store the result of the inlined region to the allocated reduction var
976  // ptr
977  builder.CreateStore(phis[0], var);
978 
979  privateReductionVariables.push_back(var);
980  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
981  reductionVariableMap.try_emplace(wsloopOp.getReductionVars()[i], phis[0]);
982  } else {
983  // for by-ref case the store is inside of the reduction region
984  builder.CreateStore(phis[0], privateReductionVariables[i]);
985  // the rest was handled in allocByValReductionVars
986  }
987 
988  // forget the mapping for the initializer region because we might need a
989  // different mapping if this reduction declaration is re-used for a
990  // different variable
991  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
992  }
993 
994  // Store the mapping between reduction variables and their private copies on
995  // ModuleTranslation stack. It can be then recovered when translating
996  // omp.reduce operations in a separate call.
997  LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
998  moduleTranslation, reductionVariableMap);
999 
1000  // Set up the source location value for OpenMP runtime.
1001  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1002 
1003  // Generator of the canonical loop body.
1004  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1005  // relying on captured variables.
1006  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
1007  SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
1008  LogicalResult bodyGenStatus = success();
1009  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
1010  // Make sure further conversions know about the induction variable.
1011  moduleTranslation.mapValue(
1012  loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
1013 
1014  // Capture the body insertion point for use in nested loops. BodyIP of the
1015  // CanonicalLoopInfo always points to the beginning of the entry block of
1016  // the body.
1017  bodyInsertPoints.push_back(ip);
1018 
1019  if (loopInfos.size() != loopOp.getNumLoops() - 1)
1020  return;
1021 
1022  // Convert the body of the loop.
1023  builder.restoreIP(ip);
1024  convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder,
1025  moduleTranslation, bodyGenStatus);
1026  };
1027 
1028  // Delegate actual loop construction to the OpenMP IRBuilder.
1029  // TODO: this currently assumes omp.loop_nest is semantically similar to an
1030  // SCF loop, i.e. it has a positive step and uses signed integer semantics.
1031  // Reconsider this code when the nested loop operation clearly supports more
1032  // cases.
1033  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1034  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
1035  llvm::Value *lowerBound =
1036  moduleTranslation.lookupValue(loopOp.getLowerBound()[i]);
1037  llvm::Value *upperBound =
1038  moduleTranslation.lookupValue(loopOp.getUpperBound()[i]);
1039  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]);
1040 
1041  // Make sure loop trip counts are emitted in the preheader of the outermost
1042  // loop at the latest so that they are all available for the new collapsed
1043  // loop that will be created below.
1044  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
1045  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
1046  if (i != 0) {
1047  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back());
1048  computeIP = loopInfos.front()->getPreheaderIP();
1049  }
1050  loopInfos.push_back(ompBuilder->createCanonicalLoop(
1051  loc, bodyGen, lowerBound, upperBound, step,
1052  /*IsSigned=*/true, loopOp.getInclusive(), computeIP));
1053 
1054  if (failed(bodyGenStatus))
1055  return failure();
1056  }
1057 
1058  // Collapse loops. Store the insertion point because LoopInfos may get
1059  // invalidated.
1060  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
1061  llvm::CanonicalLoopInfo *loopInfo =
1062  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
1063 
1064  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1065 
1066  // TODO: Handle doacross loops when the ordered clause has a parameter.
1067  bool isOrdered = wsloopOp.getOrderedVal().has_value();
1068  std::optional<omp::ScheduleModifier> scheduleModifier =
1069  wsloopOp.getScheduleModifier();
1070  bool isSimd = wsloopOp.getSimdModifier();
1071 
1072  ompBuilder->applyWorkshareLoop(
1073  ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(),
1074  convertToScheduleKind(schedule), chunk, isSimd,
1075  scheduleModifier == omp::ScheduleModifier::monotonic,
1076  scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered);
1077 
1078  // Continue building IR after the loop. Note that the LoopInfo returned by
1079  // `collapseLoops` points inside the outermost loop and is intended for
1080  // potential further loop transformations. Use the insertion point stored
1081  // before collapsing loops instead.
1082  builder.restoreIP(afterIP);
1083 
1084  // Process the reductions if required.
1085  if (wsloopOp.getNumReductionVars() == 0)
1086  return success();
1087 
1088  // Create the reduction generators. We need to own them here because
1089  // ReductionInfo only accepts references to the generators.
1090  SmallVector<OwningReductionGen> owningReductionGens;
1091  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1092  SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
1093  collectReductionInfo(wsloopOp, builder, moduleTranslation, reductionDecls,
1094  owningReductionGens, owningAtomicReductionGens,
1095  privateReductionVariables, reductionInfos);
1096 
1097  // The call to createReductions below expects the block to have a
1098  // terminator. Create an unreachable instruction to serve as terminator
1099  // and remove it later.
1100  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1101  builder.SetInsertPoint(tempTerminator);
1102  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
1103  ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
1104  wsloopOp.getNowait(), isByRef);
1105  if (!contInsertPoint.getBlock())
1106  return wsloopOp->emitOpError() << "failed to convert reductions";
1107  auto nextInsertionPoint =
1108  ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
1109  tempTerminator->eraseFromParent();
1110  builder.restoreIP(nextInsertionPoint);
1111 
1112  // after the workshare loop, deallocate private reduction variables
1113  return inlineReductionCleanup(reductionDecls, privateReductionVariables,
1114  moduleTranslation, builder);
1115 }
1116 
1117 /// A RAII class that on construction replaces the region arguments of the
1118 /// parallel op (which correspond to private variables) with the actual private
1119 /// variables they correspond to. This prepares the parallel op so that it
1120 /// matches what is expected by the OMPIRBuilder.
1121 ///
1122 /// On destruction, it restores the original state of the operation so that on
1123 /// the MLIR side, the op is not affected by conversion to LLVM IR.
1124 class OmpParallelOpConversionManager {
1125 public:
1126  OmpParallelOpConversionManager(omp::ParallelOp opInst)
1127  : region(opInst.getRegion()), privateVars(opInst.getPrivateVars()),
1128  privateArgBeginIdx(opInst.getNumReductionVars()),
1129  privateArgEndIdx(privateArgBeginIdx + privateVars.size()) {
1130  auto privateVarsIt = privateVars.begin();
1131 
1132  for (size_t argIdx = privateArgBeginIdx; argIdx < privateArgEndIdx;
1133  ++argIdx, ++privateVarsIt)
1134  mlir::replaceAllUsesInRegionWith(region.getArgument(argIdx),
1135  *privateVarsIt, region);
1136  }
1137 
1138  ~OmpParallelOpConversionManager() {
1139  auto privateVarsIt = privateVars.begin();
1140 
1141  for (size_t argIdx = privateArgBeginIdx; argIdx < privateArgEndIdx;
1142  ++argIdx, ++privateVarsIt)
1143  mlir::replaceAllUsesInRegionWith(*privateVarsIt,
1144  region.getArgument(argIdx), region);
1145  }
1146 
1147 private:
1148  Region &region;
1149  OperandRange privateVars;
1150  unsigned privateArgBeginIdx;
1151  unsigned privateArgEndIdx;
1152 };
1153 
1154 /// Converts the OpenMP parallel operation to LLVM IR.
1155 static LogicalResult
1156 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
1157  LLVM::ModuleTranslation &moduleTranslation) {
1158  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1159  OmpParallelOpConversionManager raii(opInst);
1160  const bool isByRef = opInst.getByref();
1161 
1162  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1163  // relying on captured variables.
1164  LogicalResult bodyGenStatus = success();
1165  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1166 
1167  // Collect reduction declarations
1168  SmallVector<omp::DeclareReductionOp> reductionDecls;
1169  collectReductionDecls(opInst, reductionDecls);
1170  SmallVector<llvm::Value *> privateReductionVariables;
1171 
1172  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1173  // Allocate reduction vars
1174  DenseMap<Value, llvm::Value *> reductionVariableMap;
1175  if (!isByRef) {
1176  allocByValReductionVars(opInst, builder, moduleTranslation, allocaIP,
1177  reductionDecls, privateReductionVariables,
1178  reductionVariableMap);
1179  }
1180 
1181  // Initialize reduction vars
1182  builder.restoreIP(allocaIP);
1183  MutableArrayRef<BlockArgument> reductionArgs =
1184  opInst.getRegion().getArguments().take_back(
1185  opInst.getNumReductionVars());
1186  for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
1187  SmallVector<llvm::Value *> phis;
1188 
1189  // map the block argument
1190  mapInitializationArg(opInst, moduleTranslation, reductionDecls, i);
1191  if (failed(inlineConvertOmpRegions(
1192  reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
1193  builder, moduleTranslation, &phis)))
1194  bodyGenStatus = failure();
1195  assert(phis.size() == 1 &&
1196  "expected one value to be yielded from the "
1197  "reduction neutral element declaration region");
1198  builder.restoreIP(allocaIP);
1199 
1200  if (isByRef) {
1201  // Allocate reduction variable (which is a pointer to the real reduction
1202  // variable allocated in the inlined region)
1203  llvm::Value *var = builder.CreateAlloca(
1204  moduleTranslation.convertType(reductionDecls[i].getType()));
1205  // Store the result of the inlined region to the allocated reduction var
1206  // ptr
1207  builder.CreateStore(phis[0], var);
1208 
1209  privateReductionVariables.push_back(var);
1210  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
1211  reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]);
1212  } else {
1213  // for by-ref case the store is inside of the reduction init region
1214  builder.CreateStore(phis[0], privateReductionVariables[i]);
1215  // the rest is done in allocByValReductionVars
1216  }
1217 
1218  // clear block argument mapping in case it needs to be re-created with a
1219  // different source for another use of the same reduction decl
1220  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
1221  }
1222 
1223  // Store the mapping between reduction variables and their private copies on
1224  // ModuleTranslation stack. It can be then recovered when translating
1225  // omp.reduce operations in a separate call.
1226  LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
1227  moduleTranslation, reductionVariableMap);
1228 
1229  // Save the alloca insertion point on ModuleTranslation stack for use in
1230  // nested regions.
1231  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
1232  moduleTranslation, allocaIP);
1233 
1234  // ParallelOp has only one region associated with it.
1235  builder.restoreIP(codeGenIP);
1236  auto regionBlock =
1237  convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
1238  moduleTranslation, bodyGenStatus);
1239 
1240  // Process the reductions if required.
1241  if (opInst.getNumReductionVars() > 0) {
1242  // Collect reduction info
1243  SmallVector<OwningReductionGen> owningReductionGens;
1244  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1245  SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
1246  collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
1247  owningReductionGens, owningAtomicReductionGens,
1248  privateReductionVariables, reductionInfos);
1249 
1250  // Move to region cont block
1251  builder.SetInsertPoint(regionBlock->getTerminator());
1252 
1253  // Generate reductions from info
1254  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1255  builder.SetInsertPoint(tempTerminator);
1256 
1257  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
1258  ompBuilder->createReductions(builder.saveIP(), allocaIP,
1259  reductionInfos, false, isByRef);
1260  if (!contInsertPoint.getBlock()) {
1261  bodyGenStatus = opInst->emitOpError() << "failed to convert reductions";
1262  return;
1263  }
1264 
1265  tempTerminator->eraseFromParent();
1266  builder.restoreIP(contInsertPoint);
1267  }
1268  };
1269 
1270  // TODO: Perform appropriate actions according to the data-sharing
1271  // attribute (shared, private, firstprivate, ...) of variables.
1272  // Currently shared and private are supported.
1273  auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
1274  llvm::Value &, llvm::Value &vPtr,
1275  llvm::Value *&replacementValue) -> InsertPointTy {
1276  replacementValue = &vPtr;
1277 
1278  // If this is a private value, this lambda will return the corresponding
1279  // mlir value and its `PrivateClauseOp`. Otherwise, empty values are
1280  // returned.
1281  auto [privVar, privatizerClone] =
1282  [&]() -> std::pair<mlir::Value, omp::PrivateClauseOp> {
1283  if (!opInst.getPrivateVars().empty()) {
1284  auto privVars = opInst.getPrivateVars();
1285  auto privatizers = opInst.getPrivatizers();
1286 
1287  for (auto [privVar, privatizerAttr] :
1288  llvm::zip_equal(privVars, *privatizers)) {
1289  // Find the MLIR private variable corresponding to the LLVM value
1290  // being privatized.
1291  llvm::Value *llvmPrivVar = moduleTranslation.lookupValue(privVar);
1292  if (llvmPrivVar != &vPtr)
1293  continue;
1294 
1295  SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerAttr);
1296  omp::PrivateClauseOp privatizer =
1297  SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
1298  opInst, privSym);
1299 
1300  // Clone the privatizer in case it is used by more than one parallel
1301  // region. The privatizer is processed in-place (see below) before it
1302  // gets inlined in the parallel region and therefore processing the
1303  // original op is dangerous.
1304  return {privVar, privatizer.clone()};
1305  }
1306  }
1307 
1308  return {mlir::Value(), omp::PrivateClauseOp()};
1309  }();
1310 
1311  if (privVar) {
1312  Region &allocRegion = privatizerClone.getAllocRegion();
1313 
1314  // If this is a `firstprivate` clause, prepare the `omp.private` op by:
1315  if (privatizerClone.getDataSharingType() ==
1316  omp::DataSharingClauseType::FirstPrivate) {
1317  auto oldAllocBackBlock = std::prev(allocRegion.end());
1318  omp::YieldOp oldAllocYieldOp =
1319  llvm::cast<omp::YieldOp>(oldAllocBackBlock->getTerminator());
1320 
1321  Region &copyRegion = privatizerClone.getCopyRegion();
1322 
1323  mlir::IRRewriter copyCloneBuilder(&moduleTranslation.getContext());
1324  // 1. Cloning the `copy` region to the end of the `alloc` region.
1325  copyCloneBuilder.cloneRegionBefore(copyRegion, allocRegion,
1326  allocRegion.end());
1327 
1328  auto newCopyRegionFrontBlock = std::next(oldAllocBackBlock);
1329  // 2. Merging the last `alloc` block with the first block in the `copy`
1330  // region clone.
1331  // 3. Re-mapping the first argument of the `copy` region to be the
1332  // argument of the `alloc` region and the second argument of the `copy`
1333  // region to be the yielded value of the `alloc` region (this is the
1334  // private clone of the privatized value).
1335  copyCloneBuilder.mergeBlocks(
1336  &*newCopyRegionFrontBlock, &*oldAllocBackBlock,
1337  {allocRegion.getArgument(0), oldAllocYieldOp.getOperand(0)});
1338 
1339  // 4. The old terminator of the `alloc` region is not needed anymore, so
1340  // delete it.
1341  oldAllocYieldOp.erase();
1342  }
1343 
1344  // Replace the privatizer block argument with mlir value being privatized.
1345  // This way, the body of the privatizer will be changed from using the
1346  // region/block argument to the value being privatized.
1347  auto allocRegionArg = allocRegion.getArgument(0);
1348  replaceAllUsesInRegionWith(allocRegionArg, privVar, allocRegion);
1349 
1350  auto oldIP = builder.saveIP();
1351  builder.restoreIP(allocaIP);
1352 
1353  SmallVector<llvm::Value *, 1> yieldedValues;
1354  if (failed(inlineConvertOmpRegions(allocRegion, "omp.privatizer", builder,
1355  moduleTranslation, &yieldedValues))) {
1356  opInst.emitError("failed to inline `alloc` region of an `omp.private` "
1357  "op in the parallel region");
1358  bodyGenStatus = failure();
1359  } else {
1360  assert(yieldedValues.size() == 1);
1361  replacementValue = yieldedValues.front();
1362  }
1363 
1364  privatizerClone.erase();
1365  builder.restoreIP(oldIP);
1366  }
1367 
1368  return codeGenIP;
1369  };
1370 
1371  // TODO: Perform finalization actions for variables. This has to be
1372  // called for variables which have destructors/finalizers.
1373  auto finiCB = [&](InsertPointTy codeGenIP) {
1374  InsertPointTy oldIP = builder.saveIP();
1375  builder.restoreIP(codeGenIP);
1376 
1377  // if the reduction has a cleanup region, inline it here to finalize the
1378  // reduction variables
1379  if (failed(inlineReductionCleanup(reductionDecls, privateReductionVariables,
1380  moduleTranslation, builder)))
1381  bodyGenStatus = failure();
1382 
1383  builder.restoreIP(oldIP);
1384  };
1385 
1386  llvm::Value *ifCond = nullptr;
1387  if (auto ifExprVar = opInst.getIfExprVar())
1388  ifCond = moduleTranslation.lookupValue(ifExprVar);
1389  llvm::Value *numThreads = nullptr;
1390  if (auto numThreadsVar = opInst.getNumThreadsVar())
1391  numThreads = moduleTranslation.lookupValue(numThreadsVar);
1392  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
1393  if (auto bind = opInst.getProcBindVal())
1394  pbKind = getProcBindKind(*bind);
1395  // TODO: Is the Parallel construct cancellable?
1396  bool isCancellable = false;
1397 
1398  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1399  findAllocaInsertPoint(builder, moduleTranslation);
1400  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1401 
1402  builder.restoreIP(
1403  ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
1404  ifCond, numThreads, pbKind, isCancellable));
1405 
1406  return bodyGenStatus;
1407 }
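// Illustrative sketch of the `firstprivate` handling in `privCB` above
// (schematic MLIR, syntax abbreviated, names hypothetical). Given a
// privatizer such as:
//
//   omp.private {type = firstprivate} @x.firstprivate : !llvm.ptr alloc {
//   ^bb0(%orig: !llvm.ptr):
//     %priv = llvm.alloca ...
//     omp.yield(%priv : !llvm.ptr)
//   } copy {
//   ^bb0(%orig: !llvm.ptr, %priv: !llvm.ptr):
//     %v = llvm.load %orig : !llvm.ptr -> i32
//     llvm.store %v, %priv : i32, !llvm.ptr
//     omp.yield(%priv : !llvm.ptr)
//   }
//
// the processed clone ends up with a single `alloc` region that both
// allocates the private copy and performs the copy, because the `copy`
// region is cloned behind the `alloc` region, its blocks are merged, and its
// two block arguments are remapped to the original value and the yielded
// private allocation, respectively.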
1408 
1409 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
1410 static LogicalResult
1411 convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
1412  LLVM::ModuleTranslation &moduleTranslation) {
1413  auto simdOp = cast<omp::SimdOp>(opInst);
1414  auto loopOp = cast<omp::LoopNestOp>(simdOp.getWrappedLoop());
1415 
1416  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1417 
1418  // Generator of the canonical loop body.
1419  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1420  // relying on captured variables.
 1421  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
 1422  SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
 1423  LogicalResult bodyGenStatus = success();
1424  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
1425  // Make sure further conversions know about the induction variable.
1426  moduleTranslation.mapValue(
1427  loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
1428 
1429  // Capture the body insertion point for use in nested loops. BodyIP of the
1430  // CanonicalLoopInfo always points to the beginning of the entry block of
1431  // the body.
1432  bodyInsertPoints.push_back(ip);
1433 
1434  if (loopInfos.size() != loopOp.getNumLoops() - 1)
1435  return;
1436 
1437  // Convert the body of the loop.
1438  builder.restoreIP(ip);
1439  convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder,
1440  moduleTranslation, bodyGenStatus);
1441  };
1442 
1443  // Delegate actual loop construction to the OpenMP IRBuilder.
1444  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
1445  // loop, i.e. it has a positive step, uses signed integer semantics.
1446  // Reconsider this code when the nested loop operation clearly supports more
1447  // cases.
1448  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1449  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
1450  llvm::Value *lowerBound =
1451  moduleTranslation.lookupValue(loopOp.getLowerBound()[i]);
1452  llvm::Value *upperBound =
1453  moduleTranslation.lookupValue(loopOp.getUpperBound()[i]);
1454  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]);
1455 
 1456  // Make sure loop trip counts are emitted in the preheader of the outermost
 1457  // loop at the latest, so that they are all available for the new collapsed
 1458  // loop that will be created below.
1459  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
1460  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
1461  if (i != 0) {
1462  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
1463  ompLoc.DL);
1464  computeIP = loopInfos.front()->getPreheaderIP();
1465  }
1466  loopInfos.push_back(ompBuilder->createCanonicalLoop(
1467  loc, bodyGen, lowerBound, upperBound, step,
1468  /*IsSigned=*/true, /*Inclusive=*/true, computeIP));
1469 
1470  if (failed(bodyGenStatus))
1471  return failure();
1472  }
1473 
1474  // Collapse loops.
1475  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
1476  llvm::CanonicalLoopInfo *loopInfo =
1477  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
1478 
1479  llvm::ConstantInt *simdlen = nullptr;
1480  if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
1481  simdlen = builder.getInt64(simdlenVar.value());
1482 
1483  llvm::ConstantInt *safelen = nullptr;
1484  if (std::optional<uint64_t> safelenVar = simdOp.getSafelen())
1485  safelen = builder.getInt64(safelenVar.value());
1486 
1487  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
1488  ompBuilder->applySimd(
1489  loopInfo, alignedVars,
1490  simdOp.getIfExpr() ? moduleTranslation.lookupValue(simdOp.getIfExpr())
1491  : nullptr,
1492  llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen);
1493 
1494  builder.restoreIP(afterIP);
1495  return success();
1496 }
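// Illustrative flow (a sketch of the steps above, names hypothetical): for a
// two-level `omp.loop_nest` wrapped in `omp.simd simdlen(8)`,
// `createCanonicalLoop` is invoked twice; `bodyGen` maps one induction
// variable per invocation and only converts the MLIR loop body on the
// innermost one, once all induction variables are known. `collapseLoops`
// then merges both canonical loops into a single loop, and `applySimd`
// marks that collapsed loop for vectorization, forwarding the simdlen and
// safelen values as i64 constants.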
1497 
1498 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
1499 static llvm::AtomicOrdering
1500 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
1501  if (!ao)
1502  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
1503 
1504  switch (*ao) {
1505  case omp::ClauseMemoryOrderKind::Seq_cst:
1506  return llvm::AtomicOrdering::SequentiallyConsistent;
1507  case omp::ClauseMemoryOrderKind::Acq_rel:
1508  return llvm::AtomicOrdering::AcquireRelease;
1509  case omp::ClauseMemoryOrderKind::Acquire:
1510  return llvm::AtomicOrdering::Acquire;
1511  case omp::ClauseMemoryOrderKind::Release:
1512  return llvm::AtomicOrdering::Release;
1513  case omp::ClauseMemoryOrderKind::Relaxed:
1514  return llvm::AtomicOrdering::Monotonic;
1515  }
1516  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
1517 }
1518 
1519 /// Convert omp.atomic.read operation to LLVM IR.
1520 static LogicalResult
1521 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
1522  LLVM::ModuleTranslation &moduleTranslation) {
1523 
1524  auto readOp = cast<omp::AtomicReadOp>(opInst);
1525  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1526 
1527  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1528 
1529  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrderVal());
1530  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
1531  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
1532 
1533  llvm::Type *elementType =
1534  moduleTranslation.convertType(readOp.getElementType());
1535 
1536  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
1537  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
1538  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
1539  return success();
1540 }
1541 
1542 /// Converts an omp.atomic.write operation to LLVM IR.
1543 static LogicalResult
1544 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
1545  LLVM::ModuleTranslation &moduleTranslation) {
1546  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
1547  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1548 
1549  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1550  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrderVal());
1551  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
1552  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
1553  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
1554  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
1555  /*isVolatile=*/false};
1556  builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
1557  return success();
1558 }
1559 
1560 /// Converts an LLVM dialect binary operation to the corresponding enum value
1561 /// for `atomicrmw` supported binary operation.
1562 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
 1563  return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op)
 1564  .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
1565  .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
1566  .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
1567  .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
1568  .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
1569  .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
1570  .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
1571  .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
1572  .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
1573  .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
1574 }
1575 
1576 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
1577 static LogicalResult
1578 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
1579  llvm::IRBuilderBase &builder,
1580  LLVM::ModuleTranslation &moduleTranslation) {
1581  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1582 
1583  // Convert values and types.
1584  auto &innerOpList = opInst.getRegion().front().getOperations();
1585  bool isRegionArgUsed{false}, isXBinopExpr{false};
1586  llvm::AtomicRMWInst::BinOp binop;
1587  mlir::Value mlirExpr;
1588  // Find the binary update operation that uses the region argument
1589  // and get the expression to update
1590  for (Operation &innerOp : innerOpList) {
1591  if (innerOp.getNumOperands() == 2) {
1592  binop = convertBinOpToAtomic(innerOp);
1593  if (!llvm::is_contained(innerOp.getOperands(),
1594  opInst.getRegion().getArgument(0)))
1595  continue;
1596  isRegionArgUsed = true;
1597  isXBinopExpr = innerOp.getNumOperands() > 0 &&
1598  innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
1599  mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
1600  break;
1601  }
1602  }
1603  if (!isRegionArgUsed)
1604  return opInst.emitError("no atomic update operation with region argument"
1605  " as operand found inside atomic.update region");
1606 
1607  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1608  llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.getX());
1609  llvm::Type *llvmXElementType = moduleTranslation.convertType(
1610  opInst.getRegion().getArgument(0).getType());
1611  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1612  /*isSigned=*/false,
1613  /*isVolatile=*/false};
1614 
1615  llvm::AtomicOrdering atomicOrdering =
1616  convertAtomicOrdering(opInst.getMemoryOrderVal());
1617 
1618  // Generate update code.
1619  LogicalResult updateGenStatus = success();
1620  auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus](
1621  llvm::Value *atomicx,
1622  llvm::IRBuilder<> &builder) -> llvm::Value * {
1623  Block &bb = *opInst.getRegion().begin();
1624  moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
1625  moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1626  if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1627  updateGenStatus = (opInst.emitError()
1628  << "unable to convert update operation to llvm IR");
1629  return nullptr;
1630  }
1631  omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1632  assert(yieldop && yieldop.getResults().size() == 1 &&
1633  "terminator must be omp.yield op and it must have exactly one "
1634  "argument");
1635  return moduleTranslation.lookupValue(yieldop.getResults()[0]);
1636  };
1637 
1638  // Handle ambiguous alloca, if any.
1639  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1640  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1641  builder.restoreIP(ompBuilder->createAtomicUpdate(
1642  ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn,
1643  isXBinopExpr));
1644  return updateGenStatus;
1645 }
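// Illustrative example (schematic MLIR, syntax abbreviated): for
//
//   omp.atomic.update %x : !llvm.ptr {
//   ^bb0(%xval: i32):
//     %new = llvm.add %xval, %expr : i32
//     omp.yield(%new : i32)
//   }
//
// the scan above finds `llvm.add` as the two-operand op that uses the region
// argument, so `binop` is AtomicRMWInst::Add, `isXBinopExpr` is true (the
// region argument is the LHS), and `%expr` becomes `llvmExpr`. The
// OpenMPIRBuilder then emits either an `atomicrmw` or a compare-exchange
// loop driven by `updateFn`, depending on what the operation supports.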
1646 
1647 static LogicalResult
1648 convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
1649  llvm::IRBuilderBase &builder,
1650  LLVM::ModuleTranslation &moduleTranslation) {
1651  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1652  mlir::Value mlirExpr;
1653  bool isXBinopExpr = false, isPostfixUpdate = false;
1654  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
1655 
1656  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
1657  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();
1658 
1659  assert((atomicUpdateOp || atomicWriteOp) &&
1660  "internal op must be an atomic.update or atomic.write op");
1661 
1662  if (atomicWriteOp) {
1663  isPostfixUpdate = true;
1664  mlirExpr = atomicWriteOp.getExpr();
1665  } else {
1666  isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
1667  atomicCaptureOp.getAtomicUpdateOp().getOperation();
1668  auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
1669  bool isRegionArgUsed{false};
1670  // Find the binary update operation that uses the region argument
1671  // and get the expression to update
1672  for (Operation &innerOp : innerOpList) {
1673  if (innerOp.getNumOperands() == 2) {
1674  binop = convertBinOpToAtomic(innerOp);
1675  if (!llvm::is_contained(innerOp.getOperands(),
1676  atomicUpdateOp.getRegion().getArgument(0)))
1677  continue;
1678  isRegionArgUsed = true;
1679  isXBinopExpr =
1680  innerOp.getNumOperands() > 0 &&
1681  innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
1682  mlirExpr =
1683  (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
1684  break;
1685  }
1686  }
1687  if (!isRegionArgUsed)
1688  return atomicUpdateOp.emitError(
1689  "no atomic update operation with region argument"
1690  " as operand found inside atomic.update region");
1691  }
1692 
1693  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1694  llvm::Value *llvmX =
1695  moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
1696  llvm::Value *llvmV =
1697  moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
1698  llvm::Type *llvmXElementType = moduleTranslation.convertType(
1699  atomicCaptureOp.getAtomicReadOp().getElementType());
1700  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1701  /*isSigned=*/false,
1702  /*isVolatile=*/false};
1703  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
1704  /*isSigned=*/false,
1705  /*isVolatile=*/false};
1706 
1707  llvm::AtomicOrdering atomicOrdering =
1708  convertAtomicOrdering(atomicCaptureOp.getMemoryOrderVal());
1709 
1710  LogicalResult updateGenStatus = success();
1711  auto updateFn = [&](llvm::Value *atomicx,
1712  llvm::IRBuilder<> &builder) -> llvm::Value * {
1713  if (atomicWriteOp)
1714  return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
1715  Block &bb = *atomicUpdateOp.getRegion().begin();
1716  moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
1717  atomicx);
1718  moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1719  if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1720  updateGenStatus = (atomicUpdateOp.emitError()
1721  << "unable to convert update operation to llvm IR");
1722  return nullptr;
1723  }
1724  omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1725  assert(yieldop && yieldop.getResults().size() == 1 &&
1726  "terminator must be omp.yield op and it must have exactly one "
1727  "argument");
1728  return moduleTranslation.lookupValue(yieldop.getResults()[0]);
1729  };
1730 
1731  // Handle ambiguous alloca, if any.
1732  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1733  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1734  builder.restoreIP(ompBuilder->createAtomicCapture(
1735  ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
1736  binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr));
1737  return updateGenStatus;
1738 }
1739 
1740 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
1741 /// mapping between reduction variables and their private equivalents to have
1742 /// been stored on the ModuleTranslation stack. Currently only supports
1743 /// reduction within WsloopOp and ParallelOp, but can be easily extended.
1744 static LogicalResult
1745 convertOmpReductionOp(omp::ReductionOp reductionOp,
1746  llvm::IRBuilderBase &builder,
1747  LLVM::ModuleTranslation &moduleTranslation) {
1748  // Find the declaration that corresponds to the reduction op.
1749  omp::DeclareReductionOp declaration;
1750  Operation *reductionParent = reductionOp->getParentOp();
1751  if (dyn_cast<omp::ParallelOp>(reductionParent) ||
1752  dyn_cast<omp::WsloopOp>(reductionParent)) {
1753  declaration = findReductionDecl(*reductionParent, reductionOp);
1754  } else {
1755  llvm_unreachable("Unhandled reduction container");
1756  }
1757  assert(declaration && "could not find reduction declaration");
1758 
1759  // Retrieve the mapping between reduction variables and their private
1760  // equivalents.
1761  const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr;
1762  moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(
1763  [&](const OpenMPVarMappingStackFrame &frame) {
1764  if (frame.mapping.contains(reductionOp.getAccumulator())) {
1765  reductionVariableMap = &frame.mapping;
1766  return WalkResult::interrupt();
1767  }
1768  return WalkResult::advance();
1769  });
1770  assert(reductionVariableMap && "couldn't find private reduction variables");
1771  // Translate the reduction operation by emitting the body of the corresponding
1772  // reduction declaration.
1773  Region &reductionRegion = declaration.getReductionRegion();
1774  llvm::Value *privateReductionVar =
1775  reductionVariableMap->lookup(reductionOp.getAccumulator());
1776  llvm::Value *reductionVal = builder.CreateLoad(
1777  moduleTranslation.convertType(reductionOp.getOperand().getType()),
1778  privateReductionVar);
1779 
1780  moduleTranslation.mapValue(reductionRegion.front().getArgument(0),
1781  reductionVal);
1782  moduleTranslation.mapValue(
1783  reductionRegion.front().getArgument(1),
1784  moduleTranslation.lookupValue(reductionOp.getOperand()));
1785 
 1786  SmallVector<llvm::Value *> phis;
 1787  if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body",
1788  builder, moduleTranslation, &phis)))
1789  return failure();
1790  assert(phis.size() == 1 && "expected one value to be yielded from "
1791  "the reduction body declaration region");
1792  builder.CreateStore(phis[0], privateReductionVar);
1793  return success();
1794 }
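// Illustrative example (schematic MLIR, syntax abbreviated, names
// hypothetical): with a declaration such as
//
//   omp.declare_reduction @add_i32 : i32
//   init { ... omp.yield(%c0 : i32) }
//   combiner {
//   ^bb0(%acc: i32, %val: i32):
//     %sum = llvm.add %acc, %val : i32
//     omp.yield(%sum : i32)
//   }
//
// an `omp.reduction` inside the worksharing region loads the private
// accumulator, inlines the combiner with its two arguments bound to the
// loaded value and the reduction operand, and stores the single yielded
// value back into the private accumulator.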
1795 
1796 /// Converts an OpenMP Threadprivate operation into LLVM IR using
1797 /// OpenMPIRBuilder.
1798 static LogicalResult
1799 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
1800  LLVM::ModuleTranslation &moduleTranslation) {
1801  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1802  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
1803 
1804  Value symAddr = threadprivateOp.getSymAddr();
1805  auto *symOp = symAddr.getDefiningOp();
1806  if (!isa<LLVM::AddressOfOp>(symOp))
1807  return opInst.emitError("Addressing symbol not found");
1808  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
1809 
1810  LLVM::GlobalOp global =
1811  addressOfOp.getGlobal(moduleTranslation.symbolTable());
1812  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
1813  llvm::Type *type = globalValue->getValueType();
1814  llvm::TypeSize typeSize =
1815  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
1816  type);
1817  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
1818  llvm::StringRef suffix = llvm::StringRef(".cache", 6);
1819  std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str();
1820  llvm::Value *callInst =
1821  moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate(
1822  ompLoc, globalValue, size, cacheName);
1823  moduleTranslation.mapValue(opInst.getResult(0), callInst);
1824  return success();
1825 }
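// Illustrative example: for a threadprivate global `@g` of type i32, the
// code above computes size = 4 (the type's store size), builds the cache
// name "g.cache" from the symbol name, and maps the op's result to the
// pointer returned by createCachedThreadPrivate, i.e. the address of the
// current thread's copy of the global.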
1826 
1827 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1828 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
1829  switch (deviceClause) {
1830  case mlir::omp::DeclareTargetDeviceType::host:
1831  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1832  break;
1833  case mlir::omp::DeclareTargetDeviceType::nohost:
1834  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1835  break;
1836  case mlir::omp::DeclareTargetDeviceType::any:
1837  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1838  break;
1839  }
1840  llvm_unreachable("unhandled device clause");
1841 }
1842 
1843 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
 1844 convertToCaptureClauseKind(
 1845  mlir::omp::DeclareTargetCaptureClause captureClause) {
 1846  switch (captureClause) {
1847  case mlir::omp::DeclareTargetCaptureClause::to:
1848  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1849  case mlir::omp::DeclareTargetCaptureClause::link:
1850  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1851  case mlir::omp::DeclareTargetCaptureClause::enter:
1852  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1853  }
1854  llvm_unreachable("unhandled capture clause");
1855 }
1856 
1857 static llvm::SmallString<64>
1858 getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
1859  llvm::OpenMPIRBuilder &ompBuilder) {
1860  llvm::SmallString<64> suffix;
1861  llvm::raw_svector_ostream os(suffix);
1862  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
1863  auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
1864  auto fileInfoCallBack = [&loc]() {
1865  return std::pair<std::string, uint64_t>(
1866  llvm::StringRef(loc.getFilename()), loc.getLine());
1867  };
1868 
1869  os << llvm::format(
1870  "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
1871  }
1872  os << "_decl_tgt_ref_ptr";
1873 
1874  return suffix;
1875 }
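// Illustrative example: for a public global `@dev_var` the suffix is simply
// "_decl_tgt_ref_ptr", so the reference pointer is named
// "dev_var_decl_tgt_ref_ptr". For a private global, a "_<FileID in hex>"
// component derived from the defining file's target entry info is inserted
// first, mirroring how unique names are formed for internal symbols.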
1876 
1877 static bool isDeclareTargetLink(mlir::Value value) {
1878  if (auto addressOfOp =
1879  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
1880  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
1881  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
1882  if (auto declareTargetGlobal =
1883  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
1884  if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
1885  mlir::omp::DeclareTargetCaptureClause::link)
1886  return true;
1887  }
1888  return false;
1889 }
1890 
1891 // Returns the reference pointer generated by the lowering of the declare target
1892 // operation in cases where the link clause is used or the to clause is used in
1893 // USM mode.
1894 static llvm::Value *
 1895 getRefPtrIfDeclareTarget(mlir::Value value,
 1896  LLVM::ModuleTranslation &moduleTranslation) {
1897  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1898 
1899  // An easier way to do this may just be to keep track of any pointer
1900  // references and their mapping to their respective operation
1901  if (auto addressOfOp =
1902  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
1903  if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
1904  addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
1905  addressOfOp.getGlobalName()))) {
1906 
1907  if (auto declareTargetGlobal =
1908  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
1909  gOp.getOperation())) {
1910 
1911  // In this case, we must utilise the reference pointer generated by the
1912  // declare target operation, similar to Clang
1913  if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
1914  mlir::omp::DeclareTargetCaptureClause::link) ||
1915  (declareTargetGlobal.getDeclareTargetCaptureClause() ==
1916  mlir::omp::DeclareTargetCaptureClause::to &&
1917  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
1918  llvm::SmallString<64> suffix =
1919  getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
1920 
1921  if (gOp.getSymName().contains(suffix))
1922  return moduleTranslation.getLLVMModule()->getNamedValue(
1923  gOp.getSymName());
1924 
1925  return moduleTranslation.getLLVMModule()->getNamedValue(
1926  (gOp.getSymName().str() + suffix.str()).str());
1927  }
1928  }
1929  }
1930  }
1931 
1932  return nullptr;
1933 }
1934 
 1935 // A small helper structure to contain data gathered
 1936 // for map lowering and coalesce it into one area,
 1937 // avoiding extra computations such as searches in the
 1938 // llvm module for lowered mapped variables or checking
 1939 // if something is declare target (and retrieving the
 1940 // value) more than necessary.
 1941 struct MapInfoData : llvm::OpenMPIRBuilder::MapInfosTy {
 1942  llvm::SmallVector<bool, 4> IsDeclareTarget;
 1943  llvm::SmallVector<bool, 4> IsAMember;
 1944  llvm::SmallVector<mlir::Operation *, 4> MapClause;
 1945  llvm::SmallVector<llvm::Value *, 4> OriginalValue;
 1946  // Stripped off array/pointer to get the underlying
 1947  // element type
 1948  llvm::SmallVector<llvm::Type *, 4> BaseType;
 1949 
1950  /// Append arrays in \a CurInfo.
1951  void append(MapInfoData &CurInfo) {
1952  IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
1953  CurInfo.IsDeclareTarget.end());
1954  MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
1955  OriginalValue.append(CurInfo.OriginalValue.begin(),
1956  CurInfo.OriginalValue.end());
1957  BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
1958  llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
1959  }
1960 };
1961 
1962 uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
1963  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
1964  arrTy.getElementType()))
1965  return getArrayElementSizeInBits(nestedArrTy, dl);
1966  return dl.getTypeSizeInBits(arrTy.getElementType());
1967 }
1968 
 1969 // This function calculates the size to be offloaded for a specified type,
 1970 // given its associated map clause (which can contain bounds information that
 1971 // affects the total size). The size is calculated based on the underlying
 1972 // element type, e.g. given a 1-D array of ints, we calculate the size from
 1973 // the integer type * number of elements in the array. This size can be used
 1974 // in other calculations but is ultimately used as an argument to the OpenMP
 1975 // runtime's kernel argument structure, which is generated through the
 1976 // combinedInfo data structures.
 1977 // This function is somewhat equivalent to Clang's getExprTypeSize inside of
 1978 // CGOpenMPRuntime.cpp.
1979 llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
1980  Operation *clauseOp, llvm::Value *basePointer,
1981  llvm::Type *baseType, llvm::IRBuilderBase &builder,
1982  LLVM::ModuleTranslation &moduleTranslation) {
1983  // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives
1984  // the size in inconsistent byte or bit format.
1985  uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
1986  if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
1987  underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);
1988 
1989  if (auto memberClause =
1990  mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
1991  // This calculates the size to transfer based on bounds and the underlying
1992  // element type, provided bounds have been specified (Fortran
1993  // pointers/allocatables/target and arrays that have sections specified fall
1994  // into this as well).
1995  if (!memberClause.getBounds().empty()) {
1996  llvm::Value *elementCount = builder.getInt64(1);
1997  for (auto bounds : memberClause.getBounds()) {
1998  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
1999  bounds.getDefiningOp())) {
 2000  // The size to be mapped is computed from the map_info's bounds as
 2001  // elemCount * ((UB - LB) + 1); later we multiply by the underlying
 2002  // element type's byte size to get the full size to be offloaded based
 2003  // on the bounds.
2004  elementCount = builder.CreateMul(
2005  elementCount,
2006  builder.CreateAdd(
2007  builder.CreateSub(
2008  moduleTranslation.lookupValue(boundOp.getUpperBound()),
2009  moduleTranslation.lookupValue(boundOp.getLowerBound())),
2010  builder.getInt64(1)));
2011  }
2012  }
2013 
 2014  // The size is the number of elements times the element size in bytes; the
 2015  // size stored here is the underlying type's size, e.g. for ptr<i32> it is
 2016  // the i32's size, so we do some on-the-fly runtime math to get the size in
 2017  // bytes from the extent: (ub - lb + 1) * sizeInBytes. NOTE: This may need
 2018  // some adjustment for members with more complex types.
2019  return builder.CreateMul(elementCount,
2020  builder.getInt64(underlyingTypeSzInBits / 8));
2021  }
2022  }
2023 
2024  return builder.getInt64(underlyingTypeSzInBits / 8);
2025 }
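// Worked example: mapping a section of an i32 array with a single bounds
// operand whose lower bound is 2 and upper bound is 5 gives
//   elementCount = 1 * ((5 - 2) + 1) = 4
//   size         = 4 * (32 / 8)      = 16 bytes
// With no bounds present, the function simply returns the element type's
// byte size (here 4).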
2026 
 2027 static void collectMapDataFromMapOperands(MapInfoData &mapData,
 2028  llvm::SmallVectorImpl<Value> &mapOperands,
2029  LLVM::ModuleTranslation &moduleTranslation,
2030  DataLayout &dl,
2031  llvm::IRBuilderBase &builder) {
2032  for (mlir::Value mapValue : mapOperands) {
2033  if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2034  mapValue.getDefiningOp())) {
2035  mlir::Value offloadPtr =
2036  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
2037  mapData.OriginalValue.push_back(
2038  moduleTranslation.lookupValue(offloadPtr));
2039  mapData.Pointers.push_back(mapData.OriginalValue.back());
2040 
2041  if (llvm::Value *refPtr =
2042  getRefPtrIfDeclareTarget(offloadPtr,
2043  moduleTranslation)) { // declare target
2044  mapData.IsDeclareTarget.push_back(true);
2045  mapData.BasePointers.push_back(refPtr);
2046  } else { // regular mapped variable
2047  mapData.IsDeclareTarget.push_back(false);
2048  mapData.BasePointers.push_back(mapData.OriginalValue.back());
2049  }
2050 
2051  mapData.BaseType.push_back(
2052  moduleTranslation.convertType(mapOp.getVarType()));
2053  mapData.Sizes.push_back(getSizeInBytes(
2054  dl, mapOp.getVarType(), mapOp, mapData.BasePointers.back(),
2055  mapData.BaseType.back(), builder, moduleTranslation));
2056  mapData.MapClause.push_back(mapOp.getOperation());
2057  mapData.Types.push_back(
2058  llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value()));
2059  mapData.Names.push_back(LLVM::createMappingInformation(
2060  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
2061  mapData.DevicePointers.push_back(
 2062  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
 2063 
2064  // Check if this is a member mapping and correctly assign that it is, if
2065  // it is a member of a larger object.
2066  // TODO: Need better handling of members, and distinguishing of members
2067  // that are implicitly allocated on device vs explicitly passed in as
2068  // arguments.
2069  // TODO: May require some further additions to support nested record
2070  // types, i.e. member maps that can have member maps.
2071  mapData.IsAMember.push_back(false);
2072  for (mlir::Value mapValue : mapOperands) {
2073  if (auto map = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2074  mapValue.getDefiningOp())) {
2075  for (auto member : map.getMembers()) {
2076  if (member == mapOp) {
2077  mapData.IsAMember.back() = true;
2078  }
2079  }
2080  }
2081  }
2082  }
2083  }
2084 }
2085 
2086 /// This function calculates the array/pointer offset for map data provided
2087 /// with bounds operations, e.g. when provided something like the following:
2088 ///
2089 /// Fortran
2090 /// map(tofrom: array(2:5, 3:2))
2091 /// or
2092 /// C++
2093 /// map(tofrom: array[1:4][2:3])
2094 /// We must calculate the initial pointer offset to pass across, this function
2095 /// performs this using bounds.
2096 ///
 2097 /// NOTE: while bounds are specified in row-major order, they currently need
 2098 /// to be flipped for Fortran's column-major array allocation and access (as
 2099 /// opposed to C++'s row-major; hence the backwards processing where order is
 2100 /// important). This is likely important to keep in mind for the future when
 2101 /// we incorporate a C++ frontend: both frontends will need to agree on the
 2102 /// ordering of generated bounds operations (one may have to flip them) to
 2103 /// make the below lowering frontend agnostic. The offload size
 2104 /// calculation may also have to be adjusted for C++.
2105 std::vector<llvm::Value *>
 2106 calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
 2107  llvm::IRBuilderBase &builder, bool isArrayTy,
2108  mlir::OperandRange bounds) {
2109  std::vector<llvm::Value *> idx;
 2110  // There are no bounds to calculate an offset from; we can safely
 2111  // ignore them and return no indices.
2112  if (bounds.empty())
2113  return idx;
2114 
 2115  // If we have an array type, then we have its type, so we can treat it as a
 2116  // normal GEP instruction where the bounds operations are simply indexes
 2117  // into the array. We currently process the bounds in reverse order, which
 2118  // leans more towards Fortran's column-major layout in memory.
2119  if (isArrayTy) {
2120  idx.push_back(builder.getInt64(0));
2121  for (int i = bounds.size() - 1; i >= 0; --i) {
2122  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2123  bounds[i].getDefiningOp())) {
2124  idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
2125  }
2126  }
2127  } else {
 2128  // If we do not have an array type, but we have bounds, then we're dealing
 2129  // with a pointer that's being treated like an array and we have the
 2130  // underlying type, e.g. an i32, or f64 etc., e.g. a Fortran descriptor base
 2131  // address (pointer pointing to the actual data), so we must calculate the
 2132  // offset using a single index which the following two loops attempt to
 2133  // compute.
2134 
2135  // Calculates the size offset we need to make per row e.g. first row or
2136  // column only needs to be offset by one, but the next would have to be
2137  // the previous row/column offset multiplied by the extent of current row.
2138  //
2139  // For example ([1][10][100]):
2140  //
 2141  // - First row/column: we move by 1 for each index increment
 2142  // - Second row/column: we move by 1 (first row/column) * 10 (extent/size
 2143  // of the current dimension), i.e. 10, for each index increment
 2144  // - Third row/column: we move by 10 (second row/column) * 100 (extent/size
 2145  // of the current dimension), i.e. 1000, for each index increment
2146  std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
2147  for (size_t i = 1; i < bounds.size(); ++i) {
2148  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2149  bounds[i].getDefiningOp())) {
2150  dimensionIndexSizeOffset.push_back(builder.CreateMul(
2151  moduleTranslation.lookupValue(boundOp.getExtent()),
2152  dimensionIndexSizeOffset[i - 1]));
2153  }
2154  }
2155 
2156  // Now that we have calculated how much we move by per index, we must
2157  // multiply each lower bound offset in indexes by the size offset we
2158  // have calculated in the previous and accumulate the results to get
2159  // our final resulting offset.
2160  for (int i = bounds.size() - 1; i >= 0; --i) {
2161  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2162  bounds[i].getDefiningOp())) {
2163  if (idx.empty())
2164  idx.emplace_back(builder.CreateMul(
2165  moduleTranslation.lookupValue(boundOp.getLowerBound()),
2166  dimensionIndexSizeOffset[i]));
2167  else
2168  idx.back() = builder.CreateAdd(
2169  idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
2170  boundOp.getLowerBound()),
2171  dimensionIndexSizeOffset[i]));
2172  }
2173  }
2174  }
2175 
2176  return idx;
2177 }
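// Worked example (pointer case): for three bounds with lower bounds
// (l0, l1, l2) = (0, 3, 2) and extents (e0, e1, e2) = (1, 10, 100), the
// per-dimension strides become {1, e1, e1 * e2} = {1, 10, 1000}, so the
// single returned index is 0*1 + 3*10 + 2*1000 = 2030 elements from the
// base address. In the array case the lower bounds are instead returned in
// reverse order as ordinary GEP indices, prefixed with a leading 0.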
2178 
 2179 // This creates two insertions into the MapInfosTy data structure for the
 2180 // "parent" of a set of members (usually a container, e.g. a
 2181 // class/structure/derived type) when subsequent members have also been
 2182 // explicitly mapped on the same map clause. Certain types, such as Fortran
 2183 // descriptors, are mapped like this as well; however, the members are
 2184 // implicit as far as a user is concerned, so we must explicitly map them
 2185 // internally.
2186 //
 2187 // This function also returns the memberOfFlag for this particular parent,
 2188 // which is utilised in subsequent member mappings (by modifying their map
 2189 // type with it) to indicate that a member is part of this parent and should
 2190 // be treated by the runtime as such. This is important for correct mapping.
2191 static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
2192  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
2193  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
2194  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
2195  uint64_t mapDataIndex, bool isTargetParams) {
2196  // Map the first segment of our structure
2197  combinedInfo.Types.emplace_back(
2198  isTargetParams
2199  ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
2200  : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE);
2201  combinedInfo.DevicePointers.emplace_back(
 2202  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
 2203  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
2204  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
2205  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
2206  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
2207 
2208  // Calculate size of the parent object being mapped based on the
2209  // addresses at runtime, highAddr - lowAddr = size. This of course
2210  // doesn't factor in allocated data like pointers, hence the further
2211  // processing of members specified by users, or in the case of
2212  // Fortran pointers and allocatables, the mapping of the pointed to
2213  // data by the descriptor (which itself, is a structure containing
2214  // runtime information on the dynamically allocated data).
2215  llvm::Value *lowAddr = builder.CreatePointerCast(
2216  mapData.Pointers[mapDataIndex], builder.getPtrTy());
2217  llvm::Value *highAddr = builder.CreatePointerCast(
2218  builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
2219  mapData.Pointers[mapDataIndex], 1),
2220  builder.getPtrTy());
2221  llvm::Value *size = builder.CreateIntCast(
2222  builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
2223  builder.getInt64Ty(),
2224  /*isSigned=*/false);
2225  combinedInfo.Sizes.push_back(size);
2226 
 2227  // This creates the initial MEMBER_OF mapping that consists of
 2228  // the parent/top level container (same as above effectively, except
 2229  // with a fixed initial compile time size and a separate map type which
 2230  // indicates the true map type (tofrom etc.) and that it is part
 2231  // of a larger mapping, establishing the link between it and its
 2232  // members that are also explicitly mapped).
2233  llvm::omp::OpenMPOffloadMappingFlags mapFlag =
2234  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
2235  if (isTargetParams)
2236  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
2237 
2238  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
2239  ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
2240  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
2241 
2242  combinedInfo.Types.emplace_back(mapFlag);
2243  combinedInfo.DevicePointers.emplace_back(
 2244  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
 2245  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
2246  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
2247  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
2248  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
2249  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
2250 
2251  return memberOfFlag;
2252 }
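// Illustrative example: for a Fortran allocatable mapped tofrom, the parent
// descriptor gets two entries: one flagged TARGET_PARAM (when it is part of
// a target region's arguments) whose size is computed at runtime as
// highAddr - lowAddr over the descriptor, and one flagged TO | MEMBER_OF(n)
// (n = index of the first entry) with the size computed for the clause.
// The pointed-to data is then added by processMapMembersWithParent below,
// typically carrying the same MEMBER_OF flag plus PTR_AND_OBJ.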
2253 
2254 // The intent is to verify if the mapped data being passed is a
2255 // pointer -> pointee that requires special handling in certain cases,
2256 // e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
2257 //
2258 // There may be a better way to verify this, but unfortunately with
2259 // opaque pointers we lose the ability to easily check if something is
2260 // a pointer whilst maintaining access to the underlying type.
2261 static bool checkIfPointerMap(mlir::omp::MapInfoOp mapOp) {
2262  // If we have a varPtrPtr field assigned then the underlying type is a pointer
2263  if (mapOp.getVarPtrPtr())
2264  return true;
2265 
2266  // If the map data is declare target with a link clause, then it's represented
2267  // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
2268  // no relation to pointers.
2269  if (isDeclareTargetLink(mapOp.getVarPtr()))
2270  return true;
2271 
2272  return false;
2273 }
2274 
2275 // This function is intended to add explicit mappings of members
 2276 static void processMapMembersWithParent(
 2277  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
2278  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
2279  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
2280  uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
2281 
2282  auto parentClause =
2283  mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
2284 
2285  for (auto mappedMembers : parentClause.getMembers()) {
2286  auto memberClause =
2287  mlir::dyn_cast<mlir::omp::MapInfoOp>(mappedMembers.getDefiningOp());
2288  int memberDataIdx = -1;
2289  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
2290  if (mapData.MapClause[i] == memberClause)
2291  memberDataIdx = i;
2292  }
2293 
2294  assert(memberDataIdx >= 0 && "could not find mapped member of structure");
2295 
 2296  // Use the same MemberOfFlag to indicate its link with the parent and other
 2297  // members, and flag that it's part of a pointer-and-object coupling.
2298  auto mapFlag =
2299  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType().value());
2300  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
2301  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
2302  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
2303  if (checkIfPointerMap(memberClause))
2304  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
2305 
2306  combinedInfo.Types.emplace_back(mapFlag);
2307  combinedInfo.DevicePointers.emplace_back(
 2308  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
 2309  combinedInfo.Names.emplace_back(
2310  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
2311 
2312  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[memberDataIdx]);
2313  combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
2314  combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
2315  }
2316 }
2317 
 2318 static void processMapWithMembersOf(
 2319  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
2320  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
2321  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
2322  uint64_t mapDataIndex, bool isTargetParams) {
2323  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
2324  mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
2325  combinedInfo, mapData, mapDataIndex, isTargetParams);
2326  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
2327  combinedInfo, mapData, mapDataIndex,
2328  memberOfParentFlag);
2329 }
2330 
2331 // This is a variation on Clang's GenerateOpenMPCapturedVars, which
2332 // generates different operation (e.g. load/store) combinations for
2333 // arguments to the kernel, based on map capture kinds which are then
2334 // utilised in the combinedInfo in place of the original Map value.
2335 static void
 2336 createAlteredByCaptureMap(MapInfoData &mapData,
 2337  LLVM::ModuleTranslation &moduleTranslation,
2338  llvm::IRBuilderBase &builder) {
2339  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
 2340  // If it's declare target, skip it; it's handled separately.
2341  if (!mapData.IsDeclareTarget[i]) {
2342  auto mapOp =
2343  mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(mapData.MapClause[i]);
2344  mlir::omp::VariableCaptureKind captureKind =
2345  mapOp.getMapCaptureType().value_or(
2346  mlir::omp::VariableCaptureKind::ByRef);
2347  bool isPtrTy = checkIfPointerMap(mapOp);
2348 
 2349  // Currently handles the array sectioning lowerbound case, but more
 2350  // logic may be required in the future. Clang invokes EmitLValue,
 2351  // which has specialised logic for special Clang types such as user-defined
 2352  // types, so it is possible we will have to extend this for
 2353  // structures or other complex types. The general idea is that this
 2354  // function mimics some of the logic from Clang that we require for
 2355  // kernel argument passing from host -> device.
2356  switch (captureKind) {
2357  case mlir::omp::VariableCaptureKind::ByRef: {
2358  llvm::Value *newV = mapData.Pointers[i];
2359  std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
2360  moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
2361  mapOp.getBounds());
2362  if (isPtrTy)
2363  newV = builder.CreateLoad(builder.getPtrTy(), newV);
2364 
2365  if (!offsetIdx.empty())
2366  newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
2367  "array_offset");
2368  mapData.Pointers[i] = newV;
2369  } break;
2370  case mlir::omp::VariableCaptureKind::ByCopy: {
2371  llvm::Type *type = mapData.BaseType[i];
2372  llvm::Value *newV;
2373  if (mapData.Pointers[i]->getType()->isPointerTy())
2374  newV = builder.CreateLoad(type, mapData.Pointers[i]);
2375  else
2376  newV = mapData.Pointers[i];
2377 
2378  if (!isPtrTy) {
2379  auto curInsert = builder.saveIP();
2380  builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
2381  auto *memTempAlloc =
2382  builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
2383  builder.restoreIP(curInsert);
2384 
2385  builder.CreateStore(newV, memTempAlloc);
2386  newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
2387  }
2388 
2389  mapData.Pointers[i] = newV;
2390  mapData.BasePointers[i] = newV;
2391  } break;
2392  case mlir::omp::VariableCaptureKind::This:
2393  case mlir::omp::VariableCaptureKind::VLAType:
2394  mapData.MapClause[i]->emitOpError("Unhandled capture kind");
2395  break;
2396  }
2397  }
2398  }
2399 }
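// Illustrative example: an i32 scalar mapped with capture kind ByCopy is
// loaded, stored into a pointer-sized stack slot (".casted") at the alloca
// insertion point, and re-loaded as a pointer, so the raw value travels in
// the kernel-argument pointer slot (genMapInfos later tags it
// OMP_MAP_LITERAL). A ByRef array section instead keeps its address and is
// offset by an in-bounds GEP computed from the lower bounds.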
2400 
2401 // Generate all map related information and fill the combinedInfo.
2402 static void genMapInfos(llvm::IRBuilderBase &builder,
2403  LLVM::ModuleTranslation &moduleTranslation,
2404  DataLayout &dl,
2405  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
2406  MapInfoData &mapData,
2407  const SmallVector<Value> &devPtrOperands = {},
2408  const SmallVector<Value> &devAddrOperands = {},
2409  bool isTargetParams = false) {
 2410  // We wish to modify some of the ways in which arguments are
 2411  // passed based on their capture type by the target region; this can
 2412  // involve generating new loads and stores, which changes the
 2413  // MLIR value to LLVM value mapping. However, we only wish to do this
 2414  // locally for the current function/target and also avoid altering
 2415  // ModuleTranslation, so we remap the base pointer or pointer stored
 2416  // in the map info's corresponding MapInfoData, which is later accessed
 2417  // by genMapInfos and createTarget to help generate the kernel and
 2418  // kernel arg structure. It primarily becomes relevant in cases like
 2419  // bycopy, or byref range'd arrays. In the default case, we simply
 2420  // pass the pointer byref as both basePointer and pointer.
2421  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
2422  createAlteredByCaptureMap(mapData, moduleTranslation, builder);
2423 
2424  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2425 
2426  auto fail = [&combinedInfo]() -> void {
2427  combinedInfo.BasePointers.clear();
2428  combinedInfo.Pointers.clear();
2429  combinedInfo.DevicePointers.clear();
2430  combinedInfo.Sizes.clear();
2431  combinedInfo.Types.clear();
2432  combinedInfo.Names.clear();
2433  };
2434 
 2435  // We operate under the assumption that all vectors that are
 2436  // required in MapInfoData are of equal lengths (either filled with
 2437  // default constructed data or appropriate information), so we can
 2438  // utilise the size from any component of MapInfoData; if we can't,
 2439  // something is missing from the initial MapInfoData construction.
2440  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
 2441  // NOTE/TODO: We currently do not handle member mapping separately from its
 2442  // parent, or explicit mapping of a parent and member in the same operation;
 2443  // this will need to change in the near future. For now we primarily handle
 2444  // descriptor mapping from Fortran, generalised as mapping record types
 2445  // with implicit member maps. This lowering needs further generalisation to
 2446  // fully support Fortran derived types, and C/C++ structures and classes.
2447  if (mapData.IsAMember[i])
2448  continue;
2449 
2450  auto mapInfoOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[i]);
2451  if (!mapInfoOp.getMembers().empty()) {
2452  processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
2453  combinedInfo, mapData, i, isTargetParams);
2454  continue;
2455  }
2456 
2457  auto mapFlag = mapData.Types[i];
2458  bool isPtrTy = checkIfPointerMap(mapInfoOp);
2459  if (isPtrTy)
2460  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
2461 
2462  // Declare Target Mappings are excluded from being marked as
2463  // OMP_MAP_TARGET_PARAM as they are not passed as parameters.
2464  if (isTargetParams && !mapData.IsDeclareTarget[i])
2465  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
2466 
2467  if (auto mapInfoOp = dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[i]))
2468  if (mapInfoOp.getMapCaptureType().value() ==
2469  mlir::omp::VariableCaptureKind::ByCopy &&
2470  !isPtrTy)
2471  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
2472 
2473  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[i]);
2474  combinedInfo.Pointers.emplace_back(mapData.Pointers[i]);
2475  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[i]);
2476  combinedInfo.Names.emplace_back(mapData.Names[i]);
2477  combinedInfo.Types.emplace_back(mapFlag);
2478  combinedInfo.Sizes.emplace_back(mapData.Sizes[i]);
2479  }
2480 
2481  auto findMapInfo = [&combinedInfo](llvm::Value *val, unsigned &index) {
2482  index = 0;
2483  for (llvm::Value *basePtr : combinedInfo.BasePointers) {
2484  if (basePtr == val)
2485  return true;
2486  index++;
2487  }
2488  return false;
2489  };
2490 
2491  auto addDevInfos = [&, fail](auto devOperands, auto devOpType) -> void {
2492  for (const auto &devOp : devOperands) {
2493  // TODO: Only LLVMPointerTypes are handled.
2494  if (!isa<LLVM::LLVMPointerType>(devOp.getType()))
2495  return fail();
2496 
2497  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devOp);
2498 
2499  // Check if map info is already present for this entry.
2500  unsigned infoIndex;
2501  if (findMapInfo(mapOpValue, infoIndex)) {
2502  combinedInfo.Types[infoIndex] |=
2503  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
2504  combinedInfo.DevicePointers[infoIndex] = devOpType;
2505  } else {
2506  combinedInfo.BasePointers.emplace_back(mapOpValue);
2507  combinedInfo.Pointers.emplace_back(mapOpValue);
2508  combinedInfo.DevicePointers.emplace_back(devOpType);
2509  combinedInfo.Names.emplace_back(
2510  LLVM::createMappingInformation(devOp.getLoc(), *ompBuilder));
2511  combinedInfo.Types.emplace_back(
2512  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
2513  combinedInfo.Sizes.emplace_back(builder.getInt64(0));
2514  }
2515  }
2516  };
2517 
2518  addDevInfos(devPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
2519  addDevInfos(devAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
2520 }
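// Illustrative example: a by-copy scalar passed to a target region ends up
// with flags TARGET_PARAM | LITERAL (in addition to whatever the clause
// specified), whereas a declare-target link global keeps its reference
// pointer as the base pointer, gains PTR_AND_OBJ, and is never marked
// TARGET_PARAM. A use_device_ptr operand that was not otherwise mapped gets
// its own entry flagged RETURN_PARAM with size 0.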
2521 
2522 static LogicalResult
2523 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
2524  LLVM::ModuleTranslation &moduleTranslation) {
2525  llvm::Value *ifCond = nullptr;
2526  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
2527  SmallVector<Value> mapOperands;
2528  SmallVector<Value> useDevPtrOperands;
2529  SmallVector<Value> useDevAddrOperands;
2530  llvm::omp::RuntimeFunction RTLFn;
2531  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
2532 
2533  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2534 
2535  LogicalResult result =
 2536  llvm::TypeSwitch<Operation *, LogicalResult>(op)
 2537  .Case([&](omp::TargetDataOp dataOp) {
2538  if (auto ifExprVar = dataOp.getIfExpr())
2539  ifCond = moduleTranslation.lookupValue(ifExprVar);
2540 
2541  if (auto devId = dataOp.getDevice())
2542  if (auto constOp =
2543  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2544  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2545  deviceID = intAttr.getInt();
2546 
2547  mapOperands = dataOp.getMapOperands();
2548  useDevPtrOperands = dataOp.getUseDevicePtr();
2549  useDevAddrOperands = dataOp.getUseDeviceAddr();
2550  return success();
2551  })
2552  .Case([&](omp::TargetEnterDataOp enterDataOp) {
2553  if (enterDataOp.getNowait())
2554  return (LogicalResult)(enterDataOp.emitError(
2555  "`nowait` is not supported yet"));
2556 
2557  if (auto ifExprVar = enterDataOp.getIfExpr())
2558  ifCond = moduleTranslation.lookupValue(ifExprVar);
2559 
2560  if (auto devId = enterDataOp.getDevice())
2561  if (auto constOp =
2562  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2563  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2564  deviceID = intAttr.getInt();
2565  RTLFn = llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
2566  mapOperands = enterDataOp.getMapOperands();
2567  return success();
2568  })
2569  .Case([&](omp::TargetExitDataOp exitDataOp) {
2570  if (exitDataOp.getNowait())
2571  return (LogicalResult)(exitDataOp.emitError(
2572  "`nowait` is not supported yet"));
2573 
2574  if (auto ifExprVar = exitDataOp.getIfExpr())
2575  ifCond = moduleTranslation.lookupValue(ifExprVar);
2576 
2577  if (auto devId = exitDataOp.getDevice())
2578  if (auto constOp =
2579  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2580  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2581  deviceID = intAttr.getInt();
2582 
2583  RTLFn = llvm::omp::OMPRTL___tgt_target_data_end_mapper;
2584  mapOperands = exitDataOp.getMapOperands();
2585  return success();
2586  })
2587  .Case([&](omp::TargetUpdateOp updateDataOp) {
2588  if (updateDataOp.getNowait())
2589  return (LogicalResult)(updateDataOp.emitError(
2590  "`nowait` is not supported yet"));
2591 
2592  if (auto ifExprVar = updateDataOp.getIfExpr())
2593  ifCond = moduleTranslation.lookupValue(ifExprVar);
2594 
2595  if (auto devId = updateDataOp.getDevice())
2596  if (auto constOp =
2597  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2598  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2599  deviceID = intAttr.getInt();
2600 
2601  RTLFn = llvm::omp::OMPRTL___tgt_target_data_update_mapper;
2602  mapOperands = updateDataOp.getMapOperands();
2603  return success();
2604  })
2605  .Default([&](Operation *op) {
2606  return op->emitError("unsupported OpenMP operation: ")
2607  << op->getName();
2608  });
2609 
2610  if (failed(result))
2611  return failure();
2612 
2613  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2614 
2615  MapInfoData mapData;
2616  collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, DL,
2617  builder);
2618 
2619  // Fill up the arrays with all the mapped variables.
2620  llvm::OpenMPIRBuilder::MapInfosTy combinedInfo;
2621  auto genMapInfoCB =
2622  [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
2623  builder.restoreIP(codeGenIP);
2624  if (auto dataOp = dyn_cast<omp::TargetDataOp>(op)) {
2625  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData,
2626  useDevPtrOperands, useDevAddrOperands);
2627  } else {
2628  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
2629  }
2630  return combinedInfo;
2631  };
2632 
2633  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
2634  /*SeparateBeginEndCalls=*/true);
2635 
2636  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
2637  LogicalResult bodyGenStatus = success();
2638  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType) {
2639  assert(isa<omp::TargetDataOp>(op) &&
2640  "BodyGen requested for non TargetDataOp");
2641  Region &region = cast<omp::TargetDataOp>(op).getRegion();
2642  switch (bodyGenType) {
2643  case BodyGenTy::Priv:
2644  // Check if any device ptr/addr info is available
2645  if (!info.DevicePtrInfoMap.empty()) {
2646  builder.restoreIP(codeGenIP);
2647  unsigned argIndex = 0;
2648  for (auto &devPtrOp : useDevPtrOperands) {
2649  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devPtrOp);
2650  const auto &arg = region.front().getArgument(argIndex);
2651  moduleTranslation.mapValue(arg,
2652  info.DevicePtrInfoMap[mapOpValue].second);
2653  argIndex++;
2654  }
2655 
2656  for (auto &devAddrOp : useDevAddrOperands) {
2657  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devAddrOp);
2658  const auto &arg = region.front().getArgument(argIndex);
2659  auto *LI = builder.CreateLoad(
2660  builder.getPtrTy(), info.DevicePtrInfoMap[mapOpValue].second);
2661  moduleTranslation.mapValue(arg, LI);
2662  argIndex++;
2663  }
2664 
2665  bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region",
2666  builder, moduleTranslation);
2667  }
2668  break;
2669  case BodyGenTy::DupNoPriv:
2670  break;
2671  case BodyGenTy::NoPriv:
2672  // If device info is available then region has already been generated
2673  if (info.DevicePtrInfoMap.empty()) {
2674  builder.restoreIP(codeGenIP);
2675  bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region",
2676  builder, moduleTranslation);
2677  }
2678  break;
2679  }
2680  return builder.saveIP();
2681  };
2682 
2683  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2684  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2685  findAllocaInsertPoint(builder, moduleTranslation);
2686  if (isa<omp::TargetDataOp>(op)) {
2687  builder.restoreIP(ompBuilder->createTargetData(
2688  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
2689  info, genMapInfoCB, nullptr, bodyGenCB));
2690  } else {
2691  builder.restoreIP(ompBuilder->createTargetData(
2692  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
2693  info, genMapInfoCB, &RTLFn));
2694  }
2695 
2696  return bodyGenStatus;
2697 }
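// As a rough illustration of the two paths above (syntax abbreviated, not a
// verbatim test case): an op such as
//   omp.target_enter_data map_entries(%map : !llvm.ptr)
// takes the non-TargetDataOp branch and lowers to a call to the selected
// RTLFn, here __tgt_target_data_begin_mapper, using the base-pointer/pointer/
// size arrays produced through genMapInfoCB, while omp.target_data
// additionally runs bodyGenCB to inline its region between the begin/end
// mapper calls.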
2698 
 2699 /// Lowers the FlagsAttr, which is applied to the module on the device
 2700 /// pass when offloading. This attribute contains OpenMP RTL globals that
 2701 /// can be passed as flags to the frontend; otherwise they are set to defaults.
2702 LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
2703  LLVM::ModuleTranslation &moduleTranslation) {
2704  if (!cast<mlir::ModuleOp>(op))
2705  return failure();
2706 
2707  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2708 
2709  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
2710  attribute.getOpenmpDeviceVersion());
2711 
2712  if (attribute.getNoGpuLib())
2713  return success();
2714 
2715  ompBuilder->createGlobalFlag(
2716  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
2717  "__omp_rtl_debug_kind");
2718  ompBuilder->createGlobalFlag(
2719  attribute
2720  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
2721  ,
2722  "__omp_rtl_assume_teams_oversubscription");
2723  ompBuilder->createGlobalFlag(
2724  attribute
2725  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
2726  ,
2727  "__omp_rtl_assume_threads_oversubscription");
2728  ompBuilder->createGlobalFlag(
2729  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
2730  "__omp_rtl_assume_no_thread_state");
2731  ompBuilder->createGlobalFlag(
2732  attribute
2733  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
2734  ,
2735  "__omp_rtl_assume_no_nested_parallelism");
2736  return success();
2737 }
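// Schematically, each createGlobalFlag call above materializes a small device
// global whose name matches the string argument; the exact linkage and
// initializer values are chosen by the OpenMPIRBuilder, but the result is
// along the lines of:
//   @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
//   @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0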
2738 
2739 static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
2740  omp::TargetOp targetOp,
2741  llvm::StringRef parentName = "") {
2742  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
2743 
2744  assert(fileLoc && "No file found from location");
2745  StringRef fileName = fileLoc.getFilename().getValue();
2746 
2747  llvm::sys::fs::UniqueID id;
2748  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
2749  targetOp.emitError("Unable to get unique ID for file");
2750  return false;
2751  }
2752 
2753  uint64_t line = fileLoc.getLine();
2754  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
2755  id.getFile(), line);
2756  return true;
2757 }
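// The (parent name, device id, file id, line) tuple collected here is what
// the OpenMPIRBuilder later uses to derive the offload entry and kernel name,
// typically of the form
//   __omp_offloading_<device-id>_<file-id>_<parent-name>_l<line>
// so that two target regions in the same file and function still get distinct
// entries.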
2758 
2759 static bool targetOpSupported(Operation &opInst) {
2760  auto targetOp = cast<omp::TargetOp>(opInst);
2761  if (targetOp.getIfExpr()) {
2762  opInst.emitError("If clause not yet supported");
2763  return false;
2764  }
2765 
2766  if (targetOp.getDevice()) {
2767  opInst.emitError("Device clause not yet supported");
2768  return false;
2769  }
2770 
2771  if (targetOp.getThreadLimit()) {
2772  opInst.emitError("Thread limit clause not yet supported");
2773  return false;
2774  }
2775 
2776  if (targetOp.getNowait()) {
2777  opInst.emitError("Nowait clause not yet supported");
2778  return false;
2779  }
2780 
2781  return true;
2782 }
2783 
 2784 static void
 2785 handleDeclareTargetMapVar(MapInfoData &mapData,
 2786  LLVM::ModuleTranslation &moduleTranslation,
2787  llvm::IRBuilderBase &builder) {
2788  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
 2789  // In the case of declare target mapped variables, the basePointer is
 2790  // the reference pointer generated by the convertDeclareTargetAttr
 2791  // method, whereas the kernelValue is the original variable. For the
 2792  // device we must therefore replace all uses of the original global
 2793  // variable (stored in kernelValue) with the reference pointer (stored
 2794  // in basePointer for declare target mapped variables): on the device
 2795  // the data is mapped into the reference pointer and should be loaded
 2796  // from it, and the original variable is discarded. On the host both
 2797  // exist, and metadata is generated (elsewhere in the
 2798  // convertDeclareTargetAttr function) to link the two variables in the
 2799  // runtime; both the reference pointer and the pointer are then assigned
 2800  // in the kernel argument structure for the host.
2801  if (mapData.IsDeclareTarget[i]) {
2802  // The users iterator will get invalidated if we modify an element,
2803  // so we populate this vector of uses to alter each user on an individual
2804  // basis to emit its own load (rather than one load for all).
 2805  llvm::SmallVector<llvm::User *> userVec;
 2806  for (llvm::User *user : mapData.OriginalValue[i]->users())
2807  userVec.push_back(user);
2808 
2809  for (llvm::User *user : userVec) {
2810  if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
2811  auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
2812  mapData.BasePointers[i]);
2813  load->moveBefore(insn);
2814  user->replaceUsesOfWith(mapData.OriginalValue[i], load);
2815  }
2816  }
2817  }
2818  }
2819 }
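// Roughly, for a declare target global @g whose mapped base pointer is the
// reference pointer produced during convertDeclareTargetAttr (the name below
// is only illustrative), a device-side use such as
//   store i32 1, ptr @g
// is rewritten to
//   %ref = load ptr, ptr @g_decl_tgt_ref_ptr
//   store i32 1, ptr %ref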
2820 
2821 // The createDeviceArgumentAccessor function generates
 2822 // instructions for retrieving (accessing) kernel
2823 // arguments inside of the device kernel for use by
2824 // the kernel. This enables different semantics such as
2825 // the creation of temporary copies of data allowing
2826 // semantics like read-only/no host write back kernel
2827 // arguments.
2828 //
2829 // This currently implements a very light version of Clang's
 2830 // EmitParmDecl's handling of direct arguments, as well
2831 // as a portion of the argument access generation based on
2832 // capture types found at the end of emitOutlinedFunctionPrologue
 2833 // in Clang. The indirect path handling of EmitParmDecl may be
2834 // required for future work, but a direct 1-to-1 copy doesn't seem
2835 // possible as the logic is rather scattered throughout Clang's
2836 // lowering and perhaps we wish to deviate slightly.
2837 //
 2838 // \param mapData - A container containing vectors of information
 2839 // corresponding to the input argument, which should have a
 2840 // corresponding entry in the MapInfoData container's
 2841 // OriginalValue vector.
 2842 // \param arg - This is the generated kernel function argument that
 2843 // corresponds to the passed-in input argument. We generate different
 2844 // accesses of this Argument, based on capture type and other
 2845 // input-related information.
 2846 // \param input - This is the host side value that will be passed to
 2847 // the kernel, i.e. the kernel input. We rewrite all uses of this within
 2848 // the kernel (as we generate the kernel body based on the target's region,
 2849 // which maintains references to the original input) to the retVal argument
 2850 // upon exit of this function inside of the OMPIRBuilder. This interlinks
 2851 // the kernel argument to future uses of it in the function, providing
 2852 // appropriate "glue" instructions in between.
 2853 // \param retVal - This is the value that all uses of input inside of the
 2854 // kernel will be rewritten to. The goal of this function is to generate
 2855 // an appropriate location for the kernel argument to be accessed from:
 2856 // e.g. ByRef will result in a temporary allocation location and then
 2857 // a store of the kernel argument into this allocated memory, which
 2858 // will then be loaded from, whereas ByCopy will use the allocated
 2859 // memory directly.
2860 static llvm::IRBuilderBase::InsertPoint
2861 createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
2862  llvm::Value *input, llvm::Value *&retVal,
2863  llvm::IRBuilderBase &builder,
2864  llvm::OpenMPIRBuilder &ompBuilder,
2865  LLVM::ModuleTranslation &moduleTranslation,
2866  llvm::IRBuilderBase::InsertPoint allocaIP,
2867  llvm::IRBuilderBase::InsertPoint codeGenIP) {
2868  builder.restoreIP(allocaIP);
2869 
2870  mlir::omp::VariableCaptureKind capture =
2871  mlir::omp::VariableCaptureKind::ByRef;
2872 
2873  // Find the associated MapInfoData entry for the current input
2874  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
2875  if (mapData.OriginalValue[i] == input) {
2876  if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2877  mapData.MapClause[i])) {
2878  capture = mapOp.getMapCaptureType().value_or(
2879  mlir::omp::VariableCaptureKind::ByRef);
2880  }
2881 
2882  break;
2883  }
2884 
2885  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
2886  unsigned int defaultAS =
2887  ompBuilder.M.getDataLayout().getProgramAddressSpace();
2888 
 2889  // Create the alloca for the argument at the current insertion point.
2890  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
2891 
2892  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
2893  v = builder.CreatePointerBitCastOrAddrSpaceCast(
2894  v, arg.getType()->getPointerTo(defaultAS));
2895 
2896  builder.CreateStore(&arg, v);
2897 
2898  builder.restoreIP(codeGenIP);
2899 
2900  switch (capture) {
2901  case mlir::omp::VariableCaptureKind::ByCopy: {
2902  retVal = v;
2903  break;
2904  }
2905  case mlir::omp::VariableCaptureKind::ByRef: {
2906  retVal = builder.CreateAlignedLoad(
2907  v->getType(), v,
2908  ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
2909  break;
2910  }
2911  case mlir::omp::VariableCaptureKind::This:
2912  case mlir::omp::VariableCaptureKind::VLAType:
2913  assert(false && "Currently unsupported capture kind");
2914  break;
2915  }
2916 
2917  return builder.saveIP();
2918 }
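// Sketch of the IR this produces for a pointer-typed argument %arg (address
// space casts omitted):
//   ByCopy: %v = alloca ptr ; store ptr %arg, ptr %v ; retVal = %v
//   ByRef:  %v = alloca ptr ; store ptr %arg, ptr %v
//           retVal = load ptr, ptr %v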
2919 
2920 static LogicalResult
2921 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
2922  LLVM::ModuleTranslation &moduleTranslation) {
2923 
2924  if (!targetOpSupported(opInst))
2925  return failure();
2926 
2927  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
2928  auto targetOp = cast<omp::TargetOp>(opInst);
2929  auto &targetRegion = targetOp.getRegion();
2930  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
2931  SmallVector<Value> mapOperands = targetOp.getMapOperands();
2932 
2933  LogicalResult bodyGenStatus = success();
2934  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2935  auto bodyCB = [&](InsertPointTy allocaIP,
2936  InsertPointTy codeGenIP) -> InsertPointTy {
2937  // Forward target-cpu and target-features function attributes from the
2938  // original function to the new outlined function.
2939  llvm::Function *llvmParentFn =
2940  moduleTranslation.lookupFunction(parentFn.getName());
2941  llvm::Function *llvmOutlinedFn = codeGenIP.getBlock()->getParent();
2942  assert(llvmParentFn && llvmOutlinedFn &&
2943  "Both parent and outlined functions must exist at this point");
2944 
2945  if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
2946  attr.isStringAttribute())
2947  llvmOutlinedFn->addFnAttr(attr);
2948 
2949  if (auto attr = llvmParentFn->getFnAttribute("target-features");
2950  attr.isStringAttribute())
2951  llvmOutlinedFn->addFnAttr(attr);
2952 
2953  builder.restoreIP(codeGenIP);
2954  unsigned argIndex = 0;
2955  for (auto &mapOp : mapOperands) {
2956  auto mapInfoOp =
2957  mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
2958  llvm::Value *mapOpValue =
2959  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
2960  const auto &arg = targetRegion.front().getArgument(argIndex);
2961  moduleTranslation.mapValue(arg, mapOpValue);
2962  argIndex++;
2963  }
2964  llvm::BasicBlock *exitBlock = convertOmpOpRegions(
2965  targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
2966  builder.SetInsertPoint(exitBlock);
2967  return builder.saveIP();
2968  };
2969 
2970  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2971  StringRef parentName = parentFn.getName();
2972 
2973  llvm::TargetRegionEntryInfo entryInfo;
2974 
2975  if (!getTargetEntryUniqueInfo(entryInfo, targetOp, parentName))
2976  return failure();
2977 
2978  int32_t defaultValTeams = -1;
2979  int32_t defaultValThreads = 0;
2980 
2981  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2982  findAllocaInsertPoint(builder, moduleTranslation);
2983 
2984  MapInfoData mapData;
2985  collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, dl,
2986  builder);
2987 
2988  llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
2989  auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
 2990  -> llvm::OpenMPIRBuilder::MapInfosTy & {
 2991  builder.restoreIP(codeGenIP);
2992  genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, {}, {},
2993  true);
2994  return combinedInfos;
2995  };
2996 
2997  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
2998  llvm::Value *&retVal, InsertPointTy allocaIP,
2999  InsertPointTy codeGenIP) {
3000  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3001 
 3002  // We just return the unaltered argument for the host function
 3003  // for now; some alterations may be required in the future to
 3004  // keep host fallback functions working identically to the device
 3005  // version (e.g. ByCopy values should be treated as such on both
 3006  // host and device, which is currently not always the case).
3007  if (!ompBuilder->Config.isTargetDevice()) {
3008  retVal = cast<llvm::Value>(&arg);
3009  return codeGenIP;
3010  }
3011 
3012  return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
3013  *ompBuilder, moduleTranslation,
3014  allocaIP, codeGenIP);
3015  };
3016 
 3017  SmallVector<llvm::Value *> kernelInput;
 3018  for (size_t i = 0; i < mapOperands.size(); ++i) {
 3019  // Declare target arguments are not passed to kernels as arguments.
 3020  // TODO: We currently do not handle cases where a member is explicitly
 3021  // passed in as an argument; this will likely need to be handled in
 3022  // the near future. Rather than using IsAMember, it may be better to
 3023  // test if the relevant BlockArg is used within the target region and
 3024  // then use that as a basis for exclusion in the kernel inputs.
3025  if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
3026  kernelInput.push_back(mapData.OriginalValue[i]);
3027  }
3028 
3029  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget(
3030  ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams,
3031  defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB));
3032 
 3033  // Remap access operations to declare target reference pointers for the
 3034  // device, essentially generating extra load ops as necessary.
3035  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
3036  handleDeclareTargetMapVar(mapData, moduleTranslation, builder);
3037 
3038  return bodyGenStatus;
3039 }
3040 
3041 static LogicalResult
3042 convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
3043  LLVM::ModuleTranslation &moduleTranslation) {
3044  // Amend omp.declare_target by deleting the IR of the outlined functions
3045  // created for target regions. They cannot be filtered out from MLIR earlier
3046  // because the omp.target operation inside must be translated to LLVM, but
3047  // the wrapper functions themselves must not remain at the end of the
3048  // process. We know that functions where omp.declare_target does not match
3049  // omp.is_target_device at this stage can only be wrapper functions because
3050  // those that aren't are removed earlier as an MLIR transformation pass.
3051  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
3052  if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
3053  op->getParentOfType<ModuleOp>().getOperation())) {
3054  if (!offloadMod.getIsTargetDevice())
3055  return success();
3056 
3057  omp::DeclareTargetDeviceType declareType =
3058  attribute.getDeviceType().getValue();
3059 
3060  if (declareType == omp::DeclareTargetDeviceType::host) {
3061  llvm::Function *llvmFunc =
3062  moduleTranslation.lookupFunction(funcOp.getName());
3063  llvmFunc->dropAllReferences();
3064  llvmFunc->eraseFromParent();
3065  }
3066  }
3067  return success();
3068  }
3069 
3070  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
3071  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
3072  if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
3073  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3074  bool isDeclaration = gOp.isDeclaration();
3075  bool isExternallyVisible =
3076  gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
3077  auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
3078  llvm::StringRef mangledName = gOp.getSymName();
3079  auto captureClause =
3080  convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
3081  auto deviceClause =
3082  convertToDeviceClauseKind(attribute.getDeviceType().getValue());
 3083  // Unused by MLIR at the moment; required in Clang for
 3084  // bookkeeping.
3085  std::vector<llvm::GlobalVariable *> generatedRefs;
3086 
3087  std::vector<llvm::Triple> targetTriple;
3088  auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>(
3089  op->getParentOfType<mlir::ModuleOp>()->getAttr(
3090  LLVM::LLVMDialect::getTargetTripleAttrName()));
3091  if (targetTripleAttr)
3092  targetTriple.emplace_back(targetTripleAttr.data());
3093 
3094  auto fileInfoCallBack = [&loc]() {
3095  std::string filename = "";
3096  std::uint64_t lineNo = 0;
3097 
3098  if (loc) {
3099  filename = loc.getFilename().str();
3100  lineNo = loc.getLine();
3101  }
3102 
3103  return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
3104  lineNo);
3105  };
3106 
3107  ompBuilder->registerTargetGlobalVariable(
3108  captureClause, deviceClause, isDeclaration, isExternallyVisible,
3109  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
3110  generatedRefs, /*OpenMPSimd*/ false, targetTriple,
3111  /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
3112  gVal->getType(), gVal);
3113 
3114  if (ompBuilder->Config.isTargetDevice() &&
3115  (attribute.getCaptureClause().getValue() !=
3116  mlir::omp::DeclareTargetCaptureClause::to ||
3117  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
3118  ompBuilder->getAddrOfDeclareTargetVar(
3119  captureClause, deviceClause, isDeclaration, isExternallyVisible,
3120  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
3121  generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
3122  /*GlobalInitializer*/ nullptr,
3123  /*VariableLinkage*/ nullptr);
3124  }
3125  }
3126  }
3127 
3128  return success();
3129 }
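// For reference, the attribute handled here typically appears on llvm.func or
// llvm.mlir.global operations, roughly as (attribute syntax abbreviated):
//   llvm.mlir.global external @gvar(0 : i32) {omp.declare_target =
//     #omp.declaretarget<device_type = (any), capture_clause = (to)>} : i32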
3130 
3131 // Returns true if the operation is inside a TargetOp or
3132 // is part of a declare target function.
3133 static bool isTargetDeviceOp(Operation *op) {
3134  // Assumes no reverse offloading
3135  if (op->getParentOfType<omp::TargetOp>())
3136  return true;
3137 
3138  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
3139  if (auto declareTargetIface =
3140  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
3141  parentFn.getOperation()))
3142  if (declareTargetIface.isDeclareTarget() &&
3143  declareTargetIface.getDeclareTargetDeviceType() !=
3144  mlir::omp::DeclareTargetDeviceType::host)
3145  return true;
3146 
3147  return false;
3148 }
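// For example (abbreviated), any op nested as in
//   omp.target { llvm.store ... ; omp.terminator }
// is a target device op, as is any op inside an llvm.func that carries a
// non-host omp.declare_target attribute; a plain host-side omp.parallel is
// not.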
3149 
3150 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
3151 /// (including OpenMP runtime calls).
3152 static LogicalResult
3153 convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
3154  LLVM::ModuleTranslation &moduleTranslation) {
3155 
3156  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3157 
 3158  return llvm::TypeSwitch<Operation *, LogicalResult>(op)
 3159  .Case([&](omp::BarrierOp) {
3160  ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
3161  return success();
3162  })
3163  .Case([&](omp::TaskwaitOp) {
3164  ompBuilder->createTaskwait(builder.saveIP());
3165  return success();
3166  })
3167  .Case([&](omp::TaskyieldOp) {
3168  ompBuilder->createTaskyield(builder.saveIP());
3169  return success();
3170  })
3171  .Case([&](omp::FlushOp) {
 3172  // No support in the OpenMP runtime function (__kmpc_flush) to accept
3173  // the argument list.
3174  // OpenMP standard states the following:
3175  // "An implementation may implement a flush with a list by ignoring
3176  // the list, and treating it the same as a flush without a list."
3177  //
 3178  // The argument list is discarded so that a flush with a list is treated
 3179  // the same as a flush without a list.
3180  ompBuilder->createFlush(builder.saveIP());
3181  return success();
3182  })
3183  .Case([&](omp::ParallelOp op) {
3184  return convertOmpParallel(op, builder, moduleTranslation);
3185  })
3186  .Case([&](omp::ReductionOp reductionOp) {
3187  return convertOmpReductionOp(reductionOp, builder, moduleTranslation);
3188  })
3189  .Case([&](omp::MasterOp) {
3190  return convertOmpMaster(*op, builder, moduleTranslation);
3191  })
3192  .Case([&](omp::CriticalOp) {
3193  return convertOmpCritical(*op, builder, moduleTranslation);
3194  })
3195  .Case([&](omp::OrderedRegionOp) {
3196  return convertOmpOrderedRegion(*op, builder, moduleTranslation);
3197  })
3198  .Case([&](omp::OrderedOp) {
3199  return convertOmpOrdered(*op, builder, moduleTranslation);
3200  })
3201  .Case([&](omp::WsloopOp) {
3202  return convertOmpWsloop(*op, builder, moduleTranslation);
3203  })
3204  .Case([&](omp::SimdOp) {
3205  return convertOmpSimd(*op, builder, moduleTranslation);
3206  })
3207  .Case([&](omp::AtomicReadOp) {
3208  return convertOmpAtomicRead(*op, builder, moduleTranslation);
3209  })
3210  .Case([&](omp::AtomicWriteOp) {
3211  return convertOmpAtomicWrite(*op, builder, moduleTranslation);
3212  })
3213  .Case([&](omp::AtomicUpdateOp op) {
3214  return convertOmpAtomicUpdate(op, builder, moduleTranslation);
3215  })
3216  .Case([&](omp::AtomicCaptureOp op) {
3217  return convertOmpAtomicCapture(op, builder, moduleTranslation);
3218  })
3219  .Case([&](omp::SectionsOp) {
3220  return convertOmpSections(*op, builder, moduleTranslation);
3221  })
3222  .Case([&](omp::SingleOp op) {
3223  return convertOmpSingle(op, builder, moduleTranslation);
3224  })
3225  .Case([&](omp::TeamsOp op) {
3226  return convertOmpTeams(op, builder, moduleTranslation);
3227  })
3228  .Case([&](omp::TaskOp op) {
3229  return convertOmpTaskOp(op, builder, moduleTranslation);
3230  })
3231  .Case([&](omp::TaskgroupOp op) {
3232  return convertOmpTaskgroupOp(op, builder, moduleTranslation);
3233  })
3234  .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareReductionOp,
3235  omp::CriticalDeclareOp>([](auto op) {
3236  // `yield` and `terminator` can be just omitted. The block structure
3237  // was created in the region that handles their parent operation.
3238  // `declare_reduction` will be used by reductions and is not
3239  // converted directly, skip it.
3240  // `critical.declare` is only used to declare names of critical
3241  // sections which will be used by `critical` ops and hence can be
3242  // ignored for lowering. The OpenMP IRBuilder will create unique
 3243  // names for critical sections.
3244  return success();
3245  })
3246  .Case([&](omp::ThreadprivateOp) {
3247  return convertOmpThreadprivate(*op, builder, moduleTranslation);
3248  })
3249  .Case<omp::TargetDataOp, omp::TargetEnterDataOp, omp::TargetExitDataOp,
3250  omp::TargetUpdateOp>([&](auto op) {
3251  return convertOmpTargetData(op, builder, moduleTranslation);
3252  })
3253  .Case([&](omp::TargetOp) {
3254  return convertOmpTarget(*op, builder, moduleTranslation);
3255  })
3256  .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
3257  [&](auto op) {
 3258  // No-op; these are handled by the relevant owning operations, e.g.
 3259  // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp, etc.,
 3260  // and then discarded.
3261  return success();
3262  })
3263  .Default([&](Operation *inst) {
3264  return inst->emitError("unsupported OpenMP operation: ")
3265  << inst->getName();
3266  });
3267 }
3268 
3269 static LogicalResult
3270 convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
3271  LLVM::ModuleTranslation &moduleTranslation) {
3272  return convertHostOrTargetOperation(op, builder, moduleTranslation);
3273 }
3274 
3275 static LogicalResult
3276 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
3277  LLVM::ModuleTranslation &moduleTranslation) {
3278  if (isa<omp::TargetOp>(op))
3279  return convertOmpTarget(*op, builder, moduleTranslation);
3280  if (isa<omp::TargetDataOp>(op))
3281  return convertOmpTargetData(op, builder, moduleTranslation);
3282  bool interrupted =
3283  op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
3284  if (isa<omp::TargetOp>(oper)) {
3285  if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
3286  return WalkResult::interrupt();
3287  return WalkResult::skip();
3288  }
3289  if (isa<omp::TargetDataOp>(oper)) {
3290  if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
3291  return WalkResult::interrupt();
3292  return WalkResult::skip();
3293  }
3294  return WalkResult::advance();
3295  }).wasInterrupted();
3296  return failure(interrupted);
3297 }
3298 
3299 namespace {
3300 
3301 /// Implementation of the dialect interface that converts operations belonging
3302 /// to the OpenMP dialect to LLVM IR.
 3303 class OpenMPDialectLLVMIRTranslationInterface
 3304  : public LLVMTranslationDialectInterface {
 3305 public:
 3306  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
 3307 
3308  /// Translates the given operation to LLVM IR using the provided IR builder
3309  /// and saving the state in `moduleTranslation`.
 3310  LogicalResult
 3311  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
3312  LLVM::ModuleTranslation &moduleTranslation) const final;
3313 
3314  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
3315  /// runtime calls, or operation amendments
 3316  LogicalResult
 3317  amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
3318  NamedAttribute attribute,
3319  LLVM::ModuleTranslation &moduleTranslation) const final;
3320 };
3321 
3322 } // namespace
3323 
3324 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
3325  Operation *op, ArrayRef<llvm::Instruction *> instructions,
3326  NamedAttribute attribute,
3327  LLVM::ModuleTranslation &moduleTranslation) const {
 3328  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
 3329  attribute.getName())
3330  .Case("omp.is_target_device",
3331  [&](Attribute attr) {
3332  if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
3333  llvm::OpenMPIRBuilderConfig &config =
3334  moduleTranslation.getOpenMPBuilder()->Config;
3335  config.setIsTargetDevice(deviceAttr.getValue());
3336  return success();
3337  }
3338  return failure();
3339  })
3340  .Case("omp.is_gpu",
3341  [&](Attribute attr) {
3342  if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
3343  llvm::OpenMPIRBuilderConfig &config =
3344  moduleTranslation.getOpenMPBuilder()->Config;
3345  config.setIsGPU(gpuAttr.getValue());
3346  return success();
3347  }
3348  return failure();
3349  })
3350  .Case("omp.host_ir_filepath",
3351  [&](Attribute attr) {
3352  if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
3353  llvm::OpenMPIRBuilder *ompBuilder =
3354  moduleTranslation.getOpenMPBuilder();
3355  ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
3356  return success();
3357  }
3358  return failure();
3359  })
3360  .Case("omp.flags",
3361  [&](Attribute attr) {
3362  if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
3363  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
3364  return failure();
3365  })
3366  .Case("omp.version",
3367  [&](Attribute attr) {
3368  if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
3369  llvm::OpenMPIRBuilder *ompBuilder =
3370  moduleTranslation.getOpenMPBuilder();
3371  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
3372  versionAttr.getVersion());
3373  return success();
3374  }
3375  return failure();
3376  })
3377  .Case("omp.declare_target",
3378  [&](Attribute attr) {
3379  if (auto declareTargetAttr =
3380  dyn_cast<omp::DeclareTargetAttr>(attr))
3381  return convertDeclareTargetAttr(op, declareTargetAttr,
3382  moduleTranslation);
3383  return failure();
3384  })
3385  .Case("omp.requires",
3386  [&](Attribute attr) {
3387  if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
3388  using Requires = omp::ClauseRequires;
3389  Requires flags = requiresAttr.getValue();
3390  llvm::OpenMPIRBuilderConfig &config =
3391  moduleTranslation.getOpenMPBuilder()->Config;
3392  config.setHasRequiresReverseOffload(
3393  bitEnumContainsAll(flags, Requires::reverse_offload));
3394  config.setHasRequiresUnifiedAddress(
3395  bitEnumContainsAll(flags, Requires::unified_address));
3396  config.setHasRequiresUnifiedSharedMemory(
3397  bitEnumContainsAll(flags, Requires::unified_shared_memory));
3398  config.setHasRequiresDynamicAllocators(
3399  bitEnumContainsAll(flags, Requires::dynamic_allocators));
3400  return success();
3401  }
3402  return failure();
3403  })
3404  .Default([](Attribute) {
3405  // Fall through for omp attributes that do not require lowering.
3406  return success();
3407  })(attribute.getValue());
3408 
3409  return failure();
3410 }
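// The discrete attributes dispatched on above arrive on the top-level module,
// e.g. (attribute syntax abbreviated):
//   module attributes {omp.is_target_device = true, omp.is_gpu = true,
//                      omp.version = #omp.version<version = 50>} { ... }
// and are simply forwarded into the OpenMPIRBuilderConfig or LLVM module
// flags.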
3411 
3412 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
3413 /// (including OpenMP runtime calls).
3414 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
3415  Operation *op, llvm::IRBuilderBase &builder,
3416  LLVM::ModuleTranslation &moduleTranslation) const {
3417 
3418  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3419  if (ompBuilder->Config.isTargetDevice()) {
3420  if (isTargetDeviceOp(op)) {
3421  return convertTargetDeviceOp(op, builder, moduleTranslation);
3422  } else {
3423  return convertTargetOpsInNest(op, builder, moduleTranslation);
3424  }
3425  }
3426  return convertHostOrTargetOperation(op, builder, moduleTranslation);
3427 }
3428 
 3429 void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {
 3430  registry.insert<omp::OpenMPDialect>();
3431  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
3432  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
3433  });
3434 }
3435 
 3436 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) {
 3437  DialectRegistry registry;
 3438  registerOpenMPDialectTranslation(registry);
 3439  context.appendDialectRegistry(registry);
3440 }
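// Typical usage from a translation driver, as a sketch (assumes the standard
// translateModuleToLLVMIR entry point from mlir/Target/LLVMIR/Export.h and
// that the LLVM dialect translation is registered as well):
//   DialectRegistry registry;
//   registerLLVMDialectTranslation(registry);
//   registerOpenMPDialectTranslation(registry);
//   MLIRContext context(registry);
//   // ... build or parse a ModuleOp `module` within `context` ...
//   llvm::LLVMContext llvmContext;
//   std::unique_ptr<llvm::Module> llvmModule =
//       translateModuleToLLVMIR(module, llvmContext);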