OpenMPToLLVMIRTranslation.cpp
1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
18 #include "mlir/IR/IRMapping.h"
19 #include "mlir/IR/Operation.h"
20 #include "mlir/Support/LLVM.h"
24 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/TypeSwitch.h"
28 #include "llvm/Frontend/OpenMP/OMPConstants.h"
29 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
30 #include "llvm/IR/DebugInfoMetadata.h"
31 #include "llvm/IR/IRBuilder.h"
32 #include "llvm/IR/ReplaceConstant.h"
33 #include "llvm/Support/FileSystem.h"
34 #include "llvm/TargetParser/Triple.h"
35 #include "llvm/Transforms/Utils/ModuleUtils.h"
36 
37 #include <any>
38 #include <cstdint>
39 #include <iterator>
40 #include <numeric>
41 #include <optional>
42 #include <utility>
43 
44 using namespace mlir;
45 
46 namespace {
47 static llvm::omp::ScheduleKind
48 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
49  if (!schedKind.has_value())
50  return llvm::omp::OMP_SCHEDULE_Default;
51  switch (schedKind.value()) {
52  case omp::ClauseScheduleKind::Static:
53  return llvm::omp::OMP_SCHEDULE_Static;
54  case omp::ClauseScheduleKind::Dynamic:
55  return llvm::omp::OMP_SCHEDULE_Dynamic;
56  case omp::ClauseScheduleKind::Guided:
57  return llvm::omp::OMP_SCHEDULE_Guided;
58  case omp::ClauseScheduleKind::Auto:
59  return llvm::omp::OMP_SCHEDULE_Auto;
60  case omp::ClauseScheduleKind::Runtime:
61  return llvm::omp::OMP_SCHEDULE_Runtime;
62  }
63  llvm_unreachable("unhandled schedule clause argument");
64 }
65 
66 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
67 /// insertion points for allocas.
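/// Frames of this kind are pushed onto the ModuleTranslation stack before
/// translating the body of region-carrying constructs such as omp.parallel
/// and omp.task, so that findAllocaInsertPoint below can reuse the alloca
/// block chosen by the surrounding construct.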
68 class OpenMPAllocaStackFrame
69  : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
70 public:
71  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)
72 
73  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
74  : allocaInsertPoint(allocaIP) {}
75  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
76 };
77 
78 /// ModuleTranslation stack frame containing the partial mapping between MLIR
79 /// values and their LLVM IR equivalents.
80 class OpenMPVarMappingStackFrame
81  : public LLVM::ModuleTranslation::StackFrameBase<
82  OpenMPVarMappingStackFrame> {
83 public:
84  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame)
85 
86  explicit OpenMPVarMappingStackFrame(
87  const DenseMap<Value, llvm::Value *> &mapping)
88  : mapping(mapping) {}
89 
90  DenseMap<Value, llvm::Value *> mapping;
91 };
92 } // namespace
93 
94 /// Find the insertion point for allocas given the current insertion point for
95 /// normal operations in the builder.
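/// An enclosing OpenMPAllocaStackFrame, if present, takes precedence;
/// otherwise the entry block of the current function is used.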
96 static llvm::OpenMPIRBuilder::InsertPointTy
97 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
98  const LLVM::ModuleTranslation &moduleTranslation) {
99  // If there is an alloca insertion point on the stack, i.e. we are in a
100  // nested operation and a specific point was provided by some surrounding
101  // operation, use it.
102  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
103  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
104  [&](const OpenMPAllocaStackFrame &frame) {
105  allocaInsertPoint = frame.allocaInsertPoint;
106  return WalkResult::interrupt();
107  });
108  if (walkResult.wasInterrupted())
109  return allocaInsertPoint;
110 
111  // Otherwise, insert into the entry block of the surrounding function.
112  // If the current IRBuilder InsertPoint is the function's entry, it cannot
113  // also be used for alloca insertion, which would result in insertion order
114  // confusion. Create a new BasicBlock for the Builder and use the entry block
115  // for the allocas.
116  // TODO: Create a dedicated alloca BasicBlock at function creation such that
117  // we do not need to move the current InsertPoint here.
118  if (builder.GetInsertBlock() ==
119  &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
120  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
121  "Assuming end of basic block");
122  llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
123  builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
124  builder.GetInsertBlock()->getNextNode());
125  builder.CreateBr(entryBB);
126  builder.SetInsertPoint(entryBB);
127  }
128 
129  llvm::BasicBlock &funcEntryBlock =
130  builder.GetInsertBlock()->getParent()->getEntryBlock();
131  return llvm::OpenMPIRBuilder::InsertPointTy(
132  &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
133 }
134 
135 /// Converts the given region that appears within an OpenMP dialect operation to
136 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
137 /// region, and a branch from any block with a successor-less OpenMP terminator
138 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
139 /// of the continuation block if provided.
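///
/// For illustration, a region attached to an operation such as
///   omp.parallel {
///     // ... body ...
///     omp.terminator
///   }
/// is emitted as blocks named after `blockName` (e.g. "omp.par.region" for
/// omp.parallel); the `omp.terminator` is replaced by a branch to the newly
/// created "omp.region.cont" block, and any `omp.yield` operands become
/// incoming values of that block's PHI nodes.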
140 static llvm::BasicBlock *convertOmpOpRegions(
141  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
142  LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
143  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
144  llvm::BasicBlock *continuationBlock =
145  splitBB(builder, true, "omp.region.cont");
146  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
147 
148  llvm::LLVMContext &llvmContext = builder.getContext();
149  for (Block &bb : region) {
150  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
151  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
152  builder.GetInsertBlock()->getNextNode());
153  moduleTranslation.mapBlock(&bb, llvmBB);
154  }
155 
156  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
157 
158  // Terminators (namely YieldOp) may forward values out of the region that
159  // need to be available in the continuation block. Collect the types of these
160  // operands in preparation for creating PHI nodes.
161  SmallVector<llvm::Type *> continuationBlockPHITypes;
162  bool operandsProcessed = false;
163  unsigned numYields = 0;
164  for (Block &bb : region.getBlocks()) {
165  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
166  if (!operandsProcessed) {
167  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
168  continuationBlockPHITypes.push_back(
169  moduleTranslation.convertType(yield->getOperand(i).getType()));
170  }
171  operandsProcessed = true;
172  } else {
173  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
174  "mismatching number of values yielded from the region");
175  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
176  llvm::Type *operandType =
177  moduleTranslation.convertType(yield->getOperand(i).getType());
178  (void)operandType;
179  assert(continuationBlockPHITypes[i] == operandType &&
180  "values of mismatching types yielded from the region");
181  }
182  }
183  numYields++;
184  }
185  }
186 
187  // Insert PHI nodes in the continuation block for any values forwarded by the
188  // terminators in this region.
189  if (!continuationBlockPHITypes.empty())
190  assert(
191  continuationBlockPHIs &&
192  "expected continuation block PHIs if converted regions yield values");
193  if (continuationBlockPHIs) {
194  llvm::IRBuilderBase::InsertPointGuard guard(builder);
195  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
196  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
197  for (llvm::Type *ty : continuationBlockPHITypes)
198  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
199  }
200 
201  // Convert blocks one by one in topological order to ensure
202  // defs are converted before uses.
203  SetVector<Block *> blocks = getBlocksSortedByDominance(region);
204  for (Block *bb : blocks) {
205  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
206  // Retarget the branch of the entry block to the entry block of the
207  // converted region (regions are single-entry).
208  if (bb->isEntryBlock()) {
209  assert(sourceTerminator->getNumSuccessors() == 1 &&
210  "provided entry block has multiple successors");
211  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
212  "ContinuationBlock is not the successor of the entry block");
213  sourceTerminator->setSuccessor(0, llvmBB);
214  }
215 
216  llvm::IRBuilderBase::InsertPointGuard guard(builder);
217  if (failed(
218  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) {
219  bodyGenStatus = failure();
220  return continuationBlock;
221  }
222 
223  // Special handling for `omp.yield` and `omp.terminator` (we may have more
224  // than one): they return control to the parent OpenMP dialect operation, so
225  // we replace them with a branch to the continuation block. We handle this
226  // here to avoid relying on inter-function communication through the
227  // ModuleTranslation class to set up the correct insertion point. This is
228  // also consistent with MLIR's idiom of handling special region terminators
229  // in the same code that handles the region-owning operation.
230  Operation *terminator = bb->getTerminator();
231  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
232  builder.CreateBr(continuationBlock);
233 
234  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
235  (*continuationBlockPHIs)[i]->addIncoming(
236  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
237  }
238  }
239  // After all blocks have been traversed and values mapped, connect the PHI
240  // nodes to the results of preceding blocks.
241  LLVM::detail::connectPHINodes(region, moduleTranslation);
242 
243  // Remove the blocks and values defined in this region from the mapping since
244  // they are not visible outside of this region. This allows the same region to
245  // be converted several times, that is cloned, without clashes, and slightly
246  // speeds up the lookups.
247  moduleTranslation.forgetMapping(region);
248 
249  return continuationBlock;
250 }
251 
252 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
253 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
254  switch (kind) {
255  case omp::ClauseProcBindKind::Close:
256  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
257  case omp::ClauseProcBindKind::Master:
258  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
259  case omp::ClauseProcBindKind::Primary:
260  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
261  case omp::ClauseProcBindKind::Spread:
262  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
263  }
264  llvm_unreachable("Unknown ClauseProcBindKind kind");
265 }
266 
267 /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
268 static LogicalResult
269 convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
270  LLVM::ModuleTranslation &moduleTranslation) {
271  auto maskedOp = cast<omp::MaskedOp>(opInst);
272  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
273  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
274  // relying on captured variables.
275  LogicalResult bodyGenStatus = success();
276 
277  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
278  // MaskedOp has only one region associated with it.
279  auto &region = maskedOp.getRegion();
280  builder.restoreIP(codeGenIP);
281  convertOmpOpRegions(region, "omp.masked.region", builder, moduleTranslation,
282  bodyGenStatus);
283  };
284 
285  // TODO: Perform finalization actions for variables. This has to be
286  // called for variables which have destructors/finalizers.
287  auto finiCB = [&](InsertPointTy codeGenIP) {};
288 
289  llvm::Value *filterVal = nullptr;
290  if (auto filterVar = maskedOp.getFilteredThreadId()) {
291  filterVal = moduleTranslation.lookupValue(filterVar);
292  } else {
293  llvm::LLVMContext &llvmContext = builder.getContext();
294  filterVal =
295  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), /*V=*/0);
296  }
297  assert(filterVal != nullptr);
298  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
299  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMasked(
300  ompLoc, bodyGenCB, finiCB, filterVal));
301  return success();
302 }
303 
304 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
305 static LogicalResult
306 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
307  LLVM::ModuleTranslation &moduleTranslation) {
308  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
309  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
310  // relying on captured variables.
311  LogicalResult bodyGenStatus = success();
312 
313  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
314  // MasterOp has only one region associated with it.
315  auto &region = cast<omp::MasterOp>(opInst).getRegion();
316  builder.restoreIP(codeGenIP);
317  convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation,
318  bodyGenStatus);
319  };
320 
321  // TODO: Perform finalization actions for variables. This has to be
322  // called for variables which have destructors/finalizers.
323  auto finiCB = [&](InsertPointTy codeGenIP) {};
324 
325  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
326  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
327  ompLoc, bodyGenCB, finiCB));
328  return success();
329 }
330 
331 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
332 static LogicalResult
333 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
334  LLVM::ModuleTranslation &moduleTranslation) {
335  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
336  auto criticalOp = cast<omp::CriticalOp>(opInst);
337  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
338  // relying on captured variables.
339  LogicalResult bodyGenStatus = success();
340 
341  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
342  // CriticalOp has only one region associated with it.
343  auto &region = cast<omp::CriticalOp>(opInst).getRegion();
344  builder.restoreIP(codeGenIP);
345  convertOmpOpRegions(region, "omp.critical.region", builder,
346  moduleTranslation, bodyGenStatus);
347  };
348 
349  // TODO: Perform finalization actions for variables. This has to be
350  // called for variables which have destructors/finalizers.
351  auto finiCB = [&](InsertPointTy codeGenIP) {};
352 
353  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
354  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
355  llvm::Constant *hint = nullptr;
356 
357  // If the critical construct is named, the hint is taken from the
358  // corresponding omp.critical.declare.
358  if (criticalOp.getNameAttr()) {
359  // The verifiers in the OpenMP dialect guarantee that all the pointers are
360  // non-null.
361  auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
362  auto criticalDeclareOp =
363  SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
364  symbolRef);
365  hint = llvm::ConstantInt::get(
366  llvm::Type::getInt32Ty(llvmContext),
367  static_cast<int>(criticalDeclareOp.getHintVal()));
368  }
369  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(
370  ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint));
371  return success();
372 }
373 
374 /// Populates `reductions` with reduction declarations used in the given loop.
375 template <typename T>
376 static void
377 collectReductionDecls(T loop,
378  SmallVectorImpl<omp::DeclareReductionOp> &reductions) {
379  std::optional<ArrayAttr> attr = loop.getReductions();
380  if (!attr)
381  return;
382 
383  reductions.reserve(reductions.size() + loop.getNumReductionVars());
384  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
385  reductions.push_back(
386  SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
387  loop, symbolRef));
388  }
389 }
390 
391 /// Translates the blocks contained in the given region and appends them at
392 /// the current insertion point of `builder`. The operations of the entry block
393 /// are appended to the current insertion block. If set, `continuationBlockArgs`
394 /// is populated with translated values that correspond to the values
395 /// omp.yield'ed from the region.
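/// Single-block regions are inlined directly into the current insertion block;
/// multi-block regions are lowered through convertOmpOpRegions and the builder
/// is left at the beginning of the continuation block.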
396 static LogicalResult inlineConvertOmpRegions(
397  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
398  LLVM::ModuleTranslation &moduleTranslation,
399  SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
400  if (region.empty())
401  return success();
402 
403  // Special case for single-block regions: insert the operations directly
404  // into the current block without creating additional basic blocks.
405  if (llvm::hasSingleElement(region)) {
406  llvm::Instruction *potentialTerminator =
407  builder.GetInsertBlock()->empty() ? nullptr
408  : &builder.GetInsertBlock()->back();
409 
410  if (potentialTerminator && potentialTerminator->isTerminator())
411  potentialTerminator->removeFromParent();
412  moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
413 
414  if (failed(moduleTranslation.convertBlock(
415  region.front(), /*ignoreArguments=*/true, builder)))
416  return failure();
417 
418  // The continuation arguments are simply the translated terminator operands.
419  if (continuationBlockArgs)
420  llvm::append_range(
421  *continuationBlockArgs,
422  moduleTranslation.lookupValues(region.front().back().getOperands()));
423 
424  // Drop the mapping that is no longer necessary so that the same region can
425  // be processed multiple times.
426  moduleTranslation.forgetMapping(region);
427 
428  if (potentialTerminator && potentialTerminator->isTerminator()) {
429  llvm::BasicBlock *block = builder.GetInsertBlock();
430  if (block->empty()) {
431  // this can happen for really simple reduction init regions e.g.
432  // %0 = llvm.mlir.constant(0 : i32) : i32
433  // omp.yield(%0 : i32)
434  // because the llvm.mlir.constant (MLIR op) isn't converted into any
435  // llvm op
436  potentialTerminator->insertInto(block, block->begin());
437  } else {
438  potentialTerminator->insertAfter(&block->back());
439  }
440  }
441 
442  return success();
443  }
444 
445  LogicalResult bodyGenStatus = success();
446  SmallVector<llvm::Value *> phis;
447  llvm::BasicBlock *continuationBlock = convertOmpOpRegions(
448  region, blockName, builder, moduleTranslation, bodyGenStatus, &phis);
449  if (failed(bodyGenStatus))
450  return failure();
451  if (continuationBlockArgs)
452  llvm::append_range(*continuationBlockArgs, phis);
453  builder.SetInsertPoint(continuationBlock,
454  continuationBlock->getFirstInsertionPt());
455  return success();
456 }
457 
458 namespace {
459 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
460 /// store lambdas with capture.
461 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
462  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
463  llvm::Value *&)>;
464 using OwningAtomicReductionGen =
465  std::function<llvm::OpenMPIRBuilder::InsertPointTy(
466  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
467  llvm::Value *)>;
468 } // namespace
469 
470 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
471 /// reduction declaration. The generator uses `builder` but ignores its
472 /// insertion point.
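///
/// Schematically, for a declaration along the lines of
///   omp.declare_reduction @add_f32 : f32
///   init { ... omp.yield(%zero : f32) }
///   combiner { ^bb0(%lhs: f32, %rhs: f32): ... omp.yield(%sum : f32) }
/// the generator maps %lhs and %rhs to the incoming LLVM values, inlines the
/// combiner region, and reports the single yielded value through `result`.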
473 static OwningReductionGen
474 makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
475  LLVM::ModuleTranslation &moduleTranslation) {
476  // The lambda is mutable because we need access to non-const methods of decl
477  // (which aren't actually mutating it), and we must capture decl by-value to
478  // avoid the dangling reference after the parent function returns.
479  OwningReductionGen gen =
480  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
481  llvm::Value *lhs, llvm::Value *rhs,
482  llvm::Value *&result) mutable {
483  Region &reductionRegion = decl.getReductionRegion();
484  moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs);
485  moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs);
486  builder.restoreIP(insertPoint);
487  SmallVector<llvm::Value *> phis;
488  if (failed(inlineConvertOmpRegions(reductionRegion,
489  "omp.reduction.nonatomic.body",
490  builder, moduleTranslation, &phis)))
491  return llvm::OpenMPIRBuilder::InsertPointTy();
492  assert(phis.size() == 1);
493  result = phis[0];
494  return builder.saveIP();
495  };
496  return gen;
497 }
498 
499 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
500 /// given reduction declaration. The generator uses `builder` but ignores its
501 /// insertion point. Returns null if there is no atomic region available in the
502 /// reduction declaration.
503 static OwningAtomicReductionGen
504 makeAtomicReductionGen(omp::DeclareReductionOp decl,
505  llvm::IRBuilderBase &builder,
506  LLVM::ModuleTranslation &moduleTranslation) {
507  if (decl.getAtomicReductionRegion().empty())
508  return OwningAtomicReductionGen();
509 
510  // The lambda is mutable because we need access to non-const methods of decl
511  // (which aren't actually mutating it), and we must capture decl by-value to
512  // avoid the dangling reference after the parent function returns.
513  OwningAtomicReductionGen atomicGen =
514  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
515  llvm::Value *lhs, llvm::Value *rhs) mutable {
516  Region &atomicRegion = decl.getAtomicReductionRegion();
517  moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs);
518  moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs);
519  builder.restoreIP(insertPoint);
520  SmallVector<llvm::Value *> phis;
521  if (failed(inlineConvertOmpRegions(atomicRegion,
522  "omp.reduction.atomic.body", builder,
523  moduleTranslation, &phis)))
524  return llvm::OpenMPIRBuilder::InsertPointTy();
525  assert(phis.empty());
526  return builder.saveIP();
527  };
528  return atomicGen;
529 }
530 
531 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
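/// Each group of `num_loops` values from the depend vector is lowered into its
/// own createOrderedDepend call, so a single MLIR operation may expand into
/// several runtime calls.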
532 static LogicalResult
533 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
534  LLVM::ModuleTranslation &moduleTranslation) {
535  auto orderedOp = cast<omp::OrderedOp>(opInst);
536 
537  omp::ClauseDepend dependType = *orderedOp.getDependTypeVal();
538  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
539  unsigned numLoops = *orderedOp.getNumLoopsVal();
540  SmallVector<llvm::Value *> vecValues =
541  moduleTranslation.lookupValues(orderedOp.getDependVecVars());
542 
543  size_t indexVecValues = 0;
544  while (indexVecValues < vecValues.size()) {
545  SmallVector<llvm::Value *> storeValues;
546  storeValues.reserve(numLoops);
547  for (unsigned i = 0; i < numLoops; i++) {
548  storeValues.push_back(vecValues[indexVecValues]);
549  indexVecValues++;
550  }
551  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
552  findAllocaInsertPoint(builder, moduleTranslation);
553  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
554  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
555  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
556  }
557  return success();
558 }
559 
560 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
561 /// OpenMPIRBuilder.
562 static LogicalResult
563 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
564  LLVM::ModuleTranslation &moduleTranslation) {
565  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
566  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
567 
568  // TODO: The code generation for ordered simd directive is not supported yet.
569  if (orderedRegionOp.getSimd())
570  return failure();
571 
572  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
573  // relying on captured variables.
574  LogicalResult bodyGenStatus = success();
575 
576  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
577  // OrderedRegionOp has only one region associated with it.
578  auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
579  builder.restoreIP(codeGenIP);
580  convertOmpOpRegions(region, "omp.ordered.region", builder,
581  moduleTranslation, bodyGenStatus);
582  };
583 
584  // TODO: Perform finalization actions for variables. This has to be
585  // called for variables which have destructors/finalizers.
586  auto finiCB = [&](InsertPointTy codeGenIP) {};
587 
588  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
589  builder.restoreIP(
590  moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
591  ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getSimd()));
592  return bodyGenStatus;
593 }
594 
595 /// Allocate space for privatized reduction variables.
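/// By-ref reductions are skipped here; their private storage is only created
/// once the reduction initializer region has been inlined (see
/// allocAndInitializeReductionVars).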
596 template <typename T>
597 static void allocByValReductionVars(
598  T loop, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
599  LLVM::ModuleTranslation &moduleTranslation,
600  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
601  SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
602  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
603  DenseMap<Value, llvm::Value *> &reductionVariableMap,
604  llvm::ArrayRef<bool> isByRefs) {
605  llvm::IRBuilderBase::InsertPointGuard guard(builder);
606  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
607 
608  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
609  if (isByRefs[i])
610  continue;
611  llvm::Value *var = builder.CreateAlloca(
612  moduleTranslation.convertType(reductionDecls[i].getType()));
613  moduleTranslation.mapValue(reductionArgs[i], var);
614  privateReductionVariables[i] = var;
615  reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
616  }
617 }
618 
619 /// Map input argument to all reduction initialization regions
620 template <typename T>
621 static void
622 mapInitializationArg(T loop, LLVM::ModuleTranslation &moduleTranslation,
623  SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
624  unsigned i) {
625  // map input argument to the initialization region
626  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
627  Region &initializerRegion = reduction.getInitializerRegion();
628  Block &entry = initializerRegion.front();
629  assert(entry.getNumArguments() == 1 &&
630  "the initialization region has one argument");
631 
632  mlir::Value mlirSource = loop.getReductionVars()[i];
633  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
634  assert(llvmSource && "lookup reduction var");
635  moduleTranslation.mapValue(entry.getArgument(0), llvmSource);
636 }
637 
638 /// Collect reduction info
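/// Builds one OpenMPIRBuilder::ReductionInfo entry per reduction variable,
/// pairing the original variable and its private copy with the owning
/// non-atomic and (if available) atomic combiner generators.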
639 template <typename T>
640 static void collectReductionInfo(
641  T loop, llvm::IRBuilderBase &builder,
642  LLVM::ModuleTranslation &moduleTranslation,
643  SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
644  SmallVectorImpl<OwningReductionGen> &owningReductionGens,
645  SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens,
646  const ArrayRef<llvm::Value *> privateReductionVariables,
647  SmallVectorImpl<llvm::OpenMPIRBuilder::ReductionInfo> &reductionInfos) {
648  unsigned numReductions = loop.getNumReductionVars();
649 
650  for (unsigned i = 0; i < numReductions; ++i) {
651  owningReductionGens.push_back(
652  makeReductionGen(reductionDecls[i], builder, moduleTranslation));
653  owningAtomicReductionGens.push_back(
654  makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
655  }
656 
657  // Collect the reduction information.
658  reductionInfos.reserve(numReductions);
659  for (unsigned i = 0; i < numReductions; ++i) {
660  llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
661  if (owningAtomicReductionGens[i])
662  atomicGen = owningAtomicReductionGens[i];
663  llvm::Value *variable =
664  moduleTranslation.lookupValue(loop.getReductionVars()[i]);
665  reductionInfos.push_back(
666  {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
667  privateReductionVariables[i],
668  /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
669  owningReductionGens[i],
670  /*ReductionGenClang=*/nullptr, atomicGen});
671  }
672 }
673 
674 /// handling of DeclareReductionOp's cleanup region
675 static LogicalResult
676 inlineOmpRegionCleanup(llvm::SmallVectorImpl<Region *> &cleanupRegions,
677  llvm::ArrayRef<llvm::Value *> privateVariables,
678  LLVM::ModuleTranslation &moduleTranslation,
679  llvm::IRBuilderBase &builder, StringRef regionName,
680  bool shouldLoadCleanupRegionArg = true) {
681  for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
682  if (cleanupRegion->empty())
683  continue;
684 
685  // map the argument to the cleanup region
686  Block &entry = cleanupRegion->front();
687 
688  llvm::Instruction *potentialTerminator =
689  builder.GetInsertBlock()->empty() ? nullptr
690  : &builder.GetInsertBlock()->back();
691  if (potentialTerminator && potentialTerminator->isTerminator())
692  builder.SetInsertPoint(potentialTerminator);
693  llvm::Value *privateVarValue =
694  shouldLoadCleanupRegionArg
695  ? builder.CreateLoad(
696  moduleTranslation.convertType(entry.getArgument(0).getType()),
697  privateVariables[i])
698  : privateVariables[i];
699 
700  moduleTranslation.mapValue(entry.getArgument(0), privateVarValue);
701 
702  if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
703  moduleTranslation)))
704  return failure();
705 
706  // clear block argument mapping in case it needs to be re-created with a
707  // different source for another use of the same reduction decl
708  moduleTranslation.forgetMapping(*cleanupRegion);
709  }
710  return success();
711 }
712 
713 // TODO: not used by ParallelOp
714 template <class OP>
715 static LogicalResult createReductionsAndCleanup(
716  OP op, llvm::IRBuilderBase &builder,
717  LLVM::ModuleTranslation &moduleTranslation,
718  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
719  SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
720  ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef) {
721  // Process the reductions if required.
722  if (op.getNumReductionVars() == 0)
723  return success();
724 
725  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
726 
727  // Create the reduction generators. We need to own them here because
728  // ReductionInfo only accepts references to the generators.
729  SmallVector<OwningReductionGen> owningReductionGens;
730  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
731  SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
732  collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
733  owningReductionGens, owningAtomicReductionGens,
734  privateReductionVariables, reductionInfos);
735 
736  // The call to createReductions below expects the block to have a
737  // terminator. Create an unreachable instruction to serve as terminator
738  // and remove it later.
739  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
740  builder.SetInsertPoint(tempTerminator);
741  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
742  ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
743  isByRef, op.getNowait());
744  if (!contInsertPoint.getBlock())
745  return op->emitOpError() << "failed to convert reductions";
746  auto nextInsertionPoint =
747  ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
748  tempTerminator->eraseFromParent();
749  builder.restoreIP(nextInsertionPoint);
750 
751  // after the construct, deallocate private reduction variables
752  SmallVector<Region *> reductionRegions;
753  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
754  [](omp::DeclareReductionOp reductionDecl) {
755  return &reductionDecl.getCleanupRegion();
756  });
757  return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
758  moduleTranslation, builder,
759  "omp.reduction.cleanup");
760  return success();
761 }
762 
763 static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
764  if (!attr)
765  return {};
766  return *attr;
767 }
768 
769 // TODO: not used by omp.parallel
770 template <typename OP>
771 static LogicalResult allocAndInitializeReductionVars(
772  OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
773  LLVM::ModuleTranslation &moduleTranslation,
774  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
775  SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
776  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
777  DenseMap<Value, llvm::Value *> &reductionVariableMap,
778  llvm::ArrayRef<bool> isByRef) {
779  if (op.getNumReductionVars() == 0)
780  return success();
781 
782  allocByValReductionVars(op, reductionArgs, builder, moduleTranslation,
783  allocaIP, reductionDecls, privateReductionVariables,
784  reductionVariableMap, isByRef);
785 
786  // Before the loop, store the initial values of reductions into reduction
787  // variables. Although this could be done after allocas, we don't want to
788  // mess with the alloca insertion point.
789  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
790  SmallVector<llvm::Value *> phis;
791 
792  // map block argument to initializer region
793  mapInitializationArg(op, moduleTranslation, reductionDecls, i);
794 
795  if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
796  "omp.reduction.neutral", builder,
797  moduleTranslation, &phis)))
798  return failure();
799  assert(phis.size() == 1 && "expected one value to be yielded from the "
800  "reduction neutral element declaration region");
801  if (isByRef[i]) {
802  // Allocate reduction variable (which is a pointer to the real reduction
803  // variable allocated in the inlined region)
804  llvm::Value *var = builder.CreateAlloca(
805  moduleTranslation.convertType(reductionDecls[i].getType()));
806  // Store the result of the inlined region to the allocated reduction var
807  // ptr
808  builder.CreateStore(phis[0], var);
809 
810  privateReductionVariables[i] = var;
811  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
812  reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
813  } else {
814  // For the by-val case, store the neutral element into the private copy;
815  builder.CreateStore(phis[0], privateReductionVariables[i]);
816  // the rest was handled in allocByValReductionVars
817  }
818 
819  // forget the mapping for the initializer region because we might need a
820  // different mapping if this reduction declaration is re-used for a
821  // different variable
822  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
823  }
824 
825  return success();
826 }
827 
828 static LogicalResult
829 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
830  LLVM::ModuleTranslation &moduleTranslation) {
831  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
832  using StorableBodyGenCallbackTy =
833  llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
834 
835  auto sectionsOp = cast<omp::SectionsOp>(opInst);
836 
837  // TODO: Support the following clauses: private, firstprivate, lastprivate,
838  // allocate
839  if (!sectionsOp.getAllocateVars().empty() ||
840  !sectionsOp.getAllocatorsVars().empty())
841  return emitError(sectionsOp.getLoc())
842  << "allocate clause is not supported for sections construct";
843 
844  llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionVarsByref());
845  assert(isByRef.size() == sectionsOp.getNumReductionVars());
846 
847  SmallVector<omp::DeclareReductionOp> reductionDecls;
848  collectReductionDecls(sectionsOp, reductionDecls);
849  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
850  findAllocaInsertPoint(builder, moduleTranslation);
851 
852  SmallVector<llvm::Value *> privateReductionVariables(
853  sectionsOp.getNumReductionVars());
854  DenseMap<Value, llvm::Value *> reductionVariableMap;
855 
856  MutableArrayRef<BlockArgument> reductionArgs =
857  sectionsOp.getRegion().getArguments();
858 
859  if (failed(allocAndInitializeReductionVars(
860  sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
861  reductionDecls, privateReductionVariables, reductionVariableMap,
862  isByRef)))
863  return failure();
864 
865  // Store the mapping between reduction variables and their private copies on
866  // ModuleTranslation stack. It can be then recovered when translating
867  // omp.reduce operations in a separate call.
868  LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
869  moduleTranslation, reductionVariableMap);
870 
871  LogicalResult bodyGenStatus = success();
872  SmallVector<StorableBodyGenCallbackTy> sectionCBs;
873 
874  for (Operation &op : *sectionsOp.getRegion().begin()) {
875  auto sectionOp = dyn_cast<omp::SectionOp>(op);
876  if (!sectionOp) // omp.terminator
877  continue;
878 
879  Region &region = sectionOp.getRegion();
880  auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation,
881  &bodyGenStatus](InsertPointTy allocaIP,
882  InsertPointTy codeGenIP) {
883  builder.restoreIP(codeGenIP);
884 
885  // map the omp.section reduction block argument to the omp.sections block
886  // arguments
887  // TODO: this assumes that the only block arguments are reduction
888  // variables
889  assert(region.getNumArguments() ==
890  sectionsOp.getRegion().getNumArguments());
891  for (auto [sectionsArg, sectionArg] : llvm::zip_equal(
892  sectionsOp.getRegion().getArguments(), region.getArguments())) {
893  llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg);
894  assert(llvmVal);
895  moduleTranslation.mapValue(sectionArg, llvmVal);
896  }
897 
898  convertOmpOpRegions(region, "omp.section.region", builder,
899  moduleTranslation, bodyGenStatus);
900  };
901  sectionCBs.push_back(sectionCB);
902  }
903 
904  // No sections within the omp.sections operation - skip generation. This
905  // situation is only possible if there is only a terminator operation inside
906  // the sections operation.
907  if (sectionCBs.empty())
908  return success();
909 
910  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
911 
912  // TODO: Perform appropriate actions according to the data-sharing
913  // attribute (shared, private, firstprivate, ...) of variables.
914  // Currently defaults to shared.
915  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
916  llvm::Value &vPtr,
917  llvm::Value *&replacementValue) -> InsertPointTy {
918  replacementValue = &vPtr;
919  return codeGenIP;
920  };
921 
922  // TODO: Perform finalization actions for variables. This has to be
923  // called for variables which have destructors/finalizers.
924  auto finiCB = [&](InsertPointTy codeGenIP) {};
925 
926  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
927  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
928  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
929  ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
930  sectionsOp.getNowait()));
931 
932  if (failed(bodyGenStatus))
933  return bodyGenStatus;
934 
935  // Process the reductions if required.
936  return createReductionsAndCleanup(sectionsOp, builder, moduleTranslation,
937  allocaIP, reductionDecls,
938  privateReductionVariables, isByRef);
939 }
940 
941 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
942 static LogicalResult
943 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
944  LLVM::ModuleTranslation &moduleTranslation) {
945  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
946  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
947  LogicalResult bodyGenStatus = success();
948  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
949  builder.restoreIP(codegenIP);
950  convertOmpOpRegions(singleOp.getRegion(), "omp.single.region", builder,
951  moduleTranslation, bodyGenStatus);
952  };
953  auto finiCB = [&](InsertPointTy codeGenIP) {};
954 
955  // Handle copyprivate
956  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
957  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateFuncs();
958  llvm::SmallVector<llvm::Value *> llvmCPVars;
959  llvm::SmallVector<llvm::Function *> llvmCPFuncs;
960  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
961  llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
962  auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
963  singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
964  llvmCPFuncs.push_back(
965  moduleTranslation.lookupFunction(llvmFuncOp.getName()));
966  }
967 
968  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle(
969  ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars, llvmCPFuncs));
970  return bodyGenStatus;
971 }
972 
973 /// Converts an OpenMP teams construct into LLVM IR using OpenMPIRBuilder.
974 static LogicalResult
975 convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
976  LLVM::ModuleTranslation &moduleTranslation) {
977  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
978  LogicalResult bodyGenStatus = success();
979  if (!op.getAllocatorsVars().empty() || op.getReductions())
980  return op.emitError("unhandled clauses for translation to LLVM IR");
981 
982  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
983  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
984  moduleTranslation, allocaIP);
985  builder.restoreIP(codegenIP);
986  convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
987  moduleTranslation, bodyGenStatus);
988  };
989 
990  llvm::Value *numTeamsLower = nullptr;
991  if (Value numTeamsLowerVar = op.getNumTeamsLower())
992  numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);
993 
994  llvm::Value *numTeamsUpper = nullptr;
995  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
996  numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);
997 
998  llvm::Value *threadLimit = nullptr;
999  if (Value threadLimitVar = op.getThreadLimit())
1000  threadLimit = moduleTranslation.lookupValue(threadLimitVar);
1001 
1002  llvm::Value *ifExpr = nullptr;
1003  if (Value ifExprVar = op.getIfExpr())
1004  ifExpr = moduleTranslation.lookupValue(ifExprVar);
1005 
1006  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1007  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams(
1008  ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr));
1009  return bodyGenStatus;
1010 }
1011 
1012 static void
1013 buildDependData(std::optional<ArrayAttr> depends, OperandRange dependVars,
1014  LLVM::ModuleTranslation &moduleTranslation,
1015  SmallVectorImpl<llvm::OpenMPIRBuilder::DependData> &dds) {
1016  if (dependVars.empty())
1017  return;
1018  for (auto dep : llvm::zip(dependVars, depends->getValue())) {
1019  llvm::omp::RTLDependenceKindTy type;
1020  switch (
1021  cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
1022  case mlir::omp::ClauseTaskDepend::taskdependin:
1023  type = llvm::omp::RTLDependenceKindTy::DepIn;
1024  break;
1025  // The OpenMP runtime requires that the codegen for 'depend' clause for
1026  // 'out' dependency kind must be the same as codegen for 'depend' clause
1027  // with 'inout' dependency.
1028  case mlir::omp::ClauseTaskDepend::taskdependout:
1029  case mlir::omp::ClauseTaskDepend::taskdependinout:
1030  type = llvm::omp::RTLDependenceKindTy::DepInOut;
1031  break;
1032  };
1033  llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
1034  llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
1035  dds.emplace_back(dd);
1036  }
1037 }
1038 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
1039 static LogicalResult
1040 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
1041  LLVM::ModuleTranslation &moduleTranslation) {
1042  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1043  LogicalResult bodyGenStatus = success();
1044  if (taskOp.getUntiedAttr() || taskOp.getMergeableAttr() ||
1045  taskOp.getInReductions() || taskOp.getPriority() ||
1046  !taskOp.getAllocateVars().empty()) {
1047  return taskOp.emitError("unhandled clauses for translation to LLVM IR");
1048  }
1049  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1050  // Save the alloca insertion point on ModuleTranslation stack for use in
1051  // nested regions.
1052  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
1053  moduleTranslation, allocaIP);
1054 
1055  builder.restoreIP(codegenIP);
1056  convertOmpOpRegions(taskOp.getRegion(), "omp.task.region", builder,
1057  moduleTranslation, bodyGenStatus);
1058  };
1059 
1060  SmallVector<llvm::OpenMPIRBuilder::DependData> dds;
1061  buildDependData(taskOp.getDepends(), taskOp.getDependVars(),
1062  moduleTranslation, dds);
1063 
1064  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1065  findAllocaInsertPoint(builder, moduleTranslation);
1066  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1067  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask(
1068  ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
1069  moduleTranslation.lookupValue(taskOp.getFinalExpr()),
1070  moduleTranslation.lookupValue(taskOp.getIfExpr()), dds));
1071  return bodyGenStatus;
1072 }
1073 
1074 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
1075 static LogicalResult
1076 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
1077  LLVM::ModuleTranslation &moduleTranslation) {
1078  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1079  LogicalResult bodyGenStatus = success();
1080  if (!tgOp.getTaskReductionVars().empty() || !tgOp.getAllocateVars().empty()) {
1081  return tgOp.emitError("unhandled clauses for translation to LLVM IR");
1082  }
1083  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1084  builder.restoreIP(codegenIP);
1085  convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region", builder,
1086  moduleTranslation, bodyGenStatus);
1087  };
1088  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1089  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1090  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTaskgroup(
1091  ompLoc, allocaIP, bodyCB));
1092  return bodyGenStatus;
1093 }
1094 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
1095 static LogicalResult
1096 convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
1097  LLVM::ModuleTranslation &moduleTranslation) {
1098  auto wsloopOp = cast<omp::WsloopOp>(opInst);
1099  // FIXME: Here any other nested wrappers (e.g. omp.simd) are skipped, so
1100  // codegen for composite constructs like 'DO/FOR SIMD' will be the same as for
1101  // 'DO/FOR'.
1102  auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
1103 
1104  llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionVarsByref());
1105  assert(isByRef.size() == wsloopOp.getNumReductionVars());
1106 
1107  // Static is the default.
1108  auto schedule =
1109  wsloopOp.getScheduleVal().value_or(omp::ClauseScheduleKind::Static);
1110 
1111  // Find the loop configuration.
1112  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[0]);
1113  llvm::Type *ivType = step->getType();
1114  llvm::Value *chunk = nullptr;
1115  if (wsloopOp.getScheduleChunkVar()) {
1116  llvm::Value *chunkVar =
1117  moduleTranslation.lookupValue(wsloopOp.getScheduleChunkVar());
1118  chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
1119  }
1120 
1121  SmallVector<omp::DeclareReductionOp> reductionDecls;
1122  collectReductionDecls(wsloopOp, reductionDecls);
1123  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1124  findAllocaInsertPoint(builder, moduleTranslation);
1125 
1126  SmallVector<llvm::Value *> privateReductionVariables(
1127  wsloopOp.getNumReductionVars());
1128  DenseMap<Value, llvm::Value *> reductionVariableMap;
1129 
1130  MutableArrayRef<BlockArgument> reductionArgs =
1131  wsloopOp.getRegion().getArguments();
1132 
1133  if (failed(allocAndInitializeReductionVars(
1134  wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP,
1135  reductionDecls, privateReductionVariables, reductionVariableMap,
1136  isByRef)))
1137  return failure();
1138 
1139  // Store the mapping between reduction variables and their private copies on
1140  // ModuleTranslation stack. It can be then recovered when translating
1141  // omp.reduce operations in a separate call.
1142  LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
1143  moduleTranslation, reductionVariableMap);
1144 
1145  // Set up the source location value for OpenMP runtime.
1146  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1147 
1148  // Generator of the canonical loop body.
1149  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1150  // relying on captured variables.
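  // The body callback below is invoked once per collapsed loop level; the
  // loop-nest region is only converted when the innermost level is reached.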
1151  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
1152  SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
1153  LogicalResult bodyGenStatus = success();
1154  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
1155  // Make sure further conversions know about the induction variable.
1156  moduleTranslation.mapValue(
1157  loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
1158 
1159  // Capture the body insertion point for use in nested loops. BodyIP of the
1160  // CanonicalLoopInfo always points to the beginning of the entry block of
1161  // the body.
1162  bodyInsertPoints.push_back(ip);
1163 
1164  if (loopInfos.size() != loopOp.getNumLoops() - 1)
1165  return;
1166 
1167  // Convert the body of the loop.
1168  builder.restoreIP(ip);
1169  convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder,
1170  moduleTranslation, bodyGenStatus);
1171  };
1172 
1173  // Delegate actual loop construction to the OpenMP IRBuilder.
1174  // TODO: this currently assumes omp.loop_nest is semantically similar to an
1175  // SCF loop, i.e. it has a positive step and uses signed integer semantics.
1176  // Reconsider this code when the nested loop operation clearly supports more
1177  // cases.
1178  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1179  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
1180  llvm::Value *lowerBound =
1181  moduleTranslation.lookupValue(loopOp.getLowerBound()[i]);
1182  llvm::Value *upperBound =
1183  moduleTranslation.lookupValue(loopOp.getUpperBound()[i]);
1184  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]);
1185 
1186  // Make sure loop trip counts are emitted in the preheader of the outermost
1187  // loop at the latest so that they are all available when the new collapsed
1188  // loop is created below.
1189  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
1190  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
1191  if (i != 0) {
1192  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back());
1193  computeIP = loopInfos.front()->getPreheaderIP();
1194  }
1195  loopInfos.push_back(ompBuilder->createCanonicalLoop(
1196  loc, bodyGen, lowerBound, upperBound, step,
1197  /*IsSigned=*/true, loopOp.getInclusive(), computeIP));
1198 
1199  if (failed(bodyGenStatus))
1200  return failure();
1201  }
1202 
1203  // Collapse loops. Store the insertion point because LoopInfos may get
1204  // invalidated.
1205  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
1206  llvm::CanonicalLoopInfo *loopInfo =
1207  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
1208 
1209  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1210 
1211  // TODO: Handle doacross loops when the ordered clause has a parameter.
1212  bool isOrdered = wsloopOp.getOrderedVal().has_value();
1213  std::optional<omp::ScheduleModifier> scheduleModifier =
1214  wsloopOp.getScheduleModifier();
1215  bool isSimd = wsloopOp.getSimdModifier();
1216 
1217  ompBuilder->applyWorkshareLoop(
1218  ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(),
1219  convertToScheduleKind(schedule), chunk, isSimd,
1220  scheduleModifier == omp::ScheduleModifier::monotonic,
1221  scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered);
1222 
1223  // Continue building IR after the loop. Note that the LoopInfo returned by
1224  // `collapseLoops` points inside the outermost loop and is intended for
1225  // potential further loop transformations. Use the insertion point stored
1226  // before collapsing loops instead.
1227  builder.restoreIP(afterIP);
1228 
1229  // Process the reductions if required.
1230  return createReductionsAndCleanup(wsloopOp, builder, moduleTranslation,
1231  allocaIP, reductionDecls,
1232  privateReductionVariables, isByRef);
1233 }
1234 
1235 /// A RAII class that on construction replaces the region arguments of the
1236 /// parallel op (which correspond to private variables) with the actual private
1237 /// variables they correspond to. This prepares the parallel op so that it
1238 /// matches what is expected by the OMPIRBuilder.
1239 ///
1240 /// On destruction, it restores the original state of the operation so that on
1241 /// the MLIR side, the op is not affected by conversion to LLVM IR.
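/// The replacement uses mlir::replaceAllUsesInRegionWith, so only uses inside
/// the parallel region are rewritten: the constructor installs the private
/// variables and the destructor swaps the block arguments back in.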
1242 class OmpParallelOpConversionManager {
1243 public:
1244  OmpParallelOpConversionManager(omp::ParallelOp opInst)
1245  : region(opInst.getRegion()), privateVars(opInst.getPrivateVars()),
1246  privateArgBeginIdx(opInst.getNumReductionVars()),
1247  privateArgEndIdx(privateArgBeginIdx + privateVars.size()) {
1248  auto privateVarsIt = privateVars.begin();
1249 
1250  for (size_t argIdx = privateArgBeginIdx; argIdx < privateArgEndIdx;
1251  ++argIdx, ++privateVarsIt)
1252  mlir::replaceAllUsesInRegionWith(region.getArgument(argIdx),
1253  *privateVarsIt, region);
1254  }
1255 
1256  ~OmpParallelOpConversionManager() {
1257  auto privateVarsIt = privateVars.begin();
1258 
1259  for (size_t argIdx = privateArgBeginIdx; argIdx < privateArgEndIdx;
1260  ++argIdx, ++privateVarsIt)
1261  mlir::replaceAllUsesInRegionWith(*privateVarsIt,
1262  region.getArgument(argIdx), region);
1263  }
1264 
1265 private:
1266  Region &region;
1267  OperandRange privateVars;
1268  unsigned privateArgBeginIdx;
1269  unsigned privateArgEndIdx;
1270 };
1271 
1272 /// Converts the OpenMP parallel operation to LLVM IR.
1273 static LogicalResult
1274 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
1275  LLVM::ModuleTranslation &moduleTranslation) {
1276  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1277  OmpParallelOpConversionManager raii(opInst);
1278  ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionVarsByref());
1279  assert(isByRef.size() == opInst.getNumReductionVars());
1280 
1281  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1282  // relying on captured variables.
1283  LogicalResult bodyGenStatus = success();
1284  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1285 
1286  // Collect reduction declarations
1287  SmallVector<omp::DeclareReductionOp> reductionDecls;
1288  collectReductionDecls(opInst, reductionDecls);
1289  SmallVector<llvm::Value *> privateReductionVariables(
1290  opInst.getNumReductionVars());
1291 
1292  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1293  // Allocate reduction vars
1294  DenseMap<Value, llvm::Value *> reductionVariableMap;
1295 
1296  MutableArrayRef<BlockArgument> reductionArgs =
1297  opInst.getRegion().getArguments().slice(
1298  opInst.getNumAllocateVars() + opInst.getNumAllocatorsVars(),
1299  opInst.getNumReductionVars());
1300 
1301  allocByValReductionVars(opInst, reductionArgs, builder, moduleTranslation,
1302  allocaIP, reductionDecls, privateReductionVariables,
1303  reductionVariableMap, isByRef);
1304 
1305  // Initialize reduction vars
1306  builder.restoreIP(allocaIP);
1307  llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
1308  allocaIP =
1309  InsertPointTy(allocaIP.getBlock(),
1310  allocaIP.getBlock()->getTerminator()->getIterator());
1311  SmallVector<llvm::Value *> byRefVars(opInst.getNumReductionVars());
1312  for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
1313  if (isByRef[i]) {
1314  // Allocate reduction variable (which is a pointer to the real reduction
1315  // variable allocated in the inlined region)
1316  byRefVars[i] = builder.CreateAlloca(
1317  moduleTranslation.convertType(reductionDecls[i].getType()));
1318  }
1319  }
1320 
1321  builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca());
1322 
1323  for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
1324  SmallVector<llvm::Value *> phis;
1325 
1326  // map the block argument
1327  mapInitializationArg(opInst, moduleTranslation, reductionDecls, i);
1328  if (failed(inlineConvertOmpRegions(
1329  reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
1330  builder, moduleTranslation, &phis)))
1331  bodyGenStatus = failure();
1332  assert(phis.size() == 1 &&
1333  "expected one value to be yielded from the "
1334  "reduction neutral element declaration region");
1335 
1336  // mapInitializationArg finishes its block with a terminator. We need to
1337  // insert before that terminator.
1338  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1339 
1340  if (isByRef[i]) {
1341  // Store the result of the inlined region to the allocated reduction var
1342  // ptr
1343  builder.CreateStore(phis[0], byRefVars[i]);
1344 
1345  privateReductionVariables[i] = byRefVars[i];
1346  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
1347  reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]);
1348  } else {
1349  // for by-ref case the store is inside of the reduction init region
1350  builder.CreateStore(phis[0], privateReductionVariables[i]);
1351  // the rest is done in allocByValReductionVars
1352  }
1353 
1354  // clear block argument mapping in case it needs to be re-created with a
1355  // different source for another use of the same reduction decl
1356  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
1357  }
1358 
1359  // Store the mapping between reduction variables and their private copies on
1360  // ModuleTranslation stack. It can be then recovered when translating
1361  // omp.reduce operations in a separate call.
1362  LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
1363  moduleTranslation, reductionVariableMap);
1364 
1365  // Save the alloca insertion point on ModuleTranslation stack for use in
1366  // nested regions.
1367  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
1368  moduleTranslation, allocaIP);
1369 
1370  // ParallelOp has only one region associated with it.
1371  builder.restoreIP(codeGenIP);
1372  auto regionBlock =
1373  convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
1374  moduleTranslation, bodyGenStatus);
1375 
1376  // Process the reductions if required.
1377  if (opInst.getNumReductionVars() > 0) {
1378  // Collect reduction info
1379  SmallVector<OwningReductionGen> owningReductionGens;
1380  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1381  SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
1382  collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
1383  owningReductionGens, owningAtomicReductionGens,
1384  privateReductionVariables, reductionInfos);
1385 
1386  // Move to region cont block
1387  builder.SetInsertPoint(regionBlock->getTerminator());
1388 
1389  // Generate reductions from info
1390  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1391  builder.SetInsertPoint(tempTerminator);
1392 
1393  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
1394  ompBuilder->createReductions(builder.saveIP(), allocaIP,
1395  reductionInfos, isByRef, false);
1396  if (!contInsertPoint.getBlock()) {
1397  bodyGenStatus = opInst->emitOpError() << "failed to convert reductions";
1398  return;
1399  }
1400 
1401  tempTerminator->eraseFromParent();
1402  builder.restoreIP(contInsertPoint);
1403  }
1404  };
1405 
1406  SmallVector<omp::PrivateClauseOp> privatizerClones;
1407  SmallVector<llvm::Value *> privateVariables;
1408 
1409  // TODO: Perform appropriate actions according to the data-sharing
1410  // attribute (shared, private, firstprivate, ...) of variables.
1411  // Currently shared and private are supported.
1412  auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
1413  llvm::Value &, llvm::Value &vPtr,
1414  llvm::Value *&replacementValue) -> InsertPointTy {
1415  replacementValue = &vPtr;
1416 
1417  // If this is a private value, this lambda will return the corresponding
1418  // mlir value and its `PrivateClauseOp`. Otherwise, empty values are
1419  // returned.
1420  auto [privVar, privatizerClone] =
1421  [&]() -> std::pair<mlir::Value, omp::PrivateClauseOp> {
1422  if (!opInst.getPrivateVars().empty()) {
1423  auto privVars = opInst.getPrivateVars();
1424  auto privatizers = opInst.getPrivatizers();
1425 
1426  for (auto [privVar, privatizerAttr] :
1427  llvm::zip_equal(privVars, *privatizers)) {
1428  // Find the MLIR private variable corresponding to the LLVM value
1429  // being privatized.
1430  llvm::Value *llvmPrivVar = moduleTranslation.lookupValue(privVar);
1431  if (llvmPrivVar != &vPtr)
1432  continue;
1433 
1434  SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerAttr);
1435  omp::PrivateClauseOp privatizer =
1436  SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
1437  opInst, privSym);
1438 
1439  // Clone the privatizer in case it is used by more than one parallel
1440  // region. The privatizer is processed in-place (see below) before it
1441  // gets inlined in the parallel region and therefore processing the
1442  // original op is dangerous.
1443 
1444  MLIRContext &context = moduleTranslation.getContext();
1445  mlir::IRRewriter opCloner(&context);
1446  opCloner.setInsertionPoint(privatizer);
1447  auto clone = llvm::cast<mlir::omp::PrivateClauseOp>(
1448  opCloner.clone(*privatizer));
1449 
1450  // Unique the clone name to avoid clashes in the symbol table.
1451  unsigned counter = 0;
1452  SmallString<256> cloneName = SymbolTable::generateSymbolName<256>(
1453  privatizer.getSymName(),
1454  [&](llvm::StringRef candidate) {
1455  return SymbolTable::lookupNearestSymbolFrom(
1456  opInst, StringAttr::get(&context, candidate)) !=
1457  nullptr;
1458  },
1459  counter);
1460 
1461  clone.setSymName(cloneName);
1462  return {privVar, clone};
1463  }
1464  }
1465 
1466  return {mlir::Value(), omp::PrivateClauseOp()};
1467  }();
1468 
1469  if (privVar) {
1470  Region &allocRegion = privatizerClone.getAllocRegion();
1471 
1472  // If this is a `firstprivate` clause, prepare the `omp.private` op by:
1473  if (privatizerClone.getDataSharingType() ==
1474  omp::DataSharingClauseType::FirstPrivate) {
1475  auto oldAllocBackBlock = std::prev(allocRegion.end());
1476  omp::YieldOp oldAllocYieldOp =
1477  llvm::cast<omp::YieldOp>(oldAllocBackBlock->getTerminator());
1478 
1479  Region &copyRegion = privatizerClone.getCopyRegion();
1480 
1481  mlir::IRRewriter copyCloneBuilder(&moduleTranslation.getContext());
1482  // 1. Cloning the `copy` region to the end of the `alloc` region.
1483  copyCloneBuilder.cloneRegionBefore(copyRegion, allocRegion,
1484  allocRegion.end());
1485 
1486  auto newCopyRegionFrontBlock = std::next(oldAllocBackBlock);
1487  // 2. Merging the last `alloc` block with the first block in the `copy`
1488  // region clone.
1489  // 3. Re-mapping the first argument of the `copy` region to be the
1490  // argument of the `alloc` region and the second argument of the `copy`
1491  // region to be the yielded value of the `alloc` region (this is the
1492  // private clone of the privatized value).
1493  copyCloneBuilder.mergeBlocks(
1494  &*newCopyRegionFrontBlock, &*oldAllocBackBlock,
1495  {allocRegion.getArgument(0), oldAllocYieldOp.getOperand(0)});
1496 
1497  // 4. The old terminator of the `alloc` region is not needed anymore, so
1498  // delete it.
1499  oldAllocYieldOp.erase();
1500  }
1501 
1502  // Replace the privatizer block argument with the MLIR value being privatized.
1503  // This way, the body of the privatizer will be changed from using the
1504  // region/block argument to the value being privatized.
1505  auto allocRegionArg = allocRegion.getArgument(0);
1506  replaceAllUsesInRegionWith(allocRegionArg, privVar, allocRegion);
1507 
1508  auto oldIP = builder.saveIP();
1509  builder.restoreIP(allocaIP);
1510 
1511  SmallVector<llvm::Value *, 1> yieldedValues;
1512  if (failed(inlineConvertOmpRegions(allocRegion, "omp.privatizer", builder,
1513  moduleTranslation, &yieldedValues))) {
1514  opInst.emitError("failed to inline `alloc` region of an `omp.private` "
1515  "op in the parallel region");
1516  bodyGenStatus = failure();
1517  privatizerClone.erase();
1518  } else {
1519  assert(yieldedValues.size() == 1);
1520  replacementValue = yieldedValues.front();
1521 
1522  // Keep the LLVM replacement value and the op clone in case we need to
1523  // emit cleanup (i.e. deallocation) logic.
1524  privateVariables.push_back(replacementValue);
1525  privatizerClones.push_back(privatizerClone);
1526  }
1527 
1528  builder.restoreIP(oldIP);
1529  }
1530 
1531  return codeGenIP;
1532  };
1533 
1534  // TODO: Perform finalization actions for variables. This has to be
1535  // called for variables which have destructors/finalizers.
1536  auto finiCB = [&](InsertPointTy codeGenIP) {
1537  InsertPointTy oldIP = builder.saveIP();
1538  builder.restoreIP(codeGenIP);
1539 
1540  // if the reduction has a cleanup region, inline it here to finalize the
1541  // reduction variables
1542  SmallVector<Region *> reductionCleanupRegions;
1543  llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions),
1544  [](omp::DeclareReductionOp reductionDecl) {
1545  return &reductionDecl.getCleanupRegion();
1546  });
1547  if (failed(inlineOmpRegionCleanup(
1548  reductionCleanupRegions, privateReductionVariables,
1549  moduleTranslation, builder, "omp.reduction.cleanup")))
1550  bodyGenStatus = failure();
1551 
1552  SmallVector<Region *> privateCleanupRegions;
1553  llvm::transform(privatizerClones, std::back_inserter(privateCleanupRegions),
1554  [](omp::PrivateClauseOp privatizer) {
1555  return &privatizer.getDeallocRegion();
1556  });
1557 
1558  if (failed(inlineOmpRegionCleanup(
1559  privateCleanupRegions, privateVariables, moduleTranslation, builder,
1560  "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1561  bodyGenStatus = failure();
1562 
1563  builder.restoreIP(oldIP);
1564  };
1565 
1566  llvm::Value *ifCond = nullptr;
1567  if (auto ifExprVar = opInst.getIfExpr())
1568  ifCond = moduleTranslation.lookupValue(ifExprVar);
1569  llvm::Value *numThreads = nullptr;
1570  if (auto numThreadsVar = opInst.getNumThreadsVar())
1571  numThreads = moduleTranslation.lookupValue(numThreadsVar);
1572  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
1573  if (auto bind = opInst.getProcBindVal())
1574  pbKind = getProcBindKind(*bind);
1575  // TODO: Is the Parallel construct cancellable?
1576  bool isCancellable = false;
1577 
1578  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1579  findAllocaInsertPoint(builder, moduleTranslation);
1580  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1581 
1582  builder.restoreIP(
1583  ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
1584  ifCond, numThreads, pbKind, isCancellable));
1585 
1586  for (mlir::omp::PrivateClauseOp privatizerClone : privatizerClones)
1587  privatizerClone.erase();
1588 
1589  return bodyGenStatus;
1590 }
1591 
1592 /// Convert Order attribute to llvm::omp::OrderKind.
1593 static llvm::omp::OrderKind
1594 convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
1595  if (!o)
1596  return llvm::omp::OrderKind::OMP_ORDER_unknown;
1597  switch (*o) {
1598  case omp::ClauseOrderKind::Concurrent:
1599  return llvm::omp::OrderKind::OMP_ORDER_concurrent;
1600  }
1601  llvm_unreachable("Unknown ClauseOrderKind kind");
1602 }
1603 
1604 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
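/// For illustration only (sketch; exact syntax may differ between MLIR
/// versions), the input is an omp.simd wrapper around an omp.loop_nest:
///
///   omp.simd {
///     omp.loop_nest (%iv) : i64 = (%lb) to (%ub) inclusive step (%step) {
///       ...
///       omp.yield
///     }
///   }
///
/// Each loop in the nest becomes an OpenMPIRBuilder canonical loop, the loops
/// are collapsed, and simd information (simdlen, safelen, order) is attached
/// via applySimd.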
1605 static LogicalResult
1606 convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
1607  LLVM::ModuleTranslation &moduleTranslation) {
1608  auto simdOp = cast<omp::SimdOp>(opInst);
1609  auto loopOp = cast<omp::LoopNestOp>(simdOp.getWrappedLoop());
1610 
1611  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1612 
1613  // Generator of the canonical loop body.
1614  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1615  // relying on captured variables.
1616  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
1617  SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
1618  LogicalResult bodyGenStatus = success();
1619  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
1620  // Make sure further conversions know about the induction variable.
1621  moduleTranslation.mapValue(
1622  loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
1623 
1624  // Capture the body insertion point for use in nested loops. BodyIP of the
1625  // CanonicalLoopInfo always points to the beginning of the entry block of
1626  // the body.
1627  bodyInsertPoints.push_back(ip);
1628 
1629  if (loopInfos.size() != loopOp.getNumLoops() - 1)
1630  return;
1631 
1632  // Convert the body of the loop.
1633  builder.restoreIP(ip);
1634  convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder,
1635  moduleTranslation, bodyGenStatus);
1636  };
1637 
1638  // Delegate actual loop construction to the OpenMP IRBuilder.
1639  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
1640  // loop, i.e. it has a positive step, uses signed integer semantics.
1641  // Reconsider this code when the nested loop operation clearly supports more
1642  // cases.
1643  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1644  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
1645  llvm::Value *lowerBound =
1646  moduleTranslation.lookupValue(loopOp.getLowerBound()[i]);
1647  llvm::Value *upperBound =
1648  moduleTranslation.lookupValue(loopOp.getUpperBound()[i]);
1649  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]);
1650 
1651  // Make sure loop trip counts are emitted in the preheader of the outermost
1652  // loop at the latest so that they are all available for the new collapsed
1653  // loop that will be created below.
1654  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
1655  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
1656  if (i != 0) {
1657  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
1658  ompLoc.DL);
1659  computeIP = loopInfos.front()->getPreheaderIP();
1660  }
1661  loopInfos.push_back(ompBuilder->createCanonicalLoop(
1662  loc, bodyGen, lowerBound, upperBound, step,
1663  /*IsSigned=*/true, /*Inclusive=*/true, computeIP));
1664 
1665  if (failed(bodyGenStatus))
1666  return failure();
1667  }
1668 
1669  // Collapse loops.
1670  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
1671  llvm::CanonicalLoopInfo *loopInfo =
1672  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
1673 
1674  llvm::ConstantInt *simdlen = nullptr;
1675  if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
1676  simdlen = builder.getInt64(simdlenVar.value());
1677 
1678  llvm::ConstantInt *safelen = nullptr;
1679  if (std::optional<uint64_t> safelenVar = simdOp.getSafelen())
1680  safelen = builder.getInt64(safelenVar.value());
1681 
1682  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
1683  llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrderVal());
1684  ompBuilder->applySimd(loopInfo, alignedVars,
1685  simdOp.getIfExpr()
1686  ? moduleTranslation.lookupValue(simdOp.getIfExpr())
1687  : nullptr,
1688  order, simdlen, safelen);
1689 
1690  builder.restoreIP(afterIP);
1691  return success();
1692 }
1693 
1694 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
1695 static llvm::AtomicOrdering
1696 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
1697  if (!ao)
1698  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
1699 
1700  switch (*ao) {
1701  case omp::ClauseMemoryOrderKind::Seq_cst:
1702  return llvm::AtomicOrdering::SequentiallyConsistent;
1703  case omp::ClauseMemoryOrderKind::Acq_rel:
1704  return llvm::AtomicOrdering::AcquireRelease;
1705  case omp::ClauseMemoryOrderKind::Acquire:
1706  return llvm::AtomicOrdering::Acquire;
1707  case omp::ClauseMemoryOrderKind::Release:
1708  return llvm::AtomicOrdering::Release;
1709  case omp::ClauseMemoryOrderKind::Relaxed:
1710  return llvm::AtomicOrdering::Monotonic;
1711  }
1712  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
1713 }
1714 
1715 /// Convert omp.atomic.read operation to LLVM IR.
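/// For illustration only (sketch), an op such as
///   omp.atomic.read %v = %x : !llvm.ptr, i32
/// is lowered through OpenMPIRBuilder::createAtomicRead to an atomic load of
/// %x with the requested memory ordering whose result is stored to %v.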
1716 static LogicalResult
1717 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
1718  LLVM::ModuleTranslation &moduleTranslation) {
1719 
1720  auto readOp = cast<omp::AtomicReadOp>(opInst);
1721  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1722 
1723  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1724 
1725  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrderVal());
1726  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
1727  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
1728 
1729  llvm::Type *elementType =
1730  moduleTranslation.convertType(readOp.getElementType());
1731 
1732  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
1733  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
1734  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
1735  return success();
1736 }
1737 
1738 /// Converts an omp.atomic.write operation to LLVM IR.
1739 static LogicalResult
1740 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
1741  LLVM::ModuleTranslation &moduleTranslation) {
1742  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
1743  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1744 
1745  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1746  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrderVal());
1747  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
1748  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
1749  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
1750  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
1751  /*isVolatile=*/false};
1752  builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
1753  return success();
1754 }
1755 
1756 /// Converts an LLVM dialect binary operation to the corresponding enum value
1757 /// for `atomicrmw` supported binary operation.
1758 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
1759  return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op)
1760  .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
1761  .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
1762  .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
1763  .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
1764  .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
1765  .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
1766  .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
1767  .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
1768  .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
1769  .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
1770 }
1771 
1772 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
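/// For example (sketch only), an update region of the form
///   %new = llvm.add %xval, %expr : i32
///   omp.yield(%new : i32)
/// (one non-terminator op plus the terminator) can be emitted as an
/// `atomicrmw add`; regions with more operations select BAD_BINOP below and
/// fall back to a cmpxchg-style update loop.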
1773 static LogicalResult
1774 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
1775  llvm::IRBuilderBase &builder,
1776  LLVM::ModuleTranslation &moduleTranslation) {
1777  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1778 
1779  // Convert values and types.
1780  auto &innerOpList = opInst.getRegion().front().getOperations();
1781  bool isXBinopExpr{false};
1782  llvm::AtomicRMWInst::BinOp binop;
1783  mlir::Value mlirExpr;
1784  llvm::Value *llvmExpr = nullptr;
1785  llvm::Value *llvmX = nullptr;
1786  llvm::Type *llvmXElementType = nullptr;
1787  if (innerOpList.size() == 2) {
1788  // The two operations here are the update and the terminator.
1789  // Since we can identify the update operation, there is a possibility
1790  // that we can generate the atomicrmw instruction.
1791  mlir::Operation &innerOp = *opInst.getRegion().front().begin();
1792  if (!llvm::is_contained(innerOp.getOperands(),
1793  opInst.getRegion().getArgument(0))) {
1794  return opInst.emitError("no atomic update operation with region argument"
1795  " as operand found inside atomic.update region");
1796  }
1797  binop = convertBinOpToAtomic(innerOp);
1798  isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
1799  mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
1800  llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1801  } else {
1802  // Since the update region includes more than one operation
1803  // we will resort to generating a cmpxchg loop.
1804  binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
1805  }
1806  llvmX = moduleTranslation.lookupValue(opInst.getX());
1807  llvmXElementType = moduleTranslation.convertType(
1808  opInst.getRegion().getArgument(0).getType());
1809  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1810  /*isSigned=*/false,
1811  /*isVolatile=*/false};
1812 
1813  llvm::AtomicOrdering atomicOrdering =
1814  convertAtomicOrdering(opInst.getMemoryOrderVal());
1815 
1816  // Generate update code.
1817  LogicalResult updateGenStatus = success();
1818  auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus](
1819  llvm::Value *atomicx,
1820  llvm::IRBuilder<> &builder) -> llvm::Value * {
1821  Block &bb = *opInst.getRegion().begin();
1822  moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
1823  moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1824  if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1825  updateGenStatus = (opInst.emitError()
1826  << "unable to convert update operation to llvm IR");
1827  return nullptr;
1828  }
1829  omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1830  assert(yieldop && yieldop.getResults().size() == 1 &&
1831  "terminator must be omp.yield op and it must have exactly one "
1832  "argument");
1833  return moduleTranslation.lookupValue(yieldop.getResults()[0]);
1834  };
1835 
1836  // Handle ambiguous alloca, if any.
1837  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1838  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1839  builder.restoreIP(ompBuilder->createAtomicUpdate(
1840  ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn,
1841  isXBinopExpr));
1842  return updateGenStatus;
1843 }
1844 
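/// Converts an omp.atomic.capture region. Whether the capture is a prefix or a
/// postfix update is derived from the order of the nested ops: for example
/// (sketch only), { omp.atomic.read; omp.atomic.update } captures the old
/// value of x (postfix: v = x; x = x op expr), while
/// { omp.atomic.update; omp.atomic.read } captures the updated value (prefix).
/// A nested omp.atomic.write is always treated as a postfix update.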
1845 static LogicalResult
1846 convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
1847  llvm::IRBuilderBase &builder,
1848  LLVM::ModuleTranslation &moduleTranslation) {
1849  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1850  mlir::Value mlirExpr;
1851  bool isXBinopExpr = false, isPostfixUpdate = false;
1852  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
1853 
1854  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
1855  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();
1856 
1857  assert((atomicUpdateOp || atomicWriteOp) &&
1858  "internal op must be an atomic.update or atomic.write op");
1859 
1860  if (atomicWriteOp) {
1861  isPostfixUpdate = true;
1862  mlirExpr = atomicWriteOp.getExpr();
1863  } else {
1864  isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
1865  atomicCaptureOp.getAtomicUpdateOp().getOperation();
1866  auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
1867  bool isRegionArgUsed{false};
1868  // Find the binary update operation that uses the region argument
1869  // and get the expression to update
1870  for (Operation &innerOp : innerOpList) {
1871  if (innerOp.getNumOperands() == 2) {
1872  binop = convertBinOpToAtomic(innerOp);
1873  if (!llvm::is_contained(innerOp.getOperands(),
1874  atomicUpdateOp.getRegion().getArgument(0)))
1875  continue;
1876  isRegionArgUsed = true;
1877  isXBinopExpr =
1878  innerOp.getNumOperands() > 0 &&
1879  innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
1880  mlirExpr =
1881  (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
1882  break;
1883  }
1884  }
1885  if (!isRegionArgUsed)
1886  return atomicUpdateOp.emitError(
1887  "no atomic update operation with region argument"
1888  " as operand found inside atomic.update region");
1889  }
1890 
1891  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1892  llvm::Value *llvmX =
1893  moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
1894  llvm::Value *llvmV =
1895  moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
1896  llvm::Type *llvmXElementType = moduleTranslation.convertType(
1897  atomicCaptureOp.getAtomicReadOp().getElementType());
1898  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1899  /*isSigned=*/false,
1900  /*isVolatile=*/false};
1901  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
1902  /*isSigned=*/false,
1903  /*isVolatile=*/false};
1904 
1905  llvm::AtomicOrdering atomicOrdering =
1906  convertAtomicOrdering(atomicCaptureOp.getMemoryOrderVal());
1907 
1908  LogicalResult updateGenStatus = success();
1909  auto updateFn = [&](llvm::Value *atomicx,
1910  llvm::IRBuilder<> &builder) -> llvm::Value * {
1911  if (atomicWriteOp)
1912  return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
1913  Block &bb = *atomicUpdateOp.getRegion().begin();
1914  moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
1915  atomicx);
1916  moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1917  if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1918  updateGenStatus = (atomicUpdateOp.emitError()
1919  << "unable to convert update operation to llvm IR");
1920  return nullptr;
1921  }
1922  omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1923  assert(yieldop && yieldop.getResults().size() == 1 &&
1924  "terminator must be omp.yield op and it must have exactly one "
1925  "argument");
1926  return moduleTranslation.lookupValue(yieldop.getResults()[0]);
1927  };
1928 
1929  // Handle ambiguous alloca, if any.
1930  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1931  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1932  builder.restoreIP(ompBuilder->createAtomicCapture(
1933  ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
1934  binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr));
1935  return updateGenStatus;
1936 }
1937 
1938 /// Converts an OpenMP Threadprivate operation into LLVM IR using
1939 /// OpenMPIRBuilder.
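/// For example (sketch only), for a threadprivate global @g this emits a call
/// to the runtime's cached threadprivate lookup (createCachedThreadPrivate)
/// with a cache named "g.cache", and maps the op's result to the returned
/// per-thread address.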
1940 static LogicalResult
1941 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
1942  LLVM::ModuleTranslation &moduleTranslation) {
1943  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1944  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
1945 
1946  Value symAddr = threadprivateOp.getSymAddr();
1947  auto *symOp = symAddr.getDefiningOp();
1948  if (!isa<LLVM::AddressOfOp>(symOp))
1949  return opInst.emitError("Addressing symbol not found");
1950  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
1951 
1952  LLVM::GlobalOp global =
1953  addressOfOp.getGlobal(moduleTranslation.symbolTable());
1954  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
1955  llvm::Type *type = globalValue->getValueType();
1956  llvm::TypeSize typeSize =
1957  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
1958  type);
1959  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
1960  llvm::StringRef suffix = llvm::StringRef(".cache", 6);
1961  std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str();
1962  llvm::Value *callInst =
1963  moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate(
1964  ompLoc, globalValue, size, cacheName);
1965  moduleTranslation.mapValue(opInst.getResult(0), callInst);
1966  return success();
1967 }
1968 
1969 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1970 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
1971  switch (deviceClause) {
1972  case mlir::omp::DeclareTargetDeviceType::host:
1973  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1974  break;
1975  case mlir::omp::DeclareTargetDeviceType::nohost:
1976  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1977  break;
1978  case mlir::omp::DeclareTargetDeviceType::any:
1979  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1980  break;
1981  }
1982  llvm_unreachable("unhandled device clause");
1983 }
1984 
1985 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1986 convertToCaptureClauseKind(
1987  mlir::omp::DeclareTargetCaptureClause captureClause) {
1988  switch (captureClause) {
1989  case mlir::omp::DeclareTargetCaptureClause::to:
1990  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1991  case mlir::omp::DeclareTargetCaptureClause::link:
1992  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1993  case mlir::omp::DeclareTargetCaptureClause::enter:
1994  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1995  }
1996  llvm_unreachable("unhandled capture clause");
1997 }
1998 
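/// Builds the suffix appended to a declare-target global's name to form its
/// reference pointer. For example (sketch only), a non-private global gets
/// just "_decl_tgt_ref_ptr", while a private global additionally encodes the
/// file's unique ID, e.g. "_<fileid-in-hex>_decl_tgt_ref_ptr".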
1999 static llvm::SmallString<64>
2000 getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
2001  llvm::OpenMPIRBuilder &ompBuilder) {
2002  llvm::SmallString<64> suffix;
2003  llvm::raw_svector_ostream os(suffix);
2004  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
2005  auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
2006  auto fileInfoCallBack = [&loc]() {
2007  return std::pair<std::string, uint64_t>(
2008  llvm::StringRef(loc.getFilename()), loc.getLine());
2009  };
2010 
2011  os << llvm::format(
2012  "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
2013  }
2014  os << "_decl_tgt_ref_ptr";
2015 
2016  return suffix;
2017 }
2018 
2019 static bool isDeclareTargetLink(mlir::Value value) {
2020  if (auto addressOfOp =
2021  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
2022  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
2023  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
2024  if (auto declareTargetGlobal =
2025  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
2026  if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
2027  mlir::omp::DeclareTargetCaptureClause::link)
2028  return true;
2029  }
2030  return false;
2031 }
2032 
2033 // Returns the reference pointer generated by the lowering of the declare target
2034 // operation in cases where the link clause is used or the to clause is used in
2035 // USM mode.
2036 static llvm::Value *
2037 getRefPtrIfDeclareTarget(mlir::Value value,
2038  LLVM::ModuleTranslation &moduleTranslation) {
2039  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2040 
2041  // An easier way to do this may just be to keep track of any pointer
2042  // references and their mapping to their respective operation
2043  if (auto addressOfOp =
2044  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
2045  if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
2046  addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
2047  addressOfOp.getGlobalName()))) {
2048 
2049  if (auto declareTargetGlobal =
2050  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
2051  gOp.getOperation())) {
2052 
2053  // In this case, we must utilise the reference pointer generated by the
2054  // declare target operation, similar to Clang
2055  if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
2056  mlir::omp::DeclareTargetCaptureClause::link) ||
2057  (declareTargetGlobal.getDeclareTargetCaptureClause() ==
2058  mlir::omp::DeclareTargetCaptureClause::to &&
2059  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
2060  llvm::SmallString<64> suffix =
2061  getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
2062 
2063  if (gOp.getSymName().contains(suffix))
2064  return moduleTranslation.getLLVMModule()->getNamedValue(
2065  gOp.getSymName());
2066 
2067  return moduleTranslation.getLLVMModule()->getNamedValue(
2068  (gOp.getSymName().str() + suffix.str()).str());
2069  }
2070  }
2071  }
2072  }
2073 
2074  return nullptr;
2075 }
2076 
2077 // A small helper structure to contain data gathered
2078 // for map lowering and coalesce it into one area, and to
2079 // avoid extra computations such as searches in the
2080 // llvm module for lowered mapped variables or checking
2081 // if something is declare target (and retrieving the
2082 // value) more than necessary.
2083 struct MapInfoData : llvm::OpenMPIRBuilder::MapInfosTy {
2084  llvm::SmallVector<bool, 4> IsDeclareTarget;
2085  llvm::SmallVector<bool, 4> IsAMember;
2086  llvm::SmallVector<mlir::Operation *, 4> MapClause;
2087  llvm::SmallVector<llvm::Value *, 4> OriginalValue;
2088  // Stripped off array/pointer to get the underlying
2089  // element type
2090  llvm::SmallVector<llvm::Type *, 4> BaseType;
2091 
2092  /// Append arrays in \a CurInfo.
2093  void append(MapInfoData &CurInfo) {
2094  IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
2095  CurInfo.IsDeclareTarget.end());
2096  MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
2097  OriginalValue.append(CurInfo.OriginalValue.begin(),
2098  CurInfo.OriginalValue.end());
2099  BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
2100  llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
2101  }
2102 };
2103 
2104 uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
2105  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
2106  arrTy.getElementType()))
2107  return getArrayElementSizeInBits(nestedArrTy, dl);
2108  return dl.getTypeSizeInBits(arrTy.getElementType());
2109 }
2110 
2111 // This function calculates the size to be offloaded for a specified type, given
2112 // its associated map clause (which can contain bounds information that affects
2113 // the total size). The size is calculated based on the underlying element type,
2114 // e.g. given a 1-D array of ints, we calculate the size from the integer
2115 // type * number of elements in the array. This size can be used in other
2116 // calculations but is ultimately used as an argument to the OpenMP runtime's
2117 // kernel argument structure, which is generated through the combinedInfo data
2118 // structures.
2119 // This function is somewhat equivalent to Clang's getExprTypeSize inside of
2120 // CGOpenMPRuntime.cpp.
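// Worked example (illustrative): for an i32 array section with a single
// bounds op where LB = 0 and UB = 9, elementCount = (9 - 0) + 1 = 10 and the
// underlying element size is 32 bits = 4 bytes, so the emitted size is
// 10 * 4 = 40 bytes. Without bounds, the size is simply the type's byte size.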
2121 llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
2122  Operation *clauseOp, llvm::Value *basePointer,
2123  llvm::Type *baseType, llvm::IRBuilderBase &builder,
2124  LLVM::ModuleTranslation &moduleTranslation) {
2125  if (auto memberClause =
2126  mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
2127  // This calculates the size to transfer based on bounds and the underlying
2128  // element type, provided bounds have been specified (Fortran
2129  // pointers/allocatables/target and arrays that have sections specified fall
2130  // into this as well).
2131  if (!memberClause.getBounds().empty()) {
2132  llvm::Value *elementCount = builder.getInt64(1);
2133  for (auto bounds : memberClause.getBounds()) {
2134  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2135  bounds.getDefiningOp())) {
2136  // The size to be mapped, calculated from the map.info's bounds, is
2137  // elemCount * ((UB - LB) + 1); later we multiply by the underlying
2138  // element type's byte size to get the full size to be offloaded based
2139  // on the bounds
2140  elementCount = builder.CreateMul(
2141  elementCount,
2142  builder.CreateAdd(
2143  builder.CreateSub(
2144  moduleTranslation.lookupValue(boundOp.getUpperBound()),
2145  moduleTranslation.lookupValue(boundOp.getLowerBound())),
2146  builder.getInt64(1)));
2147  }
2148  }
2149 
2150  // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives
2151  // the size in inconsistent byte or bit format.
2152  uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
2153  if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
2154  underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);
2155 
2156  // The size in bytes is the number of elements x the underlying type's
2157  // size; e.g. for ptr<i32> the stored sizeInBytes is the i32's size, so
2158  // we do some on-the-fly runtime math to get the size in bytes from the
2159  // extent: ((ub - lb) + 1) * sizeInBytes. NOTE: This may need
2160  // some adjustment for members with more complex types.
2161  return builder.CreateMul(elementCount,
2162  builder.getInt64(underlyingTypeSzInBits / 8));
2163  }
2164  }
2165 
2166  return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
2167 }
2168 
2169 void collectMapDataFromMapOperands(MapInfoData &mapData,
2170  llvm::SmallVectorImpl<Value> &mapOperands,
2171  LLVM::ModuleTranslation &moduleTranslation,
2172  DataLayout &dl,
2173  llvm::IRBuilderBase &builder) {
2174  for (mlir::Value mapValue : mapOperands) {
2175  if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2176  mapValue.getDefiningOp())) {
2177  mlir::Value offloadPtr =
2178  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
2179  mapData.OriginalValue.push_back(
2180  moduleTranslation.lookupValue(offloadPtr));
2181  mapData.Pointers.push_back(mapData.OriginalValue.back());
2182 
2183  if (llvm::Value *refPtr =
2184  getRefPtrIfDeclareTarget(offloadPtr,
2185  moduleTranslation)) { // declare target
2186  mapData.IsDeclareTarget.push_back(true);
2187  mapData.BasePointers.push_back(refPtr);
2188  } else { // regular mapped variable
2189  mapData.IsDeclareTarget.push_back(false);
2190  mapData.BasePointers.push_back(mapData.OriginalValue.back());
2191  }
2192 
2193  mapData.BaseType.push_back(
2194  moduleTranslation.convertType(mapOp.getVarType()));
2195  mapData.Sizes.push_back(
2196  getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
2197  mapData.BaseType.back(), builder, moduleTranslation));
2198  mapData.MapClause.push_back(mapOp.getOperation());
2199  mapData.Types.push_back(
2200  llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value()));
2201  mapData.Names.push_back(LLVM::createMappingInformation(
2202  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
2203  mapData.DevicePointers.push_back(
2204  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2205 
2206  // Check if this is a member mapping and, if it is a member of a larger
2207  // object, mark it as such.
2208  // TODO: Need better handling of members, and distinguishing of members
2209  // that are implicitly allocated on device vs explicitly passed in as
2210  // arguments.
2211  // TODO: May require some further additions to support nested record
2212  // types, i.e. member maps that can have member maps.
2213  mapData.IsAMember.push_back(false);
2214  for (mlir::Value mapValue : mapOperands) {
2215  if (auto map = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2216  mapValue.getDefiningOp())) {
2217  for (auto member : map.getMembers()) {
2218  if (member == mapOp) {
2219  mapData.IsAMember.back() = true;
2220  }
2221  }
2222  }
2223  }
2224  }
2225  }
2226 }
2227 
2228 static int getMapDataMemberIdx(MapInfoData &mapData,
2229  mlir::omp::MapInfoOp memberOp) {
2230  auto *res = llvm::find(mapData.MapClause, memberOp);
2231  assert(res != mapData.MapClause.end() &&
2232  "MapInfoOp for member not found in MapData, cannot return index");
2233  return std::distance(mapData.MapClause.begin(), res);
2234 }
2235 
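// Returns the member of `mapInfo` that is first (if `first` is true) or last
// in record-declaration order, based on the members-index attribute. For
// example (sketch only), given two members whose index rows are {0} and {2},
// first == true yields the member with row {0} and first == false the member
// with row {2}.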
2236 static mlir::omp::MapInfoOp
2237 getFirstOrLastMappedMemberPtr(mlir::omp::MapInfoOp mapInfo, bool first) {
2238  mlir::DenseIntElementsAttr indexAttr = mapInfo.getMembersIndexAttr();
2239 
2240  // Only 1 member has been mapped, we can return it.
2241  if (indexAttr.size() == 1)
2242  if (auto mapOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(
2243  mapInfo.getMembers()[0].getDefiningOp()))
2244  return mapOp;
2245 
2246  llvm::ArrayRef<int64_t> shape = indexAttr.getShapedType().getShape();
2247  llvm::SmallVector<size_t> indices(shape[0]);
2248  std::iota(indices.begin(), indices.end(), 0);
2249 
2250  llvm::sort(indices.begin(), indices.end(),
2251  [&](const size_t a, const size_t b) {
2252  auto indexValues = indexAttr.getValues<int32_t>();
2253  for (int i = 0; i < shape[1]; ++i) {
2254  int aIndex = indexValues[a * shape[1] + i];
2255  int bIndex = indexValues[b * shape[1] + i];
2256 
2257  if (aIndex == bIndex)
2258  continue;
2259 
2260  if (aIndex != -1 && bIndex == -1)
2261  return false;
2262 
2263  if (aIndex == -1 && bIndex != -1)
2264  return true;
2265 
2266  // A is earlier in the record type layout than B
2267  if (aIndex < bIndex)
2268  return first;
2269 
2270  if (bIndex < aIndex)
2271  return !first;
2272  }
2273 
2274  // Iterated the entire list and couldn't make a decision, all
2275  // elements were likely the same. Return false, since the sort
2276  // comparator should return false for equal elements.
2277  return false;
2278  });
2279 
2280  return llvm::cast<mlir::omp::MapInfoOp>(
2281  mapInfo.getMembers()[indices.front()].getDefiningOp());
2282 }
2283 
2284 /// This function calculates the array/pointer offset for map data provided
2285 /// with bounds operations, e.g. when provided something like the following:
2286 ///
2287 /// Fortran
2288 /// map(tofrom: array(2:5, 3:2))
2289 /// or
2290 /// C++
2291 /// map(tofrom: array[1:4][2:3])
2292 /// We must calculate the initial pointer offset to pass across, this function
2293 /// performs this using bounds.
2294 ///
2295 /// NOTE: While bounds are specified in row-major order, they currently need to
2296 /// be flipped for Fortran's column-major array allocation and access (as
2297 /// opposed to C++'s row-major order, hence the backwards processing where order
2298 /// is important). This is likely important to keep in mind for the future when
2299 /// we incorporate a C++ frontend: both frontends will need to agree on the
2300 /// ordering of generated bounds operations (one may have to flip them) to
2301 /// make the below lowering frontend agnostic. The offload size
2302 /// calculation may also have to be adjusted for C++.
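/// In short (illustrative summary of the code below): for an array base the
/// returned indices are simply {0, lb_(n-1), ..., lb_0}; for a pointer-like
/// base the bounds are folded into a single index
///   offset = sum_i lb_i * stride_i,
/// where stride_0 = 1 and stride_i = extent_i * stride_(i-1).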
2303 std::vector<llvm::Value *>
2304 calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
2305  llvm::IRBuilderBase &builder, bool isArrayTy,
2306  mlir::OperandRange bounds) {
2307  std::vector<llvm::Value *> idx;
2308  // There's no bounds to calculate an offset from, we can safely
2309  // ignore and return no indices.
2310  if (bounds.empty())
2311  return idx;
2312 
2313  // If we have an array type, then we have its type so can treat it as a
2314  // normal GEP instruction where the bounds operations are simply indexes
2315  // into the array. We currently do reverse order of the bounds, which
2316  // I believe leans more towards Fortran's column-major in memory.
2317  if (isArrayTy) {
2318  idx.push_back(builder.getInt64(0));
2319  for (int i = bounds.size() - 1; i >= 0; --i) {
2320  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2321  bounds[i].getDefiningOp())) {
2322  idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
2323  }
2324  }
2325  } else {
2326  // If we do not have an array type, but we have bounds, then we're dealing
2327  // with a pointer that's being treated like an array (e.g. a Fortran
2328  // descriptor's base address, a pointer pointing to the actual data) and we
2329  // only have the underlying element type, e.g. an i32 or f64, so we must
2330  // calculate the offset using a single index, which the following two loops
2331  // attempt to compute.
2332 
2333  // Calculates the size offset we need to make per row e.g. first row or
2334  // column only needs to be offset by one, but the next would have to be
2335  // the previous row/column offset multiplied by the extent of current row.
2336  //
2337  // For example ([1][10][100]):
2338  //
2339  // - First row/column we move by 1 for each index increment
2340  // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
2341  // current), i.e. 10, for each index increment
2342  // - Third row/column we would move by 10 (second row/column) * 100
2343  // (extent/size of current), i.e. 1000, for each index increment
2344  std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
2345  for (size_t i = 1; i < bounds.size(); ++i) {
2346  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2347  bounds[i].getDefiningOp())) {
2348  dimensionIndexSizeOffset.push_back(builder.CreateMul(
2349  moduleTranslation.lookupValue(boundOp.getExtent()),
2350  dimensionIndexSizeOffset[i - 1]));
2351  }
2352  }
2353 
2354  // Now that we have calculated how much we move by per index, we must
2355  // multiply each lower bound offset in indexes by the size offset we
2356  // have calculated in the previous and accumulate the results to get
2357  // our final resulting offset.
2358  for (int i = bounds.size() - 1; i >= 0; --i) {
2359  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2360  bounds[i].getDefiningOp())) {
2361  if (idx.empty())
2362  idx.emplace_back(builder.CreateMul(
2363  moduleTranslation.lookupValue(boundOp.getLowerBound()),
2364  dimensionIndexSizeOffset[i]));
2365  else
2366  idx.back() = builder.CreateAdd(
2367  idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
2368  boundOp.getLowerBound()),
2369  dimensionIndexSizeOffset[i]));
2370  }
2371  }
2372  }
2373 
2374  return idx;
2375 }
2376 
2377 // This creates two insertions into the MapInfosTy data structure for the
2378 // "parent" of a set of members, (usually a container e.g.
2379 // class/structure/derived type) when subsequent members have also been
2380 // explicitly mapped on the same map clause. Certain types, such as Fortran
2381 // descriptors are mapped like this as well, however, the members are
2382 // implicit as far as a user is concerned, but we must explicitly map them
2383 // internally.
2384 //
2385 // This function also returns the memberOfFlag for this particular parent,
2386 // which is utilised in subsequent member mappings (by modifying their map type
2387 // with it) to indicate that a member is part of this parent and should be
2388 // treated by the runtime as such. Important to achieve the correct mapping.
2389 //
2390 // This function borrows a lot from Clang's emitCombinedEntry function
2391 // inside of CGOpenMPRuntime.cpp
2392 static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
2393  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
2394  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
2395  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
2396  uint64_t mapDataIndex, bool isTargetParams) {
2397  // Map the first segment of our structure
2398  combinedInfo.Types.emplace_back(
2399  isTargetParams
2400  ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
2401  : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE);
2402  combinedInfo.DevicePointers.emplace_back(
2403  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2404  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
2405  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
2406  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
2407 
2408  // Calculate size of the parent object being mapped based on the
2409  // addresses at runtime, highAddr - lowAddr = size. This of course
2410  // doesn't factor in allocated data like pointers, hence the further
2411  // processing of members specified by users, or in the case of
2412  // Fortran pointers and allocatables, the mapping of the pointed to
2413  // data by the descriptor (which itself, is a structure containing
2414  // runtime information on the dynamically allocated data).
2415  auto parentClause =
2416  llvm::cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
2417 
2418  llvm::Value *lowAddr, *highAddr;
2419  if (!parentClause.getPartialMap()) {
2420  lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
2421  builder.getPtrTy());
2422  highAddr = builder.CreatePointerCast(
2423  builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
2424  mapData.Pointers[mapDataIndex], 1),
2425  builder.getPtrTy());
2426  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
2427  } else {
2428  auto mapOp =
2429  mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
2430  int firstMemberIdx = getMapDataMemberIdx(
2431  mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
2432  lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
2433  builder.getPtrTy());
2434  int lastMemberIdx = getMapDataMemberIdx(
2435  mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
2436  highAddr = builder.CreatePointerCast(
2437  builder.CreateGEP(mapData.BaseType[lastMemberIdx],
2438  mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
2439  builder.getPtrTy());
2440  combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
2441  }
2442 
2443  llvm::Value *size = builder.CreateIntCast(
2444  builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
2445  builder.getInt64Ty(),
2446  /*isSigned=*/false);
2447  combinedInfo.Sizes.push_back(size);
2448 
2449  // TODO: This will need to be expanded to include the whole host of logic for
2450  // the map flags that Clang currently supports (e.g. it should take the map
2451  // flag of the parent map flag, remove the OMP_MAP_TARGET_PARAM and do some
2452  // further case specific flag modifications). For the moment, it handles what
2453  // we support as expected.
2454  llvm::omp::OpenMPOffloadMappingFlags mapFlag =
2455  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
2456 
2457  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
2458  ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
2459  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
2460 
2461  // This creates the initial MEMBER_OF mapping that consists of
2462  // the parent/top level container (same as above effectively, except
2463  // with a fixed initial compile time size and separate maptype which
2464 // indicates the true map type (tofrom etc.). This parent mapping is
2465  // only relevant if the structure in its totality is being mapped,
2466  // otherwise the above suffices.
2467  if (!parentClause.getPartialMap()) {
2468  combinedInfo.Types.emplace_back(mapFlag);
2469  combinedInfo.DevicePointers.emplace_back(
2470  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2471  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
2472  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
2473  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
2474  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
2475  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
2476  }
2477  return memberOfFlag;
2478 }
2479 
2480 // The intent is to verify if the mapped data being passed is a
2481 // pointer -> pointee that requires special handling in certain cases,
2482 // e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
2483 //
2484 // There may be a better way to verify this, but unfortunately with
2485 // opaque pointers we lose the ability to easily check if something is
2486 // a pointer whilst maintaining access to the underlying type.
2487 static bool checkIfPointerMap(mlir::omp::MapInfoOp mapOp) {
2488  // If we have a varPtrPtr field assigned then the underlying type is a pointer
2489  if (mapOp.getVarPtrPtr())
2490  return true;
2491 
2492  // If the map data is declare target with a link clause, then it's represented
2493  // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
2494  // no relation to pointers.
2495  if (isDeclareTargetLink(mapOp.getVarPtr()))
2496  return true;
2497 
2498  return false;
2499 }
2500 
2501 // This function is intended to add explicit mappings of members
2502 static void processMapMembersWithParent(
2503  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
2504  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
2505  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
2506  uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
2507 
2508  auto parentClause =
2509  llvm::cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
2510 
2511  for (auto mappedMembers : parentClause.getMembers()) {
2512  auto memberClause =
2513  llvm::cast<mlir::omp::MapInfoOp>(mappedMembers.getDefiningOp());
2514  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
2515 
2516  assert(memberDataIdx >= 0 && "could not find mapped member of structure");
2517 
2518  // Use the same MemberOfFlag to indicate its link with the parent and the
2519  // other members.
2520  auto mapFlag =
2521  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType().value());
2522  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
2523  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
2524  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
2525  if (checkIfPointerMap(memberClause))
2526  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
2527 
2528  combinedInfo.Types.emplace_back(mapFlag);
2529  combinedInfo.DevicePointers.emplace_back(
2530  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2531  combinedInfo.Names.emplace_back(
2532  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
2533  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
2534  combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
2535  combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
2536  }
2537 }
2538 
2539 static void
2540 processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
2541  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
2542  bool isTargetParams, int mapDataParentIdx = -1) {
2543  // Declare Target Mappings are excluded from being marked as
2544  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
2545  // marked with OMP_MAP_PTR_AND_OBJ instead.
2546  auto mapFlag = mapData.Types[mapDataIdx];
2547  auto mapInfoOp =
2548  llvm::cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
2549 
2550  bool isPtrTy = checkIfPointerMap(mapInfoOp);
2551  if (isPtrTy)
2552  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
2553 
2554  if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx])
2555  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
2556 
2557  if (mapInfoOp.getMapCaptureType().value() ==
2558  mlir::omp::VariableCaptureKind::ByCopy &&
2559  !isPtrTy)
2560  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
2561 
2562  // if we're provided a mapDataParentIdx, then the data being mapped is
2563  // part of a larger object (in a parent <-> member mapping) and in this
2564  // case our BasePointer should be the parent.
2565  if (mapDataParentIdx >= 0)
2566  combinedInfo.BasePointers.emplace_back(
2567  mapData.BasePointers[mapDataParentIdx]);
2568  else
2569  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
2570 
2571  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
2572  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
2573  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
2574  combinedInfo.Types.emplace_back(mapFlag);
2575  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
2576 }
2577 
2578 static void processMapWithMembersOf(
2579  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
2580  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
2581  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
2582  uint64_t mapDataIndex, bool isTargetParams) {
2583  auto parentClause =
2584  llvm::cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
2585 
2586  // If we have a partial map (no parent referenced in the map clauses of the
2587  // directive, only members) and only a single member, we do not need to bind
2588  // the map of the member to the parent, we can pass the member separately.
2589  if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) {
2590  auto memberClause = llvm::cast<mlir::omp::MapInfoOp>(
2591  parentClause.getMembers()[0].getDefiningOp());
2592  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
2593  // Note: Clang treats arrays with explicit bounds that fall into this
2594  // category as a parent with map case, however, it seems this isn't a
2595  // requirement, and processing them as an individual map is fine. So,
2596  // we will handle them as individual maps for the moment, as it's
2597  // difficult for us to check this as we always require bounds to be
2598  // specified currently and it's also marginally more optimal (single
2599  // map rather than two). The difference may come from the fact that
2600  // Clang maps array without bounds as pointers (which we do not
2601  // currently do), whereas we treat them as arrays in all cases
2602  // currently.
2603  processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams,
2604  mapDataIndex);
2605  return;
2606  }
2607 
2608  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
2609  mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
2610  combinedInfo, mapData, mapDataIndex, isTargetParams);
2611  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
2612  combinedInfo, mapData, mapDataIndex,
2613  memberOfParentFlag);
2614 }
2615 
2616 // This is a variation on Clang's GenerateOpenMPCapturedVars, which
2617 // generates different operation (e.g. load/store) combinations for
2618 // arguments to the kernel, based on map capture kinds which are then
2619 // utilised in the combinedInfo in place of the original Map value.
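// For example (sketch only): a scalar captured ByCopy is loaded, stored into a
// fresh ".casted" temporary created at the alloca insertion point, and that
// temporary becomes both the BasePointer and Pointer; a ByRef array section
// instead has an in-bounds GEP ("array_offset"), computed from its bounds,
// applied to the original pointer.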
2620 static void
2621 createAlteredByCaptureMap(MapInfoData &mapData,
2622  LLVM::ModuleTranslation &moduleTranslation,
2623  llvm::IRBuilderBase &builder) {
2624  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
2625  // if it's declare target, skip it, it's handled separately.
2626  if (!mapData.IsDeclareTarget[i]) {
2627  auto mapOp =
2628  mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(mapData.MapClause[i]);
2629  mlir::omp::VariableCaptureKind captureKind =
2630  mapOp.getMapCaptureType().value_or(
2631  mlir::omp::VariableCaptureKind::ByRef);
2632  bool isPtrTy = checkIfPointerMap(mapOp);
2633 
2634  // This currently handles the array-sectioning lower-bound case, but
2635  // more logic may be required in the future. Clang invokes EmitLValue,
2636  // which has specialised logic for special Clang types such as
2637  // user-defined ones, so it is possible we will have to extend this for
2638  // structures or other complex types. The general idea is that this
2639  // function mimics some of the logic from Clang that we require for
2640  // kernel argument passing from host -> device.
2641  switch (captureKind) {
2642  case mlir::omp::VariableCaptureKind::ByRef: {
2643  llvm::Value *newV = mapData.Pointers[i];
2644  std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
2645  moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
2646  mapOp.getBounds());
2647  if (isPtrTy)
2648  newV = builder.CreateLoad(builder.getPtrTy(), newV);
2649 
2650  if (!offsetIdx.empty())
2651  newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
2652  "array_offset");
2653  mapData.Pointers[i] = newV;
2654  } break;
2655  case mlir::omp::VariableCaptureKind::ByCopy: {
2656  llvm::Type *type = mapData.BaseType[i];
2657  llvm::Value *newV;
2658  if (mapData.Pointers[i]->getType()->isPointerTy())
2659  newV = builder.CreateLoad(type, mapData.Pointers[i]);
2660  else
2661  newV = mapData.Pointers[i];
2662 
2663  if (!isPtrTy) {
2664  auto curInsert = builder.saveIP();
2665  builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
2666  auto *memTempAlloc =
2667  builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
2668  builder.restoreIP(curInsert);
2669 
2670  builder.CreateStore(newV, memTempAlloc);
2671  newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
2672  }
2673 
2674  mapData.Pointers[i] = newV;
2675  mapData.BasePointers[i] = newV;
2676  } break;
2677  case mlir::omp::VariableCaptureKind::This:
2678  case mlir::omp::VariableCaptureKind::VLAType:
2679  mapData.MapClause[i]->emitOpError("Unhandled capture kind");
2680  break;
2681  }
2682  }
2683  }
2684 }
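// Illustrative sketch of the ByCopy path above for a scalar (hand-written
// LLVM IR, not taken from a real compilation):
//   %val    = load i32, ptr %orig          ; load the captured value
//   %casted = alloca ptr                   ; emitted at the alloca IP
//   store i32 %val, ptr %casted            ; the ".casted" temporary
//   %new    = load ptr, ptr %casted        ; becomes BasePointer/Pointer
// For ByRef array sections only the pointer is adjusted, via an inbounds GEP
// ("array_offset") computed from the bounds operands.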
2685 
2686 // Generate all map related information and fill the combinedInfo.
2687 static void genMapInfos(llvm::IRBuilderBase &builder,
2688  LLVM::ModuleTranslation &moduleTranslation,
2689  DataLayout &dl,
2690  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
2691  MapInfoData &mapData,
2692  const SmallVector<Value> &devPtrOperands = {},
2693  const SmallVector<Value> &devAddrOperands = {},
2694  bool isTargetParams = false) {
2695  // We wish to modify the way some arguments are passed to the target
2696  // region based on their capture type. This can involve generating new
2697  // loads and stores, which changes the MLIR-value-to-LLVM-value mapping;
2698  // however, we only wish to do this locally for the current
2699  // function/target and to avoid altering ModuleTranslation, so we remap
2700  // the base pointer or pointer stored in the map info's corresponding
2701  // MapInfoData, which is later accessed by genMapInfos and createTarget
2702  // to help generate the kernel and kernel argument structure. This
2703  // primarily becomes relevant in cases like by-copy captures or by-ref
2704  // ranged arrays. In the default case, we simply pass the pointer by
2705  // reference as both basePointer and pointer.
2706  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
2707  createAlteredByCaptureMap(mapData, moduleTranslation, builder);
2708 
2709  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2710 
2711  auto fail = [&combinedInfo]() -> void {
2712  combinedInfo.BasePointers.clear();
2713  combinedInfo.Pointers.clear();
2714  combinedInfo.DevicePointers.clear();
2715  combinedInfo.Sizes.clear();
2716  combinedInfo.Types.clear();
2717  combinedInfo.Names.clear();
2718  };
2719 
2720  // We operate under the assumption that all vectors that are
2721  // required in MapInfoData are of equal length (either filled with
2722  // default-constructed data or appropriate information), so we can
2723  // use the size of any component of MapInfoData; if we cannot,
2724  // something is missing from the initial MapInfoData construction.
2725  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
2726  // NOTE/TODO: We currently do not support arbitrary depth record
2727  // type mapping.
2728  if (mapData.IsAMember[i])
2729  continue;
2730 
2731  auto mapInfoOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[i]);
2732  if (!mapInfoOp.getMembers().empty()) {
2733  processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
2734  combinedInfo, mapData, i, isTargetParams);
2735  continue;
2736  }
2737 
2738  processIndividualMap(mapData, i, combinedInfo, isTargetParams);
2739  }
2740 
2741  auto findMapInfo = [&combinedInfo](llvm::Value *val, unsigned &index) {
2742  index = 0;
2743  for (llvm::Value *basePtr : combinedInfo.BasePointers) {
2744  if (basePtr == val)
2745  return true;
2746  index++;
2747  }
2748  return false;
2749  };
2750 
2751  auto addDevInfos = [&, fail](auto devOperands, auto devOpType) -> void {
2752  for (const auto &devOp : devOperands) {
2753  // TODO: Only LLVMPointerTypes are handled.
2754  if (!isa<LLVM::LLVMPointerType>(devOp.getType()))
2755  return fail();
2756 
2757  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devOp);
2758 
2759  // Check if map info is already present for this entry.
2760  unsigned infoIndex;
2761  if (findMapInfo(mapOpValue, infoIndex)) {
2762  combinedInfo.Types[infoIndex] |=
2763  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
2764  combinedInfo.DevicePointers[infoIndex] = devOpType;
2765  } else {
2766  combinedInfo.BasePointers.emplace_back(mapOpValue);
2767  combinedInfo.Pointers.emplace_back(mapOpValue);
2768  combinedInfo.DevicePointers.emplace_back(devOpType);
2769  combinedInfo.Names.emplace_back(
2770  LLVM::createMappingInformation(devOp.getLoc(), *ompBuilder));
2771  combinedInfo.Types.emplace_back(
2772  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
2773  combinedInfo.Sizes.emplace_back(builder.getInt64(0));
2774  }
2775  }
2776  };
2777 
2778  addDevInfos(devPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
2779  addDevInfos(devAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
2780 }
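// Illustrative sketch of the use_device_ptr/use_device_addr handling above
// (hypothetical operand %p): if %p already has a map entry, that entry's
// Types flags simply gain OMP_MAP_RETURN_PARAM; otherwise a fresh entry is
// appended roughly as
//   BasePointers <- %p, Pointers <- %p,
//   DevicePointers <- Pointer or Address,
//   Types <- OMP_MAP_RETURN_PARAM, Sizes <- i64 0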
2781 
2782 static LogicalResult
2783 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
2784  LLVM::ModuleTranslation &moduleTranslation) {
2785  llvm::Value *ifCond = nullptr;
2786  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
2787  SmallVector<Value> mapOperands;
2788  SmallVector<Value> useDevPtrOperands;
2789  SmallVector<Value> useDevAddrOperands;
2790  llvm::omp::RuntimeFunction RTLFn;
2791  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
2792 
2793  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2794 
2795  LogicalResult result =
2796  llvm::TypeSwitch<Operation *, LogicalResult>(op)
2797  .Case([&](omp::TargetDataOp dataOp) {
2798  if (auto ifExprVar = dataOp.getIfExpr())
2799  ifCond = moduleTranslation.lookupValue(ifExprVar);
2800 
2801  if (auto devId = dataOp.getDevice())
2802  if (auto constOp =
2803  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2804  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2805  deviceID = intAttr.getInt();
2806 
2807  mapOperands = dataOp.getMapOperands();
2808  useDevPtrOperands = dataOp.getUseDevicePtr();
2809  useDevAddrOperands = dataOp.getUseDeviceAddr();
2810  return success();
2811  })
2812  .Case([&](omp::TargetEnterDataOp enterDataOp) {
2813  if (enterDataOp.getNowait())
2814  return (LogicalResult)(enterDataOp.emitError(
2815  "`nowait` is not supported yet"));
2816 
2817  if (auto ifExprVar = enterDataOp.getIfExpr())
2818  ifCond = moduleTranslation.lookupValue(ifExprVar);
2819 
2820  if (auto devId = enterDataOp.getDevice())
2821  if (auto constOp =
2822  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2823  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2824  deviceID = intAttr.getInt();
2825  RTLFn = llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
2826  mapOperands = enterDataOp.getMapOperands();
2827  return success();
2828  })
2829  .Case([&](omp::TargetExitDataOp exitDataOp) {
2830  if (exitDataOp.getNowait())
2831  return (LogicalResult)(exitDataOp.emitError(
2832  "`nowait` is not supported yet"));
2833 
2834  if (auto ifExprVar = exitDataOp.getIfExpr())
2835  ifCond = moduleTranslation.lookupValue(ifExprVar);
2836 
2837  if (auto devId = exitDataOp.getDevice())
2838  if (auto constOp =
2839  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2840  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2841  deviceID = intAttr.getInt();
2842 
2843  RTLFn = llvm::omp::OMPRTL___tgt_target_data_end_mapper;
2844  mapOperands = exitDataOp.getMapOperands();
2845  return success();
2846  })
2847  .Case([&](omp::TargetUpdateOp updateDataOp) {
2848  if (updateDataOp.getNowait())
2849  return (LogicalResult)(updateDataOp.emitError(
2850  "`nowait` is not supported yet"));
2851 
2852  if (auto ifExprVar = updateDataOp.getIfExpr())
2853  ifCond = moduleTranslation.lookupValue(ifExprVar);
2854 
2855  if (auto devId = updateDataOp.getDevice())
2856  if (auto constOp =
2857  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2858  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2859  deviceID = intAttr.getInt();
2860 
2861  RTLFn = llvm::omp::OMPRTL___tgt_target_data_update_mapper;
2862  mapOperands = updateDataOp.getMapOperands();
2863  return success();
2864  })
2865  .Default([&](Operation *op) {
2866  return op->emitError("unsupported OpenMP operation: ")
2867  << op->getName();
2868  });
2869 
2870  if (failed(result))
2871  return failure();
2872 
2873  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2874 
2875  MapInfoData mapData;
2876  collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, DL,
2877  builder);
2878 
2879  // Fill up the arrays with all the mapped variables.
2880  llvm::OpenMPIRBuilder::MapInfosTy combinedInfo;
2881  auto genMapInfoCB =
2882  [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
2883  builder.restoreIP(codeGenIP);
2884  if (auto dataOp = dyn_cast<omp::TargetDataOp>(op)) {
2885  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData,
2886  useDevPtrOperands, useDevAddrOperands);
2887  } else {
2888  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
2889  }
2890  return combinedInfo;
2891  };
2892 
2893  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
2894  /*SeparateBeginEndCalls=*/true);
2895 
2896  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
2897  LogicalResult bodyGenStatus = success();
2898  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType) {
2899  assert(isa<omp::TargetDataOp>(op) &&
2900  "BodyGen requested for non TargetDataOp");
2901  Region &region = cast<omp::TargetDataOp>(op).getRegion();
2902  switch (bodyGenType) {
2903  case BodyGenTy::Priv:
2904  // Check if any device ptr/addr info is available
2905  if (!info.DevicePtrInfoMap.empty()) {
2906  builder.restoreIP(codeGenIP);
2907  unsigned argIndex = 0;
2908  for (auto &devPtrOp : useDevPtrOperands) {
2909  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devPtrOp);
2910  const auto &arg = region.front().getArgument(argIndex);
2911  moduleTranslation.mapValue(arg,
2912  info.DevicePtrInfoMap[mapOpValue].second);
2913  argIndex++;
2914  }
2915 
2916  for (auto &devAddrOp : useDevAddrOperands) {
2917  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devAddrOp);
2918  const auto &arg = region.front().getArgument(argIndex);
2919  auto *LI = builder.CreateLoad(
2920  builder.getPtrTy(), info.DevicePtrInfoMap[mapOpValue].second);
2921  moduleTranslation.mapValue(arg, LI);
2922  argIndex++;
2923  }
2924 
2925  bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region",
2926  builder, moduleTranslation);
2927  }
2928  break;
2929  case BodyGenTy::DupNoPriv:
2930  break;
2931  case BodyGenTy::NoPriv:
2932  // If device info is available then the region has already been generated.
2933  if (info.DevicePtrInfoMap.empty()) {
2934  builder.restoreIP(codeGenIP);
2935  bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region",
2936  builder, moduleTranslation);
2937  }
2938  break;
2939  }
2940  return builder.saveIP();
2941  };
2942 
2943  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2944  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2945  findAllocaInsertPoint(builder, moduleTranslation);
2946  if (isa<omp::TargetDataOp>(op)) {
2947  builder.restoreIP(ompBuilder->createTargetData(
2948  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
2949  info, genMapInfoCB, nullptr, bodyGenCB));
2950  } else {
2951  builder.restoreIP(ompBuilder->createTargetData(
2952  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
2953  info, genMapInfoCB, &RTLFn));
2954  }
2955 
2956  return bodyGenStatus;
2957 }
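// As a rough summary of the dispatch above (the runtime entry points are the
// ones selected in the code; the directive pairing is the usual OpenMP
// lowering):
//   omp.target_enter_data -> __tgt_target_data_begin_mapper
//   omp.target_exit_data  -> __tgt_target_data_end_mapper
//   omp.target_update     -> __tgt_target_data_update_mapper
//   omp.target_data       -> createTargetData with a body callback, which
//                            (with SeparateBeginEndCalls) brackets the
//                            inlined region with begin/end mapper calls.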
2958 
2959 /// Lowers the FlagsAttr, which is applied to the module on the device
2960 /// pass when offloading. This attribute contains OpenMP RTL globals that can
2961 /// be passed as flags to the frontend; otherwise they are set to defaults.
2962 LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
2963  LLVM::ModuleTranslation &moduleTranslation) {
2964  if (!dyn_cast<mlir::ModuleOp>(op))
2965  return failure();
2966 
2967  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2968 
2969  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
2970  attribute.getOpenmpDeviceVersion());
2971 
2972  if (attribute.getNoGpuLib())
2973  return success();
2974 
2975  ompBuilder->createGlobalFlag(
2976  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
2977  "__omp_rtl_debug_kind");
2978  ompBuilder->createGlobalFlag(
2979  attribute
2980  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
2981  ,
2982  "__omp_rtl_assume_teams_oversubscription");
2983  ompBuilder->createGlobalFlag(
2984  attribute
2985  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
2986  ,
2987  "__omp_rtl_assume_threads_oversubscription");
2988  ompBuilder->createGlobalFlag(
2989  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
2990  "__omp_rtl_assume_no_thread_state");
2991  ompBuilder->createGlobalFlag(
2992  attribute
2993  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
2994  ,
2995  "__omp_rtl_assume_no_nested_parallelism");
2996  return success();
2997 }
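// Illustrative sketch of the result (hand-written; the exact linkage and
// visibility are chosen by OpenMPIRBuilder::createGlobalFlag): the lowering
// above produces device-side globals and module flags along the lines of
//   @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
//   @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
//   ...
// plus a "Max" module flag named "openmp-device" carrying the version.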
2998 
2999 static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
3000  omp::TargetOp targetOp,
3001  llvm::StringRef parentName = "") {
3002  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
3003 
3004  assert(fileLoc && "No file found from location");
3005  StringRef fileName = fileLoc.getFilename().getValue();
3006 
3007  llvm::sys::fs::UniqueID id;
3008  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
3009  targetOp.emitError("Unable to get unique ID for file");
3010  return false;
3011  }
3012 
3013  uint64_t line = fileLoc.getLine();
3014  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
3015  id.getFile(), line);
3016  return true;
3017 }
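// The (parent name, device id, file id, line) tuple gathered above is what
// the OpenMPIRBuilder later uses to derive the offload entry / kernel name,
// conventionally of the form (illustrative, not generated in this function):
//   __omp_offloading_<device-id>_<file-id>_<parent-name>_l<line>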
3018 
3019 static bool targetOpSupported(Operation &opInst) {
3020  auto targetOp = cast<omp::TargetOp>(opInst);
3021  if (targetOp.getIfExpr()) {
3022  opInst.emitError("If clause not yet supported");
3023  return false;
3024  }
3025 
3026  if (targetOp.getDevice()) {
3027  opInst.emitError("Device clause not yet supported");
3028  return false;
3029  }
3030 
3031  if (targetOp.getThreadLimit()) {
3032  opInst.emitError("Thread limit clause not yet supported");
3033  return false;
3034  }
3035 
3036  if (targetOp.getNowait()) {
3037  opInst.emitError("Nowait clause not yet supported");
3038  return false;
3039  }
3040 
3041  return true;
3042 }
3043 
3044 static void
3045 handleDeclareTargetMapVar(MapInfoData &mapData,
3046  LLVM::ModuleTranslation &moduleTranslation,
3047  llvm::IRBuilderBase &builder, llvm::Function *func) {
3048  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
3049  // In the case of declare target mapped variables, the basePointer is
3050  // the reference pointer generated by the convertDeclareTargetAttr
3051  // method, whereas the kernelValue is the original variable. For the
3052  // device we must replace all uses of this original global variable
3053  // (stored in kernelValue) with the reference pointer (stored in
3054  // basePointer for declare target mapped variables), as on the device
3055  // the data is mapped into this reference pointer and should be loaded
3056  // from it; the original variable is discarded. On the host both exist,
3057  // and metadata is generated (elsewhere in the convertDeclareTargetAttr
3058  // function) to link the two variables in the runtime; both the
3059  // reference pointer and the pointer are then assigned in the kernel
3060  // argument structure for the host.
3061  if (mapData.IsDeclareTarget[i]) {
3062  // If the original map value is a constant, then we have to make sure
3063  // all of its uses within the current kernel/function that we are going
3064  // to rewrite are converted to instructions, as we will be altering the
3065  // old use (OriginalValue) from a constant to an instruction, which
3066  // would be illegal and ICE the compiler if the user is a constant
3067  // expression of some kind, e.g. a constant GEP.
3068  if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
3069  convertUsersOfConstantsToInstructions(constant, func, false);
3070 
3071  // The users iterator will get invalidated if we modify an element,
3072  // so we populate this vector of uses to alter each user on an
3073  // individual basis to emit its own load (rather than one load for
3074  // all).
3075  llvm::SmallVector<llvm::User *> userVec;
3076  for (llvm::User *user : mapData.OriginalValue[i]->users())
3077  userVec.push_back(user);
3078 
3079  for (llvm::User *user : userVec) {
3080  if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
3081  if (insn->getFunction() == func) {
3082  auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
3083  mapData.BasePointers[i]);
3084  load->moveBefore(insn);
3085  user->replaceUsesOfWith(mapData.OriginalValue[i], load);
3086  }
3087  }
3088  }
3089  }
3090  }
3091 }
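// Illustrative before/after of the rewrite above (hypothetical names; the
// reference pointer is the one created for the declare target global
// elsewhere in this file):
//   before:  store i32 1, ptr @global_var
//   after :  %ref = load ptr, ptr @global_var_decl_tgt_ref_ptr
//            store i32 1, ptr %ref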
3092 
3093 // The createDeviceArgumentAccessor function generates
3094 // instructions for retrieving (accessing) kernel
3095 // arguments inside of the device kernel for use by
3096 // the kernel. This enables different semantics such as
3097 // the creation of temporary copies of data allowing
3098 // semantics like read-only/no host write back kernel
3099 // arguments.
3100 //
3101 // This currently implements a very light version of Clang's
3102 // EmitParmDecl's handling of direct argument handling as well
3103 // as a portion of the argument access generation based on
3104 // capture types found at the end of emitOutlinedFunctionPrologue
3105 // in Clang. The indirect-path handling of EmitParmDecl may be
3106 // required for future work, but a direct 1-to-1 copy doesn't seem
3107 // possible, as the logic is rather scattered throughout Clang's
3108 // lowering and perhaps we wish to deviate slightly.
3109 //
3110 // \param mapData - A container holding vectors of information
3111 // corresponding to the input argument, which should have a
3112 // corresponding entry in the MapInfoData container's
3113 // OriginalValue vector.
3114 // \param arg - This is the generated kernel function argument that
3115 // corresponds to the passed-in input argument. We generate different
3116 // accesses of this Argument based on capture type and other
3117 // input-related information.
3118 // \param input - This is the host-side value that will be passed to
3119 // the kernel, i.e. the kernel input. We rewrite all uses of this within
3120 // the kernel (as we generate the kernel body based on the target's
3121 // region, which maintains references to the original input) to the
3122 // retVal argument upon exit of this function inside of the
3123 // OMPIRBuilder. This interlinks the kernel argument with future uses of
3124 // it in the function, providing appropriate "glue" instructions in
3125 // between.
3126 // \param retVal - This is the value that all uses of input inside of the
3127 // kernel will be rewritten to. The goal of this function is to generate
3128 // an appropriate location for the kernel argument to be accessed from:
3129 // e.g. ByRef results in a temporary allocation, a store of the kernel
3130 // argument into that allocated memory, and then a load from it, whereas
3131 // ByCopy uses the allocated memory directly.
3132 static llvm::IRBuilderBase::InsertPoint
3133 createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
3134  llvm::Value *input, llvm::Value *&retVal,
3135  llvm::IRBuilderBase &builder,
3136  llvm::OpenMPIRBuilder &ompBuilder,
3137  LLVM::ModuleTranslation &moduleTranslation,
3138  llvm::IRBuilderBase::InsertPoint allocaIP,
3139  llvm::IRBuilderBase::InsertPoint codeGenIP) {
3140  builder.restoreIP(allocaIP);
3141 
3142  mlir::omp::VariableCaptureKind capture =
3143  mlir::omp::VariableCaptureKind::ByRef;
3144 
3145  // Find the associated MapInfoData entry for the current input
3146  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
3147  if (mapData.OriginalValue[i] == input) {
3148  if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
3149  mapData.MapClause[i])) {
3150  capture = mapOp.getMapCaptureType().value_or(
3151  mlir::omp::VariableCaptureKind::ByRef);
3152  }
3153 
3154  break;
3155  }
3156 
3157  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
3158  unsigned int defaultAS =
3159  ompBuilder.M.getDataLayout().getProgramAddressSpace();
3160 
3161  // Create the alloca for the argument at the current point.
3162  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
3163 
3164  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
3165  v = builder.CreatePointerBitCastOrAddrSpaceCast(
3166  v, arg.getType()->getPointerTo(defaultAS));
3167 
3168  builder.CreateStore(&arg, v);
3169 
3170  builder.restoreIP(codeGenIP);
3171 
3172  switch (capture) {
3173  case mlir::omp::VariableCaptureKind::ByCopy: {
3174  retVal = v;
3175  break;
3176  }
3177  case mlir::omp::VariableCaptureKind::ByRef: {
3178  retVal = builder.CreateAlignedLoad(
3179  v->getType(), v,
3180  ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
3181  break;
3182  }
3183  case mlir::omp::VariableCaptureKind::This:
3184  case mlir::omp::VariableCaptureKind::VLAType:
3185  assert(false && "Currently unsupported capture kind");
3186  break;
3187  }
3188 
3189  return builder.saveIP();
3190 }
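// Illustrative sketch of the access sequence generated above for a pointer
// kernel argument (hand-written IR; addrspace(5) is the alloca address space
// on e.g. AMDGPU and is only an assumption here):
//   %a      = alloca ptr, addrspace(5)
//   %ascast = addrspacecast ptr addrspace(5) %a to ptr
//   store ptr %arg, ptr %ascast
//   ; ByCopy: retVal is %ascast itself
//   ; ByRef:  retVal = load ptr, ptr %ascast   (aligned load)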
3191 
3192 static LogicalResult
3193 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
3194  LLVM::ModuleTranslation &moduleTranslation) {
3195 
3196  if (!targetOpSupported(opInst))
3197  return failure();
3198 
3199  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
3200  auto targetOp = cast<omp::TargetOp>(opInst);
3201  auto &targetRegion = targetOp.getRegion();
3202  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
3203  SmallVector<Value> mapOperands = targetOp.getMapOperands();
3204  llvm::Function *llvmOutlinedFn = nullptr;
3205 
3206  LogicalResult bodyGenStatus = success();
3207  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3208  auto bodyCB = [&](InsertPointTy allocaIP,
3209  InsertPointTy codeGenIP) -> InsertPointTy {
3210  // Forward target-cpu and target-features function attributes from the
3211  // original function to the new outlined function.
3212  llvm::Function *llvmParentFn =
3213  moduleTranslation.lookupFunction(parentFn.getName());
3214  llvmOutlinedFn = codeGenIP.getBlock()->getParent();
3215  assert(llvmParentFn && llvmOutlinedFn &&
3216  "Both parent and outlined functions must exist at this point");
3217 
3218  if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
3219  attr.isStringAttribute())
3220  llvmOutlinedFn->addFnAttr(attr);
3221 
3222  if (auto attr = llvmParentFn->getFnAttribute("target-features");
3223  attr.isStringAttribute())
3224  llvmOutlinedFn->addFnAttr(attr);
3225 
3226  builder.restoreIP(codeGenIP);
3227  unsigned argIndex = 0;
3228  for (auto &mapOp : mapOperands) {
3229  auto mapInfoOp =
3230  mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
3231  llvm::Value *mapOpValue =
3232  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
3233  const auto &arg = targetRegion.front().getArgument(argIndex);
3234  moduleTranslation.mapValue(arg, mapOpValue);
3235  argIndex++;
3236  }
3237  llvm::BasicBlock *exitBlock = convertOmpOpRegions(
3238  targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
3239  builder.SetInsertPoint(exitBlock);
3240  return builder.saveIP();
3241  };
3242 
3243  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3244  StringRef parentName = parentFn.getName();
3245 
3246  llvm::TargetRegionEntryInfo entryInfo;
3247 
3248  if (!getTargetEntryUniqueInfo(entryInfo, targetOp, parentName))
3249  return failure();
3250 
3251  int32_t defaultValTeams = -1;
3252  int32_t defaultValThreads = 0;
3253 
3254  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3255  findAllocaInsertPoint(builder, moduleTranslation);
3256 
3257  MapInfoData mapData;
3258  collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, dl,
3259  builder);
3260 
3261  llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
3262  auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
3263  -> llvm::OpenMPIRBuilder::MapInfosTy & {
3264  builder.restoreIP(codeGenIP);
3265  genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, {}, {},
3266  true);
3267  return combinedInfos;
3268  };
3269 
3270  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
3271  llvm::Value *&retVal, InsertPointTy allocaIP,
3272  InsertPointTy codeGenIP) {
3273  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3274 
3275  // We just return the unaltered argument for the host function
3276  // for now; some alterations may be required in the future to
3277  // keep host fallback functions working identically to the device
3278  // version (e.g. ByCopy values should be treated as such on both
3279  // host and device, which is currently not always the case).
3280  if (!ompBuilder->Config.isTargetDevice()) {
3281  retVal = cast<llvm::Value>(&arg);
3282  return codeGenIP;
3283  }
3284 
3285  return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
3286  *ompBuilder, moduleTranslation,
3287  allocaIP, codeGenIP);
3288  };
3289 
3290  llvm::SmallVector<llvm::Value *> kernelInput;
3291  for (size_t i = 0; i < mapOperands.size(); ++i) {
3292  // Declare target arguments are not passed to kernels as arguments.
3293  // TODO: We currently do not handle cases where a member is explicitly
3294  // passed in as an argument; this will likely need to be handled in
3295  // the near future. Rather than using IsAMember, it may be better to
3296  // test whether the relevant BlockArg is used within the target region
3297  // and then use that as a basis for exclusion from the kernel inputs.
3298  if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
3299  kernelInput.push_back(mapData.OriginalValue[i]);
3300  }
3301  SmallVector<llvm::OpenMPIRBuilder::DependData> dds;
3302  buildDependData(targetOp.getDepends(), targetOp.getDependVars(),
3303  moduleTranslation, dds);
3304 
3305  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget(
3306  ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams,
3307  defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB,
3308  dds));
3309 
3310  // Remap access operations to declare target reference pointers for the
3311  // device, essentially generating extra load ops as necessary.
3312  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
3313  handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
3314  llvmOutlinedFn);
3315 
3316  return bodyGenStatus;
3317 }
3318 
3319 static LogicalResult
3320 convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
3321  LLVM::ModuleTranslation &moduleTranslation) {
3322  // Amend omp.declare_target by deleting the IR of the outlined functions
3323  // created for target regions. They cannot be filtered out from MLIR earlier
3324  // because the omp.target operation inside must be translated to LLVM, but
3325  // the wrapper functions themselves must not remain at the end of the
3326  // process. We know that functions where omp.declare_target does not match
3327  // omp.is_target_device at this stage can only be wrapper functions because
3328  // those that aren't are removed earlier as an MLIR transformation pass.
3329  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
3330  if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
3331  op->getParentOfType<ModuleOp>().getOperation())) {
3332  if (!offloadMod.getIsTargetDevice())
3333  return success();
3334 
3335  omp::DeclareTargetDeviceType declareType =
3336  attribute.getDeviceType().getValue();
3337 
3338  if (declareType == omp::DeclareTargetDeviceType::host) {
3339  llvm::Function *llvmFunc =
3340  moduleTranslation.lookupFunction(funcOp.getName());
3341  llvmFunc->dropAllReferences();
3342  llvmFunc->eraseFromParent();
3343  }
3344  }
3345  return success();
3346  }
3347 
3348  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
3349  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
3350  if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
3351  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3352  bool isDeclaration = gOp.isDeclaration();
3353  bool isExternallyVisible =
3354  gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
3355  auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
3356  llvm::StringRef mangledName = gOp.getSymName();
3357  auto captureClause =
3358  convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
3359  auto deviceClause =
3360  convertToDeviceClauseKind(attribute.getDeviceType().getValue());
3361  // Unused for MLIR at the moment, required in Clang for
3362  // bookkeeping.
3363  std::vector<llvm::GlobalVariable *> generatedRefs;
3364 
3365  std::vector<llvm::Triple> targetTriple;
3366  auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>(
3367  op->getParentOfType<mlir::ModuleOp>()->getAttr(
3368  LLVM::LLVMDialect::getTargetTripleAttrName()));
3369  if (targetTripleAttr)
3370  targetTriple.emplace_back(targetTripleAttr.data());
3371 
3372  auto fileInfoCallBack = [&loc]() {
3373  std::string filename = "";
3374  std::uint64_t lineNo = 0;
3375 
3376  if (loc) {
3377  filename = loc.getFilename().str();
3378  lineNo = loc.getLine();
3379  }
3380 
3381  return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
3382  lineNo);
3383  };
3384 
3385  ompBuilder->registerTargetGlobalVariable(
3386  captureClause, deviceClause, isDeclaration, isExternallyVisible,
3387  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
3388  generatedRefs, /*OpenMPSimd*/ false, targetTriple,
3389  /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
3390  gVal->getType(), gVal);
3391 
3392  if (ompBuilder->Config.isTargetDevice() &&
3393  (attribute.getCaptureClause().getValue() !=
3394  mlir::omp::DeclareTargetCaptureClause::to ||
3395  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
3396  ompBuilder->getAddrOfDeclareTargetVar(
3397  captureClause, deviceClause, isDeclaration, isExternallyVisible,
3398  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
3399  generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
3400  /*GlobalInitializer*/ nullptr,
3401  /*VariableLinkage*/ nullptr);
3402  }
3403  }
3404  }
3405 
3406  return success();
3407 }
3408 
3409 // Returns true if the operation is inside a TargetOp or
3410 // is part of a declare target function.
3411 static bool isTargetDeviceOp(Operation *op) {
3412  // Assumes no reverse offloading
3413  if (op->getParentOfType<omp::TargetOp>())
3414  return true;
3415 
3416  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
3417  if (auto declareTargetIface =
3418  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
3419  parentFn.getOperation()))
3420  if (declareTargetIface.isDeclareTarget() &&
3421  declareTargetIface.getDeclareTargetDeviceType() !=
3422  mlir::omp::DeclareTargetDeviceType::host)
3423  return true;
3424 
3425  return false;
3426 }
3427 
3428 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
3429 /// (including OpenMP runtime calls).
3430 static LogicalResult
3431 convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
3432  LLVM::ModuleTranslation &moduleTranslation) {
3433 
3434  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3435 
3436  return llvm::TypeSwitch<Operation *, LogicalResult>(op)
3437  .Case([&](omp::BarrierOp) {
3438  ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
3439  return success();
3440  })
3441  .Case([&](omp::TaskwaitOp) {
3442  ompBuilder->createTaskwait(builder.saveIP());
3443  return success();
3444  })
3445  .Case([&](omp::TaskyieldOp) {
3446  ompBuilder->createTaskyield(builder.saveIP());
3447  return success();
3448  })
3449  .Case([&](omp::FlushOp) {
3450  // There is no support in the OpenMP runtime function (__kmpc_flush)
3451  // for accepting the argument list.
3452  // The OpenMP standard states the following:
3453  // "An implementation may implement a flush with a list by ignoring
3454  // the list, and treating it the same as a flush without a list."
3455  //
3456  // The argument list is therefore discarded, so that a flush with a
3457  // list is treated the same as a flush without a list.
3458  ompBuilder->createFlush(builder.saveIP());
3459  return success();
3460  })
3461  .Case([&](omp::ParallelOp op) {
3462  return convertOmpParallel(op, builder, moduleTranslation);
3463  })
3464  .Case([&](omp::MaskedOp) {
3465  return convertOmpMasked(*op, builder, moduleTranslation);
3466  })
3467  .Case([&](omp::MasterOp) {
3468  return convertOmpMaster(*op, builder, moduleTranslation);
3469  })
3470  .Case([&](omp::CriticalOp) {
3471  return convertOmpCritical(*op, builder, moduleTranslation);
3472  })
3473  .Case([&](omp::OrderedRegionOp) {
3474  return convertOmpOrderedRegion(*op, builder, moduleTranslation);
3475  })
3476  .Case([&](omp::OrderedOp) {
3477  return convertOmpOrdered(*op, builder, moduleTranslation);
3478  })
3479  .Case([&](omp::WsloopOp) {
3480  return convertOmpWsloop(*op, builder, moduleTranslation);
3481  })
3482  .Case([&](omp::SimdOp) {
3483  return convertOmpSimd(*op, builder, moduleTranslation);
3484  })
3485  .Case([&](omp::AtomicReadOp) {
3486  return convertOmpAtomicRead(*op, builder, moduleTranslation);
3487  })
3488  .Case([&](omp::AtomicWriteOp) {
3489  return convertOmpAtomicWrite(*op, builder, moduleTranslation);
3490  })
3491  .Case([&](omp::AtomicUpdateOp op) {
3492  return convertOmpAtomicUpdate(op, builder, moduleTranslation);
3493  })
3494  .Case([&](omp::AtomicCaptureOp op) {
3495  return convertOmpAtomicCapture(op, builder, moduleTranslation);
3496  })
3497  .Case([&](omp::SectionsOp) {
3498  return convertOmpSections(*op, builder, moduleTranslation);
3499  })
3500  .Case([&](omp::SingleOp op) {
3501  return convertOmpSingle(op, builder, moduleTranslation);
3502  })
3503  .Case([&](omp::TeamsOp op) {
3504  return convertOmpTeams(op, builder, moduleTranslation);
3505  })
3506  .Case([&](omp::TaskOp op) {
3507  return convertOmpTaskOp(op, builder, moduleTranslation);
3508  })
3509  .Case([&](omp::TaskgroupOp op) {
3510  return convertOmpTaskgroupOp(op, builder, moduleTranslation);
3511  })
3512  .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareReductionOp,
3513  omp::CriticalDeclareOp>([](auto op) {
3514  // `yield` and `terminator` can be just omitted. The block structure
3515  // was created in the region that handles their parent operation.
3516  // `declare_reduction` will be used by reductions and is not
3517  // converted directly, skip it.
3518  // `critical.declare` is only used to declare names of critical
3519  // sections which will be used by `critical` ops and hence can be
3520  // ignored for lowering. The OpenMP IRBuilder will create unique
3521  // name for critical section names.
3522  return success();
3523  })
3524  .Case([&](omp::ThreadprivateOp) {
3525  return convertOmpThreadprivate(*op, builder, moduleTranslation);
3526  })
3527  .Case<omp::TargetDataOp, omp::TargetEnterDataOp, omp::TargetExitDataOp,
3528  omp::TargetUpdateOp>([&](auto op) {
3529  return convertOmpTargetData(op, builder, moduleTranslation);
3530  })
3531  .Case([&](omp::TargetOp) {
3532  return convertOmpTarget(*op, builder, moduleTranslation);
3533  })
3534  .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
3535  [&](auto op) {
3536  // No-op, should be handled by relevant owning operations e.g.
3537  // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp etc.
3538  // and then discarded
3539  return success();
3540  })
3541  .Default([&](Operation *inst) {
3542  return inst->emitError("unsupported OpenMP operation: ")
3543  << inst->getName();
3544  });
3545 }
3546 
3547 static LogicalResult
3548 convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
3549  LLVM::ModuleTranslation &moduleTranslation) {
3550  return convertHostOrTargetOperation(op, builder, moduleTranslation);
3551 }
3552 
3553 static LogicalResult
3554 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
3555  LLVM::ModuleTranslation &moduleTranslation) {
3556  if (isa<omp::TargetOp>(op))
3557  return convertOmpTarget(*op, builder, moduleTranslation);
3558  if (isa<omp::TargetDataOp>(op))
3559  return convertOmpTargetData(op, builder, moduleTranslation);
3560  bool interrupted =
3561  op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
3562  if (isa<omp::TargetOp>(oper)) {
3563  if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
3564  return WalkResult::interrupt();
3565  return WalkResult::skip();
3566  }
3567  if (isa<omp::TargetDataOp>(oper)) {
3568  if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
3569  return WalkResult::interrupt();
3570  return WalkResult::skip();
3571  }
3572  return WalkResult::advance();
3573  }).wasInterrupted();
3574  return failure(interrupted);
3575 }
3576 
3577 namespace {
3578 
3579 /// Implementation of the dialect interface that converts operations belonging
3580 /// to the OpenMP dialect to LLVM IR.
3581 class OpenMPDialectLLVMIRTranslationInterface
3582  : public LLVMTranslationDialectInterface {
3583 public:
3584  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
3585 
3586  /// Translates the given operation to LLVM IR using the provided IR builder
3587  /// and saving the state in `moduleTranslation`.
3588  LogicalResult
3589  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
3590  LLVM::ModuleTranslation &moduleTranslation) const final;
3591 
3592  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
3593  /// runtime calls, or operation amendments
3594  LogicalResult
3595  amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
3596  NamedAttribute attribute,
3597  LLVM::ModuleTranslation &moduleTranslation) const final;
3598 };
3599 
3600 } // namespace
3601 
3602 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
3603  Operation *op, ArrayRef<llvm::Instruction *> instructions,
3604  NamedAttribute attribute,
3605  LLVM::ModuleTranslation &moduleTranslation) const {
3606  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
3607  attribute.getName())
3608  .Case("omp.is_target_device",
3609  [&](Attribute attr) {
3610  if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
3611  llvm::OpenMPIRBuilderConfig &config =
3612  moduleTranslation.getOpenMPBuilder()->Config;
3613  config.setIsTargetDevice(deviceAttr.getValue());
3614  return success();
3615  }
3616  return failure();
3617  })
3618  .Case("omp.is_gpu",
3619  [&](Attribute attr) {
3620  if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
3621  llvm::OpenMPIRBuilderConfig &config =
3622  moduleTranslation.getOpenMPBuilder()->Config;
3623  config.setIsGPU(gpuAttr.getValue());
3624  return success();
3625  }
3626  return failure();
3627  })
3628  .Case("omp.host_ir_filepath",
3629  [&](Attribute attr) {
3630  if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
3631  llvm::OpenMPIRBuilder *ompBuilder =
3632  moduleTranslation.getOpenMPBuilder();
3633  ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
3634  return success();
3635  }
3636  return failure();
3637  })
3638  .Case("omp.flags",
3639  [&](Attribute attr) {
3640  if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
3641  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
3642  return failure();
3643  })
3644  .Case("omp.version",
3645  [&](Attribute attr) {
3646  if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
3647  llvm::OpenMPIRBuilder *ompBuilder =
3648  moduleTranslation.getOpenMPBuilder();
3649  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
3650  versionAttr.getVersion());
3651  return success();
3652  }
3653  return failure();
3654  })
3655  .Case("omp.declare_target",
3656  [&](Attribute attr) {
3657  if (auto declareTargetAttr =
3658  dyn_cast<omp::DeclareTargetAttr>(attr))
3659  return convertDeclareTargetAttr(op, declareTargetAttr,
3660  moduleTranslation);
3661  return failure();
3662  })
3663  .Case("omp.requires",
3664  [&](Attribute attr) {
3665  if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
3666  using Requires = omp::ClauseRequires;
3667  Requires flags = requiresAttr.getValue();
3668  llvm::OpenMPIRBuilderConfig &config =
3669  moduleTranslation.getOpenMPBuilder()->Config;
3670  config.setHasRequiresReverseOffload(
3671  bitEnumContainsAll(flags, Requires::reverse_offload));
3672  config.setHasRequiresUnifiedAddress(
3673  bitEnumContainsAll(flags, Requires::unified_address));
3674  config.setHasRequiresUnifiedSharedMemory(
3675  bitEnumContainsAll(flags, Requires::unified_shared_memory));
3676  config.setHasRequiresDynamicAllocators(
3677  bitEnumContainsAll(flags, Requires::dynamic_allocators));
3678  return success();
3679  }
3680  return failure();
3681  })
3682  .Default([](Attribute) {
3683  // Fall through for omp attributes that do not require lowering.
3684  return success();
3685  })(attribute.getValue());
3686 
3687  return failure();
3688 }
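// Illustrative example of the module-level attributes handled above
// (schematic MLIR; the exact attribute syntax may differ from the dialect's
// current printer):
//   module attributes {omp.is_target_device = true, omp.is_gpu = true,
//                      omp.version = #omp.version<version = 51>} {
//     ...
//   }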
3689 
3690 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
3691 /// (including OpenMP runtime calls).
3692 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
3693  Operation *op, llvm::IRBuilderBase &builder,
3694  LLVM::ModuleTranslation &moduleTranslation) const {
3695 
3696  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3697  if (ompBuilder->Config.isTargetDevice()) {
3698  if (isTargetDeviceOp(op)) {
3699  return convertTargetDeviceOp(op, builder, moduleTranslation);
3700  } else {
3701  return convertTargetOpsInNest(op, builder, moduleTranslation);
3702  }
3703  }
3704  return convertHostOrTargetOperation(op, builder, moduleTranslation);
3705 }
3706 
3707 void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {
3708  registry.insert<omp::OpenMPDialect>();
3709  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
3710  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
3711  });
3712 }
3713 
3714 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) {
3715  DialectRegistry registry;
3716  registerOpenMPDialectTranslation(registry);
3717  context.appendDialectRegistry(registry);
3718 }
@ None
static llvm::Value * getRefPtrIfDeclareTarget(mlir::Value value, LLVM::ModuleTranslation &moduleTranslation)
static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::Function *func)
static LogicalResult convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static llvm::omp::OrderKind convertOrderKind(std::optional< omp::ClauseOrderKind > o)
Convert Order attribute to llvm::omp::OrderKind.
static LogicalResult convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered_region' operation into LLVM IR using OpenMPIRBuilder.
static llvm::OpenMPIRBuilder::InsertPointTy findAllocaInsertPoint(llvm::IRBuilderBase &builder, const LLVM::ModuleTranslation &moduleTranslation)
Find the insertion point for allocas given the current insertion point for normal operations in the b...
static LogicalResult convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an omp.atomic.write operation to LLVM IR.
static OwningAtomicReductionGen makeAtomicReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible atomic reduction generator for the given reduction declaration.
static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op)
Converts an LLVM dialect binary operation to the corresponding enum value for atomicrmw supported bin...
static llvm::AtomicOrdering convertAtomicOrdering(std::optional< omp::ClauseMemoryOrderKind > ao)
Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, const SmallVector< Value > &devPtrOperands={}, const SmallVector< Value > &devAddrOperands={}, bool isTargetParams=false)
static LogicalResult convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult allocAndInitializeReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef)
static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
static bool isTargetDeviceOp(Operation *op)
static LogicalResult inlineOmpRegionCleanup(llvm::SmallVectorImpl< Region * > &cleanupRegions, llvm::ArrayRef< llvm::Value * > privateVariables, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, StringRef regionName, bool shouldLoadCleanupRegionArg=true)
handling of DeclareReductionOp's cleanup region
std::vector< llvm::Value * > calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, bool isArrayTy, mlir::OperandRange bounds)
This function calculates the array/pointer offset for map data provided with bounds operations,...
void collectMapDataFromMapOperands(MapInfoData &mapData, llvm::SmallVectorImpl< Value > &mapOperands, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::IRBuilderBase &builder)
static llvm::SmallString< 64 > getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, llvm::OpenMPIRBuilder &ompBuilder)
static OwningReductionGen makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible reduction generator for the given reduction declaration.
static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts the OpenMP parallel operation to LLVM IR.
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static LogicalResult inlineConvertOmpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::Value * > *continuationBlockArgs=nullptr)
Translates the blocks contained in the given region and appends them to at the current insertion poin...
static LogicalResult convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP Threadprivate operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult createReductionsAndCleanup(OP op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, ArrayRef< llvm::Value * > privateReductionVariables, ArrayRef< bool > isByRef)
LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
Lowers the FlagsAttr which is applied to the module on the device pass when offloading,...
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertToCaptureClauseKind(mlir::omp::DeclareTargetCaptureClause captureClasue)
uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl)
static void mapInitializationArg(T loop, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, unsigned i)
Map input argument to all reduction initialization regions.
static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, bool isTargetParams, int mapDataParentIdx=-1)
static ArrayRef< bool > getIsByRef(std::optional< ArrayRef< bool >> attr)
static bool targetOpSupported(Operation &opInst)
static int getMapDataMemberIdx(MapInfoData &mapData, mlir::omp::MapInfoOp memberOp)
static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP)
static void buildDependData(std::optional< ArrayAttr > depends, OperandRange dependVars, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::OpenMPIRBuilder::DependData > &dds)
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind)
Convert ProcBindKind from MLIR-generated enum to LLVM enum.
static void allocByValReductionVars(T loop, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRefs)
Allocate space for privatized reduction variables.
static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void processMapMembersWithParent(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag)
static LogicalResult convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause)
static void collectReductionDecls(T loop, SmallVectorImpl< omp::DeclareReductionOp > &reductions)
Populates reductions with reduction declarations used in the given loop.
static mlir::omp::MapInfoOp getFirstOrLastMappedMemberPtr(mlir::omp::MapInfoOp mapInfo, bool first)
static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert omp.atomic.read operation to LLVM IR.
static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, omp::TargetOp targetOp, llvm::StringRef parentName="")
static void collectReductionInfo(T loop, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< OwningReductionGen > &owningReductionGens, SmallVectorImpl< OwningAtomicReductionGen > &owningAtomicReductionGens, const ArrayRef< llvm::Value * > privateReductionVariables, SmallVectorImpl< llvm::OpenMPIRBuilder::ReductionInfo > &reductionInfos)
Collect reduction info.
static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Given an OpenMP MLIR operation, create the corresponding LLVM IR (including OpenMP runtime calls).
static bool checkIfPointerMap(mlir::omp::MapInfoOp mapOp)
static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::BasicBlock * convertOmpOpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, SmallVectorImpl< llvm::PHINode * > *continuationBlockPHIs=nullptr)
Converts the given region that appears within an OpenMP dialect operation to LLVM IR,...
static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
llvm::Value * getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::Value *basePointer, llvm::Type *baseType, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static bool isDeclareTargetLink(mlir::Value value)
#define MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CLASS_NAME)
Definition: TypeID.h:274
A RAII class that on construction replaces the region arguments of the parallel op (which correspond ...
OmpParallelOpConversionManager(omp::ParallelOp opInst)
Attributes are known-constant values of operations.
Definition: Attributes.h:25
Block represents an ordered list of Operations.
Definition: Block.h:31
BlockArgument getArgument(unsigned i)
Definition: Block.h:127
unsigned getNumArguments()
Definition: Block.h:126
Operation & back()
Definition: Block.h:150
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:243
Operation & front()
Definition: Block.h:151
iterator begin()
Definition: Block.h:141
The main mechanism for performing data layout queries.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
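For example, a byte size can be derived from this query roughly as follows (a sketch assuming dl is an mlir::DataLayout and type is a fixed-size MLIR type):
// Query the size in bits and convert to bytes; getFixedValue() asserts the
// quantity is not scalable.
llvm::TypeSize sizeInBits = dl.getTypeSizeInBits(type);
uint64_t sizeInBytes = sizeInBits.getFixedValue() / 8;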
An attribute that represents a reference to a dense integer vector or tensor object.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
void addExtension(std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
Definition: PatternMatch.h:766
Base class for dialect interfaces providing translation to LLVM IR.
virtual LogicalResult amendOperation(Operation *op, ArrayRef< llvm::Instruction * > instructions, NamedAttribute attribute, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to act on an operation that has dialect attributes from the derive...
virtual LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to provide translation of the operations to LLVM IR.
Concrete CRTP base class for ModuleTranslation stack frames.
Implementation class for module translation.
llvm::Value * lookupValue(Value value) const
Finds an LLVM IR value corresponding to the given MLIR value.
LogicalResult convertBlock(Block &bb, bool ignoreArguments, llvm::IRBuilderBase &builder)
Translates the contents of the given block to LLVM IR using this translator.
SmallVector< llvm::Value * > lookupValues(ValueRange values)
Looks up the remapped LLVM IR values corresponding to the given list of MLIR values.
llvm::BasicBlock * lookupBlock(Block *block) const
Finds an LLVM IR basic block that corresponds to the given MLIR block.
SymbolTableCollection & symbolTable()
llvm::Type * convertType(Type type)
Converts the type from MLIR LLVM dialect to LLVM.
llvm::OpenMPIRBuilder * getOpenMPBuilder()
Returns the OpenMP IR builder associated with the LLVM IR module being constructed.
llvm::LLVMContext & getLLVMContext() const
Returns the LLVM context in which the IR is being constructed.
llvm::GlobalValue * lookupGlobal(Operation *op)
Finds an LLVM IR global value that corresponds to the given MLIR operation defining a global value.
llvm::Module * getLLVMModule()
Returns the LLVM module in which the IR is being constructed.
llvm::Function * lookupFunction(StringRef name) const
Finds an LLVM IR function by its name.
void mapBlock(Block *mlir, llvm::BasicBlock *llvm)
Stores the mapping between an MLIR block and LLVM IR basic block.
WalkResult stackWalk(llvm::function_ref< WalkResult(const T &)> callback) const
Calls callback for every ModuleTranslation stack frame of type T starting from the top of the stack.
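A minimal sketch of the walk pattern; MyFrame stands in for any StackFrameBase subclass and is purely illustrative:
// Walk frames from the innermost outward and stop at the first MyFrame.
WalkResult result = moduleTranslation.stackWalk<MyFrame>(
    [&](const MyFrame &frame) {
      // Use the innermost frame's state here, then interrupt the walk.
      return WalkResult::interrupt();
    });
if (result.wasInterrupted()) {
  // A MyFrame was found on the stack.
}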
void forgetMapping(Region &region)
Removes the mapping for blocks contained in the region and values defined in these blocks.
MLIRContext & getContext()
Returns the MLIR context of the module being translated.
void mapValue(Value mlir, llvm::Value *llvm)
Stores the mapping between an MLIR value and its LLVM IR counterpart.
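lookupValue and mapValue are typically used as a pair; a sketch in which mlirOperand, mlirResult, and the bitcast are illustrative assumptions:
// Fetch the LLVM IR value for an already-translated MLIR operand, derive a new
// value from it, and register the result so later lookups can find it.
llvm::Value *operand = moduleTranslation.lookupValue(mlirOperand);
llvm::Value *derived = builder.CreateBitCast(operand, builder.getPtrTy());
moduleTranslation.mapValue(mlirResult, derived);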
T findInstanceOf()
Return an instance of the given location type if one is nested under the current location.
Definition: Location.h:41
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:207
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:49
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:221
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:559
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:401
void cloneRegionBefore(Region &region, Region &parent, Region::iterator before, IRMapping &mapping)
Clone the blocks that belong to "region" before the given position in another region "parent".
Definition: Builders.cpp:586
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:42
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:345
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:402
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:793
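For instance, a translation helper might gather nested map clauses with a walk like the following sketch (the op variable and the counting are illustrative):
// Visit every omp::MapInfoOp nested under `op`, including `op` itself if it is one.
unsigned numMapClauses = 0;
op->walk([&](omp::MapInfoOp mapOp) { ++numMapClauses; });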
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:341
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:682
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator range over the underlying Values.
Definition: Operation.h:373
InFlightDiagnostic emitOpError(const Twine &message={})
Emit an error with the op name prefixed, like "'dim' op " which is convenient for verifiers.
Definition: Operation.cpp:671
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
BlockArgListType getArguments()
Definition: Region.h:81
bool empty()
Definition: Region.h:60
iterator end()
Definition: Region.h:56
unsigned getNumArguments()
Definition: Region.h:123
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
BlockArgument getArgument(unsigned i)
Definition: Region.h:124
Block & front()
Definition: Region.h:65
void mergeBlocks(Block *source, Block *dest, ValueRange argValues=std::nullopt)
Inline the operations of block 'source' into the end of block 'dest'.
@ Private
The symbol is private and may only be referenced by SymbolRefAttrs local to the operations within the...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:33
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: Visitors.h:55
static WalkResult interrupt()
Definition: Visitors.h:50
Include the generated interface declarations.
Definition: CallGraph.h:229
void connectPHINodes(Region &region, const ModuleTranslation &state)
For all blocks in the region that were converted to LLVM IR using the given ModuleTranslation,...
llvm::Constant * createMappingInformation(Location loc, llvm::OpenMPIRBuilder &builder)
Create a constant string representing the mapping information extracted from the MLIR location inform...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:285
Runtime
Potential runtimes for AMD GPU kernels.
Definition: Runtimes.h:15
Include the generated interface declarations.
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
Definition: RegionUtils.cpp:27
SetVector< Block * > getBlocksSortedByDominance(Region &region)
Gets a list of blocks that is sorted according to dominance.
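A hedged sketch of how the sorted blocks might be translated in order, assuming the blocks were already mapped to LLVM IR basic blocks via mapBlock:
// Convert each block in dominance order, then fix up PHI nodes for the region.
for (Block *bb : getBlocksSortedByDominance(region)) {
  builder.SetInsertPoint(moduleTranslation.lookupBlock(bb));
  if (failed(moduleTranslation.convertBlock(*bb, /*ignoreArguments=*/bb->isEntryBlock(),
                                            builder)))
    return failure();
}
connectPHINodes(region, moduleTranslation);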
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:305
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
void registerOpenMPDialectTranslation(DialectRegistry &registry)
Register the OpenMP dialect and the translation from it to LLVM IR in the given registry.
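Typical usage follows the standard MLIR registration pattern; a sketch assuming an existing MLIRContext named context:
// Make the OpenMP-to-LLVM-IR translation interface available in the context.
DialectRegistry registry;
registerOpenMPDialectTranslation(registry);
context.appendDialectRegistry(registry);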
Operation * clone(OpBuilder &b, Operation *op, TypeRange newResultTypes, ValueRange newOperands)
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
llvm::SmallVector< bool, 4 > IsAMember
llvm::SmallVector< llvm::Value *, 4 > OriginalValue
llvm::SmallVector< bool, 4 > IsDeclareTarget
llvm::SmallVector< llvm::Type *, 4 > BaseType
void append(MapInfoData &CurInfo)
Append arrays in CurInfo.
llvm::SmallVector< mlir::Operation *, 4 > MapClause
RAII object calling stackPush/stackPop on construction/destruction.