1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
18 #include "mlir/IR/IRMapping.h"
19 #include "mlir/IR/Operation.h"
20 #include "mlir/Support/LLVM.h"
24 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/TypeSwitch.h"
28 #include "llvm/Frontend/OpenMP/OMPConstants.h"
29 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
30 #include "llvm/IR/DebugInfoMetadata.h"
31 #include "llvm/IR/IRBuilder.h"
32 #include "llvm/IR/ReplaceConstant.h"
33 #include "llvm/Support/FileSystem.h"
34 #include "llvm/TargetParser/Triple.h"
35 #include "llvm/Transforms/Utils/ModuleUtils.h"
36 
37 #include <any>
38 #include <cstdint>
39 #include <iterator>
40 #include <numeric>
41 #include <optional>
42 #include <utility>
43 
44 using namespace mlir;
45 
46 namespace {
47 static llvm::omp::ScheduleKind
48 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
49  if (!schedKind.has_value())
50  return llvm::omp::OMP_SCHEDULE_Default;
51  switch (schedKind.value()) {
52  case omp::ClauseScheduleKind::Static:
53  return llvm::omp::OMP_SCHEDULE_Static;
54  case omp::ClauseScheduleKind::Dynamic:
55  return llvm::omp::OMP_SCHEDULE_Dynamic;
56  case omp::ClauseScheduleKind::Guided:
57  return llvm::omp::OMP_SCHEDULE_Guided;
58  case omp::ClauseScheduleKind::Auto:
59  return llvm::omp::OMP_SCHEDULE_Auto;
60  case omp::ClauseScheduleKind::Runtime:
61  return llvm::omp::OMP_SCHEDULE_Runtime;
62  }
63  llvm_unreachable("unhandled schedule clause argument");
64 }
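
// Illustrative sketch (not from the original file): the schedule kind computed
// here is ultimately what convertOmpWsloop below hands to the OpenMPIRBuilder,
// using the identifiers defined in that function:
//
//   ompBuilder->applyWorkshareLoop(
//       ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(),
//       convertToScheduleKind(wsloopOp.getScheduleVal()), chunk, isSimd,
//       /*monotonic*/ false, /*nonmonotonic*/ false, isOrdered);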
65 
66 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
67 /// insertion points for allocas.
68 class OpenMPAllocaStackFrame
69  : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
70 public:
71  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)
72 
73  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
74  : allocaInsertPoint(allocaIP) {}
75  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
76 };
77 
78 /// ModuleTranslation stack frame containing the partial mapping between MLIR
79 /// values and their LLVM IR equivalents.
80 class OpenMPVarMappingStackFrame
81  : public LLVM::ModuleTranslation::StackFrameBase<
82  OpenMPVarMappingStackFrame> {
83 public:
84  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame)
85 
86  explicit OpenMPVarMappingStackFrame(
87  const DenseMap<Value, llvm::Value *> &mapping)
88  : mapping(mapping) {}
89 
90  DenseMap<Value, llvm::Value *> mapping;
91 };
92 } // namespace
93 
94 /// Find the insertion point for allocas given the current insertion point for
95 /// normal operations in the builder.
96 static llvm::OpenMPIRBuilder::InsertPointTy
97 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
98  const LLVM::ModuleTranslation &moduleTranslation) {
99  // If there is an alloca insertion point on stack, i.e. we are in a nested
100  // operation and a specific point was provided by some surrounding operation,
101  // use it.
102  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
103  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
104  [&](const OpenMPAllocaStackFrame &frame) {
105  allocaInsertPoint = frame.allocaInsertPoint;
106  return WalkResult::interrupt();
107  });
108  if (walkResult.wasInterrupted())
109  return allocaInsertPoint;
110 
111  // Otherwise, insert to the entry block of the surrounding function.
112  // If the current IRBuilder InsertPoint is the function's entry, it cannot
113  // also be used for alloca insertion which would result in insertion order
114  // confusion. Create a new BasicBlock for the Builder and use the entry block
115  // for the allocs.
116  // TODO: Create a dedicated alloca BasicBlock at function creation such that
117  // we do not need to move the current InsertPoint here.
118  if (builder.GetInsertBlock() ==
119  &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
120  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
121  "Assuming end of basic block");
122  llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
123  builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
124  builder.GetInsertBlock()->getNextNode());
125  builder.CreateBr(entryBB);
126  builder.SetInsertPoint(entryBB);
127  }
128 
129  llvm::BasicBlock &funcEntryBlock =
130  builder.GetInsertBlock()->getParent()->getEntryBlock();
131  return llvm::OpenMPIRBuilder::InsertPointTy(
132  &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
133 }
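
// Example (hedged sketch, not part of the original file): constructs that
// translate nested regions pin the alloca insertion point by pushing a frame
// onto the ModuleTranslation stack; that frame is what the stackWalk above
// recovers. The shape below mirrors the task/teams/parallel conversions later
// in this file; "omp.some.region" is just a placeholder block name.
//
//   auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
//     // While this frame is alive, findAllocaInsertPoint() returns allocaIP
//     // instead of falling back to the function entry block.
//     LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
//         moduleTranslation, allocaIP);
//     builder.restoreIP(codeGenIP);
//     convertOmpOpRegions(region, "omp.some.region", builder, moduleTranslation,
//                         bodyGenStatus);
//   };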
134 
135 /// Converts the given region that appears within an OpenMP dialect operation to
136 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
137 /// region, and a branch from any block with a successor-less OpenMP terminator
138 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
139 /// of the continuation block if provided.
140 static llvm::BasicBlock *convertOmpOpRegions(
141  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
142  LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
143  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
144  llvm::BasicBlock *continuationBlock =
145  splitBB(builder, true, "omp.region.cont");
146  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
147 
148  llvm::LLVMContext &llvmContext = builder.getContext();
149  for (Block &bb : region) {
150  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
151  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
152  builder.GetInsertBlock()->getNextNode());
153  moduleTranslation.mapBlock(&bb, llvmBB);
154  }
155 
156  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
157 
158  // Terminators (namely YieldOp) may be forwarding values to the region that
159  // need to be available in the continuation block. Collect the types of these
160  // operands in preparation of creating PHI nodes.
161  SmallVector<llvm::Type *> continuationBlockPHITypes;
162  bool operandsProcessed = false;
163  unsigned numYields = 0;
164  for (Block &bb : region.getBlocks()) {
165  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
166  if (!operandsProcessed) {
167  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
168  continuationBlockPHITypes.push_back(
169  moduleTranslation.convertType(yield->getOperand(i).getType()));
170  }
171  operandsProcessed = true;
172  } else {
173  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
174  "mismatching number of values yielded from the region");
175  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
176  llvm::Type *operandType =
177  moduleTranslation.convertType(yield->getOperand(i).getType());
178  (void)operandType;
179  assert(continuationBlockPHITypes[i] == operandType &&
180  "values of mismatching types yielded from the region");
181  }
182  }
183  numYields++;
184  }
185  }
186 
187  // Insert PHI nodes in the continuation block for any values forwarded by the
188  // terminators in this region.
189  if (!continuationBlockPHITypes.empty())
190  assert(
191  continuationBlockPHIs &&
192  "expected continuation block PHIs if converted regions yield values");
193  if (continuationBlockPHIs) {
194  llvm::IRBuilderBase::InsertPointGuard guard(builder);
195  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
196  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
197  for (llvm::Type *ty : continuationBlockPHITypes)
198  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
199  }
200 
201  // Convert blocks one by one in topological order to ensure
202  // defs are converted before uses.
203  SetVector<Block *> blocks = getBlocksSortedByDominance(region);
204  for (Block *bb : blocks) {
205  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
206  // Retarget the branch of the entry block to the entry block of the
207  // converted region (regions are single-entry).
208  if (bb->isEntryBlock()) {
209  assert(sourceTerminator->getNumSuccessors() == 1 &&
210  "provided entry block has multiple successors");
211  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
212  "ContinuationBlock is not the successor of the entry block");
213  sourceTerminator->setSuccessor(0, llvmBB);
214  }
215 
216  llvm::IRBuilderBase::InsertPointGuard guard(builder);
217  if (failed(
218  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) {
219  bodyGenStatus = failure();
220  return continuationBlock;
221  }
222 
223  // Special handling for `omp.yield` and `omp.terminator` (we may have more
224  // than one): they return control to the parent OpenMP dialect operation,
225  // so replace them with a branch to the continuation block. We handle this
226  // here to avoid relying on inter-function communication through the
227  // ModuleTranslation class to set up the correct insertion point. This is
228  // also consistent with MLIR's idiom of handling special region terminators
229  // in the same code that handles the region-owning operation.
230  Operation *terminator = bb->getTerminator();
231  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
232  builder.CreateBr(continuationBlock);
233 
234  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
235  (*continuationBlockPHIs)[i]->addIncoming(
236  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
237  }
238  }
239  // After all blocks have been traversed and values mapped, connect the PHI
240  // nodes to the results of preceding blocks.
241  LLVM::detail::connectPHINodes(region, moduleTranslation);
242 
243  // Remove the blocks and values defined in this region from the mapping since
244  // they are not visible outside of this region. This allows the same region to
245  // be converted several times, that is cloned, without clashes, and slightly
246  // speeds up the lookups.
247  moduleTranslation.forgetMapping(region);
248 
249  return continuationBlock;
250 }
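
// To illustrate the PHI handling above: a region whose blocks end in
// `omp.yield(%a : i32)` and `omp.yield(%b : i32)` is lowered roughly to the
// following LLVM IR shape (illustrative, not from the original file):
//
//   ...
//     br label %omp.region.cont    ; was omp.yield(%a : i32)
//   ...
//     br label %omp.region.cont    ; was omp.yield(%b : i32)
//   omp.region.cont:
//     %0 = phi i32 [ %a.translated, ... ], [ %b.translated, ... ]
//
// and the created PHI is handed back to the caller through
// `continuationBlockPHIs`.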
251 
252 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
253 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
254  switch (kind) {
255  case omp::ClauseProcBindKind::Close:
256  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
257  case omp::ClauseProcBindKind::Master:
258  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
259  case omp::ClauseProcBindKind::Primary:
260  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
261  case omp::ClauseProcBindKind::Spread:
262  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
263  }
264  llvm_unreachable("Unknown ClauseProcBindKind kind");
265 }
266 
267 /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
268 static LogicalResult
269 convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
270  LLVM::ModuleTranslation &moduleTranslation) {
271  auto maskedOp = cast<omp::MaskedOp>(opInst);
272  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
273  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
274  // relying on captured variables.
275  LogicalResult bodyGenStatus = success();
276 
277  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
278  // MaskedOp has only one region associated with it.
279  auto &region = maskedOp.getRegion();
280  builder.restoreIP(codeGenIP);
281  convertOmpOpRegions(region, "omp.masked.region", builder, moduleTranslation,
282  bodyGenStatus);
283  };
284 
285  // TODO: Perform finalization actions for variables. This has to be
286  // called for variables which have destructors/finalizers.
287  auto finiCB = [&](InsertPointTy codeGenIP) {};
288 
289  llvm::Value *filterVal = nullptr;
290  if (auto filterVar = maskedOp.getFilteredThreadId()) {
291  filterVal = moduleTranslation.lookupValue(filterVar);
292  } else {
293  llvm::LLVMContext &llvmContext = builder.getContext();
294  filterVal =
295  llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), /*V=*/0);
296  }
297  assert(filterVal != nullptr);
298  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
299  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMasked(
300  ompLoc, bodyGenCB, finiCB, filterVal));
301  return success();
302 }
303 
304 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
305 static LogicalResult
306 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
307  LLVM::ModuleTranslation &moduleTranslation) {
308  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
309  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
310  // relying on captured variables.
311  LogicalResult bodyGenStatus = success();
312 
313  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
314  // MasterOp has only one region associated with it.
315  auto &region = cast<omp::MasterOp>(opInst).getRegion();
316  builder.restoreIP(codeGenIP);
317  convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation,
318  bodyGenStatus);
319  };
320 
321  // TODO: Perform finalization actions for variables. This has to be
322  // called for variables which have destructors/finalizers.
323  auto finiCB = [&](InsertPointTy codeGenIP) {};
324 
325  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
326  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
327  ompLoc, bodyGenCB, finiCB));
328  return success();
329 }
330 
331 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
332 static LogicalResult
333 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
334  LLVM::ModuleTranslation &moduleTranslation) {
335  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
336  auto criticalOp = cast<omp::CriticalOp>(opInst);
337  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
338  // relying on captured variables.
339  LogicalResult bodyGenStatus = success();
340 
341  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
342  // CriticalOp has only one region associated with it.
343  auto &region = cast<omp::CriticalOp>(opInst).getRegion();
344  builder.restoreIP(codeGenIP);
345  convertOmpOpRegions(region, "omp.critical.region", builder,
346  moduleTranslation, bodyGenStatus);
347  };
348 
349  // TODO: Perform finalization actions for variables. This has to be
350  // called for variables which have destructors/finalizers.
351  auto finiCB = [&](InsertPointTy codeGenIP) {};
352 
353  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
354  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
355  llvm::Constant *hint = nullptr;
356 
357  // If it has a name, it probably has a hint too.
358  if (criticalOp.getNameAttr()) {
359  // The verifiers in the OpenMP dialect guarantee that all the pointers are
360  // non-null.
361  auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
362  auto criticalDeclareOp =
363  SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
364  symbolRef);
365  hint = llvm::ConstantInt::get(
366  llvm::Type::getInt32Ty(llvmContext),
367  static_cast<int>(criticalDeclareOp.getHintVal()));
368  }
369  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(
370  ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint));
371  return success();
372 }
373 
374 /// Populates `reductions` with reduction declarations used in the given loop.
375 template <typename T>
376 static void
377 collectReductionDecls(T loop,
378  SmallVectorImpl<omp::DeclareReductionOp> &reductions) {
379  std::optional<ArrayAttr> attr = loop.getReductions();
380  if (!attr)
381  return;
382 
383  reductions.reserve(reductions.size() + loop.getNumReductionVars());
384  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
385  reductions.push_back(
386  SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
387  loop, symbolRef));
388  }
389 }
390 
391 /// Translates the blocks contained in the given region and appends them at
392 /// the current insertion point of `builder`. The operations of the entry block
393 /// are appended to the current insertion block. If set, `continuationBlockArgs`
394 /// is populated with translated values that correspond to the values
395 /// omp.yield'ed from the region.
396 static LogicalResult inlineConvertOmpRegions(
397  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
398  LLVM::ModuleTranslation &moduleTranslation,
399  SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
400  if (region.empty())
401  return success();
402 
403  // Special case for single-block regions that don't create additional blocks:
404  // insert operations without creating additional blocks.
405  if (llvm::hasSingleElement(region)) {
406  llvm::Instruction *potentialTerminator =
407  builder.GetInsertBlock()->empty() ? nullptr
408  : &builder.GetInsertBlock()->back();
409 
410  if (potentialTerminator && potentialTerminator->isTerminator())
411  potentialTerminator->removeFromParent();
412  moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
413 
414  if (failed(moduleTranslation.convertBlock(
415  region.front(), /*ignoreArguments=*/true, builder)))
416  return failure();
417 
418  // The continuation arguments are simply the translated terminator operands.
419  if (continuationBlockArgs)
420  llvm::append_range(
421  *continuationBlockArgs,
422  moduleTranslation.lookupValues(region.front().back().getOperands()));
423 
424  // Drop the mapping that is no longer necessary so that the same region can
425  // be processed multiple times.
426  moduleTranslation.forgetMapping(region);
427 
428  if (potentialTerminator && potentialTerminator->isTerminator()) {
429  llvm::BasicBlock *block = builder.GetInsertBlock();
430  if (block->empty()) {
431  // this can happen for really simple reduction init regions e.g.
432  // %0 = llvm.mlir.constant(0 : i32) : i32
433  // omp.yield(%0 : i32)
434  // because the llvm.mlir.constant (MLIR op) isn't converted into any
435  // llvm op
436  potentialTerminator->insertInto(block, block->begin());
437  } else {
438  potentialTerminator->insertAfter(&block->back());
439  }
440  }
441 
442  return success();
443  }
444 
445  LogicalResult bodyGenStatus = success();
446  SmallVector<llvm::PHINode *> phis;
447  llvm::BasicBlock *continuationBlock = convertOmpOpRegions(
448  region, blockName, builder, moduleTranslation, bodyGenStatus, &phis);
449  if (failed(bodyGenStatus))
450  return failure();
451  if (continuationBlockArgs)
452  llvm::append_range(*continuationBlockArgs, phis);
453  builder.SetInsertPoint(continuationBlock,
454  continuationBlock->getFirstInsertionPt());
455  return success();
456 }
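
// Typical use of the helper above (hedged sketch mirroring the reduction
// handling further down in this file): inline a declaration region and read
// back the value it yields.
//
//   SmallVector<llvm::Value *> phis;
//   if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
//                                      "omp.reduction.neutral", builder,
//                                      moduleTranslation, &phis)))
//     return failure();
//   // phis[0] now holds the translated neutral element yielded by the region.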
457 
458 namespace {
459 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
460 /// store lambdas with capture.
461 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
462  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
463  llvm::Value *&)>;
464 using OwningAtomicReductionGen =
465  std::function<llvm::OpenMPIRBuilder::InsertPointTy(
466  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
467  llvm::Value *)>;
468 } // namespace
469 
470 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
471 /// reduction declaration. The generator uses `builder` but ignores its
472 /// insertion point.
473 static OwningReductionGen
474 makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
475  LLVM::ModuleTranslation &moduleTranslation) {
476  // The lambda is mutable because we need access to non-const methods of decl
477  // (which aren't actually mutating it), and we must capture decl by-value to
478  // avoid the dangling reference after the parent function returns.
479  OwningReductionGen gen =
480  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
481  llvm::Value *lhs, llvm::Value *rhs,
482  llvm::Value *&result) mutable {
483  Region &reductionRegion = decl.getReductionRegion();
484  moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs);
485  moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs);
486  builder.restoreIP(insertPoint);
487  SmallVector<llvm::Value *> phis;
488  if (failed(inlineConvertOmpRegions(reductionRegion,
489  "omp.reduction.nonatomic.body",
490  builder, moduleTranslation, &phis)))
491  return llvm::OpenMPIRBuilder::InsertPointTy();
492  assert(phis.size() == 1);
493  result = phis[0];
494  return builder.saveIP();
495  };
496  return gen;
497 }
498 
499 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
500 /// given reduction declaration. The generator uses `builder` but ignores its
501 /// insertion point. Returns null if there is no atomic region available in the
502 /// reduction declaration.
503 static OwningAtomicReductionGen
504 makeAtomicReductionGen(omp::DeclareReductionOp decl,
505  llvm::IRBuilderBase &builder,
506  LLVM::ModuleTranslation &moduleTranslation) {
507  if (decl.getAtomicReductionRegion().empty())
508  return OwningAtomicReductionGen();
509 
510  // The lambda is mutable because we need access to non-const methods of decl
511  // (which aren't actually mutating it), and we must capture decl by-value to
512  // avoid the dangling reference after the parent function returns.
513  OwningAtomicReductionGen atomicGen =
514  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
515  llvm::Value *lhs, llvm::Value *rhs) mutable {
516  Region &atomicRegion = decl.getAtomicReductionRegion();
517  moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs);
518  moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs);
519  builder.restoreIP(insertPoint);
520  SmallVector<llvm::Value *> phis;
521  if (failed(inlineConvertOmpRegions(atomicRegion,
522  "omp.reduction.atomic.body", builder,
523  moduleTranslation, &phis)))
524  return llvm::OpenMPIRBuilder::InsertPointTy();
525  assert(phis.empty());
526  return builder.saveIP();
527  };
528  return atomicGen;
529 }
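
// For reference, the two generators above consume declarations of roughly this
// shape (MLIR shown as a comment for illustration; the atomic region is
// optional and the exact syntax is defined by the omp dialect, not here):
//
//   omp.declare_reduction @add_f32 : f32
//   init {
//   ^bb0(%arg: f32):
//     %0 = llvm.mlir.constant(0.0 : f32) : f32
//     omp.yield(%0 : f32)
//   } combiner {
//   ^bb0(%lhs: f32, %rhs: f32):
//     %0 = llvm.fadd %lhs, %rhs : f32
//     omp.yield(%0 : f32)
//   } atomic {
//   ^bb0(%dst: !llvm.ptr, %src: !llvm.ptr):
//     %0 = llvm.load %src : !llvm.ptr -> f32
//     llvm.atomicrmw fadd %dst, %0 monotonic : !llvm.ptr, f32
//     omp.yield
//   }
//
// makeReductionGen inlines the combiner region with its block arguments mapped
// to lhs/rhs; makeAtomicReductionGen does the same for the atomic region.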
530 
531 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
532 static LogicalResult
533 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
534  LLVM::ModuleTranslation &moduleTranslation) {
535  auto orderedOp = cast<omp::OrderedOp>(opInst);
536 
537  omp::ClauseDepend dependType = *orderedOp.getDependTypeVal();
538  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
539  unsigned numLoops = *orderedOp.getNumLoopsVal();
540  SmallVector<llvm::Value *> vecValues =
541  moduleTranslation.lookupValues(orderedOp.getDependVecVars());
542 
543  size_t indexVecValues = 0;
544  while (indexVecValues < vecValues.size()) {
545  SmallVector<llvm::Value *> storeValues;
546  storeValues.reserve(numLoops);
547  for (unsigned i = 0; i < numLoops; i++) {
548  storeValues.push_back(vecValues[indexVecValues]);
549  indexVecValues++;
550  }
551  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
552  findAllocaInsertPoint(builder, moduleTranslation);
553  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
554  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
555  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
556  }
557  return success();
558 }
559 
560 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
561 /// OpenMPIRBuilder.
562 static LogicalResult
563 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
564  LLVM::ModuleTranslation &moduleTranslation) {
565  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
566  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
567 
568  // TODO: The code generation for ordered simd directive is not supported yet.
569  if (orderedRegionOp.getSimd())
570  return failure();
571 
572  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
573  // relying on captured variables.
574  LogicalResult bodyGenStatus = success();
575 
576  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
577  // OrderedOp has only one region associated with it.
578  auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
579  builder.restoreIP(codeGenIP);
580  convertOmpOpRegions(region, "omp.ordered.region", builder,
581  moduleTranslation, bodyGenStatus);
582  };
583 
584  // TODO: Perform finalization actions for variables. This has to be
585  // called for variables which have destructors/finalizers.
586  auto finiCB = [&](InsertPointTy codeGenIP) {};
587 
588  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
589  builder.restoreIP(
590  moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
591  ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getSimd()));
592  return bodyGenStatus;
593 }
594 
595 /// Allocate space for privatized reduction variables.
596 template <typename T>
597 static void allocByValReductionVars(
598  T loop, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
599  LLVM::ModuleTranslation &moduleTranslation,
600  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
601  SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
602  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
603  DenseMap<Value, llvm::Value *> &reductionVariableMap,
604  llvm::ArrayRef<bool> isByRefs) {
605  llvm::IRBuilderBase::InsertPointGuard guard(builder);
606  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
607 
608  for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
609  if (isByRefs[i])
610  continue;
611  llvm::Value *var = builder.CreateAlloca(
612  moduleTranslation.convertType(reductionDecls[i].getType()));
613  moduleTranslation.mapValue(reductionArgs[i], var);
614  privateReductionVariables[i] = var;
615  reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
616  }
617 }
618 
619 /// Map input argument to all reduction initialization regions
620 template <typename T>
621 static void
622 mapInitializationArg(T loop, LLVM::ModuleTranslation &moduleTranslation,
623  SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
624  unsigned i) {
625  // map input argument to the initialization region
626  mlir::omp::DeclareReductionOp &reduction = reductionDecls[i];
627  Region &initializerRegion = reduction.getInitializerRegion();
628  Block &entry = initializerRegion.front();
629  assert(entry.getNumArguments() == 1 &&
630  "the initialization region has one argument");
631 
632  mlir::Value mlirSource = loop.getReductionVars()[i];
633  llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
634  assert(llvmSource && "lookup reduction var");
635  moduleTranslation.mapValue(entry.getArgument(0), llvmSource);
636 }
637 
638 /// Collect reduction info
639 template <typename T>
640 static void collectReductionInfo(
641  T loop, llvm::IRBuilderBase &builder,
642  LLVM::ModuleTranslation &moduleTranslation,
644  SmallVectorImpl<OwningReductionGen> &owningReductionGens,
645  SmallVectorImpl<OwningAtomicReductionGen> &owningAtomicReductionGens,
646  const ArrayRef<llvm::Value *> privateReductionVariables,
647  SmallVectorImpl<llvm::OpenMPIRBuilder::ReductionInfo> &reductionInfos) {
648  unsigned numReductions = loop.getNumReductionVars();
649 
650  for (unsigned i = 0; i < numReductions; ++i) {
651  owningReductionGens.push_back(
652  makeReductionGen(reductionDecls[i], builder, moduleTranslation));
653  owningAtomicReductionGens.push_back(
654  makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
655  }
656 
657  // Collect the reduction information.
658  reductionInfos.reserve(numReductions);
659  for (unsigned i = 0; i < numReductions; ++i) {
660  llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr;
661  if (owningAtomicReductionGens[i])
662  atomicGen = owningAtomicReductionGens[i];
663  llvm::Value *variable =
664  moduleTranslation.lookupValue(loop.getReductionVars()[i]);
665  reductionInfos.push_back(
666  {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
667  privateReductionVariables[i],
668  /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
669  owningReductionGens[i],
670  /*ReductionGenClang=*/nullptr, atomicGen});
671  }
672 }
673 
674 /// handling of DeclareReductionOp's cleanup region
675 static LogicalResult
676 inlineOmpRegionCleanup(llvm::SmallVectorImpl<Region *> &cleanupRegions,
677  llvm::ArrayRef<llvm::Value *> privateVariables,
678  LLVM::ModuleTranslation &moduleTranslation,
679  llvm::IRBuilderBase &builder, StringRef regionName,
680  bool shouldLoadCleanupRegionArg = true) {
681  for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
682  if (cleanupRegion->empty())
683  continue;
684 
685  // map the argument to the cleanup region
686  Block &entry = cleanupRegion->front();
687 
688  llvm::Instruction *potentialTerminator =
689  builder.GetInsertBlock()->empty() ? nullptr
690  : &builder.GetInsertBlock()->back();
691  if (potentialTerminator && potentialTerminator->isTerminator())
692  builder.SetInsertPoint(potentialTerminator);
693  llvm::Value *privateVarValue =
694  shouldLoadCleanupRegionArg
695  ? builder.CreateLoad(
696  moduleTranslation.convertType(entry.getArgument(0).getType()),
697  privateVariables[i])
698  : privateVariables[i];
699 
700  moduleTranslation.mapValue(entry.getArgument(0), privateVarValue);
701 
702  if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
703  moduleTranslation)))
704  return failure();
705 
706  // clear block argument mapping in case it needs to be re-created with a
707  // different source for another use of the same reduction decl
708  moduleTranslation.forgetMapping(*cleanupRegion);
709  }
710  return success();
711 }
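
// A cleanup region typically releases storage acquired by the corresponding
// init region of a by-ref reduction, e.g. roughly (illustrative MLIR in a
// comment, not from this file):
//
//   cleanup {
//   ^bb0(%arg: !llvm.ptr):
//     llvm.call @free(%arg) : (!llvm.ptr) -> ()
//     omp.yield
//   }
//
// With shouldLoadCleanupRegionArg=true the block argument is mapped to a load
// of the private variable; callers pass false when the private variable itself
// is already the value the region expects.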
712 
713 // TODO: not used by ParallelOp
714 template <class OP>
715 static LogicalResult createReductionsAndCleanup(
716  OP op, llvm::IRBuilderBase &builder,
717  LLVM::ModuleTranslation &moduleTranslation,
718  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
719  SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
720  ArrayRef<llvm::Value *> privateReductionVariables, ArrayRef<bool> isByRef) {
721  // Process the reductions if required.
722  if (op.getNumReductionVars() == 0)
723  return success();
724 
725  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
726 
727  // Create the reduction generators. We need to own them here because
728  // ReductionInfo only accepts references to the generators.
729  SmallVector<OwningReductionGen> owningReductionGens;
730  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
731  SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
732  collectReductionInfo(op, builder, moduleTranslation, reductionDecls,
733  owningReductionGens, owningAtomicReductionGens,
734  privateReductionVariables, reductionInfos);
735 
736  // The call to createReductions below expects the block to have a
737  // terminator. Create an unreachable instruction to serve as terminator
738  // and remove it later.
739  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
740  builder.SetInsertPoint(tempTerminator);
741  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
742  ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
743  isByRef, op.getNowait());
744  if (!contInsertPoint.getBlock())
745  return op->emitOpError() << "failed to convert reductions";
746  auto nextInsertionPoint =
747  ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
748  tempTerminator->eraseFromParent();
749  builder.restoreIP(nextInsertionPoint);
750 
751  // after the construct, deallocate private reduction variables
752  SmallVector<Region *> reductionRegions;
753  llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
754  [](omp::DeclareReductionOp reductionDecl) {
755  return &reductionDecl.getCleanupRegion();
756  });
757  return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
758  moduleTranslation, builder,
759  "omp.reduction.cleanup");
761 }
762 
763 static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) {
764  if (!attr)
765  return {};
766  return *attr;
767 }
768 
769 // TODO: not used by omp.parallel
770 template <typename OP>
771 static LogicalResult allocAndInitializeReductionVars(
772  OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
773  LLVM::ModuleTranslation &moduleTranslation,
774  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
776  SmallVectorImpl<llvm::Value *> &privateReductionVariables,
777  DenseMap<Value, llvm::Value *> &reductionVariableMap,
778  llvm::ArrayRef<bool> isByRef) {
779  if (op.getNumReductionVars() == 0)
780  return success();
781 
782  allocByValReductionVars(op, reductionArgs, builder, moduleTranslation,
783  allocaIP, reductionDecls, privateReductionVariables,
784  reductionVariableMap, isByRef);
785 
786  // Before the loop, store the initial values of reductions into reduction
787  // variables. Although this could be done after allocas, we don't want to
788  // interfere with the alloca insertion point.
789  for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
790  SmallVector<llvm::Value *> phis;
791 
792  // map block argument to initializer region
793  mapInitializationArg(op, moduleTranslation, reductionDecls, i);
794 
795  if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
796  "omp.reduction.neutral", builder,
797  moduleTranslation, &phis)))
798  return failure();
799  assert(phis.size() == 1 && "expected one value to be yielded from the "
800  "reduction neutral element declaration region");
801  if (isByRef[i]) {
802  // Allocate reduction variable (which is a pointer to the real reduction
803  // variable allocated in the inlined region)
804  llvm::Value *var = builder.CreateAlloca(
805  moduleTranslation.convertType(reductionDecls[i].getType()));
806  // Store the result of the inlined region to the allocated reduction var
807  // ptr
808  builder.CreateStore(phis[0], var);
809 
810  privateReductionVariables[i] = var;
811  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
812  reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
813  } else {
814  // By-val case: store the neutral element into the private variable.
815  builder.CreateStore(phis[0], privateReductionVariables[i]);
816  // The allocation itself was handled in allocByValReductionVars.
817  }
818 
819  // forget the mapping for the initializer region because we might need a
820  // different mapping if this reduction declaration is re-used for a
821  // different variable
822  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
823  }
824 
825  return success();
826 }
827 
828 static LogicalResult
829 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
830  LLVM::ModuleTranslation &moduleTranslation) {
831  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
832  using StorableBodyGenCallbackTy =
833  llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
834 
835  auto sectionsOp = cast<omp::SectionsOp>(opInst);
836 
837  // TODO: Support the following clauses: private, firstprivate, lastprivate,
838  // allocate
839  if (!sectionsOp.getAllocateVars().empty() ||
840  !sectionsOp.getAllocatorsVars().empty())
841  return emitError(sectionsOp.getLoc())
842  << "allocate clause is not supported for sections construct";
843 
844  llvm::ArrayRef<bool> isByRef = getIsByRef(sectionsOp.getReductionVarsByref());
845  assert(isByRef.size() == sectionsOp.getNumReductionVars());
846 
848  collectReductionDecls(sectionsOp, reductionDecls);
849  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
850  findAllocaInsertPoint(builder, moduleTranslation);
851 
852  SmallVector<llvm::Value *> privateReductionVariables(
853  sectionsOp.getNumReductionVars());
854  DenseMap<Value, llvm::Value *> reductionVariableMap;
855 
856  MutableArrayRef<BlockArgument> reductionArgs =
857  sectionsOp.getRegion().getArguments();
858 
859  if (failed(allocAndInitializeReductionVars(
860  sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP,
861  reductionDecls, privateReductionVariables, reductionVariableMap,
862  isByRef)))
863  return failure();
864 
865  // Store the mapping between reduction variables and their private copies on
866  // ModuleTranslation stack. It can be then recovered when translating
867  // omp.reduce operations in a separate call.
868  LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
869  moduleTranslation, reductionVariableMap);
870 
871  LogicalResult bodyGenStatus = success();
872  SmallVector<StorableBodyGenCallbackTy> sectionCBs;
873 
874  for (Operation &op : *sectionsOp.getRegion().begin()) {
875  auto sectionOp = dyn_cast<omp::SectionOp>(op);
876  if (!sectionOp) // omp.terminator
877  continue;
878 
879  Region &region = sectionOp.getRegion();
880  auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation,
881  &bodyGenStatus](InsertPointTy allocaIP,
882  InsertPointTy codeGenIP) {
883  builder.restoreIP(codeGenIP);
884 
885  // map the omp.section reduction block argument to the omp.sections block
886  // arguments
887  // TODO: this assumes that the only block arguments are reduction
888  // variables
889  assert(region.getNumArguments() ==
890  sectionsOp.getRegion().getNumArguments());
891  for (auto [sectionsArg, sectionArg] : llvm::zip_equal(
892  sectionsOp.getRegion().getArguments(), region.getArguments())) {
893  llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg);
894  assert(llvmVal);
895  moduleTranslation.mapValue(sectionArg, llvmVal);
896  }
897 
898  convertOmpOpRegions(region, "omp.section.region", builder,
899  moduleTranslation, bodyGenStatus);
900  };
901  sectionCBs.push_back(sectionCB);
902  }
903 
904  // No sections within omp.sections operation - skip generation. This situation
905  // is only possible if there is only a terminator operation inside the
906  // sections operation
907  if (sectionCBs.empty())
908  return success();
909 
910  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
911 
912  // TODO: Perform appropriate actions according to the data-sharing
913  // attribute (shared, private, firstprivate, ...) of variables.
914  // Currently defaults to shared.
915  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
916  llvm::Value &vPtr,
917  llvm::Value *&replacementValue) -> InsertPointTy {
918  replacementValue = &vPtr;
919  return codeGenIP;
920  };
921 
922  // TODO: Perform finalization actions for variables. This has to be
923  // called for variables which have destructors/finalizers.
924  auto finiCB = [&](InsertPointTy codeGenIP) {};
925 
926  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
927  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
928  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
929  ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
930  sectionsOp.getNowait()));
931 
932  if (failed(bodyGenStatus))
933  return bodyGenStatus;
934 
935  // Process the reductions if required.
936  return createReductionsAndCleanup(sectionsOp, builder, moduleTranslation,
937  allocaIP, reductionDecls,
938  privateReductionVariables, isByRef);
939 }
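
// For orientation, the input handled above has roughly this shape, and each
// omp.section region becomes one callback in sectionCBs handed to
// createSections (illustrative, not from this file):
//
//   omp.sections {
//     omp.section { ... omp.terminator }
//     omp.section { ... omp.terminator }
//     omp.terminator
//   }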
940 
941 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
942 static LogicalResult
943 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
944  LLVM::ModuleTranslation &moduleTranslation) {
945  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
946  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
947  LogicalResult bodyGenStatus = success();
948  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
949  builder.restoreIP(codegenIP);
950  convertOmpOpRegions(singleOp.getRegion(), "omp.single.region", builder,
951  moduleTranslation, bodyGenStatus);
952  };
953  auto finiCB = [&](InsertPointTy codeGenIP) {};
954 
955  // Handle copyprivate
956  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
957  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateFuncs();
958  SmallVector<llvm::Value *> llvmCPVars;
959  SmallVector<llvm::Function *> llvmCPFuncs;
960  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
961  llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
962  auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
963  singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
964  llvmCPFuncs.push_back(
965  moduleTranslation.lookupFunction(llvmFuncOp.getName()));
966  }
967 
968  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle(
969  ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars, llvmCPFuncs));
970  return bodyGenStatus;
971 }
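
// Copyprivate sketch (illustrative, exact omp dialect syntax defined
// elsewhere): for an input along the lines of
//
//   omp.single copyprivate(%x -> @copy_fn : !llvm.ptr) { ... omp.terminator }
//
// the translated %x is collected into llvmCPVars and the LLVM function created
// for @copy_fn into llvmCPFuncs; createSingle then emits the runtime
// copyprivate call that broadcasts the single thread's value to the team.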
972 
973 // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder
974 static LogicalResult
975 convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
976  LLVM::ModuleTranslation &moduleTranslation) {
977  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
978  LogicalResult bodyGenStatus = success();
979  if (!op.getAllocatorsVars().empty() || op.getReductions())
980  return op.emitError("unhandled clauses for translation to LLVM IR");
981 
982  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
983  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
984  moduleTranslation, allocaIP);
985  builder.restoreIP(codegenIP);
986  convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
987  moduleTranslation, bodyGenStatus);
988  };
989 
990  llvm::Value *numTeamsLower = nullptr;
991  if (Value numTeamsLowerVar = op.getNumTeamsLower())
992  numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);
993 
994  llvm::Value *numTeamsUpper = nullptr;
995  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
996  numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);
997 
998  llvm::Value *threadLimit = nullptr;
999  if (Value threadLimitVar = op.getThreadLimit())
1000  threadLimit = moduleTranslation.lookupValue(threadLimitVar);
1001 
1002  llvm::Value *ifExpr = nullptr;
1003  if (Value ifExprVar = op.getIfExpr())
1004  ifExpr = moduleTranslation.lookupValue(ifExprVar);
1005 
1006  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1007  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams(
1008  ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr));
1009  return bodyGenStatus;
1010 }
1011 
1012 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
1013 static LogicalResult
1014 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
1015  LLVM::ModuleTranslation &moduleTranslation) {
1016  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1017  LogicalResult bodyGenStatus = success();
1018  if (taskOp.getUntiedAttr() || taskOp.getMergeableAttr() ||
1019  taskOp.getInReductions() || taskOp.getPriority() ||
1020  !taskOp.getAllocateVars().empty()) {
1021  return taskOp.emitError("unhandled clauses for translation to LLVM IR");
1022  }
1023  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1024  // Save the alloca insertion point on ModuleTranslation stack for use in
1025  // nested regions.
1026  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
1027  moduleTranslation, allocaIP);
1028 
1029  builder.restoreIP(codegenIP);
1030  convertOmpOpRegions(taskOp.getRegion(), "omp.task.region", builder,
1031  moduleTranslation, bodyGenStatus);
1032  };
1033 
1034  SmallVector<llvm::OpenMPIRBuilder::DependData> dds;
1035  if (!taskOp.getDependVars().empty() && taskOp.getDepends()) {
1036  for (auto dep :
1037  llvm::zip(taskOp.getDependVars(), taskOp.getDepends()->getValue())) {
1038  llvm::omp::RTLDependenceKindTy type;
1039  switch (
1040  cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
1041  case mlir::omp::ClauseTaskDepend::taskdependin:
1042  type = llvm::omp::RTLDependenceKindTy::DepIn;
1043  break;
1044  // The OpenMP runtime requires that the codegen for 'depend' clause for
1045  // 'out' dependency kind must be the same as codegen for 'depend' clause
1046  // with 'inout' dependency.
1047  case mlir::omp::ClauseTaskDepend::taskdependout:
1048  case mlir::omp::ClauseTaskDepend::taskdependinout:
1049  type = llvm::omp::RTLDependenceKindTy::DepInOut;
1050  break;
1051  };
1052  llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
1053  llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
1054  dds.emplace_back(dd);
1055  }
1056  }
1057 
1058  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1059  findAllocaInsertPoint(builder, moduleTranslation);
1060  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1061  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask(
1062  ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
1063  moduleTranslation.lookupValue(taskOp.getFinalExpr()),
1064  moduleTranslation.lookupValue(taskOp.getIfExpr()), dds));
1065  return bodyGenStatus;
1066 }
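
// Dependence mapping sketch (illustrative, exact omp dialect syntax defined
// elsewhere): a task such as
//
//   omp.task depend(taskdependin -> %x : !llvm.ptr) { ... omp.terminator }
//
// yields one DependData entry with kind DepIn for the translated %x; the list
// is passed to createTask, which builds the dependence information consumed by
// the OpenMP runtime when scheduling the task.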
1067 
1068 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
1069 static LogicalResult
1070 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
1071  LLVM::ModuleTranslation &moduleTranslation) {
1072  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1073  LogicalResult bodyGenStatus = success();
1074  if (!tgOp.getTaskReductionVars().empty() || !tgOp.getAllocateVars().empty()) {
1075  return tgOp.emitError("unhandled clauses for translation to LLVM IR");
1076  }
1077  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1078  builder.restoreIP(codegenIP);
1079  convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region", builder,
1080  moduleTranslation, bodyGenStatus);
1081  };
1082  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1083  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1084  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTaskgroup(
1085  ompLoc, allocaIP, bodyCB));
1086  return bodyGenStatus;
1087 }
1088 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
1089 static LogicalResult
1090 convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
1091  LLVM::ModuleTranslation &moduleTranslation) {
1092  auto wsloopOp = cast<omp::WsloopOp>(opInst);
1093  // FIXME: Here any other nested wrappers (e.g. omp.simd) are skipped, so
1094  // codegen for composite constructs like 'DO/FOR SIMD' will be the same as for
1095  // 'DO/FOR'.
1096  auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
1097 
1098  llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionVarsByref());
1099  assert(isByRef.size() == wsloopOp.getNumReductionVars());
1100 
1101  // Static is the default.
1102  auto schedule =
1103  wsloopOp.getScheduleVal().value_or(omp::ClauseScheduleKind::Static);
1104 
1105  // Find the loop configuration.
1106  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[0]);
1107  llvm::Type *ivType = step->getType();
1108  llvm::Value *chunk = nullptr;
1109  if (wsloopOp.getScheduleChunkVar()) {
1110  llvm::Value *chunkVar =
1111  moduleTranslation.lookupValue(wsloopOp.getScheduleChunkVar());
1112  chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
1113  }
1114 
1115  SmallVector<omp::DeclareReductionOp> reductionDecls;
1116  collectReductionDecls(wsloopOp, reductionDecls);
1117  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1118  findAllocaInsertPoint(builder, moduleTranslation);
1119 
1120  SmallVector<llvm::Value *> privateReductionVariables(
1121  wsloopOp.getNumReductionVars());
1122  DenseMap<Value, llvm::Value *> reductionVariableMap;
1123 
1124  MutableArrayRef<BlockArgument> reductionArgs =
1125  wsloopOp.getRegion().getArguments();
1126 
1127  if (failed(allocAndInitializeReductionVars(
1128  wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP,
1129  reductionDecls, privateReductionVariables, reductionVariableMap,
1130  isByRef)))
1131  return failure();
1132 
1133  // Store the mapping between reduction variables and their private copies on
1134  // ModuleTranslation stack. It can be then recovered when translating
1135  // omp.reduce operations in a separate call.
1136  LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
1137  moduleTranslation, reductionVariableMap);
1138 
1139  // Set up the source location value for OpenMP runtime.
1140  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1141 
1142  // Generator of the canonical loop body.
1143  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1144  // relying on captured variables.
1145  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
1146  SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
1147  LogicalResult bodyGenStatus = success();
1148  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
1149  // Make sure further conversions know about the induction variable.
1150  moduleTranslation.mapValue(
1151  loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
1152 
1153  // Capture the body insertion point for use in nested loops. BodyIP of the
1154  // CanonicalLoopInfo always points to the beginning of the entry block of
1155  // the body.
1156  bodyInsertPoints.push_back(ip);
1157 
1158  if (loopInfos.size() != loopOp.getNumLoops() - 1)
1159  return;
1160 
1161  // Convert the body of the loop.
1162  builder.restoreIP(ip);
1163  convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder,
1164  moduleTranslation, bodyGenStatus);
1165  };
1166 
1167  // Delegate actual loop construction to the OpenMP IRBuilder.
1168  // TODO: this currently assumes omp.loop_nest is semantically similar to an
1169  // SCF loop, i.e. it has a positive step and uses signed integer semantics.
1170  // Reconsider this code when the nested loop operation clearly supports more
1171  // cases.
1172  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1173  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
1174  llvm::Value *lowerBound =
1175  moduleTranslation.lookupValue(loopOp.getLowerBound()[i]);
1176  llvm::Value *upperBound =
1177  moduleTranslation.lookupValue(loopOp.getUpperBound()[i]);
1178  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]);
1179 
1180  // Make sure loop trip counts are emitted in the preheader of the outermost
1181  // loop at the latest so that they are all available to the new collapsed
1182  // loop that will be created below.
1183  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
1184  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
1185  if (i != 0) {
1186  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back());
1187  computeIP = loopInfos.front()->getPreheaderIP();
1188  }
1189  loopInfos.push_back(ompBuilder->createCanonicalLoop(
1190  loc, bodyGen, lowerBound, upperBound, step,
1191  /*IsSigned=*/true, loopOp.getInclusive(), computeIP));
1192 
1193  if (failed(bodyGenStatus))
1194  return failure();
1195  }
1196 
1197  // Collapse loops. Store the insertion point because LoopInfos may get
1198  // invalidated.
1199  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
1200  llvm::CanonicalLoopInfo *loopInfo =
1201  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
1202 
1203  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1204 
1205  // TODO: Handle doacross loops when the ordered clause has a parameter.
1206  bool isOrdered = wsloopOp.getOrderedVal().has_value();
1207  std::optional<omp::ScheduleModifier> scheduleModifier =
1208  wsloopOp.getScheduleModifier();
1209  bool isSimd = wsloopOp.getSimdModifier();
1210 
1211  ompBuilder->applyWorkshareLoop(
1212  ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(),
1213  convertToScheduleKind(schedule), chunk, isSimd,
1214  scheduleModifier == omp::ScheduleModifier::monotonic,
1215  scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered);
1216 
1217  // Continue building IR after the loop. Note that the LoopInfo returned by
1218  // `collapseLoops` points inside the outermost loop and is intended for
1219  // potential further loop transformations. Use the insertion point stored
1220  // before collapsing loops instead.
1221  builder.restoreIP(afterIP);
1222 
1223  // Process the reductions if required.
1224  return createReductionsAndCleanup(wsloopOp, builder, moduleTranslation,
1225  allocaIP, reductionDecls,
1226  privateReductionVariables, isByRef);
1227 }
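
// For orientation, the nesting handled above is roughly (illustrative, exact
// omp dialect syntax defined elsewhere):
//
//   omp.wsloop schedule(static) {
//     omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
//       ...
//       omp.yield
//     }
//     omp.terminator
//   }
//
// Each loop of the omp.loop_nest becomes one CanonicalLoopInfo, the collected
// loops are fused with collapseLoops, and applyWorkshareLoop attaches the
// schedule/nowait/ordered semantics to the fused loop.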
1228 
1229 /// A RAII class that on construction replaces the region arguments of the
1230 /// parallel op (which correspond to private variables) with the actual private
1231 /// variables they correspond to. This prepares the parallel op so that it
1232 /// matches what is expected by the OMPIRBuilder.
1233 ///
1234 /// On destruction, it restores the original state of the operation so that on
1235 /// the MLIR side, the op is not affected by conversion to LLVM IR.
1236 class OmpParallelOpConversionManager {
1237 public:
1238  OmpParallelOpConversionManager(omp::ParallelOp opInst)
1239  : region(opInst.getRegion()), privateVars(opInst.getPrivateVars()),
1240  privateArgBeginIdx(opInst.getNumReductionVars()),
1241  privateArgEndIdx(privateArgBeginIdx + privateVars.size()) {
1242  auto privateVarsIt = privateVars.begin();
1243 
1244  for (size_t argIdx = privateArgBeginIdx; argIdx < privateArgEndIdx;
1245  ++argIdx, ++privateVarsIt)
1246  mlir::replaceAllUsesInRegionWith(region.getArgument(argIdx),
1247  *privateVarsIt, region);
1248  }
1249 
1250  ~OmpParallelOpConversionManager() {
1251  auto privateVarsIt = privateVars.begin();
1252 
1253  for (size_t argIdx = privateArgBeginIdx; argIdx < privateArgEndIdx;
1254  ++argIdx, ++privateVarsIt)
1255  mlir::replaceAllUsesInRegionWith(*privateVarsIt,
1256  region.getArgument(argIdx), region);
1257  }
1258 
1259 private:
1260  Region &region;
1261  OperandRange privateVars;
1262  unsigned privateArgBeginIdx;
1263  unsigned privateArgEndIdx;
1264 };
1265 
1266 /// Converts the OpenMP parallel operation to LLVM IR.
1267 static LogicalResult
1268 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
1269  LLVM::ModuleTranslation &moduleTranslation) {
1270  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1271  OmpParallelOpConversionManager raii(opInst);
1272  ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionVarsByref());
1273  assert(isByRef.size() == opInst.getNumReductionVars());
1274 
1275  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1276  // relying on captured variables.
1277  LogicalResult bodyGenStatus = success();
1278  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1279 
1280  // Collect reduction declarations
1281  SmallVector<omp::DeclareReductionOp> reductionDecls;
1282  collectReductionDecls(opInst, reductionDecls);
1283  SmallVector<llvm::Value *> privateReductionVariables(
1284  opInst.getNumReductionVars());
1285 
1286  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1287  // Allocate reduction vars
1288  DenseMap<Value, llvm::Value *> reductionVariableMap;
1289 
1290  MutableArrayRef<BlockArgument> reductionArgs =
1291  opInst.getRegion().getArguments().slice(
1292  opInst.getNumAllocateVars() + opInst.getNumAllocatorsVars(),
1293  opInst.getNumReductionVars());
1294 
1295  allocByValReductionVars(opInst, reductionArgs, builder, moduleTranslation,
1296  allocaIP, reductionDecls, privateReductionVariables,
1297  reductionVariableMap, isByRef);
1298 
1299  // Initialize reduction vars
1300  builder.restoreIP(allocaIP);
1301  llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
1302  allocaIP =
1303  InsertPointTy(allocaIP.getBlock(),
1304  allocaIP.getBlock()->getTerminator()->getIterator());
1305  SmallVector<llvm::Value *> byRefVars(opInst.getNumReductionVars());
1306  for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
1307  if (isByRef[i]) {
1308  // Allocate reduction variable (which is a pointer to the real reduction
1309  // variable allocated in the inlined region)
1310  byRefVars[i] = builder.CreateAlloca(
1311  moduleTranslation.convertType(reductionDecls[i].getType()));
1312  }
1313  }
1314 
1315  builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca());
1316 
1317  for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
1318  SmallVector<llvm::Value *> phis;
1319 
1320  // map the block argument
1321  mapInitializationArg(opInst, moduleTranslation, reductionDecls, i);
1322  if (failed(inlineConvertOmpRegions(
1323  reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
1324  builder, moduleTranslation, &phis)))
1325  bodyGenStatus = failure();
1326  assert(phis.size() == 1 &&
1327  "expected one value to be yielded from the "
1328  "reduction neutral element declaration region");
1329 
1330  // mapInitializationArg finishes its block with a terminator. We need to
1331  // insert before that terminator.
1332  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
1333 
1334  if (isByRef[i]) {
1335  // Store the result of the inlined region to the allocated reduction var
1336  // ptr
1337  builder.CreateStore(phis[0], byRefVars[i]);
1338 
1339  privateReductionVariables[i] = byRefVars[i];
1340  moduleTranslation.mapValue(reductionArgs[i], phis[0]);
1341  reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]);
1342  } else {
1343  // By-val case: store the neutral element into the private variable.
1344  builder.CreateStore(phis[0], privateReductionVariables[i]);
1345  // The allocation itself was done in allocByValReductionVars.
1346  }
1347 
1348  // clear block argument mapping in case it needs to be re-created with a
1349  // different source for another use of the same reduction decl
1350  moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
1351  }
1352 
1353  // Store the mapping between reduction variables and their private copies on
1354  // ModuleTranslation stack. It can be then recovered when translating
1355  // omp.reduce operations in a separate call.
1356  LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
1357  moduleTranslation, reductionVariableMap);
1358 
1359  // Save the alloca insertion point on ModuleTranslation stack for use in
1360  // nested regions.
1361  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
1362  moduleTranslation, allocaIP);
1363 
1364  // ParallelOp has only one region associated with it.
1365  builder.restoreIP(codeGenIP);
1366  auto regionBlock =
1367  convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
1368  moduleTranslation, bodyGenStatus);
1369 
1370  // Process the reductions if required.
1371  if (opInst.getNumReductionVars() > 0) {
1372  // Collect reduction info
1373  SmallVector<OwningReductionGen> owningReductionGens;
1374  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1375  SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
1376  collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
1377  owningReductionGens, owningAtomicReductionGens,
1378  privateReductionVariables, reductionInfos);
1379 
1380  // Move to region cont block
1381  builder.SetInsertPoint(regionBlock->getTerminator());
1382 
1383  // Generate reductions from info
1384  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1385  builder.SetInsertPoint(tempTerminator);
1386 
1387  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
1388  ompBuilder->createReductions(builder.saveIP(), allocaIP,
1389  reductionInfos, isByRef, false);
1390  if (!contInsertPoint.getBlock()) {
1391  bodyGenStatus = opInst->emitOpError() << "failed to convert reductions";
1392  return;
1393  }
1394 
1395  tempTerminator->eraseFromParent();
1396  builder.restoreIP(contInsertPoint);
1397  }
1398  };
1399 
1400  SmallVector<omp::PrivateClauseOp> privatizerClones;
1401  SmallVector<llvm::Value *> privateVariables;
1402 
1403  // TODO: Perform appropriate actions according to the data-sharing
1404  // attribute (shared, private, firstprivate, ...) of variables.
1405  // Currently shared and private are supported.
1406  auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
1407  llvm::Value &, llvm::Value &vPtr,
1408  llvm::Value *&replacementValue) -> InsertPointTy {
1409  replacementValue = &vPtr;
1410 
1411  // If this is a private value, this lambda will return the corresponding
1412  // mlir value and its `PrivateClauseOp`. Otherwise, empty values are
1413  // returned.
1414  auto [privVar, privatizerClone] =
1415  [&]() -> std::pair<mlir::Value, omp::PrivateClauseOp> {
1416  if (!opInst.getPrivateVars().empty()) {
1417  auto privVars = opInst.getPrivateVars();
1418  auto privatizers = opInst.getPrivatizers();
1419 
1420  for (auto [privVar, privatizerAttr] :
1421  llvm::zip_equal(privVars, *privatizers)) {
1422  // Find the MLIR private variable corresponding to the LLVM value
1423  // being privatized.
1424  llvm::Value *llvmPrivVar = moduleTranslation.lookupValue(privVar);
1425  if (llvmPrivVar != &vPtr)
1426  continue;
1427 
1428  SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerAttr);
1429  omp::PrivateClauseOp privatizer =
1430  SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
1431  opInst, privSym);
1432 
1433  // Clone the privatizer in case it is used by more than one parallel
1434  // region. The privatizer is processed in-place (see below) before it
1435  // gets inlined in the parallel region and therefore processing the
1436  // original op is dangerous.
1437 
1438  MLIRContext &context = moduleTranslation.getContext();
1439  mlir::IRRewriter opCloner(&context);
1440  opCloner.setInsertionPoint(privatizer);
1441  auto clone = llvm::cast<mlir::omp::PrivateClauseOp>(
1442  opCloner.clone(*privatizer));
1443 
1444  // Unique the clone name to avoid clashes in the symbol table.
1445  unsigned counter = 0;
1446  SmallString<256> cloneName = SymbolTable::generateSymbolName<256>(
1447  privatizer.getSymName(),
1448  [&](llvm::StringRef candidate) {
1449  return SymbolTable::lookupNearestSymbolFrom(
1450  opInst, StringAttr::get(&context, candidate)) !=
1451  nullptr;
1452  },
1453  counter);
1454 
1455  clone.setSymName(cloneName);
1456  return {privVar, clone};
1457  }
1458  }
1459 
1460  return {mlir::Value(), omp::PrivateClauseOp()};
1461  }();
1462 
1463  if (privVar) {
1464  Region &allocRegion = privatizerClone.getAllocRegion();
1465 
1466  // If this is a `firstprivate` clause, prepare the `omp.private` op by:
1467  if (privatizerClone.getDataSharingType() ==
1468  omp::DataSharingClauseType::FirstPrivate) {
1469  auto oldAllocBackBlock = std::prev(allocRegion.end());
1470  omp::YieldOp oldAllocYieldOp =
1471  llvm::cast<omp::YieldOp>(oldAllocBackBlock->getTerminator());
1472 
1473  Region &copyRegion = privatizerClone.getCopyRegion();
1474 
1475  mlir::IRRewriter copyCloneBuilder(&moduleTranslation.getContext());
1476  // 1. Cloning the `copy` region to the end of the `alloc` region.
1477  copyCloneBuilder.cloneRegionBefore(copyRegion, allocRegion,
1478  allocRegion.end());
1479 
1480  auto newCopyRegionFrontBlock = std::next(oldAllocBackBlock);
1481  // 2. Merging the last `alloc` block with the first block in the `copy`
1482  // region clone.
1483  // 3. Re-mapping the first argument of the `copy` region to be the
1484  // argument of the `alloc` region and the second argument of the `copy`
1485  // region to be the yielded value of the `alloc` region (this is the
1486  // private clone of the privatized value).
1487  copyCloneBuilder.mergeBlocks(
1488  &*newCopyRegionFrontBlock, &*oldAllocBackBlock,
1489  {allocRegion.getArgument(0), oldAllocYieldOp.getOperand(0)});
1490 
1491  // 4. The old terminator of the `alloc` region is not needed anymore, so
1492  // delete it.
1493  oldAllocYieldOp.erase();
1494  }
1495 
1496  // Replace the privatizer block argument with mlir value being privatized.
1497  // This way, the body of the privatizer will be changed from using the
1498  // region/block argument to the value being privatized.
1499  auto allocRegionArg = allocRegion.getArgument(0);
1500  replaceAllUsesInRegionWith(allocRegionArg, privVar, allocRegion);
1501 
1502  auto oldIP = builder.saveIP();
1503  builder.restoreIP(allocaIP);
1504 
1505  SmallVector<llvm::Value *, 1> yieldedValues;
1506  if (failed(inlineConvertOmpRegions(allocRegion, "omp.privatizer", builder,
1507  moduleTranslation, &yieldedValues))) {
1508  opInst.emitError("failed to inline `alloc` region of an `omp.private` "
1509  "op in the parallel region");
1510  bodyGenStatus = failure();
1511  privatizerClone.erase();
1512  } else {
1513  assert(yieldedValues.size() == 1);
1514  replacementValue = yieldedValues.front();
1515 
1516  // Keep the LLVM replacement value and the op clone in case we need to
1517  // emit cleanup (i.e. deallocation) logic.
1518  privateVariables.push_back(replacementValue);
1519  privatizerClones.push_back(privatizerClone);
1520  }
1521 
1522  builder.restoreIP(oldIP);
1523  }
1524 
1525  return codeGenIP;
1526  };
1527 
1528  // TODO: Perform finalization actions for variables. This has to be
1529  // called for variables which have destructors/finalizers.
1530  auto finiCB = [&](InsertPointTy codeGenIP) {
1531  InsertPointTy oldIP = builder.saveIP();
1532  builder.restoreIP(codeGenIP);
1533 
1534  // if the reduction has a cleanup region, inline it here to finalize the
1535  // reduction variables
1536  SmallVector<Region *> reductionCleanupRegions;
1537  llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions),
1538  [](omp::DeclareReductionOp reductionDecl) {
1539  return &reductionDecl.getCleanupRegion();
1540  });
1541  if (failed(inlineOmpRegionCleanup(
1542  reductionCleanupRegions, privateReductionVariables,
1543  moduleTranslation, builder, "omp.reduction.cleanup")))
1544  bodyGenStatus = failure();
1545 
1546  SmallVector<Region *> privateCleanupRegions;
1547  llvm::transform(privatizerClones, std::back_inserter(privateCleanupRegions),
1548  [](omp::PrivateClauseOp privatizer) {
1549  return &privatizer.getDeallocRegion();
1550  });
1551 
1552  if (failed(inlineOmpRegionCleanup(
1553  privateCleanupRegions, privateVariables, moduleTranslation, builder,
1554  "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
1555  bodyGenStatus = failure();
1556 
1557  builder.restoreIP(oldIP);
1558  };
1559 
1560  llvm::Value *ifCond = nullptr;
1561  if (auto ifExprVar = opInst.getIfExpr())
1562  ifCond = moduleTranslation.lookupValue(ifExprVar);
1563  llvm::Value *numThreads = nullptr;
1564  if (auto numThreadsVar = opInst.getNumThreadsVar())
1565  numThreads = moduleTranslation.lookupValue(numThreadsVar);
1566  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
1567  if (auto bind = opInst.getProcBindVal())
1568  pbKind = getProcBindKind(*bind);
1569  // TODO: Is the Parallel construct cancellable?
1570  bool isCancellable = false;
1571 
1572  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1573  findAllocaInsertPoint(builder, moduleTranslation);
1574  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1575 
1576  builder.restoreIP(
1577  ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
1578  ifCond, numThreads, pbKind, isCancellable));
1579 
1580  for (mlir::omp::PrivateClauseOp privatizerClone : privatizerClones)
1581  privatizerClone.erase();
1582 
1583  return bodyGenStatus;
1584 }
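// For illustration, a schematic of the kind of op handled above (printed
// syntax is approximate and may differ between OpenMP dialect versions):
//
//   omp.parallel num_threads(%n : i32) if(%cond : i1) {
//     // ... region body, possibly with reduction/private block arguments ...
//     omp.terminator
//   }
//
// createParallel outlines the region into a separate LLVM function and emits
// the corresponding OpenMP runtime fork call; the callbacks above fill in the
// body, privatization, and finalization logic.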
1585 
1586 /// Convert Order attribute to llvm::omp::OrderKind.
1587 static llvm::omp::OrderKind
1588 convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
1589  if (!o)
1590  return llvm::omp::OrderKind::OMP_ORDER_unknown;
1591  switch (*o) {
1592  case omp::ClauseOrderKind::Concurrent:
1593  return llvm::omp::OrderKind::OMP_ORDER_concurrent;
1594  }
1595  llvm_unreachable("Unknown ClauseOrderKind kind");
1596 }
1597 
1598 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
1599 static LogicalResult
1600 convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
1601  LLVM::ModuleTranslation &moduleTranslation) {
1602  auto simdOp = cast<omp::SimdOp>(opInst);
1603  auto loopOp = cast<omp::LoopNestOp>(simdOp.getWrappedLoop());
1604 
1605  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1606 
1607  // Generator of the canonical loop body.
1608  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1609  // relying on captured variables.
1610  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
1611  SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
1612  LogicalResult bodyGenStatus = success();
1613  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
1614  // Make sure further conversions know about the induction variable.
1615  moduleTranslation.mapValue(
1616  loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
1617 
1618  // Capture the body insertion point for use in nested loops. BodyIP of the
1619  // CanonicalLoopInfo always points to the beginning of the entry block of
1620  // the body.
1621  bodyInsertPoints.push_back(ip);
1622 
1623  if (loopInfos.size() != loopOp.getNumLoops() - 1)
1624  return;
1625 
1626  // Convert the body of the loop.
1627  builder.restoreIP(ip);
1628  convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder,
1629  moduleTranslation, bodyGenStatus);
1630  };
1631 
1632  // Delegate actual loop construction to the OpenMP IRBuilder.
1633  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
1634  // loop, i.e. it has a positive step, uses signed integer semantics.
1635  // Reconsider this code when the nested loop operation clearly supports more
1636  // cases.
1637  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1638  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
1639  llvm::Value *lowerBound =
1640  moduleTranslation.lookupValue(loopOp.getLowerBound()[i]);
1641  llvm::Value *upperBound =
1642  moduleTranslation.lookupValue(loopOp.getUpperBound()[i]);
1643  llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]);
1644 
1645  // Make sure loop trip counts are emitted in the preheader of the outermost
1646  // loop at the latest so that they are all available for the new collapsed
1647  // loop that will be created below.
1648  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
1649  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
1650  if (i != 0) {
1651  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
1652  ompLoc.DL);
1653  computeIP = loopInfos.front()->getPreheaderIP();
1654  }
1655  loopInfos.push_back(ompBuilder->createCanonicalLoop(
1656  loc, bodyGen, lowerBound, upperBound, step,
1657  /*IsSigned=*/true, /*Inclusive=*/true, computeIP));
1658 
1659  if (failed(bodyGenStatus))
1660  return failure();
1661  }
1662 
1663  // Collapse loops.
1664  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
1665  llvm::CanonicalLoopInfo *loopInfo =
1666  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
1667 
1668  llvm::ConstantInt *simdlen = nullptr;
1669  if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen())
1670  simdlen = builder.getInt64(simdlenVar.value());
1671 
1672  llvm::ConstantInt *safelen = nullptr;
1673  if (std::optional<uint64_t> safelenVar = simdOp.getSafelen())
1674  safelen = builder.getInt64(safelenVar.value());
1675 
1676  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
1677  llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrderVal());
1678  ompBuilder->applySimd(loopInfo, alignedVars,
1679  simdOp.getIfExpr()
1680  ? moduleTranslation.lookupValue(simdOp.getIfExpr())
1681  : nullptr,
1682  order, simdlen, safelen);
1683 
1684  builder.restoreIP(afterIP);
1685  return success();
1686 }
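// For illustration, a schematic of the simd construct handled above (printed
// syntax is approximate):
//
//   omp.simd {
//     omp.loop_nest (%iv) : i64 = (%lb) to (%ub) inclusive step (%step) {
//       // ... loop body ...
//       omp.yield
//     }
//   }
//
// The collapsed canonical loop is then marked for vectorization via
// applySimd, which also encodes simdlen/safelen/order hints when present.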
1687 
1688 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
1689 static llvm::AtomicOrdering
1690 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
1691  if (!ao)
1692  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
1693 
1694  switch (*ao) {
1695  case omp::ClauseMemoryOrderKind::Seq_cst:
1696  return llvm::AtomicOrdering::SequentiallyConsistent;
1697  case omp::ClauseMemoryOrderKind::Acq_rel:
1698  return llvm::AtomicOrdering::AcquireRelease;
1699  case omp::ClauseMemoryOrderKind::Acquire:
1700  return llvm::AtomicOrdering::Acquire;
1701  case omp::ClauseMemoryOrderKind::Release:
1702  return llvm::AtomicOrdering::Release;
1703  case omp::ClauseMemoryOrderKind::Relaxed:
1704  return llvm::AtomicOrdering::Monotonic;
1705  }
1706  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
1707 }
1708 
1709 /// Convert omp.atomic.read operation to LLVM IR.
1710 static LogicalResult
1711 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
1712  LLVM::ModuleTranslation &moduleTranslation) {
1713 
1714  auto readOp = cast<omp::AtomicReadOp>(opInst);
1715  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1716 
1717  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1718 
1719  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrderVal());
1720  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
1721  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
1722 
1723  llvm::Type *elementType =
1724  moduleTranslation.convertType(readOp.getElementType());
1725 
1726  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
1727  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
1728  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
1729  return success();
1730 }
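// For illustration, assuming an i32 element type, an op along the lines of
//   omp.atomic.read %v = %x : !llvm.ptr, i32
// is lowered by createAtomicRead to roughly
//   %tmp = load atomic i32, ptr %x monotonic, align 4
//   store i32 %tmp, ptr %v
// with the ordering selected by convertAtomicOrdering above.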
1731 
1732 /// Converts an omp.atomic.write operation to LLVM IR.
1733 static LogicalResult
1734 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
1735  LLVM::ModuleTranslation &moduleTranslation) {
1736  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
1737  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1738 
1739  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1740  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrderVal());
1741  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
1742  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
1743  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
1744  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
1745  /*isVolatile=*/false};
1746  builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
1747  return success();
1748 }
1749 
1750 /// Converts an LLVM dialect binary operation to the corresponding enum value
1751 /// for `atomicrmw` supported binary operation.
1752 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
1753  return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op)
1754  .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
1755  .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
1756  .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
1757  .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
1758  .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
1759  .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
1760  .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
1761  .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
1762  .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
1763  .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
1764 }
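// For example, given an update region whose single non-terminator operation is
//   %new = llvm.add %xval, %expr : i32
// this returns llvm::AtomicRMWInst::BinOp::Add; any operation not listed
// above yields BAD_BINOP, which callers use to fall back to a cmpxchg-based
// update instead of a single atomicrmw.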
1765 
1766 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
1767 static LogicalResult
1768 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
1769  llvm::IRBuilderBase &builder,
1770  LLVM::ModuleTranslation &moduleTranslation) {
1771  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1772 
1773  // Convert values and types.
1774  auto &innerOpList = opInst.getRegion().front().getOperations();
1775  bool isXBinopExpr{false};
1776  llvm::AtomicRMWInst::BinOp binop;
1777  mlir::Value mlirExpr;
1778  llvm::Value *llvmExpr = nullptr;
1779  llvm::Value *llvmX = nullptr;
1780  llvm::Type *llvmXElementType = nullptr;
1781  if (innerOpList.size() == 2) {
1782  // The two operations here are the update and the terminator.
1783  // Since we can identify the update operation, there is a possibility
1784  // that we can generate the atomicrmw instruction.
1785  mlir::Operation &innerOp = *opInst.getRegion().front().begin();
1786  if (!llvm::is_contained(innerOp.getOperands(),
1787  opInst.getRegion().getArgument(0))) {
1788  return opInst.emitError("no atomic update operation with region argument"
1789  " as operand found inside atomic.update region");
1790  }
1791  binop = convertBinOpToAtomic(innerOp);
1792  isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
1793  mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
1794  llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1795  } else {
1796  // Since the update region includes more than one operation
1797  // we will resort to generating a cmpxchg loop.
1798  binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
1799  }
1800  llvmX = moduleTranslation.lookupValue(opInst.getX());
1801  llvmXElementType = moduleTranslation.convertType(
1802  opInst.getRegion().getArgument(0).getType());
1803  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1804  /*isSigned=*/false,
1805  /*isVolatile=*/false};
1806 
1807  llvm::AtomicOrdering atomicOrdering =
1808  convertAtomicOrdering(opInst.getMemoryOrderVal());
1809 
1810  // Generate update code.
1811  LogicalResult updateGenStatus = success();
1812  auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus](
1813  llvm::Value *atomicx,
1814  llvm::IRBuilder<> &builder) -> llvm::Value * {
1815  Block &bb = *opInst.getRegion().begin();
1816  moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
1817  moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1818  if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1819  updateGenStatus = (opInst.emitError()
1820  << "unable to convert update operation to llvm IR");
1821  return nullptr;
1822  }
1823  omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1824  assert(yieldop && yieldop.getResults().size() == 1 &&
1825  "terminator must be omp.yield op and it must have exactly one "
1826  "argument");
1827  return moduleTranslation.lookupValue(yieldop.getResults()[0]);
1828  };
1829 
1830  // Handle ambiguous alloca, if any.
1831  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1832  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1833  builder.restoreIP(ompBuilder->createAtomicUpdate(
1834  ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn,
1835  isXBinopExpr));
1836  return updateGenStatus;
1837 }
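// For illustration, an update region of the shape
//   omp.atomic.update %x : !llvm.ptr {
//   ^bb0(%xval: i32):
//     %new = llvm.add %xval, %expr : i32
//     omp.yield(%new : i32)
//   }
// can be emitted as a single `atomicrmw add` by the IR builder, whereas a
// region with more than one non-terminator operation takes the cmpxchg-loop
// path driven by the updateFn callback above (syntax approximate).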
1838 
1839 static LogicalResult
1840 convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
1841  llvm::IRBuilderBase &builder,
1842  LLVM::ModuleTranslation &moduleTranslation) {
1843  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1844  mlir::Value mlirExpr;
1845  bool isXBinopExpr = false, isPostfixUpdate = false;
1846  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
1847 
1848  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
1849  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();
1850 
1851  assert((atomicUpdateOp || atomicWriteOp) &&
1852  "internal op must be an atomic.update or atomic.write op");
1853 
1854  if (atomicWriteOp) {
1855  isPostfixUpdate = true;
1856  mlirExpr = atomicWriteOp.getExpr();
1857  } else {
1858  isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
1859  atomicCaptureOp.getAtomicUpdateOp().getOperation();
1860  auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
1861  bool isRegionArgUsed{false};
1862  // Find the binary update operation that uses the region argument
1863  // and get the expression to update
1864  for (Operation &innerOp : innerOpList) {
1865  if (innerOp.getNumOperands() == 2) {
1866  binop = convertBinOpToAtomic(innerOp);
1867  if (!llvm::is_contained(innerOp.getOperands(),
1868  atomicUpdateOp.getRegion().getArgument(0)))
1869  continue;
1870  isRegionArgUsed = true;
1871  isXBinopExpr =
1872  innerOp.getNumOperands() > 0 &&
1873  innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
1874  mlirExpr =
1875  (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
1876  break;
1877  }
1878  }
1879  if (!isRegionArgUsed)
1880  return atomicUpdateOp.emitError(
1881  "no atomic update operation with region argument"
1882  " as operand found inside atomic.update region");
1883  }
1884 
1885  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1886  llvm::Value *llvmX =
1887  moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
1888  llvm::Value *llvmV =
1889  moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
1890  llvm::Type *llvmXElementType = moduleTranslation.convertType(
1891  atomicCaptureOp.getAtomicReadOp().getElementType());
1892  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1893  /*isSigned=*/false,
1894  /*isVolatile=*/false};
1895  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
1896  /*isSigned=*/false,
1897  /*isVolatile=*/false};
1898 
1899  llvm::AtomicOrdering atomicOrdering =
1900  convertAtomicOrdering(atomicCaptureOp.getMemoryOrderVal());
1901 
1902  LogicalResult updateGenStatus = success();
1903  auto updateFn = [&](llvm::Value *atomicx,
1904  llvm::IRBuilder<> &builder) -> llvm::Value * {
1905  if (atomicWriteOp)
1906  return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
1907  Block &bb = *atomicUpdateOp.getRegion().begin();
1908  moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
1909  atomicx);
1910  moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1911  if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1912  updateGenStatus = (atomicUpdateOp.emitError()
1913  << "unable to convert update operation to llvm IR");
1914  return nullptr;
1915  }
1916  omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1917  assert(yieldop && yieldop.getResults().size() == 1 &&
1918  "terminator must be omp.yield op and it must have exactly one "
1919  "argument");
1920  return moduleTranslation.lookupValue(yieldop.getResults()[0]);
1921  };
1922 
1923  // Handle ambiguous alloca, if any.
1924  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1925  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1926  builder.restoreIP(ompBuilder->createAtomicCapture(
1927  ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
1928  binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr));
1929  return updateGenStatus;
1930 }
1931 
1932 /// Converts an OpenMP Threadprivate operation into LLVM IR using
1933 /// OpenMPIRBuilder.
1934 static LogicalResult
1935 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
1936  LLVM::ModuleTranslation &moduleTranslation) {
1937  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1938  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
1939 
1940  Value symAddr = threadprivateOp.getSymAddr();
1941  auto *symOp = symAddr.getDefiningOp();
1942  if (!isa<LLVM::AddressOfOp>(symOp))
1943  return opInst.emitError("Addressing symbol not found");
1944  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
1945 
1946  LLVM::GlobalOp global =
1947  addressOfOp.getGlobal(moduleTranslation.symbolTable());
1948  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
1949  llvm::Type *type = globalValue->getValueType();
1950  llvm::TypeSize typeSize =
1951  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
1952  type);
1953  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
1954  llvm::StringRef suffix = llvm::StringRef(".cache", 6);
1955  std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str();
1956  llvm::Value *callInst =
1957  moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate(
1958  ompLoc, globalValue, size, cacheName);
1959  moduleTranslation.mapValue(opInst.getResult(0), callInst);
1960  return success();
1961 }
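// For illustration: for a threadprivate global named `gvar`, the cache name
// built above is "gvar.cache", and createCachedThreadPrivate emits the OpenMP
// runtime call that returns the calling thread's copy; that result then
// replaces the omp.threadprivate result in later translation.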
1962 
1963 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1964 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
1965  switch (deviceClause) {
1966  case mlir::omp::DeclareTargetDeviceType::host:
1967  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1968  break;
1969  case mlir::omp::DeclareTargetDeviceType::nohost:
1970  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1971  break;
1972  case mlir::omp::DeclareTargetDeviceType::any:
1973  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1974  break;
1975  }
1976  llvm_unreachable("unhandled device clause");
1977 }
1978 
1979 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1980 convertToCaptureClauseKind(
1981  mlir::omp::DeclareTargetCaptureClause captureClause) {
1982  switch (captureClause) {
1983  case mlir::omp::DeclareTargetCaptureClause::to:
1984  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1985  case mlir::omp::DeclareTargetCaptureClause::link:
1986  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1987  case mlir::omp::DeclareTargetCaptureClause::enter:
1988  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1989  }
1990  llvm_unreachable("unhandled capture clause");
1991 }
1992 
1993 static llvm::SmallString<64>
1994 getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
1995  llvm::OpenMPIRBuilder &ompBuilder) {
1996  llvm::SmallString<64> suffix;
1997  llvm::raw_svector_ostream os(suffix);
1998  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
1999  auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
2000  auto fileInfoCallBack = [&loc]() {
2001  return std::pair<std::string, uint64_t>(
2002  llvm::StringRef(loc.getFilename()), loc.getLine());
2003  };
2004 
2005  os << llvm::format(
2006  "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
2007  }
2008  os << "_decl_tgt_ref_ptr";
2009 
2010  return suffix;
2011 }
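// For illustration: a non-private declare-target global simply receives the
// suffix "_decl_tgt_ref_ptr", while a private one also embeds a file-unique
// id, yielding something like "_<fileid>_decl_tgt_ref_ptr" (the hexadecimal
// id comes from getTargetEntryUniqueInfo), so reference pointers originating
// from different files do not clash.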
2012 
2013 static bool isDeclareTargetLink(mlir::Value value) {
2014  if (auto addressOfOp =
2015  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
2016  auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>();
2017  Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName());
2018  if (auto declareTargetGlobal =
2019  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp))
2020  if (declareTargetGlobal.getDeclareTargetCaptureClause() ==
2021  mlir::omp::DeclareTargetCaptureClause::link)
2022  return true;
2023  }
2024  return false;
2025 }
2026 
2027 // Returns the reference pointer generated by the lowering of the declare target
2028 // operation in cases where the link clause is used or the to clause is used in
2029 // USM mode.
2030 static llvm::Value *
2031 getRefPtrIfDeclareTarget(mlir::Value value,
2032  LLVM::ModuleTranslation &moduleTranslation) {
2033  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2034 
2035  // An easier way to do this may just be to keep track of any pointer
2036  // references and their mapping to their respective operation
2037  if (auto addressOfOp =
2038  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
2039  if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
2040  addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
2041  addressOfOp.getGlobalName()))) {
2042 
2043  if (auto declareTargetGlobal =
2044  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
2045  gOp.getOperation())) {
2046 
2047  // In this case, we must utilise the reference pointer generated by the
2048  // declare target operation, similar to Clang
2049  if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
2050  mlir::omp::DeclareTargetCaptureClause::link) ||
2051  (declareTargetGlobal.getDeclareTargetCaptureClause() ==
2052  mlir::omp::DeclareTargetCaptureClause::to &&
2053  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
2054  llvm::SmallString<64> suffix =
2055  getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
2056 
2057  if (gOp.getSymName().contains(suffix))
2058  return moduleTranslation.getLLVMModule()->getNamedValue(
2059  gOp.getSymName());
2060 
2061  return moduleTranslation.getLLVMModule()->getNamedValue(
2062  (gOp.getSymName().str() + suffix.str()).str());
2063  }
2064  }
2065  }
2066  }
2067 
2068  return nullptr;
2069 }
2070 
2071 // A small helper structure to contain data gathered
2072 // for map lowering and coalesce it into one area, avoiding
2073 // extra computations such as searches in the llvm module
2074 // for lowered mapped variables or checking if something is
2075 // declare target (and retrieving the value) more than
2076 // necessary.
2077 struct MapInfoData : llvm::OpenMPIRBuilder::MapInfosTy {
2078  llvm::SmallVector<bool, 4> IsDeclareTarget;
2079  llvm::SmallVector<bool, 4> IsAMember;
2080  llvm::SmallVector<mlir::Operation *, 4> MapClause;
2081  llvm::SmallVector<llvm::Value *, 4> OriginalValue;
2082  // Stripped off array/pointer to get the underlying
2083  // element type
2084  llvm::SmallVector<llvm::Type *, 4> BaseType;
2085 
2086  /// Append arrays in \a CurInfo.
2087  void append(MapInfoData &CurInfo) {
2088  IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
2089  CurInfo.IsDeclareTarget.end());
2090  MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
2091  OriginalValue.append(CurInfo.OriginalValue.begin(),
2092  CurInfo.OriginalValue.end());
2093  BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
2094  llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
2095  }
2096 };
2097 
2098 uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
2099  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
2100  arrTy.getElementType()))
2101  return getArrayElementSizeInBits(nestedArrTy, dl);
2102  return dl.getTypeSizeInBits(arrTy.getElementType());
2103 }
2104 
2105 // This function calculates the size to be offloaded for a specified type, given
2106 // its associated map clause (which can contain bounds information that affects
2107 // the total size). The size is calculated based on the underlying element type,
2108 // e.g. given a 1-D array of ints, we calculate the size from the integer
2109 // type * number of elements in the array. This size can be used in other
2110 // calculations but is ultimately used as an argument to the OpenMP runtime's
2111 // kernel argument structure, which is generated through the combinedInfo data
2112 // structures.
2113 // This function is somewhat equivalent to Clang's getExprTypeSize inside of
2114 // CGOpenMPRuntime.cpp.
2115 llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
2116  Operation *clauseOp, llvm::Value *basePointer,
2117  llvm::Type *baseType, llvm::IRBuilderBase &builder,
2118  LLVM::ModuleTranslation &moduleTranslation) {
2119  if (auto memberClause =
2120  mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
2121  // This calculates the size to transfer based on bounds and the underlying
2122  // element type, provided bounds have been specified (Fortran
2123  // pointers/allocatables/target and arrays that have sections specified fall
2124  // into this as well).
2125  if (!memberClause.getBounds().empty()) {
2126  llvm::Value *elementCount = builder.getInt64(1);
2127  for (auto bounds : memberClause.getBounds()) {
2128  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2129  bounds.getDefiningOp())) {
2130  // The size to be mapped is calculated from the map.info's bounds as
2131  // elemCount * (UB - LB + 1); later we multiply by the underlying element
2132  // type's byte size to get the full size to be offloaded based on the
2133  // bounds.
2134  elementCount = builder.CreateMul(
2135  elementCount,
2136  builder.CreateAdd(
2137  builder.CreateSub(
2138  moduleTranslation.lookupValue(boundOp.getUpperBound()),
2139  moduleTranslation.lookupValue(boundOp.getLowerBound())),
2140  builder.getInt64(1)));
2141  }
2142  }
2143 
2144  // Use getTypeSizeInBits instead of getTypeSize, as getTypeSize may report
2145  // the size inconsistently in either bytes or bits.
2146  uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
2147  if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type))
2148  underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);
2149 
2150  // The result is the size in bytes x number of elements; the stored
2151  // sizeInBytes is the underlying type's size, e.g. for ptr<i32> it is the
2152  // i32's size, so we do some on-the-fly runtime math to get the size in
2153  // bytes from the extent: (ub - lb) * sizeInBytes. NOTE: This may need
2154  // some adjustment for members with more complex types.
2155  return builder.CreateMul(elementCount,
2156  builder.getInt64(underlyingTypeSzInBits / 8));
2157  }
2158  }
2159 
2160  return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
2161 }
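// For illustration, a worked example of the bounds-based path above: mapping
// a section of a 1-D i32 array with lower bound 2 and upper bound 5 gives
// elementCount = (5 - 2) + 1 = 4, and the returned size is
// 4 * (32 / 8) = 16 bytes.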
2162 
2163 static void collectMapDataFromMapOperands(MapInfoData &mapData,
2164  llvm::SmallVectorImpl<Value> &mapOperands,
2165  LLVM::ModuleTranslation &moduleTranslation,
2166  DataLayout &dl,
2167  llvm::IRBuilderBase &builder) {
2168  for (mlir::Value mapValue : mapOperands) {
2169  if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2170  mapValue.getDefiningOp())) {
2171  mlir::Value offloadPtr =
2172  mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
2173  mapData.OriginalValue.push_back(
2174  moduleTranslation.lookupValue(offloadPtr));
2175  mapData.Pointers.push_back(mapData.OriginalValue.back());
2176 
2177  if (llvm::Value *refPtr =
2178  getRefPtrIfDeclareTarget(offloadPtr,
2179  moduleTranslation)) { // declare target
2180  mapData.IsDeclareTarget.push_back(true);
2181  mapData.BasePointers.push_back(refPtr);
2182  } else { // regular mapped variable
2183  mapData.IsDeclareTarget.push_back(false);
2184  mapData.BasePointers.push_back(mapData.OriginalValue.back());
2185  }
2186 
2187  mapData.BaseType.push_back(
2188  moduleTranslation.convertType(mapOp.getVarType()));
2189  mapData.Sizes.push_back(
2190  getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(),
2191  mapData.BaseType.back(), builder, moduleTranslation));
2192  mapData.MapClause.push_back(mapOp.getOperation());
2193  mapData.Types.push_back(
2194  llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value()));
2195  mapData.Names.push_back(LLVM::createMappingInformation(
2196  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
2197  mapData.DevicePointers.push_back(
2198  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2199 
2200  // Check if this is a member mapping and correctly assign that it is, if
2201  // it is a member of a larger object.
2202  // TODO: Need better handling of members, and distinguishing of members
2203  // that are implicitly allocated on device vs explicitly passed in as
2204  // arguments.
2205  // TODO: May require some further additions to support nested record
2206  // types, i.e. member maps that can have member maps.
2207  mapData.IsAMember.push_back(false);
2208  for (mlir::Value mapValue : mapOperands) {
2209  if (auto map = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2210  mapValue.getDefiningOp())) {
2211  for (auto member : map.getMembers()) {
2212  if (member == mapOp) {
2213  mapData.IsAMember.back() = true;
2214  }
2215  }
2216  }
2217  }
2218  }
2219  }
2220 }
2221 
2222 static int getMapDataMemberIdx(MapInfoData &mapData,
2223  mlir::omp::MapInfoOp memberOp) {
2224  auto *res = llvm::find(mapData.MapClause, memberOp);
2225  assert(res != mapData.MapClause.end() &&
2226  "MapInfoOp for member not found in MapData, cannot return index");
2227  return std::distance(mapData.MapClause.begin(), res);
2228 }
2229 
2230 static mlir::omp::MapInfoOp
2231 getFirstOrLastMappedMemberPtr(mlir::omp::MapInfoOp mapInfo, bool first) {
2232  mlir::DenseIntElementsAttr indexAttr = mapInfo.getMembersIndexAttr();
2233 
2234  // Only 1 member has been mapped, we can return it.
2235  if (indexAttr.size() == 1)
2236  if (auto mapOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(
2237  mapInfo.getMembers()[0].getDefiningOp()))
2238  return mapOp;
2239 
2240  llvm::ArrayRef<int64_t> shape = indexAttr.getShapedType().getShape();
2241  llvm::SmallVector<size_t> indices(shape[0]);
2242  std::iota(indices.begin(), indices.end(), 0);
2243 
2244  llvm::sort(indices.begin(), indices.end(),
2245  [&](const size_t a, const size_t b) {
2246  auto indexValues = indexAttr.getValues<int32_t>();
2247  for (int i = 0; i < shape[1]; ++i) {
2248  int aIndex = indexValues[a * shape[1] + i];
2249  int bIndex = indexValues[b * shape[1] + i];
2250 
2251  if (aIndex == bIndex)
2252  continue;
2253 
2254  if (aIndex != -1 && bIndex == -1)
2255  return false;
2256 
2257  if (aIndex == -1 && bIndex != -1)
2258  return true;
2259 
2260  // A is earlier in the record type layout than B
2261  if (aIndex < bIndex)
2262  return first;
2263 
2264  if (bIndex < aIndex)
2265  return !first;
2266  }
2267 
2268  // Iterated the entire list and couldn't make a decision, all
2269  // elements were likely the same. Return false, since the sort
2270  // comparator should return false for equal elements.
2271  return false;
2272  });
2273 
2274  return llvm::cast<mlir::omp::MapInfoOp>(
2275  mapInfo.getMembers()[indices.front()].getDefiningOp());
2276 }
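// For illustration, assuming a members-index attribute holding the rows
// [0, -1] and [1, 0] for two mapped members: the comparator above orders
// row [0, -1] before [1, 0], so `first == true` returns the member at
// position 0 and `first == false` returns the member at position 1.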
2277 
2278 /// This function calculates the array/pointer offset for map data provided
2279 /// with bounds operations, e.g. when provided something like the following:
2280 ///
2281 /// Fortran
2282 /// map(tofrom: array(2:5, 3:2))
2283 /// or
2284 /// C++
2285 /// map(tofrom: array[1:4][2:3])
2286 /// We must calculate the initial pointer offset to pass across, this function
2287 /// performs this using bounds.
2288 ///
2289 /// NOTE: while bounds are specified in row-major order, they currently need
2290 /// to be flipped for Fortran's column-major array allocation and access (as
2291 /// opposed to C++'s row-major layout, hence the backwards processing where
2292 /// order is important). This is likely important to keep in mind for the
2293 /// future when we incorporate a C++ frontend; both frontends will need to
2294 /// agree on the ordering of generated bounds operations (one may have to
2295 /// flip them) to make the below lowering frontend agnostic. The offload size
2296 /// calculation may also have to be adjusted for C++.
2297 std::vector<llvm::Value *>
2298 calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
2299  llvm::IRBuilderBase &builder, bool isArrayTy,
2300  mlir::OperandRange bounds) {
2301  std::vector<llvm::Value *> idx;
2302  // There's no bounds to calculate an offset from, we can safely
2303  // ignore and return no indices.
2304  if (bounds.empty())
2305  return idx;
2306 
2307  // If we have an array type, then we know its element type, so we can treat
2308  // it as a normal GEP instruction where the bounds operations are simply
2309  // indexes into the array. We currently process the bounds in reverse order,
2310  // which I believe leans more towards Fortran's column-major layout in memory.
2311  if (isArrayTy) {
2312  idx.push_back(builder.getInt64(0));
2313  for (int i = bounds.size() - 1; i >= 0; --i) {
2314  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2315  bounds[i].getDefiningOp())) {
2316  idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound()));
2317  }
2318  }
2319  } else {
2320  // If we do not have an array type, but we have bounds, then we're dealing
2321  // with a pointer that's being treated like an array for which we only have
2322  // the underlying type, e.g. an i32 or f64 (e.g. a Fortran descriptor base
2323  // address, a pointer pointing to the actual data), so we must calculate
2324  // the offset using a single index, which the following two loops attempt
2325  // to compute.
2326 
2327  // Calculate the size offset we need to step by per row/column: the first
2328  // row or column only needs to be offset by one, but each subsequent one is
2329  // the previous row/column offset multiplied by the extent of the current one.
2330  //
2331  // For example ([1][10][100]):
2332  //
2333  // - First row/column we move by 1 for each index increment
2334  // - Second row/column we move by 1 (first row/column) * 10 (extent/size of
2335  // current), i.e. 10, for each index increment
2336  // - Third row/column we move by 10 (second row/column) * 100 (extent/size
2337  // of current), i.e. 1000, for each index increment
2338  std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
2339  for (size_t i = 1; i < bounds.size(); ++i) {
2340  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2341  bounds[i].getDefiningOp())) {
2342  dimensionIndexSizeOffset.push_back(builder.CreateMul(
2343  moduleTranslation.lookupValue(boundOp.getExtent()),
2344  dimensionIndexSizeOffset[i - 1]));
2345  }
2346  }
2347 
2348  // Now that we have calculated how much we move by per index, we must
2349  // multiply each lower bound offset in indexes by the size offset we
2350  // have calculated in the previous and accumulate the results to get
2351  // our final resulting offset.
2352  for (int i = bounds.size() - 1; i >= 0; --i) {
2353  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>(
2354  bounds[i].getDefiningOp())) {
2355  if (idx.empty())
2356  idx.emplace_back(builder.CreateMul(
2357  moduleTranslation.lookupValue(boundOp.getLowerBound()),
2358  dimensionIndexSizeOffset[i]));
2359  else
2360  idx.back() = builder.CreateAdd(
2361  idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
2362  boundOp.getLowerBound()),
2363  dimensionIndexSizeOffset[i]));
2364  }
2365  }
2366  }
2367 
2368  return idx;
2369 }
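// For illustration, a trace of the pointer (non-array) path above, assuming
// two map.bounds operands with (lower bound, extent) of (1, 10) and (2, 20)
// in that order: dimensionIndexSizeOffset becomes {1, 20 * 1} = {1, 20}, and
// the single accumulated index is 2 * 20 + 1 * 1 = 41 elements from the base
// address.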
2370 
2371 // This creates two insertions into the MapInfosTy data structure for the
2372 // "parent" of a set of members, (usually a container e.g.
2373 // class/structure/derived type) when subsequent members have also been
2374 // explicitly mapped on the same map clause. Certain types, such as Fortran
2375 // descriptors are mapped like this as well, however, the members are
2376 // implicit as far as a user is concerned, but we must explicitly map them
2377 // internally.
2378 //
2379 // This function also returns the memberOfFlag for this particular parent,
2380 // which is utilised in subsequent member mappings (by modifying there map type
2381 // with it) to indicate that a member is part of this parent and should be
2382 // treated by the runtime as such. Important to achieve the correct mapping.
2383 //
2384 // This function borrows a lot from Clang's emitCombinedEntry function
2385 // inside of CGOpenMPRuntime.cpp
2386 static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
2387  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
2388  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
2389  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
2390  uint64_t mapDataIndex, bool isTargetParams) {
2391  // Map the first segment of our structure
2392  combinedInfo.Types.emplace_back(
2393  isTargetParams
2394  ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
2395  : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE);
2396  combinedInfo.DevicePointers.emplace_back(
2397  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2398  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
2399  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
2400  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
2401 
2402  // Calculate size of the parent object being mapped based on the
2403  // addresses at runtime, highAddr - lowAddr = size. This of course
2404  // doesn't factor in allocated data like pointers, hence the further
2405  // processing of members specified by users, or in the case of
2406  // Fortran pointers and allocatables, the mapping of the pointed to
2407  // data by the descriptor (which itself, is a structure containing
2408  // runtime information on the dynamically allocated data).
2409  auto parentClause =
2410  llvm::cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
2411 
2412  llvm::Value *lowAddr, *highAddr;
2413  if (!parentClause.getPartialMap()) {
2414  lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex],
2415  builder.getPtrTy());
2416  highAddr = builder.CreatePointerCast(
2417  builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
2418  mapData.Pointers[mapDataIndex], 1),
2419  builder.getPtrTy());
2420  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
2421  } else {
2422  auto mapOp =
2423  mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
2424  int firstMemberIdx = getMapDataMemberIdx(
2425  mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
2426  lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
2427  builder.getPtrTy());
2428  int lastMemberIdx = getMapDataMemberIdx(
2429  mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
2430  highAddr = builder.CreatePointerCast(
2431  builder.CreateGEP(mapData.BaseType[lastMemberIdx],
2432  mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
2433  builder.getPtrTy());
2434  combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
2435  }
2436 
2437  llvm::Value *size = builder.CreateIntCast(
2438  builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
2439  builder.getInt64Ty(),
2440  /*isSigned=*/false);
2441  combinedInfo.Sizes.push_back(size);
2442 
2443  // TODO: This will need to be expanded to include the whole host of logic for
2444  // the map flags that Clang currently supports (e.g. it should take the map
2445  // flag of the parent map flag, remove the OMP_MAP_TARGET_PARAM and do some
2446  // further case specific flag modifications). For the moment, it handles what
2447  // we support as expected.
2448  llvm::omp::OpenMPOffloadMappingFlags mapFlag =
2449  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
2450 
2451  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
2452  ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
2453  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
2454 
2455  // This creates the initial MEMBER_OF mapping that consists of
2456  // the parent/top level container (same as above effectively, except
2457  // with a fixed initial compile time size and separate maptype which
2458  // indicates the true map type (tofrom etc.). This parent mapping is
2459  // only relevant if the structure in its totality is being mapped,
2460  // otherwise the above suffices.
2461  if (!parentClause.getPartialMap()) {
2462  combinedInfo.Types.emplace_back(mapFlag);
2463  combinedInfo.DevicePointers.emplace_back(
2464  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2465  combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
2466  mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
2467  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
2468  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
2469  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
2470  }
2471  return memberOfFlag;
2472 }
2473 
2474 // The intent is to verify if the mapped data being passed is a
2475 // pointer -> pointee that requires special handling in certain cases,
2476 // e.g. applying the OMP_MAP_PTR_AND_OBJ map type.
2477 //
2478 // There may be a better way to verify this, but unfortunately with
2479 // opaque pointers we lose the ability to easily check if something is
2480 // a pointer whilst maintaining access to the underlying type.
2481 static bool checkIfPointerMap(mlir::omp::MapInfoOp mapOp) {
2482  // If we have a varPtrPtr field assigned then the underlying type is a pointer
2483  if (mapOp.getVarPtrPtr())
2484  return true;
2485 
2486  // If the map data is declare target with a link clause, then it's represented
2487  // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has
2488  // no relation to pointers.
2489  if (isDeclareTargetLink(mapOp.getVarPtr()))
2490  return true;
2491 
2492  return false;
2493 }
2494 
2495 // This function is intended to add explicit mappings of members
2496 static void processMapMembersWithParent(
2497  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
2498  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
2499  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
2500  uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) {
2501 
2502  auto parentClause =
2503  llvm::cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
2504 
2505  for (auto mappedMembers : parentClause.getMembers()) {
2506  auto memberClause =
2507  llvm::cast<mlir::omp::MapInfoOp>(mappedMembers.getDefiningOp());
2508  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
2509 
2510  assert(memberDataIdx >= 0 && "could not find mapped member of structure");
2511 
2512  // Use the same MemberOfFlag to indicate its link with its parent and the
2513  // other members of that parent.
2514  auto mapFlag =
2515  llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType().value());
2516  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
2517  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
2518  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
2519  if (checkIfPointerMap(memberClause))
2520  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
2521 
2522  combinedInfo.Types.emplace_back(mapFlag);
2523  combinedInfo.DevicePointers.emplace_back(
2524  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
2525  combinedInfo.Names.emplace_back(
2526  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
2527  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
2528  combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
2529  combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
2530  }
2531 }
2532 
2533 static void
2534 processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
2535  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
2536  bool isTargetParams, int mapDataParentIdx = -1) {
2537  // Declare Target Mappings are excluded from being marked as
2538  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
2539  // marked with OMP_MAP_PTR_AND_OBJ instead.
2540  auto mapFlag = mapData.Types[mapDataIdx];
2541  auto mapInfoOp =
2542  llvm::cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
2543 
2544  bool isPtrTy = checkIfPointerMap(mapInfoOp);
2545  if (isPtrTy)
2546  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
2547 
2548  if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx])
2549  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
2550 
2551  if (mapInfoOp.getMapCaptureType().value() ==
2552  mlir::omp::VariableCaptureKind::ByCopy &&
2553  !isPtrTy)
2554  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
2555 
2556  // if we're provided a mapDataParentIdx, then the data being mapped is
2557  // part of a larger object (in a parent <-> member mapping) and in this
2558  // case our BasePointer should be the parent.
2559  if (mapDataParentIdx >= 0)
2560  combinedInfo.BasePointers.emplace_back(
2561  mapData.BasePointers[mapDataParentIdx]);
2562  else
2563  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
2564 
2565  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
2566  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
2567  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
2568  combinedInfo.Types.emplace_back(mapFlag);
2569  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
2570 }
2571 
2572 static void processMapWithMembersOf(
2573  LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
2574  llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
2575  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
2576  uint64_t mapDataIndex, bool isTargetParams) {
2577  auto parentClause =
2578  llvm::cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
2579 
2580  // If we have a partial map (no parent referenced in the map clauses of the
2581  // directive, only members) and only a single member, we do not need to bind
2582  // the map of the member to the parent, we can pass the member separately.
2583  if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) {
2584  auto memberClause = llvm::cast<mlir::omp::MapInfoOp>(
2585  parentClause.getMembers()[0].getDefiningOp());
2586  int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
2587  // Note: Clang treats arrays with explicit bounds that fall into this
2588  // category as a parent with map case, however, it seems this isn't a
2589  // requirement, and processing them as an individual map is fine. So,
2590  // we will handle them as individual maps for the moment, as it's
2591  // difficult for us to check this as we always require bounds to be
2592  // specified currently and it's also marginally more optimal (single
2593  // map rather than two). The difference may come from the fact that
2594  // Clang maps array without bounds as pointers (which we do not
2595  // currently do), whereas we treat them as arrays in all cases
2596  // currently.
2597  processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams,
2598  mapDataIndex);
2599  return;
2600  }
2601 
2602  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
2603  mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
2604  combinedInfo, mapData, mapDataIndex, isTargetParams);
2605  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
2606  combinedInfo, mapData, mapDataIndex,
2607  memberOfParentFlag);
2608 }
2609 
2610 // This is a variation on Clang's GenerateOpenMPCapturedVars, which
2611 // generates different operation (e.g. load/store) combinations for
2612 // arguments to the kernel, based on map capture kinds which are then
2613 // utilised in the combinedInfo in place of the original Map value.
2614 static void
2616  LLVM::ModuleTranslation &moduleTranslation,
2617  llvm::IRBuilderBase &builder) {
2618  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
2619  // if it's declare target, skip it, it's handled separately.
2620  if (!mapData.IsDeclareTarget[i]) {
2621  auto mapOp =
2622  mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(mapData.MapClause[i]);
2623  mlir::omp::VariableCaptureKind captureKind =
2624  mapOp.getMapCaptureType().value_or(
2625  mlir::omp::VariableCaptureKind::ByRef);
2626  bool isPtrTy = checkIfPointerMap(mapOp);
2627 
2628  // This currently handles the array sectioning lower-bound case, but more
2629  // logic may be required in the future. Clang invokes EmitLValue, which has
2630  // specialised logic for special Clang types such as user-defined types, so
2631  // it is possible we will have to extend this for structures or other
2632  // complex types. The general idea is that this function mimics some of the
2633  // logic from Clang that we require for kernel argument passing from
2634  // host -> device.
2635  switch (captureKind) {
2636  case mlir::omp::VariableCaptureKind::ByRef: {
2637  llvm::Value *newV = mapData.Pointers[i];
2638  std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
2639  moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
2640  mapOp.getBounds());
2641  if (isPtrTy)
2642  newV = builder.CreateLoad(builder.getPtrTy(), newV);
2643 
2644  if (!offsetIdx.empty())
2645  newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx,
2646  "array_offset");
2647  mapData.Pointers[i] = newV;
2648  } break;
2649  case mlir::omp::VariableCaptureKind::ByCopy: {
2650  llvm::Type *type = mapData.BaseType[i];
2651  llvm::Value *newV;
2652  if (mapData.Pointers[i]->getType()->isPointerTy())
2653  newV = builder.CreateLoad(type, mapData.Pointers[i]);
2654  else
2655  newV = mapData.Pointers[i];
2656 
2657  if (!isPtrTy) {
2658  auto curInsert = builder.saveIP();
2659  builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
2660  auto *memTempAlloc =
2661  builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
2662  builder.restoreIP(curInsert);
2663 
2664  builder.CreateStore(newV, memTempAlloc);
2665  newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
2666  }
2667 
2668  mapData.Pointers[i] = newV;
2669  mapData.BasePointers[i] = newV;
2670  } break;
2671  case mlir::omp::VariableCaptureKind::This:
2672  case mlir::omp::VariableCaptureKind::VLAType:
2673  mapData.MapClause[i]->emitOpError("Unhandled capture kind");
2674  break;
2675  }
2676  }
2677  }
2678 }
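// Sketch of the by-copy path above (an approximation, not verbatim output):
// for a scalar captured by copy, the generated IR is roughly
//   %.casted = alloca ptr            ; emitted at the alloca insertion point
//   %v = load i32, ptr %orig         ; load the value being passed
//   store i32 %v, ptr %.casted
//   %arg = load ptr, ptr %.casted    ; becomes both BasePointer and Pointer
// so the kernel argument carries the value rather than the host address.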
2679 
2680 // Generate all map-related information and fill the combinedInfo.
2681 static void genMapInfos(llvm::IRBuilderBase &builder,
2682  LLVM::ModuleTranslation &moduleTranslation,
2683  DataLayout &dl,
2684  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
2685  MapInfoData &mapData,
2686  const SmallVector<Value> &devPtrOperands = {},
2687  const SmallVector<Value> &devAddrOperands = {},
2688  bool isTargetParams = false) {
2689  // We wish to alter how some arguments are passed to the target region
2690  // based on their capture type. This can involve generating new loads
2691  // and stores, which changes the MLIR-value-to-LLVM-value mapping.
2692  // However, we only want to do this locally for the current
2693  // function/target and avoid altering ModuleTranslation, so we remap
2694  // the base pointer or pointer stored in the map info's corresponding
2695  // MapInfoData, which is later read by genMapInfos and createTarget to
2696  // generate the kernel and its argument structure. This primarily
2697  // matters for cases such as by-copy captures or by-ref ranged arrays;
2698  // in the default case we simply pass the pointer by reference as both
2699  // the base pointer and the pointer.
2700  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
2701  createAlteredByCaptureMap(mapData, moduleTranslation, builder);
2702 
2703  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2704 
2705  auto fail = [&combinedInfo]() -> void {
2706  combinedInfo.BasePointers.clear();
2707  combinedInfo.Pointers.clear();
2708  combinedInfo.DevicePointers.clear();
2709  combinedInfo.Sizes.clear();
2710  combinedInfo.Types.clear();
2711  combinedInfo.Names.clear();
2712  };
2713 
2714  // We operate under the assumption that all vectors required in
2715  // MapInfoData have equal lengths (filled either with
2716  // default-constructed data or with appropriate information), so we can
2717  // use the size of any MapInfoData component; if we can't, something is
2718  // missing from the initial MapInfoData construction.
2719  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
2720  // NOTE/TODO: We currently do not support arbitrary depth record
2721  // type mapping.
2722  if (mapData.IsAMember[i])
2723  continue;
2724 
2725  auto mapInfoOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[i]);
2726  if (!mapInfoOp.getMembers().empty()) {
2727  processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl,
2728  combinedInfo, mapData, i, isTargetParams);
2729  continue;
2730  }
2731 
2732  processIndividualMap(mapData, i, combinedInfo, isTargetParams);
2733  }
2734 
2735  auto findMapInfo = [&combinedInfo](llvm::Value *val, unsigned &index) {
2736  index = 0;
2737  for (llvm::Value *basePtr : combinedInfo.BasePointers) {
2738  if (basePtr == val)
2739  return true;
2740  index++;
2741  }
2742  return false;
2743  };
2744 
2745  auto addDevInfos = [&, fail](auto devOperands, auto devOpType) -> void {
2746  for (const auto &devOp : devOperands) {
2747  // TODO: Only LLVMPointerTypes are handled.
2748  if (!isa<LLVM::LLVMPointerType>(devOp.getType()))
2749  return fail();
2750 
2751  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devOp);
2752 
2753  // Check if map info is already present for this entry.
2754  unsigned infoIndex;
2755  if (findMapInfo(mapOpValue, infoIndex)) {
2756  combinedInfo.Types[infoIndex] |=
2757  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
2758  combinedInfo.DevicePointers[infoIndex] = devOpType;
2759  } else {
2760  combinedInfo.BasePointers.emplace_back(mapOpValue);
2761  combinedInfo.Pointers.emplace_back(mapOpValue);
2762  combinedInfo.DevicePointers.emplace_back(devOpType);
2763  combinedInfo.Names.emplace_back(
2764  LLVM::createMappingInformation(devOp.getLoc(), *ompBuilder));
2765  combinedInfo.Types.emplace_back(
2766  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
2767  combinedInfo.Sizes.emplace_back(builder.getInt64(0));
2768  }
2769  }
2770  };
2771 
2772  addDevInfos(devPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
2773  addDevInfos(devAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
2774 }
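// Note (editorial sketch): combinedInfo is a structure of parallel arrays, so
// entry k is described jointly by BasePointers[k], Pointers[k],
// DevicePointers[k], Names[k], Types[k] and Sizes[k]; every helper above is
// expected to push one element onto each of these vectors per map entry.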
2775 
2776 static LogicalResult
2777 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
2778  LLVM::ModuleTranslation &moduleTranslation) {
2779  llvm::Value *ifCond = nullptr;
2780  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
2781  SmallVector<Value> mapOperands;
2782  SmallVector<Value> useDevPtrOperands;
2783  SmallVector<Value> useDevAddrOperands;
2784  llvm::omp::RuntimeFunction RTLFn;
2785  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
2786 
2787  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2788 
2789  LogicalResult result =
2790  llvm::TypeSwitch<Operation *, LogicalResult>(op)
2791  .Case([&](omp::TargetDataOp dataOp) {
2792  if (auto ifExprVar = dataOp.getIfExpr())
2793  ifCond = moduleTranslation.lookupValue(ifExprVar);
2794 
2795  if (auto devId = dataOp.getDevice())
2796  if (auto constOp =
2797  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2798  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2799  deviceID = intAttr.getInt();
2800 
2801  mapOperands = dataOp.getMapOperands();
2802  useDevPtrOperands = dataOp.getUseDevicePtr();
2803  useDevAddrOperands = dataOp.getUseDeviceAddr();
2804  return success();
2805  })
2806  .Case([&](omp::TargetEnterDataOp enterDataOp) {
2807  if (enterDataOp.getNowait())
2808  return (LogicalResult)(enterDataOp.emitError(
2809  "`nowait` is not supported yet"));
2810 
2811  if (auto ifExprVar = enterDataOp.getIfExpr())
2812  ifCond = moduleTranslation.lookupValue(ifExprVar);
2813 
2814  if (auto devId = enterDataOp.getDevice())
2815  if (auto constOp =
2816  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2817  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2818  deviceID = intAttr.getInt();
2819  RTLFn = llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
2820  mapOperands = enterDataOp.getMapOperands();
2821  return success();
2822  })
2823  .Case([&](omp::TargetExitDataOp exitDataOp) {
2824  if (exitDataOp.getNowait())
2825  return (LogicalResult)(exitDataOp.emitError(
2826  "`nowait` is not supported yet"));
2827 
2828  if (auto ifExprVar = exitDataOp.getIfExpr())
2829  ifCond = moduleTranslation.lookupValue(ifExprVar);
2830 
2831  if (auto devId = exitDataOp.getDevice())
2832  if (auto constOp =
2833  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2834  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2835  deviceID = intAttr.getInt();
2836 
2837  RTLFn = llvm::omp::OMPRTL___tgt_target_data_end_mapper;
2838  mapOperands = exitDataOp.getMapOperands();
2839  return success();
2840  })
2841  .Case([&](omp::TargetUpdateOp updateDataOp) {
2842  if (updateDataOp.getNowait())
2843  return (LogicalResult)(updateDataOp.emitError(
2844  "`nowait` is not supported yet"));
2845 
2846  if (auto ifExprVar = updateDataOp.getIfExpr())
2847  ifCond = moduleTranslation.lookupValue(ifExprVar);
2848 
2849  if (auto devId = updateDataOp.getDevice())
2850  if (auto constOp =
2851  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
2852  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
2853  deviceID = intAttr.getInt();
2854 
2855  RTLFn = llvm::omp::OMPRTL___tgt_target_data_update_mapper;
2856  mapOperands = updateDataOp.getMapOperands();
2857  return success();
2858  })
2859  .Default([&](Operation *op) {
2860  return op->emitError("unsupported OpenMP operation: ")
2861  << op->getName();
2862  });
2863 
2864  if (failed(result))
2865  return failure();
2866 
2867  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2868 
2869  MapInfoData mapData;
2870  collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, DL,
2871  builder);
2872 
2873  // Fill up the arrays with all the mapped variables.
2874  llvm::OpenMPIRBuilder::MapInfosTy combinedInfo;
2875  auto genMapInfoCB =
2876  [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
2877  builder.restoreIP(codeGenIP);
2878  if (auto dataOp = dyn_cast<omp::TargetDataOp>(op)) {
2879  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData,
2880  useDevPtrOperands, useDevAddrOperands);
2881  } else {
2882  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
2883  }
2884  return combinedInfo;
2885  };
2886 
2887  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
2888  /*SeparateBeginEndCalls=*/true);
2889 
2890  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
2891  LogicalResult bodyGenStatus = success();
2892  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType) {
2893  assert(isa<omp::TargetDataOp>(op) &&
2894  "BodyGen requested for non TargetDataOp");
2895  Region &region = cast<omp::TargetDataOp>(op).getRegion();
2896  switch (bodyGenType) {
2897  case BodyGenTy::Priv:
2898  // Check if any device ptr/addr info is available
2899  if (!info.DevicePtrInfoMap.empty()) {
2900  builder.restoreIP(codeGenIP);
2901  unsigned argIndex = 0;
2902  for (auto &devPtrOp : useDevPtrOperands) {
2903  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devPtrOp);
2904  const auto &arg = region.front().getArgument(argIndex);
2905  moduleTranslation.mapValue(arg,
2906  info.DevicePtrInfoMap[mapOpValue].second);
2907  argIndex++;
2908  }
2909 
2910  for (auto &devAddrOp : useDevAddrOperands) {
2911  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devAddrOp);
2912  const auto &arg = region.front().getArgument(argIndex);
2913  auto *LI = builder.CreateLoad(
2914  builder.getPtrTy(), info.DevicePtrInfoMap[mapOpValue].second);
2915  moduleTranslation.mapValue(arg, LI);
2916  argIndex++;
2917  }
2918 
2919  bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region",
2920  builder, moduleTranslation);
2921  }
2922  break;
2923  case BodyGenTy::DupNoPriv:
2924  break;
2925  case BodyGenTy::NoPriv:
2926  // If device info is available, then the region has already been generated.
2927  if (info.DevicePtrInfoMap.empty()) {
2928  builder.restoreIP(codeGenIP);
2929  bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region",
2930  builder, moduleTranslation);
2931  }
2932  break;
2933  }
2934  return builder.saveIP();
2935  };
2936 
2937  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2938  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2939  findAllocaInsertPoint(builder, moduleTranslation);
2940  if (isa<omp::TargetDataOp>(op)) {
2941  builder.restoreIP(ompBuilder->createTargetData(
2942  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
2943  info, genMapInfoCB, nullptr, bodyGenCB));
2944  } else {
2945  builder.restoreIP(ompBuilder->createTargetData(
2946  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
2947  info, genMapInfoCB, &RTLFn));
2948  }
2949 
2950  return bodyGenStatus;
2951 }
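// Summary sketch of the dispatch above (derived from the cases handled):
//   omp.target_data       -> createTargetData with a body-generation callback
//   omp.target_enter_data -> __tgt_target_data_begin_mapper
//   omp.target_exit_data  -> __tgt_target_data_end_mapper
//   omp.target_update     -> __tgt_target_data_update_mapper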
2952 
2953 /// Lowers the FlagsAttr that is applied to the module on the device
2954 /// pass when offloading. This attribute contains OpenMP RTL globals that can
2955 /// be passed as flags to the frontend; otherwise their defaults are used.
2956 LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
2957  LLVM::ModuleTranslation &moduleTranslation) {
2958  if (!isa<mlir::ModuleOp>(op))
2959  return failure();
2960 
2961  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2962 
2963  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
2964  attribute.getOpenmpDeviceVersion());
2965 
2966  if (attribute.getNoGpuLib())
2967  return success();
2968 
2969  ompBuilder->createGlobalFlag(
2970  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
2971  "__omp_rtl_debug_kind");
2972  ompBuilder->createGlobalFlag(
2973  attribute
2974  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
2975  ,
2976  "__omp_rtl_assume_teams_oversubscription");
2977  ompBuilder->createGlobalFlag(
2978  attribute
2979  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
2980  ,
2981  "__omp_rtl_assume_threads_oversubscription");
2982  ompBuilder->createGlobalFlag(
2983  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
2984  "__omp_rtl_assume_no_thread_state");
2985  ompBuilder->createGlobalFlag(
2986  attribute
2987  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
2988  ,
2989  "__omp_rtl_assume_no_nested_parallelism");
2990  return success();
2991 }
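// Illustrative note (editorial, hedged): each getter above maps one field of
// the flags attribute to a device-side global, e.g. getDebugKind() feeds
// __omp_rtl_debug_kind and getAssumeNoThreadState() feeds
// __omp_rtl_assume_no_thread_state, while getOpenmpDeviceVersion() is
// recorded as the "openmp-device" module flag.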
2992 
2993 static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
2994  omp::TargetOp targetOp,
2995  llvm::StringRef parentName = "") {
2996  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
2997 
2998  assert(fileLoc && "No file found from location");
2999  StringRef fileName = fileLoc.getFilename().getValue();
3000 
3001  llvm::sys::fs::UniqueID id;
3002  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
3003  targetOp.emitError("Unable to get unique ID for file");
3004  return false;
3005  }
3006 
3007  uint64_t line = fileLoc.getLine();
3008  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
3009  id.getFile(), line);
3010  return true;
3011 }
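// Hedged example (hypothetical file/line): for a target region at line 42 of
// a file whose llvm::sys::fs::UniqueID is {dev, file}, the code above builds
//   TargetRegionEntryInfo(parentName, dev, file, 42)
// which the OpenMPIRBuilder uses to derive a unique offload entry name.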
3012 
3013 static bool targetOpSupported(Operation &opInst) {
3014  auto targetOp = cast<omp::TargetOp>(opInst);
3015  if (targetOp.getIfExpr()) {
3016  opInst.emitError("If clause not yet supported");
3017  return false;
3018  }
3019 
3020  if (targetOp.getDevice()) {
3021  opInst.emitError("Device clause not yet supported");
3022  return false;
3023  }
3024 
3025  if (targetOp.getThreadLimit()) {
3026  opInst.emitError("Thread limit clause not yet supported");
3027  return false;
3028  }
3029 
3030  if (targetOp.getNowait()) {
3031  opInst.emitError("Nowait clause not yet supported");
3032  return false;
3033  }
3034 
3035  return true;
3036 }
3037 
3038 static void
3039 handleDeclareTargetMapVar(MapInfoData &mapData,
3040  LLVM::ModuleTranslation &moduleTranslation,
3041  llvm::IRBuilderBase &builder, llvm::Function *func) {
3042  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
3043  // In the case of declare target mapped variables, the basePointer is
3044  // the reference pointer generated by the convertDeclareTargetAttr
3045  // method, whereas the kernelValue is the original variable. For the
3046  // device we must replace all uses of the original global variable
3047  // (stored in kernelValue) with the reference pointer (stored in
3048  // basePointer for declare target mapped variables), because on the
3049  // device the data is mapped into the reference pointer and should be
3050  // loaded from it; the original variable is discarded. On the host both
3051  // exist, and metadata is generated (elsewhere in the
3052  // convertDeclareTargetAttr function) to link the two variables in the
3053  // runtime; then both the reference pointer and the pointer are
3054  // assigned in the kernel argument structure for the host.
3055  if (mapData.IsDeclareTarget[i]) {
3056  // If the original map value is a constant, then we have to make sure all
3057  // of its uses within the current kernel/function that we are going to
3058  // rewrite are converted to instructions, as we will be altering the old
3059  // use (OriginalValue) from a constant to an instruction, which would be
3060  // illegal and ICE the compiler if the user is a constant expression of
3061  // some kind, e.g. a constant GEP.
3062  if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
3063  convertUsersOfConstantsToInstructions(constant, func, false);
3064 
3065  // The users iterator will get invalidated if we modify an element,
3066  // so we populate this vector of uses to alter each user on an
3067  // individual basis to emit its own load (rather than one load for
3068  // all).
3069  llvm::SmallVector<llvm::User *> userVec;
3070  for (llvm::User *user : mapData.OriginalValue[i]->users())
3071  userVec.push_back(user);
3072 
3073  for (llvm::User *user : userVec) {
3074  if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
3075  if (insn->getFunction() == func) {
3076  auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
3077  mapData.BasePointers[i]);
3078  load->moveBefore(insn);
3079  user->replaceUsesOfWith(mapData.OriginalValue[i], load);
3080  }
3081  }
3082  }
3083  }
3084  }
3085 }
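// Rough illustration of the rewrite above (editorial, not verbatim): on the
// device, a use such as
//   %v = load i32, ptr @the_global
// is rewritten so that the reference pointer (the entry's BasePointer) is
// loaded immediately before the user and substituted for @the_global:
//   %ref = load ptr, ptr <BasePointer>
//   %v   = load i32, ptr %ref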
3086 
3087 // The createDeviceArgumentAccessor function generates
3088 // instructions for retrieving (accessing) kernel
3089 // arguments inside of the device kernel for use by
3090 // the kernel. This enables different semantics, such as
3091 // the creation of temporary copies of data, allowing
3092 // semantics like read-only / no-host-write-back kernel
3093 // arguments.
3094 //
3095 // This currently implements a very light version of Clang's
3096 // EmitParmDecl handling of direct arguments, as well
3097 // as a portion of the argument access generation based on
3098 // capture types found at the end of emitOutlinedFunctionPrologue
3099 // in Clang. EmitParmDecl's indirect path handling may be
3100 // required for future work, but a direct 1-to-1 copy doesn't seem
3101 // possible, as the logic is rather scattered throughout Clang's
3102 // lowering and perhaps we wish to deviate slightly.
3103 //
3104 // \param mapData - A container of vectors of information
3105 // corresponding to the input argument, which should have a
3106 // corresponding entry in the MapInfoData container's
3107 // OriginalValues.
3108 // \param arg - This is the generated kernel function argument that
3109 // corresponds to the passed-in input argument. We generate different
3110 // accesses of this Argument based on capture type and other input-
3111 // related information.
3112 // \param input - This is the host-side value that will be passed to
3113 // the kernel, i.e. the kernel input. We rewrite all uses of it within
3114 // the kernel (as we generate the kernel body from the target's region,
3115 // which maintains references to the original input) to the retVal
3116 // argument upon exit of this function inside of the OMPIRBuilder. This
3117 // links the kernel argument to its future uses in the function,
3118 // providing appropriate "glue" instructions in between.
3119 // \param retVal - This is the value that all uses of input inside of the
3120 // kernel will be rewritten to. The goal of this function is to generate
3121 // an appropriate location for the kernel argument to be accessed from:
3122 // e.g. ByRef results in a temporary allocation, a store of the kernel
3123 // argument into that allocated memory, and subsequent loads from it,
3124 // whereas ByCopy uses the allocated memory
3125 // directly.
3126 static llvm::IRBuilderBase::InsertPoint
3127 createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
3128  llvm::Value *input, llvm::Value *&retVal,
3129  llvm::IRBuilderBase &builder,
3130  llvm::OpenMPIRBuilder &ompBuilder,
3131  LLVM::ModuleTranslation &moduleTranslation,
3132  llvm::IRBuilderBase::InsertPoint allocaIP,
3133  llvm::IRBuilderBase::InsertPoint codeGenIP) {
3134  builder.restoreIP(allocaIP);
3135 
3136  mlir::omp::VariableCaptureKind capture =
3137  mlir::omp::VariableCaptureKind::ByRef;
3138 
3139  // Find the associated MapInfoData entry for the current input
3140  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
3141  if (mapData.OriginalValue[i] == input) {
3142  if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
3143  mapData.MapClause[i])) {
3144  capture = mapOp.getMapCaptureType().value_or(
3145  mlir::omp::VariableCaptureKind::ByRef);
3146  }
3147 
3148  break;
3149  }
3150 
3151  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
3152  unsigned int defaultAS =
3153  ompBuilder.M.getDataLayout().getProgramAddressSpace();
3154 
3155  // Create the alloca for the argument at the current insertion point.
3156  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
3157 
3158  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
3159  v = builder.CreatePointerBitCastOrAddrSpaceCast(
3160  v, arg.getType()->getPointerTo(defaultAS));
3161 
3162  builder.CreateStore(&arg, v);
3163 
3164  builder.restoreIP(codeGenIP);
3165 
3166  switch (capture) {
3167  case mlir::omp::VariableCaptureKind::ByCopy: {
3168  retVal = v;
3169  break;
3170  }
3171  case mlir::omp::VariableCaptureKind::ByRef: {
3172  retVal = builder.CreateAlignedLoad(
3173  v->getType(), v,
3174  ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
3175  break;
3176  }
3177  case mlir::omp::VariableCaptureKind::This:
3178  case mlir::omp::VariableCaptureKind::VLAType:
3179  assert(false && "Currently unsupported capture kind");
3180  break;
3181  }
3182 
3183  return builder.saveIP();
3184 }
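// Hedged sketch (editorial) of the accessor produced above for a
// pointer-typed kernel argument %arg when the alloca address space differs
// from the program address space (e.g. on AMDGPU):
//   %a = alloca ptr, addrspace(5)
//   %p = addrspacecast ptr addrspace(5) %a to ptr
//   store ptr %arg, ptr %p
//   ByRef:  retVal = load ptr, ptr %p
//   ByCopy: retVal = %p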
3185 
3186 static LogicalResult
3187 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
3188  LLVM::ModuleTranslation &moduleTranslation) {
3189 
3190  if (!targetOpSupported(opInst))
3191  return failure();
3192 
3193  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
3194  auto targetOp = cast<omp::TargetOp>(opInst);
3195  auto &targetRegion = targetOp.getRegion();
3196  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
3197  SmallVector<Value> mapOperands = targetOp.getMapOperands();
3198  llvm::Function *llvmOutlinedFn = nullptr;
3199 
3200  LogicalResult bodyGenStatus = success();
3201  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3202  auto bodyCB = [&](InsertPointTy allocaIP,
3203  InsertPointTy codeGenIP) -> InsertPointTy {
3204  // Forward target-cpu and target-features function attributes from the
3205  // original function to the new outlined function.
3206  llvm::Function *llvmParentFn =
3207  moduleTranslation.lookupFunction(parentFn.getName());
3208  llvmOutlinedFn = codeGenIP.getBlock()->getParent();
3209  assert(llvmParentFn && llvmOutlinedFn &&
3210  "Both parent and outlined functions must exist at this point");
3211 
3212  if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
3213  attr.isStringAttribute())
3214  llvmOutlinedFn->addFnAttr(attr);
3215 
3216  if (auto attr = llvmParentFn->getFnAttribute("target-features");
3217  attr.isStringAttribute())
3218  llvmOutlinedFn->addFnAttr(attr);
3219 
3220  builder.restoreIP(codeGenIP);
3221  unsigned argIndex = 0;
3222  for (auto &mapOp : mapOperands) {
3223  auto mapInfoOp =
3224  mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
3225  llvm::Value *mapOpValue =
3226  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
3227  const auto &arg = targetRegion.front().getArgument(argIndex);
3228  moduleTranslation.mapValue(arg, mapOpValue);
3229  argIndex++;
3230  }
3231  llvm::BasicBlock *exitBlock = convertOmpOpRegions(
3232  targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
3233  builder.SetInsertPoint(exitBlock);
3234  return builder.saveIP();
3235  };
3236 
3237  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
3238  StringRef parentName = parentFn.getName();
3239 
3240  llvm::TargetRegionEntryInfo entryInfo;
3241 
3242  if (!getTargetEntryUniqueInfo(entryInfo, targetOp, parentName))
3243  return failure();
3244 
3245  int32_t defaultValTeams = -1;
3246  int32_t defaultValThreads = 0;
3247 
3248  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
3249  findAllocaInsertPoint(builder, moduleTranslation);
3250 
3251  MapInfoData mapData;
3252  collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, dl,
3253  builder);
3254 
3255  llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
3256  auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
3257  -> llvm::OpenMPIRBuilder::MapInfosTy & {
3258  builder.restoreIP(codeGenIP);
3259  genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, {}, {},
3260  true);
3261  return combinedInfos;
3262  };
3263 
3264  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
3265  llvm::Value *&retVal, InsertPointTy allocaIP,
3266  InsertPointTy codeGenIP) {
3267  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3268 
3269  // We just return the unaltered argument for the host function
3270  // for now; some alterations may be required in the future to
3271  // keep host fallback functions working identically to the device
3272  // version (e.g. ByCopy values should be treated as such on both
3273  // host and device, which is currently not always the case).
3274  if (!ompBuilder->Config.isTargetDevice()) {
3275  retVal = cast<llvm::Value>(&arg);
3276  return codeGenIP;
3277  }
3278 
3279  return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
3280  *ompBuilder, moduleTranslation,
3281  allocaIP, codeGenIP);
3282  };
3283 
3284  llvm::SmallVector<llvm::Value *> kernelInput;
3285  for (size_t i = 0; i < mapOperands.size(); ++i) {
3286  // Declare target arguments are not passed to kernels as arguments.
3287  // TODO: We currently do not handle cases where a member is explicitly
3288  // passed in as an argument; this will likely need to be handled in
3289  // the near future. Rather than using IsAMember, it may be better to
3290  // test whether the relevant BlockArg is used within the target region
3291  // and then use that as a basis for exclusion from the kernel inputs.
3292  if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
3293  kernelInput.push_back(mapData.OriginalValue[i]);
3294  }
3295 
3296  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget(
3297  ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams,
3298  defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB));
3299 
3300  // Remap access operations to declare target reference pointers for the
3301  // device, essentially generating extra load ops as necessary.
3302  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
3303  handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
3304  llvmOutlinedFn);
3305 
3306  return bodyGenStatus;
3307 }
3308 
3309 static LogicalResult
3310 convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
3311  LLVM::ModuleTranslation &moduleTranslation) {
3312  // Amend omp.declare_target by deleting the IR of the outlined functions
3313  // created for target regions. They cannot be filtered out from MLIR earlier
3314  // because the omp.target operation inside must be translated to LLVM, but
3315  // the wrapper functions themselves must not remain at the end of the
3316  // process. We know that functions where omp.declare_target does not match
3317  // omp.is_target_device at this stage can only be wrapper functions because
3318  // those that aren't are removed earlier as an MLIR transformation pass.
3319  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
3320  if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
3321  op->getParentOfType<ModuleOp>().getOperation())) {
3322  if (!offloadMod.getIsTargetDevice())
3323  return success();
3324 
3325  omp::DeclareTargetDeviceType declareType =
3326  attribute.getDeviceType().getValue();
3327 
3328  if (declareType == omp::DeclareTargetDeviceType::host) {
3329  llvm::Function *llvmFunc =
3330  moduleTranslation.lookupFunction(funcOp.getName());
3331  llvmFunc->dropAllReferences();
3332  llvmFunc->eraseFromParent();
3333  }
3334  }
3335  return success();
3336  }
3337 
3338  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
3339  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
3340  if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
3341  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3342  bool isDeclaration = gOp.isDeclaration();
3343  bool isExternallyVisible =
3344  gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
3345  auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
3346  llvm::StringRef mangledName = gOp.getSymName();
3347  auto captureClause =
3348  convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
3349  auto deviceClause =
3350  convertToDeviceClauseKind(attribute.getDeviceType().getValue());
3351  // Unused by MLIR at the moment; required by Clang for
3352  // bookkeeping.
3353  std::vector<llvm::GlobalVariable *> generatedRefs;
3354 
3355  std::vector<llvm::Triple> targetTriple;
3356  auto targetTripleAttr = dyn_cast_or_null<mlir::StringAttr>(
3357  op->getParentOfType<mlir::ModuleOp>()->getAttr(
3358  LLVM::LLVMDialect::getTargetTripleAttrName()));
3359  if (targetTripleAttr)
3360  targetTriple.emplace_back(targetTripleAttr.data());
3361 
3362  auto fileInfoCallBack = [&loc]() {
3363  std::string filename = "";
3364  std::uint64_t lineNo = 0;
3365 
3366  if (loc) {
3367  filename = loc.getFilename().str();
3368  lineNo = loc.getLine();
3369  }
3370 
3371  return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
3372  lineNo);
3373  };
3374 
3375  ompBuilder->registerTargetGlobalVariable(
3376  captureClause, deviceClause, isDeclaration, isExternallyVisible,
3377  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
3378  generatedRefs, /*OpenMPSimd*/ false, targetTriple,
3379  /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
3380  gVal->getType(), gVal);
3381 
3382  if (ompBuilder->Config.isTargetDevice() &&
3383  (attribute.getCaptureClause().getValue() !=
3384  mlir::omp::DeclareTargetCaptureClause::to ||
3385  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
3386  ompBuilder->getAddrOfDeclareTargetVar(
3387  captureClause, deviceClause, isDeclaration, isExternallyVisible,
3388  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
3389  generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
3390  /*GlobalInitializer*/ nullptr,
3391  /*VariableLinkage*/ nullptr);
3392  }
3393  }
3394  }
3395 
3396  return success();
3397 }
3398 
3399 // Returns true if the operation is inside a TargetOp or
3400 // is part of a declare target function.
3401 static bool isTargetDeviceOp(Operation *op) {
3402  // Assumes no reverse offloading
3403  if (op->getParentOfType<omp::TargetOp>())
3404  return true;
3405 
3406  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
3407  if (auto declareTargetIface =
3408  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
3409  parentFn.getOperation()))
3410  if (declareTargetIface.isDeclareTarget() &&
3411  declareTargetIface.getDeclareTargetDeviceType() !=
3412  mlir::omp::DeclareTargetDeviceType::host)
3413  return true;
3414 
3415  return false;
3416 }
3417 
3418 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
3419 /// (including OpenMP runtime calls).
3420 static LogicalResult
3421 convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
3422  LLVM::ModuleTranslation &moduleTranslation) {
3423 
3424  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3425 
3426  return llvm::TypeSwitch<Operation *, LogicalResult>(op)
3427  .Case([&](omp::BarrierOp) {
3428  ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
3429  return success();
3430  })
3431  .Case([&](omp::TaskwaitOp) {
3432  ompBuilder->createTaskwait(builder.saveIP());
3433  return success();
3434  })
3435  .Case([&](omp::TaskyieldOp) {
3436  ompBuilder->createTaskyield(builder.saveIP());
3437  return success();
3438  })
3439  .Case([&](omp::FlushOp) {
3440  // The OpenMP runtime function (__kmpc_flush) does not accept an
3441  // argument list.
3442  // The OpenMP standard states the following:
3443  // "An implementation may implement a flush with a list by ignoring
3444  // the list, and treating it the same as a flush without a list."
3445  //
3446  // The argument list is therefore discarded, so a flush with a list is
3447  // treated the same as a flush without a list.
3448  ompBuilder->createFlush(builder.saveIP());
3449  return success();
3450  })
3451  .Case([&](omp::ParallelOp op) {
3452  return convertOmpParallel(op, builder, moduleTranslation);
3453  })
3454  .Case([&](omp::MaskedOp) {
3455  return convertOmpMasked(*op, builder, moduleTranslation);
3456  })
3457  .Case([&](omp::MasterOp) {
3458  return convertOmpMaster(*op, builder, moduleTranslation);
3459  })
3460  .Case([&](omp::CriticalOp) {
3461  return convertOmpCritical(*op, builder, moduleTranslation);
3462  })
3463  .Case([&](omp::OrderedRegionOp) {
3464  return convertOmpOrderedRegion(*op, builder, moduleTranslation);
3465  })
3466  .Case([&](omp::OrderedOp) {
3467  return convertOmpOrdered(*op, builder, moduleTranslation);
3468  })
3469  .Case([&](omp::WsloopOp) {
3470  return convertOmpWsloop(*op, builder, moduleTranslation);
3471  })
3472  .Case([&](omp::SimdOp) {
3473  return convertOmpSimd(*op, builder, moduleTranslation);
3474  })
3475  .Case([&](omp::AtomicReadOp) {
3476  return convertOmpAtomicRead(*op, builder, moduleTranslation);
3477  })
3478  .Case([&](omp::AtomicWriteOp) {
3479  return convertOmpAtomicWrite(*op, builder, moduleTranslation);
3480  })
3481  .Case([&](omp::AtomicUpdateOp op) {
3482  return convertOmpAtomicUpdate(op, builder, moduleTranslation);
3483  })
3484  .Case([&](omp::AtomicCaptureOp op) {
3485  return convertOmpAtomicCapture(op, builder, moduleTranslation);
3486  })
3487  .Case([&](omp::SectionsOp) {
3488  return convertOmpSections(*op, builder, moduleTranslation);
3489  })
3490  .Case([&](omp::SingleOp op) {
3491  return convertOmpSingle(op, builder, moduleTranslation);
3492  })
3493  .Case([&](omp::TeamsOp op) {
3494  return convertOmpTeams(op, builder, moduleTranslation);
3495  })
3496  .Case([&](omp::TaskOp op) {
3497  return convertOmpTaskOp(op, builder, moduleTranslation);
3498  })
3499  .Case([&](omp::TaskgroupOp op) {
3500  return convertOmpTaskgroupOp(op, builder, moduleTranslation);
3501  })
3502  .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareReductionOp,
3503  omp::CriticalDeclareOp>([](auto op) {
3504  // `yield` and `terminator` can simply be omitted. The block structure
3505  // was created in the region that handles their parent operation.
3506  // `declare_reduction` will be used by reductions and is not
3507  // converted directly, so skip it.
3508  // `critical.declare` is only used to declare names of critical
3509  // sections, which will be used by `critical` ops, and hence can be
3510  // ignored for lowering. The OpenMP IRBuilder will create unique
3511  // names for critical sections.
3512  return success();
3513  })
3514  .Case([&](omp::ThreadprivateOp) {
3515  return convertOmpThreadprivate(*op, builder, moduleTranslation);
3516  })
3517  .Case<omp::TargetDataOp, omp::TargetEnterDataOp, omp::TargetExitDataOp,
3518  omp::TargetUpdateOp>([&](auto op) {
3519  return convertOmpTargetData(op, builder, moduleTranslation);
3520  })
3521  .Case([&](omp::TargetOp) {
3522  return convertOmpTarget(*op, builder, moduleTranslation);
3523  })
3524  .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
3525  [&](auto op) {
3526  // No-op, should be handled by relevant owning operations e.g.
3527  // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp etc.
3528  // and then discarded
3529  return success();
3530  })
3531  .Default([&](Operation *inst) {
3532  return inst->emitError("unsupported OpenMP operation: ")
3533  << inst->getName();
3534  });
3535 }
3536 
3537 static LogicalResult
3538 convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
3539  LLVM::ModuleTranslation &moduleTranslation) {
3540  return convertHostOrTargetOperation(op, builder, moduleTranslation);
3541 }
3542 
3543 static LogicalResult
3544 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
3545  LLVM::ModuleTranslation &moduleTranslation) {
3546  if (isa<omp::TargetOp>(op))
3547  return convertOmpTarget(*op, builder, moduleTranslation);
3548  if (isa<omp::TargetDataOp>(op))
3549  return convertOmpTargetData(op, builder, moduleTranslation);
3550  bool interrupted =
3551  op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
3552  if (isa<omp::TargetOp>(oper)) {
3553  if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
3554  return WalkResult::interrupt();
3555  return WalkResult::skip();
3556  }
3557  if (isa<omp::TargetDataOp>(oper)) {
3558  if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
3559  return WalkResult::interrupt();
3560  return WalkResult::skip();
3561  }
3562  return WalkResult::advance();
3563  }).wasInterrupted();
3564  return failure(interrupted);
3565 }
3566 
3567 namespace {
3568 
3569 /// Implementation of the dialect interface that converts operations belonging
3570 /// to the OpenMP dialect to LLVM IR.
3571 class OpenMPDialectLLVMIRTranslationInterface
3572  : public LLVMTranslationDialectInterface {
3573 public:
3574  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
3575 
3576  /// Translates the given operation to LLVM IR using the provided IR builder
3577  /// and saving the state in `moduleTranslation`.
3578  LogicalResult
3579  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
3580  LLVM::ModuleTranslation &moduleTranslation) const final;
3581 
3582  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
3583  /// runtime calls, or operation amendments
3584  LogicalResult
3585  amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
3586  NamedAttribute attribute,
3587  LLVM::ModuleTranslation &moduleTranslation) const final;
3588 };
3589 
3590 } // namespace
3591 
3592 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
3593  Operation *op, ArrayRef<llvm::Instruction *> instructions,
3594  NamedAttribute attribute,
3595  LLVM::ModuleTranslation &moduleTranslation) const {
3596  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
3597  attribute.getName())
3598  .Case("omp.is_target_device",
3599  [&](Attribute attr) {
3600  if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
3601  llvm::OpenMPIRBuilderConfig &config =
3602  moduleTranslation.getOpenMPBuilder()->Config;
3603  config.setIsTargetDevice(deviceAttr.getValue());
3604  return success();
3605  }
3606  return failure();
3607  })
3608  .Case("omp.is_gpu",
3609  [&](Attribute attr) {
3610  if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
3611  llvm::OpenMPIRBuilderConfig &config =
3612  moduleTranslation.getOpenMPBuilder()->Config;
3613  config.setIsGPU(gpuAttr.getValue());
3614  return success();
3615  }
3616  return failure();
3617  })
3618  .Case("omp.host_ir_filepath",
3619  [&](Attribute attr) {
3620  if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
3621  llvm::OpenMPIRBuilder *ompBuilder =
3622  moduleTranslation.getOpenMPBuilder();
3623  ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
3624  return success();
3625  }
3626  return failure();
3627  })
3628  .Case("omp.flags",
3629  [&](Attribute attr) {
3630  if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
3631  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
3632  return failure();
3633  })
3634  .Case("omp.version",
3635  [&](Attribute attr) {
3636  if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
3637  llvm::OpenMPIRBuilder *ompBuilder =
3638  moduleTranslation.getOpenMPBuilder();
3639  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
3640  versionAttr.getVersion());
3641  return success();
3642  }
3643  return failure();
3644  })
3645  .Case("omp.declare_target",
3646  [&](Attribute attr) {
3647  if (auto declareTargetAttr =
3648  dyn_cast<omp::DeclareTargetAttr>(attr))
3649  return convertDeclareTargetAttr(op, declareTargetAttr,
3650  moduleTranslation);
3651  return failure();
3652  })
3653  .Case("omp.requires",
3654  [&](Attribute attr) {
3655  if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
3656  using Requires = omp::ClauseRequires;
3657  Requires flags = requiresAttr.getValue();
3658  llvm::OpenMPIRBuilderConfig &config =
3659  moduleTranslation.getOpenMPBuilder()->Config;
3660  config.setHasRequiresReverseOffload(
3661  bitEnumContainsAll(flags, Requires::reverse_offload));
3662  config.setHasRequiresUnifiedAddress(
3663  bitEnumContainsAll(flags, Requires::unified_address));
3664  config.setHasRequiresUnifiedSharedMemory(
3665  bitEnumContainsAll(flags, Requires::unified_shared_memory));
3666  config.setHasRequiresDynamicAllocators(
3667  bitEnumContainsAll(flags, Requires::dynamic_allocators));
3668  return success();
3669  }
3670  return failure();
3671  })
3672  .Default([](Attribute) {
3673  // Fall through for omp attributes that do not require lowering.
3674  return success();
3675  })(attribute.getValue());
3676 
3677  return failure();
3678 }
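// Editorial note (hedged illustration): these discriminator attributes are
// typically attached to the top-level module by the frontend, e.g.
//   module attributes {omp.is_target_device = true, omp.is_gpu = true} {...}
// Most of them only configure the OpenMPIRBuilder; omp.flags and
// omp.declare_target additionally emit globals/metadata via the helpers above.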
3679 
3680 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
3681 /// (including OpenMP runtime calls).
3682 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
3683  Operation *op, llvm::IRBuilderBase &builder,
3684  LLVM::ModuleTranslation &moduleTranslation) const {
3685 
3686  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
3687  if (ompBuilder->Config.isTargetDevice()) {
3688  if (isTargetDeviceOp(op)) {
3689  return convertTargetDeviceOp(op, builder, moduleTranslation);
3690  } else {
3691  return convertTargetOpsInNest(op, builder, moduleTranslation);
3692  }
3693  }
3694  return convertHostOrTargetOperation(op, builder, moduleTranslation);
3695 }
3696 
3697 void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {
3698  registry.insert<omp::OpenMPDialect>();
3699  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
3700  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
3701  });
3702 }
3703 
3704 void mlir::registerOpenMPDialectTranslation(MLIRContext &context) {
3705  DialectRegistry registry;
3706  registerOpenMPDialectTranslation(registry);
3707  context.appendDialectRegistry(registry);
3708 }
@ None
static llvm::Value * getRefPtrIfDeclareTarget(mlir::Value value, LLVM::ModuleTranslation &moduleTranslation)
static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::Function *func)
static LogicalResult convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static llvm::omp::OrderKind convertOrderKind(std::optional< omp::ClauseOrderKind > o)
Convert Order attribute to llvm::omp::OrderKind.
static LogicalResult convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered_region' operation into LLVM IR using OpenMPIRBuilder.
static llvm::OpenMPIRBuilder::InsertPointTy findAllocaInsertPoint(llvm::IRBuilderBase &builder, const LLVM::ModuleTranslation &moduleTranslation)
Find the insertion point for allocas given the current insertion point for normal operations in the b...
static LogicalResult convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an omp.atomic.write operation to LLVM IR.
static OwningAtomicReductionGen makeAtomicReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible atomic reduction generator for the given reduction declaration.
static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op)
Converts an LLVM dialect binary operation to the corresponding enum value for atomicrmw supported bin...
static llvm::AtomicOrdering convertAtomicOrdering(std::optional< omp::ClauseMemoryOrderKind > ao)
Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, const SmallVector< Value > &devPtrOperands={}, const SmallVector< Value > &devAddrOperands={}, bool isTargetParams=false)
static LogicalResult convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult allocAndInitializeReductionVars(OP op, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRef)
static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
static bool isTargetDeviceOp(Operation *op)
static LogicalResult inlineOmpRegionCleanup(llvm::SmallVectorImpl< Region * > &cleanupRegions, llvm::ArrayRef< llvm::Value * > privateVariables, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, StringRef regionName, bool shouldLoadCleanupRegionArg=true)
handling of DeclareReductionOp's cleanup region
std::vector< llvm::Value * > calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, bool isArrayTy, mlir::OperandRange bounds)
This function calculates the array/pointer offset for map data provided with bounds operations,...
void collectMapDataFromMapOperands(MapInfoData &mapData, llvm::SmallVectorImpl< Value > &mapOperands, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::IRBuilderBase &builder)
static llvm::SmallString< 64 > getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, llvm::OpenMPIRBuilder &ompBuilder)
static OwningReductionGen makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible reduction generator for the given reduction declaration.
static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts the OpenMP parallel operation to LLVM IR.
static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static LogicalResult inlineConvertOmpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::Value * > *continuationBlockArgs=nullptr)
Translates the blocks contained in the given region and appends them to at the current insertion poin...
static LogicalResult convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP Threadprivate operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult createReductionsAndCleanup(OP op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, ArrayRef< llvm::Value * > privateReductionVariables, ArrayRef< bool > isByRef)
LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
Lowers the FlagsAttr which is applied to the module on the device pass when offloading,...
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertToCaptureClauseKind(mlir::omp::DeclareTargetCaptureClause captureClasue)
uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl)
static void mapInitializationArg(T loop, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, unsigned i)
Map input argument to all reduction initialization regions.
static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, bool isTargetParams, int mapDataParentIdx=-1)
static ArrayRef< bool > getIsByRef(std::optional< ArrayRef< bool >> attr)
static bool targetOpSupported(Operation &opInst)
static int getMapDataMemberIdx(MapInfoData &mapData, mlir::omp::MapInfoOp memberOp)
static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP)
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind)
Convert ProcBindKind from MLIR-generated enum to LLVM enum.
static void allocByValReductionVars(T loop, ArrayRef< BlockArgument > reductionArgs, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap, llvm::ArrayRef< bool > isByRefs)
Allocate space for privatized reduction variables.
static LogicalResult convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void processMapMembersWithParent(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag)
static LogicalResult convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams)
static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause)
static void collectReductionDecls(T loop, SmallVectorImpl< omp::DeclareReductionOp > &reductions)
Populates reductions with reduction declarations used in the given loop.
static mlir::omp::MapInfoOp getFirstOrLastMappedMemberPtr(mlir::omp::MapInfoOp mapInfo, bool first)
static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert omp.atomic.read operation to LLVM IR.
static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, omp::TargetOp targetOp, llvm::StringRef parentName="")
static void collectReductionInfo(T loop, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< omp::DeclareReductionOp > &reductionDecls, SmallVectorImpl< OwningReductionGen > &owningReductionGens, SmallVectorImpl< OwningAtomicReductionGen > &owningAtomicReductionGens, const ArrayRef< llvm::Value * > privateReductionVariables, SmallVectorImpl< llvm::OpenMPIRBuilder::ReductionInfo > &reductionInfos)
Collect reduction info.
static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Given an OpenMP MLIR operation, create the corresponding LLVM IR (including OpenMP runtime calls).
static bool checkIfPointerMap(mlir::omp::MapInfoOp mapOp)
static LogicalResult convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::BasicBlock * convertOmpOpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, SmallVectorImpl< llvm::PHINode * > *continuationBlockPHIs=nullptr)
Converts the given region that appears within an OpenMP dialect operation to LLVM IR,...
static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
llvm::Value * getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::Value *basePointer, llvm::Type *baseType, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static bool isDeclareTargetLink(mlir::Value value)
#define MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CLASS_NAME)
Definition: TypeID.h:274
A RAII class that on construction replaces the region arguments of the parallel op (which correspond ...
OmpParallelOpConversionManager(omp::ParallelOp opInst)
Attributes are known-constant values of operations.
Definition: Attributes.h:25
Block represents an ordered list of Operations.
Definition: Block.h:31
BlockArgument getArgument(unsigned i)
Definition: Block.h:127
unsigned getNumArguments()
Definition: Block.h:126
Operation & back()
Definition: Block.h:150
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:243
Operation & front()
Definition: Block.h:151
iterator begin()
Definition: Block.h:141
The main mechanism for performing data layout queries.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
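The size queries above return bit counts; map-clause lowering generally needs bytes. A minimal sketch of that conversion, assuming a fixed-size type; the helper name getTypeSizeInBytesSketch is hypothetical and not part of this file.

#include <cstdint>
#include "mlir/Interfaces/DataLayoutInterfaces.h"
#include "llvm/Support/MathExtras.h"

// Query the bit size through the MLIR DataLayout and round up to whole bytes.
static uint64_t getTypeSizeInBytesSketch(const mlir::DataLayout &dl,
                                         mlir::Type type) {
  llvm::TypeSize sizeInBits = dl.getTypeSizeInBits(type);
  return llvm::divideCeil(sizeInBits.getFixedValue(), 8);
}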
An attribute that represents a reference to a dense integer vector or tensor object.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
void addExtension(std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep track of the mutations made to the IR.
Definition: PatternMatch.h:766
Base class for dialect interfaces providing translation to LLVM IR.
virtual LogicalResult amendOperation(Operation *op, ArrayRef< llvm::Instruction * > instructions, NamedAttribute attribute, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to act on an operation that has dialect attributes from the derived dialect (the operation itself may be from a different dialect).
virtual LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const
Hook for derived dialect interface to provide translation of the operations to LLVM IR.
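The two hooks above are what a dialect-specific translation interface overrides. A hedged sketch of the general shape of such an interface; the dialect name and the failure-only body are placeholders, not code from this file.

#include "mlir/Target/LLVMIR/LLVMTranslationInterface.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
#include "llvm/IR/IRBuilder.h"

namespace {
// Hypothetical translation interface for some dialect "MyDialect".
class MyDialectLLVMIRTranslationInterface
    : public mlir::LLVMTranslationDialectInterface {
public:
  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;

  // Hook translating one operation of the dialect to LLVM IR.
  mlir::LogicalResult
  convertOperation(mlir::Operation *op, llvm::IRBuilderBase &builder,
                   mlir::LLVM::ModuleTranslation &moduleTranslation) const final {
    // A real implementation would dispatch on the concrete op type here;
    // this sketch simply reports everything as unsupported.
    return op->emitError("unsupported operation for LLVM IR translation");
  }
};
} // namespace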
Concrete CRTP base class for ModuleTranslation stack frames.
Implementation class for module translation.
llvm::Value * lookupValue(Value value) const
Finds an LLVM IR value corresponding to the given MLIR value.
LogicalResult convertBlock(Block &bb, bool ignoreArguments, llvm::IRBuilderBase &builder)
Translates the contents of the given block to LLVM IR using this translator.
SmallVector< llvm::Value * > lookupValues(ValueRange values)
Looks up a list of remapped values.
llvm::BasicBlock * lookupBlock(Block *block) const
Finds an LLVM IR basic block that corresponds to the given MLIR block.
SymbolTableCollection & symbolTable()
llvm::Type * convertType(Type type)
Converts the type from MLIR LLVM dialect to LLVM.
llvm::OpenMPIRBuilder * getOpenMPBuilder()
Returns the OpenMP IR builder associated with the LLVM IR module being constructed.
llvm::LLVMContext & getLLVMContext() const
Returns the LLVM context in which the IR is being constructed.
llvm::GlobalValue * lookupGlobal(Operation *op)
Finds an LLVM IR global value that corresponds to the given MLIR operation defining a global value.
llvm::Module * getLLVMModule()
Returns the LLVM module in which the IR is being constructed.
llvm::Function * lookupFunction(StringRef name) const
Finds an LLVM IR function by its name.
void mapBlock(Block *mlir, llvm::BasicBlock *llvm)
Stores the mapping between an MLIR block and LLVM IR basic block.
WalkResult stackWalk(llvm::function_ref< WalkResult(const T &)> callback) const
Calls callback for every ModuleTranslation stack frame of type T starting from the top of the stack.
void forgetMapping(Region &region)
Removes the mapping for blocks contained in the region and values defined in these blocks.
MLIRContext & getContext()
Returns the MLIR context of the module being translated.
void mapValue(Value mlir, llvm::Value *llvm)
Stores the mapping between an MLIR value and its LLVM IR counterpart.
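Taken together, the ModuleTranslation queries above follow a common pattern in conversion hooks: look up already-translated operands, convert result types, emit LLVM IR, and map the results back. A minimal sketch for a hypothetical unary op whose single operand is assumed to be a pointer; the load is purely illustrative and not taken from this file.

#include "mlir/IR/Operation.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
#include "llvm/IR/IRBuilder.h"

static mlir::LogicalResult
translateExampleOp(mlir::Operation *op, llvm::IRBuilderBase &builder,
                   mlir::LLVM::ModuleTranslation &moduleTranslation) {
  // Find the LLVM IR value previously created for the MLIR operand.
  llvm::Value *operand = moduleTranslation.lookupValue(op->getOperand(0));
  // Convert the MLIR result type to its LLVM IR counterpart.
  llvm::Type *resultType =
      moduleTranslation.convertType(op->getResult(0).getType());
  // Emit some LLVM IR; a plain load stands in for the real lowering.
  llvm::Value *result = builder.CreateLoad(resultType, operand);
  // Record the mapping so later operations can look up the MLIR result.
  moduleTranslation.mapValue(op->getResult(0), result);
  return mlir::success();
}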
T findInstanceOf()
Return an instance of the given location type if one is nested under the current location.
Definition: Location.h:41
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:207
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:49
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:221
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the operation using the map that is provided (leaving them alone if no entry is present).
Definition: Builders.cpp:555
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:400
void cloneRegionBefore(Region &region, Region &parent, Region::iterator before, IRMapping &mapping)
Clone the blocks that belong to "region" before the given position in another region "parent".
Definition: Builders.cpp:582
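cloneRegionBefore plus an IRMapping is the usual way to duplicate an op body into another region. A short sketch under that assumption; the parameter names are illustrative only.

#include "mlir/IR/Builders.h"
#include "mlir/IR/IRMapping.h"

// Clone every block of `source` to the end of `destination`, remapping values
// through `mapping` as the blocks are copied.
static void cloneBodyInto(mlir::OpBuilder &builder, mlir::Region &source,
                          mlir::Region &destination) {
  mlir::IRMapping mapping;
  builder.cloneRegionBefore(source, destination, destination.end(), mapping);
}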
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:42
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:345
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:402
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one), block or region, depending on the callback provided.
Definition: Operation.h:793
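As a small illustration of the walk hook (a sketch, not code from this file): search an operation for any nested omp::MapInfoOp and stop at the first hit.

#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/Operation.h"

// Returns true if any omp::MapInfoOp is nested under `root`.
static bool containsMapInfo(mlir::Operation *root) {
  mlir::WalkResult result = root->walk([](mlir::omp::MapInfoOp) {
    // Interrupt the traversal at the first match.
    return mlir::WalkResult::interrupt();
  });
  return result.wasInterrupted();
}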
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:341
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:268
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:682
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator range over the underlying Values.
Definition: Operation.h:373
InFlightDiagnostic emitOpError(const Twine &message={})
Emit an error with the op name prefixed, like "'dim' op " which is convenient for verifiers.
Definition: Operation.cpp:671
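emitError and emitOpError return diagnostics that convert implicitly to a failed LogicalResult, so translation code typically reports and bails out in one statement. A minimal sketch with a hypothetical clause check, not taken from this file.

#include "mlir/IR/Operation.h"
#include "mlir/Support/LogicalResult.h"

// Hypothetical guard: reject an unsupported clause and signal failure upward.
static mlir::LogicalResult checkNowaitUnsupported(mlir::Operation *op,
                                                  bool hasNowait) {
  if (hasNowait)
    return op->emitOpError("nowait clause is not yet supported");
  return mlir::success();
}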
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
BlockArgListType getArguments()
Definition: Region.h:81
bool empty()
Definition: Region.h:60
iterator end()
Definition: Region.h:56
unsigned getNumArguments()
Definition: Region.h:123
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
BlockArgument getArgument(unsigned i)
Definition: Region.h:124
Block & front()
Definition: Region.h:65
void mergeBlocks(Block *source, Block *dest, ValueRange argValues=std::nullopt)
Inline the operations of block 'source' into the end of block 'dest'.
@ Private
The symbol is private and may only be referenced by SymbolRefAttrs local to the operations within the current symbol table.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
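Following a Value back through getDefiningOp is how helpers such as isDeclareTargetLink above inspect the producer of a mapped value. A minimal sketch; the check for LLVM::AddressOfOp is illustrative, not a restatement of how that helper is implemented.

#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Value.h"

// Returns true if `value` is produced by an llvm.mlir.addressof operation.
static bool isDefinedByAddressOf(mlir::Value value) {
  if (mlir::Operation *def = value.getDefiningOp())
    return mlir::isa<mlir::LLVM::AddressOfOp>(def);
  return false; // Block arguments have no defining operation.
}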
A utility result that is used to signal how to proceed with an ongoing walk: interrupt, advance, or skip.
Definition: Visitors.h:33
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: Visitors.h:55
static WalkResult interrupt()
Definition: Visitors.h:50
Include the generated interface declarations.
Definition: CallGraph.h:229
void connectPHINodes(Region &region, const ModuleTranslation &state)
For all blocks in the region that were converted to LLVM IR using the given ModuleTranslation, connect the PHI nodes of the corresponding LLVM IR blocks to the results of preceding blocks that lead to them.
llvm::Constant * createMappingInformation(Location loc, llvm::OpenMPIRBuilder &builder)
Create a constant string representing the mapping information extracted from the MLIR location information.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:285
Include the generated interface declarations.
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
Definition: RegionUtils.cpp:27
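replaceAllUsesInRegionWith is the region-scoped form of replace-all-uses, used when only the uses inside one region should be rewritten, for example when substituting a region argument. A minimal sketch with illustrative names.

#include "mlir/Transforms/RegionUtils.h"

// Replace all in-region uses of the `argIndex`-th region argument.
static void substituteRegionArgument(mlir::Region &region, unsigned argIndex,
                                     mlir::Value replacement) {
  mlir::replaceAllUsesInRegionWith(region.getArgument(argIndex), replacement,
                                   region);
}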
SetVector< Block * > getBlocksSortedByDominance(Region &region)
Gets a list of blocks that is sorted according to dominance.
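A dominance-compatible block order is what region lowering typically iterates over so that values are translated before their uses. A small sketch; note that the header declaring getBlocksSortedByDominance has moved between MLIR versions, so RegionUtils.h is an assumption here.

#include "mlir/IR/Region.h"
#include "mlir/Transforms/RegionUtils.h" // assumed location of the declaration
#include "llvm/ADT/SetVector.h"

// Visit the blocks of `region` in an order compatible with dominance.
static void visitBlocksInDominanceOrder(mlir::Region &region) {
  llvm::SetVector<mlir::Block *> blocks =
      mlir::getBlocksSortedByDominance(region);
  for (mlir::Block *block : blocks)
    (void)block; // Translate or inspect each block here.
}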
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:305
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
void registerOpenMPDialectTranslation(DialectRegistry &registry)
Register the OpenMP dialect and the translation from it to LLVM IR in the given registry.
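For users of this translation, the registration entry point above is normally combined with DialectRegistry and MLIRContext setup. A minimal sketch, assuming a standalone tool that also registers the LLVM dialect translation (which is not part of this file but is generally required when exporting to LLVM IR).

#include "mlir/IR/DialectRegistry.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"

static void setUpContextForOpenMPExport(mlir::MLIRContext &context) {
  mlir::DialectRegistry registry;
  // Registers the OpenMP dialect and its LLVM IR translation interface.
  mlir::registerOpenMPDialectTranslation(registry);
  // The LLVM dialect translation is typically needed as well when exporting.
  mlir::registerLLVMDialectTranslation(registry);
  // Append the contents of the registry to the context's registry.
  context.appendDialectRegistry(registry);
}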
Operation * clone(OpBuilder &b, Operation *op, TypeRange newResultTypes, ValueRange newOperands)
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed; this helps unify some of the attribute construction methods.
llvm::SmallVector< bool, 4 > IsAMember
llvm::SmallVector< llvm::Value *, 4 > OriginalValue
llvm::SmallVector< bool, 4 > IsDeclareTarget
llvm::SmallVector< llvm::Type *, 4 > BaseType
void append(MapInfoData &CurInfo)
Append arrays in CurInfo.
llvm::SmallVector< mlir::Operation *, 4 > MapClause
RAII object calling stackPush/stackPop on construction/destruction.