MLIR  18.0.0git
OpenMPToLLVMIRTranslation.cpp
Go to the documentation of this file.
1 //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR OpenMP dialect and LLVM
10 // IR.
11 //
12 //===----------------------------------------------------------------------===//
17 #include "mlir/IR/IRMapping.h"
18 #include "mlir/IR/Operation.h"
19 #include "mlir/Support/LLVM.h"
24 
25 #include "llvm/ADT/SetVector.h"
26 #include "llvm/ADT/TypeSwitch.h"
27 #include "llvm/Frontend/OpenMP/OMPConstants.h"
28 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
29 #include "llvm/IR/DebugInfoMetadata.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/TargetParser/Triple.h"
33 #include "llvm/Transforms/Utils/ModuleUtils.h"
34 
35 #include <any>
36 #include <optional>
37 #include <utility>
38 
39 using namespace mlir;
40 
41 namespace {
42 static llvm::omp::ScheduleKind
43 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
44  if (!schedKind.has_value())
45  return llvm::omp::OMP_SCHEDULE_Default;
46  switch (schedKind.value()) {
47  case omp::ClauseScheduleKind::Static:
48  return llvm::omp::OMP_SCHEDULE_Static;
49  case omp::ClauseScheduleKind::Dynamic:
50  return llvm::omp::OMP_SCHEDULE_Dynamic;
51  case omp::ClauseScheduleKind::Guided:
52  return llvm::omp::OMP_SCHEDULE_Guided;
53  case omp::ClauseScheduleKind::Auto:
54  return llvm::omp::OMP_SCHEDULE_Auto;
56  return llvm::omp::OMP_SCHEDULE_Runtime;
57  }
58  llvm_unreachable("unhandled schedule clause argument");
59 }
60 
61 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
62 /// insertion points for allocas.
63 class OpenMPAllocaStackFrame
64  : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> {
65 public:
66  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)
67 
68  explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
69  : allocaInsertPoint(allocaIP) {}
70  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
71 };
72 
73 /// ModuleTranslation stack frame containing the partial mapping between MLIR
74 /// values and their LLVM IR equivalents.
75 class OpenMPVarMappingStackFrame
77  OpenMPVarMappingStackFrame> {
78 public:
79  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame)
80 
81  explicit OpenMPVarMappingStackFrame(
82  const DenseMap<Value, llvm::Value *> &mapping)
83  : mapping(mapping) {}
84 
86 };
87 } // namespace
88 
89 /// Find the insertion point for allocas given the current insertion point for
90 /// normal operations in the builder.
91 static llvm::OpenMPIRBuilder::InsertPointTy
92 findAllocaInsertPoint(llvm::IRBuilderBase &builder,
93  const LLVM::ModuleTranslation &moduleTranslation) {
94  // If there is an alloca insertion point on stack, i.e. we are in a nested
95  // operation and a specific point was provided by some surrounding operation,
96  // use it.
97  llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
98  WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
99  [&](const OpenMPAllocaStackFrame &frame) {
100  allocaInsertPoint = frame.allocaInsertPoint;
101  return WalkResult::interrupt();
102  });
103  if (walkResult.wasInterrupted())
104  return allocaInsertPoint;
105 
106  // Otherwise, insert to the entry block of the surrounding function.
107  // If the current IRBuilder InsertPoint is the function's entry, it cannot
108  // also be used for alloca insertion which would result in insertion order
109  // confusion. Create a new BasicBlock for the Builder and use the entry block
110  // for the allocs.
111  // TODO: Create a dedicated alloca BasicBlock at function creation such that
112  // we do not need to move the current InertPoint here.
113  if (builder.GetInsertBlock() ==
114  &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
115  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
116  "Assuming end of basic block");
117  llvm::BasicBlock *entryBB = llvm::BasicBlock::Create(
118  builder.getContext(), "entry", builder.GetInsertBlock()->getParent(),
119  builder.GetInsertBlock()->getNextNode());
120  builder.CreateBr(entryBB);
121  builder.SetInsertPoint(entryBB);
122  }
123 
124  llvm::BasicBlock &funcEntryBlock =
125  builder.GetInsertBlock()->getParent()->getEntryBlock();
126  return llvm::OpenMPIRBuilder::InsertPointTy(
127  &funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
128 }
129 
130 /// Converts the given region that appears within an OpenMP dialect operation to
131 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
132 /// region, and a branch from any block with an successor-less OpenMP terminator
133 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
134 /// of the continuation block if provided.
135 static llvm::BasicBlock *convertOmpOpRegions(
136  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
137  LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
138  SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
139  llvm::BasicBlock *continuationBlock =
140  splitBB(builder, true, "omp.region.cont");
141  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
142 
143  llvm::LLVMContext &llvmContext = builder.getContext();
144  for (Block &bb : region) {
145  llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
146  llvmContext, blockName, builder.GetInsertBlock()->getParent(),
147  builder.GetInsertBlock()->getNextNode());
148  moduleTranslation.mapBlock(&bb, llvmBB);
149  }
150 
151  llvm::Instruction *sourceTerminator = sourceBlock->getTerminator();
152 
153  // Terminators (namely YieldOp) may be forwarding values to the region that
154  // need to be available in the continuation block. Collect the types of these
155  // operands in preparation of creating PHI nodes.
156  SmallVector<llvm::Type *> continuationBlockPHITypes;
157  bool operandsProcessed = false;
158  unsigned numYields = 0;
159  for (Block &bb : region.getBlocks()) {
160  if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
161  if (!operandsProcessed) {
162  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
163  continuationBlockPHITypes.push_back(
164  moduleTranslation.convertType(yield->getOperand(i).getType()));
165  }
166  operandsProcessed = true;
167  } else {
168  assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
169  "mismatching number of values yielded from the region");
170  for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
171  llvm::Type *operandType =
172  moduleTranslation.convertType(yield->getOperand(i).getType());
173  (void)operandType;
174  assert(continuationBlockPHITypes[i] == operandType &&
175  "values of mismatching types yielded from the region");
176  }
177  }
178  numYields++;
179  }
180  }
181 
182  // Insert PHI nodes in the continuation block for any values forwarded by the
183  // terminators in this region.
184  if (!continuationBlockPHITypes.empty())
185  assert(
186  continuationBlockPHIs &&
187  "expected continuation block PHIs if converted regions yield values");
188  if (continuationBlockPHIs) {
189  llvm::IRBuilderBase::InsertPointGuard guard(builder);
190  continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
191  builder.SetInsertPoint(continuationBlock, continuationBlock->begin());
192  for (llvm::Type *ty : continuationBlockPHITypes)
193  continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
194  }
195 
196  // Convert blocks one by one in topological order to ensure
197  // defs are converted before uses.
199  for (Block *bb : blocks) {
200  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
201  // Retarget the branch of the entry block to the entry block of the
202  // converted region (regions are single-entry).
203  if (bb->isEntryBlock()) {
204  assert(sourceTerminator->getNumSuccessors() == 1 &&
205  "provided entry block has multiple successors");
206  assert(sourceTerminator->getSuccessor(0) == continuationBlock &&
207  "ContinuationBlock is not the successor of the entry block");
208  sourceTerminator->setSuccessor(0, llvmBB);
209  }
210 
211  llvm::IRBuilderBase::InsertPointGuard guard(builder);
212  if (failed(
213  moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) {
214  bodyGenStatus = failure();
215  return continuationBlock;
216  }
217 
218  // Special handling for `omp.yield` and `omp.terminator` (we may have more
219  // than one): they return the control to the parent OpenMP dialect operation
220  // so replace them with the branch to the continuation block. We handle this
221  // here to avoid relying inter-function communication through the
222  // ModuleTranslation class to set up the correct insertion point. This is
223  // also consistent with MLIR's idiom of handling special region terminators
224  // in the same code that handles the region-owning operation.
225  Operation *terminator = bb->getTerminator();
226  if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
227  builder.CreateBr(continuationBlock);
228 
229  for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
230  (*continuationBlockPHIs)[i]->addIncoming(
231  moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
232  }
233  }
234  // After all blocks have been traversed and values mapped, connect the PHI
235  // nodes to the results of preceding blocks.
236  LLVM::detail::connectPHINodes(region, moduleTranslation);
237 
238  // Remove the blocks and values defined in this region from the mapping since
239  // they are not visible outside of this region. This allows the same region to
240  // be converted several times, that is cloned, without clashes, and slightly
241  // speeds up the lookups.
242  moduleTranslation.forgetMapping(region);
243 
244  return continuationBlock;
245 }
246 
247 /// Convert ProcBindKind from MLIR-generated enum to LLVM enum.
248 static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
249  switch (kind) {
250  case omp::ClauseProcBindKind::Close:
251  return llvm::omp::ProcBindKind::OMP_PROC_BIND_close;
252  case omp::ClauseProcBindKind::Master:
253  return llvm::omp::ProcBindKind::OMP_PROC_BIND_master;
254  case omp::ClauseProcBindKind::Primary:
255  return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;
256  case omp::ClauseProcBindKind::Spread:
257  return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
258  }
259  llvm_unreachable("Unknown ClauseProcBindKind kind");
260 }
261 
262 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
263 static LogicalResult
264 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
265  LLVM::ModuleTranslation &moduleTranslation) {
266  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
267  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
268  // relying on captured variables.
269  LogicalResult bodyGenStatus = success();
270 
271  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
272  // MasterOp has only one region associated with it.
273  auto &region = cast<omp::MasterOp>(opInst).getRegion();
274  builder.restoreIP(codeGenIP);
275  convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation,
276  bodyGenStatus);
277  };
278 
279  // TODO: Perform finalization actions for variables. This has to be
280  // called for variables which have destructors/finalizers.
281  auto finiCB = [&](InsertPointTy codeGenIP) {};
282 
283  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
284  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
285  ompLoc, bodyGenCB, finiCB));
286  return success();
287 }
288 
289 /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
290 static LogicalResult
291 convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
292  LLVM::ModuleTranslation &moduleTranslation) {
293  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
294  auto criticalOp = cast<omp::CriticalOp>(opInst);
295  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
296  // relying on captured variables.
297  LogicalResult bodyGenStatus = success();
298 
299  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
300  // CriticalOp has only one region associated with it.
301  auto &region = cast<omp::CriticalOp>(opInst).getRegion();
302  builder.restoreIP(codeGenIP);
303  convertOmpOpRegions(region, "omp.critical.region", builder,
304  moduleTranslation, bodyGenStatus);
305  };
306 
307  // TODO: Perform finalization actions for variables. This has to be
308  // called for variables which have destructors/finalizers.
309  auto finiCB = [&](InsertPointTy codeGenIP) {};
310 
311  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
312  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
313  llvm::Constant *hint = nullptr;
314 
315  // If it has a name, it probably has a hint too.
316  if (criticalOp.getNameAttr()) {
317  // The verifiers in OpenMP Dialect guarentee that all the pointers are
318  // non-null
319  auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr());
320  auto criticalDeclareOp =
321  SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp,
322  symbolRef);
323  hint = llvm::ConstantInt::get(
324  llvm::Type::getInt32Ty(llvmContext),
325  static_cast<int>(criticalDeclareOp.getHintVal()));
326  }
327  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(
328  ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint));
329  return success();
330 }
331 
332 /// Returns a reduction declaration that corresponds to the given reduction
333 /// operation in the given container. Currently only supports reductions inside
334 /// WsLoopOp and ParallelOp but can be easily extended as long as the given
335 /// construct implements getNumReductionVars.
336 template <typename T>
337 static std::optional<omp::ReductionDeclareOp>
338 findReductionDeclInContainer(T container, omp::ReductionOp reduction) {
339  for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) {
340  if (container.getReductionVars()[i] != reduction.getAccumulator())
341  continue;
342 
343  SymbolRefAttr reductionSymbol =
344  cast<SymbolRefAttr>((*container.getReductions())[i]);
345  auto declareOp =
346  SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
347  container, reductionSymbol);
348  return declareOp;
349  }
350  return std::nullopt;
351 }
352 
353 /// Searches for a reduction in a provided region and the regions
354 /// it is nested in
355 static omp::ReductionDeclareOp findReductionDecl(Operation &containerOp,
356  omp::ReductionOp reduction) {
357  std::optional<omp::ReductionDeclareOp> declareOp = std::nullopt;
358  Operation *container = &containerOp;
359 
360  while (!declareOp.has_value() && container) {
361  // Check if current container is supported for reductions searches
362  if (auto par = dyn_cast<omp::ParallelOp>(*container)) {
363  declareOp = findReductionDeclInContainer(par, reduction);
364  } else if (auto loop = dyn_cast<omp::WsLoopOp>(*container)) {
365  declareOp = findReductionDeclInContainer(loop, reduction);
366  } else {
367  break;
368  }
369 
370  // See if we can search parent for reductions as well
371  container = containerOp.getParentOp();
372  }
373 
374  assert(declareOp.has_value() &&
375  "reduction operation must be associated with a declaration");
376 
377  return *declareOp;
378 }
379 
380 /// Populates `reductions` with reduction declarations used in the given loop.
381 template <typename T>
382 static void
385  std::optional<ArrayAttr> attr = loop.getReductions();
386  if (!attr)
387  return;
388 
389  reductions.reserve(reductions.size() + loop.getNumReductionVars());
390  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
391  reductions.push_back(
392  SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
393  loop, symbolRef));
394  }
395 }
396 
397 /// Translates the blocks contained in the given region and appends them to at
398 /// the current insertion point of `builder`. The operations of the entry block
399 /// are appended to the current insertion block, which is not expected to have a
400 /// terminator. If set, `continuationBlockArgs` is populated with translated
401 /// values that correspond to the values omp.yield'ed from the region.
403  Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
404  LLVM::ModuleTranslation &moduleTranslation,
405  SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
406  if (region.empty())
407  return success();
408 
409  // Special case for single-block regions that don't create additional blocks:
410  // insert operations without creating additional blocks.
411  if (llvm::hasSingleElement(region)) {
412  moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
413  if (failed(moduleTranslation.convertBlock(
414  region.front(), /*ignoreArguments=*/true, builder)))
415  return failure();
416 
417  // The continuation arguments are simply the translated terminator operands.
418  if (continuationBlockArgs)
419  llvm::append_range(
420  *continuationBlockArgs,
421  moduleTranslation.lookupValues(region.front().back().getOperands()));
422 
423  // Drop the mapping that is no longer necessary so that the same region can
424  // be processed multiple times.
425  moduleTranslation.forgetMapping(region);
426  return success();
427  }
428 
429  LogicalResult bodyGenStatus = success();
431  llvm::BasicBlock *continuationBlock = convertOmpOpRegions(
432  region, blockName, builder, moduleTranslation, bodyGenStatus, &phis);
433  if (failed(bodyGenStatus))
434  return failure();
435  if (continuationBlockArgs)
436  llvm::append_range(*continuationBlockArgs, phis);
437  builder.SetInsertPoint(continuationBlock,
438  continuationBlock->getFirstInsertionPt());
439  return success();
440 }
441 
442 namespace {
443 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
444 /// store lambdas with capture.
445 using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
446  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
447  llvm::Value *&)>;
448 using OwningAtomicReductionGen =
449  std::function<llvm::OpenMPIRBuilder::InsertPointTy(
450  llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
451  llvm::Value *)>;
452 } // namespace
453 
454 /// Create an OpenMPIRBuilder-compatible reduction generator for the given
455 /// reduction declaration. The generator uses `builder` but ignores its
456 /// insertion point.
457 static OwningReductionGen
458 makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder,
459  LLVM::ModuleTranslation &moduleTranslation) {
460  // The lambda is mutable because we need access to non-const methods of decl
461  // (which aren't actually mutating it), and we must capture decl by-value to
462  // avoid the dangling reference after the parent function returns.
463  OwningReductionGen gen =
464  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
465  llvm::Value *lhs, llvm::Value *rhs,
466  llvm::Value *&result) mutable {
467  Region &reductionRegion = decl.getReductionRegion();
468  moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs);
469  moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs);
470  builder.restoreIP(insertPoint);
472  if (failed(inlineConvertOmpRegions(reductionRegion,
473  "omp.reduction.nonatomic.body",
474  builder, moduleTranslation, &phis)))
475  return llvm::OpenMPIRBuilder::InsertPointTy();
476  assert(phis.size() == 1);
477  result = phis[0];
478  return builder.saveIP();
479  };
480  return gen;
481 }
482 
483 /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
484 /// given reduction declaration. The generator uses `builder` but ignores its
485 /// insertion point. Returns null if there is no atomic region available in the
486 /// reduction declaration.
487 static OwningAtomicReductionGen
488 makeAtomicReductionGen(omp::ReductionDeclareOp decl,
489  llvm::IRBuilderBase &builder,
490  LLVM::ModuleTranslation &moduleTranslation) {
491  if (decl.getAtomicReductionRegion().empty())
492  return OwningAtomicReductionGen();
493 
494  // The lambda is mutable because we need access to non-const methods of decl
495  // (which aren't actually mutating it), and we must capture decl by-value to
496  // avoid the dangling reference after the parent function returns.
497  OwningAtomicReductionGen atomicGen =
498  [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
499  llvm::Value *lhs, llvm::Value *rhs) mutable {
500  Region &atomicRegion = decl.getAtomicReductionRegion();
501  moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs);
502  moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs);
503  builder.restoreIP(insertPoint);
505  if (failed(inlineConvertOmpRegions(atomicRegion,
506  "omp.reduction.atomic.body", builder,
507  moduleTranslation, &phis)))
508  return llvm::OpenMPIRBuilder::InsertPointTy();
509  assert(phis.empty());
510  return builder.saveIP();
511  };
512  return atomicGen;
513 }
514 
515 /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
516 static LogicalResult
517 convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
518  LLVM::ModuleTranslation &moduleTranslation) {
519  auto orderedOp = cast<omp::OrderedOp>(opInst);
520 
521  omp::ClauseDepend dependType = *orderedOp.getDependTypeVal();
522  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
523  unsigned numLoops = *orderedOp.getNumLoopsVal();
524  SmallVector<llvm::Value *> vecValues =
525  moduleTranslation.lookupValues(orderedOp.getDependVecVars());
526 
527  size_t indexVecValues = 0;
528  while (indexVecValues < vecValues.size()) {
529  SmallVector<llvm::Value *> storeValues;
530  storeValues.reserve(numLoops);
531  for (unsigned i = 0; i < numLoops; i++) {
532  storeValues.push_back(vecValues[indexVecValues]);
533  indexVecValues++;
534  }
535  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
536  findAllocaInsertPoint(builder, moduleTranslation);
537  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
538  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
539  ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
540  }
541  return success();
542 }
543 
544 /// Converts an OpenMP 'ordered_region' operation into LLVM IR using
545 /// OpenMPIRBuilder.
546 static LogicalResult
547 convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
548  LLVM::ModuleTranslation &moduleTranslation) {
549  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
550  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
551 
552  // TODO: The code generation for ordered simd directive is not supported yet.
553  if (orderedRegionOp.getSimd())
554  return failure();
555 
556  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
557  // relying on captured variables.
558  LogicalResult bodyGenStatus = success();
559 
560  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
561  // OrderedOp has only one region associated with it.
562  auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
563  builder.restoreIP(codeGenIP);
564  convertOmpOpRegions(region, "omp.ordered.region", builder,
565  moduleTranslation, bodyGenStatus);
566  };
567 
568  // TODO: Perform finalization actions for variables. This has to be
569  // called for variables which have destructors/finalizers.
570  auto finiCB = [&](InsertPointTy codeGenIP) {};
571 
572  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
573  builder.restoreIP(
574  moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
575  ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getSimd()));
576  return bodyGenStatus;
577 }
578 
579 static LogicalResult
580 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
581  LLVM::ModuleTranslation &moduleTranslation) {
582  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
583  using StorableBodyGenCallbackTy =
584  llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
585 
586  auto sectionsOp = cast<omp::SectionsOp>(opInst);
587 
588  // TODO: Support the following clauses: private, firstprivate, lastprivate,
589  // reduction, allocate
590  if (!sectionsOp.getReductionVars().empty() || sectionsOp.getReductions() ||
591  !sectionsOp.getAllocateVars().empty() ||
592  !sectionsOp.getAllocatorsVars().empty())
593  return emitError(sectionsOp.getLoc())
594  << "reduction and allocate clauses are not supported for sections "
595  "construct";
596 
597  LogicalResult bodyGenStatus = success();
599 
600  for (Operation &op : *sectionsOp.getRegion().begin()) {
601  auto sectionOp = dyn_cast<omp::SectionOp>(op);
602  if (!sectionOp) // omp.terminator
603  continue;
604 
605  Region &region = sectionOp.getRegion();
606  auto sectionCB = [&region, &builder, &moduleTranslation, &bodyGenStatus](
607  InsertPointTy allocaIP, InsertPointTy codeGenIP) {
608  builder.restoreIP(codeGenIP);
609  convertOmpOpRegions(region, "omp.section.region", builder,
610  moduleTranslation, bodyGenStatus);
611  };
612  sectionCBs.push_back(sectionCB);
613  }
614 
615  // No sections within omp.sections operation - skip generation. This situation
616  // is only possible if there is only a terminator operation inside the
617  // sections operation
618  if (sectionCBs.empty())
619  return success();
620 
621  assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
622 
623  // TODO: Perform appropriate actions according to the data-sharing
624  // attribute (shared, private, firstprivate, ...) of variables.
625  // Currently defaults to shared.
626  auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
627  llvm::Value &vPtr,
628  llvm::Value *&replacementValue) -> InsertPointTy {
629  replacementValue = &vPtr;
630  return codeGenIP;
631  };
632 
633  // TODO: Perform finalization actions for variables. This has to be
634  // called for variables which have destructors/finalizers.
635  auto finiCB = [&](InsertPointTy codeGenIP) {};
636 
637  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
638  findAllocaInsertPoint(builder, moduleTranslation);
639  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
640  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
641  ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
642  sectionsOp.getNowait()));
643  return bodyGenStatus;
644 }
645 
646 /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
647 static LogicalResult
648 convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
649  LLVM::ModuleTranslation &moduleTranslation) {
650  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
651  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
652  LogicalResult bodyGenStatus = success();
653  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
654  builder.restoreIP(codegenIP);
655  convertOmpOpRegions(singleOp.getRegion(), "omp.single.region", builder,
656  moduleTranslation, bodyGenStatus);
657  };
658  auto finiCB = [&](InsertPointTy codeGenIP) {};
659  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle(
660  ompLoc, bodyCB, finiCB, singleOp.getNowait(), /*DidIt=*/nullptr));
661  return bodyGenStatus;
662 }
663 
664 // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder
665 static LogicalResult
666 convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
667  LLVM::ModuleTranslation &moduleTranslation) {
668  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
669  LogicalResult bodyGenStatus = success();
670  if (!op.getAllocatorsVars().empty() || op.getReductions())
671  return op.emitError("unhandled clauses for translation to LLVM IR");
672 
673  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
675  moduleTranslation, allocaIP);
676  builder.restoreIP(codegenIP);
677  convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
678  moduleTranslation, bodyGenStatus);
679  };
680 
681  llvm::Value *numTeamsLower = nullptr;
682  if (Value numTeamsLowerVar = op.getNumTeamsLower())
683  numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar);
684 
685  llvm::Value *numTeamsUpper = nullptr;
686  if (Value numTeamsUpperVar = op.getNumTeamsUpper())
687  numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar);
688 
689  llvm::Value *threadLimit = nullptr;
690  if (Value threadLimitVar = op.getThreadLimit())
691  threadLimit = moduleTranslation.lookupValue(threadLimitVar);
692 
693  llvm::Value *ifExpr = nullptr;
694  if (Value ifExprVar = op.getIfExpr())
695  ifExpr = moduleTranslation.lookupValue(ifExprVar);
696 
697  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
698  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams(
699  ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr));
700  return bodyGenStatus;
701 }
702 
703 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
704 static LogicalResult
705 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
706  LLVM::ModuleTranslation &moduleTranslation) {
707  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
708  LogicalResult bodyGenStatus = success();
709  if (taskOp.getUntiedAttr() || taskOp.getMergeableAttr() ||
710  taskOp.getInReductions() || taskOp.getPriority() ||
711  !taskOp.getAllocateVars().empty()) {
712  return taskOp.emitError("unhandled clauses for translation to LLVM IR");
713  }
714  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
715  // Save the alloca insertion point on ModuleTranslation stack for use in
716  // nested regions.
718  moduleTranslation, allocaIP);
719 
720  builder.restoreIP(codegenIP);
721  convertOmpOpRegions(taskOp.getRegion(), "omp.task.region", builder,
722  moduleTranslation, bodyGenStatus);
723  };
724 
726  if (!taskOp.getDependVars().empty() && taskOp.getDepends()) {
727  for (auto dep :
728  llvm::zip(taskOp.getDependVars(), taskOp.getDepends()->getValue())) {
729  llvm::omp::RTLDependenceKindTy type;
730  switch (
731  cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) {
732  case mlir::omp::ClauseTaskDepend::taskdependin:
733  type = llvm::omp::RTLDependenceKindTy::DepIn;
734  break;
735  // The OpenMP runtime requires that the codegen for 'depend' clause for
736  // 'out' dependency kind must be the same as codegen for 'depend' clause
737  // with 'inout' dependency.
738  case mlir::omp::ClauseTaskDepend::taskdependout:
739  case mlir::omp::ClauseTaskDepend::taskdependinout:
740  type = llvm::omp::RTLDependenceKindTy::DepInOut;
741  break;
742  };
743  llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep));
744  llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal);
745  dds.emplace_back(dd);
746  }
747  }
748 
749  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
750  findAllocaInsertPoint(builder, moduleTranslation);
751  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
752  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask(
753  ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
754  moduleTranslation.lookupValue(taskOp.getFinalExpr()),
755  moduleTranslation.lookupValue(taskOp.getIfExpr()), dds));
756  return bodyGenStatus;
757 }
758 
759 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
760 static LogicalResult
761 convertOmpTaskgroupOp(omp::TaskGroupOp tgOp, llvm::IRBuilderBase &builder,
762  LLVM::ModuleTranslation &moduleTranslation) {
763  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
764  LogicalResult bodyGenStatus = success();
765  if (!tgOp.getTaskReductionVars().empty() || !tgOp.getAllocateVars().empty()) {
766  return tgOp.emitError("unhandled clauses for translation to LLVM IR");
767  }
768  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
769  builder.restoreIP(codegenIP);
770  convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region", builder,
771  moduleTranslation, bodyGenStatus);
772  };
773  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
774  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
775  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTaskgroup(
776  ompLoc, allocaIP, bodyCB));
777  return bodyGenStatus;
778 }
779 
780 /// Allocate space for privatized reduction variables.
781 template <typename T>
782 static void
783 allocReductionVars(T loop, llvm::IRBuilderBase &builder,
784  LLVM::ModuleTranslation &moduleTranslation,
785  llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
786  SmallVector<omp::ReductionDeclareOp> &reductionDecls,
787  SmallVector<llvm::Value *> &privateReductionVariables,
788  DenseMap<Value, llvm::Value *> &reductionVariableMap) {
789  unsigned numReductions = loop.getNumReductionVars();
790  privateReductionVariables.reserve(numReductions);
791  if (numReductions != 0) {
792  llvm::IRBuilderBase::InsertPointGuard guard(builder);
793  builder.restoreIP(allocaIP);
794  for (unsigned i = 0; i < numReductions; ++i) {
795  llvm::Value *var = builder.CreateAlloca(
796  moduleTranslation.convertType(reductionDecls[i].getType()));
797  privateReductionVariables.push_back(var);
798  reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
799  }
800  }
801 }
802 
803 /// Collect reduction info
804 template <typename T>
806  T loop, llvm::IRBuilderBase &builder,
807  LLVM::ModuleTranslation &moduleTranslation,
808  SmallVector<omp::ReductionDeclareOp> &reductionDecls,
809  SmallVector<OwningReductionGen> &owningReductionGens,
810  SmallVector<OwningAtomicReductionGen> &owningAtomicReductionGens,
811  const SmallVector<llvm::Value *> &privateReductionVariables,
813  unsigned numReductions = loop.getNumReductionVars();
814 
815  for (unsigned i = 0; i < numReductions; ++i) {
816  owningReductionGens.push_back(
817  makeReductionGen(reductionDecls[i], builder, moduleTranslation));
818  owningAtomicReductionGens.push_back(
819  makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
820  }
821 
822  // Collect the reduction information.
823  reductionInfos.reserve(numReductions);
824  for (unsigned i = 0; i < numReductions; ++i) {
825  llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
826  if (owningAtomicReductionGens[i])
827  atomicGen = owningAtomicReductionGens[i];
828  llvm::Value *variable =
829  moduleTranslation.lookupValue(loop.getReductionVars()[i]);
830  reductionInfos.push_back(
831  {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
832  privateReductionVariables[i], owningReductionGens[i], atomicGen});
833  }
834 }
835 
836 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
837 static LogicalResult
838 convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
839  LLVM::ModuleTranslation &moduleTranslation) {
840  auto loop = cast<omp::WsLoopOp>(opInst);
841  // TODO: this should be in the op verifier instead.
842  if (loop.getLowerBound().empty())
843  return failure();
844 
845  // Static is the default.
846  auto schedule =
847  loop.getScheduleVal().value_or(omp::ClauseScheduleKind::Static);
848 
849  // Find the loop configuration.
850  llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[0]);
851  llvm::Type *ivType = step->getType();
852  llvm::Value *chunk = nullptr;
853  if (loop.getScheduleChunkVar()) {
854  llvm::Value *chunkVar =
855  moduleTranslation.lookupValue(loop.getScheduleChunkVar());
856  chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
857  }
858 
860  collectReductionDecls(loop, reductionDecls);
861  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
862  findAllocaInsertPoint(builder, moduleTranslation);
863 
864  SmallVector<llvm::Value *> privateReductionVariables;
865  DenseMap<Value, llvm::Value *> reductionVariableMap;
866  allocReductionVars(loop, builder, moduleTranslation, allocaIP, reductionDecls,
867  privateReductionVariables, reductionVariableMap);
868 
869  // Store the mapping between reduction variables and their private copies on
870  // ModuleTranslation stack. It can be then recovered when translating
871  // omp.reduce operations in a separate call.
873  moduleTranslation, reductionVariableMap);
874 
875  // Before the loop, store the initial values of reductions into reduction
876  // variables. Although this could be done after allocas, we don't want to mess
877  // up with the alloca insertion point.
878  for (unsigned i = 0; i < loop.getNumReductionVars(); ++i) {
880  if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
881  "omp.reduction.neutral", builder,
882  moduleTranslation, &phis)))
883  return failure();
884  assert(phis.size() == 1 && "expected one value to be yielded from the "
885  "reduction neutral element declaration region");
886  builder.CreateStore(phis[0], privateReductionVariables[i]);
887  }
888 
889  // Set up the source location value for OpenMP runtime.
890  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
891 
892  // Generator of the canonical loop body.
893  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
894  // relying on captured variables.
897  LogicalResult bodyGenStatus = success();
898  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
899  // Make sure further conversions know about the induction variable.
900  moduleTranslation.mapValue(
901  loop.getRegion().front().getArgument(loopInfos.size()), iv);
902 
903  // Capture the body insertion point for use in nested loops. BodyIP of the
904  // CanonicalLoopInfo always points to the beginning of the entry block of
905  // the body.
906  bodyInsertPoints.push_back(ip);
907 
908  if (loopInfos.size() != loop.getNumLoops() - 1)
909  return;
910 
911  // Convert the body of the loop.
912  builder.restoreIP(ip);
913  convertOmpOpRegions(loop.getRegion(), "omp.wsloop.region", builder,
914  moduleTranslation, bodyGenStatus);
915  };
916 
917  // Delegate actual loop construction to the OpenMP IRBuilder.
918  // TODO: this currently assumes WsLoop is semantically similar to SCF loop,
919  // i.e. it has a positive step, uses signed integer semantics. Reconsider
920  // this code when WsLoop clearly supports more cases.
921  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
922  for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
923  llvm::Value *lowerBound =
924  moduleTranslation.lookupValue(loop.getLowerBound()[i]);
925  llvm::Value *upperBound =
926  moduleTranslation.lookupValue(loop.getUpperBound()[i]);
927  llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[i]);
928 
929  // Make sure loop trip count are emitted in the preheader of the outermost
930  // loop at the latest so that they are all available for the new collapsed
931  // loop will be created below.
932  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
933  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
934  if (i != 0) {
935  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back());
936  computeIP = loopInfos.front()->getPreheaderIP();
937  }
938  loopInfos.push_back(ompBuilder->createCanonicalLoop(
939  loc, bodyGen, lowerBound, upperBound, step,
940  /*IsSigned=*/true, loop.getInclusive(), computeIP));
941 
942  if (failed(bodyGenStatus))
943  return failure();
944  }
945 
946  // Collapse loops. Store the insertion point because LoopInfos may get
947  // invalidated.
948  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
949  llvm::CanonicalLoopInfo *loopInfo =
950  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
951 
952  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
953 
954  // TODO: Handle doacross loops when the ordered clause has a parameter.
955  bool isOrdered = loop.getOrderedVal().has_value();
956  std::optional<omp::ScheduleModifier> scheduleModifier =
957  loop.getScheduleModifier();
958  bool isSimd = loop.getSimdModifier();
959 
960  ompBuilder->applyWorkshareLoop(
961  ompLoc.DL, loopInfo, allocaIP, !loop.getNowait(),
962  convertToScheduleKind(schedule), chunk, isSimd,
963  scheduleModifier == omp::ScheduleModifier::monotonic,
964  scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered);
965 
966  // Continue building IR after the loop. Note that the LoopInfo returned by
967  // `collapseLoops` points inside the outermost loop and is intended for
968  // potential further loop transformations. Use the insertion point stored
969  // before collapsing loops instead.
970  builder.restoreIP(afterIP);
971 
972  // Process the reductions if required.
973  if (loop.getNumReductionVars() == 0)
974  return success();
975 
976  // Create the reduction generators. We need to own them here because
977  // ReductionInfo only accepts references to the generators.
978  SmallVector<OwningReductionGen> owningReductionGens;
979  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
981  collectReductionInfo(loop, builder, moduleTranslation, reductionDecls,
982  owningReductionGens, owningAtomicReductionGens,
983  privateReductionVariables, reductionInfos);
984 
985  // The call to createReductions below expects the block to have a
986  // terminator. Create an unreachable instruction to serve as terminator
987  // and remove it later.
988  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
989  builder.SetInsertPoint(tempTerminator);
990  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
991  ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
992  loop.getNowait());
993  if (!contInsertPoint.getBlock())
994  return loop->emitOpError() << "failed to convert reductions";
995  auto nextInsertionPoint =
996  ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
997  tempTerminator->eraseFromParent();
998  builder.restoreIP(nextInsertionPoint);
999 
1000  return success();
1001 }
1002 
1003 /// Converts the OpenMP parallel operation to LLVM IR.
1004 static LogicalResult
1005 convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
1006  LLVM::ModuleTranslation &moduleTranslation) {
1007  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1008  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1009  // relying on captured variables.
1010  LogicalResult bodyGenStatus = success();
1011  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1012 
1013  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1014  // Collect reduction declarations
1015  SmallVector<omp::ReductionDeclareOp> reductionDecls;
1016  collectReductionDecls(opInst, reductionDecls);
1017 
1018  // Allocate reduction vars
1019  SmallVector<llvm::Value *> privateReductionVariables;
1020  DenseMap<Value, llvm::Value *> reductionVariableMap;
1021  allocReductionVars(opInst, builder, moduleTranslation, allocaIP,
1022  reductionDecls, privateReductionVariables,
1023  reductionVariableMap);
1024 
1025  // Store the mapping between reduction variables and their private copies on
1026  // ModuleTranslation stack. It can be then recovered when translating
1027  // omp.reduce operations in a separate call.
1029  moduleTranslation, reductionVariableMap);
1030 
1031  // Initialize reduction vars
1032  builder.restoreIP(allocaIP);
1033  for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
1036  reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
1037  builder, moduleTranslation, &phis)))
1038  bodyGenStatus = failure();
1039  assert(phis.size() == 1 &&
1040  "expected one value to be yielded from the "
1041  "reduction neutral element declaration region");
1042  builder.restoreIP(allocaIP);
1043  builder.CreateStore(phis[0], privateReductionVariables[i]);
1044  }
1045 
1046  // Save the alloca insertion point on ModuleTranslation stack for use in
1047  // nested regions.
1049  moduleTranslation, allocaIP);
1050 
1051  // ParallelOp has only one region associated with it.
1052  builder.restoreIP(codeGenIP);
1053  auto regionBlock =
1054  convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
1055  moduleTranslation, bodyGenStatus);
1056 
1057  // Process the reductions if required.
1058  if (opInst.getNumReductionVars() > 0) {
1059  // Collect reduction info
1060  SmallVector<OwningReductionGen> owningReductionGens;
1061  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
1063  collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
1064  owningReductionGens, owningAtomicReductionGens,
1065  privateReductionVariables, reductionInfos);
1066 
1067  // Move to region cont block
1068  builder.SetInsertPoint(regionBlock->getTerminator());
1069 
1070  // Generate reductions from info
1071  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
1072  builder.SetInsertPoint(tempTerminator);
1073 
1074  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
1075  ompBuilder->createReductions(builder.saveIP(), allocaIP,
1076  reductionInfos, false);
1077  if (!contInsertPoint.getBlock()) {
1078  bodyGenStatus = opInst->emitOpError() << "failed to convert reductions";
1079  return;
1080  }
1081 
1082  tempTerminator->eraseFromParent();
1083  builder.restoreIP(contInsertPoint);
1084  }
1085  };
1086 
1087  // TODO: Perform appropriate actions according to the data-sharing
1088  // attribute (shared, private, firstprivate, ...) of variables.
1089  // Currently defaults to shared.
1090  auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
1091  llvm::Value &, llvm::Value &vPtr,
1092  llvm::Value *&replacementValue) -> InsertPointTy {
1093  replacementValue = &vPtr;
1094 
1095  return codeGenIP;
1096  };
1097 
1098  // TODO: Perform finalization actions for variables. This has to be
1099  // called for variables which have destructors/finalizers.
1100  auto finiCB = [&](InsertPointTy codeGenIP) {};
1101 
1102  llvm::Value *ifCond = nullptr;
1103  if (auto ifExprVar = opInst.getIfExprVar())
1104  ifCond = moduleTranslation.lookupValue(ifExprVar);
1105  llvm::Value *numThreads = nullptr;
1106  if (auto numThreadsVar = opInst.getNumThreadsVar())
1107  numThreads = moduleTranslation.lookupValue(numThreadsVar);
1108  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
1109  if (auto bind = opInst.getProcBindVal())
1110  pbKind = getProcBindKind(*bind);
1111  // TODO: Is the Parallel construct cancellable?
1112  bool isCancellable = false;
1113 
1114  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1115  findAllocaInsertPoint(builder, moduleTranslation);
1116  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1117 
1118  builder.restoreIP(
1119  ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
1120  ifCond, numThreads, pbKind, isCancellable));
1121 
1122  return bodyGenStatus;
1123 }
1124 
1125 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
1126 static LogicalResult
1127 convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
1128  LLVM::ModuleTranslation &moduleTranslation) {
1129  auto loop = cast<omp::SimdLoopOp>(opInst);
1130 
1131  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1132 
1133  // Generator of the canonical loop body.
1134  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
1135  // relying on captured variables.
1138  LogicalResult bodyGenStatus = success();
1139  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
1140  // Make sure further conversions know about the induction variable.
1141  moduleTranslation.mapValue(
1142  loop.getRegion().front().getArgument(loopInfos.size()), iv);
1143 
1144  // Capture the body insertion point for use in nested loops. BodyIP of the
1145  // CanonicalLoopInfo always points to the beginning of the entry block of
1146  // the body.
1147  bodyInsertPoints.push_back(ip);
1148 
1149  if (loopInfos.size() != loop.getNumLoops() - 1)
1150  return;
1151 
1152  // Convert the body of the loop.
1153  builder.restoreIP(ip);
1154  convertOmpOpRegions(loop.getRegion(), "omp.simdloop.region", builder,
1155  moduleTranslation, bodyGenStatus);
1156  };
1157 
1158  // Delegate actual loop construction to the OpenMP IRBuilder.
1159  // TODO: this currently assumes SimdLoop is semantically similar to SCF loop,
1160  // i.e. it has a positive step, uses signed integer semantics. Reconsider
1161  // this code when SimdLoop clearly supports more cases.
1162  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1163  for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
1164  llvm::Value *lowerBound =
1165  moduleTranslation.lookupValue(loop.getLowerBound()[i]);
1166  llvm::Value *upperBound =
1167  moduleTranslation.lookupValue(loop.getUpperBound()[i]);
1168  llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[i]);
1169 
1170  // Make sure loop trip count are emitted in the preheader of the outermost
1171  // loop at the latest so that they are all available for the new collapsed
1172  // loop will be created below.
1173  llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
1174  llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
1175  if (i != 0) {
1176  loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
1177  ompLoc.DL);
1178  computeIP = loopInfos.front()->getPreheaderIP();
1179  }
1180  loopInfos.push_back(ompBuilder->createCanonicalLoop(
1181  loc, bodyGen, lowerBound, upperBound, step,
1182  /*IsSigned=*/true, /*Inclusive=*/true, computeIP));
1183 
1184  if (failed(bodyGenStatus))
1185  return failure();
1186  }
1187 
1188  // Collapse loops.
1189  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
1190  llvm::CanonicalLoopInfo *loopInfo =
1191  ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
1192 
1193  llvm::ConstantInt *simdlen = nullptr;
1194  if (std::optional<uint64_t> simdlenVar = loop.getSimdlen())
1195  simdlen = builder.getInt64(simdlenVar.value());
1196 
1197  llvm::ConstantInt *safelen = nullptr;
1198  if (std::optional<uint64_t> safelenVar = loop.getSafelen())
1199  safelen = builder.getInt64(safelenVar.value());
1200 
1201  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
1202  ompBuilder->applySimd(
1203  loopInfo, alignedVars,
1204  loop.getIfExpr() ? moduleTranslation.lookupValue(loop.getIfExpr())
1205  : nullptr,
1206  llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen);
1207 
1208  builder.restoreIP(afterIP);
1209  return success();
1210 }
1211 
1212 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
1213 static llvm::AtomicOrdering
1214 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
1215  if (!ao)
1216  return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering
1217 
1218  switch (*ao) {
1219  case omp::ClauseMemoryOrderKind::Seq_cst:
1220  return llvm::AtomicOrdering::SequentiallyConsistent;
1221  case omp::ClauseMemoryOrderKind::Acq_rel:
1222  return llvm::AtomicOrdering::AcquireRelease;
1223  case omp::ClauseMemoryOrderKind::Acquire:
1224  return llvm::AtomicOrdering::Acquire;
1225  case omp::ClauseMemoryOrderKind::Release:
1226  return llvm::AtomicOrdering::Release;
1227  case omp::ClauseMemoryOrderKind::Relaxed:
1228  return llvm::AtomicOrdering::Monotonic;
1229  }
1230  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
1231 }
1232 
1233 /// Convert omp.atomic.read operation to LLVM IR.
1234 static LogicalResult
1235 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
1236  LLVM::ModuleTranslation &moduleTranslation) {
1237 
1238  auto readOp = cast<omp::AtomicReadOp>(opInst);
1239  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1240 
1241  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1242 
1243  llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrderVal());
1244  llvm::Value *x = moduleTranslation.lookupValue(readOp.getX());
1245  llvm::Value *v = moduleTranslation.lookupValue(readOp.getV());
1246 
1247  llvm::Type *elementType =
1248  moduleTranslation.convertType(readOp.getElementType());
1249 
1250  llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false};
1251  llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false};
1252  builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO));
1253  return success();
1254 }
1255 
1256 /// Converts an omp.atomic.write operation to LLVM IR.
1257 static LogicalResult
1258 convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
1259  LLVM::ModuleTranslation &moduleTranslation) {
1260  auto writeOp = cast<omp::AtomicWriteOp>(opInst);
1261  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1262 
1263  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1264  llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrderVal());
1265  llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr());
1266  llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX());
1267  llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType());
1268  llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false,
1269  /*isVolatile=*/false};
1270  builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao));
1271  return success();
1272 }
1273 
1274 /// Converts an LLVM dialect binary operation to the corresponding enum value
1275 /// for `atomicrmw` supported binary operation.
1276 llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
1278  .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
1279  .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
1280  .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
1281  .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
1282  .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
1283  .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
1284  .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
1285  .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
1286  .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
1287  .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
1288 }
1289 
1290 /// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
1291 static LogicalResult
1292 convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
1293  llvm::IRBuilderBase &builder,
1294  LLVM::ModuleTranslation &moduleTranslation) {
1295  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1296 
1297  // Convert values and types.
1298  auto &innerOpList = opInst.getRegion().front().getOperations();
1299  bool isRegionArgUsed{false}, isXBinopExpr{false};
1300  llvm::AtomicRMWInst::BinOp binop;
1301  mlir::Value mlirExpr;
1302  // Find the binary update operation that uses the region argument
1303  // and get the expression to update
1304  for (Operation &innerOp : innerOpList) {
1305  if (innerOp.getNumOperands() == 2) {
1306  binop = convertBinOpToAtomic(innerOp);
1307  if (!llvm::is_contained(innerOp.getOperands(),
1308  opInst.getRegion().getArgument(0)))
1309  continue;
1310  isRegionArgUsed = true;
1311  isXBinopExpr = innerOp.getNumOperands() > 0 &&
1312  innerOp.getOperand(0) == opInst.getRegion().getArgument(0);
1313  mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
1314  break;
1315  }
1316  }
1317  if (!isRegionArgUsed)
1318  return opInst.emitError("no atomic update operation with region argument"
1319  " as operand found inside atomic.update region");
1320 
1321  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1322  llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.getX());
1323  llvm::Type *llvmXElementType = moduleTranslation.convertType(
1324  opInst.getRegion().getArgument(0).getType());
1325  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1326  /*isSigned=*/false,
1327  /*isVolatile=*/false};
1328 
1329  llvm::AtomicOrdering atomicOrdering =
1330  convertAtomicOrdering(opInst.getMemoryOrderVal());
1331 
1332  // Generate update code.
1333  LogicalResult updateGenStatus = success();
1334  auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus](
1335  llvm::Value *atomicx,
1336  llvm::IRBuilder<> &builder) -> llvm::Value * {
1337  Block &bb = *opInst.getRegion().begin();
1338  moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
1339  moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1340  if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1341  updateGenStatus = (opInst.emitError()
1342  << "unable to convert update operation to llvm IR");
1343  return nullptr;
1344  }
1345  omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1346  assert(yieldop && yieldop.getResults().size() == 1 &&
1347  "terminator must be omp.yield op and it must have exactly one "
1348  "argument");
1349  return moduleTranslation.lookupValue(yieldop.getResults()[0]);
1350  };
1351 
1352  // Handle ambiguous alloca, if any.
1353  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1354  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1355  builder.restoreIP(ompBuilder->createAtomicUpdate(
1356  ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn,
1357  isXBinopExpr));
1358  return updateGenStatus;
1359 }
1360 
1361 static LogicalResult
1362 convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
1363  llvm::IRBuilderBase &builder,
1364  LLVM::ModuleTranslation &moduleTranslation) {
1365  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1366  mlir::Value mlirExpr;
1367  bool isXBinopExpr = false, isPostfixUpdate = false;
1368  llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
1369 
1370  omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
1371  omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();
1372 
1373  assert((atomicUpdateOp || atomicWriteOp) &&
1374  "internal op must be an atomic.update or atomic.write op");
1375 
1376  if (atomicWriteOp) {
1377  isPostfixUpdate = true;
1378  mlirExpr = atomicWriteOp.getExpr();
1379  } else {
1380  isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
1381  atomicCaptureOp.getAtomicUpdateOp().getOperation();
1382  auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
1383  bool isRegionArgUsed{false};
1384  // Find the binary update operation that uses the region argument
1385  // and get the expression to update
1386  for (Operation &innerOp : innerOpList) {
1387  if (innerOp.getNumOperands() == 2) {
1388  binop = convertBinOpToAtomic(innerOp);
1389  if (!llvm::is_contained(innerOp.getOperands(),
1390  atomicUpdateOp.getRegion().getArgument(0)))
1391  continue;
1392  isRegionArgUsed = true;
1393  isXBinopExpr =
1394  innerOp.getNumOperands() > 0 &&
1395  innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
1396  mlirExpr =
1397  (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
1398  break;
1399  }
1400  }
1401  if (!isRegionArgUsed)
1402  return atomicUpdateOp.emitError(
1403  "no atomic update operation with region argument"
1404  " as operand found inside atomic.update region");
1405  }
1406 
1407  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
1408  llvm::Value *llvmX =
1409  moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX());
1410  llvm::Value *llvmV =
1411  moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV());
1412  llvm::Type *llvmXElementType = moduleTranslation.convertType(
1413  atomicCaptureOp.getAtomicReadOp().getElementType());
1414  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
1415  /*isSigned=*/false,
1416  /*isVolatile=*/false};
1417  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType,
1418  /*isSigned=*/false,
1419  /*isVolatile=*/false};
1420 
1421  llvm::AtomicOrdering atomicOrdering =
1422  convertAtomicOrdering(atomicCaptureOp.getMemoryOrderVal());
1423 
1424  LogicalResult updateGenStatus = success();
1425  auto updateFn = [&](llvm::Value *atomicx,
1426  llvm::IRBuilder<> &builder) -> llvm::Value * {
1427  if (atomicWriteOp)
1428  return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
1429  Block &bb = *atomicUpdateOp.getRegion().begin();
1430  moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
1431  atomicx);
1432  moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
1433  if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
1434  updateGenStatus = (atomicUpdateOp.emitError()
1435  << "unable to convert update operation to llvm IR");
1436  return nullptr;
1437  }
1438  omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
1439  assert(yieldop && yieldop.getResults().size() == 1 &&
1440  "terminator must be omp.yield op and it must have exactly one "
1441  "argument");
1442  return moduleTranslation.lookupValue(yieldop.getResults()[0]);
1443  };
1444 
1445  // Handle ambiguous alloca, if any.
1446  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
1447  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1448  builder.restoreIP(ompBuilder->createAtomicCapture(
1449  ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
1450  binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr));
1451  return updateGenStatus;
1452 }
1453 
1454 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
1455 /// mapping between reduction variables and their private equivalents to have
1456 /// been stored on the ModuleTranslation stack. Currently only supports
1457 /// reduction within WsLoopOp and ParallelOp, but can be easily extended.
1458 static LogicalResult
1459 convertOmpReductionOp(omp::ReductionOp reductionOp,
1460  llvm::IRBuilderBase &builder,
1461  LLVM::ModuleTranslation &moduleTranslation) {
1462  // Find the declaration that corresponds to the reduction op.
1463  omp::ReductionDeclareOp declaration;
1464  Operation *reductionParent = reductionOp->getParentOp();
1465  if (dyn_cast<omp::ParallelOp>(reductionParent) ||
1466  dyn_cast<omp::WsLoopOp>(reductionParent)) {
1467  declaration = findReductionDecl(*reductionParent, reductionOp);
1468  } else {
1469  llvm_unreachable("Unhandled reduction container");
1470  }
1471  assert(declaration && "could not find reduction declaration");
1472 
1473  // Retrieve the mapping between reduction variables and their private
1474  // equivalents.
1475  const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr;
1476  moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(
1477  [&](const OpenMPVarMappingStackFrame &frame) {
1478  if (frame.mapping.contains(reductionOp.getAccumulator())) {
1479  reductionVariableMap = &frame.mapping;
1480  return WalkResult::interrupt();
1481  }
1482  return WalkResult::advance();
1483  });
1484  assert(reductionVariableMap && "couldn't find private reduction variables");
1485  // Translate the reduction operation by emitting the body of the corresponding
1486  // reduction declaration.
1487  Region &reductionRegion = declaration.getReductionRegion();
1488  llvm::Value *privateReductionVar =
1489  reductionVariableMap->lookup(reductionOp.getAccumulator());
1490  llvm::Value *reductionVal = builder.CreateLoad(
1491  moduleTranslation.convertType(reductionOp.getOperand().getType()),
1492  privateReductionVar);
1493 
1494  moduleTranslation.mapValue(reductionRegion.front().getArgument(0),
1495  reductionVal);
1496  moduleTranslation.mapValue(
1497  reductionRegion.front().getArgument(1),
1498  moduleTranslation.lookupValue(reductionOp.getOperand()));
1499 
1501  if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body",
1502  builder, moduleTranslation, &phis)))
1503  return failure();
1504  assert(phis.size() == 1 && "expected one value to be yielded from "
1505  "the reduction body declaration region");
1506  builder.CreateStore(phis[0], privateReductionVar);
1507  return success();
1508 }
1509 
1510 /// Converts an OpenMP Threadprivate operation into LLVM IR using
1511 /// OpenMPIRBuilder.
1512 static LogicalResult
1513 convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
1514  LLVM::ModuleTranslation &moduleTranslation) {
1515  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1516  auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst);
1517 
1518  Value symAddr = threadprivateOp.getSymAddr();
1519  auto *symOp = symAddr.getDefiningOp();
1520  if (!isa<LLVM::AddressOfOp>(symOp))
1521  return opInst.emitError("Addressing symbol not found");
1522  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
1523 
1524  LLVM::GlobalOp global =
1525  addressOfOp.getGlobal(moduleTranslation.symbolTable());
1526  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
1527  llvm::Type *type = globalValue->getValueType();
1528  llvm::TypeSize typeSize =
1529  builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
1530  type);
1531  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
1532  llvm::StringRef suffix = llvm::StringRef(".cache", 6);
1533  std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str();
1534  llvm::Value *callInst =
1535  moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate(
1536  ompLoc, globalValue, size, cacheName);
1537  moduleTranslation.mapValue(opInst.getResult(0), callInst);
1538  return success();
1539 }
1540 
1541 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1542 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
1543  switch (deviceClause) {
1544  case mlir::omp::DeclareTargetDeviceType::host:
1545  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1546  break;
1547  case mlir::omp::DeclareTargetDeviceType::nohost:
1548  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1549  break;
1550  case mlir::omp::DeclareTargetDeviceType::any:
1551  return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1552  break;
1553  }
1554  llvm_unreachable("unhandled device clause");
1555 }
1556 
1557 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1559  mlir::omp::DeclareTargetCaptureClause captureClasue) {
1560  switch (captureClasue) {
1561  case mlir::omp::DeclareTargetCaptureClause::to:
1562  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1563  case mlir::omp::DeclareTargetCaptureClause::link:
1564  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1565  case mlir::omp::DeclareTargetCaptureClause::enter:
1566  return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1567  }
1568  llvm_unreachable("unhandled capture clause");
1569 }
1570 
1571 static llvm::SmallString<64>
1572 getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
1573  llvm::OpenMPIRBuilder &ompBuilder) {
1574  llvm::SmallString<64> suffix;
1575  llvm::raw_svector_ostream os(suffix);
1576  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
1577  auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>();
1578  auto fileInfoCallBack = [&loc]() {
1579  return std::pair<std::string, uint64_t>(
1580  llvm::StringRef(loc.getFilename()), loc.getLine());
1581  };
1582 
1583  os << llvm::format(
1584  "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
1585  }
1586  os << "_decl_tgt_ref_ptr";
1587 
1588  return suffix;
1589 }
1590 
1591 // Returns the reference pointer generated by the lowering of the declare target
1592 // operation in cases where the link clause is used or the to clause is used in
1593 // USM mode.
1594 static llvm::Value *
1596  LLVM::ModuleTranslation &moduleTranslation) {
1597  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1598 
1599  // An easier way to do this may just be to keep track of any pointer
1600  // references and their mapping to their respective operation
1601  if (auto addressOfOp =
1602  llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) {
1603  if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
1604  addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
1605  addressOfOp.getGlobalName()))) {
1606 
1607  if (auto declareTargetGlobal =
1608  llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
1609  gOp.getOperation())) {
1610 
1611  // In this case, we must utilise the reference pointer generated by the
1612  // declare target operation, similar to Clang
1613  if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
1614  mlir::omp::DeclareTargetCaptureClause::link) ||
1615  (declareTargetGlobal.getDeclareTargetCaptureClause() ==
1616  mlir::omp::DeclareTargetCaptureClause::to &&
1617  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
1618  llvm::SmallString<64> suffix =
1619  getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
1620 
1621  if (gOp.getSymName().contains(suffix))
1622  return moduleTranslation.getLLVMModule()->getNamedValue(
1623  gOp.getSymName());
1624 
1625  return moduleTranslation.getLLVMModule()->getNamedValue(
1626  (gOp.getSymName().str() + suffix.str()).str());
1627  }
1628  }
1629  }
1630  }
1631 
1632  return nullptr;
1633 }
1634 
1635 // A small helper structure to contain data gathered
1636 // for map lowering and coalese it into one area and
1637 // avoiding extra computations such as searches in the
1638 // llvm module for lowered mapped varibles or checking
1639 // if something is declare target (and retrieving the
1640 // value) more than neccessary.
1645  // Stripped off array/pointer to get the underlying
1646  // element type
1648 
1649  /// Append arrays in \a CurInfo.
1650  void append(MapInfoData &CurInfo) {
1651  IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
1652  CurInfo.IsDeclareTarget.end());
1653  MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
1654  OriginalValue.append(CurInfo.OriginalValue.begin(),
1655  CurInfo.OriginalValue.end());
1656  BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
1657  llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
1658  }
1659 };
1660 
1661 uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
1662  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
1663  arrTy.getElementType()))
1664  return getArrayElementSizeInBits(nestedArrTy, dl);
1665  return dl.getTypeSizeInBits(arrTy.getElementType());
1666 }
1667 
1668 // This function calculates the size to be offloaded for a specified type, given
1669 // its associated map clause (which can contain bounds information which affects
1670 // the total size), this size is calculated based on the underlying element type
1671 // e.g. given a 1-D array of ints, we will calculate the size from the integer
1672 // type * number of elements in the array. This size can be used in other
1673 // calculations but is ultimately used as an argument to the OpenMP runtimes
1674 // kernel argument structure which is generated through the combinedInfo data
1675 // structures.
1676 // This function is somewhat equivalent to Clang's getExprTypeSize inside of
1677 // CGOpenMPRuntime.cpp.
1678 llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
1679  Operation *clauseOp, llvm::IRBuilderBase &builder,
1680  LLVM::ModuleTranslation &moduleTranslation) {
1681  // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives
1682  // the size in inconsistent byte or bit format.
1683  uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
1684  if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type)) {
1685  underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);
1686  }
1687 
1688  if (auto memberClause =
1689  mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
1690  // This calculates the size to transfer based on bounds and the underlying
1691  // element type, provided bounds have been specified (Fortran
1692  // pointers/allocatables/target and arrays that have sections specified fall
1693  // into this as well).
1694  if (!memberClause.getBounds().empty()) {
1695  llvm::Value *elementCount = builder.getInt64(1);
1696  for (auto bounds : memberClause.getBounds()) {
1697  if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
1698  bounds.getDefiningOp())) {
1699  // The below calculation for the size to be mapped calculated from the
1700  // map_info's bounds is: (elemCount * [UB - LB] + 1), later we
1701  // multiply by the underlying element types byte size to get the full
1702  // size to be offloaded based on the bounds
1703  elementCount = builder.CreateMul(
1704  elementCount,
1705  builder.CreateAdd(
1706  builder.CreateSub(
1707  moduleTranslation.lookupValue(boundOp.getUpperBound()),
1708  moduleTranslation.lookupValue(boundOp.getLowerBound())),
1709  builder.getInt64(1)));
1710  }
1711  }
1712 
1713  // The size in bytes x number of elements, the sizeInBytes stored is
1714  // the underyling types size, e.g. if ptr<i32>, it'll be the i32's
1715  // size, so we do some on the fly runtime math to get the size in
1716  // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
1717  // some adjustment for members with more complex types.
1718  return builder.CreateMul(elementCount,
1719  builder.getInt64(underlyingTypeSzInBits / 8));
1720  }
1721  }
1722 
1723  return builder.getInt64(underlyingTypeSzInBits / 8);
1724 }
1725 
1727  llvm::SmallVectorImpl<Value> &mapOperands,
1728  LLVM::ModuleTranslation &moduleTranslation,
1729  DataLayout &dl,
1730  llvm::IRBuilderBase &builder) {
1731  for (mlir::Value mapValue : mapOperands) {
1732  assert(mlir::isa<mlir::omp::MapInfoOp>(mapValue.getDefiningOp()) &&
1733  "missing map info operation or incorrect map info operation type");
1734  if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
1735  mapValue.getDefiningOp())) {
1736  mapData.OriginalValue.push_back(
1737  moduleTranslation.lookupValue(mapOp.getVarPtr()));
1738  mapData.Pointers.push_back(mapData.OriginalValue.back());
1739 
1740  if (llvm::Value *refPtr =
1741  getRefPtrIfDeclareTarget(mapOp.getVarPtr(),
1742  moduleTranslation)) { // declare target
1743  mapData.IsDeclareTarget.push_back(true);
1744  mapData.BasePointers.push_back(refPtr);
1745  } else { // regular mapped variable
1746  mapData.IsDeclareTarget.push_back(false);
1747  mapData.BasePointers.push_back(mapData.OriginalValue.back());
1748  }
1749 
1750  mapData.Sizes.push_back(getSizeInBytes(dl, mapOp.getVarType(), mapOp,
1751  builder, moduleTranslation));
1752  mapData.BaseType.push_back(
1753  moduleTranslation.convertType(mapOp.getVarType()));
1754  mapData.MapClause.push_back(mapOp.getOperation());
1755  mapData.Types.push_back(
1756  llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value()));
1757  mapData.Names.push_back(LLVM::createMappingInformation(
1758  mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
1759  mapData.DevicePointers.push_back(
1761  }
1762  }
1763 }
1764 
1765 // Generate all map related information and fill the combinedInfo.
1766 static void genMapInfos(llvm::IRBuilderBase &builder,
1767  LLVM::ModuleTranslation &moduleTranslation,
1768  DataLayout &dl,
1769  llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
1770  MapInfoData &mapData,
1771  const SmallVector<Value> &devPtrOperands = {},
1772  const SmallVector<Value> &devAddrOperands = {},
1773  bool isTargetParams = false) {
1774  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1775 
1776  auto fail = [&combinedInfo]() -> void {
1777  combinedInfo.BasePointers.clear();
1778  combinedInfo.Pointers.clear();
1779  combinedInfo.DevicePointers.clear();
1780  combinedInfo.Sizes.clear();
1781  combinedInfo.Types.clear();
1782  combinedInfo.Names.clear();
1783  };
1784 
1785  // We operate under the assumption that all vectors that are
1786  // required in MapInfoData are of equal lengths (either filled with
1787  // default constructed data or appropiate information) so we can
1788  // utilise the size from any component of MapInfoData, if we can't
1789  // something is missing from the initial MapInfoData construction.
1790  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
1791  // Declare Target Mappings are excluded from being marked as
1792  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're marked
1793  // with OMP_MAP_PTR_AND_OBJ instead.
1794  auto mapFlag = mapData.Types[i];
1795  if (mapData.IsDeclareTarget[i])
1796  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
1797  else if (isTargetParams)
1798  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
1799 
1800  if (auto mapInfoOp = dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[i]))
1801  if (mapInfoOp.getMapCaptureType().value() ==
1802  mlir::omp::VariableCaptureKind::ByCopy &&
1803  !mapInfoOp.getVarType().isa<LLVM::LLVMPointerType>())
1804  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
1805 
1806  combinedInfo.BasePointers.emplace_back(mapData.BasePointers[i]);
1807  combinedInfo.Pointers.emplace_back(mapData.Pointers[i]);
1808  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[i]);
1809  combinedInfo.Names.emplace_back(mapData.Names[i]);
1810  combinedInfo.Types.emplace_back(mapFlag);
1811  combinedInfo.Sizes.emplace_back(mapData.Sizes[i]);
1812  }
1813 
1814  auto findMapInfo = [&combinedInfo](llvm::Value *val, unsigned &index) {
1815  index = 0;
1816  for (llvm::Value *basePtr : combinedInfo.BasePointers) {
1817  if (basePtr == val)
1818  return true;
1819  index++;
1820  }
1821  return false;
1822  };
1823 
1824  auto addDevInfos = [&, fail](auto devOperands, auto devOpType) -> void {
1825  for (const auto &devOp : devOperands) {
1826  // TODO: Only LLVMPointerTypes are handled.
1827  if (!devOp.getType().template isa<LLVM::LLVMPointerType>())
1828  return fail();
1829 
1830  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devOp);
1831 
1832  // Check if map info is already present for this entry.
1833  unsigned infoIndex;
1834  if (findMapInfo(mapOpValue, infoIndex)) {
1835  combinedInfo.Types[infoIndex] |=
1836  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
1837  combinedInfo.DevicePointers[infoIndex] = devOpType;
1838  } else {
1839  combinedInfo.BasePointers.emplace_back(mapOpValue);
1840  combinedInfo.Pointers.emplace_back(mapOpValue);
1841  combinedInfo.DevicePointers.emplace_back(devOpType);
1842  combinedInfo.Names.emplace_back(
1843  LLVM::createMappingInformation(devOp.getLoc(), *ompBuilder));
1844  combinedInfo.Types.emplace_back(
1845  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
1846  combinedInfo.Sizes.emplace_back(builder.getInt64(0));
1847  }
1848  }
1849  };
1850 
1851  addDevInfos(devPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer);
1852  addDevInfos(devAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address);
1853 }
1854 
1855 static LogicalResult
1856 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
1857  LLVM::ModuleTranslation &moduleTranslation) {
1858  llvm::Value *ifCond = nullptr;
1859  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
1860  SmallVector<Value> mapOperands;
1861  SmallVector<Value> useDevPtrOperands;
1862  SmallVector<Value> useDevAddrOperands;
1863  llvm::omp::RuntimeFunction RTLFn;
1864  DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());
1865 
1866  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
1867 
1868  LogicalResult result =
1870  .Case([&](omp::DataOp dataOp) {
1871  if (auto ifExprVar = dataOp.getIfExpr())
1872  ifCond = moduleTranslation.lookupValue(ifExprVar);
1873 
1874  if (auto devId = dataOp.getDevice())
1875  if (auto constOp =
1876  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
1877  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
1878  deviceID = intAttr.getInt();
1879 
1880  mapOperands = dataOp.getMapOperands();
1881  useDevPtrOperands = dataOp.getUseDevicePtr();
1882  useDevAddrOperands = dataOp.getUseDeviceAddr();
1883  return success();
1884  })
1885  .Case([&](omp::EnterDataOp enterDataOp) {
1886  if (enterDataOp.getNowait())
1887  return failure();
1888 
1889  if (auto ifExprVar = enterDataOp.getIfExpr())
1890  ifCond = moduleTranslation.lookupValue(ifExprVar);
1891 
1892  if (auto devId = enterDataOp.getDevice())
1893  if (auto constOp =
1894  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
1895  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
1896  deviceID = intAttr.getInt();
1897  RTLFn = llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
1898  mapOperands = enterDataOp.getMapOperands();
1899  return success();
1900  })
1901  .Case([&](omp::ExitDataOp exitDataOp) {
1902  if (exitDataOp.getNowait())
1903  return failure();
1904 
1905  if (auto ifExprVar = exitDataOp.getIfExpr())
1906  ifCond = moduleTranslation.lookupValue(ifExprVar);
1907 
1908  if (auto devId = exitDataOp.getDevice())
1909  if (auto constOp =
1910  dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp()))
1911  if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
1912  deviceID = intAttr.getInt();
1913 
1914  RTLFn = llvm::omp::OMPRTL___tgt_target_data_end_mapper;
1915  mapOperands = exitDataOp.getMapOperands();
1916  return success();
1917  })
1918  .Default([&](Operation *op) {
1919  return op->emitError("unsupported OpenMP operation: ")
1920  << op->getName();
1921  });
1922 
1923  if (failed(result))
1924  return failure();
1925 
1926  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1927 
1928  MapInfoData mapData;
1929  collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, DL,
1930  builder);
1931 
1932  // Fill up the arrays with all the mapped variables.
1933  llvm::OpenMPIRBuilder::MapInfosTy combinedInfo;
1934  auto genMapInfoCB =
1935  [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
1936  builder.restoreIP(codeGenIP);
1937  if (auto dataOp = dyn_cast<omp::DataOp>(op)) {
1938  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData,
1939  useDevPtrOperands, useDevAddrOperands);
1940  } else {
1941  genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
1942  }
1943  return combinedInfo;
1944  };
1945 
1946  llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
1947  /*SeparateBeginEndCalls=*/true);
1948 
1949  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
1950  LogicalResult bodyGenStatus = success();
1951  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType) {
1952  assert(isa<omp::DataOp>(op) && "BodyGen requested for non DataOp");
1953  Region &region = cast<omp::DataOp>(op).getRegion();
1954  switch (bodyGenType) {
1955  case BodyGenTy::Priv:
1956  // Check if any device ptr/addr info is available
1957  if (!info.DevicePtrInfoMap.empty()) {
1958  builder.restoreIP(codeGenIP);
1959  unsigned argIndex = 0;
1960  for (auto &devPtrOp : useDevPtrOperands) {
1961  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devPtrOp);
1962  const auto &arg = region.front().getArgument(argIndex);
1963  moduleTranslation.mapValue(arg,
1964  info.DevicePtrInfoMap[mapOpValue].second);
1965  argIndex++;
1966  }
1967 
1968  for (auto &devAddrOp : useDevAddrOperands) {
1969  llvm::Value *mapOpValue = moduleTranslation.lookupValue(devAddrOp);
1970  const auto &arg = region.front().getArgument(argIndex);
1971  auto *LI = builder.CreateLoad(
1972  builder.getPtrTy(), info.DevicePtrInfoMap[mapOpValue].second);
1973  moduleTranslation.mapValue(arg, LI);
1974  argIndex++;
1975  }
1976 
1977  bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region",
1978  builder, moduleTranslation);
1979  }
1980  break;
1981  case BodyGenTy::DupNoPriv:
1982  break;
1983  case BodyGenTy::NoPriv:
1984  // If device info is available then region has already been generated
1985  if (info.DevicePtrInfoMap.empty()) {
1986  builder.restoreIP(codeGenIP);
1987  bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region",
1988  builder, moduleTranslation);
1989  }
1990  break;
1991  }
1992  return builder.saveIP();
1993  };
1994 
1995  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
1996  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1997  findAllocaInsertPoint(builder, moduleTranslation);
1998  if (isa<omp::DataOp>(op)) {
1999  builder.restoreIP(ompBuilder->createTargetData(
2000  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
2001  info, genMapInfoCB, nullptr, bodyGenCB));
2002  } else {
2003  builder.restoreIP(ompBuilder->createTargetData(
2004  ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
2005  info, genMapInfoCB, &RTLFn));
2006  }
2007 
2008  return bodyGenStatus;
2009 }
2010 
2011 /// Lowers the FlagsAttr which is applied to the module on the device
2012 /// pass when offloading, this attribute contains OpenMP RTL globals that can
2013 /// be passed as flags to the frontend, otherwise they are set to default
2014 LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
2015  LLVM::ModuleTranslation &moduleTranslation) {
2016  if (!cast<mlir::ModuleOp>(op))
2017  return failure();
2018 
2019  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2020 
2021  ompBuilder->createGlobalFlag(
2022  attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
2023  "__omp_rtl_debug_kind");
2024  ompBuilder->createGlobalFlag(
2025  attribute
2026  .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/
2027  ,
2028  "__omp_rtl_assume_teams_oversubscription");
2029  ompBuilder->createGlobalFlag(
2030  attribute
2031  .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/
2032  ,
2033  "__omp_rtl_assume_threads_oversubscription");
2034  ompBuilder->createGlobalFlag(
2035  attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/,
2036  "__omp_rtl_assume_no_thread_state");
2037  ompBuilder->createGlobalFlag(
2038  attribute
2039  .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
2040  ,
2041  "__omp_rtl_assume_no_nested_parallelism");
2042  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
2043  attribute.getOpenmpDeviceVersion());
2044  return success();
2045 }
2046 
2047 static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo,
2048  omp::TargetOp targetOp,
2049  llvm::StringRef parentName = "") {
2050  auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>();
2051 
2052  assert(fileLoc && "No file found from location");
2053  StringRef fileName = fileLoc.getFilename().getValue();
2054 
2055  llvm::sys::fs::UniqueID id;
2056  if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) {
2057  targetOp.emitError("Unable to get unique ID for file");
2058  return false;
2059  }
2060 
2061  uint64_t line = fileLoc.getLine();
2062  targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(),
2063  id.getFile(), line);
2064  return true;
2065 }
2066 
2067 static bool targetOpSupported(Operation &opInst) {
2068  auto targetOp = cast<omp::TargetOp>(opInst);
2069  if (targetOp.getIfExpr()) {
2070  opInst.emitError("If clause not yet supported");
2071  return false;
2072  }
2073 
2074  if (targetOp.getDevice()) {
2075  opInst.emitError("Device clause not yet supported");
2076  return false;
2077  }
2078 
2079  if (targetOp.getThreadLimit()) {
2080  opInst.emitError("Thread limit clause not yet supported");
2081  return false;
2082  }
2083 
2084  if (targetOp.getNowait()) {
2085  opInst.emitError("Nowait clause not yet supported");
2086  return false;
2087  }
2088 
2089  return true;
2090 }
2091 
2092 static void
2094  LLVM::ModuleTranslation &moduleTranslation,
2095  llvm::IRBuilderBase &builder) {
2096  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
2097  // In the case of declare target mapped variables, the basePointer is
2098  // the reference pointer generated by the convertDeclareTargetAttr
2099  // method. Whereas the kernelValue is the original variable, so for
2100  // the device we must replace all uses of this original global variable
2101  // (stored in kernelValue) with the reference pointer (stored in
2102  // basePointer for declare target mapped variables), as for device the
2103  // data is mapped into this reference pointer and should be loaded
2104  // from it, the original variable is discarded. On host both exist and
2105  // metadata is generated (elsewhere in the convertDeclareTargetAttr)
2106  // function to link the two variables in the runtime and then both the
2107  // reference pointer and the pointer are assigned in the kernel argument
2108  // structure for the host.
2109  if (mapData.IsDeclareTarget[i]) {
2110  // The users iterator will get invalidated if we modify an element,
2111  // so we populate this vector of uses to alter each user on an individual
2112  // basis to emit its own load (rather than one load for all).
2114  for (llvm::User *user : mapData.OriginalValue[i]->users())
2115  userVec.push_back(user);
2116 
2117  for (llvm::User *user : userVec) {
2118  if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
2119  auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
2120  mapData.BasePointers[i]);
2121  load->moveBefore(insn);
2122  user->replaceUsesOfWith(mapData.OriginalValue[i], load);
2123  }
2124  }
2125  }
2126  }
2127 }
2128 
2129 // The createDeviceArgumentAccessor function generates
2130 // instructions for retrieving (acessing) kernel
2131 // arguments inside of the device kernel for use by
2132 // the kernel. This enables different semantics such as
2133 // the creation of temporary copies of data allowing
2134 // semantics like read-only/no host write back kernel
2135 // arguments.
2136 //
2137 // This currently implements a very light version of Clang's
2138 // EmitParmDecl's handling of direct argument handling as well
2139 // as a portion of the argument access generation based on
2140 // capture types found at the end of emitOutlinedFunctionPrologue
2141 // in Clang. The indirect path handling of EmitParmDecl's may be
2142 // required for future work, but a direct 1-to-1 copy doesn't seem
2143 // possible as the logic is rather scattered throughout Clang's
2144 // lowering and perhaps we wish to deviate slightly.
2145 //
2146 // \param mapData - A container containing vectors of information
2147 // corresponding to the input argument, which should have a
2148 // corresponding entry in the MapInfoData containers
2149 // OrigialValue's.
2150 // \param arg - This is the generated kernel function argument that
2151 // corresponds to the passed in input argument. We generated different
2152 // accesses of this Argument, based on capture type and other Input
2153 // related information.
2154 // \param input - This is the host side value that will be passed to
2155 // the kernel i.e. the kernel input, we rewrite all uses of this within
2156 // the kernel (as we generate the kernel body based on the target's region
2157 // which maintians references to the original input) to the retVal argument
2158 // apon exit of this function inside of the OMPIRBuilder. This interlinks
2159 // the kernel argument to future uses of it in the function providing
2160 // appropriate "glue" instructions inbetween.
2161 // \param retVal - This is the value that all uses of input inside of the
2162 // kernel will be re-written to, the goal of this function is to generate
2163 // an appropriate location for the kernel argument to be accessed from,
2164 // e.g. ByRef will result in a temporary allocation location and then
2165 // a store of the kernel argument into this allocated memory which
2166 // will then be loaded from, ByCopy will use the allocated memory
2167 // directly.
2168 static llvm::IRBuilderBase::InsertPoint
2169 createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
2170  llvm::Value *input, llvm::Value *&retVal,
2171  llvm::IRBuilderBase &builder,
2172  llvm::OpenMPIRBuilder &ompBuilder,
2173  LLVM::ModuleTranslation &moduleTranslation,
2174  llvm::IRBuilderBase::InsertPoint allocaIP,
2175  llvm::IRBuilderBase::InsertPoint codeGenIP) {
2176  builder.restoreIP(allocaIP);
2177 
2178  mlir::omp::VariableCaptureKind capture =
2179  mlir::omp::VariableCaptureKind::ByRef;
2180 
2181  // Find the associated MapInfoData entry for the current input
2182  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
2183  if (mapData.OriginalValue[i] == input) {
2184  if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2185  mapData.MapClause[i])) {
2186  capture = mapOp.getMapCaptureType().value_or(
2187  mlir::omp::VariableCaptureKind::ByRef);
2188  }
2189 
2190  break;
2191  }
2192 
2193  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
2194  unsigned int defaultAS =
2195  ompBuilder.M.getDataLayout().getProgramAddressSpace();
2196 
2197  // Create the alloca for the argument the current point.
2198  llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
2199 
2200  if (allocaAS != defaultAS && arg.getType()->isPointerTy())
2201  v = builder.CreatePointerBitCastOrAddrSpaceCast(
2202  v, arg.getType()->getPointerTo(defaultAS));
2203 
2204  builder.CreateStore(&arg, v);
2205 
2206  builder.restoreIP(codeGenIP);
2207 
2208  switch (capture) {
2209  case mlir::omp::VariableCaptureKind::ByCopy: {
2210  retVal = v;
2211  break;
2212  }
2213  case mlir::omp::VariableCaptureKind::ByRef: {
2214  retVal = builder.CreateAlignedLoad(
2215  v->getType(), v,
2216  ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
2217  break;
2218  }
2219  case mlir::omp::VariableCaptureKind::This:
2220  case mlir::omp::VariableCaptureKind::VLAType:
2221  assert(false && "Currently unsupported capture kind");
2222  break;
2223  }
2224 
2225  return builder.saveIP();
2226 }
2227 
2228 // This is a variation on Clang's GenerateOpenMPCapturedVars, which
2229 // generates different operation (e.g. load/store) combinations for
2230 // arguments to the kernel, based on map capture kinds which are then
2231 // utilised in the combinedInfo in place of the original Map value.
2232 static void
2234  LLVM::ModuleTranslation &moduleTranslation,
2235  llvm::IRBuilderBase &builder) {
2236  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
2237  // if it's declare target, skip it, it's handled seperately.
2238  if (!mapData.IsDeclareTarget[i]) {
2239  mlir::omp::VariableCaptureKind captureKind =
2240  mlir::omp::VariableCaptureKind::ByRef;
2241 
2242  if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2243  mapData.MapClause[i])) {
2244  captureKind = mapOp.getMapCaptureType().value_or(
2245  mlir::omp::VariableCaptureKind::ByRef);
2246  }
2247 
2248  switch (captureKind) {
2249  case mlir::omp::VariableCaptureKind::ByRef: {
2250  // Currently handles array sectioning lowerbound case, but more
2251  // logic may be required in the future. Clang invokes EmitLValue,
2252  // which has specialised logic for special Clang types such as user
2253  // defines, so it is possible we will have to extend this for
2254  // structures or other complex types. As the general idea is that this
2255  // function mimics some of the logic from Clang that we require for
2256  // kernel argument passing from host -> device.
2257  if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
2258  mapData.MapClause[i])) {
2259  if (!mapOp.getBounds().empty() && mapData.BaseType[i]->isArrayTy()) {
2260 
2261  std::vector<llvm::Value *> idx =
2262  std::vector<llvm::Value *>{builder.getInt64(0)};
2263  for (int i = mapOp.getBounds().size() - 1; i >= 0; --i) {
2264  if (auto boundOp =
2265  mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
2266  mapOp.getBounds()[i].getDefiningOp())) {
2267  idx.push_back(
2268  moduleTranslation.lookupValue(boundOp.getLowerBound()));
2269  }
2270  }
2271 
2272  mapData.Pointers[i] = builder.CreateInBoundsGEP(
2273  mapData.BaseType[i], mapData.Pointers[i], idx);
2274  }
2275  }
2276  } break;
2277  case mlir::omp::VariableCaptureKind::ByCopy: {
2278  llvm::Type *type = mapData.BaseType[i];
2279  llvm::Value *newV;
2280  if (mapData.Pointers[i]->getType()->isPointerTy())
2281  newV = builder.CreateLoad(type, mapData.Pointers[i]);
2282  else
2283  newV = mapData.Pointers[i];
2284 
2285  if (!type->isPointerTy()) {
2286  auto curInsert = builder.saveIP();
2287  builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
2288  auto *memTempAlloc =
2289  builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
2290  builder.restoreIP(curInsert);
2291 
2292  builder.CreateStore(newV, memTempAlloc);
2293  newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc);
2294  }
2295 
2296  mapData.Pointers[i] = newV;
2297  mapData.BasePointers[i] = newV;
2298  } break;
2299  case mlir::omp::VariableCaptureKind::This:
2300  case mlir::omp::VariableCaptureKind::VLAType:
2301  mapData.MapClause[i]->emitOpError("Unhandled capture kind");
2302  break;
2303  }
2304  }
2305  }
2306 }
2307 
2308 static LogicalResult
2309 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
2310  LLVM::ModuleTranslation &moduleTranslation) {
2311 
2312  if (!targetOpSupported(opInst))
2313  return failure();
2314 
2315  auto targetOp = cast<omp::TargetOp>(opInst);
2316  auto &targetRegion = targetOp.getRegion();
2317  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
2318  SmallVector<Value> mapOperands = targetOp.getMapOperands();
2319 
2320  LogicalResult bodyGenStatus = success();
2321  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2322  auto bodyCB = [&](InsertPointTy allocaIP,
2323  InsertPointTy codeGenIP) -> InsertPointTy {
2324  builder.restoreIP(codeGenIP);
2325  unsigned argIndex = 0;
2326  for (auto &mapOp : mapOperands) {
2327  auto mapInfoOp =
2328  mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
2329  llvm::Value *mapOpValue =
2330  moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
2331  const auto &arg = targetRegion.front().getArgument(argIndex);
2332  moduleTranslation.mapValue(arg, mapOpValue);
2333  argIndex++;
2334  }
2335  llvm::BasicBlock *exitBlock = convertOmpOpRegions(
2336  targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
2337  builder.SetInsertPoint(exitBlock);
2338  return builder.saveIP();
2339  };
2340 
2341  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
2342  StringRef parentName = opInst.getParentOfType<LLVM::LLVMFuncOp>().getName();
2343 
2344  // Override parent name if early outlining function
2345  if (auto earlyOutlineOp = llvm::dyn_cast<mlir::omp::EarlyOutliningInterface>(
2346  opInst.getParentOfType<LLVM::LLVMFuncOp>().getOperation())) {
2347  llvm::StringRef outlineParentName = earlyOutlineOp.getParentName();
2348  parentName = outlineParentName.empty() ? parentName : outlineParentName;
2349  }
2350 
2351  llvm::TargetRegionEntryInfo entryInfo;
2352 
2353  if (!getTargetEntryUniqueInfo(entryInfo, targetOp, parentName))
2354  return failure();
2355 
2356  int32_t defaultValTeams = -1;
2357  int32_t defaultValThreads = 0;
2358 
2359  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
2360  findAllocaInsertPoint(builder, moduleTranslation);
2361 
2362  MapInfoData mapData;
2363  collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, dl,
2364  builder);
2365 
2366  // We wish to modify some of the methods in which kernel arguments are
2367  // passed based on their capture type by the target region, this can
2368  // involve generating new loads and stores, which changes the
2369  // MLIR value to LLVM value mapping, however, we only wish to do this
2370  // locally for the current function/target and also avoid altering
2371  // ModuleTranslation, so we remap the base pointer or pointer stored
2372  // in the map infos corresponding MapInfoData, which is later accessed
2373  // by genMapInfos and createTarget to help generate the kernel and
2374  // kernel arg structure. It primarily becomes relevant in cases like
2375  // bycopy, or byref range'd arrays. In the default case, we simply
2376  // pass thee pointer byref as both basePointer and pointer.
2377  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
2378  createAlteredByCaptureMap(mapData, moduleTranslation, builder);
2379 
2380  llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
2381  auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
2383  builder.restoreIP(codeGenIP);
2384  genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, {}, {},
2385  true);
2386  return combinedInfos;
2387  };
2388 
2389  auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
2390  llvm::Value *&retVal, InsertPointTy allocaIP,
2391  InsertPointTy codeGenIP) {
2392  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2393 
2394  // We just return the unaltered argument for the host function
2395  // for now, some alterations may be required in the future to
2396  // keep host fallback functions working identically to the device
2397  // version (e.g. pass ByCopy values should be treated as such on
2398  // host and device, currently not always the case)
2399  if (!ompBuilder->Config.isTargetDevice()) {
2400  retVal = cast<llvm::Value>(&arg);
2401  return codeGenIP;
2402  }
2403 
2404  return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
2405  *ompBuilder, moduleTranslation,
2406  allocaIP, codeGenIP);
2407  };
2408 
2410  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
2411  // declare target arguments are not passed to kernels as arguments
2412  if (!mapData.IsDeclareTarget[i])
2413  kernelInput.push_back(mapData.OriginalValue[i]);
2414  }
2415 
2416  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget(
2417  ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams,
2418  defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB));
2419 
2420  // Remap access operations to declare target reference pointers for the
2421  // device, essentially generating extra loadop's as necessary
2422  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
2423  handleDeclareTargetMapVar(mapData, moduleTranslation, builder);
2424 
2425  return bodyGenStatus;
2426 }
2427 
2428 static LogicalResult
2429 convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
2430  LLVM::ModuleTranslation &moduleTranslation) {
2431  // Amend omp.declare_target by deleting the IR of the outlined functions
2432  // created for target regions. They cannot be filtered out from MLIR earlier
2433  // because the omp.target operation inside must be translated to LLVM, but
2434  // the wrapper functions themselves must not remain at the end of the
2435  // process. We know that functions where omp.declare_target does not match
2436  // omp.is_target_device at this stage can only be wrapper functions because
2437  // those that aren't are removed earlier as an MLIR transformation pass.
2438  if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
2439  if (auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
2440  op->getParentOfType<ModuleOp>().getOperation())) {
2441  if (!offloadMod.getIsTargetDevice())
2442  return success();
2443 
2444  omp::DeclareTargetDeviceType declareType =
2445  attribute.getDeviceType().getValue();
2446 
2447  if (declareType == omp::DeclareTargetDeviceType::host) {
2448  llvm::Function *llvmFunc =
2449  moduleTranslation.lookupFunction(funcOp.getName());
2450  llvmFunc->dropAllReferences();
2451  llvmFunc->eraseFromParent();
2452  }
2453  }
2454  return success();
2455  }
2456 
2457  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
2458  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
2459  if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
2460  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2461  bool isDeclaration = gOp.isDeclaration();
2462  bool isExternallyVisible =
2463  gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
2464  auto loc = op->getLoc()->findInstanceOf<FileLineColLoc>();
2465  llvm::StringRef mangledName = gOp.getSymName();
2466  auto captureClause =
2467  convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
2468  auto deviceClause =
2469  convertToDeviceClauseKind(attribute.getDeviceType().getValue());
2470  // unused for MLIR at the moment, required in Clang for book
2471  // keeping
2472  std::vector<llvm::GlobalVariable *> generatedRefs;
2473 
2474  std::vector<llvm::Triple> targetTriple;
2475  auto targetTripleAttr =
2476  op->getParentOfType<mlir::ModuleOp>()
2477  ->getAttr(LLVM::LLVMDialect::getTargetTripleAttrName())
2478  .dyn_cast_or_null<mlir::StringAttr>();
2479  if (targetTripleAttr)
2480  targetTriple.emplace_back(targetTripleAttr.data());
2481 
2482  auto fileInfoCallBack = [&loc]() {
2483  std::string filename = "";
2484  std::uint64_t lineNo = 0;
2485 
2486  if (loc) {
2487  filename = loc.getFilename().str();
2488  lineNo = loc.getLine();
2489  }
2490 
2491  return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
2492  lineNo);
2493  };
2494 
2495  ompBuilder->registerTargetGlobalVariable(
2496  captureClause, deviceClause, isDeclaration, isExternallyVisible,
2497  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
2498  generatedRefs, /*OpenMPSimd*/ false, targetTriple,
2499  /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
2500  gVal->getType(), gVal);
2501 
2502  if (ompBuilder->Config.isTargetDevice() &&
2503  (attribute.getCaptureClause().getValue() !=
2504  mlir::omp::DeclareTargetCaptureClause::to ||
2505  ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
2506  ompBuilder->getAddrOfDeclareTargetVar(
2507  captureClause, deviceClause, isDeclaration, isExternallyVisible,
2508  ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
2509  generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
2510  /*GlobalInitializer*/ nullptr,
2511  /*VariableLinkage*/ nullptr);
2512  }
2513  }
2514  }
2515 
2516  return success();
2517 }
2518 
2519 /// Converts the module-level set of OpenMP requires clauses into LLVM IR using
2520 /// OpenMPIRBuilder.
2521 static LogicalResult
2522 convertRequiresAttr(Operation &op, omp::ClauseRequiresAttr requiresAttr,
2523  LLVM::ModuleTranslation &moduleTranslation) {
2524  auto *ompBuilder = moduleTranslation.getOpenMPBuilder();
2525 
2526  // No need to read requiresAttr here, because it has already been done in
2527  // translateModuleToLLVMIR(). There, flags are stored in the
2528  // OpenMPIRBuilderConfig object, available to the OpenMPIRBuilder.
2529  auto *regFn =
2530  ompBuilder->createRegisterRequires(ompBuilder->createPlatformSpecificName(
2531  {"omp_offloading", "requires_reg"}));
2532 
2533  // Add registration function as global constructor
2534  if (regFn)
2535  llvm::appendToGlobalCtors(ompBuilder->M, regFn, /* Priority = */ 0);
2536 
2537  return success();
2538 }
2539 
2540 namespace {
2541 
2542 /// Implementation of the dialect interface that converts operations belonging
2543 /// to the OpenMP dialect to LLVM IR.
2544 class OpenMPDialectLLVMIRTranslationInterface
2546 public:
2548 
2549  /// Translates the given operation to LLVM IR using the provided IR builder
2550  /// and saving the state in `moduleTranslation`.
2552  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
2553  LLVM::ModuleTranslation &moduleTranslation) const final;
2554 
2555  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR, runtime
2556  /// calls, or operation amendments
2558  amendOperation(Operation *op, NamedAttribute attribute,
2559  LLVM::ModuleTranslation &moduleTranslation) const final;
2560 };
2561 
2562 } // namespace
2563 
2564 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
2565  Operation *op, NamedAttribute attribute,
2566  LLVM::ModuleTranslation &moduleTranslation) const {
2568  attribute.getName())
2569  .Case("omp.is_target_device",
2570  [&](Attribute attr) {
2571  if (auto deviceAttr = attr.dyn_cast<BoolAttr>()) {
2572  llvm::OpenMPIRBuilderConfig &config =
2573  moduleTranslation.getOpenMPBuilder()->Config;
2574  config.setIsTargetDevice(deviceAttr.getValue());
2575  return success();
2576  }
2577  return failure();
2578  })
2579  .Case("omp.is_gpu",
2580  [&](Attribute attr) {
2581  if (auto gpuAttr = attr.dyn_cast<BoolAttr>()) {
2582  llvm::OpenMPIRBuilderConfig &config =
2583  moduleTranslation.getOpenMPBuilder()->Config;
2584  config.setIsGPU(gpuAttr.getValue());
2585  return success();
2586  }
2587  return failure();
2588  })
2589  .Case("omp.host_ir_filepath",
2590  [&](Attribute attr) {
2591  if (auto filepathAttr = attr.dyn_cast<StringAttr>()) {
2592  llvm::OpenMPIRBuilder *ompBuilder =
2593  moduleTranslation.getOpenMPBuilder();
2594  ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
2595  return success();
2596  }
2597  return failure();
2598  })
2599  .Case("omp.flags",
2600  [&](Attribute attr) {
2601  if (auto rtlAttr = attr.dyn_cast<omp::FlagsAttr>())
2602  return convertFlagsAttr(op, rtlAttr, moduleTranslation);
2603  return failure();
2604  })
2605  .Case("omp.version",
2606  [&](Attribute attr) {
2607  if (auto versionAttr = attr.dyn_cast<omp::VersionAttr>()) {
2608  llvm::OpenMPIRBuilder *ompBuilder =
2609  moduleTranslation.getOpenMPBuilder();
2610  ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
2611  versionAttr.getVersion());
2612  return success();
2613  }
2614  return failure();
2615  })
2616  .Case("omp.declare_target",
2617  [&](Attribute attr) {
2618  if (auto declareTargetAttr =
2619  attr.dyn_cast<omp::DeclareTargetAttr>())
2620  return convertDeclareTargetAttr(op, declareTargetAttr,
2621  moduleTranslation);
2622  return failure();
2623  })
2624  .Case(
2625  "omp.requires",
2626  [&](Attribute attr) {
2627  if (auto requiresAttr = attr.dyn_cast<omp::ClauseRequiresAttr>()) {
2628  using Requires = omp::ClauseRequires;
2629  Requires flags = requiresAttr.getValue();
2630  llvm::OpenMPIRBuilderConfig &config =
2631  moduleTranslation.getOpenMPBuilder()->Config;
2632  config.setHasRequiresReverseOffload(
2633  bitEnumContainsAll(flags, Requires::reverse_offload));
2634  config.setHasRequiresUnifiedAddress(
2635  bitEnumContainsAll(flags, Requires::unified_address));
2636  config.setHasRequiresUnifiedSharedMemory(
2637  bitEnumContainsAll(flags, Requires::unified_shared_memory));
2638  config.setHasRequiresDynamicAllocators(
2639  bitEnumContainsAll(flags, Requires::dynamic_allocators));
2640  return convertRequiresAttr(*op, requiresAttr, moduleTranslation);
2641  }
2642  return failure();
2643  })
2644  .Default([](Attribute) {
2645  // Fall through for omp attributes that do not require lowering.
2646  return success();
2647  })(attribute.getValue());
2648 
2649  return failure();
2650 }
2651 
2652 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR
2653 /// (including OpenMP runtime calls).
2654 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
2655  Operation *op, llvm::IRBuilderBase &builder,
2656  LLVM::ModuleTranslation &moduleTranslation) const {
2657 
2658  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
2659 
2661  .Case([&](omp::BarrierOp) {
2662  ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
2663  return success();
2664  })
2665  .Case([&](omp::TaskwaitOp) {
2666  ompBuilder->createTaskwait(builder.saveIP());
2667  return success();
2668  })
2669  .Case([&](omp::TaskyieldOp) {
2670  ompBuilder->createTaskyield(builder.saveIP());
2671  return success();
2672  })
2673  .Case([&](omp::FlushOp) {
2674  // No support in Openmp runtime function (__kmpc_flush) to accept
2675  // the argument list.
2676  // OpenMP standard states the following:
2677  // "An implementation may implement a flush with a list by ignoring
2678  // the list, and treating it the same as a flush without a list."
2679  //
2680  // The argument list is discarded so that, flush with a list is treated
2681  // same as a flush without a list.
2682  ompBuilder->createFlush(builder.saveIP());
2683  return success();
2684  })
2685  .Case([&](omp::ParallelOp op) {
2686  return convertOmpParallel(op, builder, moduleTranslation);
2687  })
2688  .Case([&](omp::ReductionOp reductionOp) {
2689  return convertOmpReductionOp(reductionOp, builder, moduleTranslation);
2690  })
2691  .Case([&](omp::MasterOp) {
2692  return convertOmpMaster(*op, builder, moduleTranslation);
2693  })
2694  .Case([&](omp::CriticalOp) {
2695  return convertOmpCritical(*op, builder, moduleTranslation);
2696  })
2697  .Case([&](omp::OrderedRegionOp) {
2698  return convertOmpOrderedRegion(*op, builder, moduleTranslation);
2699  })
2700  .Case([&](omp::OrderedOp) {
2701  return convertOmpOrdered(*op, builder, moduleTranslation);
2702  })
2703  .Case([&](omp::WsLoopOp) {
2704  return convertOmpWsLoop(*op, builder, moduleTranslation);
2705  })
2706  .Case([&](omp::SimdLoopOp) {
2707  return convertOmpSimdLoop(*op, builder, moduleTranslation);
2708  })
2709  .Case([&](omp::AtomicReadOp) {
2710  return convertOmpAtomicRead(*op, builder, moduleTranslation);
2711  })
2712  .Case([&](omp::AtomicWriteOp) {
2713  return convertOmpAtomicWrite(*op, builder, moduleTranslation);
2714  })
2715  .Case([&](omp::AtomicUpdateOp op) {
2716  return convertOmpAtomicUpdate(op, builder, moduleTranslation);
2717  })
2718  .Case([&](omp::AtomicCaptureOp op) {
2719  return convertOmpAtomicCapture(op, builder, moduleTranslation);
2720  })
2721  .Case([&](omp::SectionsOp) {
2722  return convertOmpSections(*op, builder, moduleTranslation);
2723  })
2724  .Case([&](omp::SingleOp op) {
2725  return convertOmpSingle(op, builder, moduleTranslation);
2726  })
2727  .Case([&](omp::TeamsOp op) {
2728  return convertOmpTeams(op, builder, moduleTranslation);
2729  })
2730  .Case([&](omp::TaskOp op) {
2731  return convertOmpTaskOp(op, builder, moduleTranslation);
2732  })
2733  .Case([&](omp::TaskGroupOp op) {
2734  return convertOmpTaskgroupOp(op, builder, moduleTranslation);
2735  })
2736  .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp,
2737  omp::CriticalDeclareOp>([](auto op) {
2738  // `yield` and `terminator` can be just omitted. The block structure
2739  // was created in the region that handles their parent operation.
2740  // `reduction.declare` will be used by reductions and is not
2741  // converted directly, skip it.
2742  // `critical.declare` is only used to declare names of critical
2743  // sections which will be used by `critical` ops and hence can be
2744  // ignored for lowering. The OpenMP IRBuilder will create unique
2745  // name for critical section names.
2746  return success();
2747  })
2748  .Case([&](omp::ThreadprivateOp) {
2749  return convertOmpThreadprivate(*op, builder, moduleTranslation);
2750  })
2751  .Case<omp::DataOp, omp::EnterDataOp, omp::ExitDataOp>([&](auto op) {
2752  return convertOmpTargetData(op, builder, moduleTranslation);
2753  })
2754  .Case([&](omp::TargetOp) {
2755  return convertOmpTarget(*op, builder, moduleTranslation);
2756  })
2757  .Case<omp::MapInfoOp, omp::DataBoundsOp>([&](auto op) {
2758  // No-op, should be handled by relevant owning operations e.g.
2759  // TargetOp, EnterDataOp, ExitDataOp, DataOp etc. and then
2760  // discarded
2761  return success();
2762  })
2763  .Default([&](Operation *inst) {
2764  return inst->emitError("unsupported OpenMP operation: ")
2765  << inst->getName();
2766  });
2767 }
2768 
2770  registry.insert<omp::OpenMPDialect>();
2771  registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
2772  dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
2773  });
2774 }
2775 
2777  DialectRegistry registry;
2779  context.appendDialectRegistry(registry);
2780 }
@ None
static llvm::Value * getRefPtrIfDeclareTarget(mlir::Value value, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static void allocReductionVars(T loop, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, SmallVector< omp::ReductionDeclareOp > &reductionDecls, SmallVector< llvm::Value * > &privateReductionVariables, DenseMap< Value, llvm::Value * > &reductionVariableMap)
Allocate space for privatized reduction variables.
static LogicalResult convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered_region' operation into LLVM IR using OpenMPIRBuilder.
static llvm::OpenMPIRBuilder::InsertPointTy findAllocaInsertPoint(llvm::IRBuilderBase &builder, const LLVM::ModuleTranslation &moduleTranslation)
Find the insertion point for allocas given the current insertion point for normal operations in the b...
static LogicalResult convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an omp.atomic.write operation to LLVM IR.
static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpTaskgroupOp(omp::TaskGroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
static OwningReductionGen makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible reduction generator for the given reduction declaration.
static LogicalResult convertOmpReductionOp(omp::ReductionOp reductionOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP reduction operation using OpenMPIRBuilder.
static void collectReductionInfo(T loop, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVector< omp::ReductionDeclareOp > &reductionDecls, SmallVector< OwningReductionGen > &owningReductionGens, SmallVector< OwningAtomicReductionGen > &owningAtomicReductionGens, const SmallVector< llvm::Value * > &privateReductionVariables, SmallVector< llvm::OpenMPIRBuilder::ReductionInfo > &reductionInfos)
Collect reduction info.
static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static std::optional< omp::ReductionDeclareOp > findReductionDeclInContainer(T container, omp::ReductionOp reduction)
Returns a reduction declaration that corresponds to the given reduction operation in the given contai...
llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op)
Converts an LLVM dialect binary operation to the corresponding enum value for atomicrmw supported bin...
static llvm::AtomicOrdering convertAtomicOrdering(std::optional< omp::ClauseMemoryOrderKind > ao)
Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder.
static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, const SmallVector< Value > &devPtrOperands={}, const SmallVector< Value > &devAddrOperands={}, bool isTargetParams=false)
static LogicalResult convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
void collectMapDataFromMapOperands(MapInfoData &mapData, llvm::SmallVectorImpl< Value > &mapOperands, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, llvm::IRBuilderBase &builder)
static llvm::SmallString< 64 > getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, llvm::OpenMPIRBuilder &ompBuilder)
static LogicalResult convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts the OpenMP parallel operation to LLVM IR.
static LogicalResult inlineConvertOmpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl< llvm::Value * > *continuationBlockArgs=nullptr)
Translates the blocks contained in the given region and appends them to at the current insertion poin...
static LogicalResult convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP Threadprivate operation into LLVM IR using OpenMPIRBuilder.
llvm::Value * getSizeInBytes(DataLayout &dl, const mlir::Type &type, Operation *clauseOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static LogicalResult convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
static OwningAtomicReductionGen makeAtomicReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Create an OpenMPIRBuilder-compatible atomic reduction generator for the given reduction declaration.
LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, LLVM::ModuleTranslation &moduleTranslation)
Lowers the FlagsAttr which is applied to the module on the device pass when offloading,...
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertToCaptureClauseKind(mlir::omp::DeclareTargetCaptureClause captureClasue)
uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl)
static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder)
static bool targetOpSupported(Operation &opInst)
static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP)
static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind)
Convert ProcBindKind from MLIR-generated enum to LLVM enum.
static LogicalResult convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static void collectReductionDecls(T loop, SmallVectorImpl< omp::ReductionDeclareOp > &reductions)
Populates reductions with reduction declarations used in the given loop.
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause)
static LogicalResult convertRequiresAttr(Operation &op, omp::ClauseRequiresAttr requiresAttr, LLVM::ModuleTranslation &moduleTranslation)
Converts the module-level set of OpenMP requires clauses into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
Convert omp.atomic.read operation to LLVM IR.
static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, omp::TargetOp targetOp, llvm::StringRef parentName="")
static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation)
static llvm::BasicBlock * convertOmpOpRegions(Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, SmallVectorImpl< llvm::PHINode * > *continuationBlockPHIs=nullptr)
Converts the given region that appears within an OpenMP dialect operation to LLVM IR,...
static omp::ReductionDeclareOp findReductionDecl(Operation &containerOp, omp::ReductionOp reduction)
Searches for a reduction in a provided region and the regions it is nested in.
#define MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CLASS_NAME)
Definition: TypeID.h:274
Attributes are known-constant values of operations.
Definition: Attributes.h:25
U dyn_cast() const
Definition: Attributes.h:175
Block represents an ordered list of Operations.
Definition: Block.h:30
BlockArgument getArgument(unsigned i)
Definition: Block.h:122
Operation & back()
Definition: Block.h:145
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:238
iterator begin()
Definition: Block.h:136
Special case of IntegerAttr to represent boolean integers, i.e., signless i1 integers.
The main mechanism for performing data layout queries.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
void addExtension(std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
Base class for dialect interfaces providing translation to LLVM IR.
Concrete CRTP base class for ModuleTranslation stack frames.
Implementation class for module translation.
LogicalResult convertBlock(Block &bb, bool ignoreArguments, llvm::IRBuilderBase &builder)
Translates the contents of the given block to LLVM IR using this translator.
llvm::Value * lookupValue(Value value) const
Finds an LLVM IR value corresponding to the given MLIR value.
SmallVector< llvm::Value * > lookupValues(ValueRange values)
Looks up a list of remapped values.
llvm::BasicBlock * lookupBlock(Block *block) const
Finds an LLVM IR basic block that corresponds to the given MLIR block.
SymbolTableCollection & symbolTable()
llvm::Type * convertType(Type type)
Converts the type from MLIR LLVM dialect to LLVM.
llvm::OpenMPIRBuilder * getOpenMPBuilder()
Returns the OpenMP IR builder associated with the LLVM IR module being constructed.
llvm::LLVMContext & getLLVMContext() const
Returns the LLVM context in which the IR is being constructed.
llvm::GlobalValue * lookupGlobal(Operation *op)
Finds an LLVM IR global value that corresponds to the given MLIR operation defining a global value.
llvm::Module * getLLVMModule()
Returns the LLVM module in which the IR is being constructed.
llvm::Function * lookupFunction(StringRef name) const
Finds an LLVM IR function by its name.
void mapBlock(Block *mlir, llvm::BasicBlock *llvm)
Stores the mapping between an MLIR block and LLVM IR basic block.
WalkResult stackWalk(llvm::function_ref< WalkResult(const T &)> callback) const
Calls callback for every ModuleTranslation stack frame of type T starting from the top of the stack.
void forgetMapping(Region &region)
Removes the mapping for blocks contained in the region and values defined in these blocks.
void mapValue(Value mlir, llvm::Value *llvm)
Stores the mapping between an MLIR value and its LLVM IR counterpart.
T findInstanceOf()
Return an instance of the given location type if one is nested under the current location.
Definition: Location.h:41
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:198
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:49
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:212
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:345
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:402
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:341
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-level operation.
Definition: Operation.h:234
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:267
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:665
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:373
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
bool empty()
Definition: Region.h:60
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
Block & front()
Definition: Region.h:65
@ Private
The symbol is private and may only be referenced by SymbolRefAttrs local to the operations within the...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:34
static WalkResult advance()
Definition: Visitors.h:52
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: Visitors.h:56
static WalkResult interrupt()
Definition: Visitors.h:51
void connectPHINodes(Region &region, const ModuleTranslation &state)
For all blocks in the region that were converted to LLVM IR using the given ModuleTranslation,...
llvm::Constant * createMappingInformation(Location loc, llvm::OpenMPIRBuilder &builder)
Create a constant string representing the mapping information extracted from the MLIR location information.
Runtime
Potential runtimes for AMD GPU kernels.
Definition: Runtimes.h:15
Include the generated interface declarations.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
SetVector< Block * > getTopologicallySortedBlocks(Region &region)
Get a topologically sorted list of blocks of the given region.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
void registerOpenMPDialectTranslation(DialectRegistry &registry)
Register the OpenMP dialect and the translation from it to the LLVM IR in the given registry.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
Definition: LogicalResult.h:72
llvm::SmallVector< llvm::Value *, 4 > OriginalValue
llvm::SmallVector< bool, 4 > IsDeclareTarget
llvm::SmallVector< llvm::Type *, 4 > BaseType
void append(MapInfoData &CurInfo)
Append arrays in CurInfo.
llvm::SmallVector< mlir::Operation *, 4 > MapClause
RAII object calling stackPush/stackPop on construction/destruction.
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26