MLIR 23.0.0git
ACCEmitRemarksLoop.cpp
Go to the documentation of this file.
1//===- ACCEmitRemarksLoop.cpp - Emit OpenACC loop mapping remarks --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass emits optimization remarks describing how loops inside OpenACC
10// compute regions are mapped to parallelism levels and GPU dimensions.
11//
12//===----------------------------------------------------------------------===//
13
23#include "llvm/ADT/StringExtras.h"
24#include "llvm/Support/Debug.h"
25
26namespace mlir {
27namespace acc {
28#define GEN_PASS_DEF_ACCEMITREMARKSLOOP
29#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc"
30} // namespace acc
31} // namespace mlir
32
33#define DEBUG_TYPE "acc-emit-remarks-loop"
34
35using namespace mlir;
36
37namespace {
38
39static bool shouldEmitLoopRemarks(acc::ComputeRegionOp computeRegion) {
40 StringRef origin = computeRegion.getOrigin();
41 if (origin == acc::KernelsOp::getOperationName() ||
42 origin == acc::ParallelOp::getOperationName() ||
43 origin == acc::SerialOp::getOperationName())
44 return true;
45
46 if (auto func = computeRegion->getParentOfType<FunctionOpInterface>())
48 return false;
49}
50
51static std::string getACCParLevelName(acc::GPUParallelDimAttr parDim,
52 const acc::ACCToGPUMappingPolicy &policy,
53 acc::ComputeRegionOp computeRegion) {
54 std::string accName;
55 if (policy.isSeq(parDim))
56 accName = "sequential";
57 else if (policy.isVector(parDim))
58 accName = "vector";
59 else if (policy.isWorker(parDim))
60 accName = "worker";
61 else if (policy.isGang(parDim))
62 accName = "gang";
63
64 if (!policy.isSeq(parDim)) {
65 if (std::optional<uint64_t> constant =
66 computeRegion.getKnownConstantLaunchArg(parDim))
67 accName += "(" + std::to_string(*constant) + ")";
68 }
69 return accName;
70}
71
72static std::string getGPUParDimName(acc::GPUParallelDimAttr parDim,
73 llvm::StringRef separator) {
74 auto formatDim = [&](llvm::StringRef prefix, char axis) {
75 return (prefix + separator).str() + axis;
76 };
77
78 if (parDim.isThreadX())
79 return formatDim("threadidx", 'x');
80 if (parDim.isThreadY())
81 return formatDim("threadidx", 'y');
82 if (parDim.isThreadZ())
83 return formatDim("threadidx", 'z');
84 if (parDim.isBlockX())
85 return formatDim("blockidx", 'x');
86 if (parDim.isBlockY())
87 return formatDim("blockidx", 'y');
88 if (parDim.isBlockZ())
89 return formatDim("blockidx", 'z');
90 return {};
91}
92
93static void emitLoopMappingRemark(acc::ComputeRegionOp computeRegion,
94 LoopLikeOpInterface loopOp,
95 acc::OpenACCSupport &accSupport,
96 const acc::ACCToGPUMappingPolicy &policy,
97 llvm::StringRef gpuDimSeparator) {
98 acc::GPUParallelDimsAttr parDimsAttr =
99 loopOp->getAttrOfType<acc::GPUParallelDimsAttr>(
100 acc::GPUParallelDimsAttr::name);
101
104 if (parDimsAttr) {
105 parDims = parDimsAttr.getArray();
106 } else if (isa<scf::ForOp>(loopOp.getOperation())) {
107 seqParDims.push_back(acc::GPUParallelDimAttr::seqDim(loopOp->getContext()));
108 parDims = seqParDims;
109 } else {
110 return;
111 }
112
113 accSupport.emitRemark(
114 loopOp,
115 [&]() {
118
119 for (acc::GPUParallelDimAttr parDim : parDims) {
120 accMsgs.push_back(getACCParLevelName(parDim, policy, computeRegion));
121 if (std::string gpuName = getGPUParDimName(parDim, gpuDimSeparator);
122 !gpuName.empty())
123 gpuMsgs.push_back(std::move(gpuName));
124 }
125
126 std::string msg = "!$acc loop " + llvm::join(accMsgs, ", ");
127
128 if (uint64_t collapseCount = acc::getCollapseCount(loopOp);
129 collapseCount > 1)
130 msg += " collapse(" + std::to_string(collapseCount) + ")";
131
132 if (!gpuMsgs.empty())
133 msg += " ! " + llvm::join(gpuMsgs, " ");
134 return msg;
135 },
136 DEBUG_TYPE);
137}
138
139class ACCEmitRemarksLoop
140 : public acc::impl::ACCEmitRemarksLoopBase<ACCEmitRemarksLoop> {
141public:
142 using ACCEmitRemarksLoopBase<ACCEmitRemarksLoop>::ACCEmitRemarksLoopBase;
143
144 void runOnOperation() override {
145 func::FuncOp func = getOperation();
146 acc::OpenACCSupport &accSupport = getAnalysis<acc::OpenACCSupport>();
148 if (gpuDimSeparator.empty())
149 gpuDimSeparator = ".";
150
151 func.walk([&](acc::ComputeRegionOp computeRegion) {
152 if (!shouldEmitLoopRemarks(computeRegion))
153 return;
154
155 computeRegion.getRegion().walk([&](LoopLikeOpInterface loopOp) {
156 emitLoopMappingRemark(computeRegion, loopOp, accSupport, policy,
157 gpuDimSeparator);
158 });
159 });
160 }
161};
162
163} // namespace
#define DEBUG_TYPE
virtual bool isWorker(ParDimAttrT attr) const =0
Check if the attribute represents worker parallelism.
virtual bool isSeq(ParDimAttrT attr) const =0
Check if the attribute represents sequential execution.
virtual bool isVector(ParDimAttrT attr) const =0
Check if the attribute represents vector parallelism.
virtual bool isGang(ParDimAttrT attr) const =0
Check if the attribute represents gang parallelism (any gang dimension).
Default policy that provides the standard GPU mapping: gang(dim:1) -> BlockX (gridDim....
remark::detail::InFlightRemark emitRemark(Operation *op, std::function< std::string()> messageFn, llvm::StringRef category="openacc")
Emit an OpenACC remark with lazy message generation.
uint64_t getCollapseCount(Operation *op)
Number of original loops collapsed into op, or 1 when op carries no collapse_count attribute.
bool isSpecializedAccRoutine(mlir::Operation *op)
Used to check whether this is a specialized accelerator version of acc routine function.
Definition OpenACC.h:201
ACCParMappingPolicy< mlir::acc::GPUParallelDimAttr > ACCToGPUMappingPolicy
Type alias for the GPU-specific mapping policy.
Include the generated interface declarations.