MLIR 22.0.0git
OpenACCUtilsLoop.cpp
Go to the documentation of this file.
1//===- OpenACCUtilsLoop.cpp - OpenACC Loop Utilities ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains utility functions for converting OpenACC loops to SCF.
10//
11//===----------------------------------------------------------------------===//
12
14
20#include "mlir/IR/IRMapping.h"
21
22using namespace mlir;
23
24namespace {
25
26/// Calculate trip count for a loop: (ub - lb + step) / step
27/// If inclusiveUpperbound is false, subtracts 1 from ub first.
28static Value calculateTripCount(OpBuilder &b, Location loc, Value lb, Value ub,
29 Value step, bool inclusiveUpperbound) {
30 Type type = b.getIndexType();
31
32 // Convert original loop arguments to index type
33 lb = getValueOrCreateCastToIndexLike(b, loc, type, lb);
35 step = getValueOrCreateCastToIndexLike(b, loc, type, step);
36
37 if (!inclusiveUpperbound) {
39 ub = b.createOrFold<arith::SubIOp>(loc, ub, one,
40 arith::IntegerOverflowFlags::nsw);
41 }
42
43 Value sub = b.createOrFold<arith::SubIOp>(loc, ub, lb,
44 arith::IntegerOverflowFlags::nsw);
45 Value add = b.createOrFold<arith::AddIOp>(loc, sub, step,
46 arith::IntegerOverflowFlags::nsw);
47 return b.createOrFold<arith::DivSIOp>(loc, add, step);
48}
49
50/// Get exclusive upper bound from acc.loop (add 1 if inclusive).
51/// The result is always in index type.
52static Value getExclusiveUpperBoundAsIndex(acc::LoopOp loopOp, size_t ivPos,
53 OpBuilder &b) {
54 bool isInclusive = false;
55 if (loopOp.getInclusiveUpperbound().has_value())
56 isInclusive = loopOp.getInclusiveUpperboundAttr().asArrayRef()[ivPos];
57
58 Value origUB = loopOp.getUpperbound()[ivPos];
59 Location loc = origUB.getLoc();
60 Type indexType = b.getIndexType();
61
62 // Cast to index first, then add if inclusive
63 Value ub = getValueOrCreateCastToIndexLike(b, loc, indexType, origUB);
64 if (isInclusive) {
66 ub = b.createOrFold<arith::AddIOp>(loc, ub, one,
67 arith::IntegerOverflowFlags::nsw);
68 }
69 return ub;
70}
71
72/// Handle differing types between SCF (index) and ACC loops.
73/// Creates casts from the new SCF IVs to the original ACC IV types and updates
74/// the mapping. The newIVs should correspond 1:1 with the ACC loop's IVs.
75static void mapACCLoopIVsToSCFIVs(acc::LoopOp accLoop, ValueRange newIVs,
76 OpBuilder &b, IRMapping &mapping) {
77 for (auto [origIV, newIV] :
78 llvm::zip(accLoop.getBody().getArguments(), newIVs)) {
80 b, accLoop->getLoc(), origIV.getType(), newIV);
81 mapping.map(origIV, replacementIV);
82 }
83}
84
85/// Normalize IV uses after converting to normalized loop form.
86/// For normalized loops (lb=0, step=1), we need to denormalize the IV:
87/// original_iv = new_iv * orig_step + orig_lb
88static void normalizeIVUses(OpBuilder &b, Location loc, Value iv, Value origLB,
89 Value origStep) {
90 Type indexType = b.getIndexType();
91 Value lb = getValueOrCreateCastToIndexLike(b, loc, indexType, origLB);
92 Value step = getValueOrCreateCastToIndexLike(b, loc, indexType, origStep);
93
94 // new_iv * step + lb
95 Value scaled =
96 arith::MulIOp::create(b, loc, iv, step, arith::IntegerOverflowFlags::nsw);
97 Value denormalized = arith::AddIOp::create(b, loc, scaled, lb,
98 arith::IntegerOverflowFlags::nsw);
99
100 // Replace uses of iv with denormalized value, except for the ops that
101 // compute the denormalized value itself (muli and addi)
103 exceptions.insert(scaled.getDefiningOp());
104 exceptions.insert(denormalized.getDefiningOp());
105 iv.replaceAllUsesExcept(denormalized, exceptions);
106}
107
108/// Clone an ACC region into a destination block, handling the ACC terminators.
109/// Returns the insertion point after the cloned operations.
110static Block::iterator cloneACCRegionInto(Region *src, Block *dest,
111 Block::iterator insertionPoint,
112 IRMapping &mapping) {
113 assert(src->hasOneBlock() && "expected single-block region");
114
115 Region *insertRegion = dest->getParent();
116 Block *postInsertBlock = dest->splitBlock(insertionPoint);
117 src->cloneInto(insertRegion, postInsertBlock->getIterator(), mapping);
118
119 auto lastNewBlock = std::prev(postInsertBlock->getIterator());
120
121 Block::iterator newInsertionPoint;
122 Operation *terminator = lastNewBlock->getTerminator();
123
124 if (auto yieldOp = dyn_cast<acc::YieldOp>(terminator)) {
125 newInsertionPoint = std::prev(yieldOp->getIterator());
126 yieldOp.erase();
127 } else if (auto terminatorOp = dyn_cast<acc::TerminatorOp>(terminator)) {
128 newInsertionPoint = std::prev(terminatorOp->getIterator());
129 terminatorOp.erase();
130 } else {
131 llvm_unreachable("unexpected terminator in ACC region");
132 }
133
134 // Merge last block with the postInsertBlock
135 lastNewBlock->getOperations().splice(lastNewBlock->end(),
136 postInsertBlock->getOperations());
137 postInsertBlock->erase();
138
139 // Merge first block with original dest block
140 auto firstNewBlock = std::next(dest->getIterator());
141 dest->getOperations().splice(dest->end(), firstNewBlock->getOperations());
142 firstNewBlock->erase();
143
144 return newInsertionPoint;
145}
146
147/// Wrap a multi-block region with scf.execute_region.
148static scf::ExecuteRegionOp
149wrapMultiBlockRegionWithSCFExecuteRegion(Region &region, IRMapping &mapping,
150 Location loc, RewriterBase &rewriter) {
151 auto exeRegionOp = scf::ExecuteRegionOp::create(rewriter, loc, TypeRange{});
152
153 rewriter.cloneRegionBefore(region, exeRegionOp.getRegion(),
154 exeRegionOp.getRegion().end(), mapping);
155
156 // Find and replace the ACC terminator with scf.yield
157 Operation *terminator = exeRegionOp.getRegion().back().getTerminator();
158 if (auto yieldOp = dyn_cast<acc::YieldOp>(terminator)) {
159 if (yieldOp.getNumOperands() > 0) {
160 region.getParentOp()->emitError(
161 "acc.loop with results not yet supported");
162 return nullptr;
163 }
164 } else if (!isa<acc::TerminatorOp>(terminator)) {
165 llvm_unreachable("unexpected terminator in ACC region");
166 }
167
168 rewriter.eraseOp(terminator);
169 rewriter.setInsertionPointToEnd(&exeRegionOp.getRegion().back());
170 scf::YieldOp::create(rewriter, loc);
171 return exeRegionOp;
172}
173
174} // namespace
175
176namespace mlir {
177namespace acc {
178
179scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
180 bool enableCollapse) {
181 assert(!loopOp.getUnstructured() &&
182 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
183 "loops");
184
185 Location loc = loopOp->getLoc();
186 Type indexType = rewriter.getIndexType();
187
188 // Create nested scf.for loops and build IR mapping for IVs
189 IRMapping mapping;
191
192 // Save the original insertion point
193 OpBuilder::InsertionGuard guard(rewriter);
194 rewriter.setInsertionPoint(loopOp);
195
196 for (BlockArgument iv : loopOp.getBody().getArguments()) {
197 size_t idx = iv.getArgNumber();
198
199 // For nested loops, insert inside the previous loop's body
200 if (idx > 0)
201 rewriter.setInsertionPointToStart(forOps.back().getBody());
202
204 rewriter, loc, indexType, loopOp.getLowerbound()[idx]);
205 Value newUpperBound = getExclusiveUpperBoundAsIndex(loopOp, idx, rewriter);
206 Value newStep = getValueOrCreateCastToIndexLike(rewriter, loc, indexType,
207 loopOp.getStep()[idx]);
208
209 scf::ForOp forOp = scf::ForOp::create(rewriter, loc, newLowerBound,
210 newUpperBound, newStep);
211 forOps.push_back(forOp);
212 mapping.map(iv, forOp.getInductionVar());
213 }
214
215 // Set insertion point inside the innermost loop for IV casts and body cloning
216 rewriter.setInsertionPointToStart(forOps.back().getBody());
217
218 // Handle IV type conversion (index -> original type)
219 SmallVector<Value> scfIVs;
220 for (scf::ForOp forOp : forOps)
221 scfIVs.push_back(forOp.getInductionVar());
222 mapACCLoopIVsToSCFIVs(loopOp, scfIVs, rewriter, mapping);
223
224 // Clone the loop body into the innermost scf.for
225 cloneACCRegionInto(&loopOp.getRegion(), forOps.back().getBody(),
226 rewriter.getInsertionPoint(), mapping);
227
228 // Optionally collapse nested loops
229 if (enableCollapse && forOps.size() > 1)
230 if (failed(coalesceLoops(forOps)))
231 loopOp.emitError("failed to collapse acc.loop");
232
233 return forOps.front();
234}
235
236scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp,
237 RewriterBase &rewriter) {
238 assert(!loopOp.getUnstructured() &&
239 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
240 "loops");
241 assert(
242 rewriter.getInsertionBlock() &&
243 !loopOp->isProperAncestor(rewriter.getInsertionBlock()->getParentOp()) &&
244 "builder insertion point must not be inside the loop being converted");
245
246 Location loc = loopOp->getLoc();
247
248 SmallVector<Value> lowerBounds, upperBounds, steps;
249
250 // Normalize all loops: lb=0, step=1, ub=tripCount
251 Value lb = arith::ConstantIndexOp::create(rewriter, loc, 0);
252 Value step = arith::ConstantIndexOp::create(rewriter, loc, 1);
253
254 for (auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
255 bool inclusiveUpperbound = false;
256 if (loopOp.getInclusiveUpperbound().has_value())
257 inclusiveUpperbound = loopOp.getInclusiveUpperbound().value()[idx];
258
259 Value ub = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
260 loopOp.getUpperbound()[idx],
261 loopOp.getStep()[idx], inclusiveUpperbound);
262
263 lowerBounds.push_back(lb);
264 upperBounds.push_back(ub);
265 steps.push_back(step);
266 }
267
268 auto parallelOp =
269 scf::ParallelOp::create(rewriter, loc, lowerBounds, upperBounds, steps);
270
271 // Create IV type conversions
272 IRMapping mapping;
273 rewriter.setInsertionPointToStart(parallelOp.getBody());
274 mapACCLoopIVsToSCFIVs(loopOp, parallelOp.getInductionVars(), rewriter,
275 mapping);
276
277 if (!loopOp.getRegion().hasOneBlock()) {
278 auto exeRegion = wrapMultiBlockRegionWithSCFExecuteRegion(
279 loopOp.getRegion(), mapping, loc, rewriter);
280 if (!exeRegion) {
281 rewriter.eraseOp(parallelOp);
282 return nullptr;
283 }
284 } else {
285 cloneACCRegionInto(&loopOp.getRegion(), parallelOp.getBody(),
286 rewriter.getInsertionPoint(), mapping);
287 }
288
289 // Denormalize IV uses
290 rewriter.setInsertionPointToStart(parallelOp.getBody());
291 for (auto [idx, iv] : llvm::enumerate(parallelOp.getBody()->getArguments()))
292 if (!iv.use_empty())
293 normalizeIVUses(rewriter, loc, iv, loopOp.getLowerbound()[idx],
294 loopOp.getStep()[idx]);
295
296 return parallelOp;
297}
298
299scf::ExecuteRegionOp
301 RewriterBase &rewriter) {
302 assert(loopOp.getUnstructured() &&
303 "use convertACCLoopToSCFFor for structured loops");
304 assert(
305 rewriter.getInsertionBlock() &&
306 !loopOp->isProperAncestor(rewriter.getInsertionBlock()->getParentOp()) &&
307 "builder insertion point must not be inside the loop being converted");
308
309 IRMapping mapping;
310 return wrapMultiBlockRegionWithSCFExecuteRegion(loopOp.getRegion(), mapping,
311 loopOp->getLoc(), rewriter);
312}
313
314} // namespace acc
315} // namespace mlir
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
#define add(a, b)
This class represents an argument of a Block.
Definition Value.h:309
Block represents an ordered list of Operations.
Definition Block.h:33
OpListType::iterator iterator
Definition Block.h:140
void erase()
Unlink this Block from its parent region and delete it.
Definition Block.cpp:66
Block * splitBlock(iterator splitBefore)
Split the block into two blocks before the specified operation or iterator.
Definition Block.cpp:318
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
Definition Block.cpp:27
OpListType & getOperations()
Definition Block.h:137
Operation * getTerminator()
Get the terminator operation of this block.
Definition Block.cpp:244
iterator end()
Definition Block.h:144
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition Block.cpp:31
IndexType getIndexType()
Definition Builders.cpp:51
This is a utility class for mapping one set of IR entities to another.
Definition IRMapping.h:26
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
Definition IRMapping.h:30
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
RAII guard to reset the insertion point of the builder when destroyed.
Definition Builders.h:348
This class helps build Operations.
Definition Builders.h:207
Block::iterator getInsertionPoint() const
Returns the current insertion point of the builder.
Definition Builders.h:445
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition Builders.h:431
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition Builders.h:398
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Definition Builders.h:436
void cloneRegionBefore(Region &region, Region &parent, Region::iterator before, IRMapping &mapping)
Clone the blocks that belong to "region" before the given position in another region "parent".
Definition Builders.cpp:589
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Definition Builders.h:442
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition Operation.h:686
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
Block & back()
Definition Region.h:64
void cloneInto(Region *dest, IRMapping &mapper)
Clone the internal blocks from this region into dest.
Definition Region.cpp:70
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition Region.h:200
bool hasOneBlock()
Return true if this region has exactly one block.
Definition Region.h:68
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
This class provides an abstraction over the various different ranges of value types.
Definition TypeRange.h:37
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
bool use_empty() const
Returns true if this value has no uses.
Definition Value.h:208
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use...
Definition Value.cpp:71
Location getLoc() const
Return the location of this value.
Definition Value.cpp:24
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Definition ArithOps.cpp:359
scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp, RewriterBase &rewriter)
Convert acc.loop to scf.parallel.
scf::ExecuteRegionOp convertUnstructuredACCLoopToSCFExecuteRegion(LoopOp loopOp, RewriterBase &rewriter)
Convert an unstructured acc.loop to scf.execute_region.
scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter, bool enableCollapse)
Convert a structured acc.loop to scf.for.
Include the generated interface declarations.
Value getValueOrCreateCastToIndexLike(OpBuilder &b, Location loc, Type targetType, Value value)
Create a cast from an index-like value (index or integer) to another index-like value.
Definition Utils.cpp:119
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
Definition Utils.cpp:986