MLIR 23.0.0git
OpenACCUtilsLoop.cpp
Go to the documentation of this file.
1//===- OpenACCUtilsLoop.cpp - OpenACC Loop Utilities ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains utility functions for converting OpenACC loops to SCF.
10//
11//===----------------------------------------------------------------------===//
12
14
20#include "mlir/IR/IRMapping.h"
21
22using namespace mlir;
23
24namespace {
25
26/// Calculate trip count for a loop: (ub - lb + step) / step
27/// If inclusiveUpperbound is false, subtracts 1 from ub first.
28static Value calculateTripCount(OpBuilder &b, Location loc, Value lb, Value ub,
29 Value step, bool inclusiveUpperbound) {
30 Type type = b.getIndexType();
31
32 // Convert original loop arguments to index type
33 lb = getValueOrCreateCastToIndexLike(b, loc, type, lb);
35 step = getValueOrCreateCastToIndexLike(b, loc, type, step);
36
37 if (!inclusiveUpperbound) {
39 ub = b.createOrFold<arith::SubIOp>(loc, ub, one,
40 arith::IntegerOverflowFlags::nsw);
41 }
42
43 Value sub = b.createOrFold<arith::SubIOp>(loc, ub, lb,
44 arith::IntegerOverflowFlags::nsw);
45 Value add = b.createOrFold<arith::AddIOp>(loc, sub, step,
46 arith::IntegerOverflowFlags::nsw);
47 return b.createOrFold<arith::DivSIOp>(loc, add, step);
48}
49
50/// Get exclusive upper bound from acc.loop (add 1 if inclusive).
51/// The result is always in index type.
52static Value getExclusiveUpperBoundAsIndex(acc::LoopOp loopOp, size_t ivPos,
53 OpBuilder &b) {
54 bool isInclusive = false;
55 if (loopOp.getInclusiveUpperbound().has_value())
56 isInclusive = loopOp.getInclusiveUpperboundAttr().asArrayRef()[ivPos];
57
58 Value origUB = loopOp.getUpperbound()[ivPos];
59 Location loc = origUB.getLoc();
60 Type indexType = b.getIndexType();
61
62 // Cast to index first, then add if inclusive
63 Value ub = getValueOrCreateCastToIndexLike(b, loc, indexType, origUB);
64 if (isInclusive) {
66 ub = b.createOrFold<arith::AddIOp>(loc, ub, one,
67 arith::IntegerOverflowFlags::nsw);
68 }
69 return ub;
70}
71
72/// Handle differing types between SCF (index) and ACC loops.
73/// Creates casts from the new SCF IVs to the original ACC IV types and updates
74/// the mapping. The newIVs should correspond 1:1 with the ACC loop's IVs.
75static void mapACCLoopIVsToSCFIVs(acc::LoopOp accLoop, ValueRange newIVs,
76 OpBuilder &b, IRMapping &mapping) {
77 for (auto [origIV, newIV] :
78 llvm::zip(accLoop.getBody().getArguments(), newIVs)) {
80 b, accLoop->getLoc(), origIV.getType(), newIV);
81 mapping.map(origIV, replacementIV);
82 }
83}
84
85/// Normalize IV uses after converting to normalized loop form.
86/// For normalized loops (lb=0, step=1), we need to denormalize the IV:
87/// original_iv = new_iv * orig_step + orig_lb
88static void normalizeIVUses(OpBuilder &b, Location loc, Value iv, Value origLB,
89 Value origStep) {
90 Type indexType = b.getIndexType();
91 Value lb = getValueOrCreateCastToIndexLike(b, loc, indexType, origLB);
92 Value step = getValueOrCreateCastToIndexLike(b, loc, indexType, origStep);
93
94 // new_iv * step + lb
95 Value scaled =
96 arith::MulIOp::create(b, loc, iv, step, arith::IntegerOverflowFlags::nsw);
97 Value denormalized = arith::AddIOp::create(b, loc, scaled, lb,
98 arith::IntegerOverflowFlags::nsw);
99
100 // Replace uses of iv with denormalized value, except for the ops that
101 // compute the denormalized value itself (muli and addi)
103 exceptions.insert(scaled.getDefiningOp());
104 exceptions.insert(denormalized.getDefiningOp());
105 iv.replaceAllUsesExcept(denormalized, exceptions);
106}
107
108/// Clone an ACC region into a destination block, handling the ACC terminators.
109/// Returns the insertion point after the cloned operations.
110static Block::iterator cloneACCRegionInto(Region *src, Block *dest,
111 Block::iterator insertionPoint,
112 IRMapping &mapping,
113 RewriterBase &rewriter) {
114 assert(src->hasOneBlock() && "expected single-block region");
115
116 Region *insertRegion = dest->getParent();
117 Block *postInsertBlock = rewriter.splitBlock(dest, insertionPoint);
118 rewriter.cloneRegionBefore(*src, *insertRegion,
119 postInsertBlock->getIterator(), mapping);
120
121 auto lastNewBlock = std::prev(postInsertBlock->getIterator());
122
123 Block::iterator newInsertionPoint;
124 Operation *terminator = lastNewBlock->getTerminator();
125
126 if (auto yieldOp = dyn_cast<acc::YieldOp>(terminator)) {
127 newInsertionPoint = std::prev(yieldOp->getIterator());
128 rewriter.eraseOp(yieldOp);
129 } else if (auto terminatorOp = dyn_cast<acc::TerminatorOp>(terminator)) {
130 newInsertionPoint = std::prev(terminatorOp->getIterator());
131 rewriter.eraseOp(terminatorOp);
132 } else {
133 llvm_unreachable("unexpected terminator in ACC region");
134 }
135
136 // Merge last block with the postInsertBlock
137 rewriter.mergeBlocks(postInsertBlock, &*lastNewBlock);
138
139 // Merge first block with original dest block
140 Block *firstNewBlock = &*std::next(dest->getIterator());
141 rewriter.mergeBlocks(firstNewBlock, dest);
142
143 return newInsertionPoint;
144}
145
146/// Wrap a multi-block region with scf.execute_region.
147static scf::ExecuteRegionOp
148wrapMultiBlockRegionWithSCFExecuteRegion(Region &region, IRMapping &mapping,
149 Location loc, RewriterBase &rewriter) {
150 auto exeRegionOp = scf::ExecuteRegionOp::create(rewriter, loc, TypeRange{});
151
152 rewriter.cloneRegionBefore(region, exeRegionOp.getRegion(),
153 exeRegionOp.getRegion().end(), mapping);
154
155 // Find and replace the ACC terminator with scf.yield
156 Operation *terminator = exeRegionOp.getRegion().back().getTerminator();
157 if (auto yieldOp = dyn_cast<acc::YieldOp>(terminator)) {
158 if (yieldOp.getNumOperands() > 0) {
159 region.getParentOp()->emitError(
160 "acc.loop with results not yet supported");
161 return nullptr;
162 }
163 } else if (!isa<acc::TerminatorOp>(terminator)) {
164 llvm_unreachable("unexpected terminator in ACC region");
165 }
166
167 rewriter.eraseOp(terminator);
168 rewriter.setInsertionPointToEnd(&exeRegionOp.getRegion().back());
169 scf::YieldOp::create(rewriter, loc);
170 return exeRegionOp;
171}
172
173} // namespace
174
175namespace mlir {
176namespace acc {
177
178scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
179 bool enableCollapse) {
180 assert(!loopOp.getUnstructured() &&
181 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
182 "loops");
183
184 Location loc = loopOp->getLoc();
185 Type indexType = rewriter.getIndexType();
186
187 // Create nested scf.for loops and build IR mapping for IVs
188 IRMapping mapping;
190
191 // Save the original insertion point
192 OpBuilder::InsertionGuard guard(rewriter);
193 rewriter.setInsertionPoint(loopOp);
194
195 // First, compute ALL loop bounds at the current insertion point (before
196 // any ForOp). This ensures all bounds are defined in the outer scope,
197 // which is required for coalesceLoops to work correctly.
198 SmallVector<Value> lowerBounds, upperBounds, steps;
199 for (BlockArgument iv : loopOp.getBody().getArguments()) {
200 size_t idx = iv.getArgNumber();
202 rewriter, loc, indexType, loopOp.getLowerbound()[idx]);
203 Value newUpperBound = getExclusiveUpperBoundAsIndex(loopOp, idx, rewriter);
204 Value newStep = getValueOrCreateCastToIndexLike(rewriter, loc, indexType,
205 loopOp.getStep()[idx]);
206 lowerBounds.push_back(newLowerBound);
207 upperBounds.push_back(newUpperBound);
208 steps.push_back(newStep);
209 }
210
211 // Now create the nested ForOps using the pre-computed bounds
212 for (BlockArgument iv : loopOp.getBody().getArguments()) {
213 size_t idx = iv.getArgNumber();
214
215 // For nested loops, insert inside the previous loop's body
216 if (idx > 0)
217 rewriter.setInsertionPointToStart(forOps.back().getBody());
218
219 scf::ForOp forOp = scf::ForOp::create(rewriter, loc, lowerBounds[idx],
220 upperBounds[idx], steps[idx]);
221 forOps.push_back(forOp);
222 mapping.map(iv, forOp.getInductionVar());
223 }
224
225 // Set insertion point inside the innermost loop for IV casts and body cloning
226 rewriter.setInsertionPointToStart(forOps.back().getBody());
227
228 // Handle IV type conversion (index -> original type)
229 SmallVector<Value> scfIVs;
230 for (scf::ForOp forOp : forOps)
231 scfIVs.push_back(forOp.getInductionVar());
232 mapACCLoopIVsToSCFIVs(loopOp, scfIVs, rewriter, mapping);
233
234 // Clone the loop body into the innermost scf.for
235 cloneACCRegionInto(&loopOp.getRegion(), forOps.back().getBody(),
236 rewriter.getInsertionPoint(), mapping, rewriter);
237
238 // Optionally collapse nested loops
239 if (enableCollapse && forOps.size() > 1)
240 if (failed(coalesceLoops(rewriter, forOps)))
241 loopOp.emitError("failed to collapse acc.loop");
242
243 return forOps.front();
244}
245
246scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp,
247 RewriterBase &rewriter) {
248 assert(!loopOp.getUnstructured() &&
249 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
250 "loops");
251 assert(
252 rewriter.getInsertionBlock() &&
253 !loopOp->isProperAncestor(rewriter.getInsertionBlock()->getParentOp()) &&
254 "builder insertion point must not be inside the loop being converted");
255
256 Location loc = loopOp->getLoc();
257
258 SmallVector<Value> lowerBounds, upperBounds, steps;
259
260 // Normalize all loops: lb=0, step=1, ub=tripCount
261 Value lb = arith::ConstantIndexOp::create(rewriter, loc, 0);
262 Value step = arith::ConstantIndexOp::create(rewriter, loc, 1);
263
264 for (auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
265 bool inclusiveUpperbound = false;
266 if (loopOp.getInclusiveUpperbound().has_value())
267 inclusiveUpperbound = loopOp.getInclusiveUpperbound().value()[idx];
268
269 Value ub = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
270 loopOp.getUpperbound()[idx],
271 loopOp.getStep()[idx], inclusiveUpperbound);
272
273 lowerBounds.push_back(lb);
274 upperBounds.push_back(ub);
275 steps.push_back(step);
276 }
277
278 auto parallelOp =
279 scf::ParallelOp::create(rewriter, loc, lowerBounds, upperBounds, steps);
280
281 // Create IV type conversions
282 IRMapping mapping;
283 rewriter.setInsertionPointToStart(parallelOp.getBody());
284 mapACCLoopIVsToSCFIVs(loopOp, parallelOp.getInductionVars(), rewriter,
285 mapping);
286
287 if (!loopOp.getRegion().hasOneBlock()) {
288 auto exeRegion = wrapMultiBlockRegionWithSCFExecuteRegion(
289 loopOp.getRegion(), mapping, loc, rewriter);
290 if (!exeRegion) {
291 rewriter.eraseOp(parallelOp);
292 return nullptr;
293 }
294 } else {
295 cloneACCRegionInto(&loopOp.getRegion(), parallelOp.getBody(),
296 rewriter.getInsertionPoint(), mapping, rewriter);
297 }
298
299 // Denormalize IV uses
300 rewriter.setInsertionPointToStart(parallelOp.getBody());
301 for (auto [idx, iv] : llvm::enumerate(parallelOp.getBody()->getArguments()))
302 if (!iv.use_empty())
303 normalizeIVUses(rewriter, loc, iv, loopOp.getLowerbound()[idx],
304 loopOp.getStep()[idx]);
305
306 return parallelOp;
307}
308
309scf::ExecuteRegionOp
311 RewriterBase &rewriter) {
312 assert(loopOp.getUnstructured() &&
313 "use convertACCLoopToSCFFor for structured loops");
314 assert(
315 rewriter.getInsertionBlock() &&
316 !loopOp->isProperAncestor(rewriter.getInsertionBlock()->getParentOp()) &&
317 "builder insertion point must not be inside the loop being converted");
318
319 IRMapping mapping;
320 return wrapMultiBlockRegionWithSCFExecuteRegion(loopOp.getRegion(), mapping,
321 loopOp->getLoc(), rewriter);
322}
323
324} // namespace acc
325} // namespace mlir
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
#define add(a, b)
This class represents an argument of a Block.
Definition Value.h:309
Block represents an ordered list of Operations.
Definition Block.h:33
OpListType::iterator iterator
Definition Block.h:150
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
Definition Block.cpp:27
Operation * getTerminator()
Get the terminator operation of this block.
Definition Block.cpp:249
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition Block.cpp:31
IndexType getIndexType()
Definition Builders.cpp:55
This is a utility class for mapping one set of IR entities to another.
Definition IRMapping.h:26
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
Definition IRMapping.h:30
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
RAII guard to reset the insertion point of the builder when destroyed.
Definition Builders.h:350
This class helps build Operations.
Definition Builders.h:209
Block::iterator getInsertionPoint() const
Returns the current insertion point of the builder.
Definition Builders.h:447
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition Builders.h:433
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition Builders.h:400
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Definition Builders.h:438
void cloneRegionBefore(Region &region, Region &parent, Region::iterator before, IRMapping &mapping)
Clone the blocks that belong to "region" before the given position in another region "parent".
Definition Builders.cpp:593
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Definition Builders.h:444
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition Operation.h:686
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
Block & back()
Definition Region.h:64
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition Region.h:200
bool hasOneBlock()
Return true if this region has exactly one block.
Definition Region.h:68
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Block * splitBlock(Block *block, Block::iterator before)
Split the operations starting at "before" (inclusive) out of the given block into a new block,...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
void mergeBlocks(Block *source, Block *dest, ValueRange argValues={})
Inline the operations of block 'source' into the end of block 'dest'.
This class provides an abstraction over the various different ranges of value types.
Definition TypeRange.h:37
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
bool use_empty() const
Returns true if this value has no uses.
Definition Value.h:208
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use...
Definition Value.cpp:71
Location getLoc() const
Return the location of this value.
Definition Value.cpp:24
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Definition ArithOps.cpp:363
scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp, RewriterBase &rewriter)
Convert acc.loop to scf.parallel.
scf::ExecuteRegionOp convertUnstructuredACCLoopToSCFExecuteRegion(LoopOp loopOp, RewriterBase &rewriter)
Convert an unstructured acc.loop to scf.execute_region.
scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter, bool enableCollapse)
Convert a structured acc.loop to scf.for.
Include the generated interface declarations.
Value getValueOrCreateCastToIndexLike(OpBuilder &b, Location loc, Type targetType, Value value)
Create a cast from an index-like value (index or integer) to another index-like value.
Definition Utils.cpp:120
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
Definition Utils.cpp:987