MLIR 23.0.0git
OpenACCUtilsLoop.cpp
Go to the documentation of this file.
1//===- OpenACCUtilsLoop.cpp - OpenACC Loop Utilities ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains utility functions for converting OpenACC loops to SCF.
10//
11//===----------------------------------------------------------------------===//
12
14
20#include "mlir/IR/IRMapping.h"
22#include "llvm/Support/ErrorHandling.h"
23
24using namespace mlir;
25
26namespace {
27
28/// Calculate trip count for a loop: (ub - lb + step) / step
29/// If inclusiveUpperbound is false, subtracts 1 from ub first.
30static Value calculateTripCount(OpBuilder &b, Location loc, Value lb, Value ub,
31 Value step, bool inclusiveUpperbound) {
32 Type type = b.getIndexType();
33
34 // Convert original loop arguments to index type
35 lb = getValueOrCreateCastToIndexLike(b, loc, type, lb);
37 step = getValueOrCreateCastToIndexLike(b, loc, type, step);
38
39 if (!inclusiveUpperbound) {
41 ub = b.createOrFold<arith::SubIOp>(loc, ub, one,
42 arith::IntegerOverflowFlags::nsw);
43 }
44
45 Value sub = b.createOrFold<arith::SubIOp>(loc, ub, lb,
46 arith::IntegerOverflowFlags::nsw);
47 Value add = b.createOrFold<arith::AddIOp>(loc, sub, step,
48 arith::IntegerOverflowFlags::nsw);
49 return b.createOrFold<arith::DivSIOp>(loc, add, step);
50}
51
52/// Handle differing types between SCF (index) and ACC loops.
53/// Creates casts from the new SCF IVs to the original ACC IV types and updates
54/// the mapping. The newIVs should correspond 1:1 with the ACC loop's IVs.
55static void mapACCLoopIVsToSCFIVs(acc::LoopOp accLoop, ValueRange newIVs,
56 OpBuilder &b, IRMapping &mapping) {
57 for (auto [origIV, newIV] :
58 llvm::zip(accLoop.getBody().getArguments(), newIVs)) {
60 b, accLoop->getLoc(), origIV.getType(), newIV);
61 mapping.map(origIV, replacementIV);
62 }
63}
64
65/// Normalize IV uses after converting to normalized loop form.
66/// For normalized loops (lb=0, step=1), we need to denormalize the IV:
67/// original_iv = new_iv * orig_step + orig_lb
68static void normalizeIVUses(OpBuilder &b, Location loc, Value iv, Value origLB,
69 Value origStep) {
70 Type indexType = b.getIndexType();
71 Value lb = getValueOrCreateCastToIndexLike(b, loc, indexType, origLB);
72 Value step = getValueOrCreateCastToIndexLike(b, loc, indexType, origStep);
73
74 // new_iv * step + lb
75 Value scaled =
76 arith::MulIOp::create(b, loc, iv, step, arith::IntegerOverflowFlags::nsw);
77 Value denormalized = arith::AddIOp::create(b, loc, scaled, lb,
78 arith::IntegerOverflowFlags::nsw);
79
80 // Replace uses of iv with denormalized value, except for the ops that
81 // compute the denormalized value itself (muli and addi)
83 exceptions.insert(scaled.getDefiningOp());
84 exceptions.insert(denormalized.getDefiningOp());
85 iv.replaceAllUsesExcept(denormalized, exceptions);
86}
87
88/// Helper used by loop conversion: clone region and return insertion point
89/// only.
90static Block::iterator cloneACCRegionIntoForLoop(Region *src, Block *dest,
91 Block::iterator insertionPoint,
92 IRMapping &mapping,
93 RewriterBase &rewriter) {
94 auto [replacements, ip] =
95 acc::cloneACCRegionInto(src, dest, insertionPoint, mapping, ValueRange{});
96 (void)replacements;
97 return ip;
98}
99
100} // namespace
101
102namespace mlir {
103namespace acc {
104
105std::pair<SmallVector<Value>, Block::iterator>
107 IRMapping &mapping, ValueRange resultsToReplace) {
108 if (!src->hasOneBlock())
109 llvm_unreachable("cloneACCRegionInto: multi-block region not supported "
110 "(requires scf.execute_region)");
111
112 Region *insertRegion = dest->getParent();
113 Block *postInsertBlock = dest->splitBlock(inlinePoint);
114 src->cloneInto(insertRegion, postInsertBlock->getIterator(), mapping);
115
116 SmallVector<Value> replacements;
117 Block *lastNewBlock = &*std::prev(postInsertBlock->getIterator());
118
120 if (auto yieldOp = dyn_cast<acc::YieldOp>(lastNewBlock->getTerminator())) {
121 for (auto [replacement, orig] :
122 llvm::zip(yieldOp.getOperands(), resultsToReplace)) {
124 replacements.push_back(replacement);
125 }
126 ip = std::prev(yieldOp->getIterator());
127 yieldOp.erase();
128 } else {
129 auto terminatorOp =
130 dyn_cast<acc::TerminatorOp>(lastNewBlock->getTerminator());
131 if (!terminatorOp)
132 llvm_unreachable(
133 "cloneACCRegionInto: expected acc.yield or acc.terminator");
134 ip = std::prev(terminatorOp->getIterator());
135 terminatorOp.erase();
136 }
137
138 lastNewBlock->getOperations().splice(lastNewBlock->end(),
139 postInsertBlock->getOperations());
140 postInsertBlock->erase();
141
142 Block *firstNewBlock = &*std::next(dest->getIterator());
143 dest->getOperations().splice(dest->end(), firstNewBlock->getOperations());
144 firstNewBlock->erase();
145 return {replacements, ip};
146}
147
148/// Wrap a multi-block region with scf.execute_region.
149scf::ExecuteRegionOp
151 Location loc, RewriterBase &rewriter) {
152 SmallVector<Operation *> terminators;
153 for (Block &block : region.getBlocks()) {
154 if (block.empty())
155 continue;
156 Operation *term = block.getTerminator();
157 if (term->getNumSuccessors() == 0)
158 terminators.push_back(term);
159 }
160 SmallVector<Type> resultTypes;
161 if (!terminators.empty())
162 for (Value operand : terminators.front()->getOperands())
163 resultTypes.push_back(operand.getType());
164
165 auto exeRegionOp =
166 scf::ExecuteRegionOp::create(rewriter, loc, TypeRange(resultTypes));
167
168 rewriter.cloneRegionBefore(region, exeRegionOp.getRegion(),
169 exeRegionOp.getRegion().end(), mapping);
170
171 for (Operation *term : terminators) {
172 Operation *blockTerminator = mapping.lookup(term);
173 assert(blockTerminator && "expected terminator to be in mapping");
174 rewriter.setInsertionPoint(blockTerminator);
175 (void)scf::YieldOp::create(rewriter, blockTerminator->getLoc(),
176 blockTerminator->getOperands());
177 rewriter.eraseOp(blockTerminator);
178 }
179
180 return exeRegionOp;
181}
182
183scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
184 bool enableCollapse) {
185 assert(!loopOp.getUnstructured() &&
186 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
187 "loops");
188
189 Location loc = loopOp->getLoc();
190
191 IRMapping mapping;
193
194 OpBuilder::InsertionGuard guard(rewriter);
195 rewriter.setInsertionPoint(loopOp);
196
197 // Normalize all loops: lb=0, step=1, ub=tripCount.
198 // scf.for requires a positive step, but acc.loop may have arbitrary steps
199 // (including negative). Normalizing unconditionally keeps this consistent
200 // with convertACCLoopToSCFParallel and lets later passes fold constants.
201 Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
202 Value one = arith::ConstantIndexOp::create(rewriter, loc, 1);
203
204 SmallVector<Value> tripCounts;
205 for (auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
206 bool inclusiveUpperbound = false;
207 if (loopOp.getInclusiveUpperbound().has_value())
208 inclusiveUpperbound =
209 loopOp.getInclusiveUpperboundAttr().asArrayRef()[idx];
210
211 Value tc = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
212 loopOp.getUpperbound()[idx],
213 loopOp.getStep()[idx], inclusiveUpperbound);
214 tripCounts.push_back(tc);
215 }
216
217 for (auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
218 // For nested loops, insert inside the previous loop's body
219 if (idx > 0)
220 rewriter.setInsertionPointToStart(forOps.back().getBody());
221
222 scf::ForOp forOp =
223 scf::ForOp::create(rewriter, loc, zero, tripCounts[idx], one);
224 forOps.push_back(forOp);
225 mapping.map(iv, forOp.getInductionVar());
226 }
227
228 // Set insertion point inside the innermost loop for IV casts and body cloning
229 rewriter.setInsertionPointToStart(forOps.back().getBody());
230
231 // Handle IV type conversion (index -> original type)
232 SmallVector<Value> scfIVs;
233 for (scf::ForOp forOp : forOps)
234 scfIVs.push_back(forOp.getInductionVar());
235 mapACCLoopIVsToSCFIVs(loopOp, scfIVs, rewriter, mapping);
236
237 // Clone the loop body into the innermost scf.for
238 cloneACCRegionIntoForLoop(&loopOp.getRegion(), forOps.back().getBody(),
239 rewriter.getInsertionPoint(), mapping, rewriter);
240
241 // Denormalize IV uses: original_iv = normalized_iv * orig_step + orig_lb
242 for (size_t idx = 0; idx < forOps.size(); ++idx) {
243 Value iv = forOps[idx].getInductionVar();
244 if (!iv.use_empty()) {
245 rewriter.setInsertionPointToStart(forOps[idx].getBody());
246 normalizeIVUses(rewriter, loc, iv, loopOp.getLowerbound()[idx],
247 loopOp.getStep()[idx]);
248 }
249 }
250
251 // Optionally collapse nested loops
252 if (enableCollapse && forOps.size() > 1)
253 if (failed(coalesceLoops(rewriter, forOps)))
254 loopOp.emitError("failed to collapse acc.loop");
255
256 return forOps.front();
257}
258
259scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp,
260 RewriterBase &rewriter) {
261 assert(!loopOp.getUnstructured() &&
262 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
263 "loops");
264 assert(
265 rewriter.getInsertionBlock() &&
266 !loopOp->isProperAncestor(rewriter.getInsertionBlock()->getParentOp()) &&
267 "builder insertion point must not be inside the loop being converted");
268
269 Location loc = loopOp->getLoc();
270
271 SmallVector<Value> lowerBounds, upperBounds, steps;
272
273 // Normalize all loops: lb=0, step=1, ub=tripCount
274 Value lb = arith::ConstantIndexOp::create(rewriter, loc, 0);
275 Value step = arith::ConstantIndexOp::create(rewriter, loc, 1);
276
277 for (auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
278 bool inclusiveUpperbound = false;
279 if (loopOp.getInclusiveUpperbound().has_value())
280 inclusiveUpperbound = loopOp.getInclusiveUpperbound().value()[idx];
281
282 Value ub = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
283 loopOp.getUpperbound()[idx],
284 loopOp.getStep()[idx], inclusiveUpperbound);
285
286 lowerBounds.push_back(lb);
287 upperBounds.push_back(ub);
288 steps.push_back(step);
289 }
290
291 auto parallelOp =
292 scf::ParallelOp::create(rewriter, loc, lowerBounds, upperBounds, steps);
293
294 // Create IV type conversions
295 IRMapping mapping;
296 rewriter.setInsertionPointToStart(parallelOp.getBody());
297 mapACCLoopIVsToSCFIVs(loopOp, parallelOp.getInductionVars(), rewriter,
298 mapping);
299
300 if (!loopOp.getRegion().hasOneBlock()) {
302 loopOp.getRegion(), mapping, loc, rewriter);
303 if (!exeRegion) {
304 rewriter.eraseOp(parallelOp);
305 return nullptr;
306 }
307 } else {
308 cloneACCRegionIntoForLoop(&loopOp.getRegion(), parallelOp.getBody(),
309 rewriter.getInsertionPoint(), mapping, rewriter);
310 }
311
312 // Denormalize IV uses
313 rewriter.setInsertionPointToStart(parallelOp.getBody());
314 for (auto [idx, iv] : llvm::enumerate(parallelOp.getBody()->getArguments()))
315 if (!iv.use_empty())
316 normalizeIVUses(rewriter, loc, iv, loopOp.getLowerbound()[idx],
317 loopOp.getStep()[idx]);
318
319 return parallelOp;
320}
321
322scf::ExecuteRegionOp
324 RewriterBase &rewriter) {
325 assert(loopOp.getUnstructured() &&
326 "use convertACCLoopToSCFFor for structured loops");
327 assert(
328 rewriter.getInsertionBlock() &&
329 !loopOp->isProperAncestor(rewriter.getInsertionBlock()->getParentOp()) &&
330 "builder insertion point must not be inside the loop being converted");
331
332 IRMapping mapping;
333 return wrapMultiBlockRegionWithSCFExecuteRegion(loopOp.getRegion(), mapping,
334 loopOp->getLoc(), rewriter);
335}
336
337} // namespace acc
338} // namespace mlir
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
*if copies could not be generated due to yet unimplemented cases *copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock *specify the insertion points where the incoming copies and outgoing should be the output argument nBegin is set to its * replacement(set to `begin` if no invalidation happens). Since outgoing *copies could have been inserted at `end`
#define add(a, b)
Block represents an ordered list of Operations.
Definition Block.h:33
OpListType::iterator iterator
Definition Block.h:150
void erase()
Unlink this Block from its parent region and delete it.
Definition Block.cpp:66
Block * splitBlock(iterator splitBefore)
Split the block into two blocks before the specified operation or iterator.
Definition Block.cpp:323
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
Definition Block.cpp:27
OpListType & getOperations()
Definition Block.h:147
Operation * getTerminator()
Get the terminator operation of this block.
Definition Block.cpp:249
iterator end()
Definition Block.h:154
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition Block.cpp:31
This is a utility class for mapping one set of IR entities to another.
Definition IRMapping.h:26
auto lookup(T from) const
Lookup a mapped value within the map.
Definition IRMapping.h:72
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
Definition IRMapping.h:30
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
RAII guard to reset the insertion point of the builder when destroyed.
Definition Builders.h:350
This class helps build Operations.
Definition Builders.h:209
Block::iterator getInsertionPoint() const
Returns the current insertion point of the builder.
Definition Builders.h:447
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition Builders.h:433
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition Builders.h:400
void cloneRegionBefore(Region &region, Region &parent, Region::iterator before, IRMapping &mapping)
Clone the blocks that belong to "region" before the given position in another region "parent".
Definition Builders.cpp:593
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Definition Builders.h:444
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
unsigned getNumSuccessors()
Definition Operation.h:732
Location getLoc()
The source location the operation was defined or derived from.
Definition Operation.h:241
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition Operation.h:404
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
void cloneInto(Region *dest, IRMapping &mapper)
Clone the internal blocks from this region into dest.
Definition Region.cpp:70
iterator end()
Definition Region.h:56
BlockListType & getBlocks()
Definition Region.h:45
bool hasOneBlock()
Return true if this region has exactly one block.
Definition Region.h:68
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:389
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
bool use_empty() const
Returns true if this value has no uses.
Definition Value.h:208
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use...
Definition Value.cpp:71
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Definition ArithOps.cpp:363
scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp, RewriterBase &rewriter)
Convert acc.loop to scf.parallel.
scf::ExecuteRegionOp wrapMultiBlockRegionWithSCFExecuteRegion(Region &region, IRMapping &mapping, Location loc, RewriterBase &rewriter)
Wrap a multi-block region in an scf.execute_region.
scf::ExecuteRegionOp convertUnstructuredACCLoopToSCFExecuteRegion(LoopOp loopOp, RewriterBase &rewriter)
Convert an unstructured acc.loop to scf.execute_region.
std::pair< llvm::SmallVector< Value >, Block::iterator > cloneACCRegionInto(Region *src, Block *dest, Block::iterator inlinePoint, IRMapping &mapping, ValueRange resultsToReplace)
Clone an ACC region into a destination block at the given insertion point.
scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter, bool enableCollapse)
Convert a structured acc.loop to scf.for.
Include the generated interface declarations.
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
Value getValueOrCreateCastToIndexLike(OpBuilder &b, Location loc, Type targetType, Value value)
Create a cast from an index-like value (index or integer) to another index-like value.
Definition Utils.cpp:122
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
Definition Utils.cpp:1023