MLIR 23.0.0git
OpenACCUtilsLoop.cpp
Go to the documentation of this file.
1//===- OpenACCUtilsLoop.cpp - OpenACC Loop Utilities ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains utility functions for converting OpenACC loops to SCF.
10//
11//===----------------------------------------------------------------------===//
12
14
21#include "mlir/IR/IRMapping.h"
23#include "llvm/Support/ErrorHandling.h"
24
25using namespace mlir;
26
27namespace {
28
29/// Calculate trip count for a loop: (ub - lb + step) / step
30/// If inclusiveUpperbound is false, subtracts 1 from ub first.
31static Value calculateTripCount(OpBuilder &b, Location loc, Value lb, Value ub,
32 Value step, bool inclusiveUpperbound) {
33 Type type = b.getIndexType();
34
35 // Convert original loop arguments to index type
36 lb = getValueOrCreateCastToIndexLike(b, loc, type, lb);
38 step = getValueOrCreateCastToIndexLike(b, loc, type, step);
39
40 if (!inclusiveUpperbound) {
42 ub = b.createOrFold<arith::SubIOp>(loc, ub, one,
43 arith::IntegerOverflowFlags::nsw);
44 }
45
46 Value sub = b.createOrFold<arith::SubIOp>(loc, ub, lb,
47 arith::IntegerOverflowFlags::nsw);
48 Value add = b.createOrFold<arith::AddIOp>(loc, sub, step,
49 arith::IntegerOverflowFlags::nsw);
50 return b.createOrFold<arith::DivSIOp>(loc, add, step);
51}
52
53/// Handle differing types between SCF (index) and ACC loops.
54/// Creates casts from the new SCF IVs to the original ACC IV types and updates
55/// the mapping. The newIVs should correspond 1:1 with the ACC loop's IVs.
56static void mapACCLoopIVsToSCFIVs(acc::LoopOp accLoop, ValueRange newIVs,
57 OpBuilder &b, IRMapping &mapping) {
58 for (auto [origIV, newIV] :
59 llvm::zip(accLoop.getBody().getArguments(), newIVs)) {
61 b, accLoop->getLoc(), origIV.getType(), newIV);
62 mapping.map(origIV, replacementIV);
63 }
64}
65
66/// Normalize IV uses after converting to normalized loop form.
67/// For normalized loops (lb=0, step=1), we need to denormalize the IV:
68/// original_iv = new_iv * orig_step + orig_lb
69static void normalizeIVUses(OpBuilder &b, Location loc, Value iv, Value origLB,
70 Value origStep) {
71 Type indexType = b.getIndexType();
72 Value lb = getValueOrCreateCastToIndexLike(b, loc, indexType, origLB);
73 Value step = getValueOrCreateCastToIndexLike(b, loc, indexType, origStep);
74
75 // new_iv * step + lb
76 Value scaled =
77 arith::MulIOp::create(b, loc, iv, step, arith::IntegerOverflowFlags::nsw);
78 Value denormalized = arith::AddIOp::create(b, loc, scaled, lb,
79 arith::IntegerOverflowFlags::nsw);
80
81 // Replace uses of iv with denormalized value, except for the ops that
82 // compute the denormalized value itself (muli and addi)
84 exceptions.insert(scaled.getDefiningOp());
85 exceptions.insert(denormalized.getDefiningOp());
86 iv.replaceAllUsesExcept(denormalized, exceptions);
87}
88
89/// Helper used by loop conversion: clone region and return insertion point
90/// only.
91static Block::iterator cloneACCRegionIntoForLoop(Region *src, Block *dest,
92 Block::iterator insertionPoint,
93 IRMapping &mapping,
94 RewriterBase &rewriter) {
95 auto [replacements, ip] =
96 acc::cloneACCRegionInto(src, dest, insertionPoint, mapping, ValueRange{});
97 (void)replacements;
98 return ip;
99}
100
101} // namespace
102
103namespace mlir {
104namespace acc {
105
106std::pair<SmallVector<Value>, Block::iterator>
108 IRMapping &mapping, ValueRange resultsToReplace) {
109 if (!src->hasOneBlock())
110 llvm_unreachable("cloneACCRegionInto: multi-block region not supported "
111 "(requires scf.execute_region)");
112
113 Region *insertRegion = dest->getParent();
114 Block *postInsertBlock = dest->splitBlock(inlinePoint);
115 src->cloneInto(insertRegion, postInsertBlock->getIterator(), mapping);
116
117 SmallVector<Value> replacements;
118 Block *lastNewBlock = &*std::prev(postInsertBlock->getIterator());
119
121 if (auto yieldOp = dyn_cast<acc::YieldOp>(lastNewBlock->getTerminator())) {
122 for (auto [replacement, orig] :
123 llvm::zip(yieldOp.getOperands(), resultsToReplace)) {
125 replacements.push_back(replacement);
126 }
127 ip = std::prev(yieldOp->getIterator());
128 yieldOp.erase();
129 } else {
130 auto terminatorOp =
131 dyn_cast<acc::TerminatorOp>(lastNewBlock->getTerminator());
132 if (!terminatorOp)
133 llvm_unreachable(
134 "cloneACCRegionInto: expected acc.yield or acc.terminator");
135 ip = std::prev(terminatorOp->getIterator());
136 terminatorOp.erase();
137 }
138
139 lastNewBlock->getOperations().splice(lastNewBlock->end(),
140 postInsertBlock->getOperations());
141 postInsertBlock->erase();
142
143 Block *firstNewBlock = &*std::next(dest->getIterator());
144 dest->getOperations().splice(dest->end(), firstNewBlock->getOperations());
145 firstNewBlock->erase();
146 return {replacements, ip};
147}
148
149/// Wrap a multi-block region with scf.execute_region.
150scf::ExecuteRegionOp
152 Location loc, RewriterBase &rewriter,
153 bool convertFuncReturn) {
154 SmallVector<Operation *> terminators;
155 for (Block &block : region.getBlocks()) {
156 if (block.empty())
157 continue;
158 Operation *term = block.getTerminator();
159 if ((convertFuncReturn && isa<func::ReturnOp>(*term)) ||
160 isa<acc::YieldOp>(*term))
161 terminators.push_back(term);
162 }
163 SmallVector<Type> resultTypes;
164 if (!terminators.empty())
165 for (Value operand : terminators.front()->getOperands())
166 resultTypes.push_back(operand.getType());
167
168 auto exeRegionOp =
169 scf::ExecuteRegionOp::create(rewriter, loc, TypeRange(resultTypes));
170
171 rewriter.cloneRegionBefore(region, exeRegionOp.getRegion(),
172 exeRegionOp.getRegion().end(), mapping);
173
174 for (Operation *term : terminators) {
175 Operation *blockTerminator = mapping.lookup(term);
176 assert(blockTerminator && "expected terminator to be in mapping");
177 rewriter.setInsertionPoint(blockTerminator);
178 (void)scf::YieldOp::create(rewriter, blockTerminator->getLoc(),
179 blockTerminator->getOperands());
180 rewriter.eraseOp(blockTerminator);
181 }
182
183 return exeRegionOp;
184}
185
186scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
187 bool enableCollapse) {
188 assert(!loopOp.getUnstructured() &&
189 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
190 "loops");
191
192 Location loc = loopOp->getLoc();
193
194 IRMapping mapping;
196
197 OpBuilder::InsertionGuard guard(rewriter);
198 rewriter.setInsertionPoint(loopOp);
199
200 // Normalize all loops: lb=0, step=1, ub=tripCount.
201 // scf.for requires a positive step, but acc.loop may have arbitrary steps
202 // (including negative). Normalizing unconditionally keeps this consistent
203 // with convertACCLoopToSCFParallel and lets later passes fold constants.
204 Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
205 Value one = arith::ConstantIndexOp::create(rewriter, loc, 1);
206
207 SmallVector<Value> tripCounts;
208 for (auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
209 bool inclusiveUpperbound = false;
210 if (loopOp.getInclusiveUpperbound().has_value())
211 inclusiveUpperbound =
212 loopOp.getInclusiveUpperboundAttr().asArrayRef()[idx];
213
214 Value tc = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
215 loopOp.getUpperbound()[idx],
216 loopOp.getStep()[idx], inclusiveUpperbound);
217 tripCounts.push_back(tc);
218 }
219
220 for (auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
221 // For nested loops, insert inside the previous loop's body
222 if (idx > 0)
223 rewriter.setInsertionPointToStart(forOps.back().getBody());
224
225 scf::ForOp forOp =
226 scf::ForOp::create(rewriter, loc, zero, tripCounts[idx], one);
227 forOps.push_back(forOp);
228 mapping.map(iv, forOp.getInductionVar());
229 }
230
231 // Set insertion point inside the innermost loop for IV casts and body cloning
232 rewriter.setInsertionPointToStart(forOps.back().getBody());
233
234 // Handle IV type conversion (index -> original type)
235 SmallVector<Value> scfIVs;
236 for (scf::ForOp forOp : forOps)
237 scfIVs.push_back(forOp.getInductionVar());
238 mapACCLoopIVsToSCFIVs(loopOp, scfIVs, rewriter, mapping);
239
240 // Clone the loop body into the innermost scf.for
241 cloneACCRegionIntoForLoop(&loopOp.getRegion(), forOps.back().getBody(),
242 rewriter.getInsertionPoint(), mapping, rewriter);
243
244 // Denormalize IV uses: original_iv = normalized_iv * orig_step + orig_lb
245 for (size_t idx = 0; idx < forOps.size(); ++idx) {
246 Value iv = forOps[idx].getInductionVar();
247 if (!iv.use_empty()) {
248 rewriter.setInsertionPointToStart(forOps[idx].getBody());
249 normalizeIVUses(rewriter, loc, iv, loopOp.getLowerbound()[idx],
250 loopOp.getStep()[idx]);
251 }
252 }
253
254 // Optionally collapse nested loops
255 if (enableCollapse && forOps.size() > 1)
256 if (failed(coalesceLoops(rewriter, forOps)))
257 loopOp.emitError("failed to collapse acc.loop");
258
259 return forOps.front();
260}
261
262scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp,
263 RewriterBase &rewriter) {
264 assert(!loopOp.getUnstructured() &&
265 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
266 "loops");
267 assert(
268 rewriter.getInsertionBlock() &&
269 !loopOp->isProperAncestor(rewriter.getInsertionBlock()->getParentOp()) &&
270 "builder insertion point must not be inside the loop being converted");
271
272 Location loc = loopOp->getLoc();
273
274 SmallVector<Value> lowerBounds, upperBounds, steps;
275
276 // Normalize all loops: lb=0, step=1, ub=tripCount
277 Value lb = arith::ConstantIndexOp::create(rewriter, loc, 0);
278 Value step = arith::ConstantIndexOp::create(rewriter, loc, 1);
279
280 for (auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
281 bool inclusiveUpperbound = false;
282 if (loopOp.getInclusiveUpperbound().has_value())
283 inclusiveUpperbound = loopOp.getInclusiveUpperbound().value()[idx];
284
285 Value ub = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
286 loopOp.getUpperbound()[idx],
287 loopOp.getStep()[idx], inclusiveUpperbound);
288
289 lowerBounds.push_back(lb);
290 upperBounds.push_back(ub);
291 steps.push_back(step);
292 }
293
294 auto parallelOp =
295 scf::ParallelOp::create(rewriter, loc, lowerBounds, upperBounds, steps);
296
297 // Create IV type conversions
298 IRMapping mapping;
299 rewriter.setInsertionPointToStart(parallelOp.getBody());
300 mapACCLoopIVsToSCFIVs(loopOp, parallelOp.getInductionVars(), rewriter,
301 mapping);
302
303 if (!loopOp.getRegion().hasOneBlock()) {
305 loopOp.getRegion(), mapping, loc, rewriter);
306 if (!exeRegion) {
307 rewriter.eraseOp(parallelOp);
308 return nullptr;
309 }
310 } else {
311 cloneACCRegionIntoForLoop(&loopOp.getRegion(), parallelOp.getBody(),
312 rewriter.getInsertionPoint(), mapping, rewriter);
313 }
314
315 // Denormalize IV uses
316 rewriter.setInsertionPointToStart(parallelOp.getBody());
317 for (auto [idx, iv] : llvm::enumerate(parallelOp.getBody()->getArguments()))
318 if (!iv.use_empty())
319 normalizeIVUses(rewriter, loc, iv, loopOp.getLowerbound()[idx],
320 loopOp.getStep()[idx]);
321
322 return parallelOp;
323}
324
325scf::ExecuteRegionOp
327 RewriterBase &rewriter) {
328 assert(loopOp.getUnstructured() &&
329 "use convertACCLoopToSCFFor for structured loops");
330 assert(
331 rewriter.getInsertionBlock() &&
332 !loopOp->isProperAncestor(rewriter.getInsertionBlock()->getParentOp()) &&
333 "builder insertion point must not be inside the loop being converted");
334
335 IRMapping mapping;
336 return wrapMultiBlockRegionWithSCFExecuteRegion(loopOp.getRegion(), mapping,
337 loopOp->getLoc(), rewriter);
338}
339
340} // namespace acc
341} // namespace mlir
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
*if copies could not be generated due to yet unimplemented cases *copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock *specify the insertion points where the incoming copies and outgoing should be the output argument nBegin is set to its * replacement(set to `begin` if no invalidation happens). Since outgoing *copies could have been inserted at `end`
#define add(a, b)
Block represents an ordered list of Operations.
Definition Block.h:33
OpListType::iterator iterator
Definition Block.h:150
void erase()
Unlink this Block from its parent region and delete it.
Definition Block.cpp:66
Block * splitBlock(iterator splitBefore)
Split the block into two blocks before the specified operation or iterator.
Definition Block.cpp:323
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
Definition Block.cpp:27
OpListType & getOperations()
Definition Block.h:147
Operation * getTerminator()
Get the terminator operation of this block.
Definition Block.cpp:249
iterator end()
Definition Block.h:154
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition Block.cpp:31
This is a utility class for mapping one set of IR entities to another.
Definition IRMapping.h:26
auto lookup(T from) const
Lookup a mapped value within the map.
Definition IRMapping.h:72
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
Definition IRMapping.h:30
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
RAII guard to reset the insertion point of the builder when destroyed.
Definition Builders.h:350
This class helps build Operations.
Definition Builders.h:209
Block::iterator getInsertionPoint() const
Returns the current insertion point of the builder.
Definition Builders.h:447
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition Builders.h:433
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition Builders.h:400
void cloneRegionBefore(Region &region, Region &parent, Region::iterator before, IRMapping &mapping)
Clone the blocks that belong to "region" before the given position in another region "parent".
Definition Builders.cpp:593
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Definition Builders.h:444
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
Location getLoc()
The source location the operation was defined or derived from.
Definition Operation.h:244
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition Operation.h:407
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
void cloneInto(Region *dest, IRMapping &mapper)
Clone the internal blocks from this region into dest.
Definition Region.cpp:70
iterator end()
Definition Region.h:56
BlockListType & getBlocks()
Definition Region.h:45
bool hasOneBlock()
Return true if this region has exactly one block.
Definition Region.h:68
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
bool use_empty() const
Returns true if this value has no uses.
Definition Value.h:208
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use...
Definition Value.cpp:71
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Definition ArithOps.cpp:363
scf::ExecuteRegionOp wrapMultiBlockRegionWithSCFExecuteRegion(Region &region, IRMapping &mapping, Location loc, RewriterBase &rewriter, bool convertFuncReturn=false)
Wrap a multi-block region in an scf.execute_region.
scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp, RewriterBase &rewriter)
Convert acc.loop to scf.parallel.
scf::ExecuteRegionOp convertUnstructuredACCLoopToSCFExecuteRegion(LoopOp loopOp, RewriterBase &rewriter)
Convert an unstructured acc.loop to scf.execute_region.
std::pair< llvm::SmallVector< Value >, Block::iterator > cloneACCRegionInto(Region *src, Block *dest, Block::iterator inlinePoint, IRMapping &mapping, ValueRange resultsToReplace)
Clone an ACC region into a destination block at the given insertion point.
scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter, bool enableCollapse)
Convert a structured acc.loop to scf.for.
Include the generated interface declarations.
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
Value getValueOrCreateCastToIndexLike(OpBuilder &b, Location loc, Type targetType, Value value)
Create a cast from an index-like value (index or integer) to another index-like value.
Definition Utils.cpp:122
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
Definition Utils.cpp:999