29 Value step,
bool inclusiveUpperbound) {
30 Type type =
b.getIndexType();
37 if (!inclusiveUpperbound) {
39 ub =
b.createOrFold<arith::SubIOp>(loc,
ub, one,
40 arith::IntegerOverflowFlags::nsw);
43 Value sub =
b.createOrFold<arith::SubIOp>(loc,
ub, lb,
44 arith::IntegerOverflowFlags::nsw);
45 Value add =
b.createOrFold<arith::AddIOp>(loc, sub, step,
46 arith::IntegerOverflowFlags::nsw);
47 return b.createOrFold<arith::DivSIOp>(loc,
add, step);
52static Value getExclusiveUpperBoundAsIndex(acc::LoopOp loopOp,
size_t ivPos,
54 bool isInclusive =
false;
55 if (loopOp.getInclusiveUpperbound().has_value())
56 isInclusive = loopOp.getInclusiveUpperboundAttr().asArrayRef()[ivPos];
58 Value origUB = loopOp.getUpperbound()[ivPos];
60 Type indexType =
b.getIndexType();
66 ub =
b.createOrFold<arith::AddIOp>(loc,
ub, one,
67 arith::IntegerOverflowFlags::nsw);
75static void mapACCLoopIVsToSCFIVs(acc::LoopOp accLoop,
ValueRange newIVs,
77 for (
auto [origIV, newIV] :
78 llvm::zip(accLoop.getBody().getArguments(), newIVs)) {
80 b, accLoop->getLoc(), origIV.getType(), newIV);
81 mapping.
map(origIV, replacementIV);
90 Type indexType =
b.getIndexType();
96 arith::MulIOp::create(
b, loc, iv, step, arith::IntegerOverflowFlags::nsw);
97 Value denormalized = arith::AddIOp::create(
b, loc, scaled, lb,
98 arith::IntegerOverflowFlags::nsw);
114 assert(src->
hasOneBlock() &&
"expected single-block region");
119 postInsertBlock->getIterator(), mapping);
121 auto lastNewBlock = std::prev(postInsertBlock->getIterator());
124 Operation *terminator = lastNewBlock->getTerminator();
126 if (
auto yieldOp = dyn_cast<acc::YieldOp>(terminator)) {
127 newInsertionPoint = std::prev(yieldOp->getIterator());
129 }
else if (
auto terminatorOp = dyn_cast<acc::TerminatorOp>(terminator)) {
130 newInsertionPoint = std::prev(terminatorOp->getIterator());
131 rewriter.
eraseOp(terminatorOp);
133 llvm_unreachable(
"unexpected terminator in ACC region");
137 rewriter.
mergeBlocks(postInsertBlock, &*lastNewBlock);
140 Block *firstNewBlock = &*std::next(dest->getIterator());
143 return newInsertionPoint;
147static scf::ExecuteRegionOp
148wrapMultiBlockRegionWithSCFExecuteRegion(
Region &region,
IRMapping &mapping,
150 auto exeRegionOp = scf::ExecuteRegionOp::create(rewriter, loc,
TypeRange{});
153 exeRegionOp.getRegion().end(), mapping);
157 if (
auto yieldOp = dyn_cast<acc::YieldOp>(terminator)) {
158 if (yieldOp.getNumOperands() > 0) {
160 "acc.loop with results not yet supported");
163 }
else if (!isa<acc::TerminatorOp>(terminator)) {
164 llvm_unreachable(
"unexpected terminator in ACC region");
169 scf::YieldOp::create(rewriter, loc);
179 bool enableCollapse) {
180 assert(!loopOp.getUnstructured() &&
181 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
200 size_t idx = iv.getArgNumber();
202 rewriter, loc, indexType, loopOp.getLowerbound()[idx]);
203 Value newUpperBound = getExclusiveUpperBoundAsIndex(loopOp, idx, rewriter);
205 loopOp.getStep()[idx]);
206 lowerBounds.push_back(newLowerBound);
207 upperBounds.push_back(newUpperBound);
208 steps.push_back(newStep);
213 size_t idx = iv.getArgNumber();
219 scf::ForOp forOp = scf::ForOp::create(rewriter, loc, lowerBounds[idx],
220 upperBounds[idx], steps[idx]);
221 forOps.push_back(forOp);
222 mapping.
map(iv, forOp.getInductionVar());
230 for (scf::ForOp forOp : forOps)
231 scfIVs.push_back(forOp.getInductionVar());
232 mapACCLoopIVsToSCFIVs(loopOp, scfIVs, rewriter, mapping);
235 cloneACCRegionInto(&loopOp.getRegion(), forOps.back().getBody(),
239 if (enableCollapse && forOps.size() > 1)
241 loopOp.emitError(
"failed to collapse acc.loop");
243 return forOps.front();
248 assert(!loopOp.getUnstructured() &&
249 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
254 "builder insertion point must not be inside the loop being converted");
264 for (
auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
265 bool inclusiveUpperbound =
false;
266 if (loopOp.getInclusiveUpperbound().has_value())
267 inclusiveUpperbound = loopOp.getInclusiveUpperbound().value()[idx];
269 Value ub = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
270 loopOp.getUpperbound()[idx],
271 loopOp.getStep()[idx], inclusiveUpperbound);
273 lowerBounds.push_back(lb);
274 upperBounds.push_back(
ub);
275 steps.push_back(step);
279 scf::ParallelOp::create(rewriter, loc, lowerBounds, upperBounds, steps);
284 mapACCLoopIVsToSCFIVs(loopOp, parallelOp.getInductionVars(), rewriter,
287 if (!loopOp.getRegion().hasOneBlock()) {
288 auto exeRegion = wrapMultiBlockRegionWithSCFExecuteRegion(
289 loopOp.getRegion(), mapping, loc, rewriter);
295 cloneACCRegionInto(&loopOp.getRegion(), parallelOp.getBody(),
301 for (
auto [idx, iv] : llvm::enumerate(parallelOp.getBody()->getArguments()))
303 normalizeIVUses(rewriter, loc, iv, loopOp.getLowerbound()[idx],
304 loopOp.getStep()[idx]);
312 assert(loopOp.getUnstructured() &&
313 "use convertACCLoopToSCFFor for structured loops");
317 "builder insertion point must not be inside the loop being converted");
320 return wrapMultiBlockRegionWithSCFExecuteRegion(loopOp.getRegion(), mapping,
321 loopOp->getLoc(), rewriter);
This class represents an argument of a Block.
Block represents an ordered list of Operations.
OpListType::iterator iterator
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
Operation * getTerminator()
Get the terminator operation of this block.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
This is a utility class for mapping one set of IR entities to another.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Block::iterator getInsertionPoint() const
Returns the current insertion point of the builder.
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
void cloneRegionBefore(Region &region, Region &parent, Region::iterator before, IRMapping &mapping)
Clone the blocks that belong to "region" before the given position in another region "parent".
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Operation is the basic unit of execution within MLIR.
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers that may be listening.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Operation * getParentOp()
Return the parent operation this region is attached to.
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to track mutations and create new operations.
Block * splitBlock(Block *block, Block::iterator before)
Split the operations starting at "before" (inclusive) out of the given block into a new block, and return it.
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
void mergeBlocks(Block *source, Block *dest, ValueRange argValues={})
Inline the operations of block 'source' into the end of block 'dest'.
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.
bool use_empty() const
Returns true if this value has no uses.
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use the other value instead, except if the user is listed in 'exceptions'.
Location getLoc() const
Return the location of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp, RewriterBase &rewriter)
Convert acc.loop to scf.parallel.
scf::ExecuteRegionOp convertUnstructuredACCLoopToSCFExecuteRegion(LoopOp loopOp, RewriterBase &rewriter)
Convert an unstructured acc.loop to scf.execute_region.
scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter, bool enableCollapse)
Convert a structured acc.loop to scf.for.
Include the generated interface declarations.
Value getValueOrCreateCastToIndexLike(OpBuilder &b, Location loc, Type targetType, Value value)
Create a cast from an index-like value (index or integer) to another index-like value.
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.