29 Value step,
bool inclusiveUpperbound) {
30 Type type =
b.getIndexType();
37 if (!inclusiveUpperbound) {
39 ub =
b.createOrFold<arith::SubIOp>(loc,
ub, one,
40 arith::IntegerOverflowFlags::nsw);
43 Value sub =
b.createOrFold<arith::SubIOp>(loc,
ub, lb,
44 arith::IntegerOverflowFlags::nsw);
45 Value add =
b.createOrFold<arith::AddIOp>(loc, sub, step,
46 arith::IntegerOverflowFlags::nsw);
47 return b.createOrFold<arith::DivSIOp>(loc,
add, step);
52static Value getExclusiveUpperBoundAsIndex(acc::LoopOp loopOp,
size_t ivPos,
54 bool isInclusive =
false;
55 if (loopOp.getInclusiveUpperbound().has_value())
56 isInclusive = loopOp.getInclusiveUpperboundAttr().asArrayRef()[ivPos];
58 Value origUB = loopOp.getUpperbound()[ivPos];
60 Type indexType =
b.getIndexType();
66 ub =
b.createOrFold<arith::AddIOp>(loc,
ub, one,
67 arith::IntegerOverflowFlags::nsw);
75static void mapACCLoopIVsToSCFIVs(acc::LoopOp accLoop,
ValueRange newIVs,
77 for (
auto [origIV, newIV] :
78 llvm::zip(accLoop.getBody().getArguments(), newIVs)) {
80 b, accLoop->getLoc(), origIV.getType(), newIV);
81 mapping.
map(origIV, replacementIV);
90 Type indexType =
b.getIndexType();
96 arith::MulIOp::create(
b, loc, iv, step, arith::IntegerOverflowFlags::nsw);
97 Value denormalized = arith::AddIOp::create(
b, loc, scaled, lb,
98 arith::IntegerOverflowFlags::nsw);
113 assert(src->
hasOneBlock() &&
"expected single-block region");
117 src->
cloneInto(insertRegion, postInsertBlock->getIterator(), mapping);
119 auto lastNewBlock = std::prev(postInsertBlock->getIterator());
122 Operation *terminator = lastNewBlock->getTerminator();
124 if (
auto yieldOp = dyn_cast<acc::YieldOp>(terminator)) {
125 newInsertionPoint = std::prev(yieldOp->getIterator());
127 }
else if (
auto terminatorOp = dyn_cast<acc::TerminatorOp>(terminator)) {
128 newInsertionPoint = std::prev(terminatorOp->getIterator());
129 terminatorOp.erase();
131 llvm_unreachable(
"unexpected terminator in ACC region");
135 lastNewBlock->getOperations().splice(lastNewBlock->end(),
137 postInsertBlock->
erase();
140 auto firstNewBlock = std::next(dest->getIterator());
142 firstNewBlock->erase();
144 return newInsertionPoint;
148static scf::ExecuteRegionOp
149wrapMultiBlockRegionWithSCFExecuteRegion(
Region ®ion,
IRMapping &mapping,
151 auto exeRegionOp = scf::ExecuteRegionOp::create(rewriter, loc,
TypeRange{});
154 exeRegionOp.getRegion().end(), mapping);
158 if (
auto yieldOp = dyn_cast<acc::YieldOp>(terminator)) {
159 if (yieldOp.getNumOperands() > 0) {
161 "acc.loop with results not yet supported");
164 }
else if (!isa<acc::TerminatorOp>(terminator)) {
165 llvm_unreachable(
"unexpected terminator in ACC region");
170 scf::YieldOp::create(rewriter, loc);
180 bool enableCollapse) {
181 assert(!loopOp.getUnstructured() &&
182 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
197 size_t idx = iv.getArgNumber();
204 rewriter, loc, indexType, loopOp.getLowerbound()[idx]);
205 Value newUpperBound = getExclusiveUpperBoundAsIndex(loopOp, idx, rewriter);
207 loopOp.getStep()[idx]);
209 scf::ForOp forOp = scf::ForOp::create(rewriter, loc, newLowerBound,
210 newUpperBound, newStep);
211 forOps.push_back(forOp);
212 mapping.
map(iv, forOp.getInductionVar());
220 for (scf::ForOp forOp : forOps)
221 scfIVs.push_back(forOp.getInductionVar());
222 mapACCLoopIVsToSCFIVs(loopOp, scfIVs, rewriter, mapping);
225 cloneACCRegionInto(&loopOp.getRegion(), forOps.back().getBody(),
229 if (enableCollapse && forOps.size() > 1)
231 loopOp.emitError(
"failed to collapse acc.loop");
233 return forOps.front();
238 assert(!loopOp.getUnstructured() &&
239 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
244 "builder insertion point must not be inside the loop being converted");
254 for (
auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
255 bool inclusiveUpperbound =
false;
256 if (loopOp.getInclusiveUpperbound().has_value())
257 inclusiveUpperbound = loopOp.getInclusiveUpperbound().value()[idx];
259 Value ub = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
260 loopOp.getUpperbound()[idx],
261 loopOp.getStep()[idx], inclusiveUpperbound);
263 lowerBounds.push_back(lb);
264 upperBounds.push_back(
ub);
265 steps.push_back(step);
269 scf::ParallelOp::create(rewriter, loc, lowerBounds, upperBounds, steps);
274 mapACCLoopIVsToSCFIVs(loopOp, parallelOp.getInductionVars(), rewriter,
277 if (!loopOp.getRegion().hasOneBlock()) {
278 auto exeRegion = wrapMultiBlockRegionWithSCFExecuteRegion(
279 loopOp.getRegion(), mapping, loc, rewriter);
285 cloneACCRegionInto(&loopOp.getRegion(), parallelOp.getBody(),
291 for (
auto [idx, iv] : llvm::enumerate(parallelOp.getBody()->getArguments()))
293 normalizeIVUses(rewriter, loc, iv, loopOp.getLowerbound()[idx],
294 loopOp.getStep()[idx]);
302 assert(loopOp.getUnstructured() &&
303 "use convertACCLoopToSCFFor for structured loops");
307 "builder insertion point must not be inside the loop being converted");
310 return wrapMultiBlockRegionWithSCFExecuteRegion(loopOp.getRegion(), mapping,
311 loopOp->getLoc(), rewriter);
This class represents an argument of a Block.
Block represents an ordered list of Operations.
OpListType::iterator iterator
void erase()
Unlink this Block from its parent region and delete it.
Block * splitBlock(iterator splitBefore)
Split the block into two blocks before the specified operation or iterator.
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
OpListType & getOperations()
Operation * getTerminator()
Get the terminator operation of this block.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
This is a utility class for mapping one set of IR entities to another.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Block::iterator getInsertionPoint() const
Returns the current insertion point of the builder.
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
void cloneRegionBefore(Region &region, Region &parent, Region::iterator before, IRMapping &mapping)
Clone the blocks that belong to "region" before the given position in another region "parent".
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Operation is the basic unit of execution within MLIR.
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers that may be listening.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
void cloneInto(Region *dest, IRMapping &mapper)
Clone the internal blocks from this region into dest.
Operation * getParentOp()
Return the parent operation this region is attached to.
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to track mutations and create new operations.
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.
bool use_empty() const
Returns true if this value has no uses.
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use the other value instead, except if the user is listed in 'exceptions'.
Location getLoc() const
Return the location of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp, RewriterBase &rewriter)
Convert acc.loop to scf.parallel.
scf::ExecuteRegionOp convertUnstructuredACCLoopToSCFExecuteRegion(LoopOp loopOp, RewriterBase &rewriter)
Convert an unstructured acc.loop to scf.execute_region.
scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter, bool enableCollapse)
Convert a structured acc.loop to scf.for.
Include the generated interface declarations.
Value getValueOrCreateCastToIndexLike(OpBuilder &b, Location loc, Type targetType, Value value)
Create a cast from an index-like value (index or integer) to another index-like value.
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.