22#include "llvm/Support/ErrorHandling.h"
31 Value step,
bool inclusiveUpperbound) {
32 Type type =
b.getIndexType();
39 if (!inclusiveUpperbound) {
41 ub =
b.createOrFold<arith::SubIOp>(loc,
ub, one,
42 arith::IntegerOverflowFlags::nsw);
45 Value sub =
b.createOrFold<arith::SubIOp>(loc,
ub, lb,
46 arith::IntegerOverflowFlags::nsw);
47 Value add =
b.createOrFold<arith::AddIOp>(loc, sub, step,
48 arith::IntegerOverflowFlags::nsw);
49 return b.createOrFold<arith::DivSIOp>(loc,
add, step);
55static void mapACCLoopIVsToSCFIVs(acc::LoopOp accLoop,
ValueRange newIVs,
57 for (
auto [origIV, newIV] :
58 llvm::zip(accLoop.getBody().getArguments(), newIVs)) {
60 b, accLoop->getLoc(), origIV.getType(), newIV);
61 mapping.
map(origIV, replacementIV);
70 Type indexType =
b.getIndexType();
76 arith::MulIOp::create(
b, loc, iv, step, arith::IntegerOverflowFlags::nsw);
77 Value denormalized = arith::AddIOp::create(
b, loc, scaled, lb,
78 arith::IntegerOverflowFlags::nsw);
94 auto [replacements, ip] =
109 llvm_unreachable(
"cloneACCRegionInto: multi-block region not supported "
110 "(requires scf.execute_region)");
114 src->
cloneInto(insertRegion, postInsertBlock->getIterator(), mapping);
117 Block *lastNewBlock = &*std::prev(postInsertBlock->getIterator());
120 if (
auto yieldOp = dyn_cast<acc::YieldOp>(lastNewBlock->
getTerminator())) {
122 llvm::zip(yieldOp.getOperands(), resultsToReplace)) {
126 ip = std::prev(yieldOp->getIterator());
133 "cloneACCRegionInto: expected acc.yield or acc.terminator");
134 ip = std::prev(terminatorOp->getIterator());
135 terminatorOp.erase();
140 postInsertBlock->
erase();
142 Block *firstNewBlock = &*std::next(dest->getIterator());
144 firstNewBlock->
erase();
145 return {replacements, ip};
158 terminators.push_back(term);
161 if (!terminators.empty())
162 for (
Value operand : terminators.front()->getOperands())
163 resultTypes.push_back(operand.getType());
166 scf::ExecuteRegionOp::create(rewriter, loc,
TypeRange(resultTypes));
169 exeRegionOp.getRegion().
end(), mapping);
173 assert(blockTerminator &&
"expected terminator to be in mapping");
175 (
void)scf::YieldOp::create(rewriter, blockTerminator->
getLoc(),
177 rewriter.
eraseOp(blockTerminator);
184 bool enableCollapse) {
185 assert(!loopOp.getUnstructured() &&
186 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
205 for (
auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
206 bool inclusiveUpperbound =
false;
207 if (loopOp.getInclusiveUpperbound().has_value())
208 inclusiveUpperbound =
209 loopOp.getInclusiveUpperboundAttr().asArrayRef()[idx];
211 Value tc = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
212 loopOp.getUpperbound()[idx],
213 loopOp.getStep()[idx], inclusiveUpperbound);
214 tripCounts.push_back(tc);
217 for (
auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
223 scf::ForOp::create(rewriter, loc, zero, tripCounts[idx], one);
224 forOps.push_back(forOp);
225 mapping.
map(iv, forOp.getInductionVar());
233 for (scf::ForOp forOp : forOps)
234 scfIVs.push_back(forOp.getInductionVar());
235 mapACCLoopIVsToSCFIVs(loopOp, scfIVs, rewriter, mapping);
238 cloneACCRegionIntoForLoop(&loopOp.getRegion(), forOps.back().getBody(),
242 for (
size_t idx = 0; idx < forOps.size(); ++idx) {
243 Value iv = forOps[idx].getInductionVar();
246 normalizeIVUses(rewriter, loc, iv, loopOp.getLowerbound()[idx],
247 loopOp.getStep()[idx]);
252 if (enableCollapse && forOps.size() > 1)
254 loopOp.emitError(
"failed to collapse acc.loop");
256 return forOps.front();
261 assert(!loopOp.getUnstructured() &&
262 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
267 "builder insertion point must not be inside the loop being converted");
277 for (
auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
278 bool inclusiveUpperbound =
false;
279 if (loopOp.getInclusiveUpperbound().has_value())
280 inclusiveUpperbound = loopOp.getInclusiveUpperbound().value()[idx];
282 Value ub = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
283 loopOp.getUpperbound()[idx],
284 loopOp.getStep()[idx], inclusiveUpperbound);
286 lowerBounds.push_back(lb);
287 upperBounds.push_back(
ub);
288 steps.push_back(step);
292 scf::ParallelOp::create(rewriter, loc, lowerBounds, upperBounds, steps);
297 mapACCLoopIVsToSCFIVs(loopOp, parallelOp.getInductionVars(), rewriter,
300 if (!loopOp.getRegion().hasOneBlock()) {
302 loopOp.getRegion(), mapping, loc, rewriter);
308 cloneACCRegionIntoForLoop(&loopOp.getRegion(), parallelOp.getBody(),
314 for (
auto [idx, iv] : llvm::enumerate(parallelOp.getBody()->getArguments()))
316 normalizeIVUses(rewriter, loc, iv, loopOp.getLowerbound()[idx],
317 loopOp.getStep()[idx]);
325 assert(loopOp.getUnstructured() &&
326 "use convertACCLoopToSCFFor for structured loops");
330 "builder insertion point must not be inside the loop being converted");
334 loopOp->getLoc(), rewriter);
*if copies could not be generated due to yet unimplemented cases *copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock *specify the insertion points where the incoming copies and outgoing should be the output argument nBegin is set to its * replacement(set to `begin` if no invalidation happens). Since outgoing *copies could have been inserted at `end`
Block represents an ordered list of Operations.
OpListType::iterator iterator
void erase()
Unlink this Block from its parent region and delete it.
Block * splitBlock(iterator splitBefore)
Split the block into two blocks before the specified operation or iterator.
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
OpListType & getOperations()
Operation * getTerminator()
Get the terminator operation of this block.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
This is a utility class for mapping one set of IR entities to another.
auto lookup(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Block::iterator getInsertionPoint() const
Returns the current insertion point of the builder.
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
void cloneRegionBefore(Region &region, Region &parent, Region::iterator before, IRMapping &mapping)
Clone the blocks that belong to "region" before the given position in another region "parent".
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Operation is the basic unit of execution within MLIR.
unsigned getNumSuccessors()
Location getLoc()
The source location the operation was defined or derived from.
operand_range getOperands()
Returns an iterator on the underlying Value's.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
void cloneInto(Region *dest, IRMapping &mapper)
Clone the internal blocks from this region into dest.
BlockListType & getBlocks()
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use...
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp, RewriterBase &rewriter)
Convert acc.loop to scf.parallel.
scf::ExecuteRegionOp wrapMultiBlockRegionWithSCFExecuteRegion(Region &region, IRMapping &mapping, Location loc, RewriterBase &rewriter)
Wrap a multi-block region in an scf.execute_region.
scf::ExecuteRegionOp convertUnstructuredACCLoopToSCFExecuteRegion(LoopOp loopOp, RewriterBase &rewriter)
Convert an unstructured acc.loop to scf.execute_region.
std::pair< llvm::SmallVector< Value >, Block::iterator > cloneACCRegionInto(Region *src, Block *dest, Block::iterator inlinePoint, IRMapping &mapping, ValueRange resultsToReplace)
Clone an ACC region into a destination block at the given insertion point.
scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter, bool enableCollapse)
Convert a structured acc.loop to scf.for.
Include the generated interface declarations.
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
Value getValueOrCreateCastToIndexLike(OpBuilder &b, Location loc, Type targetType, Value value)
Create a cast from an index-like value (index or integer) to another index-like value.
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.