23#include "llvm/Support/ErrorHandling.h"
32 Value step,
bool inclusiveUpperbound) {
33 Type type =
b.getIndexType();
40 if (!inclusiveUpperbound) {
42 ub =
b.createOrFold<arith::SubIOp>(loc,
ub, one,
43 arith::IntegerOverflowFlags::nsw);
46 Value sub =
b.createOrFold<arith::SubIOp>(loc,
ub, lb,
47 arith::IntegerOverflowFlags::nsw);
48 Value add =
b.createOrFold<arith::AddIOp>(loc, sub, step,
49 arith::IntegerOverflowFlags::nsw);
50 return b.createOrFold<arith::DivSIOp>(loc,
add, step);
56static void mapACCLoopIVsToSCFIVs(acc::LoopOp accLoop,
ValueRange newIVs,
58 for (
auto [origIV, newIV] :
59 llvm::zip(accLoop.getBody().getArguments(), newIVs)) {
61 b, accLoop->getLoc(), origIV.getType(), newIV);
62 mapping.
map(origIV, replacementIV);
71 Type indexType =
b.getIndexType();
77 arith::MulIOp::create(
b, loc, iv, step, arith::IntegerOverflowFlags::nsw);
78 Value denormalized = arith::AddIOp::create(
b, loc, scaled, lb,
79 arith::IntegerOverflowFlags::nsw);
95 auto [replacements, ip] =
110 llvm_unreachable(
"cloneACCRegionInto: multi-block region not supported "
111 "(requires scf.execute_region)");
115 src->
cloneInto(insertRegion, postInsertBlock->getIterator(), mapping);
118 Block *lastNewBlock = &*std::prev(postInsertBlock->getIterator());
121 if (
auto yieldOp = dyn_cast<acc::YieldOp>(lastNewBlock->
getTerminator())) {
123 llvm::zip(yieldOp.getOperands(), resultsToReplace)) {
127 ip = std::prev(yieldOp->getIterator());
134 "cloneACCRegionInto: expected acc.yield or acc.terminator");
135 ip = std::prev(terminatorOp->getIterator());
136 terminatorOp.erase();
141 postInsertBlock->
erase();
143 Block *firstNewBlock = &*std::next(dest->getIterator());
145 firstNewBlock->
erase();
146 return {replacements, ip};
153 bool convertFuncReturn) {
159 if ((convertFuncReturn && isa<func::ReturnOp>(*term)) ||
160 isa<acc::YieldOp>(*term))
161 terminators.push_back(term);
164 if (!terminators.empty())
165 for (
Value operand : terminators.front()->getOperands())
166 resultTypes.push_back(operand.getType());
169 scf::ExecuteRegionOp::create(rewriter, loc,
TypeRange(resultTypes));
172 exeRegionOp.getRegion().
end(), mapping);
176 assert(blockTerminator &&
"expected terminator to be in mapping");
178 (
void)scf::YieldOp::create(rewriter, blockTerminator->
getLoc(),
180 rewriter.
eraseOp(blockTerminator);
187 bool enableCollapse) {
188 assert(!loopOp.getUnstructured() &&
189 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
208 for (
auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
209 bool inclusiveUpperbound =
false;
210 if (loopOp.getInclusiveUpperbound().has_value())
211 inclusiveUpperbound =
212 loopOp.getInclusiveUpperboundAttr().asArrayRef()[idx];
214 Value tc = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
215 loopOp.getUpperbound()[idx],
216 loopOp.getStep()[idx], inclusiveUpperbound);
217 tripCounts.push_back(tc);
220 for (
auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
226 scf::ForOp::create(rewriter, loc, zero, tripCounts[idx], one);
227 forOps.push_back(forOp);
228 mapping.
map(iv, forOp.getInductionVar());
236 for (scf::ForOp forOp : forOps)
237 scfIVs.push_back(forOp.getInductionVar());
238 mapACCLoopIVsToSCFIVs(loopOp, scfIVs, rewriter, mapping);
241 cloneACCRegionIntoForLoop(&loopOp.getRegion(), forOps.back().getBody(),
245 for (
size_t idx = 0; idx < forOps.size(); ++idx) {
246 Value iv = forOps[idx].getInductionVar();
249 normalizeIVUses(rewriter, loc, iv, loopOp.getLowerbound()[idx],
250 loopOp.getStep()[idx]);
255 if (enableCollapse && forOps.size() > 1)
257 loopOp.emitError(
"failed to collapse acc.loop");
259 return forOps.front();
264 assert(!loopOp.getUnstructured() &&
265 "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
270 "builder insertion point must not be inside the loop being converted");
280 for (
auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
281 bool inclusiveUpperbound =
false;
282 if (loopOp.getInclusiveUpperbound().has_value())
283 inclusiveUpperbound = loopOp.getInclusiveUpperbound().value()[idx];
285 Value ub = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
286 loopOp.getUpperbound()[idx],
287 loopOp.getStep()[idx], inclusiveUpperbound);
289 lowerBounds.push_back(lb);
290 upperBounds.push_back(
ub);
291 steps.push_back(step);
295 scf::ParallelOp::create(rewriter, loc, lowerBounds, upperBounds, steps);
300 mapACCLoopIVsToSCFIVs(loopOp, parallelOp.getInductionVars(), rewriter,
303 if (!loopOp.getRegion().hasOneBlock()) {
305 loopOp.getRegion(), mapping, loc, rewriter);
311 cloneACCRegionIntoForLoop(&loopOp.getRegion(), parallelOp.getBody(),
317 for (
auto [idx, iv] : llvm::enumerate(parallelOp.getBody()->getArguments()))
319 normalizeIVUses(rewriter, loc, iv, loopOp.getLowerbound()[idx],
320 loopOp.getStep()[idx]);
328 assert(loopOp.getUnstructured() &&
329 "use convertACCLoopToSCFFor for structured loops");
333 "builder insertion point must not be inside the loop being converted");
337 loopOp->getLoc(), rewriter);
Returns failure if copies could not be generated due to yet unimplemented cases. `copyInPlacementStart` and `copyOutPlacementStart` in `copyPlacementBlock` specify the insertion points where the incoming copies and outgoing copies, respectively, should be inserted. The output argument `nBegin` is set to the replacement for `begin` (set to `begin` if no invalidation happens). Since outgoing copies could have been inserted at `end`, the output argument `nEnd` is set to the new end.
Block represents an ordered list of Operations.
OpListType::iterator iterator
void erase()
Unlink this Block from its parent region and delete it.
Block * splitBlock(iterator splitBefore)
Split the block into two blocks before the specified operation or iterator.
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
OpListType & getOperations()
Operation * getTerminator()
Get the terminator operation of this block.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
This is a utility class for mapping one set of IR entities to another.
auto lookup(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Block::iterator getInsertionPoint() const
Returns the current insertion point of the builder.
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
void cloneRegionBefore(Region &region, Region &parent, Region::iterator before, IRMapping &mapping)
Clone the blocks that belong to "region" before the given position in another region "parent".
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Operation is the basic unit of execution within MLIR.
Location getLoc()
The source location the operation was defined or derived from.
operand_range getOperands()
Returns an iterator on the underlying Value's.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
void cloneInto(Region *dest, IRMapping &mapper)
Clone the internal blocks from this region into dest.
BlockListType & getBlocks()
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use...
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
scf::ExecuteRegionOp wrapMultiBlockRegionWithSCFExecuteRegion(Region &region, IRMapping &mapping, Location loc, RewriterBase &rewriter, bool convertFuncReturn=false)
Wrap a multi-block region in an scf.execute_region.
scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp, RewriterBase &rewriter)
Convert acc.loop to scf.parallel.
scf::ExecuteRegionOp convertUnstructuredACCLoopToSCFExecuteRegion(LoopOp loopOp, RewriterBase &rewriter)
Convert an unstructured acc.loop to scf.execute_region.
std::pair< llvm::SmallVector< Value >, Block::iterator > cloneACCRegionInto(Region *src, Block *dest, Block::iterator inlinePoint, IRMapping &mapping, ValueRange resultsToReplace)
Clone an ACC region into a destination block at the given insertion point.
scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter, bool enableCollapse)
Convert a structured acc.loop to scf.for.
Include the generated interface declarations.
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
Value getValueOrCreateCastToIndexLike(OpBuilder &b, Location loc, Type targetType, Value value)
Create a cast from an index-like value (index or integer) to another index-like value.
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.