22 #define GEN_PASS_DEF_SCFPARALLELLOOPTILING
23 #include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
57 std::pair<ParallelOp, ParallelOp>
59 bool noMinMaxBounds) {
61 auto zero = b.
create<arith::ConstantIndexOp>(op.
getLoc(), 0);
63 tileSizeConstants.reserve(op.getUpperBound().size());
64 for (
size_t i = 0, end = op.getUpperBound().size(); i != end; ++i) {
65 if (i < tileSizes.size())
66 tileSizeConstants.push_back(
67 b.
create<arith::ConstantIndexOp>(op.
getLoc(), tileSizes[i]));
70 tileSizeConstants.push_back(
76 newSteps.reserve(op.getStep().size());
77 for (
auto step : llvm::zip(op.getStep(), tileSizeConstants)) {
78 newSteps.push_back(b.
create<arith::MulIOp>(op.
getLoc(), std::get<0>(step),
81 auto outerLoop = b.
create<ParallelOp>(op.
getLoc(), op.getLowerBound(),
82 op.getUpperBound(), newSteps);
95 newBounds.reserve(op.getUpperBound().size());
96 bool needInboundCheck =
false;
97 for (
auto [lowerBound, upperBound, newStep, iv, step, tileSizeConstant] :
98 llvm::zip(outerLoop.getLowerBound(), outerLoop.getUpperBound(),
99 outerLoop.getStep(), outerLoop.getInductionVars(),
100 op.getStep(), tileSizeConstants)) {
102 auto lowerBoundConstant =
103 dyn_cast_or_null<arith::ConstantIndexOp>(lowerBound.getDefiningOp());
104 auto upperBoundConstant =
105 dyn_cast_or_null<arith::ConstantIndexOp>(upperBound.getDefiningOp());
107 dyn_cast_or_null<arith::ConstantIndexOp>(step.getDefiningOp());
109 cast<arith::ConstantIndexOp>(tileSizeConstant.getDefiningOp()).value();
113 if (lowerBoundConstant && upperBoundConstant && stepConstant) {
115 lowerBoundConstant.value(),
116 stepConstant.value());
117 if (numIterations % tileSize == 0) {
118 newBounds.push_back(newStep);
124 if (noMinMaxBounds) {
125 newBounds.push_back(newStep);
126 needInboundCheck =
true;
136 auto innerLoop = b.
create<ParallelOp>(
140 if (noMinMaxBounds && needInboundCheck) {
145 for (
auto [outerUpperBound, outerIV, innerIV, innerStep] :
146 llvm::zip(outerLoop.getUpperBound(), outerLoop.getInductionVars(),
147 innerLoop.getInductionVars(), innerLoop.getStep())) {
154 op.
getLoc(), arith::CmpIPredicate::ult, index, outerUpperBound);
155 inbound = b.
create<arith::AndIOp>(op.
getLoc(), inbound, dimInbound);
160 ifInbound.getThenRegion().takeBody(op.
getRegion());
161 Block &thenBlock = ifInbound.getThenRegion().
front();
169 innerLoop.getInductionVars(), outerLoop.getInductionVars()))) {
170 auto newIndex = b.
create<arith::AddIOp>(
171 op.
getLoc(), std::get<0>(ivs.value()), std::get<1>(ivs.value()));
173 .replaceAllUsesExcept(newIndex, newIndex);
177 innerLoop.getRegion().takeBody(op.
getRegion());
179 for (
auto ivs : llvm::zip(innerLoop.getInductionVars(),
180 outerLoop.getInductionVars())) {
181 Value innerIndex = std::get<0>(ivs);
182 auto newIndex = b.
create<arith::AddIOp>(op.
getLoc(), std::get<0>(ivs),
189 return std::make_pair(outerLoop, innerLoop);
193 struct ParallelLoopTiling
194 :
public impl::SCFParallelLoopTilingBase<ParallelLoopTiling> {
195 ParallelLoopTiling() =
default;
197 bool noMinMaxBounds =
false) {
198 this->tileSizes = tileSizes;
199 this->noMinMaxBounds = noMinMaxBounds;
202 void runOnOperation()
override {
203 for (
auto tileSize : tileSizes)
206 "tile size cannot be 0");
207 return signalPassFailure();
209 auto *parentOp = getOperation();
212 for (ParallelOp ploop : innermostPloops) {
214 if (ploop.getNumReductions() == 0)
221 std::unique_ptr<Pass>
223 bool noMinMaxBounds) {
224 return std::make_unique<ParallelLoopTiling>(tileSizes, noMinMaxBounds);
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
Block represents an ordered list of Operations.
BlockArgument getArgument(unsigned i)
unsigned getNumArguments()
Operation * getTerminator()
Get the terminator operation of this block.
void eraseArguments(unsigned start, unsigned num)
Erases 'num' arguments from the index 'start'.
IntegerType getIntegerType(unsigned width)
MLIRContext * getContext() const
This class helps build Operations.
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Operation is the basic unit of execution within MLIR.
Location getLoc()
The source location the operation was defined or derived from.
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
void erase()
Remove this operation from its parent block and delete it.
MLIRContext & getContext()
Return the MLIR context for the current operation being transformed.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
llvm::TypeSize divideCeil(llvm::TypeSize numerator, uint64_t denominator)
Divides the known min value of the numerator by the denominator and rounds the result up to the next ...
std::pair< ParallelOp, ParallelOp > tileParallelLoop(ParallelOp op, llvm::ArrayRef< int64_t > tileSizes, bool noMinMaxBounds)
Tile a parallel loop of the form scf.parallel (i0, i1) = (arg0, arg1) to (arg2, arg3) step (arg4,...
Include the generated interface declarations.
std::unique_ptr< Pass > createParallelLoopTilingPass(llvm::ArrayRef< int64_t > tileSize={}, bool noMinMaxBounds=false)
Creates a pass which tiles innermost parallel loops.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
bool getInnermostParallelLoops(Operation *rootOp, SmallVectorImpl< scf::ParallelOp > &result)
Get a list of innermost parallel loops contained in rootOp.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.