22 #define GEN_PASS_DEF_SCFPARALLELLOOPTILING
23 #include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
57 std::pair<ParallelOp, ParallelOp>
59 bool noMinMaxBounds) {
63 tileSizeConstants.reserve(op.getUpperBound().size());
64 for (
size_t i = 0, end = op.getUpperBound().size(); i != end; ++i) {
65 if (i < tileSizes.size())
66 tileSizeConstants.push_back(
70 tileSizeConstants.push_back(
76 newSteps.reserve(op.getStep().size());
77 for (
auto step : llvm::zip(op.getStep(), tileSizeConstants)) {
78 newSteps.push_back(arith::MulIOp::create(b, op.getLoc(), std::get<0>(step),
81 auto outerLoop = ParallelOp::create(b, op.getLoc(), op.getLowerBound(),
82 op.getUpperBound(), newSteps);
95 newBounds.reserve(op.getUpperBound().size());
96 bool needInboundCheck =
false;
97 for (
auto [lowerBound, upperBound, newStep, iv, step, tileSizeConstant] :
98 llvm::zip(outerLoop.getLowerBound(), outerLoop.getUpperBound(),
99 outerLoop.getStep(), outerLoop.getInductionVars(),
100 op.getStep(), tileSizeConstants)) {
102 auto lowerBoundConstant =
103 lowerBound.getDefiningOp<arith::ConstantIndexOp>();
104 auto upperBoundConstant =
105 upperBound.getDefiningOp<arith::ConstantIndexOp>();
106 auto stepConstant = step.getDefiningOp<arith::ConstantIndexOp>();
108 cast<arith::ConstantIndexOp>(tileSizeConstant.getDefiningOp()).value();
112 if (lowerBoundConstant && upperBoundConstant && stepConstant) {
114 lowerBoundConstant.value(),
115 stepConstant.value());
116 if (numIterations % tileSize == 0) {
117 newBounds.push_back(newStep);
123 if (noMinMaxBounds) {
124 newBounds.push_back(newStep);
125 needInboundCheck =
true;
132 affine::AffineMinOp::create(b, op.getLoc(), b.
getIndexType(), minMap,
135 auto innerLoop = ParallelOp::create(
139 if (noMinMaxBounds && needInboundCheck) {
144 for (
auto [outerUpperBound, outerIV, innerIV, innerStep] :
145 llvm::zip(outerLoop.getUpperBound(), outerLoop.getInductionVars(),
146 innerLoop.getInductionVars(), innerLoop.getStep())) {
149 Value index = arith::AddIOp::create(
151 arith::MulIOp::create(b, op.getLoc(), innerIV, innerStep), outerIV);
152 Value dimInbound = arith::CmpIOp::create(
153 b, op.getLoc(), arith::CmpIPredicate::ult, index, outerUpperBound);
154 inbound = arith::AndIOp::create(b, op.getLoc(), inbound, dimInbound);
156 auto ifInbound = IfOp::create(b, op.getLoc(),
159 ifInbound.getThenRegion().takeBody(op.getRegion());
160 Block &thenBlock = ifInbound.getThenRegion().
front();
164 scf::YieldOp::create(b, reduceOp->
getLoc());
168 innerLoop.getInductionVars(), outerLoop.getInductionVars()))) {
169 auto newIndex = arith::AddIOp::create(
170 b, op.getLoc(), std::get<0>(ivs.value()), std::get<1>(ivs.value()));
176 innerLoop.getRegion().takeBody(op.getRegion());
178 for (
auto ivs : llvm::zip(innerLoop.getInductionVars(),
179 outerLoop.getInductionVars())) {
180 Value innerIndex = std::get<0>(ivs);
181 auto newIndex = arith::AddIOp::create(b, op.getLoc(), std::get<0>(ivs),
188 return std::make_pair(outerLoop, innerLoop);
192 struct ParallelLoopTiling
193 :
public impl::SCFParallelLoopTilingBase<ParallelLoopTiling> {
194 ParallelLoopTiling() =
default;
196 bool noMinMaxBounds =
false) {
197 this->tileSizes = tileSizes;
198 this->noMinMaxBounds = noMinMaxBounds;
201 void runOnOperation()
override {
202 for (
auto tileSize : tileSizes)
205 "tile size cannot be 0");
206 return signalPassFailure();
208 auto *parentOp = getOperation();
211 for (ParallelOp ploop : innermostPloops) {
213 if (ploop.getNumReductions() == 0)
220 std::unique_ptr<Pass>
222 bool noMinMaxBounds) {
223 return std::make_unique<ParallelLoopTiling>(tileSizes, noMinMaxBounds);
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
Block represents an ordered list of Operations.
BlockArgument getArgument(unsigned i)
unsigned getNumArguments()
Operation * getTerminator()
Get the terminator operation of this block.
void eraseArguments(unsigned start, unsigned num)
Erases 'num' arguments from the index 'start'.
IntegerType getIntegerType(unsigned width)
MLIRContext * getContext() const
This class helps build Operations.
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Operation is the basic unit of execution within MLIR.
Location getLoc()
The source location the operation was defined or derived from.
void erase()
Remove this operation from its parent block and delete it.
MLIRContext & getContext()
Return the MLIR context for the current operation being transformed.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use the other value instead, except if the user is listed in 'exceptions'.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
static ConstantIntOp create(OpBuilder &builder, Location location, int64_t value, unsigned width)
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
llvm::TypeSize divideCeil(llvm::TypeSize numerator, uint64_t denominator)
Divides the known min value of the numerator by the denominator and rounds the result up to the next integer.
std::pair< ParallelOp, ParallelOp > tileParallelLoop(ParallelOp op, llvm::ArrayRef< int64_t > tileSizes, bool noMinMaxBounds)
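Tile a parallel loop of the form scf.parallel (i0, i1) = (arg0, arg1) to (arg2, arg3) step (arg4, arg5) into an outer parallel loop whose steps are multiplied by the tile sizes and an inner parallel loop that iterates within each tile; returns the (outer, inner) loop pair.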
Include the generated interface declarations.
std::unique_ptr< Pass > createParallelLoopTilingPass(llvm::ArrayRef< int64_t > tileSize={}, bool noMinMaxBounds=false)
Creates a pass which tiles innermost parallel loops.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
bool getInnermostParallelLoops(Operation *rootOp, SmallVectorImpl< scf::ParallelOp > &result)
Get a list of innermost parallel loops contained in rootOp.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed; this helps unify some of the attribute construction code.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.