MLIR 23.0.0git
OpenACCUtilsTiling.cpp
Go to the documentation of this file.
1//===- OpenACCUtilsTiling.cpp - OpenACC Loop Tiling Utilities -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains utility functions for tiling OpenACC loops.
10//
11//===----------------------------------------------------------------------===//
12
14
20
21// Resolve unknown tile sizes (represented as -1 for tile(*)) to the default.
22// Returns a value with the same type as targetType.
24 int32_t defaultTileSize,
25 mlir::Type targetType,
26 mlir::RewriterBase &rewriter,
27 mlir::Location loc) {
28 auto constVal = mlir::getConstantIntValue(tileSize);
29 if (constVal && *constVal < 0) {
30 // Create constant with the target type directly
31 return mlir::arith::ConstantOp::create(
32 rewriter, loc, targetType,
33 rewriter.getIntegerAttr(targetType, defaultTileSize));
34 }
35 return mlir::getValueOrCreateCastToIndexLike(rewriter, loc, targetType,
36 tileSize);
37}
38
39// Remove vector/worker attributes from loop
40static void removeWorkerVectorFromLoop(mlir::acc::LoopOp loop) {
41 if (loop.hasVector() || loop.getVectorValue()) {
42 loop.removeVectorAttr();
43 loop.removeVectorOperandsDeviceTypeAttr();
44 } else if (loop.hasWorker() || loop.getWorkerValue()) {
45 loop.removeWorkerAttr();
46 loop.removeWorkerNumOperandsDeviceTypeAttr();
47 }
48}
49
50// Create a new ACC loop with new steps, lb, ub from original loop
51static mlir::acc::LoopOp
52createACCLoopFromOriginal(mlir::acc::LoopOp origLoop,
55 mlir::DenseBoolArrayAttr inclusiveUBAttr,
56 mlir::acc::CombinedConstructsTypeAttr combinedAttr,
57 mlir::Location loc, bool preserveCollapse) {
58 mlir::ArrayAttr collapseAttr = mlir::ArrayAttr{};
59 mlir::ArrayAttr collapseDeviceTypeAttr = mlir::ArrayAttr{};
60 if (preserveCollapse) {
61 collapseAttr = origLoop.getCollapseAttr();
62 collapseDeviceTypeAttr = origLoop.getCollapseDeviceTypeAttr();
63 }
64 auto newLoop = mlir::acc::LoopOp::create(
65 rewriter, loc, origLoop->getResultTypes(), lb, ub, step, inclusiveUBAttr,
66 collapseAttr, collapseDeviceTypeAttr, origLoop.getGangOperands(),
67 origLoop.getGangOperandsArgTypeAttr(),
68 origLoop.getGangOperandsSegmentsAttr(),
69 origLoop.getGangOperandsDeviceTypeAttr(), origLoop.getWorkerNumOperands(),
70 origLoop.getWorkerNumOperandsDeviceTypeAttr(),
71 origLoop.getVectorOperands(), origLoop.getVectorOperandsDeviceTypeAttr(),
72 origLoop.getSeqAttr(), origLoop.getIndependentAttr(),
73 origLoop.getAuto_Attr(), origLoop.getGangAttr(), origLoop.getWorkerAttr(),
74 origLoop.getVectorAttr(), mlir::ValueRange{}, mlir::DenseI32ArrayAttr{},
75 mlir::ArrayAttr{}, origLoop.getCacheOperands(),
76 origLoop.getPrivateOperands(), origLoop.getFirstprivateOperands(),
77 origLoop.getReductionOperands(), combinedAttr);
78 return newLoop;
79}
80
81// Create inner loop inside input loop
82static mlir::acc::LoopOp
83createInnerLoop(mlir::acc::LoopOp inputLoop, mlir::RewriterBase &rewriter,
85 mlir::DenseBoolArrayAttr inclusiveUBAttr, mlir::Location loc) {
86 mlir::acc::LoopOp elementLoop = createACCLoopFromOriginal(
87 inputLoop, rewriter, lb, ub, step, inclusiveUBAttr,
88 mlir::acc::CombinedConstructsTypeAttr{}, loc, /*preserveCollapse*/ false);
89
90 // Remove gang/worker attributes from inner loops
91 rewriter.startOpModification(elementLoop);
92 if (inputLoop.hasGang() ||
93 inputLoop.getGangValue(mlir::acc::GangArgType::Num) ||
94 inputLoop.getGangValue(mlir::acc::GangArgType::Dim) ||
95 inputLoop.getGangValue(mlir::acc::GangArgType::Static)) {
96 elementLoop.removeGangAttr();
97 elementLoop.removeGangOperandsArgTypeAttr();
98 elementLoop.removeGangOperandsSegmentsAttr();
99 elementLoop.removeGangOperandsDeviceTypeAttr();
100 }
101 if (inputLoop.hasVector() || inputLoop.getVectorValue()) {
102 elementLoop.removeWorkerAttr();
103 elementLoop.removeWorkerNumOperandsDeviceTypeAttr();
104 }
105 rewriter.finalizeOpModification(elementLoop);
106
107 // Create empty block in elementLoop and add IV argument
108 mlir::Block *blk = rewriter.createBlock(&elementLoop.getRegion(),
109 elementLoop.getRegion().begin());
110 rewriter.setInsertionPointToEnd(blk);
111 mlir::acc::YieldOp::create(rewriter, loc);
112 elementLoop.getBody().addArgument(
113 inputLoop.getBody().getArgument(0).getType(), loc);
114
115 return elementLoop;
116}
117
118// Move ops from source to target Loop and replace uses of IVs
119static void moveOpsAndReplaceIVs(mlir::acc::LoopOp sourceLoop,
120 mlir::acc::LoopOp targetLoop,
123 size_t nOps, mlir::RewriterBase &rewriter) {
124 // nOps includes the terminator; move all ops except the terminator:
125 // [begin, begin + nOps - 1)
126 mlir::Block::iterator begin = sourceLoop.getBody().begin();
127 mlir::Block::iterator end = std::next(begin, nOps - 1);
128
129 // Notify the rewriter about all ops being moved (and their nested ops).
130 // Directly moved ops have their parent block changed (rewriter fingerprint
131 // tracking invalidated). Nested ops may have operands replaced by
132 // replaceAllUsesInRegionWith below.
134 for (mlir::Block::iterator it = begin; it != end; ++it)
135 it->walk([&](mlir::Operation *op) {
136 movedOps.push_back(op);
137 rewriter.startOpModification(op);
138 });
139
140 targetLoop.getBody().getOperations().splice(
141 targetLoop.getBody().getOperations().begin(),
142 sourceLoop.getBody().getOperations(), begin, end);
143
144 // Replace uses of origIV with newIV
145 for (auto [i, newIV] : llvm::enumerate(newIVs))
146 mlir::replaceAllUsesInRegionWith(origIVs[i], newIV, targetLoop.getRegion());
147
148 for (mlir::Operation *op : movedOps)
149 rewriter.finalizeOpModification(op);
150}
151
152mlir::acc::LoopOp
154 const llvm::SmallVector<mlir::Value> &tileSizes,
155 int32_t defaultTileSize, mlir::RewriterBase &rewriter) {
156 // Tile collapsed and/or nested loops
157 mlir::acc::LoopOp outerLoop = tileLoops[0];
158 const mlir::Location loc = outerLoop.getLoc();
159
160 mlir::acc::LoopOp innerLoop = tileLoops[tileLoops.size() - 1];
167 size_t nOps = innerLoop.getBody().getOperations().size();
168
169 // Extract original inclusiveUBs
170 llvm::SmallVector<bool> inclusiveUBs;
171 for (auto tileLoop : tileLoops) {
172 for (auto [j, step] : llvm::enumerate(tileLoop.getStep())) {
173 // inclusiveUBs are present on the IR from Fortran frontend for DO loops
174 // but might not be present from other frontends (python)
175 // So check if it exists
176 if (tileLoop.getInclusiveUpperboundAttr())
177 inclusiveUBs.push_back(
178 tileLoop.getInclusiveUpperboundAttr().asArrayRef()[j]);
179 else
180 inclusiveUBs.push_back(false);
181 }
182 }
183
184 // Extract original ivs, UBs, steps, and calculate new steps
185 rewriter.setInsertionPoint(outerLoop);
186 for (auto [i, tileLoop] : llvm::enumerate(tileLoops)) {
187 for (auto arg : tileLoop.getBody().getArguments())
188 origIVs.push_back(arg);
189 for (auto ub : tileLoop.getUpperbound())
190 origUBs.push_back(ub);
191
192 llvm::SmallVector<mlir::Value, 3> currentLoopSteps;
193 for (auto [j, step] : llvm::enumerate(tileLoop.getStep())) {
194 origSteps.push_back(step);
195 if (i + j >= tileSizes.size()) {
196 currentLoopSteps.push_back(step);
197 } else {
199 tileSizes[i + j], defaultTileSize, step.getType(), rewriter, loc);
200 auto newLoopStep =
201 mlir::arith::MulIOp::create(rewriter, loc, step, tileSize);
202 currentLoopSteps.push_back(newLoopStep);
203 newSteps.push_back(newLoopStep);
204 }
205 }
206
207 rewriter.startOpModification(tileLoop);
208 tileLoop.getStepMutable().clear();
209 tileLoop.getStepMutable().append(currentLoopSteps);
210 rewriter.finalizeOpModification(tileLoop);
211 }
212
213 // Calculate new upper bounds for element loops
214 for (size_t i = 0; i < newSteps.size(); i++) {
215 rewriter.setInsertionPoint(innerLoop.getBody().getTerminator());
216 // UpperBound: min(origUB, origIV+(originalStep*tile_size))
217 auto stepped =
218 mlir::arith::AddIOp::create(rewriter, loc, origIVs[i], newSteps[i]);
219 mlir::Value newUB = stepped;
220 if (inclusiveUBs[i]) {
221 // Handle InclusiveUB
222 // UpperBound: min(origUB, origIV+(originalStep*tile_size - 1))
223 auto c1 = mlir::arith::ConstantOp::create(
224 rewriter, loc, newSteps[i].getType(),
225 rewriter.getIntegerAttr(newSteps[i].getType(), 1));
226 newUB = mlir::arith::SubIOp::create(rewriter, loc, stepped, c1);
227 }
228 newUBs.push_back(
229 mlir::arith::MinSIOp::create(rewriter, loc, origUBs[i], newUB));
230 }
231
232 // Create and insert nested elementLoopOps before terminator of outer loopOp
233 mlir::acc::LoopOp currentLoop = innerLoop;
234 for (size_t i = 0; i < tileSizes.size(); i++) {
235 rewriter.setInsertionPoint(currentLoop.getBody().getTerminator());
237 if (inclusiveUBs[i])
238 inclusiveUBAttr = rewriter.getDenseBoolArrayAttr({true});
239
240 mlir::acc::LoopOp elementLoop =
241 createInnerLoop(innerLoop, rewriter, mlir::ValueRange{origIVs[i]},
242 mlir::ValueRange{newUBs[i]},
243 mlir::ValueRange{origSteps[i]}, inclusiveUBAttr, loc);
244
245 // Remove vector/worker attributes from inner element loops except
246 // outermost element loop
247 if (i > 0) {
248 rewriter.startOpModification(elementLoop);
249 removeWorkerVectorFromLoop(elementLoop);
250 rewriter.finalizeOpModification(elementLoop);
251 }
252 newIVs.push_back(elementLoop.getBody().getArgument(0));
253 currentLoop = elementLoop;
254 }
255
256 // Remove vector/worker attributes from outer tile loops
257 for (auto tileLoop : tileLoops) {
258 rewriter.startOpModification(tileLoop);
260 rewriter.finalizeOpModification(tileLoop);
261 }
262
263 // Move ops from inner tile loop to inner element loop and replace IV uses
264 moveOpsAndReplaceIVs(innerLoop, currentLoop, newIVs, origIVs, nOps, rewriter);
265
266 return outerLoop;
267}
268
270mlir::acc::uncollapseLoops(mlir::acc::LoopOp origLoop, unsigned tileCount,
271 unsigned collapseCount,
272 mlir::RewriterBase &rewriter) {
275 mlir::Location loc = origLoop.getLoc();
276 llvm::SmallVector<bool> newInclusiveUBs;
277 llvm::SmallVector<mlir::Value, 3> lbs, ubs, steps;
278 for (unsigned i = 0; i < collapseCount; i++) {
279 // inclusiveUpperbound attribute might not be set, default to false
280 bool inclusiveUB = false;
281 if (origLoop.getInclusiveUpperboundAttr())
282 inclusiveUB = origLoop.getInclusiveUpperboundAttr().asArrayRef()[i];
283 newInclusiveUBs.push_back(inclusiveUB);
284 lbs.push_back(origLoop.getLowerbound()[i]);
285 ubs.push_back(origLoop.getUpperbound()[i]);
286 steps.push_back(origLoop.getStep()[i]);
287 }
288 mlir::acc::LoopOp outerLoop = createACCLoopFromOriginal(
289 origLoop, rewriter, lbs, ubs, steps,
290 rewriter.getDenseBoolArrayAttr(newInclusiveUBs),
291 origLoop.getCombinedAttr(), loc, /*preserveCollapse*/ true);
292 mlir::Block *blk = rewriter.createBlock(&outerLoop.getRegion(),
293 outerLoop.getRegion().begin());
294 rewriter.setInsertionPointToEnd(blk);
295 mlir::acc::YieldOp::create(rewriter, loc);
296 for (unsigned i = 0; i < collapseCount; i++) {
297 outerLoop.getBody().addArgument(origLoop.getBody().getArgument(i).getType(),
298 loc);
299 newIVs.push_back(outerLoop.getBody().getArgument(i));
300 }
301 newLoops.push_back(outerLoop);
302
303 mlir::acc::LoopOp currentLoopOp = outerLoop;
304 for (unsigned i = collapseCount; i < tileCount; i++) {
305 rewriter.setInsertionPoint(currentLoopOp.getBody().getTerminator());
306 bool inclusiveUB = false;
307 if (origLoop.getInclusiveUpperboundAttr())
308 inclusiveUB = origLoop.getInclusiveUpperboundAttr().asArrayRef()[i];
309 mlir::DenseBoolArrayAttr inclusiveUBAttr =
310 rewriter.getDenseBoolArrayAttr({inclusiveUB});
311 mlir::acc::LoopOp innerLoop = createInnerLoop(
312 origLoop, rewriter, mlir::ValueRange{origLoop.getLowerbound()[i]},
313 mlir::ValueRange{origLoop.getUpperbound()[i]},
314 mlir::ValueRange{origLoop.getStep()[i]}, inclusiveUBAttr, loc);
315 newIVs.push_back(innerLoop.getBody().getArgument(0));
316 newLoops.push_back(innerLoop);
317 currentLoopOp = innerLoop;
318 }
319 // Move ops from origLoop to innermost loop and replace uses of IVs
320 size_t nOps = origLoop.getBody().getOperations().size();
322 for (auto arg : origLoop.getBody().getArguments())
323 origIVs.push_back(arg);
324 moveOpsAndReplaceIVs(origLoop, currentLoopOp, newIVs, origIVs, nOps,
325 rewriter);
326
327 return newLoops;
328}
static void removeWorkerVectorFromLoop(mlir::acc::LoopOp loop)
static mlir::acc::LoopOp createACCLoopFromOriginal(mlir::acc::LoopOp origLoop, mlir::RewriterBase &rewriter, mlir::ValueRange lb, mlir::ValueRange ub, mlir::ValueRange step, mlir::DenseBoolArrayAttr inclusiveUBAttr, mlir::acc::CombinedConstructsTypeAttr combinedAttr, mlir::Location loc, bool preserveCollapse)
static void moveOpsAndReplaceIVs(mlir::acc::LoopOp sourceLoop, mlir::acc::LoopOp targetLoop, llvm::ArrayRef< mlir::Value > newIVs, llvm::ArrayRef< mlir::Value > origIVs, size_t nOps, mlir::RewriterBase &rewriter)
static mlir::Value resolveAndCastTileSize(mlir::Value tileSize, int32_t defaultTileSize, mlir::Type targetType, mlir::RewriterBase &rewriter, mlir::Location loc)
static mlir::acc::LoopOp createInnerLoop(mlir::acc::LoopOp inputLoop, mlir::RewriterBase &rewriter, mlir::ValueRange lb, mlir::ValueRange ub, mlir::ValueRange step, mlir::DenseBoolArrayAttr inclusiveUBAttr, mlir::Location loc)
Block represents an ordered list of Operations.
Definition Block.h:33
OpListType::iterator iterator
Definition Block.h:150
IntegerAttr getIntegerAttr(Type type, int64_t value)
Definition Builders.cpp:232
DenseBoolArrayAttr getDenseBoolArrayAttr(ArrayRef< bool > values)
Tensor-typed DenseArrayAttr getters.
Definition Builders.cpp:155
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes={}, ArrayRef< Location > locs={})
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Definition Builders.cpp:434
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition Builders.h:400
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Definition Builders.h:438
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void finalizeOpModification(Operation *op)
This method is used to signal the end of an in-place modification of the given operation.
virtual void startOpModification(Operation *op)
This method is used to notify the rewriter that an in-place operation modification is about to happen...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:389
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
mlir::acc::LoopOp tileACCLoops(llvm::SmallVector< mlir::acc::LoopOp > &tileLoops, const llvm::SmallVector< mlir::Value > &tileSizes, int32_t defaultTileSize, mlir::RewriterBase &rewriter)
Tile ACC loops according to the given tile sizes.
llvm::SmallVector< mlir::acc::LoopOp > uncollapseLoops(mlir::acc::LoopOp origLoop, unsigned tileCount, unsigned collapseCount, mlir::RewriterBase &rewriter)
Uncollapse tile loops with multiple IVs and collapseCount < tileCount.
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition Utils.cpp:307
Value getValueOrCreateCastToIndexLike(OpBuilder &b, Location loc, Type targetType, Value value)
Create a cast from an index-like value (index or integer) to another index-like value.
Definition Utils.cpp:122
detail::DenseArrayAttrImpl< int32_t > DenseI32ArrayAttr
detail::DenseArrayAttrImpl< bool > DenseBoolArrayAttr
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.