MLIR 22.0.0git
OpenACCUtilsTiling.cpp
Go to the documentation of this file.
1//===- OpenACCUtilsTiling.cpp - OpenACC Loop Tiling Utilities -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains utility functions for tiling OpenACC loops.
10//
11//===----------------------------------------------------------------------===//
12
14
20
21// Resolve unknown tile sizes (represented as -1 for tile(*)) to the default.
22// Returns a value with the same type as targetType.
//
// The check below is `< 0`, so any tile size that folds to a negative
// constant (not only -1) is treated as unknown and replaced by a new
// arith.constant of targetType holding defaultTileSize.
// Every other tile size (non-constant, or a non-negative constant) is handed
// to getValueOrCreateCastToIndexLike together with targetType.
// NOTE(review): listing line 23 is absent from this extract. The declaration
// summary further down this page shows the signature opening as
// `static mlir::Value resolveAndCastTileSize(mlir::Value tileSize, ...`.
24 int32_t defaultTileSize,
25 mlir::Type targetType,
26 mlir::RewriterBase &rewriter,
27 mlir::Location loc) {
// Empty optional when tileSize is not a constant integer.
28 auto constVal = mlir::getConstantIntValue(tileSize);
29 if (constVal && *constVal < 0) {
30 // Create constant with the target type directly
31 return mlir::arith::ConstantOp::create(
32 rewriter, loc, targetType,
33 rewriter.getIntegerAttr(targetType, defaultTileSize));
34 }
// Known tile size: reuse the value, casting to targetType if required.
35 return mlir::getValueOrCreateCastToIndexLike(rewriter, loc, targetType,
36 tileSize);
37}
38
39// Remove vector/worker attributes from loop
//
// Exactly one of the two is stripped per call:
//  - if hasVector() or getVectorValue() holds, the vector attribute and the
//    vector-operands device-type attribute are removed;
//  - otherwise, if hasWorker() or getWorkerValue() holds, the worker attribute
//    and the worker-num-operands device-type attribute are removed.
// Because of the `else if`, a loop that carries both vector and worker keeps
// its worker attributes.
// NOTE(review): presumably this mirrors how the OpenACC tile clause assigns
// worker/vector to tile vs. element loops - confirm against the spec.
// This helper does not notify a rewriter itself; the call visible in this
// listing is wrapped in startOpModification/finalizeOpModification.
40static void removeWorkerVectorFromLoop(mlir::acc::LoopOp loop) {
41 if (loop.hasVector() || loop.getVectorValue()) {
42 loop.removeVectorAttr();
43 loop.removeVectorOperandsDeviceTypeAttr();
44 } else if (loop.hasWorker() || loop.getWorkerValue()) {
45 loop.removeWorkerAttr();
46 loop.removeWorkerNumOperandsDeviceTypeAttr();
47 }
48}
49
50// Create a new ACC loop with new steps, lb, ub from original loop
//
// The new acc.loop is built through `rewriter` at `loc` with:
//  - the result types of origLoop;
//  - the caller-supplied lb / ub / step, inclusiveUBAttr and combinedAttr;
//  - origLoop's gang, worker and vector operands and attributes, its
//    seq / independent / auto attributes, and its cache, private,
//    firstprivate and reduction operands;
//  - origLoop's collapse attributes only when preserveCollapse is true,
//    otherwise null collapse attributes.
// No body block is created here; the callers in this file add the block,
// the terminator and the IV arguments afterwards.
// NOTE(review): listing lines 53-54 are absent from this extract. Per the
// declaration summary further down this page they hold the parameters
// `rewriter`, `lb`, `ub` and `step`.
51static mlir::acc::LoopOp
52createACCLoopFromOriginal(mlir::acc::LoopOp origLoop,
55 mlir::DenseBoolArrayAttr inclusiveUBAttr,
56 mlir::acc::CombinedConstructsTypeAttr combinedAttr,
57 mlir::Location loc, bool preserveCollapse) {
// Default to null collapse attributes; only copied over on request.
58 mlir::ArrayAttr collapseAttr = mlir::ArrayAttr{};
59 mlir::ArrayAttr collapseDeviceTypeAttr = mlir::ArrayAttr{};
60 if (preserveCollapse) {
61 collapseAttr = origLoop.getCollapseAttr();
62 collapseDeviceTypeAttr = origLoop.getCollapseDeviceTypeAttr();
63 }
// NOTE(review): the empty ValueRange / DenseI32ArrayAttr / ArrayAttr passed
// after getVectorAttr() are presumably the tile operands and their
// segment / device-type attributes, left empty so the new loop has no tile
// clause - TODO confirm against the LoopOp builder signature.
64 auto newLoop = mlir::acc::LoopOp::create(
65 rewriter, loc, origLoop->getResultTypes(), lb, ub, step, inclusiveUBAttr,
66 collapseAttr, collapseDeviceTypeAttr, origLoop.getGangOperands(),
67 origLoop.getGangOperandsArgTypeAttr(),
68 origLoop.getGangOperandsSegmentsAttr(),
69 origLoop.getGangOperandsDeviceTypeAttr(), origLoop.getWorkerNumOperands(),
70 origLoop.getWorkerNumOperandsDeviceTypeAttr(),
71 origLoop.getVectorOperands(), origLoop.getVectorOperandsDeviceTypeAttr(),
72 origLoop.getSeqAttr(), origLoop.getIndependentAttr(),
73 origLoop.getAuto_Attr(), origLoop.getGangAttr(), origLoop.getWorkerAttr(),
74 origLoop.getVectorAttr(), mlir::ValueRange{}, mlir::DenseI32ArrayAttr{},
75 mlir::ArrayAttr{}, origLoop.getCacheOperands(),
76 origLoop.getPrivateOperands(), origLoop.getFirstprivateOperands(),
77 origLoop.getReductionOperands(), combinedAttr);
78 return newLoop;
79}
80
81// Create inner loop inside input loop
//
// Builds a single-IV acc.loop derived from inputLoop at the rewriter's
// current insertion point (callers in this file first set it to the
// terminator of the enclosing loop body). The new loop:
//  - is created via createACCLoopFromOriginal with a null combined-construct
//    attribute and without the collapse attributes;
//  - loses its gang attributes if inputLoop has gang parallelism;
//  - loses its worker attributes if inputLoop has vector parallelism;
//  - gets one block containing only acc.yield and one block argument whose
//    type is that of inputLoop's first body argument.
// On return the rewriter's insertion point is at the end of the new block
// (after the yield).
// NOTE(review): listing line 84 is absent from this extract. Per the
// declaration summary further down this page it holds the parameters
// `lb`, `ub` and `step`.
82static mlir::acc::LoopOp
83createInnerLoop(mlir::acc::LoopOp inputLoop, mlir::RewriterBase &rewriter,
85 mlir::DenseBoolArrayAttr inclusiveUBAttr, mlir::Location loc) {
86 mlir::acc::LoopOp elementLoop = createACCLoopFromOriginal(
87 inputLoop, rewriter, lb, ub, step, inclusiveUBAttr,
88 mlir::acc::CombinedConstructsTypeAttr{}, loc, /*preserveCollapse*/ false);
89
90 // Remove gang/worker attributes from inner loops
// The conditions are evaluated on inputLoop while the removals are applied
// to elementLoop, which was created with inputLoop's attributes.
// NOTE(review): only attributes are removed here; the gang / worker operands
// passed at creation are not cleared in this function - confirm intended.
91 rewriter.startOpModification(elementLoop);
92 if (inputLoop.hasGang() ||
93 inputLoop.getGangValue(mlir::acc::GangArgType::Num) ||
94 inputLoop.getGangValue(mlir::acc::GangArgType::Dim) ||
95 inputLoop.getGangValue(mlir::acc::GangArgType::Static)) {
96 elementLoop.removeGangAttr();
97 elementLoop.removeGangOperandsArgTypeAttr();
98 elementLoop.removeGangOperandsSegmentsAttr();
99 elementLoop.removeGangOperandsDeviceTypeAttr();
100 }
// Worker is dropped only when the input loop has vector; the vector
// attributes themselves are left in place here.
101 if (inputLoop.hasVector() || inputLoop.getVectorValue()) {
102 elementLoop.removeWorkerAttr();
103 elementLoop.removeWorkerNumOperandsDeviceTypeAttr();
104 }
105 rewriter.finalizeOpModification(elementLoop);
106
107 // Create empty block in elementLoop and add IV argument
108 mlir::Block *blk = rewriter.createBlock(&elementLoop.getRegion(),
109 elementLoop.getRegion().begin());
110 rewriter.setInsertionPointToEnd(blk);
111 mlir::acc::YieldOp::create(rewriter, loc);
// The IV argument is appended after the block exists; its type is taken
// from inputLoop's first IV.
112 elementLoop.getBody().addArgument(
113 inputLoop.getBody().getArgument(0).getType(), loc);
114
115 return elementLoop;
116}
117
118// Move ops from source to target Loop and replace uses of IVs
//
// Splices the first nOps - 1 operations of sourceLoop's body to the front of
// targetLoop's body, then, for every index i of newIVs, replaces the uses of
// origIVs[i] inside targetLoop's region with newIVs[i].
// The callers in this file pass the full operation count of the source body
// as nOps, so the final operation (the terminator) stays in sourceLoop.
// Preconditions visible from the code: nOps must be at least 1 (it is a
// size_t, so nOps - 1 would wrap for 0) and origIVs must have at least as
// many entries as newIVs.
// `rewriter` is not used in the visible body.
// NOTE(review): listing lines 121-122 are absent from this extract. Per the
// declaration summary further down this page they hold the parameters
// `newIVs` and `origIVs` (both llvm::ArrayRef<mlir::Value>).
119static void moveOpsAndReplaceIVs(mlir::acc::LoopOp sourceLoop,
120 mlir::acc::LoopOp targetLoop,
123 size_t nOps, mlir::RewriterBase &rewriter) {
124 // Move ops from source to target loop [begin, begin + nOps - 1)
125 mlir::Block::iterator begin = sourceLoop.getBody().begin();
126 targetLoop.getBody().getOperations().splice(
127 targetLoop.getBody().getOperations().begin(),
128 sourceLoop.getBody().getOperations(), begin, std::next(begin, nOps - 1));
129
130 // Replace uses of origIV with newIV
// Only uses inside targetLoop's region are rewritten; uses elsewhere keep
// referring to the original IVs.
131 for (auto [i, newIV] : llvm::enumerate(newIVs))
132 mlir::replaceAllUsesInRegionWith(origIVs[i], newIV, targetLoop.getRegion());
133}
134
// Tile ACC loops according to the given tile sizes.
//
// tileLoops[0] is used as the outermost tile loop and the last entry as the
// innermost one. The transformation, in order:
//  1. records one inclusive-upper-bound flag per step operand of every loop;
//  2. multiplies the step of each tiled IV by its resolved tile size (ops are
//     inserted before the outermost loop) and rewrites the step operands of
//     each tile loop in place;
//  3. computes, inside the innermost tile loop, the upper bound of each
//     element loop as min(origUB, IV + newStep), minus 1 when inclusive;
//  4. creates one nested element loop per tile size, running from the tile
//     loop IV to that bound with the original step;
//  5. updates the tile loops inside a start/finalizeOpModification pair;
//  6. moves the original body of the innermost tile loop into the innermost
//     element loop and replaces the tile IVs by the element IVs there.
// Returns tileLoops[0]. tileLoops must not be empty.
// NOTE(review): this extract lacks listing lines 136 (function name and the
// `tileLoops` parameter, per the declaration summary further down this
// page), 144-149 (presumably the declarations of origIVs, origUBs,
// origSteps, newSteps, newUBs and newIVs), 181, 219 and 242.
135mlir::acc::LoopOp
137 const llvm::SmallVector<mlir::Value> &tileSizes,
138 int32_t defaultTileSize, mlir::RewriterBase &rewriter) {
139 // Tile collapsed and/or nested loops
140 mlir::acc::LoopOp outerLoop = tileLoops[0];
141 const mlir::Location loc = outerLoop.getLoc();
142
143 mlir::acc::LoopOp innerLoop = tileLoops[tileLoops.size() - 1];
// Sampled before anything is inserted into innerLoop's body, so the final
// splice covers only the original body operations (terminator excluded).
150 size_t nOps = innerLoop.getBody().getOperations().size();
151
152 // Extract original inclusiveUBs
// One flag per step operand, flattened across all tile loops in order.
153 llvm::SmallVector<bool> inclusiveUBs;
154 for (auto tileLoop : tileLoops) {
155 for (auto [j, step] : llvm::enumerate(tileLoop.getStep())) {
156 // inclusiveUBs are present on the IR from Fortran frontend for DO loops
157 // but might not be present from other frontends (python)
158 // So check if it exists
159 if (tileLoop.getInclusiveUpperboundAttr())
160 inclusiveUBs.push_back(
161 tileLoop.getInclusiveUpperboundAttr().asArrayRef()[j]);
162 else
163 inclusiveUBs.push_back(false);
164 }
165 }
166
167 // Extract original ivs, UBs, steps, and calculate new steps
// All step multiplications are emitted before outerLoop.
// NOTE(review): this assumes the step value of every tile loop, including
// nested ones, is defined above outerLoop - TODO confirm.
168 rewriter.setInsertionPoint(outerLoop);
169 for (auto [i, tileLoop] : llvm::enumerate(tileLoops)) {
// Every block argument of the loop body is recorded as an IV.
170 for (auto arg : tileLoop.getBody().getArguments())
171 origIVs.push_back(arg);
172 for (auto ub : tileLoop.getUpperbound())
173 origUBs.push_back(ub);
174
175 llvm::SmallVector<mlir::Value, 3> currentLoopSteps;
176 for (auto [j, step] : llvm::enumerate(tileLoop.getStep())) {
177 origSteps.push_back(step);
// NOTE(review): the tile size is selected with i + j (loop index plus IV
// index within that loop). When a tile loop with several IVs is followed
// by further tile loops, distinct IVs map to the same tile size (e.g.
// (i=0, j=1) and (i=1, j=0)) - confirm callers never pass that shape.
178 if (i + j >= tileSizes.size()) {
// No tile size for this IV: keep its step unchanged.
179 currentLoopSteps.push_back(step);
180 } else {
// NOTE(review): listing line 181 is absent; it presumably begins the
// definition of `tileSize` via a call taking the arguments below
// (likely resolveAndCastTileSize) - TODO confirm.
182 tileSizes[i + j], defaultTileSize, step.getType(), rewriter, loc);
// Tile loop step = original step * tile size.
183 auto newLoopStep =
184 mlir::arith::MulIOp::create(rewriter, loc, step, tileSize);
185 currentLoopSteps.push_back(newLoopStep);
186 newSteps.push_back(newLoopStep);
187 }
188 }
189
// Replace the loop's step operands with the (possibly scaled) steps.
190 rewriter.startOpModification(tileLoop);
191 tileLoop.getStepMutable().clear();
192 tileLoop.getStepMutable().append(currentLoopSteps);
193 rewriter.finalizeOpModification(tileLoop);
194 }
195
196 // Calculate new upper bounds for element loops
// Emitted just before the terminator of the innermost tile loop, i.e. after
// the original body operations. newSteps only holds entries for tiled IVs,
// and index i is also used for origIVs / origUBs / inclusiveUBs.
197 for (size_t i = 0; i < newSteps.size(); i++) {
198 rewriter.setInsertionPoint(innerLoop.getBody().getTerminator());
199 // UpperBound: min(origUB, origIV+(originalStep*tile_size))
200 auto stepped =
201 mlir::arith::AddIOp::create(rewriter, loc, origIVs[i], newSteps[i]);
202 mlir::Value newUB = stepped;
203 if (inclusiveUBs[i]) {
204 // Handle InclusiveUB
205 // UpperBound: min(origUB, origIV+(originalStep*tile_size - 1))
206 auto c1 = mlir::arith::ConstantOp::create(
207 rewriter, loc, newSteps[i].getType(),
208 rewriter.getIntegerAttr(newSteps[i].getType(), 1));
209 newUB = mlir::arith::SubIOp::create(rewriter, loc, stepped, c1);
210 }
// Signed minimum clamps the tile end to the original upper bound.
211 newUBs.push_back(
212 mlir::arith::MinSIOp::create(rewriter, loc, origUBs[i], newUB));
213 }
214
215 // Create and insert nested elementLoopOps before terminator of outer loopOp
// The first element loop goes before the terminator of the innermost tile
// loop; each following one goes before the terminator of the element loop
// created in the previous iteration.
// NOTE(review): indexes newUBs / origIVs / origSteps / inclusiveUBs with
// i < tileSizes.size(); assumes there are at least as many tiled IVs as
// tile sizes - TODO confirm.
216 mlir::acc::LoopOp currentLoop = innerLoop;
217 for (size_t i = 0; i < tileSizes.size(); i++) {
218 rewriter.setInsertionPoint(currentLoop.getBody().getTerminator());
// NOTE(review): listing line 219 is absent; it presumably declares
// `inclusiveUBAttr` with a null default - TODO confirm.
220 if (inclusiveUBs[i])
221 inclusiveUBAttr = rewriter.getDenseBoolArrayAttr({true});
222
// Element loop i iterates from the tile loop IV up to the clamped bound
// using the original (unscaled) step; attributes come from innerLoop.
223 mlir::acc::LoopOp elementLoop =
224 createInnerLoop(innerLoop, rewriter, mlir::ValueRange{origIVs[i]},
225 mlir::ValueRange{newUBs[i]},
226 mlir::ValueRange{origSteps[i]}, inclusiveUBAttr, loc);
227
228 // Remove vector/worker attributes from inner element loops except
229 // outermost element loop
230 if (i > 0) {
231 rewriter.startOpModification(elementLoop);
232 removeWorkerVectorFromLoop(elementLoop);
233 rewriter.finalizeOpModification(elementLoop);
234 }
235 newIVs.push_back(elementLoop.getBody().getArgument(0));
236 currentLoop = elementLoop;
237 }
238
239 // Remove vector/worker attributes from outer tile loops
// NOTE(review): listing line 242 (the statement between the two calls
// below) is absent; presumably removeWorkerVectorFromLoop(tileLoop) -
// TODO confirm.
240 for (auto tileLoop : tileLoops) {
241 rewriter.startOpModification(tileLoop);
243 rewriter.finalizeOpModification(tileLoop);
244 }
245
246 // Move ops from inner tile loop to inner element loop and replace IV uses
// currentLoop is the innermost element loop at this point (or innerLoop
// itself when tileSizes is empty).
247 moveOpsAndReplaceIVs(innerLoop, currentLoop, newIVs, origIVs, nOps, rewriter);
248
249 return outerLoop;
250}
251
// Uncollapse tile loops with multiple IVs and collapseCount < tileCount.
//
// Rebuilds origLoop as a nest of new loops:
//  - one outer loop covering the first collapseCount lower bounds, upper
//    bounds and steps of origLoop, keeping its collapse attributes and its
//    combined-construct attribute;
//  - one single-IV loop, nested in the previous one, for every index in
//    [collapseCount, tileCount), created through createInnerLoop.
// The body operations of origLoop (all but its last operation) are then
// moved into the innermost new loop and uses of the original IVs inside it
// are replaced by the new IVs. The new loops are returned outermost first.
// origLoop itself is not erased in the visible code.
// Assumes origLoop has at least max(collapseCount, tileCount) bounds, steps
// and body arguments; nothing here checks it.
// NOTE(review): the outer loop is created at the rewriter's insertion point
// on entry unless one of the missing lines sets it - TODO confirm.
// NOTE(review): this extract lacks listing lines 252 (return type; the
// declaration summary further down this page gives
// llvm::SmallVector<mlir::acc::LoopOp>), 256-257 (presumably the
// declarations of newLoops and newIVs) and 304 (presumably origIVs).
253mlir::acc::uncollapseLoops(mlir::acc::LoopOp origLoop, unsigned tileCount,
254 unsigned collapseCount,
255 mlir::RewriterBase &rewriter) {
258 mlir::Location loc = origLoop.getLoc();
259 llvm::SmallVector<bool> newInclusiveUBs;
260 llvm::SmallVector<mlir::Value, 3> lbs, ubs, steps;
// Gather the bounds, steps and inclusive flags of the collapsed IVs.
261 for (unsigned i = 0; i < collapseCount; i++) {
262 // inclusiveUpperbound attribute might not be set, default to false
263 bool inclusiveUB = false;
264 if (origLoop.getInclusiveUpperboundAttr())
265 inclusiveUB = origLoop.getInclusiveUpperboundAttr().asArrayRef()[i];
266 newInclusiveUBs.push_back(inclusiveUB);
267 lbs.push_back(origLoop.getLowerbound()[i]);
268 ubs.push_back(origLoop.getUpperbound()[i]);
269 steps.push_back(origLoop.getStep()[i]);
270 }
// Outer loop: multi-IV, collapse attributes preserved.
271 mlir::acc::LoopOp outerLoop = createACCLoopFromOriginal(
272 origLoop, rewriter, lbs, ubs, steps,
273 rewriter.getDenseBoolArrayAttr(newInclusiveUBs),
274 origLoop.getCombinedAttr(), loc, /*preserveCollapse*/ true);
// Give the outer loop a body consisting only of acc.yield, then add one
// block argument per collapsed IV with the type of the original IV.
275 mlir::Block *blk = rewriter.createBlock(&outerLoop.getRegion(),
276 outerLoop.getRegion().begin());
277 rewriter.setInsertionPointToEnd(blk);
278 mlir::acc::YieldOp::create(rewriter, loc);
279 for (unsigned i = 0; i < collapseCount; i++) {
280 outerLoop.getBody().addArgument(origLoop.getBody().getArgument(i).getType(),
281 loc);
282 newIVs.push_back(outerLoop.getBody().getArgument(i));
283 }
284 newLoops.push_back(outerLoop);
285
// Remaining IVs: one nested single-IV loop each, inserted before the
// terminator of the loop created just before it.
286 mlir::acc::LoopOp currentLoopOp = outerLoop;
287 for (unsigned i = collapseCount; i < tileCount; i++) {
288 rewriter.setInsertionPoint(currentLoopOp.getBody().getTerminator());
289 bool inclusiveUB = false;
290 if (origLoop.getInclusiveUpperboundAttr())
291 inclusiveUB = origLoop.getInclusiveUpperboundAttr().asArrayRef()[i];
292 mlir::DenseBoolArrayAttr inclusiveUBAttr =
293 rewriter.getDenseBoolArrayAttr({inclusiveUB});
294 mlir::acc::LoopOp innerLoop = createInnerLoop(
295 origLoop, rewriter, mlir::ValueRange{origLoop.getLowerbound()[i]},
296 mlir::ValueRange{origLoop.getUpperbound()[i]},
297 mlir::ValueRange{origLoop.getStep()[i]}, inclusiveUBAttr, loc);
298 newIVs.push_back(innerLoop.getBody().getArgument(0));
299 newLoops.push_back(innerLoop);
300 currentLoopOp = innerLoop;
301 }
302 // Move ops from origLoop to innermost loop and replace uses of IVs
// nOps counts every operation in origLoop's body; the callee moves
// nOps - 1 of them, leaving the last one behind in origLoop.
303 size_t nOps = origLoop.getBody().getOperations().size();
305 for (auto arg : origLoop.getBody().getArguments())
306 origIVs.push_back(arg);
307 moveOpsAndReplaceIVs(origLoop, currentLoopOp, newIVs, origIVs, nOps,
308 rewriter);
309
310 return newLoops;
311}
static void removeWorkerVectorFromLoop(mlir::acc::LoopOp loop)
static mlir::acc::LoopOp createACCLoopFromOriginal(mlir::acc::LoopOp origLoop, mlir::RewriterBase &rewriter, mlir::ValueRange lb, mlir::ValueRange ub, mlir::ValueRange step, mlir::DenseBoolArrayAttr inclusiveUBAttr, mlir::acc::CombinedConstructsTypeAttr combinedAttr, mlir::Location loc, bool preserveCollapse)
static void moveOpsAndReplaceIVs(mlir::acc::LoopOp sourceLoop, mlir::acc::LoopOp targetLoop, llvm::ArrayRef< mlir::Value > newIVs, llvm::ArrayRef< mlir::Value > origIVs, size_t nOps, mlir::RewriterBase &rewriter)
static mlir::Value resolveAndCastTileSize(mlir::Value tileSize, int32_t defaultTileSize, mlir::Type targetType, mlir::RewriterBase &rewriter, mlir::Location loc)
static mlir::acc::LoopOp createInnerLoop(mlir::acc::LoopOp inputLoop, mlir::RewriterBase &rewriter, mlir::ValueRange lb, mlir::ValueRange ub, mlir::ValueRange step, mlir::DenseBoolArrayAttr inclusiveUBAttr, mlir::Location loc)
Block represents an ordered list of Operations.
Definition Block.h:33
OpListType::iterator iterator
Definition Block.h:140
IntegerAttr getIntegerAttr(Type type, int64_t value)
Definition Builders.cpp:228
DenseBoolArrayAttr getDenseBoolArrayAttr(ArrayRef< bool > values)
Tensor-typed DenseArrayAttr getters.
Definition Builders.cpp:151
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes={}, ArrayRef< Location > locs={})
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Definition Builders.cpp:430
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition Builders.h:398
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Definition Builders.h:436
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void finalizeOpModification(Operation *op)
This method is used to signal the end of an in-place modification of the given operation.
virtual void startOpModification(Operation *op)
This method is used to notify the rewriter that an in-place operation modification is about to happen...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
mlir::acc::LoopOp tileACCLoops(llvm::SmallVector< mlir::acc::LoopOp > &tileLoops, const llvm::SmallVector< mlir::Value > &tileSizes, int32_t defaultTileSize, mlir::RewriterBase &rewriter)
Tile ACC loops according to the given tile sizes.
llvm::SmallVector< mlir::acc::LoopOp > uncollapseLoops(mlir::acc::LoopOp origLoop, unsigned tileCount, unsigned collapseCount, mlir::RewriterBase &rewriter)
Uncollapse tile loops with multiple IVs and collapseCount < tileCount.
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition Utils.cpp:304
Value getValueOrCreateCastToIndexLike(OpBuilder &b, Location loc, Type targetType, Value value)
Create a cast from an index-like value (index or integer) to another index-like value.
Definition Utils.cpp:119
detail::DenseArrayAttrImpl< int32_t > DenseI32ArrayAttr
detail::DenseArrayAttrImpl< bool > DenseBoolArrayAttr
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.