MLIR 23.0.0git
ACCRecipeMaterialization.cpp
Go to the documentation of this file.
1//===- ACCRecipeMaterialization.cpp - Materialize ACC recipes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Overview:
10// ---------
11// OpenACC compute constructs (acc.parallel, acc.serial, acc.kernels) and
12// acc.loop can carry data clauses (acc.private, acc.firstprivate,
13// acc.reduction) that refer to recipes (acc.private.recipe,
14// acc.firstprivate.recipe, acc.reduction.recipe). Recipes define how to
15// initialize, copy, combine, or destroy a particular variable. This pass clones
16// those regions into the construct and ensures the materialized SSA values are
17// used instead.
18//
19// Transforms:
20// -----------
21// 1. Firstprivate: Inserts acc.firstprivate_map so the initial value is
22// available on the device, then clones the recipe init and copy regions
23// into the construct and replaces uses with the materialized alloca.
24// Optional destroy region is cloned before the region terminator.
25//
26// 2. Private: Clones the recipe init region into the construct (at the
27// region entry or at the loop op for acc.loop private). Replaces uses
28// of the recipe result with the materialized alloca. Optional destroy
29// region is cloned before the region terminator.
30//
31// 3. Reduction: Creates acc.reduction_init (init region inlined) and
32// acc.reduction_combine_region (combiner region inlined). Uses within
33// the region are updated to the reduction init result.
34//
35// Requirements:
36// -------------
37// 1. OpenACCSupport: The pass uses the `acc::OpenACCSupport` analysis
38// including emitNYI for unsupported cases.
39//
40//===----------------------------------------------------------------------===//
41
49#include "mlir/IR/Block.h"
50#include "mlir/IR/Builders.h"
51#include "mlir/IR/IRMapping.h"
52#include "mlir/IR/SymbolTable.h"
53#include "mlir/IR/Value.h"
54#include "mlir/IR/ValueRange.h"
56#include "mlir/Support/LLVM.h"
58#include "llvm/ADT/STLExtras.h"
59#include "llvm/ADT/TypeSwitch.h"
60#include "llvm/Support/Debug.h"
61#include "llvm/Support/ErrorHandling.h"
62
63namespace mlir {
64namespace acc {
65#define GEN_PASS_DEF_ACCRECIPEMATERIALIZATION
66#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc"
67} // namespace acc
68} // namespace mlir
69
70#define DEBUG_TYPE "acc-recipe-materialization"
71
72namespace {
73
74using namespace mlir;
75
76static void saveVarName(StringRef name, Value dst) {
77 if (name.empty())
78 return;
79 if (Operation *dstOp = dst.getDefiningOp()) {
80 if (dstOp->getAttrOfType<acc::VarNameAttr>(acc::getVarNameAttrName()))
81 return;
82 if (isa<ACC_DATA_ENTRY_OPS>(dstOp))
83 return;
84 dstOp->setAttr(acc::getVarNameAttrName(),
85 acc::VarNameAttr::get(dstOp->getContext(), name));
86 return;
87 }
88 auto blockArg = dyn_cast<BlockArgument>(dst);
89 if (!blockArg)
90 return;
91 Block *block = blockArg.getOwner();
92 Region *region = block ? block->getParent() : nullptr;
93 if (!region || !block->isEntryBlock())
94 return;
95 Operation *parent = region->getParentOp();
96 if (!parent)
97 return;
98 auto funcOp = dyn_cast<FunctionOpInterface>(parent);
99 if (!funcOp)
100 return;
101 unsigned argIdx = blockArg.getArgNumber();
102 if (argIdx >= funcOp.getNumArguments())
103 return;
104 if (funcOp.getArgAttr(argIdx, acc::getVarNameAttrName()))
105 return;
106 funcOp.setArgAttr(argIdx, acc::getVarNameAttrName(),
107 acc::VarNameAttr::get(parent->getContext(), name));
108}
109
110static void saveVarName(Value src, Value dst) {
111 saveVarName(acc::getVariableName(src), dst);
112}
113
114// Clone the destroy region of the recipe before the terminator of the provided
115// block. Values must be provided for the destroy region block arguments
116// according to the recipe specifications.
117template <typename RecipeOpTy>
118static void cloneDestroy(RecipeOpTy recipe, mlir::Block *block,
119 const llvm::SmallVector<mlir::Value> &arguments) {
120 IRMapping mapping{};
121 Region &destroyRegion = recipe.getDestroyRegion();
122 assert(destroyRegion.getBlocks().front().getNumArguments() ==
123 arguments.size() &&
124 "unexpected acc recipe destroy block arguments");
125 mapping.map(destroyRegion.getBlocks().front().getArguments(), arguments);
126 acc::cloneACCRegionInto(&destroyRegion, block, std::prev(block->end()),
127 mapping,
128 /*resultsToReplace=*/{});
129}
130
131class ACCRecipeMaterialization
132 : public acc::impl::ACCRecipeMaterializationBase<ACCRecipeMaterialization> {
133public:
134 using acc::impl::ACCRecipeMaterializationBase<
135 ACCRecipeMaterialization>::ACCRecipeMaterializationBase;
136 void runOnOperation() override;
137
138private:
139 // When handling firstprivate, the initial value needs to be available on
140 // the GPU. One way to get that value there is to map the variable through
141 // global memory.
142 // Thus, when we materialize a firstprivate, we materialize it into
143 // a mapping action first. This function ends up with doing the following:
144 // %dev = acc.firstprivate var(%var)
145 // =>
146 // %copy = acc.firstprivate_map var(%var)
147 // %dev = acc.firstprivate var(%copy)
148 // When the recipe materialization happens, the `acc.firstprivate` ends up
149 // being removed. But because of the way we chain it to the
150 // `acc.firstprivate_map`, then its result becomes live-in to the
151 // compute region and used as the variable the initial value is loaded from.
152 void handleFirstprivateMapping(acc::FirstprivateOp firstprivateOp) const;
153 template <typename OpTy>
154 void removeRecipe(OpTy op, ModuleOp moduleOp) const;
155 template <typename OpTy, typename RecipeOpTy, typename AccOpTy>
156 LogicalResult materialize(OpTy op, RecipeOpTy recipe, AccOpTy accOp,
157 acc::OpenACCSupport &accSupport) const;
158 template <typename OpTy>
159 LogicalResult materializeForACCOp(OpTy accOp,
160 acc::OpenACCSupport &accSupport) const;
161};
162
163void ACCRecipeMaterialization::handleFirstprivateMapping(
164 acc::FirstprivateOp firstprivateOp) const {
165 OpBuilder builder(firstprivateOp);
166 auto mapFirstprivateOp = acc::FirstprivateMapInitialOp::create(
167 builder, firstprivateOp.getLoc(), firstprivateOp.getVar(),
168 firstprivateOp.getStructured(), firstprivateOp.getImplicit(),
169 firstprivateOp.getBounds());
170 mapFirstprivateOp.setName(firstprivateOp.getName());
171 firstprivateOp.getVarMutable().assign(mapFirstprivateOp.getAccVar());
172}
173
174template <typename OpTy>
175void ACCRecipeMaterialization::removeRecipe(OpTy op, ModuleOp moduleOp) const {
176 auto recipeName = op.getNameAttr();
177 if (SymbolTable::symbolKnownUseEmpty(recipeName, moduleOp)) {
178 LLVM_DEBUG(llvm::dbgs() << "erasing recipe: " << recipeName << "\n");
179 op.erase();
180 } else {
181 LLVM_DEBUG({
182 std::optional<SymbolTable::UseRange> symbolUses =
183 op.getSymbolUses(moduleOp);
184 if (symbolUses.has_value()) {
185 for (SymbolTable::SymbolUse symbolUse : *symbolUses) {
186 llvm::dbgs() << "symbol use: ";
187 symbolUse.getUser()->dump();
188 }
189 }
190 });
191 llvm_unreachable("expected no use of recipe symbol");
192 }
193}
194
195template <typename OpTy, typename RecipeOpTy, typename AccOpTy>
196LogicalResult
197ACCRecipeMaterialization::materialize(OpTy op, RecipeOpTy recipe, AccOpTy accOp,
198 acc::OpenACCSupport &accSupport) const {
199 Region &region = accOp.getRegion();
200 Value origPtr = op.getVar();
201 Value accPtr = op.getAccVar();
202 assert(accPtr && "invalid op: null acc var");
203
204 OpBuilder b(op);
205 SmallVector<Value> triples;
206
207 // Clone init block into the region at the insertion point specified.
208 Region &initRegion = recipe.getInitRegion();
209 unsigned initNumArguments =
210 initRegion.getBlocks().front().getArguments().size();
211 if (initNumArguments > 1) {
212 // Code from C/C++ will most likely only provide extent arguments to the
213 // recipe arguments.
214 if ((initNumArguments - 1) % 3 != 0) {
215 (void)accSupport.emitNYI(recipe.getLoc(),
216 "privatization of array section with extents");
217 return failure();
218 }
219 // The remaining arguments must be the bounds triples
220 // (lower-bound, upper-bound, step), ...
221 unsigned argIdx = 1;
222 // Cast the given value to the type of the combiner region's argument
223 // at position argIdx, and increment argIdx.
224 auto castValueToArgType = [&](Location loc, Value v) {
226 b, loc, v,
227 initRegion.getBlocks().front().getArgument(argIdx++).getType(),
228 /*isUnsignedCast=*/false);
229 };
230 for (Value bound : acc::getBounds(op)) {
231 auto dataBound = bound.getDefiningOp<acc::DataBoundsOp>();
232 assert(dataBound &&
233 "acc.reduction's bound must be defined by acc.bounds");
234 // NOTE: we should probably generate get_lowerbound, get_upperbound
235 // and get_stride here, so that we can stop looking for the acc.bounds
236 // operation above, and just use the `bound` value.
237 Value lb =
238 castValueToArgType(dataBound.getLoc(), dataBound.getLowerbound());
239 Value ub =
240 castValueToArgType(dataBound.getLoc(), dataBound.getUpperbound());
241 Value step =
242 castValueToArgType(dataBound.getLoc(), dataBound.getStride());
243 triples.append({lb, ub, step});
244 }
245 assert(triples.size() + 1 == initNumArguments &&
246 "mismatch between number bounds and number of recipe init block "
247 "arguments");
248 }
249
250 IRMapping mapping;
251 SmallVector<Value> initArgs{origPtr};
252 initArgs.append(triples);
253 mapping.map(initRegion.getBlocks().front().getArguments(), initArgs);
254
255 if constexpr (std::is_same_v<OpTy, acc::PrivateOp>) {
256 // Clone the init region for a private.
257 Block *block = &region.front();
258 auto [results, ip] = acc::cloneACCRegionInto(
259 &initRegion, block, block->begin(), mapping, {accPtr});
260 assert(results.size() == 1 && "expected single result from init region");
261 saveVarName(op.getAccVar(), results[0]);
262 // Clone the destroy region for a private, if it exists.
263 if (!recipe.getDestroyRegion().empty()) {
264 results.insert(results.begin(), origPtr);
265 results.append(triples);
266 cloneDestroy(recipe, block, results);
267 }
268 } else if constexpr (std::is_same_v<OpTy, acc::FirstprivateOp>) {
269 // Clone the init region for a firstprivate.
270 Block *block = &region.front();
271 auto [results, ip] = acc::cloneACCRegionInto(
272 &initRegion, block, block->begin(), mapping, {accPtr});
273 assert(results.size() == 1 && "expected single result from init region");
274 saveVarName(op.getAccVar(), results[0]);
275 // We want the copy to store the origPtr to private
276 results.insert(results.begin(), origPtr);
277 results.append(triples);
278
279 // Clone the copy region for a firstprivate
280 mapping.clear();
281 mapping.map(recipe.getCopyRegion().front().getArguments(), results);
282 // Clone the copy region for a firstprivate.
283 acc::cloneACCRegionInto(&recipe.getCopyRegion(), block, std::next(ip),
284 mapping, {});
285 if (!recipe.getDestroyRegion().empty()) {
286 // origPtr was already pushed.
287 cloneDestroy(recipe, block, results);
288 }
289 } else if constexpr (std::is_same_v<OpTy, acc::ReductionOp>) {
290 auto cloneRegionIntoAccRegion = [&](Region *src, Region *dest,
291 bool hasResult) {
292 src->cloneInto(dest, mapping);
293 Block *block = &dest->front();
294 Operation *terminator = block->getTerminator();
295 b.setInsertionPoint(terminator);
296 if (hasResult)
297 acc::YieldOp::create(b, op.getLoc(), terminator->getOperands());
298 else
299 acc::YieldOp::create(b, op.getLoc(), ValueRange{});
300 terminator->erase();
301 };
302
303 // Clone the init region into acc.reduction_init.
304 if constexpr (std::is_same_v<AccOpTy, acc::ParallelOp>)
305 b.setInsertionPointToStart(&region.front());
306 else if constexpr (std::is_same_v<AccOpTy, acc::LoopOp>)
307 b.setInsertionPoint(op);
308 else
309 llvm_unreachable("unexpected acc op with reduction recipe");
310
311 auto reductionOp = acc::ReductionInitOp::create(
312 b, op.getLoc(), origPtr, recipe.getReductionOperatorAttr());
313 saveVarName(op.getAccVar(), reductionOp.getResult());
314 cloneRegionIntoAccRegion(&initRegion, &reductionOp.getRegion(),
315 /*hasResult=*/true);
316
317 // Update the uses within the loop to use the reduction op result.
318 replaceAllUsesInRegionWith(accPtr, reductionOp.getResult(), region);
319
320 // Clone the combiner region into acc.reduction_combine_region.
321 Region &combinerRegion = recipe.getCombinerRegion();
322 Block *entryBlock = &combinerRegion.front();
323
324 if constexpr (std::is_same_v<AccOpTy, acc::ParallelOp>)
325 b.setInsertionPoint(region.back().getTerminator());
326 else if constexpr (std::is_same_v<AccOpTy, acc::LoopOp>)
327 b.setInsertionPointAfter(accOp);
328 else
329 llvm_unreachable("unexpected acc op with reduction recipe");
330
331 // Map the first two block arguments to the original and private
332 // reduction variables. If the recipe's combiner region has the bounds
333 // arguments, we have to map them to the corresponding operands of
334 // acc.reduction operation.
335 mapping.clear();
336 SmallVector<Value, 2> argsRemapping{origPtr, reductionOp.getResult()};
337 argsRemapping.append(triples);
338 mapping.map(entryBlock->getArguments(), argsRemapping);
339
340 auto combineRegionOp = acc::ReductionCombineRegionOp::create(
341 b, op.getLoc(), origPtr, reductionOp.getResult());
342 cloneRegionIntoAccRegion(&combinerRegion, &combineRegionOp.getRegion(),
343 /*hasResult=*/false);
344
345 auto setSeqParDimsForRecipeLoops = [](Region *r) {
346 r->walk([](LoopLikeOpInterface loopLike) {
347 loopLike->setAttr(
348 acc::GPUParallelDimsAttr::name,
349 acc::GPUParallelDimsAttr::seq(loopLike->getContext()));
350 });
351 };
352 setSeqParDimsForRecipeLoops(&reductionOp.getRegion());
353 setSeqParDimsForRecipeLoops(&combineRegionOp.getRegion());
354
355 if (!recipe.getDestroyRegion().empty()) {
356 (void)accSupport.emitNYI(
357 recipe.getLoc(),
358 "OpenACC reduction variable that requires destruction code");
359 return failure();
360 }
361 } else {
362 llvm_unreachable("unexpected op type");
363 }
364
365 op.erase();
366 return success();
367}
368
369template <typename OpTy>
370LogicalResult ACCRecipeMaterialization::materializeForACCOp(
371 OpTy accOp, acc::OpenACCSupport &accSupport) const {
372 assert(isa<ACC_COMPUTE_CONSTRUCT_AND_LOOP_OPS>(accOp));
373
374 if (!accOp.getFirstprivateOperands().empty()) {
375 // Clear the firstprivate operands list so there will be no uses after
376 // the recipe is materialized.
377 SmallVector<Value> operands(accOp.getFirstprivateOperands());
378 accOp.getFirstprivateOperandsMutable().clear();
379 for (Value operand : operands) {
380 auto firstprivateOp = cast<acc::FirstprivateOp>(operand.getDefiningOp());
381 auto symbolRef = cast<SymbolRefAttr>(firstprivateOp.getRecipeAttr());
382 auto decl = SymbolTable::lookupNearestSymbolFrom(accOp, symbolRef);
383 auto recipeOp = cast<acc::FirstprivateRecipeOp>(decl);
384 LLVM_DEBUG(llvm::dbgs() << "materializing: " << firstprivateOp << "\n"
385 << symbolRef << "\n");
386 handleFirstprivateMapping(firstprivateOp);
387 if (failed(materialize(firstprivateOp, recipeOp, accOp, accSupport)))
388 return failure();
389 }
390 }
391
392 if (!accOp.getPrivateOperands().empty()) {
393 // Clear the private operands list so there will be no uses after
394 // the recipe is materialized.
395 SmallVector<Value> operands(accOp.getPrivateOperands());
396 accOp.getPrivateOperandsMutable().clear();
397 for (Value operand : operands) {
398 auto privateOp = cast<acc::PrivateOp>(operand.getDefiningOp());
399 auto symbolRef = cast<SymbolRefAttr>(privateOp.getRecipeAttr());
400 auto decl = SymbolTable::lookupNearestSymbolFrom(accOp, symbolRef);
401 auto recipeOp = cast<acc::PrivateRecipeOp>(decl);
402 LLVM_DEBUG(llvm::dbgs() << "materializing: " << privateOp << "\n"
403 << symbolRef << "\n");
404 if (failed(materialize(privateOp, recipeOp, accOp, accSupport)))
405 return failure();
406 }
407 }
408
409 if (!accOp.getReductionOperands().empty()) {
410 // Clear the reduction operands list so there will be no uses after
411 // the recipe is materialized.
412 SmallVector<Value> operands(accOp.getReductionOperands());
413 accOp.getReductionOperandsMutable().clear();
414 for (Value operand : operands) {
415 auto reductionOp = cast<acc::ReductionOp>(operand.getDefiningOp());
416 auto symbolRef = cast<SymbolRefAttr>(reductionOp.getRecipeAttr());
417 auto decl = SymbolTable::lookupNearestSymbolFrom(accOp, symbolRef);
418 auto recipeOp = cast<acc::ReductionRecipeOp>(decl);
419 LLVM_DEBUG(llvm::dbgs() << "materializing: " << reductionOp << "\n"
420 << symbolRef << "\n");
421 if (failed(materialize(reductionOp, recipeOp, accOp, accSupport)))
422 return failure();
423 }
424 }
425 return success();
426}
427
428void ACCRecipeMaterialization::runOnOperation() {
429 ModuleOp moduleOp = getOperation();
430 acc::OpenACCSupport &accSupport = getAnalysis<acc::OpenACCSupport>();
431
432 // Materialize all recipes for all compute constructs and loop constructs.
433 bool anyFailed = false;
434 moduleOp.walk([&](Operation *op) {
435 if (anyFailed)
436 return;
438 [&](auto constructOp) {
439 if (failed(materializeForACCOp(constructOp, accSupport)))
440 anyFailed = true;
441 });
442 });
443 if (anyFailed) {
444 signalPassFailure();
445 return;
446 }
447
448 // Remove all recipes.
449 moduleOp.walk([&](Operation *op) {
450 if (auto recipe = dyn_cast<acc::ReductionRecipeOp>(op))
451 removeRecipe(recipe, moduleOp);
452 else if (auto recipe = dyn_cast<acc::PrivateRecipeOp>(op))
453 removeRecipe(recipe, moduleOp);
454 else if (auto recipe = dyn_cast<acc::FirstprivateRecipeOp>(op))
455 removeRecipe(recipe, moduleOp);
456 });
457}
458
459} // namespace
return success()
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
Block represents an ordered list of Operations.
Definition Block.h:33
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
Definition Block.cpp:27
Operation & front()
Definition Block.h:163
Operation * getTerminator()
Get the terminator operation of this block.
Definition Block.cpp:249
BlockArgListType getArguments()
Definition Block.h:97
iterator end()
Definition Block.h:154
iterator begin()
Definition Block.h:153
bool isEntryBlock()
Return if this block is the entry block in the parent region.
Definition Block.cpp:36
This is a utility class for mapping one set of IR entities to another.
Definition IRMapping.h:26
void clear()
Clears all mappings held by the mapper.
Definition IRMapping.h:79
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
Definition IRMapping.h:30
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
This class helps build Operations.
Definition Builders.h:209
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition Operation.h:407
MLIRContext * getContext()
Return the context this operation is associated with.
Definition Operation.h:237
void erase()
Remove this operation from its parent block and delete it.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
Block & front()
Definition Region.h:65
Block & back()
Definition Region.h:64
void cloneInto(Region *dest, IRMapping &mapper)
Clone the internal blocks from this region into dest.
Definition Region.cpp:70
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition Region.h:200
BlockListType & getBlocks()
Definition Region.h:45
This class represents a specific symbol use.
static Operation * lookupNearestSymbolFrom(Operation *from, StringAttr symbol)
Returns the operation registered with the given symbol name within the closest parent operation of,...
static bool symbolKnownUseEmpty(StringAttr symbol, Operation *from)
Return if the given symbol is known to have no uses that are nested within the given operation 'from'...
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
InFlightDiagnostic emitNYI(Location loc, const Twine &message)
Report a case that is not yet supported by the implementation.
#define ACC_COMPUTE_CONSTRUCT_AND_LOOP_OPS
Definition OpenACC.h:61
std::string getVariableName(mlir::Value v)
Attempts to extract the variable name from a value by walking through view-like operations until an a...
mlir::SmallVector< mlir::Value > getBounds(mlir::Operation *accDataClauseOp)
Used to obtain bounds from an acc data clause operation.
Definition OpenACC.cpp:5098
static constexpr StringLiteral getVarNameAttrName()
Definition OpenACC.h:205
std::pair< llvm::SmallVector< Value >, Block::iterator > cloneACCRegionInto(Region *src, Block *dest, Block::iterator inlinePoint, IRMapping &mapping, ValueRange resultsToReplace)
Clone an ACC region into a destination block at the given insertion point.
Include the generated interface declarations.
Value convertScalarToDtype(OpBuilder &b, Location loc, Value operand, Type toType, bool isUnsignedCast)
Converts a scalar value operand to type toType.
Definition Utils.cpp:239
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
llvm::TypeSwitch< T, ResultT > TypeSwitch
Definition LLVM.h:136