//===- EliminateBarriers.cpp - Eliminate extra barriers --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Barrier elimination pattern and pass. If a barrier does not enforce any
// conflicting pair of memory effects, including a pair that is enforced by
// another barrier, it is unnecessary and can be removed. Adapted from
// "High-Performance GPU-to-CPU Transpilation and Optimization via High-Level
// Parallel Constructs" by Moses, Ivanov, Domke, Endo, Doerfert, and Zinenko in
// PPoPP 2023 and the implementation in Polygeist.
//
//===----------------------------------------------------------------------===//
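
// For illustration (a hypothetical sketch, not a test case from this file):
// in the kernel below, the second barrier separates no conflicting pair of
// memory effects that the first one does not already separate, so the pattern
// can erase it.
//
//   gpu.func @kernel(%buf: memref<16xf32>, %i: index, %v: f32) kernel {
//     memref.store %v, %buf[%i] : memref<16xf32>
//     gpu.barrier
//     gpu.barrier  // Redundant: no memory access between the two barriers.
//     %r = memref.load %buf[%i] : memref<16xf32>
//     gpu.return
//   }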

#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugLog.h"

namespace mlir {
#define GEN_PASS_DEF_GPUELIMINATEBARRIERS
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
} // namespace mlir

using namespace mlir;
using namespace mlir::gpu;

#define DEBUG_TYPE "gpu-erase-barriers"
#define DEBUG_TYPE_ALIAS "gpu-erase-barriers-alias"

// The functions below provide interface-like verification, but are too
// specific to barrier elimination to become interfaces.

/// Returns `true` if the op defines the parallel region that is subject to
/// barrier synchronization.
static bool isParallelRegionBoundary(Operation *op) {
  if (op->hasAttr("__parallel_region_boundary_for_test"))
    return true;

  return isa<GPUFuncOp, LaunchOp>(op);
}

/// Returns `true` if the op behaves like a sequential loop, i.e., the control
/// flow "wraps around" from the end of the body region back to its start.
static bool isSequentialLoopLike(Operation *op) { return isa<scf::ForOp>(op); }

/// Returns `true` if the regions of the op are guaranteed to be executed at
/// most once. Thus, if an operation in one of the nested regions of `op` is
/// executed, then so are all the other operations in this region.
static bool hasSingleExecutionBody(Operation *op) {
  return isa<FunctionOpInterface, scf::IfOp, memref::AllocaScopeOp>(op);
}

/// Returns `true` if the operation is known to produce a pointer-like object
/// distinct from any other object produced by a similar operation. For
/// example, an allocation produces such an object.
static bool producesDistinctBase(Operation *op) {
  return isa_and_nonnull<memref::AllocOp, memref::AllocaOp>(op);
}

/// Populates `effects` with all memory effects without associating them to a
/// specific value.
static void
addAllValuelessEffects(SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Read>());
  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Write>());
  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Allocate>());
  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Free>());
}

/// Looks through known "view-like" ops to find the base memref.
static Value getBase(Value v) {
  while (Operation *definingOp = v.getDefiningOp()) {
    if (auto viewLike = dyn_cast<ViewLikeOpInterface>(definingOp)) {
      v = viewLike.getViewSource();
      continue;
    }
    if (auto transposeOp = dyn_cast<memref::TransposeOp>(definingOp)) {
      v = transposeOp.getIn();
      continue;
    }
    break;
  }
  return v;
}
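
// For example (an illustrative sketch; types abbreviated), getBase walks
// view-producing chains such as
//
//   %alloc = memref.alloc() : memref<16x16xf32>
//   %view = memref.subview %alloc[0, 0] [8, 8] [1, 1]
//       : memref<16x16xf32> to memref<8x8xf32, strided<[16, 1]>>
//   %t = memref.transpose %view (d0, d1) -> (d1, d0)
//       : memref<8x8xf32, strided<[16, 1]>> to memref<8x8xf32, strided<[1, 16]>>
//
// so that both %view and %t resolve to the same base value %alloc.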

/// Returns `true` if accesses to the given memory space could potentially be
/// fenced by a barrier synchronizing on the given `fencedAddressSpaces`. If
/// the set of address spaces is not given, it is equal to all possible address
/// spaces. Memory spaces that are not `#gpu.address_space` are deemed to
/// overlap with all GPU address spaces.
static bool isAddressSpacePotentiallyFenced(
    Attribute memorySpace,
    std::optional<ArrayRef<gpu::AddressSpaceAttr>> fencedAddressSpaces) {
  if (!fencedAddressSpaces)
    return true;

  auto gpuMemSpace = dyn_cast_if_present<gpu::AddressSpaceAttr>(memorySpace);
  if (!gpuMemSpace)
    return true;

  // Check if this GPU address space is in the fenced set.
  return llvm::is_contained(*fencedAddressSpaces, gpuMemSpace);
}
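
// For example (hypothetical attributes): a barrier fencing only
// #gpu.address_space<workgroup> cannot fence an access to a memref in
// #gpu.address_space<private>, while a memref whose memory space is absent,
// or is not a #gpu.address_space at all, is conservatively treated as
// potentially fenced.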

/// Succeeds if the effect operates on a memref whose memory space
/// could be one of the given fenced address spaces. This will both look at the
/// address space of the effect's operand and of the view-like operations that
/// define that memref, so as to inspect any memory-space casts or similar
/// operations (like amdgpu buffer casts) that may provide more information.
/// This assumes that directly-conflicting casts (for example, casting a memref
/// in global memory to make it one in workgroup memory) can't happen.
static LogicalResult effectMightAffectAddressSpaces(
    const MemoryEffects::EffectInstance &effect,
    std::optional<ArrayRef<gpu::AddressSpaceAttr>> fencedAddressSpaces) {
  if (!fencedAddressSpaces)
    return success();

  Value value = effect.getValue();
  if (!value)
    return success();

  auto mightMatch = [&](Value v) {
    auto memrefType = dyn_cast<BaseMemRefType>(v.getType());
    if (!memrefType)
      return true;
    return isAddressSpacePotentiallyFenced(memrefType.getMemorySpace(),
                                           fencedAddressSpaces);
  };

  if (!mightMatch(value))
    return failure();

  Value base = value;
  while (auto viewLike = base.getDefiningOp<ViewLikeOpInterface>()) {
    base = viewLike.getViewSource();
    // We assume that we won't see directly incompatible casts, like global =>
    // flat/null => workgroup.
    if (!mightMatch(base))
      return failure();
  }

  return success();
}
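
// For example (an illustrative sketch, assuming the memory-space cast
// implements ViewLikeOpInterface): for an effect on %cast below, the walk
// reaches %global, whose #gpu.address_space<global> space lets a barrier that
// fences only workgroup memory ignore the access.
//
//   %cast = memref.memory_space_cast %global
//       : memref<16xf32, #gpu.address_space<global>> to memref<16xf32>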

/// Returns `true` if `op` is a `BarrierOp` that fences any address spaces that
/// could overlap with the given fenced address spaces.
static bool isBarrierWithCommonFencedMemory(
    Operation *op,
    std::optional<ArrayRef<gpu::AddressSpaceAttr>> fencedAddressSpaces) {
  auto barrier = dyn_cast<BarrierOp>(op);
  if (!barrier)
    return false;

  std::optional<ArrayAttr> otherFencedSpaces = barrier.getAddressSpaces();
  // Barriers with unspecified fencing fence everything.
  if (!otherFencedSpaces)
    return true;
  // Barriers that fence nothing, on the other hand, can't close off our
  // search.
  if (otherFencedSpaces->empty())
    return false;

  // If we fence all memory, we have fencing in common with anything but the
  // non-memory barrier.
  if (!fencedAddressSpaces)
    return true;

  return llvm::any_of(
      otherFencedSpaces->getAsRange<gpu::AddressSpaceAttr>(),
      [&](auto a) { return llvm::is_contained(*fencedAddressSpaces, a); });
}
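
// For example (an illustrative sketch, assuming the optional fenced address
// spaces print as a trailing list on the op): a barrier such as
//
//   gpu.barrier memfence [#gpu.address_space<workgroup>]
//
// has fenced memory in common with a plain `gpu.barrier` (which fences
// everything), but not with one fencing only #gpu.address_space<private>.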

/// Collect the memory effects of the given op in 'effects'. Returns 'true' if
/// it could extract the effect information from the op, otherwise returns
/// 'false' and conservatively populates the list with all possible effects
/// associated with no particular value or symbol. If given,
/// `fencedAddressSpaces` is the set of GPU memory spaces fenced by the barrier
/// under consideration - memory operations whose affected value (or its base
/// value) has an address space known to be distinct from the fenced ones will
/// not be included in the effect set.
static bool collectEffects(
    Operation *op, SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
    std::optional<ArrayRef<gpu::AddressSpaceAttr>> fencedAddressSpaces,
    bool ignoreBarriers = true) {
  // Skip over barriers to avoid infinite recursion (those barriers would ask
  // this barrier again).
  if (ignoreBarriers && isa<BarrierOp>(op))
    return true;

  // Collect effect instances of the operation. Note that the implementation
  // of getEffects erases all effect instances that have a type other than the
  // template parameter, so we collect them first in a local buffer and then
  // copy.
  if (auto iface = dyn_cast<MemoryEffectOpInterface>(op)) {
    SmallVector<MemoryEffects::EffectInstance> localEffects;
    iface.getEffects(localEffects);
    // Filter out effects that cannot affect the fenced address spaces.
    for (const MemoryEffects::EffectInstance &effect : localEffects) {
      if (succeeded(
              effectMightAffectAddressSpaces(effect, fencedAddressSpaces)))
        effects.push_back(effect);
    }
    return true;
  }
  if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
    for (auto &region : op->getRegions()) {
      for (auto &block : region) {
        for (auto &innerOp : block)
          if (!collectEffects(&innerOp, effects, fencedAddressSpaces,
                              ignoreBarriers))
            return false;
      }
    }
    return true;
  }

  // We need to be conservative here in case the op doesn't have the interface
  // and assume it can have any possible effect.
  addAllValuelessEffects(effects);
  return false;
}

/// Get all effects before the given operation caused by other operations in
/// the same block. That is, this will not consider operations beyond the
/// block.
static bool getEffectsBeforeInBlock(
    Operation *op, SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
    std::optional<ArrayRef<gpu::AddressSpaceAttr>> fencedAddressSpaces,
    bool stopAtBarrier) {
  if (op == &op->getBlock()->front())
    return true;

  for (Operation *it = op->getPrevNode(); it != nullptr;
       it = it->getPrevNode()) {
    if (isBarrierWithCommonFencedMemory(it, fencedAddressSpaces)) {
      if (stopAtBarrier)
        return true;
      continue;
    }

    if (!collectEffects(it, effects, fencedAddressSpaces))
      return false;
  }
  return true;
}

/// Collects memory effects from operations that may be executed before `op` in
/// a trivial structured control flow, e.g., without branches. Stops at the
/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
/// set. Returns `true` if the memory effects added to `effects` are exact,
/// `false` if they are a conservative over-approximation. The latter means
/// that `effects` contains instances not associated with a specific value.
static bool getEffectsBefore(
    Operation *op, SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
    std::optional<ArrayRef<gpu::AddressSpaceAttr>> fencedAddressSpaces,
    bool stopAtBarrier) {
  if (!op->getBlock())
    return true;

  // If there is a non-structured control flow, bail.
  Region *region = op->getBlock()->getParent();
  if (region && !region->hasOneBlock()) {
    addAllValuelessEffects(effects);
    return false;
  }

  // Collect all effects before the op.
  getEffectsBeforeInBlock(op, effects, fencedAddressSpaces, stopAtBarrier);

  // Stop if reached the parallel region boundary.
  if (isParallelRegionBoundary(op->getParentOp()))
    return true;

  Operation *parent = op->getParentOp();
  // Otherwise, keep collecting above the parent operation.
  if (!parent->hasTrait<OpTrait::IsIsolatedFromAbove>() &&
      !getEffectsBefore(parent, effects, fencedAddressSpaces, stopAtBarrier))
    return false;

  // If the op is loop-like, collect effects from the trailing operations until
  // we hit a barrier because they can be executed before the current operation
  // by the previous iteration of this loop. For example, in the following loop
  //
  //   for i = ... {
  //     op1
  //     ...
  //     barrier
  //     op2
  //   }
  //
  // the operation `op2` at iteration `i` is known to be executed before the
  // operation `op1` at iteration `i+1` and the side effects must be ordered
  // appropriately.
  if (isSequentialLoopLike(parent)) {
    // Assuming loop terminators have no side effects.
    return getEffectsBeforeInBlock(op->getBlock()->getTerminator(), effects,
                                   fencedAddressSpaces, /*stopAtBarrier=*/true);
  }

  // If the parent operation is not guaranteed to execute its (single-block)
  // region once, walk the block.
  bool conservative = false;
  if (!hasSingleExecutionBody(op->getParentOp()))
    op->getParentOp()->walk([&](Operation *in) {
      if (conservative)
        return WalkResult::interrupt();
      if (!collectEffects(in, effects, fencedAddressSpaces)) {
        conservative = true;
        return WalkResult::interrupt();
      }
      return WalkResult::advance();
    });

  return !conservative;
}

/// Get all effects after the given operation caused by other operations in
/// the same block. That is, this will not consider operations beyond the
/// block.
static bool getEffectsAfterInBlock(
    Operation *op, SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
    std::optional<ArrayRef<gpu::AddressSpaceAttr>> fencedAddressSpaces,
    bool stopAtBarrier) {
  if (op == &op->getBlock()->back())
    return true;

  for (Operation *it = op->getNextNode(); it != nullptr;
       it = it->getNextNode()) {
    if (isBarrierWithCommonFencedMemory(it, fencedAddressSpaces)) {
      if (stopAtBarrier)
        return true;
      continue;
    }
    if (!collectEffects(it, effects, fencedAddressSpaces))
      return false;
  }
  return true;
}

/// Collects memory effects from operations that may be executed after `op` in
/// a trivial structured control flow, e.g., without branches. Stops at the
/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
/// set. Returns `true` if the memory effects added to `effects` are exact,
/// `false` if they are a conservative over-approximation. The latter means
/// that `effects` contains instances not associated with a specific value.
static bool getEffectsAfter(
    Operation *op, SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
    std::optional<ArrayRef<gpu::AddressSpaceAttr>> fencedAddressSpaces,
    bool stopAtBarrier) {
  if (!op->getBlock())
    return true;

  // If there is a non-structured control flow, bail.
  Region *region = op->getBlock()->getParent();
  if (region && !region->hasOneBlock()) {
    addAllValuelessEffects(effects);
    return false;
  }

  // Collect all effects after the op.
  getEffectsAfterInBlock(op, effects, fencedAddressSpaces, stopAtBarrier);

  Operation *parent = op->getParentOp();
  // Stop if reached the parallel region boundary.
  if (isParallelRegionBoundary(parent))
    return true;

  // Otherwise, keep collecting below the parent operation.
  // Don't look into, for example, neighboring functions.
  if (!parent->hasTrait<OpTrait::IsIsolatedFromAbove>() &&
      !getEffectsAfter(parent, effects, fencedAddressSpaces, stopAtBarrier))
    return false;

  // If the op is loop-like, collect effects from the leading operations until
  // we hit a barrier because they can be executed after the current operation
  // by the next iteration of this loop. For example, in the following loop
  //
  //   for i = ... {
  //     op1
  //     ...
  //     barrier
  //     op2
  //   }
  //
  // the operation `op1` at iteration `i` is known to be executed after the
  // operation `op2` at iteration `i-1` and the side effects must be ordered
  // appropriately.
  if (isSequentialLoopLike(parent)) {
    if (isBarrierWithCommonFencedMemory(&op->getBlock()->front(),
                                        fencedAddressSpaces))
      return true;

    bool exact =
        collectEffects(&op->getBlock()->front(), effects, fencedAddressSpaces);
    return getEffectsAfterInBlock(&op->getBlock()->front(), effects,
                                  fencedAddressSpaces,
                                  /*stopAtBarrier=*/true) &&
           exact;
  }

  // If the parent operation is not guaranteed to execute its (single-block)
  // region once, walk the block.
  bool conservative = false;
  if (!hasSingleExecutionBody(op->getParentOp()))
    op->getParentOp()->walk([&](Operation *in) {
      if (conservative)
        return WalkResult::interrupt();
      if (!collectEffects(in, effects, fencedAddressSpaces)) {
        conservative = true;
        return WalkResult::interrupt();
      }
      return WalkResult::advance();
    });

  return !conservative;
}

/// Returns `true` if the value is defined as a function argument.
static bool isFunctionArgument(Value v) {
  auto arg = dyn_cast<BlockArgument>(v);
  return arg && isa<FunctionOpInterface>(arg.getOwner()->getParentOp());
}

/// Returns the operand that the operation "propagates" through it for capture
/// purposes. That is, if the value produced by this operation is captured,
/// then so is the returned value.
static Value propagatesCapture(Operation *op) {
  return llvm::TypeSwitch<Operation *, Value>(op)
      .Case(
          [](ViewLikeOpInterface viewLike) { return viewLike.getViewSource(); })
      .Case([](CastOpInterface castLike) { return castLike->getOperand(0); })
      .Case([](memref::TransposeOp transpose) { return transpose.getIn(); })
      .Default(nullptr);
}

/// Returns `true` if the given operation is known to capture the given value,
/// `false` if it is known not to capture the given value, `nullopt` if neither
/// is known.
static std::optional<bool> getKnownCapturingStatus(Operation *op, Value v) {
  return llvm::TypeSwitch<Operation *, std::optional<bool>>(op)
      // Store-like operations don't capture the destination, but do capture
      // the value.
      .Case<memref::StoreOp, vector::TransferWriteOp>(
          [&](auto op) { return op.getValue() == v; })
      .Case<vector::StoreOp, vector::MaskedStoreOp>(
          [&](auto op) { return op.getValueToStore() == v; })
      // These operations are known not to capture.
      .Case([](memref::DeallocOp) { return false; })
      // By default, we don't know anything.
      .Default(std::nullopt);
}
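
// For example (illustrative): in `memref.store %a, %b[%i]`, the stored value
// %a counts as captured (it can be loaded back and used to access memory),
// while the destination %b is not captured by the store itself.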

/// Returns `true` if the value may be captured by any of its users, i.e., if
/// the user may be storing this value into memory. This makes aliasing
/// analysis more conservative as it cannot assume the pointer-like value is
/// only passed around through SSA use-def chains.
static bool maybeCaptured(Value v) {
  SmallVector<Value> todo = {v};
  while (!todo.empty()) {
    Value v = todo.pop_back_val();
    for (Operation *user : v.getUsers()) {
      // A user that is known to only read cannot capture.
      auto iface = dyn_cast<MemoryEffectOpInterface>(user);
      if (iface) {
        SmallVector<MemoryEffects::EffectInstance> effects;
        iface.getEffects(effects);
        if (llvm::all_of(effects,
                         [](const MemoryEffects::EffectInstance &effect) {
                           return isa<MemoryEffects::Read>(effect.getEffect());
                         })) {
          continue;
        }
      }

      // When an operation is known to create an alias, consider if the
      // source is captured as well.
      if (Value v = propagatesCapture(user)) {
        todo.push_back(v);
        continue;
      }

      std::optional<bool> knownCaptureStatus = getKnownCapturingStatus(user, v);
      if (!knownCaptureStatus || *knownCaptureStatus)
        return true;
    }
  }

  return false;
}
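
// For example (an illustrative sketch): %alloc below is captured because it
// is stored into another memref, so the same buffer may later be reached
// through a different SSA value loaded from %table.
//
//   %alloc = memref.alloc() : memref<16xf32>
//   memref.store %alloc, %table[%c0] : memref<4xmemref<16xf32>>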

/// Returns `true` if two values may be referencing aliasing memory. This is a
/// rather naive and conservative analysis. Values defined by different
/// allocation-like operations as well as values derived from those by casts
/// and views cannot alias each other. Similarly, values defined by allocations
/// inside a function cannot alias function arguments. Global values cannot
/// alias each other or local allocations. Values that are captured, i.e.,
/// themselves potentially stored in memory, are considered as aliasing with
/// everything. This seems sufficient to achieve barrier removal in structured
/// control flow; more complex cases would require a proper dataflow analysis.
static bool mayAlias(Value first, Value second) {
  LDBG(DEBUG_TYPE_ALIAS, 1)
      << "checking aliasing between " << first << " and " << second;

  first = getBase(first);
  second = getBase(second);

  LDBG(DEBUG_TYPE_ALIAS, 1) << "base " << first << " and " << second;

  // Values derived from the same base memref do alias (unless we do a more
  // advanced analysis to prove non-overlapping accesses).
  if (first == second) {
    LDBG(DEBUG_TYPE_ALIAS, 1) << "-> do alias!";
    return true;
  }

  // Different globals cannot alias.
  if (auto globFirst = first.getDefiningOp<memref::GetGlobalOp>()) {
    if (auto globSecond = second.getDefiningOp<memref::GetGlobalOp>()) {
      return globFirst.getNameAttr() == globSecond.getNameAttr();
    }
  }

  // Two function arguments marked as noalias do not alias.
  auto isNoaliasFuncArgument = [](Value value) {
    auto bbArg = dyn_cast<BlockArgument>(value);
    if (!bbArg)
      return false;
    auto iface = dyn_cast<FunctionOpInterface>(bbArg.getOwner()->getParentOp());
    if (!iface)
      return false;
    // TODO: we need a way to not depend on the LLVM dialect here.
    return iface.getArgAttr(bbArg.getArgNumber(), "llvm.noalias") != nullptr;
  };
  if (isNoaliasFuncArgument(first) && isNoaliasFuncArgument(second))
    return false;

  bool isDistinct[] = {producesDistinctBase(first.getDefiningOp()),
                       producesDistinctBase(second.getDefiningOp())};
  bool isGlobal[] = {first.getDefiningOp<memref::GetGlobalOp>() != nullptr,
                     second.getDefiningOp<memref::GetGlobalOp>() != nullptr};

  // Non-equivalent distinct bases and globals cannot alias. At this point, we
  // have already filtered out the cases where the values or the global names
  // are equal.
  if ((isDistinct[0] || isGlobal[0]) && (isDistinct[1] || isGlobal[1]))
    return false;

  bool isArg[] = {isFunctionArgument(first), isFunctionArgument(second)};

  // Distinct bases (allocations) cannot have been passed as an argument.
  if ((isDistinct[0] && isArg[1]) || (isDistinct[1] && isArg[0]))
    return false;

  // Non-captured distinct base values cannot conflict with another base value.
  if (isDistinct[0] && !maybeCaptured(first))
    return false;
  if (isDistinct[1] && !maybeCaptured(second))
    return false;

  // Otherwise, conservatively assume aliasing.
  LDBG(DEBUG_TYPE_ALIAS, 1) << "-> may alias!";
  return true;
}
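
// For example (illustrative): two distinct allocations never alias, and a
// non-captured allocation cannot alias a function argument.
//
//   %a = memref.alloc() : memref<16xf32>  // mayAlias(%a, %b) is false.
//   %b = memref.alloc() : memref<16xf32>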

/// Returns `true` if the effect may be affecting memory aliasing the value. If
/// the effect is not associated with any value, it is assumed to affect all
/// memory and therefore aliases with everything.
static bool mayAlias(const MemoryEffects::EffectInstance &a, Value v2) {
  if (Value v = a.getValue()) {
    return mayAlias(v, v2);
  }
  return true;
}

/// Returns `true` if the two effects may be affecting aliasing memory. If
/// an effect is not associated with any value, it is assumed to affect all
/// memory and therefore aliases with everything. Effects on different
/// resources cannot alias.
static bool mayAlias(const MemoryEffects::EffectInstance &a,
                     const MemoryEffects::EffectInstance &b) {
  if (a.getResource()->getResourceID() != b.getResource()->getResourceID())
    return false;
  if (Value v2 = b.getValue()) {
    return mayAlias(a, v2);
  }
  if (Value v = a.getValue()) {
    return mayAlias(b, v);
  }
  return true;
}

/// Returns `true` if any of the "before" effect instances has a conflict with
/// any "after" instance for the purpose of barrier elimination. The effects
/// are supposed to be limited to a barrier synchronization scope. A conflict
/// exists if effect instances affect aliasing memory locations and at least
/// one of them is a write. As an exception, if the non-write effect is an
/// allocation effect, there is no conflict since we are only expected to see
/// the allocation happening in the same thread and it cannot be accessed from
/// another thread without capture (which we do handle in alias analysis).
static bool
haveConflictingEffects(ArrayRef<MemoryEffects::EffectInstance> beforeEffects,
                       ArrayRef<MemoryEffects::EffectInstance> afterEffects) {
  for (const MemoryEffects::EffectInstance &before : beforeEffects) {
    for (const MemoryEffects::EffectInstance &after : afterEffects) {
      // If the effects cannot alias, there is definitely no conflict.
      if (!mayAlias(before, after))
        continue;

      // Read/read is not a conflict.
      if (isa<MemoryEffects::Read>(before.getEffect()) &&
          isa<MemoryEffects::Read>(after.getEffect())) {
        continue;
      }

      // Allocate/* is not a conflict since the allocation happens within the
      // thread context.
      // TODO: This is not the case for */Free unless the allocation happened
      // in the thread context, which we could also check for.
      if (isa<MemoryEffects::Allocate>(before.getEffect()) ||
          isa<MemoryEffects::Allocate>(after.getEffect())) {
        continue;
      }

      // In the particular case that the before effect is a free, we only have
      // two possibilities:
      // 1. the program is well-formed, so there must be an interleaved alloc
      //    that limits the scope of effect lookback, and we can safely ignore
      //    the free -> read, free -> write, and free -> free conflicts; or
      // 2. the program is ill-formed and we are in undefined behavior
      //    territory.
      if (isa<MemoryEffects::Free>(before.getEffect()))
        continue;

      // Other kinds of effects create a conflict, e.g., read-after-write.
      LDBG() << "found a conflict between (before): " << before.getValue()
             << " read:" << isa<MemoryEffects::Read>(before.getEffect())
             << " write:" << isa<MemoryEffects::Write>(before.getEffect())
             << " alloc:" << isa<MemoryEffects::Allocate>(before.getEffect())
             << " free:" << isa<MemoryEffects::Free>(before.getEffect());
      LDBG() << "and (after): " << after.getValue()
             << " read:" << isa<MemoryEffects::Read>(after.getEffect())
             << " write:" << isa<MemoryEffects::Write>(after.getEffect())
             << " alloc:" << isa<MemoryEffects::Allocate>(after.getEffect())
             << " free:" << isa<MemoryEffects::Free>(after.getEffect());
      return true;
    }
  }

  return false;
}
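
// For example (illustrative): a write to %buf before the barrier paired with
// a read of anything that may alias %buf after it is a write/read conflict,
// so the barrier must stay. Two reads of the same memref, or accesses to
// provably non-aliasing memrefs, create no conflict and do not prevent
// elimination.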

namespace {
class BarrierElimination final : public OpRewritePattern<BarrierOp> {
public:
  using OpRewritePattern<BarrierOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(BarrierOp barrier,
                                PatternRewriter &rewriter) const override {
    LDBG() << "checking the necessity of: " << barrier << " "
           << barrier.getLoc();

    std::optional<ArrayAttr> fencedMemSpaces = barrier.getAddressSpaces();
    if (fencedMemSpaces && fencedMemSpaces->empty()) {
      LDBG()
          << "barrier is not used to synchronize memory accesses, retain it";
      return failure();
    }

    // Convert the fenced address spaces to the proper type for passing
    // through.
    SmallVector<gpu::AddressSpaceAttr> fencedSpacesStorage;
    std::optional<ArrayRef<gpu::AddressSpaceAttr>> fencedSpaces;
    if (fencedMemSpaces) {
      fencedSpacesStorage = llvm::map_to_vector(
          *fencedMemSpaces, llvm::CastTo<gpu::AddressSpaceAttr>);
      fencedSpaces = fencedSpacesStorage;
    }

    SmallVector<MemoryEffects::EffectInstance> beforeEffects;
    getEffectsBefore(barrier, beforeEffects, fencedSpaces,
                     /*stopAtBarrier=*/true);

    SmallVector<MemoryEffects::EffectInstance> afterEffects;
    getEffectsAfter(barrier, afterEffects, fencedSpaces,
                    /*stopAtBarrier=*/true);

    if (!haveConflictingEffects(beforeEffects, afterEffects)) {
      LDBG() << "the surrounding barriers are sufficient, removing "
             << barrier;
      rewriter.eraseOp(barrier);
      return success();
    }

    LDBG() << "barrier is necessary: " << barrier << " " << barrier.getLoc();
    return failure();
  }
};

class GpuEliminateBarriersPass
    : public impl::GpuEliminateBarriersBase<GpuEliminateBarriersPass> {
  void runOnOperation() override {
    auto funcOp = getOperation();
    RewritePatternSet patterns(&getContext());
    populateGpuEliminateBarriersPatterns(patterns);
    if (failed(applyPatternsGreedily(funcOp, std::move(patterns)))) {
      return signalPassFailure();
    }
  }
};

} // namespace

void mlir::populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns) {
  patterns.insert<BarrierElimination>(patterns.getContext());
}
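
// The pattern above backs the pass generated from
// GEN_PASS_DEF_GPUELIMINATEBARRIERS; assuming the usual tablegen naming in
// Passes.td, it can be exercised as (sketch):
//
//   mlir-opt input.mlir --gpu-eliminate-barriers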