26#include "llvm/ADT/SetOperations.h"
27#include "llvm/ADT/TypeSwitch.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/DebugLog.h"
32#define GEN_PASS_DEF_GPUELIMINATEBARRIERS
33#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
39#define DEBUG_TYPE "gpu-erase-barriers"
40#define DEBUG_TYPE_ALIAS "gpu-erase-barries-alias"
48 if (op->
hasAttr(
"__parallel_region_boundary_for_test"))
51 return isa<GPUFuncOp, LaunchOp>(op);
62 return isa<FunctionOpInterface, scf::IfOp, memref::AllocaScopeOp>(op);
69 return isa_and_nonnull<memref::AllocOp, memref::AllocaOp>(op);
85 if (
auto viewLike = dyn_cast<ViewLikeOpInterface>(definingOp)) {
86 v = viewLike.getViewSource();
89 if (
auto transposeOp = dyn_cast<memref::TransposeOp>(definingOp)) {
90 v = transposeOp.getIn();
106 if (!fencedAddressSpaces)
109 auto gpuMemSpace = dyn_cast_if_present<gpu::AddressSpaceAttr>(memorySpace);
114 return llvm::is_contained(*fencedAddressSpaces, gpuMemSpace);
127 if (!fencedAddressSpaces)
134 auto mightMatch = [&](
Value v) {
135 auto memrefType = dyn_cast<BaseMemRefType>(v.getType());
139 fencedAddressSpaces);
142 if (!mightMatch(value))
146 while (
auto viewLike = base.
getDefiningOp<ViewLikeOpInterface>()) {
147 base = viewLike.getViewSource();
150 if (!mightMatch(base))
162 auto barrier = dyn_cast<BarrierOp>(op);
166 std::optional<ArrayAttr> otherFencedSpaces = barrier.getAddressSpaces();
168 if (!otherFencedSpaces)
171 if (otherFencedSpaces->empty())
176 if (!fencedAddressSpaces)
180 otherFencedSpaces->getAsRange<gpu::AddressSpaceAttr>(),
181 [&](
auto a) { return llvm::is_contained(*fencedAddressSpaces, a); });
196 bool ignoreBarriers =
true) {
199 if (ignoreBarriers && isa<BarrierOp>(op))
206 if (
auto iface = dyn_cast<MemoryEffectOpInterface>(op)) {
208 iface.getEffects(localEffects);
213 effects.push_back(effect);
219 for (
auto &block : region) {
220 for (
auto &innerOp : block)
240 bool stopAtBarrier) {
244 for (
Operation *it = op->getPrevNode(); it !=
nullptr;
245 it = it->getPrevNode()) {
267 bool stopAtBarrier) {
308 fencedAddressSpaces,
true);
313 bool conservative =
false;
325 return !conservative;
333 bool stopAtBarrier) {
337 for (
Operation *it = op->getNextNode(); it !=
nullptr;
338 it = it->getNextNode()) {
359 bool stopAtBarrier) {
400 fencedAddressSpaces))
413 bool conservative =
false;
425 return !conservative;
430 auto arg = dyn_cast<BlockArgument>(v);
431 return arg && isa<FunctionOpInterface>(arg.getOwner()->getParentOp());
440 [](ViewLikeOpInterface viewLike) {
return viewLike.getViewSource(); })
441 .Case([](CastOpInterface castLike) {
return castLike->getOperand(0); })
442 .Case([](memref::TransposeOp transpose) {
return transpose.getIn(); })
453 .Case<memref::StoreOp, vector::TransferWriteOp>(
454 [&](
auto op) {
return op.getValue() == v; })
455 .Case<vector::StoreOp, vector::MaskedStoreOp>(
456 [&](
auto op) {
return op.getValueToStore() == v; })
458 .Case([](memref::DeallocOp) {
return false; })
460 .Default(std::nullopt);
469 while (!todo.empty()) {
470 Value v = todo.pop_back_val();
473 auto iface = dyn_cast<MemoryEffectOpInterface>(user);
476 iface.getEffects(effects);
477 if (llvm::all_of(effects,
479 return isa<MemoryEffects::Read>(effect.
getEffect());
493 if (!knownCaptureStatus || *knownCaptureStatus)
512 <<
"checking aliasing between " << first <<
" and " << second;
521 if (first == second) {
527 if (
auto globFirst = first.
getDefiningOp<memref::GetGlobalOp>()) {
528 if (
auto globSecond = second.
getDefiningOp<memref::GetGlobalOp>()) {
529 return globFirst.getNameAttr() == globSecond.getNameAttr();
534 auto isNoaliasFuncArgument = [](
Value value) {
535 auto bbArg = dyn_cast<BlockArgument>(value);
538 auto iface = dyn_cast<FunctionOpInterface>(bbArg.getOwner()->getParentOp());
542 return iface.getArgAttr(bbArg.getArgNumber(),
"llvm.noalias") !=
nullptr;
544 if (isNoaliasFuncArgument(first) && isNoaliasFuncArgument(second))
549 bool isGlobal[] = {first.
getDefiningOp<memref::GetGlobalOp>() !=
nullptr,
555 if ((isDistinct[0] || isGlobal[0]) && (isDistinct[1] || isGlobal[1]))
561 if ((isDistinct[0] && isArg[1]) || (isDistinct[1] && isArg[0]))
593 if (
Value v2 =
b.getValue()) {
620 if (isa<MemoryEffects::Read>(before.getEffect()) &&
621 isa<MemoryEffects::Read>(after.getEffect())) {
629 if (isa<MemoryEffects::Allocate>(before.getEffect()) ||
630 isa<MemoryEffects::Allocate>(after.getEffect())) {
642 if (isa<MemoryEffects::Free>(before.getEffect()))
646 LDBG() <<
"found a conflict between (before): " << before.getValue()
647 <<
" read:" << isa<MemoryEffects::Read>(before.getEffect())
648 <<
" write:" << isa<MemoryEffects::Write>(before.getEffect())
649 <<
" alloc:" << isa<MemoryEffects::Allocate>(before.getEffect())
650 <<
" free:" << isa<MemoryEffects::Free>(before.getEffect());
651 LDBG() <<
"and (after): " << after.getValue()
652 <<
" read:" << isa<MemoryEffects::Read>(after.getEffect())
653 <<
" write:" << isa<MemoryEffects::Write>(after.getEffect())
654 <<
" alloc:" << isa<MemoryEffects::Allocate>(after.getEffect())
655 <<
" free:" << isa<MemoryEffects::Free>(after.getEffect());
666 using OpRewritePattern<BarrierOp>::OpRewritePattern;
668 LogicalResult matchAndRewrite(BarrierOp barrier,
669 PatternRewriter &rewriter)
const override {
670 LDBG() <<
"checking the necessity of: " << barrier <<
" "
673 std::optional<ArrayAttr> fencedMemSpaces = barrier.getAddressSpaces();
674 if (fencedMemSpaces && fencedMemSpaces->empty()) {
676 <<
"barrier is not used to synchronize memory accesses, retain it\n";
681 SmallVector<gpu::AddressSpaceAttr> fencedSpacesStorage;
682 std::optional<ArrayRef<gpu::AddressSpaceAttr>> fencedSpaces;
683 if (fencedMemSpaces) {
684 fencedSpacesStorage = llvm::map_to_vector(
685 *fencedMemSpaces, llvm::CastTo<gpu::AddressSpaceAttr>);
686 fencedSpaces = fencedSpacesStorage;
689 SmallVector<MemoryEffects::EffectInstance> beforeEffects;
693 SmallVector<MemoryEffects::EffectInstance> afterEffects;
698 LDBG() <<
"the surrounding barriers are sufficient, removing " << barrier;
703 LDBG() <<
"barrier is necessary: " << barrier <<
" " << barrier.getLoc();
708class GpuEliminateBarriersPass
710 void runOnOperation()
override {
711 auto funcOp = getOperation();
715 return signalPassFailure();
static bool getEffectsAfter(Operation *op, SmallVectorImpl< MemoryEffects::EffectInstance > &effects, std::optional< ArrayRef< gpu::AddressSpaceAttr > > fencedAddressSpaces, bool stopAtBarrier)
Collects memory effects from operations that may be executed after op in a trivial structured control...
static bool isSequentialLoopLike(Operation *op)
Returns true if the op behaves like a sequential loop, e.g., the control flow "wraps around" from the...
static std::optional< bool > getKnownCapturingStatus(Operation *op, Value v)
Returns true if the given operation is known to capture the given value, false if it is known not to ...
static bool collectEffects(Operation *op, SmallVectorImpl< MemoryEffects::EffectInstance > &effects, std::optional< ArrayRef< gpu::AddressSpaceAttr > > fencedAddressSpaces, bool ignoreBarriers=true)
Collect the memory effects of the given op in 'effects'.
static bool isFunctionArgument(Value v)
Returns true if the value is defined as a function argument.
static Value getBase(Value v)
Looks through known "view-like" ops to find the base memref.
static bool getEffectsBeforeInBlock(Operation *op, SmallVectorImpl< MemoryEffects::EffectInstance > &effects, std::optional< ArrayRef< gpu::AddressSpaceAttr > > fencedAddressSpaces, bool stopAtBarrier)
Get all effects before the given operation caused by other operations in the same block.
static bool getEffectsAfterInBlock(Operation *op, SmallVectorImpl< MemoryEffects::EffectInstance > &effects, std::optional< ArrayRef< gpu::AddressSpaceAttr > > fencedAddressSpaces, bool stopAtBarrier)
Get all effects after the given operation caused by other operations in the same block.
static Value propagatesCapture(Operation *op)
Returns the operand that the operation "propagates" through it for capture purposes.
static bool hasSingleExecutionBody(Operation *op)
Returns true if the regions of the op are guaranteed to be executed at most once.
static LogicalResult effectMightAffectAddressSpaces(const MemoryEffects::EffectInstance &effect, std::optional< ArrayRef< gpu::AddressSpaceAttr > > fencedAddressSpaces)
Succeeds if the effect operates on a memref whose memory space could be one of the given fenced addre...
static bool producesDistinctBase(Operation *op)
Returns true if the operation is known to produce a pointer-like object distinct from any other objec...
static bool mayAlias(Value first, Value second)
Returns true if two values may be referencing aliasing memory.
static bool isParallelRegionBoundary(Operation *op)
Returns true if the op defines the parallel region that is subject to barrier synchronization.
static bool isAddressSpacePotentiallyFenced(Attribute memorySpace, std::optional< ArrayRef< gpu::AddressSpaceAttr > > fencedAddressSpaces)
Returns true if accesses to the given memory space could potentially be fenced by a barrier synchroni...
static bool getEffectsBefore(Operation *op, SmallVectorImpl< MemoryEffects::EffectInstance > &effects, std::optional< ArrayRef< gpu::AddressSpaceAttr > > fencedAddressSpaces, bool stopAtBarrier)
Collects memory effects from operations that may be executed before op in a trivial structured contro...
static bool haveConflictingEffects(ArrayRef< MemoryEffects::EffectInstance > beforeEffects, ArrayRef< MemoryEffects::EffectInstance > afterEffects)
Returns true if any of the "before" effect instances has a conflict with any "after" instance for the...
static bool isBarrierWithCommonFencedMemory(Operation *op, std::optional< ArrayRef< gpu::AddressSpaceAttr > > fencedAddressSpaces)
Returns true if op is a BarrierOp that fences any address spaces that could overlap with the given fe...
static void addAllValuelessEffects(SmallVectorImpl< MemoryEffects::EffectInstance > &effects)
Populates effects with all memory effects without associating them to a specific value.
static bool maybeCaptured(Value v)
Returns true if the value may be captured by any of its users, i.e., if the user may be storing this ...
Attributes are known-constant values of operations.
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
Operation * getTerminator()
Get the terminator operation of this block.
This trait indicates that the memory effects of an operation includes the effects of operations neste...
This class provides the API for ops that are known to be isolated from above.
Operation is the basic unit of execution within MLIR.
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
Block * getBlock()
Returns the operation block that contains this operation.
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
This class contains a list of basic blocks and a link to the parent operation it is attached to.
bool hasOneBlock()
Return true if this region has exactly one block.
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
Resource * getResource() const
Return the resource that the effect applies to.
EffectT * getEffect() const
Return the effect being applied.
Value getValue() const
Return the value the effect is applied on, or nullptr if there isn't a known value being affected.
static DerivedEffect * get()
Returns a unique instance for the given effect class.
TypeID getResourceID() const
Return the unique identifier for the base resource class.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
user_range getUsers() const
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static WalkResult advance()
static WalkResult interrupt()
SideEffects::EffectInstance< Effect > EffectInstance
Include the generated interface declarations.
LogicalResult applyPatternsGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
const FrozenRewritePatternSet & patterns
void populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns)
Erase barriers that do not enforce conflicting memory side effects.
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...